[llvm] [SelectionDAG] Use unaligned store to legalize `EXTRACT_VECTOR_ELT` type (PR #98176)
Manish Kausik H via llvm-commits
llvm-commits at lists.llvm.org
Fri Jul 12 08:47:59 PDT 2024
https://github.com/Nirhar updated https://github.com/llvm/llvm-project/pull/98176
>From ac220b623f90757f1dddf4ef5e1ee2eba06ce467 Mon Sep 17 00:00:00 2001
From: Manish Kausik H <hmamishkausik at gmail.com>
Date: Fri, 12 Jul 2024 21:14:13 +0530
Subject: [PATCH] [SelectionDAG] Use unaligned store to legalize
`EXTRACT_VECTOR_ELT` type
This patch clamps the alignment of the stack-temporary stores generated during
type legalization of an extractelement instruction so that it never exceeds the
stack alignment.
Fixes #98044
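
For reference, the new X86 test added by this patch
(llvm/test/CodeGen/X86/unaligned_extract_from_vector_through_stack.ll) is a
minimal reproducer of the affected pattern: a variable-index extractelement
from a vector type that must be split during legalization, in a function marked
"no-realign-stack":

  define i32 @foo2(i32 %arg1) #1 {
  entry:
    %a = extractelement <32 x i16> zeroinitializer, i32 %arg1
    %b = zext i16 %a to i32
    ret i32 %b
  }
  attributes #1 = { "no-realign-stack" "target-cpu"="skylake" }

With this change the vector is spilled to the stack temporary with unaligned
stores (vmovups) at the natural stack alignment instead of requiring the frame
to be realigned to the vector alignment.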
---
.../SelectionDAG/LegalizeVectorTypes.cpp | 5 +-
llvm/test/CodeGen/RISCV/rvv/extractelt-fp.ll | 141 ++++++-----------
llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll | 117 +++++---------
.../CodeGen/RISCV/rvv/extractelt-int-rv32.ll | 44 ++----
.../CodeGen/RISCV/rvv/extractelt-int-rv64.ll | 44 ++----
.../RISCV/rvv/fixed-vectors-extract-i1.ll | 146 +++---------------
.../RISCV/rvv/fixed-vectors-extract.ll | 75 +++------
llvm/test/CodeGen/RISCV/rvv/pr88576.ll | 22 +--
llvm/test/CodeGen/X86/avx512fp16-mov.ll | 26 ++--
llvm/test/CodeGen/X86/gep-expanded-vector.ll | 26 ++--
llvm/test/CodeGen/X86/i64-mem-copy.ll | 40 ++---
...igned_extract_from_vector_through_stack.ll | 18 +++
llvm/test/CodeGen/X86/vector-extend-inreg.ll | 74 +++++----
13 files changed, 258 insertions(+), 520 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index bbf08e862da12..8887ddbb2a69c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -24,6 +24,7 @@
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TypeSize.h"
@@ -3531,7 +3532,9 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
// Store the vector to the stack.
// In cases where the vector is illegal it will be broken down into parts
// and stored in parts - we should use the alignment for the smallest part.
- Align SmallestAlign = DAG.getReducedAlign(VecVT, /*UseABI=*/false);
+ Align SmallestAlign =
+ std::min(DAG.getSubtarget().getFrameLowering()->getStackAlign(),
+ DAG.getReducedAlign(VecVT, /*UseABI=*/false));
SDValue StackPtr =
DAG.CreateStackTemporary(VecVT.getStoreSize(), SmallestAlign);
auto &MF = DAG.getMachineFunction();
diff --git a/llvm/test/CodeGen/RISCV/rvv/extractelt-fp.ll b/llvm/test/CodeGen/RISCV/rvv/extractelt-fp.ll
index 3b7952f9f5e6d..2f2cecfa01aad 100644
--- a/llvm/test/CodeGen/RISCV/rvv/extractelt-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/extractelt-fp.ll
@@ -585,19 +585,13 @@ define double @extractelt_nxv16f64_0(<vscale x 16 x double> %v) {
define double @extractelt_nxv16f64_neg1(<vscale x 16 x double> %v) {
; RV32-LABEL: extractelt_nxv16f64_neg1:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -80
-; RV32-NEXT: .cfi_def_cfa_offset 80
-; RV32-NEXT: sw ra, 76(sp) # 4-byte Folded Spill
-; RV32-NEXT: sw s0, 72(sp) # 4-byte Folded Spill
-; RV32-NEXT: .cfi_offset ra, -4
-; RV32-NEXT: .cfi_offset s0, -8
-; RV32-NEXT: addi s0, sp, 80
-; RV32-NEXT: .cfi_def_cfa s0, 0
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 4
; RV32-NEXT: sub sp, sp, a0
-; RV32-NEXT: andi sp, sp, -64
-; RV32-NEXT: addi a0, sp, 64
+; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; RV32-NEXT: addi a0, sp, 16
; RV32-NEXT: vs8r.v v8, (a0)
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: slli a2, a1, 3
@@ -606,27 +600,21 @@ define double @extractelt_nxv16f64_neg1(<vscale x 16 x double> %v) {
; RV32-NEXT: slli a1, a1, 4
; RV32-NEXT: add a0, a1, a0
; RV32-NEXT: fld fa0, -8(a0)
-; RV32-NEXT: addi sp, s0, -80
-; RV32-NEXT: lw ra, 76(sp) # 4-byte Folded Reload
-; RV32-NEXT: lw s0, 72(sp) # 4-byte Folded Reload
-; RV32-NEXT: addi sp, sp, 80
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 4
+; RV32-NEXT: add sp, sp, a0
+; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
; RV64-LABEL: extractelt_nxv16f64_neg1:
; RV64: # %bb.0:
-; RV64-NEXT: addi sp, sp, -80
-; RV64-NEXT: .cfi_def_cfa_offset 80
-; RV64-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
-; RV64-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
-; RV64-NEXT: .cfi_offset ra, -8
-; RV64-NEXT: .cfi_offset s0, -16
-; RV64-NEXT: addi s0, sp, 80
-; RV64-NEXT: .cfi_def_cfa s0, 0
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: .cfi_def_cfa_offset 16
; RV64-NEXT: csrr a0, vlenb
; RV64-NEXT: slli a0, a0, 4
; RV64-NEXT: sub sp, sp, a0
-; RV64-NEXT: andi sp, sp, -64
-; RV64-NEXT: addi a0, sp, 64
+; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; RV64-NEXT: addi a0, sp, 16
; RV64-NEXT: vs8r.v v8, (a0)
; RV64-NEXT: csrr a2, vlenb
; RV64-NEXT: slli a1, a2, 3
@@ -643,10 +631,10 @@ define double @extractelt_nxv16f64_neg1(<vscale x 16 x double> %v) {
; RV64-NEXT: slli a2, a2, 3
; RV64-NEXT: add a0, a0, a2
; RV64-NEXT: fld fa0, 0(a0)
-; RV64-NEXT: addi sp, s0, -80
-; RV64-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
-; RV64-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
-; RV64-NEXT: addi sp, sp, 80
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 4
+; RV64-NEXT: add sp, sp, a0
+; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: ret
%r = extractelement <vscale x 16 x double> %v, i32 -1
ret double %r
@@ -664,75 +652,34 @@ define double @extractelt_nxv16f64_imm(<vscale x 16 x double> %v) {
}
define double @extractelt_nxv16f64_idx(<vscale x 16 x double> %v, i32 zeroext %idx) {
-; RV32-LABEL: extractelt_nxv16f64_idx:
-; RV32: # %bb.0:
-; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: slli a2, a1, 1
-; RV32-NEXT: addi a2, a2, -1
-; RV32-NEXT: bltu a0, a2, .LBB54_2
-; RV32-NEXT: # %bb.1:
-; RV32-NEXT: mv a0, a2
-; RV32-NEXT: .LBB54_2:
-; RV32-NEXT: addi sp, sp, -80
-; RV32-NEXT: .cfi_def_cfa_offset 80
-; RV32-NEXT: sw ra, 76(sp) # 4-byte Folded Spill
-; RV32-NEXT: sw s0, 72(sp) # 4-byte Folded Spill
-; RV32-NEXT: .cfi_offset ra, -4
-; RV32-NEXT: .cfi_offset s0, -8
-; RV32-NEXT: addi s0, sp, 80
-; RV32-NEXT: .cfi_def_cfa s0, 0
-; RV32-NEXT: csrr a2, vlenb
-; RV32-NEXT: slli a2, a2, 4
-; RV32-NEXT: sub sp, sp, a2
-; RV32-NEXT: andi sp, sp, -64
-; RV32-NEXT: slli a0, a0, 3
-; RV32-NEXT: addi a2, sp, 64
-; RV32-NEXT: add a0, a2, a0
-; RV32-NEXT: vs8r.v v8, (a2)
-; RV32-NEXT: slli a1, a1, 3
-; RV32-NEXT: add a1, a2, a1
-; RV32-NEXT: vs8r.v v16, (a1)
-; RV32-NEXT: fld fa0, 0(a0)
-; RV32-NEXT: addi sp, s0, -80
-; RV32-NEXT: lw ra, 76(sp) # 4-byte Folded Reload
-; RV32-NEXT: lw s0, 72(sp) # 4-byte Folded Reload
-; RV32-NEXT: addi sp, sp, 80
-; RV32-NEXT: ret
-;
-; RV64-LABEL: extractelt_nxv16f64_idx:
-; RV64: # %bb.0:
-; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: slli a2, a1, 1
-; RV64-NEXT: addi a2, a2, -1
-; RV64-NEXT: bltu a0, a2, .LBB54_2
-; RV64-NEXT: # %bb.1:
-; RV64-NEXT: mv a0, a2
-; RV64-NEXT: .LBB54_2:
-; RV64-NEXT: addi sp, sp, -80
-; RV64-NEXT: .cfi_def_cfa_offset 80
-; RV64-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
-; RV64-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
-; RV64-NEXT: .cfi_offset ra, -8
-; RV64-NEXT: .cfi_offset s0, -16
-; RV64-NEXT: addi s0, sp, 80
-; RV64-NEXT: .cfi_def_cfa s0, 0
-; RV64-NEXT: csrr a2, vlenb
-; RV64-NEXT: slli a2, a2, 4
-; RV64-NEXT: sub sp, sp, a2
-; RV64-NEXT: andi sp, sp, -64
-; RV64-NEXT: slli a0, a0, 3
-; RV64-NEXT: addi a2, sp, 64
-; RV64-NEXT: add a0, a2, a0
-; RV64-NEXT: vs8r.v v8, (a2)
-; RV64-NEXT: slli a1, a1, 3
-; RV64-NEXT: add a1, a2, a1
-; RV64-NEXT: vs8r.v v16, (a1)
-; RV64-NEXT: fld fa0, 0(a0)
-; RV64-NEXT: addi sp, s0, -80
-; RV64-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
-; RV64-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
-; RV64-NEXT: addi sp, sp, 80
-; RV64-NEXT: ret
+; CHECK-LABEL: extractelt_nxv16f64_idx:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 1
+; CHECK-NEXT: addi a2, a2, -1
+; CHECK-NEXT: bltu a0, a2, .LBB54_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a0, a2
+; CHECK-NEXT: .LBB54_2:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 4
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: add a0, a2, a0
+; CHECK-NEXT: vs8r.v v8, (a2)
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: vs8r.v v16, (a1)
+; CHECK-NEXT: fld fa0, 0(a0)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
%r = extractelement <vscale x 16 x double> %v, i32 %idx
ret double %r
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll b/llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll
index 14719e190a693..44ef639b68e29 100644
--- a/llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll
@@ -122,85 +122,41 @@ define i1 @extractelt_nxv64i1(ptr %x, i64 %idx) nounwind {
}
define i1 @extractelt_nxv128i1(ptr %x, i64 %idx) nounwind {
-; RV32-LABEL: extractelt_nxv128i1:
-; RV32: # %bb.0:
-; RV32-NEXT: csrr a2, vlenb
-; RV32-NEXT: slli a3, a2, 4
-; RV32-NEXT: addi a3, a3, -1
-; RV32-NEXT: bltu a1, a3, .LBB7_2
-; RV32-NEXT: # %bb.1:
-; RV32-NEXT: mv a1, a3
-; RV32-NEXT: .LBB7_2:
-; RV32-NEXT: addi sp, sp, -80
-; RV32-NEXT: sw ra, 76(sp) # 4-byte Folded Spill
-; RV32-NEXT: sw s0, 72(sp) # 4-byte Folded Spill
-; RV32-NEXT: addi s0, sp, 80
-; RV32-NEXT: csrr a3, vlenb
-; RV32-NEXT: slli a3, a3, 4
-; RV32-NEXT: sub sp, sp, a3
-; RV32-NEXT: andi sp, sp, -64
-; RV32-NEXT: slli a2, a2, 3
-; RV32-NEXT: add a3, a0, a2
-; RV32-NEXT: vl8r.v v16, (a3)
-; RV32-NEXT: vl8r.v v24, (a0)
-; RV32-NEXT: addi a0, sp, 64
-; RV32-NEXT: add a1, a0, a1
-; RV32-NEXT: vsetvli a3, zero, e8, m8, ta, ma
-; RV32-NEXT: vmseq.vi v8, v16, 0
-; RV32-NEXT: vmseq.vi v0, v24, 0
-; RV32-NEXT: vmv.v.i v16, 0
-; RV32-NEXT: vmerge.vim v24, v16, 1, v0
-; RV32-NEXT: vs8r.v v24, (a0)
-; RV32-NEXT: add a0, a0, a2
-; RV32-NEXT: vmv1r.v v0, v8
-; RV32-NEXT: vmerge.vim v8, v16, 1, v0
-; RV32-NEXT: vs8r.v v8, (a0)
-; RV32-NEXT: lbu a0, 0(a1)
-; RV32-NEXT: addi sp, s0, -80
-; RV32-NEXT: lw ra, 76(sp) # 4-byte Folded Reload
-; RV32-NEXT: lw s0, 72(sp) # 4-byte Folded Reload
-; RV32-NEXT: addi sp, sp, 80
-; RV32-NEXT: ret
-;
-; RV64-LABEL: extractelt_nxv128i1:
-; RV64: # %bb.0:
-; RV64-NEXT: csrr a2, vlenb
-; RV64-NEXT: slli a3, a2, 4
-; RV64-NEXT: addi a3, a3, -1
-; RV64-NEXT: bltu a1, a3, .LBB7_2
-; RV64-NEXT: # %bb.1:
-; RV64-NEXT: mv a1, a3
-; RV64-NEXT: .LBB7_2:
-; RV64-NEXT: addi sp, sp, -80
-; RV64-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
-; RV64-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
-; RV64-NEXT: addi s0, sp, 80
-; RV64-NEXT: csrr a3, vlenb
-; RV64-NEXT: slli a3, a3, 4
-; RV64-NEXT: sub sp, sp, a3
-; RV64-NEXT: andi sp, sp, -64
-; RV64-NEXT: slli a2, a2, 3
-; RV64-NEXT: add a3, a0, a2
-; RV64-NEXT: vl8r.v v16, (a3)
-; RV64-NEXT: vl8r.v v24, (a0)
-; RV64-NEXT: addi a0, sp, 64
-; RV64-NEXT: add a1, a0, a1
-; RV64-NEXT: vsetvli a3, zero, e8, m8, ta, ma
-; RV64-NEXT: vmseq.vi v8, v16, 0
-; RV64-NEXT: vmseq.vi v0, v24, 0
-; RV64-NEXT: vmv.v.i v16, 0
-; RV64-NEXT: vmerge.vim v24, v16, 1, v0
-; RV64-NEXT: vs8r.v v24, (a0)
-; RV64-NEXT: add a0, a0, a2
-; RV64-NEXT: vmv1r.v v0, v8
-; RV64-NEXT: vmerge.vim v8, v16, 1, v0
-; RV64-NEXT: vs8r.v v8, (a0)
-; RV64-NEXT: lbu a0, 0(a1)
-; RV64-NEXT: addi sp, s0, -80
-; RV64-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
-; RV64-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
-; RV64-NEXT: addi sp, sp, 80
-; RV64-NEXT: ret
+; CHECK-LABEL: extractelt_nxv128i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 4
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: bltu a1, a3, .LBB7_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a1, a3
+; CHECK-NEXT: .LBB7_2:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 4
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: add a3, a0, a2
+; CHECK-NEXT: vl8r.v v16, (a3)
+; CHECK-NEXT: vl8r.v v24, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: add a1, a0, a1
+; CHECK-NEXT: vsetvli a3, zero, e8, m8, ta, ma
+; CHECK-NEXT: vmseq.vi v8, v16, 0
+; CHECK-NEXT: vmseq.vi v0, v24, 0
+; CHECK-NEXT: vmv.v.i v16, 0
+; CHECK-NEXT: vmerge.vim v24, v16, 1, v0
+; CHECK-NEXT: vs8r.v v24, (a0)
+; CHECK-NEXT: add a0, a0, a2
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vmerge.vim v8, v16, 1, v0
+; CHECK-NEXT: vs8r.v v8, (a0)
+; CHECK-NEXT: lbu a0, 0(a1)
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 4
+; CHECK-NEXT: add sp, sp, a1
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
%a = load <vscale x 128 x i8>, ptr %x
%b = icmp eq <vscale x 128 x i8> %a, zeroinitializer
%c = extractelement <vscale x 128 x i1> %b, i64 %idx
@@ -311,3 +267,6 @@ define i1 @extractelt_nxv64i1_idx0(ptr %x) nounwind {
%c = extractelement <vscale x 64 x i1> %b, i64 0
ret i1 %c
}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; RV32: {{.*}}
+; RV64: {{.*}}
diff --git a/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv32.ll
index df9949e617b80..9b157c879b40b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv32.ll
@@ -863,19 +863,13 @@ define i32 @extractelt_nxv32i32_0(<vscale x 32 x i32> %v) {
define i32 @extractelt_nxv32i32_neg1(<vscale x 32 x i32> %v) {
; CHECK-LABEL: extractelt_nxv32i32_neg1:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi sp, sp, -80
-; CHECK-NEXT: .cfi_def_cfa_offset 80
-; CHECK-NEXT: sw ra, 76(sp) # 4-byte Folded Spill
-; CHECK-NEXT: sw s0, 72(sp) # 4-byte Folded Spill
-; CHECK-NEXT: .cfi_offset ra, -4
-; CHECK-NEXT: .cfi_offset s0, -8
-; CHECK-NEXT: addi s0, sp, 80
-; CHECK-NEXT: .cfi_def_cfa s0, 0
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: sub sp, sp, a0
-; CHECK-NEXT: andi sp, sp, -64
-; CHECK-NEXT: addi a0, sp, 64
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vs8r.v v8, (a0)
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a2, a1, 3
@@ -884,10 +878,10 @@ define i32 @extractelt_nxv32i32_neg1(<vscale x 32 x i32> %v) {
; CHECK-NEXT: slli a1, a1, 4
; CHECK-NEXT: add a0, a1, a0
; CHECK-NEXT: lw a0, -4(a0)
-; CHECK-NEXT: addi sp, s0, -80
-; CHECK-NEXT: lw ra, 76(sp) # 4-byte Folded Reload
-; CHECK-NEXT: lw s0, 72(sp) # 4-byte Folded Reload
-; CHECK-NEXT: addi sp, sp, 80
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 4
+; CHECK-NEXT: add sp, sp, a1
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%r = extractelement <vscale x 32 x i32> %v, i32 -1
ret i32 %r
@@ -914,30 +908,24 @@ define i32 @extractelt_nxv32i32_idx(<vscale x 32 x i32> %v, i32 %idx) {
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a0, a2
; CHECK-NEXT: .LBB74_2:
-; CHECK-NEXT: addi sp, sp, -80
-; CHECK-NEXT: .cfi_def_cfa_offset 80
-; CHECK-NEXT: sw ra, 76(sp) # 4-byte Folded Spill
-; CHECK-NEXT: sw s0, 72(sp) # 4-byte Folded Spill
-; CHECK-NEXT: .cfi_offset ra, -4
-; CHECK-NEXT: .cfi_offset s0, -8
-; CHECK-NEXT: addi s0, sp, 80
-; CHECK-NEXT: .cfi_def_cfa s0, 0
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a2, vlenb
; CHECK-NEXT: slli a2, a2, 4
; CHECK-NEXT: sub sp, sp, a2
-; CHECK-NEXT: andi sp, sp, -64
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; CHECK-NEXT: slli a0, a0, 2
-; CHECK-NEXT: addi a2, sp, 64
+; CHECK-NEXT: addi a2, sp, 16
; CHECK-NEXT: add a0, a2, a0
; CHECK-NEXT: vs8r.v v8, (a2)
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, a2, a1
; CHECK-NEXT: vs8r.v v16, (a1)
; CHECK-NEXT: lw a0, 0(a0)
-; CHECK-NEXT: addi sp, s0, -80
-; CHECK-NEXT: lw ra, 76(sp) # 4-byte Folded Reload
-; CHECK-NEXT: lw s0, 72(sp) # 4-byte Folded Reload
-; CHECK-NEXT: addi sp, sp, 80
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 4
+; CHECK-NEXT: add sp, sp, a1
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%r = extractelement <vscale x 32 x i32> %v, i32 %idx
ret i32 %r
diff --git a/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv64.ll
index a96cf5807e6c1..c1865e1ba66e8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv64.ll
@@ -849,19 +849,13 @@ define i64 @extractelt_nxv16i64_0(<vscale x 16 x i64> %v) {
define i64 @extractelt_nxv16i64_neg1(<vscale x 16 x i64> %v) {
; CHECK-LABEL: extractelt_nxv16i64_neg1:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi sp, sp, -80
-; CHECK-NEXT: .cfi_def_cfa_offset 80
-; CHECK-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
-; CHECK-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
-; CHECK-NEXT: .cfi_offset ra, -8
-; CHECK-NEXT: .cfi_offset s0, -16
-; CHECK-NEXT: addi s0, sp, 80
-; CHECK-NEXT: .cfi_def_cfa s0, 0
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: sub sp, sp, a0
-; CHECK-NEXT: andi sp, sp, -64
-; CHECK-NEXT: addi a0, sp, 64
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vs8r.v v8, (a0)
; CHECK-NEXT: csrr a2, vlenb
; CHECK-NEXT: slli a1, a2, 3
@@ -878,10 +872,10 @@ define i64 @extractelt_nxv16i64_neg1(<vscale x 16 x i64> %v) {
; CHECK-NEXT: slli a2, a2, 3
; CHECK-NEXT: add a0, a0, a2
; CHECK-NEXT: ld a0, 0(a0)
-; CHECK-NEXT: addi sp, s0, -80
-; CHECK-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
-; CHECK-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
-; CHECK-NEXT: addi sp, sp, 80
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 4
+; CHECK-NEXT: add sp, sp, a1
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%r = extractelement <vscale x 16 x i64> %v, i32 -1
ret i64 %r
@@ -908,30 +902,24 @@ define i64 @extractelt_nxv16i64_idx(<vscale x 16 x i64> %v, i32 zeroext %idx) {
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a0, a2
; CHECK-NEXT: .LBB76_2:
-; CHECK-NEXT: addi sp, sp, -80
-; CHECK-NEXT: .cfi_def_cfa_offset 80
-; CHECK-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
-; CHECK-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
-; CHECK-NEXT: .cfi_offset ra, -8
-; CHECK-NEXT: .cfi_offset s0, -16
-; CHECK-NEXT: addi s0, sp, 80
-; CHECK-NEXT: .cfi_def_cfa s0, 0
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a2, vlenb
; CHECK-NEXT: slli a2, a2, 4
; CHECK-NEXT: sub sp, sp, a2
-; CHECK-NEXT: andi sp, sp, -64
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; CHECK-NEXT: slli a0, a0, 3
-; CHECK-NEXT: addi a2, sp, 64
+; CHECK-NEXT: addi a2, sp, 16
; CHECK-NEXT: add a0, a2, a0
; CHECK-NEXT: vs8r.v v8, (a2)
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, a2, a1
; CHECK-NEXT: vs8r.v v16, (a1)
; CHECK-NEXT: ld a0, 0(a0)
-; CHECK-NEXT: addi sp, s0, -80
-; CHECK-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
-; CHECK-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
-; CHECK-NEXT: addi sp, sp, 80
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 4
+; CHECK-NEXT: add sp, sp, a1
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%r = extractelement <vscale x 16 x i64> %v, i32 %idx
ret i64 %r
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll
index 386c71cf665ce..c9a48c4898e5b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll
@@ -317,129 +317,29 @@ define i1 @extractelt_v128i1(ptr %x, i64 %idx) nounwind {
}
define i1 @extractelt_v256i1(ptr %x, i64 %idx) nounwind {
-; RV32-LABEL: extractelt_v256i1:
-; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -384
-; RV32-NEXT: sw ra, 380(sp) # 4-byte Folded Spill
-; RV32-NEXT: sw s0, 376(sp) # 4-byte Folded Spill
-; RV32-NEXT: addi s0, sp, 384
-; RV32-NEXT: andi sp, sp, -128
-; RV32-NEXT: andi a1, a1, 255
-; RV32-NEXT: li a2, 128
-; RV32-NEXT: addi a3, a0, 128
-; RV32-NEXT: vsetvli zero, a2, e8, m8, ta, ma
-; RV32-NEXT: vle8.v v16, (a3)
-; RV32-NEXT: vle8.v v24, (a0)
-; RV32-NEXT: mv a0, sp
-; RV32-NEXT: add a1, a0, a1
-; RV32-NEXT: vmseq.vi v8, v16, 0
-; RV32-NEXT: vmseq.vi v0, v24, 0
-; RV32-NEXT: vmv.v.i v16, 0
-; RV32-NEXT: vmerge.vim v24, v16, 1, v0
-; RV32-NEXT: vse8.v v24, (a0)
-; RV32-NEXT: vmv1r.v v0, v8
-; RV32-NEXT: vmerge.vim v8, v16, 1, v0
-; RV32-NEXT: addi a0, sp, 128
-; RV32-NEXT: vse8.v v8, (a0)
-; RV32-NEXT: lbu a0, 0(a1)
-; RV32-NEXT: addi sp, s0, -384
-; RV32-NEXT: lw ra, 380(sp) # 4-byte Folded Reload
-; RV32-NEXT: lw s0, 376(sp) # 4-byte Folded Reload
-; RV32-NEXT: addi sp, sp, 384
-; RV32-NEXT: ret
-;
-; RV64-LABEL: extractelt_v256i1:
-; RV64: # %bb.0:
-; RV64-NEXT: addi sp, sp, -384
-; RV64-NEXT: sd ra, 376(sp) # 8-byte Folded Spill
-; RV64-NEXT: sd s0, 368(sp) # 8-byte Folded Spill
-; RV64-NEXT: addi s0, sp, 384
-; RV64-NEXT: andi sp, sp, -128
-; RV64-NEXT: andi a1, a1, 255
-; RV64-NEXT: li a2, 128
-; RV64-NEXT: addi a3, a0, 128
-; RV64-NEXT: vsetvli zero, a2, e8, m8, ta, ma
-; RV64-NEXT: vle8.v v16, (a3)
-; RV64-NEXT: vle8.v v24, (a0)
-; RV64-NEXT: mv a0, sp
-; RV64-NEXT: add a1, a0, a1
-; RV64-NEXT: vmseq.vi v8, v16, 0
-; RV64-NEXT: vmseq.vi v0, v24, 0
-; RV64-NEXT: vmv.v.i v16, 0
-; RV64-NEXT: vmerge.vim v24, v16, 1, v0
-; RV64-NEXT: vse8.v v24, (a0)
-; RV64-NEXT: vmv1r.v v0, v8
-; RV64-NEXT: vmerge.vim v8, v16, 1, v0
-; RV64-NEXT: addi a0, sp, 128
-; RV64-NEXT: vse8.v v8, (a0)
-; RV64-NEXT: lbu a0, 0(a1)
-; RV64-NEXT: addi sp, s0, -384
-; RV64-NEXT: ld ra, 376(sp) # 8-byte Folded Reload
-; RV64-NEXT: ld s0, 368(sp) # 8-byte Folded Reload
-; RV64-NEXT: addi sp, sp, 384
-; RV64-NEXT: ret
-;
-; RV32ZBS-LABEL: extractelt_v256i1:
-; RV32ZBS: # %bb.0:
-; RV32ZBS-NEXT: addi sp, sp, -384
-; RV32ZBS-NEXT: sw ra, 380(sp) # 4-byte Folded Spill
-; RV32ZBS-NEXT: sw s0, 376(sp) # 4-byte Folded Spill
-; RV32ZBS-NEXT: addi s0, sp, 384
-; RV32ZBS-NEXT: andi sp, sp, -128
-; RV32ZBS-NEXT: andi a1, a1, 255
-; RV32ZBS-NEXT: li a2, 128
-; RV32ZBS-NEXT: addi a3, a0, 128
-; RV32ZBS-NEXT: vsetvli zero, a2, e8, m8, ta, ma
-; RV32ZBS-NEXT: vle8.v v16, (a3)
-; RV32ZBS-NEXT: vle8.v v24, (a0)
-; RV32ZBS-NEXT: mv a0, sp
-; RV32ZBS-NEXT: add a1, a0, a1
-; RV32ZBS-NEXT: vmseq.vi v8, v16, 0
-; RV32ZBS-NEXT: vmseq.vi v0, v24, 0
-; RV32ZBS-NEXT: vmv.v.i v16, 0
-; RV32ZBS-NEXT: vmerge.vim v24, v16, 1, v0
-; RV32ZBS-NEXT: vse8.v v24, (a0)
-; RV32ZBS-NEXT: vmv1r.v v0, v8
-; RV32ZBS-NEXT: vmerge.vim v8, v16, 1, v0
-; RV32ZBS-NEXT: addi a0, sp, 128
-; RV32ZBS-NEXT: vse8.v v8, (a0)
-; RV32ZBS-NEXT: lbu a0, 0(a1)
-; RV32ZBS-NEXT: addi sp, s0, -384
-; RV32ZBS-NEXT: lw ra, 380(sp) # 4-byte Folded Reload
-; RV32ZBS-NEXT: lw s0, 376(sp) # 4-byte Folded Reload
-; RV32ZBS-NEXT: addi sp, sp, 384
-; RV32ZBS-NEXT: ret
-;
-; RV64ZBS-LABEL: extractelt_v256i1:
-; RV64ZBS: # %bb.0:
-; RV64ZBS-NEXT: addi sp, sp, -384
-; RV64ZBS-NEXT: sd ra, 376(sp) # 8-byte Folded Spill
-; RV64ZBS-NEXT: sd s0, 368(sp) # 8-byte Folded Spill
-; RV64ZBS-NEXT: addi s0, sp, 384
-; RV64ZBS-NEXT: andi sp, sp, -128
-; RV64ZBS-NEXT: andi a1, a1, 255
-; RV64ZBS-NEXT: li a2, 128
-; RV64ZBS-NEXT: addi a3, a0, 128
-; RV64ZBS-NEXT: vsetvli zero, a2, e8, m8, ta, ma
-; RV64ZBS-NEXT: vle8.v v16, (a3)
-; RV64ZBS-NEXT: vle8.v v24, (a0)
-; RV64ZBS-NEXT: mv a0, sp
-; RV64ZBS-NEXT: add a1, a0, a1
-; RV64ZBS-NEXT: vmseq.vi v8, v16, 0
-; RV64ZBS-NEXT: vmseq.vi v0, v24, 0
-; RV64ZBS-NEXT: vmv.v.i v16, 0
-; RV64ZBS-NEXT: vmerge.vim v24, v16, 1, v0
-; RV64ZBS-NEXT: vse8.v v24, (a0)
-; RV64ZBS-NEXT: vmv1r.v v0, v8
-; RV64ZBS-NEXT: vmerge.vim v8, v16, 1, v0
-; RV64ZBS-NEXT: addi a0, sp, 128
-; RV64ZBS-NEXT: vse8.v v8, (a0)
-; RV64ZBS-NEXT: lbu a0, 0(a1)
-; RV64ZBS-NEXT: addi sp, s0, -384
-; RV64ZBS-NEXT: ld ra, 376(sp) # 8-byte Folded Reload
-; RV64ZBS-NEXT: ld s0, 368(sp) # 8-byte Folded Reload
-; RV64ZBS-NEXT: addi sp, sp, 384
-; RV64ZBS-NEXT: ret
+; CHECK-LABEL: extractelt_v256i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -256
+; CHECK-NEXT: andi a1, a1, 255
+; CHECK-NEXT: li a2, 128
+; CHECK-NEXT: addi a3, a0, 128
+; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; CHECK-NEXT: vle8.v v16, (a3)
+; CHECK-NEXT: vle8.v v24, (a0)
+; CHECK-NEXT: mv a0, sp
+; CHECK-NEXT: add a1, a0, a1
+; CHECK-NEXT: vmseq.vi v8, v16, 0
+; CHECK-NEXT: vmseq.vi v0, v24, 0
+; CHECK-NEXT: vmv.v.i v16, 0
+; CHECK-NEXT: vmerge.vim v24, v16, 1, v0
+; CHECK-NEXT: vse8.v v24, (a0)
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vmerge.vim v8, v16, 1, v0
+; CHECK-NEXT: addi a0, sp, 128
+; CHECK-NEXT: vse8.v v8, (a0)
+; CHECK-NEXT: lbu a0, 0(a1)
+; CHECK-NEXT: addi sp, sp, 256
+; CHECK-NEXT: ret
%a = load <256 x i8>, ptr %x
%b = icmp eq <256 x i8> %a, zeroinitializer
%c = extractelement <256 x i1> %b, i64 %idx
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll
index d309da6df7dc7..7a46465d7ecb8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll
@@ -700,61 +700,26 @@ define i32 @extractelt_v32i32_idx(ptr %x, i32 zeroext %idx) nounwind {
}
define i32 @extractelt_v64i32_idx(ptr %x, i32 zeroext %idx) nounwind {
-; RV32-LABEL: extractelt_v64i32_idx:
-; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -384
-; RV32-NEXT: sw ra, 380(sp) # 4-byte Folded Spill
-; RV32-NEXT: sw s0, 376(sp) # 4-byte Folded Spill
-; RV32-NEXT: addi s0, sp, 384
-; RV32-NEXT: andi sp, sp, -128
-; RV32-NEXT: andi a1, a1, 63
-; RV32-NEXT: slli a1, a1, 2
-; RV32-NEXT: li a2, 32
-; RV32-NEXT: addi a3, a0, 128
-; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
-; RV32-NEXT: vle32.v v8, (a3)
-; RV32-NEXT: vle32.v v16, (a0)
-; RV32-NEXT: mv a0, sp
-; RV32-NEXT: add a1, a0, a1
-; RV32-NEXT: vadd.vv v8, v8, v8
-; RV32-NEXT: vadd.vv v16, v16, v16
-; RV32-NEXT: vse32.v v16, (a0)
-; RV32-NEXT: addi a0, sp, 128
-; RV32-NEXT: vse32.v v8, (a0)
-; RV32-NEXT: lw a0, 0(a1)
-; RV32-NEXT: addi sp, s0, -384
-; RV32-NEXT: lw ra, 380(sp) # 4-byte Folded Reload
-; RV32-NEXT: lw s0, 376(sp) # 4-byte Folded Reload
-; RV32-NEXT: addi sp, sp, 384
-; RV32-NEXT: ret
-;
-; RV64-LABEL: extractelt_v64i32_idx:
-; RV64: # %bb.0:
-; RV64-NEXT: addi sp, sp, -384
-; RV64-NEXT: sd ra, 376(sp) # 8-byte Folded Spill
-; RV64-NEXT: sd s0, 368(sp) # 8-byte Folded Spill
-; RV64-NEXT: addi s0, sp, 384
-; RV64-NEXT: andi sp, sp, -128
-; RV64-NEXT: andi a1, a1, 63
-; RV64-NEXT: slli a1, a1, 2
-; RV64-NEXT: li a2, 32
-; RV64-NEXT: addi a3, a0, 128
-; RV64-NEXT: vsetvli zero, a2, e32, m8, ta, ma
-; RV64-NEXT: vle32.v v8, (a3)
-; RV64-NEXT: vle32.v v16, (a0)
-; RV64-NEXT: mv a0, sp
-; RV64-NEXT: add a1, a0, a1
-; RV64-NEXT: vadd.vv v8, v8, v8
-; RV64-NEXT: vadd.vv v16, v16, v16
-; RV64-NEXT: vse32.v v16, (a0)
-; RV64-NEXT: addi a0, sp, 128
-; RV64-NEXT: vse32.v v8, (a0)
-; RV64-NEXT: lw a0, 0(a1)
-; RV64-NEXT: addi sp, s0, -384
-; RV64-NEXT: ld ra, 376(sp) # 8-byte Folded Reload
-; RV64-NEXT: ld s0, 368(sp) # 8-byte Folded Reload
-; RV64-NEXT: addi sp, sp, 384
-; RV64-NEXT: ret
+; CHECK-LABEL: extractelt_v64i32_idx:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -256
+; CHECK-NEXT: andi a1, a1, 63
+; CHECK-NEXT: slli a1, a1, 2
+; CHECK-NEXT: li a2, 32
+; CHECK-NEXT: addi a3, a0, 128
+; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; CHECK-NEXT: vle32.v v8, (a3)
+; CHECK-NEXT: vle32.v v16, (a0)
+; CHECK-NEXT: mv a0, sp
+; CHECK-NEXT: add a1, a0, a1
+; CHECK-NEXT: vadd.vv v8, v8, v8
+; CHECK-NEXT: vadd.vv v16, v16, v16
+; CHECK-NEXT: vse32.v v16, (a0)
+; CHECK-NEXT: addi a0, sp, 128
+; CHECK-NEXT: vse32.v v8, (a0)
+; CHECK-NEXT: lw a0, 0(a1)
+; CHECK-NEXT: addi sp, sp, 256
+; CHECK-NEXT: ret
%a = load <64 x i32>, ptr %x
%b = add <64 x i32> %a, %a
%c = extractelement <64 x i32> %b, i32 %idx
diff --git a/llvm/test/CodeGen/RISCV/rvv/pr88576.ll b/llvm/test/CodeGen/RISCV/rvv/pr88576.ll
index b6e0d1e2ff4ae..fbfcdbcd5afab 100644
--- a/llvm/test/CodeGen/RISCV/rvv/pr88576.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/pr88576.ll
@@ -11,19 +11,13 @@ define i1 @foo(<vscale x 16 x i8> %x, i64 %y) {
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a0, a2
; CHECK-NEXT: .LBB0_2:
-; CHECK-NEXT: addi sp, sp, -80
-; CHECK-NEXT: .cfi_def_cfa_offset 80
-; CHECK-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
-; CHECK-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
-; CHECK-NEXT: .cfi_offset ra, -8
-; CHECK-NEXT: .cfi_offset s0, -16
-; CHECK-NEXT: addi s0, sp, 80
-; CHECK-NEXT: .cfi_def_cfa s0, 0
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a2, vlenb
; CHECK-NEXT: slli a2, a2, 4
; CHECK-NEXT: sub sp, sp, a2
-; CHECK-NEXT: andi sp, sp, -64
-; CHECK-NEXT: addi a2, sp, 64
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
; CHECK-NEXT: add a0, a2, a0
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, a2, a1
@@ -36,10 +30,10 @@ define i1 @foo(<vscale x 16 x i8> %x, i64 %y) {
; CHECK-NEXT: vmerge.vim v8, v16, 1, v0
; CHECK-NEXT: vs8r.v v8, (a2)
; CHECK-NEXT: lbu a0, 0(a0)
-; CHECK-NEXT: addi sp, s0, -80
-; CHECK-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
-; CHECK-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
-; CHECK-NEXT: addi sp, sp, 80
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 4
+; CHECK-NEXT: add sp, sp, a1
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%a = bitcast <vscale x 16 x i8> %x to <vscale x 128 x i1>
%b = extractelement <vscale x 128 x i1> %a, i64 %y
diff --git a/llvm/test/CodeGen/X86/avx512fp16-mov.ll b/llvm/test/CodeGen/X86/avx512fp16-mov.ll
index f4eb5b952ae43..42b639d4815a9 100644
--- a/llvm/test/CodeGen/X86/avx512fp16-mov.ll
+++ b/llvm/test/CodeGen/X86/avx512fp16-mov.ll
@@ -1385,32 +1385,24 @@ define half @extract_f16_8(<32 x half> %x, i64 %idx) nounwind {
define half @extract_f16_9(<64 x half> %x, i64 %idx) nounwind {
; X64-LABEL: extract_f16_9:
; X64: # %bb.0:
-; X64-NEXT: pushq %rbp
-; X64-NEXT: movq %rsp, %rbp
-; X64-NEXT: andq $-64, %rsp
-; X64-NEXT: subq $192, %rsp
+; X64-NEXT: pushq %rax
; X64-NEXT: andl $63, %edi
-; X64-NEXT: vmovaps %zmm1, {{[0-9]+}}(%rsp)
-; X64-NEXT: vmovaps %zmm0, (%rsp)
+; X64-NEXT: vmovups %zmm1, -{{[0-9]+}}(%rsp)
+; X64-NEXT: vmovups %zmm0, -{{[0-9]+}}(%rsp)
; X64-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero
-; X64-NEXT: movq %rbp, %rsp
-; X64-NEXT: popq %rbp
+; X64-NEXT: popq %rax
; X64-NEXT: vzeroupper
; X64-NEXT: retq
;
; X86-LABEL: extract_f16_9:
; X86: # %bb.0:
-; X86-NEXT: pushl %ebp
-; X86-NEXT: movl %esp, %ebp
-; X86-NEXT: andl $-64, %esp
-; X86-NEXT: subl $192, %esp
-; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: subl $128, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andl $63, %eax
-; X86-NEXT: vmovaps %zmm1, {{[0-9]+}}(%esp)
-; X86-NEXT: vmovaps %zmm0, (%esp)
+; X86-NEXT: vmovups %zmm1, {{[0-9]+}}(%esp)
+; X86-NEXT: vmovups %zmm0, (%esp)
; X86-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero
-; X86-NEXT: movl %ebp, %esp
-; X86-NEXT: popl %ebp
+; X86-NEXT: addl $128, %esp
; X86-NEXT: vzeroupper
; X86-NEXT: retl
%res = extractelement <64 x half> %x, i64 %idx
diff --git a/llvm/test/CodeGen/X86/gep-expanded-vector.ll b/llvm/test/CodeGen/X86/gep-expanded-vector.ll
index 943cd3610c9d3..98bde25bb6177 100644
--- a/llvm/test/CodeGen/X86/gep-expanded-vector.ll
+++ b/llvm/test/CodeGen/X86/gep-expanded-vector.ll
@@ -6,10 +6,7 @@
define ptr @malloc_init_state(<64 x ptr> %tmp, i32 %ind) nounwind {
; CHECK-LABEL: malloc_init_state:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: pushq %rbp
-; CHECK-NEXT: movq %rsp, %rbp
-; CHECK-NEXT: andq $-64, %rsp
-; CHECK-NEXT: subq $576, %rsp # imm = 0x240
+; CHECK-NEXT: subq $392, %rsp # imm = 0x188
; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
; CHECK-NEXT: vpbroadcastq {{.*#+}} zmm8 = [16,16,16,16,16,16,16,16]
; CHECK-NEXT: vpaddq %zmm8, %zmm0, %zmm0
@@ -20,18 +17,17 @@ define ptr @malloc_init_state(<64 x ptr> %tmp, i32 %ind) nounwind {
; CHECK-NEXT: vpaddq %zmm8, %zmm5, %zmm5
; CHECK-NEXT: vpaddq %zmm8, %zmm6, %zmm6
; CHECK-NEXT: vpaddq %zmm8, %zmm7, %zmm7
-; CHECK-NEXT: vmovdqa64 %zmm7, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: vmovdqa64 %zmm6, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: vmovdqa64 %zmm5, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: vmovdqa64 %zmm4, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: vmovdqa64 %zmm3, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: vmovdqa64 %zmm2, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: vmovdqa64 %zmm1, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: vmovdqa64 %zmm0, (%rsp)
+; CHECK-NEXT: vmovdqu64 %zmm7, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: vmovdqu64 %zmm6, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: vmovdqu64 %zmm5, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: vmovdqu64 %zmm4, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: vmovdqu64 %zmm3, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: vmovdqu64 %zmm2, (%rsp)
+; CHECK-NEXT: vmovdqu64 %zmm1, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: vmovdqu64 %zmm0, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: andl $63, %edi
-; CHECK-NEXT: movq (%rsp,%rdi,8), %rax
-; CHECK-NEXT: movq %rbp, %rsp
-; CHECK-NEXT: popq %rbp
+; CHECK-NEXT: movq -128(%rsp,%rdi,8), %rax
+; CHECK-NEXT: addq $392, %rsp # imm = 0x188
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
entry:
diff --git a/llvm/test/CodeGen/X86/i64-mem-copy.ll b/llvm/test/CodeGen/X86/i64-mem-copy.ll
index 4cdb079d43993..0c8d102066a1a 100644
--- a/llvm/test/CodeGen/X86/i64-mem-copy.ll
+++ b/llvm/test/CodeGen/X86/i64-mem-copy.ll
@@ -123,42 +123,34 @@ define void @PR23476(<5 x i64> %in, ptr %out, i32 %index) nounwind {
;
; X86-LABEL: PR23476:
; X86: # %bb.0:
-; X86-NEXT: pushl %ebp
-; X86-NEXT: movl %esp, %ebp
-; X86-NEXT: andl $-16, %esp
-; X86-NEXT: subl $80, %esp
-; X86-NEXT: movl 52(%ebp), %eax
+; X86-NEXT: subl $64, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andl $7, %eax
-; X86-NEXT: movl 48(%ebp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; X86-NEXT: movups 8(%ebp), %xmm1
-; X86-NEXT: movups 24(%ebp), %xmm2
-; X86-NEXT: movaps %xmm2, {{[0-9]+}}(%esp)
-; X86-NEXT: movaps %xmm1, (%esp)
-; X86-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
+; X86-NEXT: movups {{[0-9]+}}(%esp), %xmm1
+; X86-NEXT: movups {{[0-9]+}}(%esp), %xmm2
+; X86-NEXT: movups %xmm2, {{[0-9]+}}(%esp)
+; X86-NEXT: movups %xmm1, (%esp)
+; X86-NEXT: movups %xmm0, {{[0-9]+}}(%esp)
; X86-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-NEXT: movsd %xmm0, (%ecx)
-; X86-NEXT: movl %ebp, %esp
-; X86-NEXT: popl %ebp
+; X86-NEXT: addl $64, %esp
; X86-NEXT: retl
;
; X86AVX-LABEL: PR23476:
; X86AVX: # %bb.0:
-; X86AVX-NEXT: pushl %ebp
-; X86AVX-NEXT: movl %esp, %ebp
-; X86AVX-NEXT: andl $-32, %esp
-; X86AVX-NEXT: subl $96, %esp
+; X86AVX-NEXT: subl $64, %esp
; X86AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; X86AVX-NEXT: movl 52(%ebp), %eax
+; X86AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86AVX-NEXT: andl $7, %eax
-; X86AVX-NEXT: movl 48(%ebp), %ecx
-; X86AVX-NEXT: vmovups 8(%ebp), %ymm1
-; X86AVX-NEXT: vmovaps %ymm1, (%esp)
-; X86AVX-NEXT: vmovaps %ymm0, {{[0-9]+}}(%esp)
+; X86AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86AVX-NEXT: vmovups {{[0-9]+}}(%esp), %ymm1
+; X86AVX-NEXT: vmovups %ymm1, (%esp)
+; X86AVX-NEXT: vmovups %ymm0, {{[0-9]+}}(%esp)
; X86AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86AVX-NEXT: vmovsd %xmm0, (%ecx)
-; X86AVX-NEXT: movl %ebp, %esp
-; X86AVX-NEXT: popl %ebp
+; X86AVX-NEXT: addl $64, %esp
; X86AVX-NEXT: vzeroupper
; X86AVX-NEXT: retl
%ext = extractelement <5 x i64> %in, i32 %index
diff --git a/llvm/test/CodeGen/X86/unaligned_extract_from_vector_through_stack.ll b/llvm/test/CodeGen/X86/unaligned_extract_from_vector_through_stack.ll
index 52d0c2b509128..629f44b52bc05 100644
--- a/llvm/test/CodeGen/X86/unaligned_extract_from_vector_through_stack.ll
+++ b/llvm/test/CodeGen/X86/unaligned_extract_from_vector_through_stack.ll
@@ -17,4 +17,22 @@ entry:
ret i32 %b
}
+define i32 @foo2(i32 %arg1) #1 {
+; CHECK-LABEL: foo2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
+; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; CHECK-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: andl $31, %edi
+; CHECK-NEXT: movzwl -72(%rsp,%rdi,2), %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %a = extractelement <32 x i16> zeroinitializer, i32 %arg1
+ %b = zext i16 %a to i32
+ ret i32 %b
+}
+
attributes #0 = { "no-realign-stack" "target-cpu"="skylake-avx512" }
+attributes #1 = { "no-realign-stack" "target-cpu"="skylake" }
diff --git a/llvm/test/CodeGen/X86/vector-extend-inreg.ll b/llvm/test/CodeGen/X86/vector-extend-inreg.ll
index 889ab6a0818e2..d7fe2376ed772 100644
--- a/llvm/test/CodeGen/X86/vector-extend-inreg.ll
+++ b/llvm/test/CodeGen/X86/vector-extend-inreg.ll
@@ -15,28 +15,28 @@ define i64 @extract_any_extend_vector_inreg_v16i64(<16 x i64> %a0, i32 %a1) noun
; X86-SSE-NEXT: movdqa 72(%ebp), %xmm0
; X86-SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
; X86-SSE-NEXT: xorps %xmm1, %xmm1
-; X86-SSE-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
-; X86-SSE-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
-; X86-SSE-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
-; X86-SSE-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
-; X86-SSE-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
-; X86-SSE-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
-; X86-SSE-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
-; X86-SSE-NEXT: movdqa %xmm0, {{[0-9]+}}(%esp)
-; X86-SSE-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
-; X86-SSE-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
-; X86-SSE-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
-; X86-SSE-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
-; X86-SSE-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
-; X86-SSE-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
-; X86-SSE-NEXT: movaps %xmm1, (%esp)
-; X86-SSE-NEXT: movdqa %xmm0, {{[0-9]+}}(%esp)
+; X86-SSE-NEXT: movups %xmm1, {{[0-9]+}}(%esp)
+; X86-SSE-NEXT: movups %xmm1, {{[0-9]+}}(%esp)
+; X86-SSE-NEXT: movups %xmm1, {{[0-9]+}}(%esp)
+; X86-SSE-NEXT: movups %xmm1, {{[0-9]+}}(%esp)
+; X86-SSE-NEXT: movups %xmm1, {{[0-9]+}}(%esp)
+; X86-SSE-NEXT: movups %xmm1, {{[0-9]+}}(%esp)
+; X86-SSE-NEXT: movups %xmm1, {{[0-9]+}}(%esp)
+; X86-SSE-NEXT: movdqu %xmm0, {{[0-9]+}}(%esp)
+; X86-SSE-NEXT: movups %xmm1, {{[0-9]+}}(%esp)
+; X86-SSE-NEXT: movups %xmm1, {{[0-9]+}}(%esp)
+; X86-SSE-NEXT: movups %xmm1, {{[0-9]+}}(%esp)
+; X86-SSE-NEXT: movups %xmm1, {{[0-9]+}}(%esp)
+; X86-SSE-NEXT: movups %xmm1, {{[0-9]+}}(%esp)
+; X86-SSE-NEXT: movups %xmm1, {{[0-9]+}}(%esp)
+; X86-SSE-NEXT: movups %xmm1, {{[0-9]+}}(%esp)
+; X86-SSE-NEXT: movdqu %xmm0, {{[0-9]+}}(%esp)
; X86-SSE-NEXT: leal (%ecx,%ecx), %eax
; X86-SSE-NEXT: andl $31, %eax
-; X86-SSE-NEXT: movl 128(%esp,%eax,4), %eax
+; X86-SSE-NEXT: movl 136(%esp,%eax,4), %eax
; X86-SSE-NEXT: leal 1(%ecx,%ecx), %ecx
; X86-SSE-NEXT: andl $31, %ecx
-; X86-SSE-NEXT: movl (%esp,%ecx,4), %edx
+; X86-SSE-NEXT: movl 8(%esp,%ecx,4), %edx
; X86-SSE-NEXT: movl %ebp, %esp
; X86-SSE-NEXT: popl %ebp
; X86-SSE-NEXT: retl
@@ -69,20 +69,20 @@ define i64 @extract_any_extend_vector_inreg_v16i64(<16 x i64> %a0, i32 %a1) noun
; X86-AVX-NEXT: movl 40(%ebp), %ecx
; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; X86-AVX-NEXT: vmovaps %ymm1, {{[0-9]+}}(%esp)
-; X86-AVX-NEXT: vmovaps %ymm1, {{[0-9]+}}(%esp)
-; X86-AVX-NEXT: vmovaps %ymm1, {{[0-9]+}}(%esp)
-; X86-AVX-NEXT: vmovaps %ymm0, {{[0-9]+}}(%esp)
-; X86-AVX-NEXT: vmovaps %ymm1, {{[0-9]+}}(%esp)
-; X86-AVX-NEXT: vmovaps %ymm1, {{[0-9]+}}(%esp)
-; X86-AVX-NEXT: vmovaps %ymm1, (%esp)
-; X86-AVX-NEXT: vmovaps %ymm0, {{[0-9]+}}(%esp)
+; X86-AVX-NEXT: vmovups %ymm1, {{[0-9]+}}(%esp)
+; X86-AVX-NEXT: vmovups %ymm1, {{[0-9]+}}(%esp)
+; X86-AVX-NEXT: vmovups %ymm1, {{[0-9]+}}(%esp)
+; X86-AVX-NEXT: vmovups %ymm0, {{[0-9]+}}(%esp)
+; X86-AVX-NEXT: vmovups %ymm1, {{[0-9]+}}(%esp)
+; X86-AVX-NEXT: vmovups %ymm1, {{[0-9]+}}(%esp)
+; X86-AVX-NEXT: vmovups %ymm1, {{[0-9]+}}(%esp)
+; X86-AVX-NEXT: vmovups %ymm0, {{[0-9]+}}(%esp)
; X86-AVX-NEXT: leal (%ecx,%ecx), %eax
; X86-AVX-NEXT: andl $31, %eax
-; X86-AVX-NEXT: movl 128(%esp,%eax,4), %eax
+; X86-AVX-NEXT: movl 152(%esp,%eax,4), %eax
; X86-AVX-NEXT: leal 1(%ecx,%ecx), %ecx
; X86-AVX-NEXT: andl $31, %ecx
-; X86-AVX-NEXT: movl (%esp,%ecx,4), %edx
+; X86-AVX-NEXT: movl 24(%esp,%ecx,4), %edx
; X86-AVX-NEXT: movl %ebp, %esp
; X86-AVX-NEXT: popl %ebp
; X86-AVX-NEXT: vzeroupper
@@ -90,22 +90,18 @@ define i64 @extract_any_extend_vector_inreg_v16i64(<16 x i64> %a0, i32 %a1) noun
;
; X64-AVX-LABEL: extract_any_extend_vector_inreg_v16i64:
; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: pushq %rbp
-; X64-AVX-NEXT: movq %rsp, %rbp
-; X64-AVX-NEXT: andq $-32, %rsp
-; X64-AVX-NEXT: subq $160, %rsp
+; X64-AVX-NEXT: pushq %rax
; X64-AVX-NEXT: # kill: def $edi killed $edi def $rdi
; X64-AVX-NEXT: vpermq {{.*#+}} ymm0 = ymm3[3,3,3,3]
; X64-AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; X64-AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; X64-AVX-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
-; X64-AVX-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
-; X64-AVX-NEXT: vmovaps %ymm1, (%rsp)
-; X64-AVX-NEXT: vmovdqa %ymm0, {{[0-9]+}}(%rsp)
+; X64-AVX-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp)
+; X64-AVX-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp)
+; X64-AVX-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp)
+; X64-AVX-NEXT: vmovdqu %ymm0, -{{[0-9]+}}(%rsp)
; X64-AVX-NEXT: andl $15, %edi
-; X64-AVX-NEXT: movq (%rsp,%rdi,8), %rax
-; X64-AVX-NEXT: movq %rbp, %rsp
-; X64-AVX-NEXT: popq %rbp
+; X64-AVX-NEXT: movq -128(%rsp,%rdi,8), %rax
+; X64-AVX-NEXT: popq %rcx
; X64-AVX-NEXT: vzeroupper
; X64-AVX-NEXT: retq
%1 = extractelement <16 x i64> %a0, i32 15