[llvm] [RISCV] Take known minimum vlen into account when calculating alignment padding in assignRVVStackObjectOffsets. (PR #110312)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Sep 27 11:43:35 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-risc-v
Author: Craig Topper (topperc)
<details>
<summary>Changes</summary>
If we know that vlen is a multiple of 16, we don't need any alignment padding.
I wrote the code so that it generates the minimum amount of padding even when the stack alignment is 32 or larger, or when RVVBitsPerBlock is smaller than half the stack alignment.
---
Patch is 127.04 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/110312.diff
34 Files Affected:
- (modified) llvm/lib/Target/RISCV/RISCVFrameLowering.cpp (+10-1)
- (modified) llvm/test/CodeGen/RISCV/early-clobber-tied-def-subreg-liveness.ll (+5-5)
- (modified) llvm/test/CodeGen/RISCV/rvv-cfi-info.ll (+29-24)
- (modified) llvm/test/CodeGen/RISCV/rvv/access-fixed-objects-by-rvv.ll (+1-3)
- (modified) llvm/test/CodeGen/RISCV/rvv/addi-scalable-offset.mir (-2)
- (modified) llvm/test/CodeGen/RISCV/rvv/alloca-load-store-scalable-array.ll (+5-3)
- (modified) llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll (+176-79)
- (modified) llvm/test/CodeGen/RISCV/rvv/callee-saved-regs.ll (-2)
- (modified) llvm/test/CodeGen/RISCV/rvv/emergency-slot.mir (+1-1)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec-bf16.ll (+2-6)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll (+2-6)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll (+5-5)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint.ll (+10-6)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfwmaccbf16.ll (+4-12)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll (+10-10)
- (modified) llvm/test/CodeGen/RISCV/rvv/fmaximum-vp.ll (+13-13)
- (modified) llvm/test/CodeGen/RISCV/rvv/fminimum-vp.ll (+13-13)
- (modified) llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll (+42-54)
- (modified) llvm/test/CodeGen/RISCV/rvv/frm-insert.ll (-20)
- (modified) llvm/test/CodeGen/RISCV/rvv/no-reserved-frame.ll (-1)
- (modified) llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector.ll (-8)
- (modified) llvm/test/CodeGen/RISCV/rvv/rv32-spill-zvlsseg.ll (-4)
- (modified) llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector.ll (-4)
- (modified) llvm/test/CodeGen/RISCV/rvv/rv64-spill-zvlsseg.ll (-4)
- (modified) llvm/test/CodeGen/RISCV/rvv/scalar-stack-align.ll (+69-36)
- (modified) llvm/test/CodeGen/RISCV/rvv/stack-folding.ll (+8-24)
- (modified) llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll (+10-18)
- (modified) llvm/test/CodeGen/RISCV/rvv/vfdiv-vp.ll (+10-18)
- (modified) llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll (+26-44)
- (modified) llvm/test/CodeGen/RISCV/rvv/vfmul-vp.ll (+5-9)
- (modified) llvm/test/CodeGen/RISCV/rvv/vfsub-vp.ll (+10-18)
- (modified) llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll (+5-5)
- (modified) llvm/test/CodeGen/RISCV/rvv/vxrm-insert.ll (-4)
- (modified) llvm/test/CodeGen/RISCV/rvv/wrong-stack-offset-for-rvv-object.mir (+2-27)
``````````diff
diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
index 22824b77c37dd6..b0c525ea8c2996 100644
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
@@ -1102,16 +1102,25 @@ RISCVFrameLowering::assignRVVStackObjectOffsets(MachineFunction &MF) const {
RVVStackAlign = std::max(RVVStackAlign, ObjectAlign);
}
+ uint64_t StackSize = Offset;
+
+ // Multiply by vscale.
+ if (ST.getRealMinVLen() >= RISCV::RVVBitsPerBlock)
+ StackSize *= ST.getRealMinVLen() / RISCV::RVVBitsPerBlock;
+
// Ensure the alignment of the RVV stack. Since we want the most-aligned
// object right at the bottom (i.e., any padding at the top of the frame),
// readjust all RVV objects down by the alignment padding.
- uint64_t StackSize = Offset;
if (auto AlignmentPadding = offsetToAlignment(StackSize, RVVStackAlign)) {
StackSize += AlignmentPadding;
for (int FI : ObjectsToAllocate)
MFI.setObjectOffset(FI, MFI.getObjectOffset(FI) - AlignmentPadding);
}
+ // Remove vscale.
+ if (ST.getRealMinVLen() >= RISCV::RVVBitsPerBlock)
+ StackSize /= ST.getRealMinVLen() / RISCV::RVVBitsPerBlock;
+
return std::make_pair(StackSize, RVVStackAlign);
}
diff --git a/llvm/test/CodeGen/RISCV/early-clobber-tied-def-subreg-liveness.ll b/llvm/test/CodeGen/RISCV/early-clobber-tied-def-subreg-liveness.ll
index 899aad6ed7232c..0c2b809c0be20c 100644
--- a/llvm/test/CodeGen/RISCV/early-clobber-tied-def-subreg-liveness.ll
+++ b/llvm/test/CodeGen/RISCV/early-clobber-tied-def-subreg-liveness.ll
@@ -17,10 +17,10 @@ define void @_Z3foov() {
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: li a1, 10
-; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: add a0, a1, a0
; CHECK-NEXT: sub sp, sp, a0
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x0a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 10 * vlenb
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x09, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 9 * vlenb
; CHECK-NEXT: lui a0, %hi(.L__const._Z3foov.var_49)
; CHECK-NEXT: addi a0, a0, %lo(.L__const._Z3foov.var_49)
; CHECK-NEXT: vsetivli zero, 2, e16, m2, ta, ma
@@ -83,8 +83,8 @@ define void @_Z3foov() {
; CHECK-NEXT: addi a0, a0, %lo(var_47)
; CHECK-NEXT: vsseg4e16.v v8, (a0)
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: li a1, 10
-; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: add a0, a1, a0
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv-cfi-info.ll b/llvm/test/CodeGen/RISCV/rvv-cfi-info.ll
index 93fe66695b70ec..225680e846bac7 100644
--- a/llvm/test/CodeGen/RISCV/rvv-cfi-info.ll
+++ b/llvm/test/CodeGen/RISCV/rvv-cfi-info.ll
@@ -10,9 +10,10 @@ define riscv_vector_cc <vscale x 1 x i32> @test_vector_callee_cfi(<vscale x 1 x
; OMIT-FP-NEXT: addi sp, sp, -16
; OMIT-FP-NEXT: .cfi_def_cfa_offset 16
; OMIT-FP-NEXT: csrr a0, vlenb
-; OMIT-FP-NEXT: slli a0, a0, 3
+; OMIT-FP-NEXT: slli a1, a0, 3
+; OMIT-FP-NEXT: sub a0, a1, a0
; OMIT-FP-NEXT: sub sp, sp, a0
-; OMIT-FP-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; OMIT-FP-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x07, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 7 * vlenb
; OMIT-FP-NEXT: csrr a0, vlenb
; OMIT-FP-NEXT: li a1, 6
; OMIT-FP-NEXT: mul a0, a0, a1
@@ -26,13 +27,13 @@ define riscv_vector_cc <vscale x 1 x i32> @test_vector_callee_cfi(<vscale x 1 x
; OMIT-FP-NEXT: vs2r.v v2, (a0) # Unknown-size Folded Spill
; OMIT-FP-NEXT: addi a0, sp, 16
; OMIT-FP-NEXT: vs4r.v v4, (a0) # Unknown-size Folded Spill
-; OMIT-FP-NEXT: .cfi_escape 0x10, 0x61, 0x08, 0x11, 0x7e, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # $v1 @ cfa - 2 * vlenb
-; OMIT-FP-NEXT: .cfi_escape 0x10, 0x62, 0x08, 0x11, 0x7c, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # $v2 @ cfa - 4 * vlenb
-; OMIT-FP-NEXT: .cfi_escape 0x10, 0x63, 0x08, 0x11, 0x7d, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # $v3 @ cfa - 3 * vlenb
-; OMIT-FP-NEXT: .cfi_escape 0x10, 0x64, 0x08, 0x11, 0x78, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # $v4 @ cfa - 8 * vlenb
-; OMIT-FP-NEXT: .cfi_escape 0x10, 0x65, 0x08, 0x11, 0x79, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # $v5 @ cfa - 7 * vlenb
-; OMIT-FP-NEXT: .cfi_escape 0x10, 0x66, 0x08, 0x11, 0x7a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # $v6 @ cfa - 6 * vlenb
-; OMIT-FP-NEXT: .cfi_escape 0x10, 0x67, 0x08, 0x11, 0x7b, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # $v7 @ cfa - 5 * vlenb
+; OMIT-FP-NEXT: .cfi_escape 0x10, 0x61, 0x08, 0x11, 0x7f, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # $v1 @ cfa - 1 * vlenb
+; OMIT-FP-NEXT: .cfi_escape 0x10, 0x62, 0x08, 0x11, 0x7d, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # $v2 @ cfa - 3 * vlenb
+; OMIT-FP-NEXT: .cfi_escape 0x10, 0x63, 0x08, 0x11, 0x7e, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # $v3 @ cfa - 2 * vlenb
+; OMIT-FP-NEXT: .cfi_escape 0x10, 0x64, 0x08, 0x11, 0x79, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # $v4 @ cfa - 7 * vlenb
+; OMIT-FP-NEXT: .cfi_escape 0x10, 0x65, 0x08, 0x11, 0x7a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # $v5 @ cfa - 6 * vlenb
+; OMIT-FP-NEXT: .cfi_escape 0x10, 0x66, 0x08, 0x11, 0x7b, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # $v6 @ cfa - 5 * vlenb
+; OMIT-FP-NEXT: .cfi_escape 0x10, 0x67, 0x08, 0x11, 0x7c, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # $v7 @ cfa - 4 * vlenb
; OMIT-FP-NEXT: #APP
; OMIT-FP-NEXT: #NO_APP
; OMIT-FP-NEXT: csrr a0, vlenb
@@ -49,7 +50,8 @@ define riscv_vector_cc <vscale x 1 x i32> @test_vector_callee_cfi(<vscale x 1 x
; OMIT-FP-NEXT: addi a0, sp, 16
; OMIT-FP-NEXT: vl4r.v v4, (a0) # Unknown-size Folded Reload
; OMIT-FP-NEXT: csrr a0, vlenb
-; OMIT-FP-NEXT: slli a0, a0, 3
+; OMIT-FP-NEXT: slli a1, a0, 3
+; OMIT-FP-NEXT: sub a0, a1, a0
; OMIT-FP-NEXT: add sp, sp, a0
; OMIT-FP-NEXT: addi sp, sp, 16
; OMIT-FP-NEXT: ret
@@ -65,44 +67,47 @@ define riscv_vector_cc <vscale x 1 x i32> @test_vector_callee_cfi(<vscale x 1 x
; NO-OMIT-FP-NEXT: addi s0, sp, 32
; NO-OMIT-FP-NEXT: .cfi_def_cfa s0, 0
; NO-OMIT-FP-NEXT: csrr a0, vlenb
-; NO-OMIT-FP-NEXT: slli a0, a0, 3
+; NO-OMIT-FP-NEXT: slli a1, a0, 3
+; NO-OMIT-FP-NEXT: sub a0, a1, a0
; NO-OMIT-FP-NEXT: sub sp, sp, a0
; NO-OMIT-FP-NEXT: csrr a0, vlenb
-; NO-OMIT-FP-NEXT: slli a0, a0, 1
; NO-OMIT-FP-NEXT: sub a0, s0, a0
; NO-OMIT-FP-NEXT: addi a0, a0, -32
; NO-OMIT-FP-NEXT: vs1r.v v1, (a0) # Unknown-size Folded Spill
; NO-OMIT-FP-NEXT: csrr a0, vlenb
-; NO-OMIT-FP-NEXT: slli a0, a0, 2
+; NO-OMIT-FP-NEXT: slli a1, a0, 1
+; NO-OMIT-FP-NEXT: add a0, a1, a0
; NO-OMIT-FP-NEXT: sub a0, s0, a0
; NO-OMIT-FP-NEXT: addi a0, a0, -32
; NO-OMIT-FP-NEXT: vs2r.v v2, (a0) # Unknown-size Folded Spill
; NO-OMIT-FP-NEXT: csrr a0, vlenb
-; NO-OMIT-FP-NEXT: slli a0, a0, 3
+; NO-OMIT-FP-NEXT: slli a1, a0, 3
+; NO-OMIT-FP-NEXT: sub a0, a1, a0
; NO-OMIT-FP-NEXT: sub a0, s0, a0
; NO-OMIT-FP-NEXT: addi a0, a0, -32
; NO-OMIT-FP-NEXT: vs4r.v v4, (a0) # Unknown-size Folded Spill
-; NO-OMIT-FP-NEXT: .cfi_escape 0x10, 0x61, 0x0b, 0x11, 0x60, 0x22, 0x11, 0x7e, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # $v1 @ cfa - 32 - 2 * vlenb
-; NO-OMIT-FP-NEXT: .cfi_escape 0x10, 0x62, 0x0b, 0x11, 0x60, 0x22, 0x11, 0x7c, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # $v2 @ cfa - 32 - 4 * vlenb
-; NO-OMIT-FP-NEXT: .cfi_escape 0x10, 0x63, 0x0b, 0x11, 0x60, 0x22, 0x11, 0x7d, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # $v3 @ cfa - 32 - 3 * vlenb
-; NO-OMIT-FP-NEXT: .cfi_escape 0x10, 0x64, 0x0b, 0x11, 0x60, 0x22, 0x11, 0x78, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # $v4 @ cfa - 32 - 8 * vlenb
-; NO-OMIT-FP-NEXT: .cfi_escape 0x10, 0x65, 0x0b, 0x11, 0x60, 0x22, 0x11, 0x79, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # $v5 @ cfa - 32 - 7 * vlenb
-; NO-OMIT-FP-NEXT: .cfi_escape 0x10, 0x66, 0x0b, 0x11, 0x60, 0x22, 0x11, 0x7a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # $v6 @ cfa - 32 - 6 * vlenb
-; NO-OMIT-FP-NEXT: .cfi_escape 0x10, 0x67, 0x0b, 0x11, 0x60, 0x22, 0x11, 0x7b, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # $v7 @ cfa - 32 - 5 * vlenb
+; NO-OMIT-FP-NEXT: .cfi_escape 0x10, 0x61, 0x0b, 0x11, 0x60, 0x22, 0x11, 0x7f, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # $v1 @ cfa - 32 - 1 * vlenb
+; NO-OMIT-FP-NEXT: .cfi_escape 0x10, 0x62, 0x0b, 0x11, 0x60, 0x22, 0x11, 0x7d, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # $v2 @ cfa - 32 - 3 * vlenb
+; NO-OMIT-FP-NEXT: .cfi_escape 0x10, 0x63, 0x0b, 0x11, 0x60, 0x22, 0x11, 0x7e, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # $v3 @ cfa - 32 - 2 * vlenb
+; NO-OMIT-FP-NEXT: .cfi_escape 0x10, 0x64, 0x0b, 0x11, 0x60, 0x22, 0x11, 0x79, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # $v4 @ cfa - 32 - 7 * vlenb
+; NO-OMIT-FP-NEXT: .cfi_escape 0x10, 0x65, 0x0b, 0x11, 0x60, 0x22, 0x11, 0x7a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # $v5 @ cfa - 32 - 6 * vlenb
+; NO-OMIT-FP-NEXT: .cfi_escape 0x10, 0x66, 0x0b, 0x11, 0x60, 0x22, 0x11, 0x7b, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # $v6 @ cfa - 32 - 5 * vlenb
+; NO-OMIT-FP-NEXT: .cfi_escape 0x10, 0x67, 0x0b, 0x11, 0x60, 0x22, 0x11, 0x7c, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # $v7 @ cfa - 32 - 4 * vlenb
; NO-OMIT-FP-NEXT: #APP
; NO-OMIT-FP-NEXT: #NO_APP
; NO-OMIT-FP-NEXT: csrr a0, vlenb
-; NO-OMIT-FP-NEXT: slli a0, a0, 1
; NO-OMIT-FP-NEXT: sub a0, s0, a0
; NO-OMIT-FP-NEXT: addi a0, a0, -32
; NO-OMIT-FP-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload
; NO-OMIT-FP-NEXT: csrr a0, vlenb
-; NO-OMIT-FP-NEXT: slli a0, a0, 2
+; NO-OMIT-FP-NEXT: slli a1, a0, 1
+; NO-OMIT-FP-NEXT: add a0, a1, a0
; NO-OMIT-FP-NEXT: sub a0, s0, a0
; NO-OMIT-FP-NEXT: addi a0, a0, -32
; NO-OMIT-FP-NEXT: vl2r.v v2, (a0) # Unknown-size Folded Reload
; NO-OMIT-FP-NEXT: csrr a0, vlenb
-; NO-OMIT-FP-NEXT: slli a0, a0, 3
+; NO-OMIT-FP-NEXT: slli a1, a0, 3
+; NO-OMIT-FP-NEXT: sub a0, a1, a0
; NO-OMIT-FP-NEXT: sub a0, s0, a0
; NO-OMIT-FP-NEXT: addi a0, a0, -32
; NO-OMIT-FP-NEXT: vl4r.v v4, (a0) # Unknown-size Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/rvv/access-fixed-objects-by-rvv.ll b/llvm/test/CodeGen/RISCV/rvv/access-fixed-objects-by-rvv.ll
index 91f700ef968001..8640ac2da53030 100644
--- a/llvm/test/CodeGen/RISCV/rvv/access-fixed-objects-by-rvv.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/access-fixed-objects-by-rvv.ll
@@ -33,9 +33,8 @@ define <vscale x 1 x i64> @access_fixed_and_vector_objects(ptr %val) {
; RV64IV-NEXT: addi sp, sp, -528
; RV64IV-NEXT: .cfi_def_cfa_offset 528
; RV64IV-NEXT: csrr a0, vlenb
-; RV64IV-NEXT: slli a0, a0, 1
; RV64IV-NEXT: sub sp, sp, a0
-; RV64IV-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x90, 0x04, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 528 + 2 * vlenb
+; RV64IV-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x90, 0x04, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 528 + 1 * vlenb
; RV64IV-NEXT: addi a0, sp, 8
; RV64IV-NEXT: vl1re64.v v8, (a0)
; RV64IV-NEXT: addi a0, sp, 528
@@ -44,7 +43,6 @@ define <vscale x 1 x i64> @access_fixed_and_vector_objects(ptr %val) {
; RV64IV-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV64IV-NEXT: vadd.vv v8, v8, v9
; RV64IV-NEXT: csrr a0, vlenb
-; RV64IV-NEXT: slli a0, a0, 1
; RV64IV-NEXT: add sp, sp, a0
; RV64IV-NEXT: addi sp, sp, 528
; RV64IV-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/addi-scalable-offset.mir b/llvm/test/CodeGen/RISCV/rvv/addi-scalable-offset.mir
index f976adcfe931c2..5f0e1a9b9aa24c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/addi-scalable-offset.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/addi-scalable-offset.mir
@@ -38,12 +38,10 @@ body: |
; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa $x8, 0
; CHECK-NEXT: $x2 = frame-setup ADDI $x2, -240
; CHECK-NEXT: $x12 = frame-setup PseudoReadVLENB
- ; CHECK-NEXT: $x12 = frame-setup SLLI killed $x12, 1
; CHECK-NEXT: $x2 = frame-setup SUB $x2, killed $x12
; CHECK-NEXT: dead $x0 = PseudoVSETVLI killed renamable $x11, 216 /* e64, m1, ta, ma */, implicit-def $vl, implicit-def $vtype
; CHECK-NEXT: renamable $v8 = PseudoVLE64_V_M1 undef renamable $v8, killed renamable $x10, $noreg, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype :: (load unknown-size from %ir.pa, align 8)
; CHECK-NEXT: $x10 = PseudoReadVLENB
- ; CHECK-NEXT: $x10 = SLLI killed $x10, 1
; CHECK-NEXT: $x10 = SUB $x8, killed $x10
; CHECK-NEXT: $x10 = ADDI killed $x10, -2048
; CHECK-NEXT: $x10 = ADDI killed $x10, -224
diff --git a/llvm/test/CodeGen/RISCV/rvv/alloca-load-store-scalable-array.ll b/llvm/test/CodeGen/RISCV/rvv/alloca-load-store-scalable-array.ll
index 1fe91c721f4dd2..2e70c3395090ec 100644
--- a/llvm/test/CodeGen/RISCV/rvv/alloca-load-store-scalable-array.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/alloca-load-store-scalable-array.ll
@@ -11,9 +11,10 @@ define void @test(ptr %addr) {
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrrs a1, vlenb, zero
-; CHECK-NEXT: slli a1, a1, 2
+; CHECK-NEXT: slli a2, a1, 1
+; CHECK-NEXT: add a1, a2, a1
; CHECK-NEXT: sub sp, sp, a1
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x03, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 3 * vlenb
; CHECK-NEXT: csrrs a1, vlenb, zero
; CHECK-NEXT: add a2, a0, a1
; CHECK-NEXT: vl1re64.v v8, (a2)
@@ -28,7 +29,8 @@ define void @test(ptr %addr) {
; CHECK-NEXT: add a0, a0, a1
; CHECK-NEXT: vs1r.v v8, (a0)
; CHECK-NEXT: csrrs a0, vlenb, zero
-; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: jalr zero, 0(ra)
diff --git a/llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll b/llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll
index 90794820ddd849..35e269b9119025 100644
--- a/llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll
@@ -7,34 +7,13 @@
; RUN: | FileCheck %s --check-prefixes=CHECK,NOMUL
define void @lmul1() nounwind {
-; NOZBA-LABEL: lmul1:
-; NOZBA: # %bb.0:
-; NOZBA-NEXT: csrr a0, vlenb
-; NOZBA-NEXT: slli a0, a0, 1
-; NOZBA-NEXT: sub sp, sp, a0
-; NOZBA-NEXT: csrr a0, vlenb
-; NOZBA-NEXT: slli a0, a0, 1
-; NOZBA-NEXT: add sp, sp, a0
-; NOZBA-NEXT: ret
-;
-; ZBA-LABEL: lmul1:
-; ZBA: # %bb.0:
-; ZBA-NEXT: csrr a0, vlenb
-; ZBA-NEXT: slli a0, a0, 1
-; ZBA-NEXT: sub sp, sp, a0
-; ZBA-NEXT: csrr a0, vlenb
-; ZBA-NEXT: sh1add sp, a0, sp
-; ZBA-NEXT: ret
-;
-; NOMUL-LABEL: lmul1:
-; NOMUL: # %bb.0:
-; NOMUL-NEXT: csrr a0, vlenb
-; NOMUL-NEXT: slli a0, a0, 1
-; NOMUL-NEXT: sub sp, sp, a0
-; NOMUL-NEXT: csrr a0, vlenb
-; NOMUL-NEXT: slli a0, a0, 1
-; NOMUL-NEXT: add sp, sp, a0
-; NOMUL-NEXT: ret
+; CHECK-LABEL: lmul1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: sub sp, sp, a0
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: ret
%v = alloca <vscale x 1 x i64>
ret void
}
@@ -192,29 +171,34 @@ define void @lmul2_and_1() nounwind {
; NOZBA-LABEL: lmul2_and_1:
; NOZBA: # %bb.0:
; NOZBA-NEXT: csrr a0, vlenb
-; NOZBA-NEXT: slli a0, a0, 2
+; NOZBA-NEXT: slli a1, a0, 1
+; NOZBA-NEXT: add a0, a1, a0
; NOZBA-NEXT: sub sp, sp, a0
; NOZBA-NEXT: csrr a0, vlenb
-; NOZBA-NEXT: slli a0, a0, 2
+; NOZBA-NEXT: slli a1, a0, 1
+; NOZBA-NEXT: add a0, a1, a0
; NOZBA-NEXT: add sp, sp, a0
; NOZBA-NEXT: ret
;
; ZBA-LABEL: lmul2_and_1:
; ZBA: # %bb.0:
; ZBA-NEXT: csrr a0, vlenb
-; ZBA-NEXT: slli a0, a0, 2
+; ZBA-NEXT: sh1add a0, a0, a0
; ZBA-NEXT: sub sp, sp, a0
; ZBA-NEXT: csrr a0, vlenb
-; ZBA-NEXT: sh2add sp, a0, sp
+; ZBA-NEXT: sh1add a0, a0, a0
+; ZBA-NEXT: add sp, sp, a0
; ZBA-NEXT: ret
;
; NOMUL-LABEL: lmul2_and_1:
; NOMUL: # %bb.0:
; NOMUL-NEXT: csrr a0, vlenb
-; NOMUL-NEXT: slli a0, a0, 2
+; NOMUL-NEXT: slli a1, a0, 1
+; NOMUL-NEXT: add a0, a1, a0
; NOMUL-NEXT: sub sp, sp, a0
; NOMUL-NEXT: csrr a0, vlenb
-; NOMUL-NEXT: slli a0, a0, 2
+; NOMUL-NEXT: slli a1, a0, 1
+; NOMUL-NEXT: add a0, a1, a0
; NOMUL-NEXT: add sp, sp, a0
; NOMUL-NEXT: ret
%v1 = alloca <vscale x 2 x i64>
@@ -223,63 +207,176 @@ define void @lmul2_and_1() nounwind {
}
define void @lmul4_and_1() nounwind {
-; CHECK-LABEL: lmul4_and_1:
-; CHECK: # %bb.0:
-; CHECK-NEXT: addi sp, sp, -48
-; CHECK-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; CHECK-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; CHECK-NEXT: addi s0, sp, 48
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
-; CHECK-NEXT: sub sp, sp, a0
-; CHECK-NEXT: andi sp, sp, -32
-; CHECK-NEXT: addi sp, s0, -48
-; CHECK-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; CHECK-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; CHECK-NEXT: addi sp, sp, 48
-; CHECK-NEXT: ret
+; NOZBA-LABEL: lmul4_and_1:
+; NOZBA: # %bb.0:
+; NOZBA-NEXT: addi sp, sp, -48
+; NOZBA-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; NOZBA-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; NOZBA-NEXT: addi s0, sp, 48
+; NOZBA-NEXT: csrr a0, vlenb
+; NOZBA-NEXT: li a1, 6
+; NOZBA-NEXT: mul a0, a0, a1
+; NOZBA-NEXT: sub sp, sp, a0
+; NOZBA-NEXT: andi sp, sp, -32
+; NOZBA-NEXT: addi sp, s0, -48
+; NOZBA-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; NOZBA-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; NOZBA-NEXT: addi sp, sp, 48
+; NOZBA-NEXT: ret
+;
+; ZBA-LABEL: lmul4_and_1:
+; ZBA: # %bb.0:
+; ZBA-NEXT: addi sp, sp, -48
+; ZBA-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; ZBA-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; ZBA-NEXT: addi s0, sp, 48
+; ZBA-NEXT: csrr a0, vlenb
+; ZBA-NEXT: slli a0, a0, 1
+; ZBA-NEXT: sh1add a0, a0, a0
+; ZBA-NEXT: sub sp, sp, a0
+; ZBA-NEXT: andi sp, sp, -32
+; ZBA-NEXT: addi sp, s0, -48
+; ZBA-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; ZBA-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; ZBA-NEXT: addi sp, sp, 48
+; ZBA-NEXT: ret
+;
+; NOMUL-LABEL: lmul4_and_1:
+; NOMUL: # %bb.0:
+; NOMUL-NEXT: addi sp, sp, -48
+; NOMUL-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; NOMUL-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; NOMUL-NEXT: addi s0, sp, 48
+; NOMUL-NEXT: csrr a0, vlenb
+; NOMUL-NEXT: slli a0, a0, 1
+; NOMUL-NEXT: mv a1, a0
+; NOMUL-NEXT: slli a0, a0, 1
+; NOMUL-NEXT: add a0, a0, a1
+; NOMUL-NEXT: sub sp, sp, a0
+; NOMUL-NEXT: andi sp, sp, -32
+; NOMUL-NEXT: addi sp, s0, -48
+; NOMUL-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; NOMUL-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; NOMUL-NEXT: addi sp, sp, 48
+; NOMUL-NEXT: ret
%v1 = alloca <vscale x 4 x i64>
%v2 = alloca <vscale x 1 x i64>
ret void
}
define void @lmul4_and_2() nounwind {
-; CHECK-LABEL: lmul4_and_2:
-; CHECK: # %bb.0:
-; CHECK-NEXT: addi sp, sp, -48
-; CHECK-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; CHECK-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; CHECK-NEXT: addi s0, sp, 48
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
-; CHECK-NEXT: sub sp, sp, a0
-; CHECK-NEXT: andi sp, sp, -32
-; CHECK-NEXT: addi sp, s0, -48
-; CHECK-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; CHECK-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; CHECK-NEXT: addi sp, sp, 48
-; CHECK-NEXT: ret
+; NOZBA-LABEL: lmul4_and_2:
+; NOZBA: # %bb.0:
+; NOZBA-NEXT: addi sp, sp, -48
+; NOZBA-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; NOZBA-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; NOZBA-NEXT: addi s0, sp, 48
+; NOZBA-NEXT: csrr a0, vlenb
+; NOZBA-NEXT: li a1, 6
+; NOZBA-NEXT: mul a0, a0, a1
+; NOZBA-NEXT: sub sp, sp, a0
+; NOZBA-NEXT: andi sp, sp, -32
+; NOZBA-NEXT: addi sp, s0, -48
+; NOZBA-NEXT: ld ra, 40(sp)...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/110312
More information about the llvm-commits
mailing list