[PATCH] D157373: [RISCV] add a compress optimization for stack inst.
Wang Pengcheng via Phabricator via llvm-commits
llvm-commits@lists.llvm.org
Fri Aug 11 00:20:35 PDT 2023
wangpc added a comment.
I just found a regression:
--- a/llvm/test/CodeGen/RISCV/stack-realignment.ll
+++ b/llvm/test/CodeGen/RISCV/stack-realignment.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv32 -mattr=+c -verify-machineinstrs < %s \
; RUN: | FileCheck %s -check-prefix=RV32I
-; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv64 -mattr=+c -verify-machineinstrs < %s \
; RUN: | FileCheck %s -check-prefix=RV64I
declare void @callee(ptr)
@@ -529,56 +529,58 @@ define void @caller_no_realign2048() "no-realign-stack" {
define void @caller4096() {
; RV32I-LABEL: caller4096:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -2032
-; RV32I-NEXT: .cfi_def_cfa_offset 2032
-; RV32I-NEXT: sw ra, 2028(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 2024(sp) # 4-byte Folded Spill
+; RV32I-NEXT: addi sp, sp, -256
+; RV32I-NEXT: .cfi_def_cfa_offset 256
+; RV32I-NEXT: sw ra, 252(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 248(sp) # 4-byte Folded Spill
; RV32I-NEXT: .cfi_offset ra, -4
; RV32I-NEXT: .cfi_offset s0, -8
-; RV32I-NEXT: addi s0, sp, 2032
+; RV32I-NEXT: addi s0, sp, 256
; RV32I-NEXT: .cfi_def_cfa s0, 0
-; RV32I-NEXT: lui a0, 2
-; RV32I-NEXT: addi a0, a0, -2032
+; RV32I-NEXT: li a0, 31
+; RV32I-NEXT: slli a0, a0, 8
; RV32I-NEXT: sub sp, sp, a0
; RV32I-NEXT: srli a0, sp, 12
; RV32I-NEXT: slli sp, a0, 12
; RV32I-NEXT: lui a0, 1
-; RV32I-NEXT: add a0, sp, a0
+; RV32I-NEXT: add a0, a0, sp
; RV32I-NEXT: call callee@plt
; RV32I-NEXT: lui a0, 2
; RV32I-NEXT: sub sp, s0, a0
-; RV32I-NEXT: addi a0, a0, -2032
+; RV32I-NEXT: li a0, 31
+; RV32I-NEXT: slli a0, a0, 8
; RV32I-NEXT: add sp, sp, a0
-; RV32I-NEXT: lw ra, 2028(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 2024(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 2032
+; RV32I-NEXT: lw ra, 252(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 248(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 256
; RV32I-NEXT: ret
;
; RV64I-LABEL: caller4096:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -2032
-; RV64I-NEXT: .cfi_def_cfa_offset 2032
-; RV64I-NEXT: sd ra, 2024(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 2016(sp) # 8-byte Folded Spill
+; RV64I-NEXT: addi sp, sp, -512
+; RV64I-NEXT: .cfi_def_cfa_offset 512
+; RV64I-NEXT: sd ra, 504(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 496(sp) # 8-byte Folded Spill
; RV64I-NEXT: .cfi_offset ra, -8
; RV64I-NEXT: .cfi_offset s0, -16
-; RV64I-NEXT: addi s0, sp, 2032
+; RV64I-NEXT: addi s0, sp, 512
; RV64I-NEXT: .cfi_def_cfa s0, 0
-; RV64I-NEXT: lui a0, 2
-; RV64I-NEXT: addiw a0, a0, -2032
+; RV64I-NEXT: li a0, 15
+; RV64I-NEXT: slli a0, a0, 9
; RV64I-NEXT: sub sp, sp, a0
; RV64I-NEXT: srli a0, sp, 12
; RV64I-NEXT: slli sp, a0, 12
; RV64I-NEXT: lui a0, 1
-; RV64I-NEXT: add a0, sp, a0
+; RV64I-NEXT: add a0, a0, sp
; RV64I-NEXT: call callee@plt
; RV64I-NEXT: lui a0, 2
; RV64I-NEXT: sub sp, s0, a0
-; RV64I-NEXT: addiw a0, a0, -2032
+; RV64I-NEXT: li a0, 15
+; RV64I-NEXT: slli a0, a0, 9
; RV64I-NEXT: add sp, sp, a0
-; RV64I-NEXT: ld ra, 2024(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 2016(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 2032
+; RV64I-NEXT: ld ra, 504(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 496(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 512
; RV64I-NEXT: ret
%1 = alloca i8, align 4096
call void @callee(ptr %1)
I think that is because, when the first stack adjustment is shrunk so that it fits a compressed instruction, the second stack adjustment has to grow to cover the rest of the frame, and the larger constant can take more instructions to materialize.
So the impact on performance should be evaluated so that we can decide whether this optimization should be enabled under `-Os/-Oz` only. :-)
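For reference, here is my rough accounting of the RV32 epilogue above. The per-instruction sizes assume the standard RVC encodings (c.lui, c.add, c.lwsp, c.addi16sp, etc.) and are my own estimate from the diff, not measured output:

  # before this patch: 8 instructions, ~26 bytes
  lui   a0, 2           # 2 bytes (c.lui), a0 = 8192
  sub   sp, s0, a0      # 4 bytes (sp is x2, so no c.sub)
  addi  a0, a0, -2032   # 4 bytes, a0 = 8192 - 2032 = 6160, reuses the lui result
  add   sp, sp, a0      # 2 bytes (c.add)
  lw    ra, 2028(sp)    # 4 bytes, offset too large for c.lwsp
  lw    s0, 2024(sp)    # 4 bytes
  addi  sp, sp, 2032    # 4 bytes, out of c.addi16sp range
  ret                   # 2 bytes (c.jr)

  # after this patch: 9 instructions, ~20 bytes
  lui   a0, 2           # 2 bytes (c.lui), a0 = 8192
  sub   sp, s0, a0      # 4 bytes
  li    a0, 31          # 2 bytes (c.li), does not reuse the lui result
  slli  a0, a0, 8       # 2 bytes (c.slli), a0 = 31 << 8 = 7936
  add   sp, sp, a0      # 2 bytes (c.add)
  lw    ra, 252(sp)     # 2 bytes (c.lwsp)
  lw    s0, 248(sp)     # 2 bytes (c.lwsp)
  addi  sp, sp, 256     # 2 bytes (c.addi16sp)
  ret                   # 2 bytes (c.jr)

If my count is right, the patched sequence is smaller but executes one more instruction, which is why I would like the performance impact measured before enabling this outside of `-Os/-Oz`.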
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D157373/new/
https://reviews.llvm.org/D157373