[llvm] daf067d - [LoongArch] Stack realignment support
Weining Lu via llvm-commits
llvm-commits at lists.llvm.org
Fri Oct 21 02:31:01 PDT 2022
Author: wanglei
Date: 2022-10-21T17:30:29+08:00
New Revision: daf067da04c98ce666f67ba2b3beed4cf4be7549
URL: https://github.com/llvm/llvm-project/commit/daf067da04c98ce666f67ba2b3beed4cf4be7549
DIFF: https://github.com/llvm/llvm-project/commit/daf067da04c98ce666f67ba2b3beed4cf4be7549.diff
LOG: [LoongArch] Stack realignment support
This patch adds support for stack realignment and for variable-sized
objects.
Differential Revision: https://reviews.llvm.org/D136074
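
For context, the realignment emitted by the new prologue code is simply a
shift-right/shift-left pair on $sp that clears the low bits. A minimal
sketch of that arithmetic in standalone C++ (illustrative only, not part of
the patch; the function name is made up):

#include <cassert>
#include <cstdint>

// SRLI then SLLI by log2(MaxAlign) rounds SP down to a multiple of MaxAlign.
uint64_t alignDown(uint64_t SP, unsigned ShiftAmount) {
  return (SP >> ShiftAmount) << ShiftAmount;
}

int main() {
  // With MaxAlign = 64 the shift amount is 6, as in the tests below.
  assert(alignDown(0x1234, 6) == 0x1200);
  assert(alignDown(0x1200, 6) == 0x1200); // already aligned: unchanged
  return 0;
}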
Added:
llvm/test/CodeGen/LoongArch/stack-realignment-with-variable-sized-objects.ll
llvm/test/CodeGen/LoongArch/stack-realignment.ll
Modified:
llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp
index 5de473bfc4dd..45472157b482 100644
--- a/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp
@@ -184,6 +184,36 @@ void LoongArchFrameLowering::emitPrologue(MachineFunction &MF,
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex)
.setMIFlag(MachineInstr::FrameSetup);
+
+ // Realign stack.
+ if (RI->hasStackRealignment(MF)) {
+ unsigned ShiftAmount = Log2(MFI.getMaxAlign());
+ Register VR =
+ MF.getRegInfo().createVirtualRegister(&LoongArch::GPRRegClass);
+ BuildMI(MBB, MBBI, DL,
+ TII->get(STI.is64Bit() ? LoongArch::SRLI_D : LoongArch::SRLI_W),
+ VR)
+ .addReg(SPReg)
+ .addImm(ShiftAmount)
+ .setMIFlag(MachineInstr::FrameSetup);
+ BuildMI(MBB, MBBI, DL,
+ TII->get(STI.is64Bit() ? LoongArch::SLLI_D : LoongArch::SLLI_W),
+ SPReg)
+ .addReg(VR)
+ .addImm(ShiftAmount)
+ .setMIFlag(MachineInstr::FrameSetup);
+ // FP will be used to restore the frame in the epilogue, so we need
+ // another base register BP to record SP after re-alignment. SP will
+ // track the current stack after allocating variable sized objects.
+ if (hasBP(MF)) {
+ // move BP, $sp
+ BuildMI(MBB, MBBI, DL, TII->get(LoongArch::OR),
+ LoongArchABI::getBPReg())
+ .addReg(SPReg)
+ .addReg(LoongArch::R0)
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+ }
}
}
@@ -276,6 +306,7 @@ StackOffset LoongArchFrameLowering::getFrameIndexReference(
const MachineFrameInfo &MFI = MF.getFrameInfo();
const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>();
+ uint64_t StackSize = MFI.getStackSize();
// Callee-saved registers should be referenced relative to the stack
// pointer (positive offset), otherwise use the frame pointer (negative
@@ -292,12 +323,21 @@ StackOffset LoongArchFrameLowering::getFrameIndexReference(
MaxCSFI = CSI[CSI.size() - 1].getFrameIdx();
}
- if ((FI >= MinCSFI && FI <= MaxCSFI) || !hasFP(MF)) {
+ if (FI >= MinCSFI && FI <= MaxCSFI) {
FrameReg = LoongArch::R3;
- Offset += StackOffset::getFixed(MFI.getStackSize());
+ Offset += StackOffset::getFixed(StackSize);
+ } else if (RI->hasStackRealignment(MF) && !MFI.isFixedObjectIndex(FI)) {
+ // If the stack was realigned, the frame pointer is set in order to allow
+ // SP to be restored, so we need another base register to record the stack
+ // after realignment.
+ FrameReg = hasBP(MF) ? LoongArchABI::getBPReg() : LoongArch::R3;
+ Offset += StackOffset::getFixed(StackSize);
} else {
FrameReg = RI->getFrameRegister(MF);
- Offset += StackOffset::getFixed(LoongArchFI->getVarArgsSaveSize());
+ if (hasFP(MF))
+ Offset += StackOffset::getFixed(LoongArchFI->getVarArgsSaveSize());
+ else
+ Offset += StackOffset::getFixed(StackSize);
}
return Offset;
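
To restate the frame-index handling above in isolation, here is a
simplified, hypothetical sketch of the base-register choice (the enum and
function names are illustrative, not LLVM API):

enum class BaseReg { SP, FP, BP };

BaseReg chooseBaseReg(bool isCalleeSavedSlot, bool stackRealigned,
                      bool isFixedObject, bool hasBP, bool hasFP) {
  if (isCalleeSavedSlot)
    return BaseReg::SP;                       // CSR slots: SP + StackSize
  if (stackRealigned && !isFixedObject)
    return hasBP ? BaseReg::BP : BaseReg::SP; // realigned locals: BP, or SP
                                              // when no base pointer is needed
  return hasFP ? BaseReg::FP : BaseReg::SP;   // otherwise the frame register
}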
diff --git a/llvm/test/CodeGen/LoongArch/stack-realignment-with-variable-sized-objects.ll b/llvm/test/CodeGen/LoongArch/stack-realignment-with-variable-sized-objects.ll
new file mode 100644
index 000000000000..7e2f8246bca4
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/stack-realignment-with-variable-sized-objects.ll
@@ -0,0 +1,73 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch32 --verify-machineinstrs < %s \
+; RUN: | FileCheck %s --check-prefix=LA32
+; RUN: llc --mtriple=loongarch64 --verify-machineinstrs < %s \
+; RUN: | FileCheck %s --check-prefix=LA64
+
+declare void @callee(i8*, i32*)
+
+define void @caller(i32 %n) {
+; LA32-LABEL: caller:
+; LA32: # %bb.0:
+; LA32-NEXT: addi.w $sp, $sp, -64
+; LA32-NEXT: .cfi_def_cfa_offset 64
+; LA32-NEXT: st.w $ra, $sp, 60 # 4-byte Folded Spill
+; LA32-NEXT: st.w $fp, $sp, 56 # 4-byte Folded Spill
+; LA32-NEXT: st.w $s8, $sp, 52 # 4-byte Folded Spill
+; LA32-NEXT: .cfi_offset 1, -4
+; LA32-NEXT: .cfi_offset 22, -8
+; LA32-NEXT: .cfi_offset 31, -12
+; LA32-NEXT: addi.w $fp, $sp, 64
+; LA32-NEXT: .cfi_def_cfa 22, 0
+; LA32-NEXT: srli.w $a1, $sp, 6
+; LA32-NEXT: slli.w $sp, $a1, 6
+; LA32-NEXT: move $s8, $sp
+; LA32-NEXT: addi.w $a0, $a0, 15
+; LA32-NEXT: addi.w $a1, $zero, -16
+; LA32-NEXT: and $a0, $a0, $a1
+; LA32-NEXT: sub.w $a0, $sp, $a0
+; LA32-NEXT: move $sp, $a0
+; LA32-NEXT: addi.w $a1, $s8, 0
+; LA32-NEXT: bl %plt(callee)
+; LA32-NEXT: addi.w $sp, $fp, -64
+; LA32-NEXT: ld.w $s8, $sp, 52 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $fp, $sp, 56 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $ra, $sp, 60 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 64
+; LA32-NEXT: ret
+;
+; LA64-LABEL: caller:
+; LA64: # %bb.0:
+; LA64-NEXT: addi.d $sp, $sp, -64
+; LA64-NEXT: .cfi_def_cfa_offset 64
+; LA64-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
+; LA64-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
+; LA64-NEXT: st.d $s8, $sp, 40 # 8-byte Folded Spill
+; LA64-NEXT: .cfi_offset 1, -8
+; LA64-NEXT: .cfi_offset 22, -16
+; LA64-NEXT: .cfi_offset 31, -24
+; LA64-NEXT: addi.d $fp, $sp, 64
+; LA64-NEXT: .cfi_def_cfa 22, 0
+; LA64-NEXT: srli.d $a1, $sp, 6
+; LA64-NEXT: slli.d $sp, $a1, 6
+; LA64-NEXT: move $s8, $sp
+; LA64-NEXT: addi.w $a1, $zero, -16
+; LA64-NEXT: lu32i.d $a1, 1
+; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0
+; LA64-NEXT: addi.d $a0, $a0, 15
+; LA64-NEXT: and $a0, $a0, $a1
+; LA64-NEXT: sub.d $a0, $sp, $a0
+; LA64-NEXT: move $sp, $a0
+; LA64-NEXT: addi.d $a1, $s8, 0
+; LA64-NEXT: bl %plt(callee)
+; LA64-NEXT: addi.d $sp, $fp, -64
+; LA64-NEXT: ld.d $s8, $sp, 40 # 8-byte Folded Reload
+; LA64-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; LA64-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 64
+; LA64-NEXT: ret
+ %1 = alloca i8, i32 %n
+ %2 = alloca i32, align 64
+ call void @callee(i8* %1, i32 *%2)
+ ret void
+}
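
As a rough source-level analogue of the IR test above (assuming Clang's
__builtin_alloca and C++ alignas; this snippet is not part of the patch),
the combination of a variable-sized allocation and an over-aligned local is
what forces both the frame pointer $fp and the base pointer $s8:

void callee(char *, int *);

void caller(int n) {
  // Variable-sized object: $sp keeps moving at run time.
  char *buf = static_cast<char *>(__builtin_alloca(n));
  // Over-aligned local: triggers stack realignment in the prologue.
  alignas(64) int slot;
  callee(buf, &slot);
}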
diff --git a/llvm/test/CodeGen/LoongArch/stack-realignment.ll b/llvm/test/CodeGen/LoongArch/stack-realignment.ll
new file mode 100644
index 000000000000..16c7bcd8b1c5
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/stack-realignment.ll
@@ -0,0 +1,627 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch32 --verify-machineinstrs < %s \
+; RUN: | FileCheck %s --check-prefix=LA32
+; RUN: llc --mtriple=loongarch64 --verify-machineinstrs < %s \
+; RUN: | FileCheck %s --check-prefix=LA64
+
+declare void @callee(i8*)
+
+define void @caller32() {
+; LA32-LABEL: caller32:
+; LA32: # %bb.0:
+; LA32-NEXT: addi.w $sp, $sp, -32
+; LA32-NEXT: .cfi_def_cfa_offset 32
+; LA32-NEXT: st.w $ra, $sp, 28 # 4-byte Folded Spill
+; LA32-NEXT: st.w $fp, $sp, 24 # 4-byte Folded Spill
+; LA32-NEXT: .cfi_offset 1, -4
+; LA32-NEXT: .cfi_offset 22, -8
+; LA32-NEXT: addi.w $fp, $sp, 32
+; LA32-NEXT: .cfi_def_cfa 22, 0
+; LA32-NEXT: srli.w $a0, $sp, 5
+; LA32-NEXT: slli.w $sp, $a0, 5
+; LA32-NEXT: addi.w $a0, $sp, 0
+; LA32-NEXT: bl %plt(callee)
+; LA32-NEXT: addi.w $sp, $fp, -32
+; LA32-NEXT: ld.w $fp, $sp, 24 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $ra, $sp, 28 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 32
+; LA32-NEXT: ret
+;
+; LA64-LABEL: caller32:
+; LA64: # %bb.0:
+; LA64-NEXT: addi.d $sp, $sp, -32
+; LA64-NEXT: .cfi_def_cfa_offset 32
+; LA64-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill
+; LA64-NEXT: st.d $fp, $sp, 16 # 8-byte Folded Spill
+; LA64-NEXT: .cfi_offset 1, -8
+; LA64-NEXT: .cfi_offset 22, -16
+; LA64-NEXT: addi.d $fp, $sp, 32
+; LA64-NEXT: .cfi_def_cfa 22, 0
+; LA64-NEXT: srli.d $a0, $sp, 5
+; LA64-NEXT: slli.d $sp, $a0, 5
+; LA64-NEXT: addi.d $a0, $sp, 0
+; LA64-NEXT: bl %plt(callee)
+; LA64-NEXT: addi.d $sp, $fp, -32
+; LA64-NEXT: ld.d $fp, $sp, 16 # 8-byte Folded Reload
+; LA64-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 32
+; LA64-NEXT: ret
+ %1 = alloca i8, align 32
+ call void @callee(i8* %1)
+ ret void
+}
+
+define void @caller_no_realign32() "no-realign-stack" {
+; LA32-LABEL: caller_no_realign32:
+; LA32: # %bb.0:
+; LA32-NEXT: addi.w $sp, $sp, -16
+; LA32-NEXT: .cfi_def_cfa_offset 16
+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT: .cfi_offset 1, -4
+; LA32-NEXT: addi.w $a0, $sp, 0
+; LA32-NEXT: bl %plt(callee)
+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 16
+; LA32-NEXT: ret
+;
+; LA64-LABEL: caller_no_realign32:
+; LA64: # %bb.0:
+; LA64-NEXT: addi.d $sp, $sp, -16
+; LA64-NEXT: .cfi_def_cfa_offset 16
+; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT: .cfi_offset 1, -8
+; LA64-NEXT: addi.d $a0, $sp, 0
+; LA64-NEXT: bl %plt(callee)
+; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 16
+; LA64-NEXT: ret
+ %1 = alloca i8, align 32
+ call void @callee(i8* %1)
+ ret void
+}
+
+define void @caller64() {
+; LA32-LABEL: caller64:
+; LA32: # %bb.0:
+; LA32-NEXT: addi.w $sp, $sp, -64
+; LA32-NEXT: .cfi_def_cfa_offset 64
+; LA32-NEXT: st.w $ra, $sp, 60 # 4-byte Folded Spill
+; LA32-NEXT: st.w $fp, $sp, 56 # 4-byte Folded Spill
+; LA32-NEXT: .cfi_offset 1, -4
+; LA32-NEXT: .cfi_offset 22, -8
+; LA32-NEXT: addi.w $fp, $sp, 64
+; LA32-NEXT: .cfi_def_cfa 22, 0
+; LA32-NEXT: srli.w $a0, $sp, 6
+; LA32-NEXT: slli.w $sp, $a0, 6
+; LA32-NEXT: addi.w $a0, $sp, 0
+; LA32-NEXT: bl %plt(callee)
+; LA32-NEXT: addi.w $sp, $fp, -64
+; LA32-NEXT: ld.w $fp, $sp, 56 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $ra, $sp, 60 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 64
+; LA32-NEXT: ret
+;
+; LA64-LABEL: caller64:
+; LA64: # %bb.0:
+; LA64-NEXT: addi.d $sp, $sp, -64
+; LA64-NEXT: .cfi_def_cfa_offset 64
+; LA64-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
+; LA64-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
+; LA64-NEXT: .cfi_offset 1, -8
+; LA64-NEXT: .cfi_offset 22, -16
+; LA64-NEXT: addi.d $fp, $sp, 64
+; LA64-NEXT: .cfi_def_cfa 22, 0
+; LA64-NEXT: srli.d $a0, $sp, 6
+; LA64-NEXT: slli.d $sp, $a0, 6
+; LA64-NEXT: addi.d $a0, $sp, 0
+; LA64-NEXT: bl %plt(callee)
+; LA64-NEXT: addi.d $sp, $fp, -64
+; LA64-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; LA64-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 64
+; LA64-NEXT: ret
+ %1 = alloca i8, align 64
+ call void @callee(i8* %1)
+ ret void
+}
+
+define void @caller_no_realign64() "no-realign-stack" {
+; LA32-LABEL: caller_no_realign64:
+; LA32: # %bb.0:
+; LA32-NEXT: addi.w $sp, $sp, -16
+; LA32-NEXT: .cfi_def_cfa_offset 16
+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT: .cfi_offset 1, -4
+; LA32-NEXT: addi.w $a0, $sp, 0
+; LA32-NEXT: bl %plt(callee)
+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 16
+; LA32-NEXT: ret
+;
+; LA64-LABEL: caller_no_realign64:
+; LA64: # %bb.0:
+; LA64-NEXT: addi.d $sp, $sp, -16
+; LA64-NEXT: .cfi_def_cfa_offset 16
+; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT: .cfi_offset 1, -8
+; LA64-NEXT: addi.d $a0, $sp, 0
+; LA64-NEXT: bl %plt(callee)
+; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 16
+; LA64-NEXT: ret
+ %1 = alloca i8, align 64
+ call void @callee(i8* %1)
+ ret void
+}
+
+define void @caller128() {
+; LA32-LABEL: caller128:
+; LA32: # %bb.0:
+; LA32-NEXT: addi.w $sp, $sp, -128
+; LA32-NEXT: .cfi_def_cfa_offset 128
+; LA32-NEXT: st.w $ra, $sp, 124 # 4-byte Folded Spill
+; LA32-NEXT: st.w $fp, $sp, 120 # 4-byte Folded Spill
+; LA32-NEXT: .cfi_offset 1, -4
+; LA32-NEXT: .cfi_offset 22, -8
+; LA32-NEXT: addi.w $fp, $sp, 128
+; LA32-NEXT: .cfi_def_cfa 22, 0
+; LA32-NEXT: srli.w $a0, $sp, 7
+; LA32-NEXT: slli.w $sp, $a0, 7
+; LA32-NEXT: addi.w $a0, $sp, 0
+; LA32-NEXT: bl %plt(callee)
+; LA32-NEXT: addi.w $sp, $fp, -128
+; LA32-NEXT: ld.w $fp, $sp, 120 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $ra, $sp, 124 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 128
+; LA32-NEXT: ret
+;
+; LA64-LABEL: caller128:
+; LA64: # %bb.0:
+; LA64-NEXT: addi.d $sp, $sp, -128
+; LA64-NEXT: .cfi_def_cfa_offset 128
+; LA64-NEXT: st.d $ra, $sp, 120 # 8-byte Folded Spill
+; LA64-NEXT: st.d $fp, $sp, 112 # 8-byte Folded Spill
+; LA64-NEXT: .cfi_offset 1, -8
+; LA64-NEXT: .cfi_offset 22, -16
+; LA64-NEXT: addi.d $fp, $sp, 128
+; LA64-NEXT: .cfi_def_cfa 22, 0
+; LA64-NEXT: srli.d $a0, $sp, 7
+; LA64-NEXT: slli.d $sp, $a0, 7
+; LA64-NEXT: addi.d $a0, $sp, 0
+; LA64-NEXT: bl %plt(callee)
+; LA64-NEXT: addi.d $sp, $fp, -128
+; LA64-NEXT: ld.d $fp, $sp, 112 # 8-byte Folded Reload
+; LA64-NEXT: ld.d $ra, $sp, 120 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 128
+; LA64-NEXT: ret
+ %1 = alloca i8, align 128
+ call void @callee(i8* %1)
+ ret void
+}
+
+define void @caller_no_realign128() "no-realign-stack" {
+; LA32-LABEL: caller_no_realign128:
+; LA32: # %bb.0:
+; LA32-NEXT: addi.w $sp, $sp, -16
+; LA32-NEXT: .cfi_def_cfa_offset 16
+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT: .cfi_offset 1, -4
+; LA32-NEXT: addi.w $a0, $sp, 0
+; LA32-NEXT: bl %plt(callee)
+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 16
+; LA32-NEXT: ret
+;
+; LA64-LABEL: caller_no_realign128:
+; LA64: # %bb.0:
+; LA64-NEXT: addi.d $sp, $sp, -16
+; LA64-NEXT: .cfi_def_cfa_offset 16
+; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT: .cfi_offset 1, -8
+; LA64-NEXT: addi.d $a0, $sp, 0
+; LA64-NEXT: bl %plt(callee)
+; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 16
+; LA64-NEXT: ret
+ %1 = alloca i8, align 128
+ call void @callee(i8* %1)
+ ret void
+}
+
+define void @caller256() {
+; LA32-LABEL: caller256:
+; LA32: # %bb.0:
+; LA32-NEXT: addi.w $sp, $sp, -256
+; LA32-NEXT: .cfi_def_cfa_offset 256
+; LA32-NEXT: st.w $ra, $sp, 252 # 4-byte Folded Spill
+; LA32-NEXT: st.w $fp, $sp, 248 # 4-byte Folded Spill
+; LA32-NEXT: .cfi_offset 1, -4
+; LA32-NEXT: .cfi_offset 22, -8
+; LA32-NEXT: addi.w $fp, $sp, 256
+; LA32-NEXT: .cfi_def_cfa 22, 0
+; LA32-NEXT: srli.w $a0, $sp, 8
+; LA32-NEXT: slli.w $sp, $a0, 8
+; LA32-NEXT: addi.w $a0, $sp, 0
+; LA32-NEXT: bl %plt(callee)
+; LA32-NEXT: addi.w $sp, $fp, -256
+; LA32-NEXT: ld.w $fp, $sp, 248 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $ra, $sp, 252 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 256
+; LA32-NEXT: ret
+;
+; LA64-LABEL: caller256:
+; LA64: # %bb.0:
+; LA64-NEXT: addi.d $sp, $sp, -256
+; LA64-NEXT: .cfi_def_cfa_offset 256
+; LA64-NEXT: st.d $ra, $sp, 248 # 8-byte Folded Spill
+; LA64-NEXT: st.d $fp, $sp, 240 # 8-byte Folded Spill
+; LA64-NEXT: .cfi_offset 1, -8
+; LA64-NEXT: .cfi_offset 22, -16
+; LA64-NEXT: addi.d $fp, $sp, 256
+; LA64-NEXT: .cfi_def_cfa 22, 0
+; LA64-NEXT: srli.d $a0, $sp, 8
+; LA64-NEXT: slli.d $sp, $a0, 8
+; LA64-NEXT: addi.d $a0, $sp, 0
+; LA64-NEXT: bl %plt(callee)
+; LA64-NEXT: addi.d $sp, $fp, -256
+; LA64-NEXT: ld.d $fp, $sp, 240 # 8-byte Folded Reload
+; LA64-NEXT: ld.d $ra, $sp, 248 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 256
+; LA64-NEXT: ret
+ %1 = alloca i8, align 256
+ call void @callee(i8* %1)
+ ret void
+}
+
+define void @caller_no_realign256() "no-realign-stack" {
+; LA32-LABEL: caller_no_realign256:
+; LA32: # %bb.0:
+; LA32-NEXT: addi.w $sp, $sp, -16
+; LA32-NEXT: .cfi_def_cfa_offset 16
+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT: .cfi_offset 1, -4
+; LA32-NEXT: addi.w $a0, $sp, 0
+; LA32-NEXT: bl %plt(callee)
+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 16
+; LA32-NEXT: ret
+;
+; LA64-LABEL: caller_no_realign256:
+; LA64: # %bb.0:
+; LA64-NEXT: addi.d $sp, $sp, -16
+; LA64-NEXT: .cfi_def_cfa_offset 16
+; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT: .cfi_offset 1, -8
+; LA64-NEXT: addi.d $a0, $sp, 0
+; LA64-NEXT: bl %plt(callee)
+; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 16
+; LA64-NEXT: ret
+ %1 = alloca i8, align 256
+ call void @callee(i8* %1)
+ ret void
+}
+
+define void @caller512() {
+; LA32-LABEL: caller512:
+; LA32: # %bb.0:
+; LA32-NEXT: addi.w $sp, $sp, -512
+; LA32-NEXT: .cfi_def_cfa_offset 512
+; LA32-NEXT: st.w $ra, $sp, 508 # 4-byte Folded Spill
+; LA32-NEXT: st.w $fp, $sp, 504 # 4-byte Folded Spill
+; LA32-NEXT: .cfi_offset 1, -4
+; LA32-NEXT: .cfi_offset 22, -8
+; LA32-NEXT: addi.w $fp, $sp, 512
+; LA32-NEXT: .cfi_def_cfa 22, 0
+; LA32-NEXT: srli.w $a0, $sp, 9
+; LA32-NEXT: slli.w $sp, $a0, 9
+; LA32-NEXT: addi.w $a0, $sp, 0
+; LA32-NEXT: bl %plt(callee)
+; LA32-NEXT: addi.w $sp, $fp, -512
+; LA32-NEXT: ld.w $fp, $sp, 504 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $ra, $sp, 508 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 512
+; LA32-NEXT: ret
+;
+; LA64-LABEL: caller512:
+; LA64: # %bb.0:
+; LA64-NEXT: addi.d $sp, $sp, -512
+; LA64-NEXT: .cfi_def_cfa_offset 512
+; LA64-NEXT: st.d $ra, $sp, 504 # 8-byte Folded Spill
+; LA64-NEXT: st.d $fp, $sp, 496 # 8-byte Folded Spill
+; LA64-NEXT: .cfi_offset 1, -8
+; LA64-NEXT: .cfi_offset 22, -16
+; LA64-NEXT: addi.d $fp, $sp, 512
+; LA64-NEXT: .cfi_def_cfa 22, 0
+; LA64-NEXT: srli.d $a0, $sp, 9
+; LA64-NEXT: slli.d $sp, $a0, 9
+; LA64-NEXT: addi.d $a0, $sp, 0
+; LA64-NEXT: bl %plt(callee)
+; LA64-NEXT: addi.d $sp, $fp, -512
+; LA64-NEXT: ld.d $fp, $sp, 496 # 8-byte Folded Reload
+; LA64-NEXT: ld.d $ra, $sp, 504 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 512
+; LA64-NEXT: ret
+ %1 = alloca i8, align 512
+ call void @callee(i8* %1)
+ ret void
+}
+
+define void @caller_no_realign512() "no-realign-stack" {
+; LA32-LABEL: caller_no_realign512:
+; LA32: # %bb.0:
+; LA32-NEXT: addi.w $sp, $sp, -16
+; LA32-NEXT: .cfi_def_cfa_offset 16
+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT: .cfi_offset 1, -4
+; LA32-NEXT: addi.w $a0, $sp, 0
+; LA32-NEXT: bl %plt(callee)
+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 16
+; LA32-NEXT: ret
+;
+; LA64-LABEL: caller_no_realign512:
+; LA64: # %bb.0:
+; LA64-NEXT: addi.d $sp, $sp, -16
+; LA64-NEXT: .cfi_def_cfa_offset 16
+; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT: .cfi_offset 1, -8
+; LA64-NEXT: addi.d $a0, $sp, 0
+; LA64-NEXT: bl %plt(callee)
+; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 16
+; LA64-NEXT: ret
+ %1 = alloca i8, align 512
+ call void @callee(i8* %1)
+ ret void
+}
+
+define void @caller1024() {
+; LA32-LABEL: caller1024:
+; LA32: # %bb.0:
+; LA32-NEXT: addi.w $sp, $sp, -1024
+; LA32-NEXT: .cfi_def_cfa_offset 1024
+; LA32-NEXT: st.w $ra, $sp, 1020 # 4-byte Folded Spill
+; LA32-NEXT: st.w $fp, $sp, 1016 # 4-byte Folded Spill
+; LA32-NEXT: .cfi_offset 1, -4
+; LA32-NEXT: .cfi_offset 22, -8
+; LA32-NEXT: addi.w $fp, $sp, 1024
+; LA32-NEXT: .cfi_def_cfa 22, 0
+; LA32-NEXT: srli.w $a0, $sp, 10
+; LA32-NEXT: slli.w $sp, $a0, 10
+; LA32-NEXT: addi.w $a0, $sp, 0
+; LA32-NEXT: bl %plt(callee)
+; LA32-NEXT: addi.w $sp, $fp, -1024
+; LA32-NEXT: ld.w $fp, $sp, 1016 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $ra, $sp, 1020 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 1024
+; LA32-NEXT: ret
+;
+; LA64-LABEL: caller1024:
+; LA64: # %bb.0:
+; LA64-NEXT: addi.d $sp, $sp, -1024
+; LA64-NEXT: .cfi_def_cfa_offset 1024
+; LA64-NEXT: st.d $ra, $sp, 1016 # 8-byte Folded Spill
+; LA64-NEXT: st.d $fp, $sp, 1008 # 8-byte Folded Spill
+; LA64-NEXT: .cfi_offset 1, -8
+; LA64-NEXT: .cfi_offset 22, -16
+; LA64-NEXT: addi.d $fp, $sp, 1024
+; LA64-NEXT: .cfi_def_cfa 22, 0
+; LA64-NEXT: srli.d $a0, $sp, 10
+; LA64-NEXT: slli.d $sp, $a0, 10
+; LA64-NEXT: addi.d $a0, $sp, 0
+; LA64-NEXT: bl %plt(callee)
+; LA64-NEXT: addi.d $sp, $fp, -1024
+; LA64-NEXT: ld.d $fp, $sp, 1008 # 8-byte Folded Reload
+; LA64-NEXT: ld.d $ra, $sp, 1016 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 1024
+; LA64-NEXT: ret
+ %1 = alloca i8, align 1024
+ call void @callee(i8* %1)
+ ret void
+}
+
+define void @caller_no_realign1024() "no-realign-stack" {
+; LA32-LABEL: caller_no_realign1024:
+; LA32: # %bb.0:
+; LA32-NEXT: addi.w $sp, $sp, -16
+; LA32-NEXT: .cfi_def_cfa_offset 16
+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT: .cfi_offset 1, -4
+; LA32-NEXT: addi.w $a0, $sp, 0
+; LA32-NEXT: bl %plt(callee)
+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 16
+; LA32-NEXT: ret
+;
+; LA64-LABEL: caller_no_realign1024:
+; LA64: # %bb.0:
+; LA64-NEXT: addi.d $sp, $sp, -16
+; LA64-NEXT: .cfi_def_cfa_offset 16
+; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT: .cfi_offset 1, -8
+; LA64-NEXT: addi.d $a0, $sp, 0
+; LA64-NEXT: bl %plt(callee)
+; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 16
+; LA64-NEXT: ret
+ %1 = alloca i8, align 1024
+ call void @callee(i8* %1)
+ ret void
+}
+
+define void @caller2048() {
+; LA32-LABEL: caller2048:
+; LA32: # %bb.0:
+; LA32-NEXT: addi.w $sp, $sp, -2048
+; LA32-NEXT: .cfi_def_cfa_offset 2048
+; LA32-NEXT: st.w $ra, $sp, 2044 # 4-byte Folded Spill
+; LA32-NEXT: st.w $fp, $sp, 2040 # 4-byte Folded Spill
+; LA32-NEXT: .cfi_offset 1, -4
+; LA32-NEXT: .cfi_offset 22, -8
+; LA32-NEXT: addi.w $fp, $sp, 2032
+; LA32-NEXT: addi.w $fp, $fp, 16
+; LA32-NEXT: .cfi_def_cfa 22, 0
+; LA32-NEXT: srli.w $a0, $sp, 11
+; LA32-NEXT: slli.w $sp, $a0, 11
+; LA32-NEXT: addi.w $a0, $sp, 0
+; LA32-NEXT: bl %plt(callee)
+; LA32-NEXT: addi.w $sp, $fp, -2048
+; LA32-NEXT: ld.w $fp, $sp, 2040 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $ra, $sp, 2044 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 2032
+; LA32-NEXT: addi.w $sp, $sp, 16
+; LA32-NEXT: ret
+;
+; LA64-LABEL: caller2048:
+; LA64: # %bb.0:
+; LA64-NEXT: addi.d $sp, $sp, -2048
+; LA64-NEXT: .cfi_def_cfa_offset 2048
+; LA64-NEXT: st.d $ra, $sp, 2040 # 8-byte Folded Spill
+; LA64-NEXT: st.d $fp, $sp, 2032 # 8-byte Folded Spill
+; LA64-NEXT: .cfi_offset 1, -8
+; LA64-NEXT: .cfi_offset 22, -16
+; LA64-NEXT: addi.d $fp, $sp, 2032
+; LA64-NEXT: addi.d $fp, $fp, 16
+; LA64-NEXT: .cfi_def_cfa 22, 0
+; LA64-NEXT: srli.d $a0, $sp, 11
+; LA64-NEXT: slli.d $sp, $a0, 11
+; LA64-NEXT: addi.d $a0, $sp, 0
+; LA64-NEXT: bl %plt(callee)
+; LA64-NEXT: addi.d $sp, $fp, -2048
+; LA64-NEXT: ld.d $fp, $sp, 2032 # 8-byte Folded Reload
+; LA64-NEXT: ld.d $ra, $sp, 2040 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 2032
+; LA64-NEXT: addi.d $sp, $sp, 16
+; LA64-NEXT: ret
+ %1 = alloca i8, align 2048
+ call void @callee(i8* %1)
+ ret void
+}
+
+define void @caller_no_realign2048() "no-realign-stack" {
+; LA32-LABEL: caller_no_realign2048:
+; LA32: # %bb.0:
+; LA32-NEXT: addi.w $sp, $sp, -16
+; LA32-NEXT: .cfi_def_cfa_offset 16
+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT: .cfi_offset 1, -4
+; LA32-NEXT: addi.w $a0, $sp, 0
+; LA32-NEXT: bl %plt(callee)
+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 16
+; LA32-NEXT: ret
+;
+; LA64-LABEL: caller_no_realign2048:
+; LA64: # %bb.0:
+; LA64-NEXT: addi.d $sp, $sp, -16
+; LA64-NEXT: .cfi_def_cfa_offset 16
+; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT: .cfi_offset 1, -8
+; LA64-NEXT: addi.d $a0, $sp, 0
+; LA64-NEXT: bl %plt(callee)
+; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 16
+; LA64-NEXT: ret
+ %1 = alloca i8, align 2048
+ call void @callee(i8* %1)
+ ret void
+}
+
+define void @caller4096() {
+; LA32-LABEL: caller4096:
+; LA32: # %bb.0:
+; LA32-NEXT: lu12i.w $a0, 1
+; LA32-NEXT: sub.w $sp, $sp, $a0
+; LA32-NEXT: .cfi_def_cfa_offset 4096
+; LA32-NEXT: ori $a0, $zero, 4092
+; LA32-NEXT: add.w $a0, $sp, $a0
+; LA32-NEXT: st.w $ra, $a0, 0 # 4-byte Folded Spill
+; LA32-NEXT: ori $a0, $zero, 4088
+; LA32-NEXT: add.w $a0, $sp, $a0
+; LA32-NEXT: st.w $fp, $a0, 0 # 4-byte Folded Spill
+; LA32-NEXT: .cfi_offset 1, -4
+; LA32-NEXT: .cfi_offset 22, -8
+; LA32-NEXT: lu12i.w $a0, 1
+; LA32-NEXT: add.w $fp, $sp, $a0
+; LA32-NEXT: .cfi_def_cfa 22, 0
+; LA32-NEXT: srli.w $a0, $sp, 12
+; LA32-NEXT: slli.w $sp, $a0, 12
+; LA32-NEXT: addi.w $a0, $sp, 0
+; LA32-NEXT: bl %plt(callee)
+; LA32-NEXT: lu12i.w $a0, 1
+; LA32-NEXT: sub.w $sp, $fp, $a0
+; LA32-NEXT: ori $a0, $zero, 4088
+; LA32-NEXT: add.w $a0, $sp, $a0
+; LA32-NEXT: ld.w $fp, $a0, 0 # 4-byte Folded Reload
+; LA32-NEXT: ori $a0, $zero, 4092
+; LA32-NEXT: add.w $a0, $sp, $a0
+; LA32-NEXT: ld.w $ra, $a0, 0 # 4-byte Folded Reload
+; LA32-NEXT: lu12i.w $a0, 1
+; LA32-NEXT: add.w $sp, $sp, $a0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: caller4096:
+; LA64: # %bb.0:
+; LA64-NEXT: lu12i.w $a0, 1
+; LA64-NEXT: sub.d $sp, $sp, $a0
+; LA64-NEXT: .cfi_def_cfa_offset 4096
+; LA64-NEXT: ori $a0, $zero, 4088
+; LA64-NEXT: add.d $a0, $sp, $a0
+; LA64-NEXT: st.d $ra, $a0, 0 # 8-byte Folded Spill
+; LA64-NEXT: ori $a0, $zero, 4080
+; LA64-NEXT: add.d $a0, $sp, $a0
+; LA64-NEXT: st.d $fp, $a0, 0 # 8-byte Folded Spill
+; LA64-NEXT: .cfi_offset 1, -8
+; LA64-NEXT: .cfi_offset 22, -16
+; LA64-NEXT: lu12i.w $a0, 1
+; LA64-NEXT: add.d $fp, $sp, $a0
+; LA64-NEXT: .cfi_def_cfa 22, 0
+; LA64-NEXT: srli.d $a0, $sp, 12
+; LA64-NEXT: slli.d $sp, $a0, 12
+; LA64-NEXT: addi.d $a0, $sp, 0
+; LA64-NEXT: bl %plt(callee)
+; LA64-NEXT: lu12i.w $a0, 1
+; LA64-NEXT: sub.d $sp, $fp, $a0
+; LA64-NEXT: ori $a0, $zero, 4080
+; LA64-NEXT: add.d $a0, $sp, $a0
+; LA64-NEXT: ld.d $fp, $a0, 0 # 8-byte Folded Reload
+; LA64-NEXT: ori $a0, $zero, 4088
+; LA64-NEXT: add.d $a0, $sp, $a0
+; LA64-NEXT: ld.d $ra, $a0, 0 # 8-byte Folded Reload
+; LA64-NEXT: lu12i.w $a0, 1
+; LA64-NEXT: add.d $sp, $sp, $a0
+; LA64-NEXT: ret
+ %1 = alloca i8, align 4096
+ call void @callee(i8* %1)
+ ret void
+}
+
+define void @caller_no_realign4096() "no-realign-stack" {
+; LA32-LABEL: caller_no_realign4096:
+; LA32: # %bb.0:
+; LA32-NEXT: addi.w $sp, $sp, -16
+; LA32-NEXT: .cfi_def_cfa_offset 16
+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT: .cfi_offset 1, -4
+; LA32-NEXT: addi.w $a0, $sp, 0
+; LA32-NEXT: bl %plt(callee)
+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 16
+; LA32-NEXT: ret
+;
+; LA64-LABEL: caller_no_realign4096:
+; LA64: # %bb.0:
+; LA64-NEXT: addi.d $sp, $sp, -16
+; LA64-NEXT: .cfi_def_cfa_offset 16
+; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT: .cfi_offset 1, -8
+; LA64-NEXT: addi.d $a0, $sp, 0
+; LA64-NEXT: bl %plt(callee)
+; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 16
+; LA64-NEXT: ret
+ %1 = alloca i8, align 4096
+ call void @callee(i8* %1)
+ ret void
+}