[llvm] [M68k] always use movem for register spills (PR #106715)
Janis Heims via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 2 07:58:10 PST 2025
https://github.com/TechnoElf updated https://github.com/llvm/llvm-project/pull/106715
>From 9841b8588fe3bbedc6ed812cf99555779b7b96b1 Mon Sep 17 00:00:00 2001
From: TechnoElf <technoelf at undertheprinter.com>
Date: Fri, 30 Aug 2024 13:41:21 +0200
Subject: [PATCH] [M68k] always use movem for register spills / spill 8 bit
registers into 16 bit slots
---
llvm/lib/Target/M68k/M68kExpandPseudo.cpp | 16 +-
llvm/lib/Target/M68k/M68kInstrData.td | 4 -
llvm/lib/Target/M68k/M68kInstrInfo.cpp | 35 +-
llvm/lib/Target/M68k/M68kRegisterInfo.td | 26 ++
llvm/test/CodeGen/M68k/PR57660.ll | 22 +-
llvm/test/CodeGen/M68k/register-spills.ll | 464 ++++++++++++++++++++++
6 files changed, 520 insertions(+), 47 deletions(-)
create mode 100644 llvm/test/CodeGen/M68k/register-spills.ll
diff --git a/llvm/lib/Target/M68k/M68kExpandPseudo.cpp b/llvm/lib/Target/M68k/M68kExpandPseudo.cpp
index c7fdd7d7c35023..1ba265a60c3d60 100644
--- a/llvm/lib/Target/M68k/M68kExpandPseudo.cpp
+++ b/llvm/lib/Target/M68k/M68kExpandPseudo.cpp
@@ -193,31 +193,23 @@ bool M68kExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
case M68k::MOV8dc:
return TII->ExpandCCR(MIB, /*IsToCCR=*/false);
- case M68k::MOVM8jm_P:
- return TII->ExpandMOVEM(MIB, TII->get(M68k::MOVM32jm), /*IsRM=*/false);
case M68k::MOVM16jm_P:
- return TII->ExpandMOVEM(MIB, TII->get(M68k::MOVM32jm), /*IsRM=*/false);
+ return TII->ExpandMOVEM(MIB, TII->get(M68k::MOVM16jm), /*IsRM=*/false);
case M68k::MOVM32jm_P:
return TII->ExpandMOVEM(MIB, TII->get(M68k::MOVM32jm), /*IsRM=*/false);
- case M68k::MOVM8pm_P:
- return TII->ExpandMOVEM(MIB, TII->get(M68k::MOVM32pm), /*IsRM=*/false);
case M68k::MOVM16pm_P:
- return TII->ExpandMOVEM(MIB, TII->get(M68k::MOVM32pm), /*IsRM=*/false);
+ return TII->ExpandMOVEM(MIB, TII->get(M68k::MOVM16pm), /*IsRM=*/false);
case M68k::MOVM32pm_P:
return TII->ExpandMOVEM(MIB, TII->get(M68k::MOVM32pm), /*IsRM=*/false);
- case M68k::MOVM8mj_P:
- return TII->ExpandMOVEM(MIB, TII->get(M68k::MOVM32mj), /*IsRM=*/true);
case M68k::MOVM16mj_P:
- return TII->ExpandMOVEM(MIB, TII->get(M68k::MOVM32mj), /*IsRM=*/true);
+ return TII->ExpandMOVEM(MIB, TII->get(M68k::MOVM16mj), /*IsRM=*/true);
case M68k::MOVM32mj_P:
return TII->ExpandMOVEM(MIB, TII->get(M68k::MOVM32mj), /*IsRM=*/true);
- case M68k::MOVM8mp_P:
- return TII->ExpandMOVEM(MIB, TII->get(M68k::MOVM32mp), /*IsRM=*/true);
case M68k::MOVM16mp_P:
- return TII->ExpandMOVEM(MIB, TII->get(M68k::MOVM32mp), /*IsRM=*/true);
+ return TII->ExpandMOVEM(MIB, TII->get(M68k::MOVM16mp), /*IsRM=*/true);
case M68k::MOVM32mp_P:
return TII->ExpandMOVEM(MIB, TII->get(M68k::MOVM32mp), /*IsRM=*/true);
diff --git a/llvm/lib/Target/M68k/M68kInstrData.td b/llvm/lib/Target/M68k/M68kInstrData.td
index dc777a933e2786..48aa8aeb667db6 100644
--- a/llvm/lib/Target/M68k/M68kInstrData.td
+++ b/llvm/lib/Target/M68k/M68kInstrData.td
@@ -337,20 +337,16 @@ class MxMOVEM_RM_Pseudo<MxType TYPE, MxOperand MEMOp>
: MxPseudo<(outs TYPE.ROp:$dst), (ins MEMOp:$src)>;
// Mem <- Reg
-def MOVM8jm_P : MxMOVEM_MR_Pseudo<MxType8d, MxType8.JOp>;
def MOVM16jm_P : MxMOVEM_MR_Pseudo<MxType16r, MxType16.JOp>;
def MOVM32jm_P : MxMOVEM_MR_Pseudo<MxType32r, MxType32.JOp>;
-def MOVM8pm_P : MxMOVEM_MR_Pseudo<MxType8d, MxType8.POp>;
def MOVM16pm_P : MxMOVEM_MR_Pseudo<MxType16r, MxType16.POp>;
def MOVM32pm_P : MxMOVEM_MR_Pseudo<MxType32r, MxType32.POp>;
// Reg <- Mem
-def MOVM8mj_P : MxMOVEM_RM_Pseudo<MxType8d, MxType8.JOp>;
def MOVM16mj_P : MxMOVEM_RM_Pseudo<MxType16r, MxType16.JOp>;
def MOVM32mj_P : MxMOVEM_RM_Pseudo<MxType32r, MxType32.JOp>;
-def MOVM8mp_P : MxMOVEM_RM_Pseudo<MxType8d, MxType8.POp>;
def MOVM16mp_P : MxMOVEM_RM_Pseudo<MxType16r, MxType16.POp>;
def MOVM32mp_P : MxMOVEM_RM_Pseudo<MxType32r, MxType32.POp>;
diff --git a/llvm/lib/Target/M68k/M68kInstrInfo.cpp b/llvm/lib/Target/M68k/M68kInstrInfo.cpp
index 23c5c76a47479b..f7d4e7ffd1b43e 100644
--- a/llvm/lib/Target/M68k/M68kInstrInfo.cpp
+++ b/llvm/lib/Target/M68k/M68kInstrInfo.cpp
@@ -542,7 +542,6 @@ bool M68kInstrInfo::ExpandCCR(MachineInstrBuilder &MIB, bool IsToCCR) const {
bool M68kInstrInfo::ExpandMOVEM(MachineInstrBuilder &MIB,
const MCInstrDesc &Desc, bool IsRM) const {
int Reg = 0, Offset = 0, Base = 0;
- auto XR32 = RI.getRegClass(M68k::XR32RegClassID);
auto DL = MIB->getDebugLoc();
auto MI = MIB.getInstr();
auto &MBB = *MIB->getParent();
@@ -557,13 +556,6 @@ bool M68kInstrInfo::ExpandMOVEM(MachineInstrBuilder &MIB,
Reg = MIB->getOperand(2).getReg();
}
- // If the register is not in XR32 then it is smaller than 32 bit, we
- // implicitly promote it to 32
- if (!XR32->contains(Reg)) {
- Reg = RI.getMatchingMegaReg(Reg, XR32);
- assert(Reg && "Has not meaningful MEGA register");
- }
-
unsigned Mask = 1 << RI.getSpillRegisterOrder(Reg);
if (IsRM) {
BuildMI(MBB, MI, DL, Desc)
@@ -734,22 +726,25 @@ namespace {
unsigned getLoadStoreRegOpcode(unsigned Reg, const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI,
const M68kSubtarget &STI, bool load) {
- switch (TRI->getRegSizeInBits(*RC)) {
+ switch (TRI->getSpillSize(*RC)) {
default:
+ LLVM_DEBUG(
+ dbgs() << "Cannot determine appropriate opcode for load/store to/from "
+ << TRI->getName(Reg) << " of class " << TRI->getRegClassName(RC)
+ << " with spill size " << TRI->getSpillSize(*RC) << '\n');
llvm_unreachable("Unknown spill size");
- case 8:
+ case 2:
+ if (M68k::XR16RegClass.hasSubClassEq(RC))
+ return load ? M68k::MOVM16mp_P : M68k::MOVM16pm_P;
if (M68k::DR8RegClass.hasSubClassEq(RC))
- return load ? M68k::MOV8dp : M68k::MOV8pd;
+ return load ? M68k::MOVM16mp_P : M68k::MOVM16pm_P;
if (M68k::CCRCRegClass.hasSubClassEq(RC))
- return load ? M68k::MOV16cp : M68k::MOV16pc;
-
- llvm_unreachable("Unknown 1-byte regclass");
- case 16:
- assert(M68k::XR16RegClass.hasSubClassEq(RC) && "Unknown 2-byte regclass");
- return load ? M68k::MOVM16mp_P : M68k::MOVM16pm_P;
- case 32:
- assert(M68k::XR32RegClass.hasSubClassEq(RC) && "Unknown 4-byte regclass");
- return load ? M68k::MOVM32mp_P : M68k::MOVM32pm_P;
+ return load ? M68k::MOVM16mp_P : M68k::MOVM16pm_P;
+ llvm_unreachable("Unknown 2-byte regclass");
+ case 4:
+ if (M68k::XR32RegClass.hasSubClassEq(RC))
+ return load ? M68k::MOVM32mp_P : M68k::MOVM32pm_P;
+ llvm_unreachable("Unknown 4-byte regclass");
}
}
diff --git a/llvm/lib/Target/M68k/M68kRegisterInfo.td b/llvm/lib/Target/M68k/M68kRegisterInfo.td
index 45b492eba4ec07..f0f70b4aab3c0a 100644
--- a/llvm/lib/Target/M68k/M68kRegisterInfo.td
+++ b/llvm/lib/Target/M68k/M68kRegisterInfo.td
@@ -99,52 +99,78 @@ class MxRegClass<list<ValueType> regTypes, int alignment, dag regList>
: RegisterClass<"M68k", regTypes, alignment, regList>;
// Data Registers
+let RegInfos = RegInfoByHwMode<[DefaultMode], [RegInfo<8,16,16>]> in
def DR8 : MxRegClass<[i8], 16, (sequence "BD%u", 0, 7)>;
+let RegInfos = RegInfoByHwMode<[DefaultMode], [RegInfo<16,16,16>]> in
def DR16 : MxRegClass<[i16], 16, (sequence "WD%u", 0, 7)>;
+let RegInfos = RegInfoByHwMode<[DefaultMode], [RegInfo<32,32,32>]> in
def DR32 : MxRegClass<[i32], 32, (sequence "D%u", 0, 7)>;
// Address Registers
+let RegInfos = RegInfoByHwMode<[DefaultMode], [RegInfo<16,16,16>]> in
def AR16 : MxRegClass<[i16], 16, (add (sequence "WA%u", 0, 6), WSP)>;
+let RegInfos = RegInfoByHwMode<[DefaultMode], [RegInfo<32,32,32>]> in
def AR32 : MxRegClass<[i32], 32, (add (sequence "A%u", 0, 6), SP)>;
+let RegInfos = RegInfoByHwMode<[DefaultMode], [RegInfo<32,32,32>]> in
def AR32_NOSP : MxRegClass<[i32], 32, (sequence "A%u", 0, 6)>;
// Index Register Classes
// FIXME try alternative ordering like `D0, D1, A0, A1, ...`
+let RegInfos = RegInfoByHwMode<[DefaultMode], [RegInfo<16,16,16>]> in
def XR16 : MxRegClass<[i16], 16, (add DR16, AR16)>;
+let RegInfos = RegInfoByHwMode<[DefaultMode], [RegInfo<32,32,32>]> in
def XR32 : MxRegClass<[i32], 32, (add DR32, AR32)>;
+let RegInfos = RegInfoByHwMode<[DefaultMode], [RegInfo<32,32,32>]> in
def SPC : MxRegClass<[i32], 32, (add SP)>;
// Floating Point Data Registers
+let RegInfos = RegInfoByHwMode<[DefaultMode], [RegInfo<32,32,32>]> in
def FPDR32 : MxRegClass<[f32], 32, (sequence "FP%u", 0, 7)>;
+let RegInfos = RegInfoByHwMode<[DefaultMode], [RegInfo<64,64,32>]> in
def FPDR64 : MxRegClass<[f64], 32, (add FPDR32)>;
+let RegInfos = RegInfoByHwMode<[DefaultMode], [RegInfo<80,128,32>]> in
def FPDR80 : MxRegClass<[f80], 32, (add FPDR32)>;
let CopyCost = -1 in {
+ let RegInfos = RegInfoByHwMode<[DefaultMode], [RegInfo<8,16,16>]> in
def CCRC : MxRegClass<[i8], 16, (add CCR)>;
+ let RegInfos = RegInfoByHwMode<[DefaultMode], [RegInfo<16,16,16>]> in
def SRC : MxRegClass<[i16], 16, (add SR)>;
// Float Point System Control Registers
+ let RegInfos = RegInfoByHwMode<[DefaultMode], [RegInfo<32,32,32>]> in
def FPIC : MxRegClass<[i32], 32, (add FPIAR)>;
+ let RegInfos = RegInfoByHwMode<[DefaultMode], [RegInfo<32,32,32>]> in
def FPCSC : MxRegClass<[i32], 32, (add FPC, FPS)>;
+ let RegInfos = RegInfoByHwMode<[DefaultMode], [RegInfo<32,32,32>]> in
def FPSYSC : MxRegClass<[i32], 32, (add FPCSC, FPIC)>;
}
let isAllocatable = 0 in {
+ let RegInfos = RegInfoByHwMode<[DefaultMode], [RegInfo<32,32,32>]> in
def PCC : MxRegClass<[i32], 32, (add PC)>;
}
// Register used with tail call
+let RegInfos = RegInfoByHwMode<[DefaultMode], [RegInfo<16,16,16>]> in
def DR16_TC : MxRegClass<[i16], 16, (add D0, D1)>;
+let RegInfos = RegInfoByHwMode<[DefaultMode], [RegInfo<32,32,32>]> in
def DR32_TC : MxRegClass<[i32], 32, (add D0, D1)>;
+let RegInfos = RegInfoByHwMode<[DefaultMode], [RegInfo<16,16,16>]> in
def AR16_TC : MxRegClass<[i16], 16, (add A0, A1)>;
+let RegInfos = RegInfoByHwMode<[DefaultMode], [RegInfo<32,32,32>]> in
def AR32_TC : MxRegClass<[i32], 32, (add A0, A1)>;
+let RegInfos = RegInfoByHwMode<[DefaultMode], [RegInfo<16,16,16>]> in
def XR16_TC : MxRegClass<[i16], 16, (add DR16_TC, AR16_TC)>;
+let RegInfos = RegInfoByHwMode<[DefaultMode], [RegInfo<32,32,32>]> in
def XR32_TC : MxRegClass<[i32], 32, (add DR32_TC, AR32_TC)>;
// These classes provide spill/restore order if used with MOVEM instruction
+let RegInfos = RegInfoByHwMode<[DefaultMode], [RegInfo<32,32,32>]> in
def SPILL : MxRegClass<[i32], 32, (add XR32)>;
+let RegInfos = RegInfoByHwMode<[DefaultMode], [RegInfo<32,32,32>]> in
def SPILL_R : MxRegClass<[i32], 32, (add SP, (sequence "A%u", 6, 0), (sequence "D%u", 7, 0))>;
diff --git a/llvm/test/CodeGen/M68k/PR57660.ll b/llvm/test/CodeGen/M68k/PR57660.ll
index bad949b08cafac..359f0c24963568 100644
--- a/llvm/test/CodeGen/M68k/PR57660.ll
+++ b/llvm/test/CodeGen/M68k/PR57660.ll
@@ -8,10 +8,10 @@ define dso_local void @foo1() {
; CHECK-NEXT: suba.l #2, %sp
; CHECK-NEXT: .cfi_def_cfa_offset -6
; CHECK-NEXT: moveq #0, %d0
-; CHECK-NEXT: move.b %d0, (0,%sp) ; 1-byte Folded Spill
+; CHECK-NEXT: movem.w %d0, (0,%sp)
; CHECK-NEXT: .LBB0_1: ; %do.body
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: move.b (0,%sp), %d0 ; 1-byte Folded Reload
+; CHECK-NEXT: movem.w (0,%sp), %d0
; CHECK-NEXT: cmpi.b #0, %d0
; CHECK-NEXT: bne .LBB0_1
; CHECK-NEXT: ; %bb.2: ; %do.end
@@ -39,24 +39,24 @@ define i32 @foo2(ptr noundef %0) {
; CHECK-NEXT: .cfi_def_cfa_offset -8
; CHECK-NEXT: move.l (8,%sp), %a0
; CHECK-NEXT: move.b (%a0), %d0
-; CHECK-NEXT: move.b %d0, (0,%sp) ; 1-byte Folded Spill
+; CHECK-NEXT: movem.w %d0, (0,%sp)
; CHECK-NEXT: and.b #1, %d0
-; CHECK-NEXT: move.b %d0, (2,%sp) ; 1-byte Folded Spill
+; CHECK-NEXT: movem.w %d0, (2,%sp)
; CHECK-NEXT: sub.b #1, %d0
; CHECK-NEXT: bgt .LBB1_2
; CHECK-NEXT: ; %bb.1: ; %if
-; CHECK-NEXT: move.b (2,%sp), %d0 ; 1-byte Folded Reload
-; CHECK-NEXT: move.b (0,%sp), %d1 ; 1-byte Folded Reload
+; CHECK-NEXT: movem.w (2,%sp), %d0
+; CHECK-NEXT: movem.w (0,%sp), %d1
; CHECK-NEXT: add.b %d1, %d0
; CHECK-NEXT: bra .LBB1_3
; CHECK-NEXT: .LBB1_2: ; %else
-; CHECK-NEXT: move.b (2,%sp), %d1 ; 1-byte Folded Reload
-; CHECK-NEXT: move.b (0,%sp), %d0 ; 1-byte Folded Reload
+; CHECK-NEXT: movem.w (2,%sp), %d1
+; CHECK-NEXT: movem.w (0,%sp), %d0
; CHECK-NEXT: sub.b %d1, %d0
-; CHECK-NEXT: move.b %d0, (0,%sp) ; 1-byte Folded Spill
+; CHECK-NEXT: movem.w %d0, (0,%sp)
; CHECK-NEXT: .LBB1_3: ; %cont
-; CHECK-NEXT: move.b %d0, (2,%sp) ; 1-byte Folded Spill
-; CHECK-NEXT: move.b (2,%sp), %d0 ; 1-byte Folded Reload
+; CHECK-NEXT: movem.w %d0, (2,%sp)
+; CHECK-NEXT: movem.w (2,%sp), %d0
; CHECK-NEXT: ext.w %d0
; CHECK-NEXT: ext.l %d0
; CHECK-NEXT: adda.l #4, %sp
diff --git a/llvm/test/CodeGen/M68k/register-spills.ll b/llvm/test/CodeGen/M68k/register-spills.ll
new file mode 100644
index 00000000000000..9104a59f5d6b34
--- /dev/null
+++ b/llvm/test/CodeGen/M68k/register-spills.ll
@@ -0,0 +1,464 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=m68k -O0 %s -o - | FileCheck %s
+
+declare zeroext i1 @get1()
+declare i8 @get8()
+declare i16 @get16()
+declare i32 @get32()
+
+define void @test_edge_detection_conditional_branch() {
+; CHECK-LABEL: test_edge_detection_conditional_branch:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: ; %bb.0: ; %start
+; CHECK-NEXT: suba.l #12, %sp
+; CHECK-NEXT: .cfi_def_cfa_offset -16
+; CHECK-NEXT: movem.l %d2, (8,%sp) ; 8-byte Folded Spill
+; CHECK-NEXT: bra .LBB0_1
+; CHECK-NEXT: .LBB0_1: ; %condition_check
+; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: jsr get1
+; CHECK-NEXT: move.b (7,%sp), %d2
+; CHECK-NEXT: and.b #1, %d2
+; CHECK-NEXT: move.b %d0, %d1
+; CHECK-NEXT: sub.b %d2, %d1
+; CHECK-NEXT: movem.w %d0, (4,%sp)
+; CHECK-NEXT: bne .LBB0_1
+; CHECK-NEXT: bra .LBB0_2
+; CHECK-NEXT: .LBB0_2: ; %do_something
+; CHECK-NEXT: movem.w (4,%sp), %d0
+; CHECK-NEXT: move.b %d0, (7,%sp)
+; CHECK-NEXT: movem.l (8,%sp), %d2 ; 8-byte Folded Reload
+; CHECK-NEXT: adda.l #12, %sp
+; CHECK-NEXT: rts
+start:
+ %prev_state = alloca [1 x i8], align 1
+ br label %condition_check
+
+condition_check:
+ %state = call zeroext i1 @get1()
+ %local_prev_state = load i8, ptr %prev_state, align 1
+ %local_prev_state_trunc = trunc i8 %local_prev_state to i1
+ %result = icmp ne i1 %state, %local_prev_state_trunc
+ br i1 %result, label %condition_check, label %do_something
+
+do_something:
+ %state_ext = zext i1 %state to i8
+ store i8 %state_ext, ptr %prev_state, align 1
+ ret void
+}
+
+define void @test_force_spill_8() {
+; CHECK-LABEL: test_force_spill_8:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: ; %bb.0: ; %start
+; CHECK-NEXT: suba.l #108, %sp
+; CHECK-NEXT: .cfi_def_cfa_offset -112
+; CHECK-NEXT: movem.l %d2-%d7, (84,%sp) ; 28-byte Folded Spill
+; CHECK-NEXT: jsr get8
+; CHECK-NEXT: movem.w %d0, (82,%sp)
+; CHECK-NEXT: jsr get8
+; CHECK-NEXT: movem.w %d0, (66,%sp)
+; CHECK-NEXT: jsr get8
+; CHECK-NEXT: move.b %d0, %d2
+; CHECK-NEXT: jsr get8
+; CHECK-NEXT: move.b %d0, %d3
+; CHECK-NEXT: jsr get8
+; CHECK-NEXT: move.b %d0, %d4
+; CHECK-NEXT: jsr get8
+; CHECK-NEXT: move.b %d0, %d5
+; CHECK-NEXT: jsr get8
+; CHECK-NEXT: move.b %d0, %d6
+; CHECK-NEXT: jsr get8
+; CHECK-NEXT: move.b %d0, %d7
+; CHECK-NEXT: jsr get8
+; CHECK-NEXT: movem.w %d0, (80,%sp)
+; CHECK-NEXT: jsr get8
+; CHECK-NEXT: movem.w %d0, (78,%sp)
+; CHECK-NEXT: jsr get8
+; CHECK-NEXT: movem.w %d0, (76,%sp)
+; CHECK-NEXT: jsr get8
+; CHECK-NEXT: movem.w %d0, (74,%sp)
+; CHECK-NEXT: jsr get8
+; CHECK-NEXT: movem.w %d0, (72,%sp)
+; CHECK-NEXT: jsr get8
+; CHECK-NEXT: movem.w %d0, (70,%sp)
+; CHECK-NEXT: jsr get8
+; CHECK-NEXT: movem.w %d0, (68,%sp)
+; CHECK-NEXT: jsr get8
+; CHECK-NEXT: movem.w (66,%sp), %d1
+; CHECK-NEXT: and.l #255, %d0
+; CHECK-NEXT: move.l %sp, %a0
+; CHECK-NEXT: move.l %d0, (60,%a0)
+; CHECK-NEXT: movem.w (68,%sp), %d0
+; CHECK-NEXT: and.l #255, %d0
+; CHECK-NEXT: move.l %d0, (56,%a0)
+; CHECK-NEXT: movem.w (70,%sp), %d0
+; CHECK-NEXT: and.l #255, %d0
+; CHECK-NEXT: move.l %d0, (52,%a0)
+; CHECK-NEXT: movem.w (72,%sp), %d0
+; CHECK-NEXT: and.l #255, %d0
+; CHECK-NEXT: move.l %d0, (48,%a0)
+; CHECK-NEXT: movem.w (74,%sp), %d0
+; CHECK-NEXT: and.l #255, %d0
+; CHECK-NEXT: move.l %d0, (44,%a0)
+; CHECK-NEXT: movem.w (76,%sp), %d0
+; CHECK-NEXT: and.l #255, %d0
+; CHECK-NEXT: move.l %d0, (40,%a0)
+; CHECK-NEXT: movem.w (78,%sp), %d0
+; CHECK-NEXT: and.l #255, %d0
+; CHECK-NEXT: move.l %d0, (36,%a0)
+; CHECK-NEXT: movem.w (80,%sp), %d0
+; CHECK-NEXT: and.l #255, %d0
+; CHECK-NEXT: move.l %d0, (32,%a0)
+; CHECK-NEXT: movem.w (82,%sp), %d0
+; CHECK-NEXT: and.l #255, %d7
+; CHECK-NEXT: move.l %d7, (28,%a0)
+; CHECK-NEXT: and.l #255, %d6
+; CHECK-NEXT: move.l %d6, (24,%a0)
+; CHECK-NEXT: and.l #255, %d5
+; CHECK-NEXT: move.l %d5, (20,%a0)
+; CHECK-NEXT: and.l #255, %d4
+; CHECK-NEXT: move.l %d4, (16,%a0)
+; CHECK-NEXT: and.l #255, %d3
+; CHECK-NEXT: move.l %d3, (12,%a0)
+; CHECK-NEXT: and.l #255, %d2
+; CHECK-NEXT: move.l %d2, (8,%a0)
+; CHECK-NEXT: and.l #255, %d1
+; CHECK-NEXT: move.l %d1, (4,%a0)
+; CHECK-NEXT: and.l #255, %d0
+; CHECK-NEXT: move.l %d0, (%a0)
+; CHECK-NEXT: jsr test_force_spill_8_consumer
+; CHECK-NEXT: movem.l (84,%sp), %d2-%d7 ; 28-byte Folded Reload
+; CHECK-NEXT: adda.l #108, %sp
+; CHECK-NEXT: rts
+ start:
+ %r0 = call i8 @get8()
+ %r1 = call i8 @get8()
+ %r2 = call i8 @get8()
+ %r3 = call i8 @get8()
+ %r4 = call i8 @get8()
+ %r5 = call i8 @get8()
+ %r6 = call i8 @get8()
+ %r7 = call i8 @get8()
+ %r8 = call i8 @get8()
+ %r9 = call i8 @get8()
+ %ra = call i8 @get8()
+ %rb = call i8 @get8()
+ %rc = call i8 @get8()
+ %rd = call i8 @get8()
+ %re = call i8 @get8()
+ %rf = call i8 @get8()
+ call void @test_force_spill_8_consumer(i8 %r0, i8 %r1, i8 %r2, i8 %r3, i8 %r4, i8 %r5, i8 %r6, i8 %r7, i8 %r8, i8 %r9, i8 %ra, i8 %rb, i8 %rc, i8 %rd, i8 %re, i8 %rf)
+ ret void
+}
+
+declare void @test_force_spill_8_consumer(i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8)
+
+define void @test_force_spill_16() {
+; CHECK-LABEL: test_force_spill_16:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: ; %bb.0: ; %start
+; CHECK-NEXT: suba.l #116, %sp
+; CHECK-NEXT: .cfi_def_cfa_offset -120
+; CHECK-NEXT: movem.l %d2-%d7/%a2-%a6, (72,%sp) ; 48-byte Folded Spill
+; CHECK-NEXT: jsr get16
+; CHECK-NEXT: movem.w %d0, (70,%sp)
+; CHECK-NEXT: jsr get16
+; CHECK-NEXT: movem.w %d0, (66,%sp)
+; CHECK-NEXT: jsr get16
+; CHECK-NEXT: movem.w %d0, (64,%sp)
+; CHECK-NEXT: jsr get16
+; CHECK-NEXT: move.w %d0, %d2
+; CHECK-NEXT: jsr get16
+; CHECK-NEXT: move.w %d0, %d3
+; CHECK-NEXT: jsr get16
+; CHECK-NEXT: move.w %d0, %d4
+; CHECK-NEXT: jsr get16
+; CHECK-NEXT: move.w %d0, %d5
+; CHECK-NEXT: jsr get16
+; CHECK-NEXT: move.w %d0, %d6
+; CHECK-NEXT: jsr get16
+; CHECK-NEXT: move.w %d0, %d7
+; CHECK-NEXT: jsr get16
+; CHECK-NEXT: move.w %d0, %a2
+; CHECK-NEXT: jsr get16
+; CHECK-NEXT: move.w %d0, %a3
+; CHECK-NEXT: jsr get16
+; CHECK-NEXT: move.w %d0, %a4
+; CHECK-NEXT: jsr get16
+; CHECK-NEXT: move.w %d0, %a5
+; CHECK-NEXT: jsr get16
+; CHECK-NEXT: move.w %d0, %a6
+; CHECK-NEXT: jsr get16
+; CHECK-NEXT: movem.w %d0, (68,%sp)
+; CHECK-NEXT: jsr get16
+; CHECK-NEXT: movem.w (64,%sp), %a1
+; CHECK-NEXT: movem.w (66,%sp), %d1
+; CHECK-NEXT: and.l #65535, %d0
+; CHECK-NEXT: move.l %sp, %a0
+; CHECK-NEXT: move.l %d0, (60,%a0)
+; CHECK-NEXT: movem.w (68,%sp), %d0
+; CHECK-NEXT: and.l #65535, %d0
+; CHECK-NEXT: move.l %d0, (56,%a0)
+; CHECK-NEXT: movem.w (70,%sp), %d0
+; CHECK-NEXT: and.l #65535, %a6
+; CHECK-NEXT: move.l %a6, (52,%a0)
+; CHECK-NEXT: and.l #65535, %a5
+; CHECK-NEXT: move.l %a5, (48,%a0)
+; CHECK-NEXT: and.l #65535, %a4
+; CHECK-NEXT: move.l %a4, (44,%a0)
+; CHECK-NEXT: and.l #65535, %a3
+; CHECK-NEXT: move.l %a3, (40,%a0)
+; CHECK-NEXT: and.l #65535, %a2
+; CHECK-NEXT: move.l %a2, (36,%a0)
+; CHECK-NEXT: and.l #65535, %d7
+; CHECK-NEXT: move.l %d7, (32,%a0)
+; CHECK-NEXT: and.l #65535, %d6
+; CHECK-NEXT: move.l %d6, (28,%a0)
+; CHECK-NEXT: and.l #65535, %d5
+; CHECK-NEXT: move.l %d5, (24,%a0)
+; CHECK-NEXT: and.l #65535, %d4
+; CHECK-NEXT: move.l %d4, (20,%a0)
+; CHECK-NEXT: and.l #65535, %d3
+; CHECK-NEXT: move.l %d3, (16,%a0)
+; CHECK-NEXT: and.l #65535, %d2
+; CHECK-NEXT: move.l %d2, (12,%a0)
+; CHECK-NEXT: and.l #65535, %a1
+; CHECK-NEXT: move.l %a1, (8,%a0)
+; CHECK-NEXT: and.l #65535, %d1
+; CHECK-NEXT: move.l %d1, (4,%a0)
+; CHECK-NEXT: and.l #65535, %d0
+; CHECK-NEXT: move.l %d0, (%a0)
+; CHECK-NEXT: jsr test_force_spill_16_consumer
+; CHECK-NEXT: movem.l (72,%sp), %d2-%d7/%a2-%a6 ; 48-byte Folded Reload
+; CHECK-NEXT: adda.l #116, %sp
+; CHECK-NEXT: rts
+ start:
+ %r0 = call i16 @get16()
+ %r1 = call i16 @get16()
+ %r2 = call i16 @get16()
+ %r3 = call i16 @get16()
+ %r4 = call i16 @get16()
+ %r5 = call i16 @get16()
+ %r6 = call i16 @get16()
+ %r7 = call i16 @get16()
+ %r8 = call i16 @get16()
+ %r9 = call i16 @get16()
+ %ra = call i16 @get16()
+ %rb = call i16 @get16()
+ %rc = call i16 @get16()
+ %rd = call i16 @get16()
+ %re = call i16 @get16()
+ %rf = call i16 @get16()
+ call void @test_force_spill_16_consumer(i16 %r0, i16 %r1, i16 %r2, i16 %r3, i16 %r4, i16 %r5, i16 %r6, i16 %r7, i16 %r8, i16 %r9, i16 %ra, i16 %rb, i16 %rc, i16 %rd, i16 %re, i16 %rf)
+ ret void
+}
+
+declare void @test_force_spill_16_consumer(i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16)
+
+define void @test_force_spill_32() {
+; CHECK-LABEL: test_force_spill_32:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: ; %bb.0: ; %start
+; CHECK-NEXT: suba.l #124, %sp
+; CHECK-NEXT: .cfi_def_cfa_offset -128
+; CHECK-NEXT: movem.l %d2-%d7/%a2-%a6, (80,%sp) ; 48-byte Folded Spill
+; CHECK-NEXT: jsr get32
+; CHECK-NEXT: movem.l %d0, (76,%sp)
+; CHECK-NEXT: jsr get32
+; CHECK-NEXT: movem.l %d0, (68,%sp)
+; CHECK-NEXT: jsr get32
+; CHECK-NEXT: movem.l %d0, (64,%sp)
+; CHECK-NEXT: jsr get32
+; CHECK-NEXT: move.l %d0, %d2
+; CHECK-NEXT: jsr get32
+; CHECK-NEXT: move.l %d0, %d3
+; CHECK-NEXT: jsr get32
+; CHECK-NEXT: move.l %d0, %d4
+; CHECK-NEXT: jsr get32
+; CHECK-NEXT: move.l %d0, %d5
+; CHECK-NEXT: jsr get32
+; CHECK-NEXT: move.l %d0, %d6
+; CHECK-NEXT: jsr get32
+; CHECK-NEXT: move.l %d0, %d7
+; CHECK-NEXT: jsr get32
+; CHECK-NEXT: move.l %d0, %a2
+; CHECK-NEXT: jsr get32
+; CHECK-NEXT: move.l %d0, %a3
+; CHECK-NEXT: jsr get32
+; CHECK-NEXT: move.l %d0, %a4
+; CHECK-NEXT: jsr get32
+; CHECK-NEXT: move.l %d0, %a5
+; CHECK-NEXT: jsr get32
+; CHECK-NEXT: move.l %d0, %a6
+; CHECK-NEXT: jsr get32
+; CHECK-NEXT: movem.l %d0, (72,%sp)
+; CHECK-NEXT: jsr get32
+; CHECK-NEXT: movem.l (64,%sp), %a1
+; CHECK-NEXT: movem.l (68,%sp), %d1
+; CHECK-NEXT: move.l %sp, %a0
+; CHECK-NEXT: move.l %d0, (60,%a0)
+; CHECK-NEXT: movem.l (72,%sp), %d0
+; CHECK-NEXT: move.l %d0, (56,%a0)
+; CHECK-NEXT: movem.l (76,%sp), %d0
+; CHECK-NEXT: move.l %a6, (52,%a0)
+; CHECK-NEXT: move.l %a5, (48,%a0)
+; CHECK-NEXT: move.l %a4, (44,%a0)
+; CHECK-NEXT: move.l %a3, (40,%a0)
+; CHECK-NEXT: move.l %a2, (36,%a0)
+; CHECK-NEXT: move.l %d7, (32,%a0)
+; CHECK-NEXT: move.l %d6, (28,%a0)
+; CHECK-NEXT: move.l %d5, (24,%a0)
+; CHECK-NEXT: move.l %d4, (20,%a0)
+; CHECK-NEXT: move.l %d3, (16,%a0)
+; CHECK-NEXT: move.l %d2, (12,%a0)
+; CHECK-NEXT: move.l %a1, (8,%a0)
+; CHECK-NEXT: move.l %d1, (4,%a0)
+; CHECK-NEXT: move.l %d0, (%a0)
+; CHECK-NEXT: jsr test_force_spill_32_consumer
+; CHECK-NEXT: movem.l (80,%sp), %d2-%d7/%a2-%a6 ; 48-byte Folded Reload
+; CHECK-NEXT: adda.l #124, %sp
+; CHECK-NEXT: rts
+ start:
+ %r0 = call i32 @get32()
+ %r1 = call i32 @get32()
+ %r2 = call i32 @get32()
+ %r3 = call i32 @get32()
+ %r4 = call i32 @get32()
+ %r5 = call i32 @get32()
+ %r6 = call i32 @get32()
+ %r7 = call i32 @get32()
+ %r8 = call i32 @get32()
+ %r9 = call i32 @get32()
+ %ra = call i32 @get32()
+ %rb = call i32 @get32()
+ %rc = call i32 @get32()
+ %rd = call i32 @get32()
+ %re = call i32 @get32()
+ %rf = call i32 @get32()
+ call void @test_force_spill_32_consumer(i32 %r0, i32 %r1, i32 %r2, i32 %r3, i32 %r4, i32 %r5, i32 %r6, i32 %r7, i32 %r8, i32 %r9, i32 %ra, i32 %rb, i32 %rc, i32 %rd, i32 %re, i32 %rf)
+ ret void
+}
+
+declare void @test_force_spill_32_consumer(i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32)
+
+define void @test_force_spill_mixed() {
+; CHECK-LABEL: test_force_spill_mixed:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: ; %bb.0: ; %start
+; CHECK-NEXT: suba.l #148, %sp
+; CHECK-NEXT: .cfi_def_cfa_offset -152
+; CHECK-NEXT: movem.l %d2-%d7/%a2-%a6, (104,%sp) ; 48-byte Folded Spill
+; CHECK-NEXT: jsr get8
+; CHECK-NEXT: movem.w %d0, (102,%sp)
+; CHECK-NEXT: jsr get16
+; CHECK-NEXT: movem.w %d0, (86,%sp)
+; CHECK-NEXT: jsr get32
+; CHECK-NEXT: movem.l %d0, (80,%sp)
+; CHECK-NEXT: jsr get16
+; CHECK-NEXT: move.w %d0, %d2
+; CHECK-NEXT: jsr get8
+; CHECK-NEXT: move.b %d0, %d3
+; CHECK-NEXT: jsr get16
+; CHECK-NEXT: move.w %d0, %d4
+; CHECK-NEXT: jsr get32
+; CHECK-NEXT: move.l %d0, %d5
+; CHECK-NEXT: jsr get16
+; CHECK-NEXT: move.w %d0, %d6
+; CHECK-NEXT: jsr get8
+; CHECK-NEXT: move.b %d0, %d7
+; CHECK-NEXT: jsr get16
+; CHECK-NEXT: move.w %d0, %a2
+; CHECK-NEXT: jsr get32
+; CHECK-NEXT: move.l %d0, %a3
+; CHECK-NEXT: jsr get16
+; CHECK-NEXT: move.w %d0, %a4
+; CHECK-NEXT: jsr get8
+; CHECK-NEXT: movem.w %d0, (100,%sp)
+; CHECK-NEXT: jsr get16
+; CHECK-NEXT: move.w %d0, %a5
+; CHECK-NEXT: jsr get32
+; CHECK-NEXT: move.l %d0, %a6
+; CHECK-NEXT: jsr get16
+; CHECK-NEXT: movem.w %d0, (98,%sp)
+; CHECK-NEXT: jsr get8
+; CHECK-NEXT: movem.w %d0, (96,%sp)
+; CHECK-NEXT: jsr get16
+; CHECK-NEXT: movem.w %d0, (94,%sp)
+; CHECK-NEXT: jsr get32
+; CHECK-NEXT: movem.l %d0, (88,%sp)
+; CHECK-NEXT: jsr get16
+; CHECK-NEXT: movem.l (80,%sp), %a1
+; CHECK-NEXT: movem.w (86,%sp), %d1
+; CHECK-NEXT: and.l #65535, %d0
+; CHECK-NEXT: move.l %sp, %a0
+; CHECK-NEXT: move.l %d0, (76,%a0)
+; CHECK-NEXT: movem.l (88,%sp), %d0
+; CHECK-NEXT: move.l %d0, (72,%a0)
+; CHECK-NEXT: movem.w (94,%sp), %d0
+; CHECK-NEXT: and.l #65535, %d0
+; CHECK-NEXT: move.l %d0, (68,%a0)
+; CHECK-NEXT: movem.w (96,%sp), %d0
+; CHECK-NEXT: and.l #255, %d0
+; CHECK-NEXT: move.l %d0, (64,%a0)
+; CHECK-NEXT: movem.w (98,%sp), %d0
+; CHECK-NEXT: and.l #65535, %d0
+; CHECK-NEXT: move.l %d0, (60,%a0)
+; CHECK-NEXT: movem.w (100,%sp), %d0
+; CHECK-NEXT: move.l %a6, (56,%a0)
+; CHECK-NEXT: and.l #65535, %a5
+; CHECK-NEXT: move.l %a5, (52,%a0)
+; CHECK-NEXT: and.l #255, %d0
+; CHECK-NEXT: move.l %d0, (48,%a0)
+; CHECK-NEXT: movem.w (102,%sp), %d0
+; CHECK-NEXT: and.l #65535, %a4
+; CHECK-NEXT: move.l %a4, (44,%a0)
+; CHECK-NEXT: move.l %a3, (40,%a0)
+; CHECK-NEXT: and.l #65535, %a2
+; CHECK-NEXT: move.l %a2, (36,%a0)
+; CHECK-NEXT: and.l #255, %d7
+; CHECK-NEXT: move.l %d7, (32,%a0)
+; CHECK-NEXT: and.l #65535, %d6
+; CHECK-NEXT: move.l %d6, (28,%a0)
+; CHECK-NEXT: move.l %d5, (24,%a0)
+; CHECK-NEXT: and.l #65535, %d4
+; CHECK-NEXT: move.l %d4, (20,%a0)
+; CHECK-NEXT: and.l #255, %d3
+; CHECK-NEXT: move.l %d3, (16,%a0)
+; CHECK-NEXT: and.l #65535, %d2
+; CHECK-NEXT: move.l %d2, (12,%a0)
+; CHECK-NEXT: move.l %a1, (8,%a0)
+; CHECK-NEXT: and.l #65535, %d1
+; CHECK-NEXT: move.l %d1, (4,%a0)
+; CHECK-NEXT: and.l #255, %d0
+; CHECK-NEXT: move.l %d0, (%a0)
+; CHECK-NEXT: jsr test_force_spill_mixed_consumer
+; CHECK-NEXT: movem.l (104,%sp), %d2-%d7/%a2-%a6 ; 48-byte Folded Reload
+; CHECK-NEXT: adda.l #148, %sp
+; CHECK-NEXT: rts
+ start:
+ %r0 = call i8 @get8()
+ %r1 = call i16 @get16()
+ %r2 = call i32 @get32()
+ %r3 = call i16 @get16()
+ %r4 = call i8 @get8()
+ %r5 = call i16 @get16()
+ %r6 = call i32 @get32()
+ %r7 = call i16 @get16()
+ %r8 = call i8 @get8()
+ %r9 = call i16 @get16()
+ %ra = call i32 @get32()
+ %rb = call i16 @get16()
+ %rc = call i8 @get8()
+ %rd = call i16 @get16()
+ %re = call i32 @get32()
+ %rf = call i16 @get16()
+ %rg = call i8 @get8()
+ %rh = call i16 @get16()
+ %ri = call i32 @get32()
+ %rj = call i16 @get16()
+ call void @test_force_spill_mixed_consumer(i8 %r0, i16 %r1, i32 %r2, i16 %r3, i8 %r4, i16 %r5, i32 %r6, i16 %r7, i8 %r8, i16 %r9, i32 %ra, i16 %rb, i8 %rc, i16 %rd, i32 %re, i16 %rf, i8 %rg, i16 %rh, i32 %ri, i16 %rj)
+ ret void
+}
+
+declare void @test_force_spill_mixed_consumer(i8, i16, i32, i16, i8, i16, i32, i16, i8, i16, i32, i16, i8, i16, i32, i16, i8, i16, i32, i16)
More information about the llvm-commits
mailing list