[llvm] 01dc107 - [ARM] unwinding .pad instructions missing in execute-only prologue
Keith Walker via llvm-commits
llvm-commits at lists.llvm.org
Tue Apr 7 03:52:35 PDT 2020
Author: Keith Walker
Date: 2020-04-07T11:51:59+01:00
New Revision: 01dc10774eb87bc2f114a0e2d77083dbec62226b
URL: https://github.com/llvm/llvm-project/commit/01dc10774eb87bc2f114a0e2d77083dbec62226b
DIFF: https://github.com/llvm/llvm-project/commit/01dc10774eb87bc2f114a0e2d77083dbec62226b.diff
LOG: [ARM] unwinding .pad instructions missing in execute-only prologue
If the stack pointer is adjusted to make room for local variables and we are
generating Thumb2 execute-only code, the .pad directive is missing.
Usually the size of the adjustment is stored in a PC-relative location
and loaded into a register, which is then added to the stack pointer.
However, when we are generating execute-only code the size of the
adjustment is instead materialized with a MOVW/MOVT instruction pair.
As a by-product of handling the execute-only case, this also fixes an
existing issue: in the non-execute-only case the .pad directive was
emitted against the instruction that loads the constant into a register,
instead of against the instruction that adds that register to the stack pointer.
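As an illustration (a sketch based on the new test below, not verbatim
compiler output), a function that allocates 1600 bytes of locals now carries
the .pad directive on the instruction that adds the scratch register to SP
in both forms:
  @ literal-pool form (non-execute-only)
  ldr   r6, .LCPI0_0      @ r6 = -1600, loaded from the constant pool
  .pad  #1600
  add   sp, r6
  @ execute-only form: the constant is built with a MOVW/MOVT pair
  movw  r6, #63936        @ low half of 0xFFFFF9C0 (-1600)
  movt  r6, #65535        @ high half
  .pad  #1600
  add   sp, r6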
Differential Revision: https://reviews.llvm.org/D76849
Added:
llvm/test/CodeGen/Thumb2/thumb2-execute-only-prologue.ll
Modified:
llvm/lib/Target/ARM/ARMAsmPrinter.cpp
llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
llvm/lib/Target/ARM/ARMMachineFunctionInfo.h
llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
llvm/test/CodeGen/Thumb/emergency-spill-slot.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
index 1342f2bf5ad4..d0adb24437d6 100644
--- a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -1084,16 +1084,26 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
unsigned Opc = MI->getOpcode();
unsigned SrcReg, DstReg;
- if (Opc == ARM::tPUSH || Opc == ARM::tLDRpci) {
- // Two special cases:
- // 1) tPUSH does not have src/dst regs.
- // 2) for Thumb1 code we sometimes materialize the constant via constpool
- // load. Yes, this is pretty fragile, but for now I don't see better
- // way... :(
+ switch (Opc) {
+ case ARM::tPUSH:
+ // special case: tPUSH does not have src/dst regs.
SrcReg = DstReg = ARM::SP;
- } else {
+ break;
+ case ARM::tLDRpci:
+ case ARM::t2MOVi16:
+ case ARM::t2MOVTi16:
+ // special cases:
+ // 1) for Thumb1 code we sometimes materialize the constant via constpool
+ // load.
+ // 2) for Thumb2 execute only code we materialize the constant via
+ // immediate constants in 2 separate instructions (MOVW/MOVT).
+ SrcReg = ~0U;
+ DstReg = MI->getOperand(0).getReg();
+ break;
+ default:
SrcReg = MI->getOperand(1).getReg();
DstReg = MI->getOperand(0).getReg();
+ break;
}
// Try to figure out the unwinding opcode out of src / dst regs.
@@ -1197,23 +1207,11 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
case ARM::tADDrSPi:
Offset = -MI->getOperand(2).getImm()*4;
break;
- case ARM::tLDRpci: {
- // Grab the constpool index and check, whether it corresponds to
- // original or cloned constpool entry.
- unsigned CPI = MI->getOperand(1).getIndex();
- const MachineConstantPool *MCP = MF.getConstantPool();
- if (CPI >= MCP->getConstants().size())
- CPI = AFI->getOriginalCPIdx(CPI);
- assert(CPI != -1U && "Invalid constpool index");
-
- // Derive the actual offset.
- const MachineConstantPoolEntry &CPE = MCP->getConstants()[CPI];
- assert(!CPE.isMachineConstantPoolEntry() && "Invalid constpool entry");
- // FIXME: Check for user, it should be "add" instruction!
- Offset = -cast<ConstantInt>(CPE.Val.ConstVal)->getSExtValue();
+ case ARM::tADDhirr:
+ Offset =
+ -AFI->EHPrologueOffsetInRegs.lookup(MI->getOperand(2).getReg());
break;
}
- }
if (MAI->getExceptionHandlingType() == ExceptionHandling::ARM) {
if (DstReg == FramePtr && FramePtr != ARM::SP)
@@ -1233,14 +1231,43 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
} else if (DstReg == ARM::SP) {
MI->print(errs());
llvm_unreachable("Unsupported opcode for unwinding information");
- } else if (Opc == ARM::tMOVr) {
- // If a Thumb1 function spills r8-r11, we copy the values to low
- // registers before pushing them. Record the copy so we can emit the
- // correct ".save" later.
- AFI->EHPrologueRemappedRegs[DstReg] = SrcReg;
} else {
- MI->print(errs());
- llvm_unreachable("Unsupported opcode for unwinding information");
+ int64_t Offset = 0;
+ switch (Opc) {
+ case ARM::tMOVr:
+ // If a Thumb1 function spills r8-r11, we copy the values to low
+ // registers before pushing them. Record the copy so we can emit the
+ // correct ".save" later.
+ AFI->EHPrologueRemappedRegs[DstReg] = SrcReg;
+ break;
+ case ARM::tLDRpci: {
+ // Grab the constpool index and check, whether it corresponds to
+ // original or cloned constpool entry.
+ unsigned CPI = MI->getOperand(1).getIndex();
+ const MachineConstantPool *MCP = MF.getConstantPool();
+ if (CPI >= MCP->getConstants().size())
+ CPI = AFI->getOriginalCPIdx(CPI);
+ assert(CPI != -1U && "Invalid constpool index");
+
+ // Derive the actual offset.
+ const MachineConstantPoolEntry &CPE = MCP->getConstants()[CPI];
+ assert(!CPE.isMachineConstantPoolEntry() && "Invalid constpool entry");
+ Offset = cast<ConstantInt>(CPE.Val.ConstVal)->getSExtValue();
+ AFI->EHPrologueOffsetInRegs[DstReg] = Offset;
+ break;
+ }
+ case ARM::t2MOVi16:
+ Offset = MI->getOperand(1).getImm();
+ AFI->EHPrologueOffsetInRegs[DstReg] = Offset;
+ break;
+ case ARM::t2MOVTi16:
+ Offset = MI->getOperand(2).getImm();
+ AFI->EHPrologueOffsetInRegs[DstReg] |= (Offset << 16);
+ break;
+ default:
+ MI->print(errs());
+ llvm_unreachable("Unsupported opcode for unwinding information");
+ }
}
}
}
diff --git a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index 9fc566761c5c..4afa3a47cefe 100644
--- a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -852,10 +852,13 @@ void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB,
unsigned ImmVal = (unsigned)MO.getImm();
unsigned SOImmValV1 = ARM_AM::getSOImmTwoPartFirst(ImmVal);
unsigned SOImmValV2 = ARM_AM::getSOImmTwoPartSecond(ImmVal);
+ unsigned MIFlags = MI.getFlags();
LO16 = LO16.addImm(SOImmValV1);
HI16 = HI16.addImm(SOImmValV2);
LO16.cloneMemRefs(MI);
HI16.cloneMemRefs(MI);
+ LO16.setMIFlags(MIFlags);
+ HI16.setMIFlags(MIFlags);
LO16.addImm(Pred).addReg(PredReg).add(condCodeOp());
HI16.addImm(Pred).addReg(PredReg).add(condCodeOp());
if (isCC)
@@ -867,6 +870,7 @@ void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB,
unsigned LO16Opc = 0;
unsigned HI16Opc = 0;
+ unsigned MIFlags = MI.getFlags();
if (Opcode == ARM::t2MOVi32imm || Opcode == ARM::t2MOVCCi32imm) {
LO16Opc = ARM::t2MOVi16;
HI16Opc = ARM::t2MOVTi16;
@@ -880,6 +884,9 @@ void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB,
.addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
.addReg(DstReg);
+ LO16.setMIFlags(MIFlags);
+ HI16.setMIFlags(MIFlags);
+
switch (MO.getType()) {
case MachineOperand::MO_Immediate: {
unsigned Imm = MO.getImm();
diff --git a/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h b/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h
index 7adf52e1598f..85c6837b72ce 100644
--- a/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h
+++ b/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h
@@ -245,6 +245,7 @@ class ARMFunctionInfo : public MachineFunctionInfo {
}
DenseMap<unsigned, unsigned> EHPrologueRemappedRegs;
+ DenseMap<unsigned, unsigned> EHPrologueOffsetInRegs;
void setPreservesR0() { PreservesR0 = true; }
bool getPreservesR0() const { return PreservesR0; }
diff --git a/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp b/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
index c5ca64b0d78a..5676c4f411a2 100644
--- a/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
+++ b/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
@@ -88,8 +88,10 @@ emitPrologueEpilogueSPUpdate(MachineBasicBlock &MBB,
0, MIFlags);
}
BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDhirr), ARM::SP)
- .addReg(ARM::SP).addReg(ScratchReg, RegState::Kill)
- .add(predOps(ARMCC::AL));
+ .addReg(ARM::SP)
+ .addReg(ScratchReg, RegState::Kill)
+ .add(predOps(ARMCC::AL))
+ .setMIFlags(MIFlags);
return;
}
// FIXME: This is assuming the heuristics in emitThumbRegPlusImmediate
diff --git a/llvm/test/CodeGen/Thumb/emergency-spill-slot.ll b/llvm/test/CodeGen/Thumb/emergency-spill-slot.ll
index 13d28514a71b..e11c5bb7178c 100644
--- a/llvm/test/CodeGen/Thumb/emergency-spill-slot.ll
+++ b/llvm/test/CodeGen/Thumb/emergency-spill-slot.ll
@@ -9,8 +9,8 @@ define void @vla_emergency_spill(i32 %n) {
; CHECK-NEXT: push {r4, r5, r6, r7, lr}
; CHECK-NEXT: .setfp r7, sp, #12
; CHECK-NEXT: add r7, sp, #12
-; CHECK-NEXT: .pad #4100
; CHECK-NEXT: ldr r6, .LCPI0_0
+; CHECK-NEXT: .pad #4100
; CHECK-NEXT: add sp, r6
; CHECK-NEXT: mov r6, sp
; CHECK-NEXT: adds r0, r0, #7
@@ -59,8 +59,8 @@ define void @simple_emergency_spill(i32 %n) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, r5, r6, r7, lr}
; CHECK-NEXT: push {r4, r5, r6, r7, lr}
-; CHECK-NEXT: .pad #8196
; CHECK-NEXT: ldr r7, .LCPI1_0
+; CHECK-NEXT: .pad #8196
; CHECK-NEXT: add sp, r7
; CHECK-NEXT: add r0, sp, #4
; CHECK-NEXT: ldr r1, .LCPI1_2
@@ -119,8 +119,8 @@ define void @simple_emergency_spill_nor7(i32 %n) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, r5, r6, lr}
; CHECK-NEXT: push {r4, r5, r6, lr}
-; CHECK-NEXT: .pad #8196
; CHECK-NEXT: ldr r6, .LCPI2_0
+; CHECK-NEXT: .pad #8196
; CHECK-NEXT: add sp, r6
; CHECK-NEXT: add r0, sp, #4
; CHECK-NEXT: ldr r1, .LCPI2_2
diff --git a/llvm/test/CodeGen/Thumb2/thumb2-execute-only-prologue.ll b/llvm/test/CodeGen/Thumb2/thumb2-execute-only-prologue.ll
new file mode 100644
index 000000000000..a8f427502f8d
--- /dev/null
+++ b/llvm/test/CodeGen/Thumb2/thumb2-execute-only-prologue.ll
@@ -0,0 +1,38 @@
+; RUN: llc < %s -mtriple=thumbv8m.base-arm-none-eabi | FileCheck %s
+
+define void @fn() {
+entry:
+; CHECK-LABEL: fn:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r4, r5, r6, lr}
+; CHECK-NEXT: push {r4, r5, r6, lr}
+; CHECK-NEXT: ldr r6, .LCPI0_0
+; CHECK-NEXT: .pad #1600
+; CHECK-NEXT: add sp, r6
+; CHECK: .LCPI0_0:
+; CHECK-NEXT: .long 4294965696
+ %a = alloca [400 x i32], align 4
+ %arraydecay = getelementptr inbounds [400 x i32], [400 x i32]* %a, i32 0, i32 0
+ call void @bar(i32* %arraydecay)
+ ret void
+}
+
+define void @execute_only_fn() #0 {
+entry:
+; CHECK-LABEL: execute_only_fn:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r4, r5, r6, lr}
+; CHECK-NEXT: push {r4, r5, r6, lr}
+; CHECK-NEXT: movw r6, #63936
+; CHECK-NEXT: movt r6, #65535
+; CHECK-NEXT: .pad #1600
+; CHECK-NEXT: add sp, r6
+ %a = alloca [400 x i32], align 4
+ %arraydecay = getelementptr inbounds [400 x i32], [400 x i32]* %a, i32 0, i32 0
+ call void @bar(i32* %arraydecay)
+ ret void
+}
+
+declare dso_local void @bar(i32*)
+
+attributes #0 = { noinline optnone "target-features"="+armv8-m.base,+execute-only,+thumb-mode" }