[llvm] 112d769 - [ARM] generate correct code for armv6-m XO big stack operations
Ties Stuij via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 4 02:40:12 PDT 2023
Author: Ties Stuij
Date: 2023-07-04T10:40:06+01:00
New Revision: 112d769e5ee8b7e961122ead3943f06119c05c5b
URL: https://github.com/llvm/llvm-project/commit/112d769e5ee8b7e961122ead3943f06119c05c5b
DIFF: https://github.com/llvm/llvm-project/commit/112d769e5ee8b7e961122ead3943f06119c05c5b.diff
LOG: [ARM] generate correct code for armv6-m XO big stack operations
The ARM backend codebase is dotted with places where armv6-m will generate
constant pools. Now that we can generate execute-only code for armv6-m, we need
to make sure we use the movs/lsls/adds/lsls/adds/lsls/adds pattern instead of
these.
Big stacks are one of the obvious places. In this patch we take care of two
sites:
1. take care of big stacks in prologue/epilogue
2. take care of store/tSTRspi nodes, which implicitly fixes
emitThumbRegPlusImmInReg, which is used in several frame lowering functions
Reviewed By: efriedma
Differential Revision: https://reviews.llvm.org/D154233
Added:
Modified:
llvm/lib/Target/ARM/ARMAsmPrinter.cpp
llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
llvm/lib/Target/ARM/ThumbRegisterInfo.cpp
llvm/test/CodeGen/ARM/large-stack.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
index b1074d1265ff9b..69df1d12aa8e28 100644
--- a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -1140,10 +1140,24 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
case ARM::tLDRpci:
case ARM::t2MOVi16:
case ARM::t2MOVTi16:
+ case ARM::tMOVi8:
+ case ARM::tADDi8:
+ case ARM::tLSLri:
// special cases:
// 1) for Thumb1 code we sometimes materialize the constant via constpool
// load.
- // 2) for Thumb2 execute only code we materialize the constant via
+ // 2) for Thumb1 execute only code we materialize the constant via the
+ // following pattern:
+ // movs r3, #:upper8_15:<const>
+ // lsls r3, #8
+ // adds r3, #:upper0_7:<const>
+ // lsls r3, #8
+ // adds r3, #:lower8_15:<const>
+ // lsls r3, #8
+ // adds r3, #:lower0_7:<const>
+ // So we need to special-case MOVS, ADDS and LSLS, and keep track of
+ // where we are in the sequence with the simplest of state machines.
+ // 3) for Thumb2 execute only code we materialize the constant via
// immediate constants in 2 separate instructions (MOVW/MOVT).
SrcReg = ~0U;
DstReg = MI->getOperand(0).getReg();
@@ -1334,6 +1348,23 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
Offset = MI->getOperand(2).getImm();
AFI->EHPrologueOffsetInRegs[DstReg] |= (Offset << 16);
break;
+ case ARM::tMOVi8:
+ Offset = MI->getOperand(2).getImm();
+ AFI->EHPrologueOffsetInRegs[DstReg] = Offset;
+ break;
+ case ARM::tLSLri:
+ assert(MI->getOperand(3).getImm() == 8 &&
+ "The shift amount is not equal to 8");
+ assert(MI->getOperand(2).getReg() == MI->getOperand(0).getReg() &&
+ "The source register is not equal to the destination register");
+ AFI->EHPrologueOffsetInRegs[DstReg] <<= 8;
+ break;
+ case ARM::tADDi8:
+ assert(MI->getOperand(2).getReg() == MI->getOperand(0).getReg() &&
+ "The source register is not equal to the destination register");
+ Offset = MI->getOperand(3).getImm();
+ AFI->EHPrologueOffsetInRegs[DstReg] += Offset;
+ break;
case ARM::t2PAC:
case ARM::t2PACBTI:
AFI->EHPrologueRemappedRegs[ARM::R12] = ARM::RA_AUTH_CODE;
diff --git a/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp b/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
index 9855e48b623e47..c2962c4857c3c0 100644
--- a/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
+++ b/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
@@ -81,8 +81,9 @@ emitPrologueEpilogueSPUpdate(MachineBasicBlock &MBB,
MachineFunction &MF = *MBB.getParent();
const ARMSubtarget &ST = MF.getSubtarget<ARMSubtarget>();
if (ST.genExecuteOnly()) {
- BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi32imm), ScratchReg)
- .addImm(NumBytes).setMIFlags(MIFlags);
+ unsigned XOInstr = ST.useMovt() ? ARM::t2MOVi32imm : ARM::tMOVi32imm;
+ BuildMI(MBB, MBBI, dl, TII.get(XOInstr), ScratchReg)
+ .addImm(NumBytes).setMIFlags(MIFlags);
} else {
MRI.emitLoadConstPool(MBB, MBBI, dl, ScratchReg, 0, NumBytes, ARMCC::AL,
0, MIFlags);
diff --git a/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp b/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp
index a29095e6b81af3..0c010ed1eb3423 100644
--- a/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp
+++ b/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp
@@ -116,9 +116,10 @@ void ThumbRegisterInfo::emitLoadConstPool(
PredReg, MIFlags);
}
-/// emitThumbRegPlusImmInReg - Emits a series of instructions to materialize
-/// a destreg = basereg + immediate in Thumb code. Materialize the immediate
-/// in a register using mov / mvn sequences or load the immediate from a
+/// emitThumbRegPlusImmInReg - Emits a series of instructions to materialize a
+/// destreg = basereg + immediate in Thumb code. Materialize the immediate in a
+/// register using mov / mvn (armv6-M >) sequences, movs / lsls / adds / lsls /
+/// adds / lsls / adds sequences (armv6-M) or load the immediate from a
/// constpool entry.
static void emitThumbRegPlusImmInReg(
MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
@@ -159,7 +160,8 @@ static void emitThumbRegPlusImmInReg(
.addReg(LdReg, RegState::Kill)
.setMIFlags(MIFlags);
} else if (ST.genExecuteOnly()) {
- BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi32imm), LdReg)
+ unsigned XOInstr = ST.useMovt() ? ARM::t2MOVi32imm : ARM::tMOVi32imm;
+ BuildMI(MBB, MBBI, dl, TII.get(XOInstr), LdReg)
.addImm(NumBytes).setMIFlags(MIFlags);
} else
MRI.emitLoadConstPool(MBB, MBBI, dl, LdReg, 0, NumBytes, ARMCC::AL, 0,
diff --git a/llvm/test/CodeGen/ARM/large-stack.ll b/llvm/test/CodeGen/ARM/large-stack.ll
index aedd8aa911fe27..e7bed0b5e36ccc 100644
--- a/llvm/test/CodeGen/ARM/large-stack.ll
+++ b/llvm/test/CodeGen/ARM/large-stack.ll
@@ -1,21 +1,54 @@
; RUN: llc -mtriple=arm-eabi %s -o /dev/null
-; RUN: llc -mtriple=thumbv6m-eabi -mattr=+execute-only %s -o -
+; RUN: llc -mtriple=thumbv6m-eabi -mattr=+execute-only %s -o - -filetype=obj | \
+; RUN: llvm-objdump -d --no-leading-addr --no-show-raw-insn - | Filecheck %s
define void @test1() {
- %tmp = alloca [ 64 x i32 ] , align 4
+; CHECK-LABEL: <test1>:
+;; are we using correct prologue immediate materialization pattern for
+;; execute only
+; CHECK: sub sp, #0x100
+%tmp = alloca [ 64 x i32 ] , align 4
ret void
}
define void @test2() {
+; CHECK-LABEL: <test2>:
+;; are we using correct prologue immediate materialization pattern for
+;; execute-only
+; CHECK: movs [[REG:r[0-9]+]], #0xff
+; CHECK-NEXT: lsls [[REG]], [[REG]], #0x8
+; CHECK-NEXT: adds [[REG]], #0xff
+; CHECK-NEXT: lsls [[REG]], [[REG]], #0x8
+; CHECK-NEXT: adds [[REG]], #0xef
+; CHECK-NEXT: lsls [[REG]], [[REG]], #0x8
+; CHECK-NEXT: adds [[REG]], #0xb8
%tmp = alloca [ 4168 x i8 ] , align 4
ret void
}
define i32 @test3() {
+;; are we using correct prologue immediate materialization pattern for
+;; execute-only
+; CHECK-LABEL: <test3>:
+; CHECK: movs [[REG:r[0-9]+]], #0xcf
+; CHECK-NEXT: lsls [[REG]], [[REG]], #0x8
+; CHECK-NEXT: adds [[REG]], #0xff
+; CHECK-NEXT: lsls [[REG]], [[REG]], #0x8
+; CHECK-NEXT: adds [[REG]], #0xff
+; CHECK-NEXT: lsls [[REG]], [[REG]], #0x8
+; CHECK-NEXT: adds [[REG]], #0xf0
%retval = alloca i32, align 4
%tmp = alloca i32, align 4
- %a = alloca [805306369 x i8], align 16
+ %a = alloca [u0x30000001 x i8], align 16
store i32 0, ptr %tmp
+;; are we choosing correct store/tSTRspi pattern for execute-only
+; CHECK: movs [[REG:r[0-9]+]], #0x30
+; CHECK-NEXT: lsls [[REG]], [[REG]], #0x8
+; CHECK-NEXT: adds [[REG]], #0x0
+; CHECK-NEXT: lsls [[REG]], [[REG]], #0x8
+; CHECK-NEXT: adds [[REG]], #0x0
+; CHECK-NEXT: lsls [[REG]], [[REG]], #0x8
+; CHECK-NEXT: adds [[REG]], #0x8
%tmp1 = load i32, ptr %tmp
ret i32 %tmp1
}
More information about the llvm-commits
mailing list