[llvm] r365014 - [ARM] Thumb2: favor R4-R7 over R12/LR in allocation order when opt for minsize
Oliver Stannard via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 3 02:58:52 PDT 2019
Author: ostannard
Date: Wed Jul 3 02:58:52 2019
New Revision: 365014
URL: http://llvm.org/viewvc/llvm-project?rev=365014&view=rev
Log:
[ARM] Thumb2: favor R4-R7 over R12/LR in allocation order when opt for minsize
For Thumb2, we prefer low regs (costPerUse = 0) to allow narrow
encoding. However, the current allocation order is:
R0-R3, R12, LR, R4-R11
As a result, many instructions that use R12/LR get the wide encoding.
This patch changes the allocation order to:
R0-R7, R12, LR, R8-R11
for Thumb2 when optimizing for minimum size (minsize).
In most cases, there are no extra push/pop instructions, as they will be folded
into existing ones. There might be a slight performance impact due to more
stack usage, so we only enable it when optimizing for minimum size.
https://reviews.llvm.org/D30324
Added:
llvm/trunk/test/CodeGen/ARM/favor-low-reg-for-Osize.ll
Modified:
llvm/trunk/include/llvm/CodeGen/TargetSubtargetInfo.h
llvm/trunk/lib/CodeGen/RegisterClassInfo.cpp
llvm/trunk/lib/Target/ARM/ARMRegisterInfo.td
llvm/trunk/lib/Target/ARM/ARMSubtarget.cpp
llvm/trunk/lib/Target/ARM/ARMSubtarget.h
llvm/trunk/test/CodeGen/ARM/avoid-cpsr-rmw.ll
Modified: llvm/trunk/include/llvm/CodeGen/TargetSubtargetInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/TargetSubtargetInfo.h?rev=365014&r1=365013&r2=365014&view=diff
==============================================================================
--- llvm/trunk/include/llvm/CodeGen/TargetSubtargetInfo.h (original)
+++ llvm/trunk/include/llvm/CodeGen/TargetSubtargetInfo.h Wed Jul 3 02:58:52 2019
@@ -291,6 +291,14 @@ public:
/// This is called after a .mir file was loaded.
virtual void mirFileLoaded(MachineFunction &MF) const;
+
+ /// True if the register allocator should use the allocation orders exactly as
+ /// written in the tablegen descriptions, false if it should allocate
+ /// the specified physical register later if it is callee-saved.
+ virtual bool ignoreCSRForAllocationOrder(const MachineFunction &MF,
+ unsigned PhysReg) const {
+ return false;
+ }
};
} // end namespace llvm
Modified: llvm/trunk/lib/CodeGen/RegisterClassInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/RegisterClassInfo.cpp?rev=365014&r1=365013&r2=365014&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/RegisterClassInfo.cpp (original)
+++ llvm/trunk/lib/CodeGen/RegisterClassInfo.cpp Wed Jul 3 02:58:52 2019
@@ -90,6 +90,7 @@ void RegisterClassInfo::runOnMachineFunc
void RegisterClassInfo::compute(const TargetRegisterClass *RC) const {
assert(RC && "no register class given");
RCInfo &RCI = RegClass[RC->getID()];
+ auto &STI = MF->getSubtarget();
// Raw register count, including all reserved regs.
unsigned NumRegs = RC->getNumRegs();
@@ -114,7 +115,8 @@ void RegisterClassInfo::compute(const Ta
unsigned Cost = TRI->getCostPerUse(PhysReg);
MinCost = std::min(MinCost, Cost);
- if (CalleeSavedAliases[PhysReg])
+ if (CalleeSavedAliases[PhysReg] &&
+ !STI.ignoreCSRForAllocationOrder(*MF, PhysReg))
// PhysReg aliases a CSR, save it for later.
CSRAlias.push_back(PhysReg);
else {
Modified: llvm/trunk/lib/Target/ARM/ARMRegisterInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMRegisterInfo.td?rev=365014&r1=365013&r2=365014&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMRegisterInfo.td (original)
+++ llvm/trunk/lib/Target/ARM/ARMRegisterInfo.td Wed Jul 3 02:58:52 2019
@@ -227,9 +227,10 @@ def GPR : RegisterClass<"ARM", [i32], 32
// know how to spill them. If we make our prologue/epilogue code smarter at
// some point, we can go back to using the above allocation orders for the
// Thumb1 instructions that know how to use hi regs.
- let AltOrders = [(add LR, GPR), (trunc GPR, 8)];
+ let AltOrders = [(add LR, GPR), (trunc GPR, 8),
+ (add (trunc GPR, 8), R12, LR, (shl GPR, 8))];
let AltOrderSelect = [{
- return 1 + MF.getSubtarget<ARMSubtarget>().isThumb1Only();
+ return MF.getSubtarget<ARMSubtarget>().getGPRAllocationOrder(MF);
}];
let DiagnosticString = "operand must be a register in range [r0, r15]";
}
@@ -238,9 +239,10 @@ def GPR : RegisterClass<"ARM", [i32], 32
// certain operand slots, particularly as the destination. Primarily
// useful for disassembly.
def GPRnopc : RegisterClass<"ARM", [i32], 32, (sub GPR, PC)> {
- let AltOrders = [(add LR, GPRnopc), (trunc GPRnopc, 8)];
+ let AltOrders = [(add LR, GPRnopc), (trunc GPRnopc, 8),
+ (add (trunc GPRnopc, 8), R12, LR, (shl GPRnopc, 8))];
let AltOrderSelect = [{
- return 1 + MF.getSubtarget<ARMSubtarget>().isThumb1Only();
+ return MF.getSubtarget<ARMSubtarget>().getGPRAllocationOrder(MF);
}];
let DiagnosticString = "operand must be a register in range [r0, r14]";
}
@@ -295,9 +297,10 @@ def GPRlr : RegisterClass<"ARM", [i32],
// or SP (R13 or R15) are used. The ARM ISA refers to these operands
// via the BadReg() pseudo-code description.
def rGPR : RegisterClass<"ARM", [i32], 32, (sub GPR, SP, PC)> {
- let AltOrders = [(add LR, rGPR), (trunc rGPR, 8)];
+ let AltOrders = [(add LR, rGPR), (trunc rGPR, 8),
+ (add (trunc rGPR, 8), R12, LR, (shl rGPR, 8))];
let AltOrderSelect = [{
- return 1 + MF.getSubtarget<ARMSubtarget>().isThumb1Only();
+ return MF.getSubtarget<ARMSubtarget>().getGPRAllocationOrder(MF);
}];
let DiagnosticType = "rGPR";
}
Modified: llvm/trunk/lib/Target/ARM/ARMSubtarget.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMSubtarget.cpp?rev=365014&r1=365013&r2=365014&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMSubtarget.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMSubtarget.cpp Wed Jul 3 02:58:52 2019
@@ -413,3 +413,45 @@ bool ARMSubtarget::useFastISel() const {
((isTargetMachO() && !isThumb1Only()) ||
(isTargetLinux() && !isThumb()) || (isTargetNaCl() && !isThumb()));
}
+
+unsigned ARMSubtarget::getGPRAllocationOrder(const MachineFunction &MF) const {
+ // The GPR register class has multiple possible allocation orders, with
+ // tradeoffs preferred by different sub-architectures and optimisation goals.
+ // The allocation orders are:
+ // 0: (the default tablegen order, not used)
+ // 1: r14, r0-r13
+ // 2: r0-r7
+ // 3: r0-r7, r12, lr, r8-r11
+ // Note that the register allocator will change this order so that
+ // callee-saved registers are used later, as they require extra work in the
+ // prologue/epilogue (though we sometimes override that).
+
+ // For thumb1-only targets, only the low registers are allocatable.
+ if (isThumb1Only())
+ return 2;
+
+ // Allocate low registers first, so we can select more 16-bit instructions.
+ // We also (in ignoreCSRForAllocationOrder) override the default behaviour
+ // with regard to callee-saved registers, because pushing extra registers is
+ // much cheaper (in terms of code size) than using high registers. After
+ // that, we allocate r12 (doesn't need to be saved), lr (saving it means we
+ // can return with the pop, don't need an extra "bx lr") and then the rest of
+ // the high registers.
+ if (isThumb2() && MF.getFunction().hasMinSize())
+ return 3;
+
+ // Otherwise, allocate in the default order, using LR first because saving it
+ // allows a shorter epilogue sequence.
+ return 1;
+}
+
+bool ARMSubtarget::ignoreCSRForAllocationOrder(const MachineFunction &MF,
+ unsigned PhysReg) const {
+ // To minimize code size in Thumb2, we prefer the usage of low regs (lower
+ // cost per use) so we can use narrow encoding. By default, caller-saved
+ // registers (e.g. lr, r12) are always allocated first, regardless of
+ // their cost per use. When optForMinSize, we prefer the low regs even if
+ // they are CSR because usually push/pop can be folded into existing ones.
+ return isThumb2() && MF.getFunction().hasMinSize() &&
+ ARM::GPRRegClass.contains(PhysReg);
+}
Modified: llvm/trunk/lib/Target/ARM/ARMSubtarget.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMSubtarget.h?rev=365014&r1=365013&r2=365014&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMSubtarget.h (original)
+++ llvm/trunk/lib/Target/ARM/ARMSubtarget.h Wed Jul 3 02:58:52 2019
@@ -856,6 +856,10 @@ public:
unsigned getPrefLoopAlignment() const {
return PrefLoopAlignment;
}
+
+ bool ignoreCSRForAllocationOrder(const MachineFunction &MF,
+ unsigned PhysReg) const override;
+ unsigned getGPRAllocationOrder(const MachineFunction &MF) const;
};
} // end namespace llvm
Modified: llvm/trunk/test/CodeGen/ARM/avoid-cpsr-rmw.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/avoid-cpsr-rmw.ll?rev=365014&r1=365013&r2=365014&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/avoid-cpsr-rmw.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/avoid-cpsr-rmw.ll Wed Jul 3 02:58:52 2019
@@ -60,7 +60,7 @@ entry:
while.body:
; CHECK: while.body
-; CHECK: mul r{{[0-9]+}}
+; CHECK: muls r{{[0-9]+}}
; CHECK: muls
%ptr1.addr.09 = phi i32* [ %add.ptr, %while.body ], [ %ptr1, %entry ]
%ptr2.addr.08 = phi i32* [ %incdec.ptr, %while.body ], [ %ptr2, %entry ]
Added: llvm/trunk/test/CodeGen/ARM/favor-low-reg-for-Osize.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/favor-low-reg-for-Osize.ll?rev=365014&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/favor-low-reg-for-Osize.ll (added)
+++ llvm/trunk/test/CodeGen/ARM/favor-low-reg-for-Osize.ll Wed Jul 3 02:58:52 2019
@@ -0,0 +1,29 @@
+; REQUIRES: asserts
+; RUN: llc -debug-only=regalloc < %s 2>%t | FileCheck %s --check-prefix=CHECK
+; RUN: FileCheck %s < %t --check-prefix=DEBUG
+
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n8:16:32-S64"
+target triple = "thumbv7m--linux-gnueabi"
+
+
+; DEBUG: AllocationOrder(GPR) = [ $r0 $r1 $r2 $r3 $r4 $r5 $r6 $r7 $r12 $lr $r8 $r9 $r10 $r11 ]
+
+define i32 @test_minsize(i32 %x) optsize minsize {
+; CHECK-LABEL: test_minsize:
+entry:
+; CHECK: mov r4, r0
+ tail call void asm sideeffect "", "~{r0},~{r1},~{r2},~{r3}"()
+; CHECK: mov r0, r4
+ ret i32 %x
+}
+
+; DEBUG: AllocationOrder(GPR) = [ $r0 $r1 $r2 $r3 $r12 $lr $r4 $r5 $r6 $r7 $r8 $r9 $r10 $r11 ]
+
+define i32 @test_optsize(i32 %x) optsize {
+; CHECK-LABEL: test_optsize:
+entry:
+; CHECK: mov r12, r0
+ tail call void asm sideeffect "", "~{r0},~{r1},~{r2},~{r3}"()
+; CHECK: mov r0, r12
+ ret i32 %x
+}
More information about the llvm-commits
mailing list