[PATCH] ARM: use lit-pools for global variables on "minsize" functions
Tim Northover
t.p.northover at gmail.com
Mon Dec 2 05:55:19 PST 2013
Hi all,
On average, it's a good idea to use literal pools to materialise global variables when optimising for minimum size at the expense of performance: LDR + constant is at most 8 bytes and the constant may be shared between uses, whereas MOVW + MOVT is 8 bytes with no sharing possible.
This patch copies the MachineFunction MinSize attribute into ARMSubtarget so that it's available for TableGen to base its decisions on (via UseMovt). I considered an alternative of handling it entirely in ARMISelLowering, but I think that would proliferate yet more ARMISD nodes.
Does it look good?
Cheers.
Tim
http://llvm-reviews.chandlerc.com/D2305
Files:
lib/Target/ARM/ARMBaseInstrInfo.cpp
lib/Target/ARM/ARMBaseInstrInfo.h
lib/Target/ARM/ARMFrameLowering.cpp
lib/Target/ARM/ARMISelLowering.cpp
lib/Target/ARM/ARMSubtarget.cpp
lib/Target/ARM/ARMSubtarget.h
lib/Target/ARM/Thumb1FrameLowering.cpp
lib/Target/ARM/Thumb2SizeReduction.cpp
test/CodeGen/ARM/minsize-litpools.ll
Index: lib/Target/ARM/ARMBaseInstrInfo.cpp
===================================================================
--- lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -1859,12 +1859,12 @@
}
}
-bool llvm::tryFoldSPUpdateIntoPushPop(MachineFunction &MF,
- MachineInstr *MI,
+bool llvm::tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget,
+ MachineFunction &MF, MachineInstr *MI,
unsigned NumBytes) {
// This optimisation potentially adds lots of load and store
// micro-operations, it's only really a great benefit to code-size.
- if (!MF.getFunction()->hasFnAttribute(Attribute::MinSize))
+ if (!Subtarget.isMinSize())
return false;
// If only one register is pushed/popped, LLVM can use an LDR/STR
Index: lib/Target/ARM/ARMBaseInstrInfo.h
===================================================================
--- lib/Target/ARM/ARMBaseInstrInfo.h
+++ lib/Target/ARM/ARMBaseInstrInfo.h
@@ -417,7 +417,8 @@
/// NumBytes. This can save a few bytes per function in code-size, but
/// obviously generates more memory traffic. As such, it only takes
/// effect in functions being optimised for size.
-bool tryFoldSPUpdateIntoPushPop(MachineFunction &MF, MachineInstr *MI,
+bool tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget,
+ MachineFunction &MF, MachineInstr *MI,
unsigned NumBytes);
/// rewriteARMFrameIndex / rewriteT2FrameIndex -
Index: lib/Target/ARM/ARMFrameLowering.cpp
===================================================================
--- lib/Target/ARM/ARMFrameLowering.cpp
+++ lib/Target/ARM/ARMFrameLowering.cpp
@@ -256,7 +256,7 @@
if (NumBytes) {
// Adjust SP after all the callee-save spills.
- if (tryFoldSPUpdateIntoPushPop(MF, LastPush, NumBytes))
+ if (tryFoldSPUpdateIntoPushPop(STI, MF, LastPush, NumBytes))
FramePtrOffsetInPush += NumBytes;
else
emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes,
@@ -434,7 +434,8 @@
ARM::SP)
.addReg(FramePtr));
}
- } else if (NumBytes && !tryFoldSPUpdateIntoPushPop(MF, FirstPop, NumBytes))
+ } else if (NumBytes &&
+ !tryFoldSPUpdateIntoPushPop(STI, MF, FirstPop, NumBytes))
emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes);
// Increment past our save areas.
Index: lib/Target/ARM/ARMISelLowering.cpp
===================================================================
--- lib/Target/ARM/ARMISelLowering.cpp
+++ lib/Target/ARM/ARMISelLowering.cpp
@@ -1745,8 +1745,7 @@
// FIXME: handle tail calls differently.
unsigned CallOpc;
- bool HasMinSizeAttr = MF.getFunction()->getAttributes().
- hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize);
+ bool HasMinSizeAttr = Subtarget->isMinSize();
if (Subtarget->isThumb()) {
if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
CallOpc = ARMISD::CALL_NOLINK;
Index: lib/Target/ARM/ARMSubtarget.cpp
===================================================================
--- lib/Target/ARM/ARMSubtarget.cpp
+++ lib/Target/ARM/ARMSubtarget.cpp
@@ -102,6 +102,7 @@
HasVFPv4 = false;
HasFPARMv8 = false;
HasNEON = false;
+ MinSize = false;
UseNEONForSinglePrecisionFP = false;
UseMulOps = UseFusedMulOps;
SlowFPVMLx = false;
@@ -151,6 +152,9 @@
initializeEnvironment();
resetSubtargetFeatures(CPU, FS);
}
+
+ MinSize =
+ FnAttrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize);
}
void ARMSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) {
Index: lib/Target/ARM/ARMSubtarget.h
===================================================================
--- lib/Target/ARM/ARMSubtarget.h
+++ lib/Target/ARM/ARMSubtarget.h
@@ -64,6 +64,10 @@
bool HasFPARMv8;
bool HasNEON;
+ /// MinSize - True if the function being compiled has the "minsize" attribute
+ /// and should be optimised for size at the expense of speed.
+ bool MinSize;
+
/// UseNEONForSinglePrecisionFP - if the NEONFP attribute has been
/// specified. Use the method useNEONForSinglePrecisionFP() to
/// determine if NEON should actually be used.
@@ -270,6 +274,7 @@
bool hasCrypto() const { return HasCrypto; }
bool hasCRC() const { return HasCRC; }
bool hasVirtualization() const { return HasVirtualization; }
+ bool isMinSize() const { return MinSize; }
bool useNEONForSinglePrecisionFP() const {
return hasNEON() && UseNEONForSinglePrecisionFP; }
@@ -327,7 +332,7 @@
bool isR9Reserved() const { return IsR9Reserved; }
- bool useMovt() const { return UseMovt && hasV6T2Ops(); }
+ bool useMovt() const { return UseMovt && !isMinSize(); }
bool supportsTailCall() const { return SupportsTailCall; }
bool allowsUnalignedMem() const { return AllowsUnalignedMem; }
Index: lib/Target/ARM/Thumb1FrameLowering.cpp
===================================================================
--- lib/Target/ARM/Thumb1FrameLowering.cpp
+++ lib/Target/ARM/Thumb1FrameLowering.cpp
@@ -165,7 +165,7 @@
NumBytes = DPRCSOffset;
int FramePtrOffsetInBlock = 0;
- if (tryFoldSPUpdateIntoPushPop(MF, prior(MBBI), NumBytes)) {
+ if (tryFoldSPUpdateIntoPushPop(STI, MF, prior(MBBI), NumBytes)) {
FramePtrOffsetInBlock = NumBytes;
NumBytes = 0;
}
@@ -291,9 +291,9 @@
&MBB.front() != MBBI &&
prior(MBBI)->getOpcode() == ARM::tPOP) {
MachineBasicBlock::iterator PMBBI = prior(MBBI);
- if (!tryFoldSPUpdateIntoPushPop(MF, PMBBI, NumBytes))
+ if (!tryFoldSPUpdateIntoPushPop(STI, MF, PMBBI, NumBytes))
emitSPUpdate(MBB, PMBBI, TII, dl, *RegInfo, NumBytes);
- } else if (!tryFoldSPUpdateIntoPushPop(MF, MBBI, NumBytes))
+ } else if (!tryFoldSPUpdateIntoPushPop(STI, MF, MBBI, NumBytes))
emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, NumBytes);
}
}
Index: lib/Target/ARM/Thumb2SizeReduction.cpp
===================================================================
--- lib/Target/ARM/Thumb2SizeReduction.cpp
+++ lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -1012,8 +1012,7 @@
AttributeSet FnAttrs = MF.getFunction()->getAttributes();
OptimizeSize = FnAttrs.hasAttribute(AttributeSet::FunctionIndex,
Attribute::OptimizeForSize);
- MinimizeSize = FnAttrs.hasAttribute(AttributeSet::FunctionIndex,
- Attribute::MinSize);
+ MinimizeSize = STI->isMinSize();
BlockInfo.clear();
BlockInfo.resize(MF.getNumBlockIDs());
Index: test/CodeGen/ARM/minsize-litpools.ll
===================================================================
--- /dev/null
+++ test/CodeGen/ARM/minsize-litpools.ll
@@ -0,0 +1,26 @@
+; RUN: llc -mtriple=thumbv7s %s -o - | FileCheck %s
+; RUN: llc -mtriple=armv7s %s -o - | FileCheck %s
+
+; CodeGen should be able to set and reset the MinSize subtarget-feature, and
+; make use of it in deciding whether to use MOVW/MOVT for global variables or a
+; lit-pool load (saving roughly 2 bytes of code).
+
+@var = global i32 0
+
+define i32 @small_global() minsize {
+; CHECK-LABEL: small_global:
+; CHECK: ldr r[[GLOBDEST:[0-9]+]], {{.?LCPI0_0}}
+; CHECK: ldr r0, [r[[GLOBDEST]]]
+
+ %val = load i32* @var
+ ret i32 %val
+}
+
+define i32 @big_global() {
+; CHECK-LABEL: big_global:
+; CHECK: movw [[GLOBDEST:r[0-9]+]], :lower16:var
+; CHECK: movt [[GLOBDEST]], :upper16:var
+
+ %val = load i32* @var
+ ret i32 %val
+}
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D2305.1.patch
Type: text/x-patch
Size: 7573 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20131202/16c6e3cc/attachment.bin>
More information about the llvm-commits
mailing list