[llvm] r342127 - ARM: align loops to 4 bytes on Cortex-M3 and Cortex-M4.
Tim Northover via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 13 03:28:05 PDT 2018
Author: tnorthover
Date: Thu Sep 13 03:28:05 2018
New Revision: 342127
URL: http://llvm.org/viewvc/llvm-project?rev=342127&view=rev
Log:
ARM: align loops to 4 bytes on Cortex-M3 and Cortex-M4.
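The Technical Reference Manuals for these two CPUs state that branching
to an unaligned 32-bit instruction incurs an extra pipeline reload
penalty, so loops are now aligned to 4 bytes to avoid it. The same
loop-alignment preference is also applied to Cortex-M33 in this change.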
This also enables the optimization at -Os (functions marked minsize are
still left unaligned), since it costs on average one byte per loop in
return for 1 cycle per iteration, which is a good trade-off.
Added:
llvm/trunk/test/CodeGen/ARM/loop-align-cortex-m.ll
Modified:
llvm/trunk/include/llvm/CodeGen/TargetLowering.h
llvm/trunk/lib/CodeGen/MachineBlockPlacement.cpp
llvm/trunk/lib/Target/ARM/ARM.td
llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
llvm/trunk/lib/Target/ARM/ARMISelLowering.h
llvm/trunk/lib/Target/ARM/ARMSubtarget.h
Modified: llvm/trunk/include/llvm/CodeGen/TargetLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/TargetLowering.h?rev=342127&r1=342126&r2=342127&view=diff
==============================================================================
--- llvm/trunk/include/llvm/CodeGen/TargetLowering.h (original)
+++ llvm/trunk/include/llvm/CodeGen/TargetLowering.h Thu Sep 13 03:28:05 2018
@@ -1435,6 +1435,12 @@ public:
return PrefLoopAlignment;
}
+ /// Should loops be aligned even when the function is marked OptSize (but not
+ /// MinSize).
+ virtual bool alignLoopsWithOptSize() const {
+ return false;
+ }
+
/// If the target has a standard location for the stack protector guard,
/// returns the address of that location. Otherwise, returns nullptr.
/// DEPRECATED: please override useLoadStackGuardNode and customize
Modified: llvm/trunk/lib/CodeGen/MachineBlockPlacement.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MachineBlockPlacement.cpp?rev=342127&r1=342126&r2=342127&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/MachineBlockPlacement.cpp (original)
+++ llvm/trunk/lib/CodeGen/MachineBlockPlacement.cpp Thu Sep 13 03:28:05 2018
@@ -2497,7 +2497,8 @@ void MachineBlockPlacement::alignBlocks(
// exclusively on the loop info here so that we can align backedges in
// unnatural CFGs and backedges that were introduced purely because of the
// loop rotations done during this layout pass.
- if (F->getFunction().optForSize())
+ if (F->getFunction().optForMinSize() ||
+ (F->getFunction().optForSize() && !TLI->alignLoopsWithOptSize()))
return;
BlockChain &FunctionChain = *BlockToChain[&F->front()];
if (FunctionChain.begin() == FunctionChain.end())
Modified: llvm/trunk/lib/Target/ARM/ARM.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARM.td?rev=342127&r1=342126&r2=342127&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARM.td (original)
+++ llvm/trunk/lib/Target/ARM/ARM.td Thu Sep 13 03:28:05 2018
@@ -265,6 +265,9 @@ def FeatureVMLxForwarding : SubtargetFea
def FeaturePref32BitThumb : SubtargetFeature<"32bit", "Pref32BitThumb", "true",
"Prefer 32-bit Thumb instrs">;
+def FeaturePrefLoopAlign32 : SubtargetFeature<"loop-align", "PrefLoopAlignment","2",
+ "Prefer 32-bit alignment for loops">;
+
/// Some instructions update CPSR partially, which can add false dependency for
/// out-of-order implementation, e.g. Cortex-A9, unless each individual bit is
/// mapped to a separate physical register. Avoid partial CPSR update for these
@@ -936,6 +939,7 @@ def : ProcessorModel<"cortex-r8", Cort
def : ProcessorModel<"cortex-m3", CortexM3Model, [ARMv7m,
ProcM3,
+ FeaturePrefLoopAlign32,
FeatureHasNoBranchPredictor]>;
def : ProcessorModel<"sc300", CortexM3Model, [ARMv7m,
@@ -946,6 +950,7 @@ def : ProcessorModel<"cortex-m4", Cortex
FeatureVFP4,
FeatureVFPOnlySP,
FeatureD16,
+ FeaturePrefLoopAlign32,
FeatureHasNoBranchPredictor]>;
def : ProcNoItin<"cortex-m7", [ARMv7em,
@@ -960,6 +965,7 @@ def : ProcessorModel<"cortex-m33", Corte
FeatureFPARMv8,
FeatureD16,
FeatureVFPOnlySP,
+ FeaturePrefLoopAlign32,
FeatureHasNoBranchPredictor]>;
def : ProcNoItin<"cortex-a32", [ARMv8a,
Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp?rev=342127&r1=342126&r2=342127&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp Thu Sep 13 03:28:05 2018
@@ -1199,6 +1199,8 @@ ARMTargetLowering::ARMTargetLowering(con
// Prefer likely predicted branches to selects on out-of-order cores.
PredictableSelectIsExpensive = Subtarget->getSchedModel().isOutOfOrder();
+ setPrefLoopAlignment(Subtarget->getPrefLoopAlignment());
+
setMinFunctionAlignment(Subtarget->isThumb() ? 1 : 2);
}
@@ -14695,6 +14697,11 @@ Value *ARMTargetLowering::emitStoreCondi
Addr});
}
+
+bool ARMTargetLowering::alignLoopsWithOptSize() const {
+ return Subtarget->isMClass();
+}
+
/// A helper function for determining the number of interleaved accesses we
/// will generate when lowering accesses of the given type.
unsigned
Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.h?rev=342127&r1=342126&r2=342127&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMISelLowering.h (original)
+++ llvm/trunk/lib/Target/ARM/ARMISelLowering.h Thu Sep 13 03:28:05 2018
@@ -575,6 +575,8 @@ class VectorType;
bool isLegalInterleavedAccessType(VectorType *VecTy,
const DataLayout &DL) const;
+ bool alignLoopsWithOptSize() const override;
+
/// Returns the number of interleaved accesses that will be generated when
/// lowering accesses of the given type.
unsigned getNumInterleavedAccesses(VectorType *VecTy,
Modified: llvm/trunk/lib/Target/ARM/ARMSubtarget.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMSubtarget.h?rev=342127&r1=342126&r2=342127&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMSubtarget.h (original)
+++ llvm/trunk/lib/Target/ARM/ARMSubtarget.h Thu Sep 13 03:28:05 2018
@@ -438,6 +438,9 @@ protected:
/// operand cycle returned by the itinerary data for pre-ISel operands.
int PreISelOperandLatencyAdjustment = 2;
+ /// What alignment is preferred for loop bodies, in log2(bytes).
+ unsigned PrefLoopAlignment = 0;
+
/// IsLittle - The target is Little Endian
bool IsLittle;
@@ -804,6 +807,10 @@ public:
bool allowPositionIndependentMovt() const {
return isROPI() || !isTargetELF();
}
+
+ unsigned getPrefLoopAlignment() const {
+ return PrefLoopAlignment;
+ }
};
} // end namespace llvm
Added: llvm/trunk/test/CodeGen/ARM/loop-align-cortex-m.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/loop-align-cortex-m.ll?rev=342127&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/loop-align-cortex-m.ll (added)
+++ llvm/trunk/test/CodeGen/ARM/loop-align-cortex-m.ll Thu Sep 13 03:28:05 2018
@@ -0,0 +1,49 @@
+; RUN: llc -mtriple=thumbv7m-none-eabi %s -mcpu=cortex-m3 -o - | FileCheck %s
+; RUN: llc -mtriple=thumbv7m-none-eabi %s -mcpu=cortex-m4 -o - | FileCheck %s
+; RUN: llc -mtriple=thumbv7m-none-eabi %s -mcpu=cortex-m33 -o - | FileCheck %s
+
+define void @test_loop_alignment(i32* %in, i32* %out) optsize {
+; CHECK-LABEL: test_loop_alignment:
+; CHECK: movs {{r[0-9]+}}, #0
+; CHECK: .p2align 2
+
+entry:
+ br label %loop
+
+loop:
+ %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
+ %in.addr = getelementptr inbounds i32, i32* %in, i32 %i
+ %lhs = load i32, i32* %in.addr, align 4
+ %res = mul nsw i32 %lhs, 5
+ %out.addr = getelementptr inbounds i32, i32* %out, i32 %i
+ store i32 %res, i32* %out.addr, align 4
+ %i.next = add i32 %i, 1
+ %done = icmp eq i32 %i.next, 1024
+ br i1 %done, label %end, label %loop
+
+end:
+ ret void
+}
+
+define void @test_loop_alignment_minsize(i32* %in, i32* %out) minsize {
+; CHECK-LABEL: test_loop_alignment_minsize:
+; CHECK: movs {{r[0-9]+}}, #0
+; CHECK-NOT: .p2align
+
+entry:
+ br label %loop
+
+loop:
+ %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
+ %in.addr = getelementptr inbounds i32, i32* %in, i32 %i
+ %lhs = load i32, i32* %in.addr, align 4
+ %res = mul nsw i32 %lhs, 5
+ %out.addr = getelementptr inbounds i32, i32* %out, i32 %i
+ store i32 %res, i32* %out.addr, align 4
+ %i.next = add i32 %i, 1
+ %done = icmp eq i32 %i.next, 1024
+ br i1 %done, label %end, label %loop
+
+end:
+ ret void
+}
More information about the llvm-commits mailing list