[llvm-commits] [llvm] r171946 - in /llvm/trunk: include/llvm/CodeGen/TargetSchedule.h include/llvm/MC/MCSchedule.h include/llvm/Target/TargetSchedule.td lib/CodeGen/MachineScheduler.cpp lib/Target/ARM/ARMScheduleA9.td lib/Target/X86/X86Schedule.td lib/Target/X86/X86ScheduleAtom.td test/CodeGen/ARM/misched-inorder-latency.ll test/CodeGen/PowerPC/misched-inorder-latency.ll utils/TableGen/SubtargetEmitter.cpp
Andrew Trick
atrick at apple.com
Tue Jan 8 19:36:49 PST 2013
Author: atrick
Date: Tue Jan 8 21:36:49 2013
New Revision: 171946
URL: http://llvm.org/viewvc/llvm-project?rev=171946&view=rev
Log:
MIsched: add an ILP window property to machine model.
This was an experimental option, but it needs to be defined
per-target; e.g., PPC A2 needs to aggressively hide latency.
I converted some in-order scheduling tests to A2. Hal is working on
more test cases.
Added:
llvm/trunk/test/CodeGen/PowerPC/misched-inorder-latency.ll
- copied, changed from r171943, llvm/trunk/test/CodeGen/ARM/misched-inorder-latency.ll
Removed:
llvm/trunk/test/CodeGen/ARM/misched-inorder-latency.ll
Modified:
llvm/trunk/include/llvm/CodeGen/TargetSchedule.h
llvm/trunk/include/llvm/MC/MCSchedule.h
llvm/trunk/include/llvm/Target/TargetSchedule.td
llvm/trunk/lib/CodeGen/MachineScheduler.cpp
llvm/trunk/lib/Target/ARM/ARMScheduleA9.td
llvm/trunk/lib/Target/X86/X86Schedule.td
llvm/trunk/lib/Target/X86/X86ScheduleAtom.td
llvm/trunk/utils/TableGen/SubtargetEmitter.cpp
Modified: llvm/trunk/include/llvm/CodeGen/TargetSchedule.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/TargetSchedule.h?rev=171946&r1=171945&r2=171946&view=diff
==============================================================================
--- llvm/trunk/include/llvm/CodeGen/TargetSchedule.h (original)
+++ llvm/trunk/include/llvm/CodeGen/TargetSchedule.h Tue Jan 8 21:36:49 2013
@@ -84,6 +84,9 @@
/// \brief Maximum number of micro-ops that may be scheduled per cycle.
unsigned getIssueWidth() const { return SchedModel.IssueWidth; }
+ /// \brief Number of cycles of latency the OOO processor is expected to hide.
+ unsigned getILPWindow() const { return SchedModel.ILPWindow; }
+
/// \brief Return the number of issue slots required for this MI.
unsigned getNumMicroOps(const MachineInstr *MI,
const MCSchedClassDesc *SC = 0) const;
Modified: llvm/trunk/include/llvm/MC/MCSchedule.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/MC/MCSchedule.h?rev=171946&r1=171945&r2=171946&view=diff
==============================================================================
--- llvm/trunk/include/llvm/MC/MCSchedule.h (original)
+++ llvm/trunk/include/llvm/MC/MCSchedule.h Tue Jan 8 21:36:49 2013
@@ -155,7 +155,7 @@
// Optional InstrItinerary OperandCycles provides expected latency.
// TODO: can't yet specify both min and expected latency per operand.
int MinLatency;
- static const unsigned DefaultMinLatency = -1;
+ static const int DefaultMinLatency = -1;
// LoadLatency is the expected latency of load instructions.
//
@@ -172,6 +172,16 @@
unsigned HighLatency;
static const unsigned DefaultHighLatency = 10;
+ // ILPWindow is the number of cycles that the scheduler effectively ignores
+ // before attempting to hide latency. This should be zero for in-order cpus to
+ // always hide expected latency. For out-of-order cpus, it may be tweaked as
+ // desired to roughly approximate instruction buffers. The actual threshold is
+ // not very important for an OOO processor, as long as it isn't too high. A
+ // nonzero value helps avoid rescheduling to hide latency when it is fairly
+ // obviously useless and makes register pressure heuristics more effective.
+ unsigned ILPWindow;
+ static const unsigned DefaultILPWindow = 0;
+
// MispredictPenalty is the typical number of extra cycles the processor
// takes to recover from a branch misprediction.
unsigned MispredictPenalty;
@@ -196,6 +206,7 @@
MinLatency(DefaultMinLatency),
LoadLatency(DefaultLoadLatency),
HighLatency(DefaultHighLatency),
+ ILPWindow(DefaultILPWindow),
MispredictPenalty(DefaultMispredictPenalty),
ProcID(0), ProcResourceTable(0), SchedClassTable(0),
NumProcResourceKinds(0), NumSchedClasses(0),
@@ -205,12 +216,12 @@
}
// Table-gen driven ctor.
- MCSchedModel(unsigned iw, int ml, unsigned ll, unsigned hl, unsigned mp,
- unsigned pi, const MCProcResourceDesc *pr,
+ MCSchedModel(unsigned iw, int ml, unsigned ll, unsigned hl, unsigned ilp,
+ unsigned mp, unsigned pi, const MCProcResourceDesc *pr,
const MCSchedClassDesc *sc, unsigned npr, unsigned nsc,
const InstrItinerary *ii):
IssueWidth(iw), MinLatency(ml), LoadLatency(ll), HighLatency(hl),
- MispredictPenalty(mp), ProcID(pi), ProcResourceTable(pr),
+ ILPWindow(ilp), MispredictPenalty(mp), ProcID(pi), ProcResourceTable(pr),
SchedClassTable(sc), NumProcResourceKinds(npr), NumSchedClasses(nsc),
InstrItineraries(ii) {}
Modified: llvm/trunk/include/llvm/Target/TargetSchedule.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Target/TargetSchedule.td?rev=171946&r1=171945&r2=171946&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Target/TargetSchedule.td (original)
+++ llvm/trunk/include/llvm/Target/TargetSchedule.td Tue Jan 8 21:36:49 2013
@@ -76,6 +76,7 @@
int IssueWidth = -1; // Max micro-ops that may be scheduled per cycle.
int MinLatency = -1; // Determines which instrucions are allowed in a group.
// (-1) inorder (0) ooo, (1): inorder +var latencies.
+ int ILPWindow = -1; // Cycles of latency likely hidden by hardware buffers.
int LoadLatency = -1; // Cycles for loads to access the cache.
int HighLatency = -1; // Approximation of cycles for "high latency" ops.
int MispredictPenalty = -1; // Extra cycles for a mispredicted branch.
Modified: llvm/trunk/lib/CodeGen/MachineScheduler.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MachineScheduler.cpp?rev=171946&r1=171945&r2=171946&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/MachineScheduler.cpp (original)
+++ llvm/trunk/lib/CodeGen/MachineScheduler.cpp Tue Jan 8 21:36:49 2013
@@ -48,15 +48,6 @@
static bool ViewMISchedDAGs = false;
#endif // NDEBUG
-// Threshold to very roughly model an out-of-order processor's instruction
-// buffers. If the actual value of this threshold matters much in practice, then
-// it can be specified by the machine model. For now, it's an experimental
-// tuning knob to determine when and if it matters.
-static cl::opt<unsigned> ILPWindow("ilp-window", cl::Hidden,
- cl::desc("Allow expected latency to exceed the critical path by N cycles "
- "before attempting to balance ILP"),
- cl::init(10U));
-
// Experimental heuristics
static cl::opt<bool> EnableLoadCluster("misched-cluster", cl::Hidden,
cl::desc("Enable load clustering."), cl::init(true));
@@ -1297,7 +1288,8 @@
if (L > RemLatency)
RemLatency = L;
}
- if (RemLatency + ExpectedLatency >= Rem->CriticalPath + ILPWindow
+ unsigned CriticalPathLimit = Rem->CriticalPath + SchedModel->getILPWindow();
+ if (RemLatency + ExpectedLatency >= CriticalPathLimit
&& RemLatency > Rem->getMaxRemainingCount(SchedModel)) {
Policy.ReduceLatency = true;
DEBUG(dbgs() << "Increase ILP: " << Available.getName() << '\n');
Modified: llvm/trunk/lib/Target/ARM/ARMScheduleA9.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMScheduleA9.td?rev=171946&r1=171945&r2=171946&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMScheduleA9.td (original)
+++ llvm/trunk/lib/Target/ARM/ARMScheduleA9.td Tue Jan 8 21:36:49 2013
@@ -1887,6 +1887,9 @@
let LoadLatency = 2; // Optimistic load latency assuming bypass.
// This is overriden by OperandCycles if the
// Itineraries are queried instead.
+ let ILPWindow = 10; // Don't reschedule small blocks to hide
+ // latency. Minimum latency requirements are already
+ // modeled strictly by reserving resources.
let MispredictPenalty = 8; // Based on estimate of pipeline depth.
let Itineraries = CortexA9Itineraries;
Modified: llvm/trunk/lib/Target/X86/X86Schedule.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86Schedule.td?rev=171946&r1=171945&r2=171946&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86Schedule.td (original)
+++ llvm/trunk/lib/Target/X86/X86Schedule.td Tue Jan 8 21:36:49 2013
@@ -470,12 +470,17 @@
// latencies. Since these latencies are not used for pipeline hazards,
// they do not need to be exact.
//
+// ILPWindow=10 is an arbitrary threshold that approximates cycles of
+// latency hidden by instruction buffers. The actual value is not very
+// important but should be zero for inorder and nonzero for OOO processors.
+//
// The GenericModel contains no instruciton itineraries.
def GenericModel : SchedMachineModel {
let IssueWidth = 4;
let MinLatency = 0;
let LoadLatency = 4;
let HighLatency = 10;
+ let ILPWindow = 10;
}
include "X86ScheduleAtom.td"
Modified: llvm/trunk/lib/Target/X86/X86ScheduleAtom.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ScheduleAtom.td?rev=171946&r1=171945&r2=171946&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleAtom.td (original)
+++ llvm/trunk/lib/Target/X86/X86ScheduleAtom.td Tue Jan 8 21:36:49 2013
@@ -525,6 +525,7 @@
// OperandCycles may be used for expected latency.
let LoadLatency = 3; // Expected cycles, may be overriden by OperandCycles.
let HighLatency = 30;// Expected, may be overriden by OperandCycles.
+ let ILPWindow = 0; // Always try to hide expected latency.
let Itineraries = AtomItineraries;
}
Removed: llvm/trunk/test/CodeGen/ARM/misched-inorder-latency.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/misched-inorder-latency.ll?rev=171945&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/misched-inorder-latency.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/misched-inorder-latency.ll (removed)
@@ -1,48 +0,0 @@
-; RUN: llc < %s -enable-misched -march=thumb -mcpu=swift \
-; RUN: -pre-RA-sched=source -scheditins=false -ilp-window=0 \
-; RUN: -disable-ifcvt-triangle-false -disable-post-ra | FileCheck %s
-;
-; For these tests, we set -ilp-window=0 to simulate in order processor.
-
-; %val1 is a 3-cycle load live out of %entry. It should be hoisted
-; above the add.
-; CHECK: @testload
-; CHECK: %entry
-; CHECK: ldr
-; CHECK: adds
-; CHECK: bne
-; CHECK: %true
-define i32 @testload(i32 *%ptr, i32 %sumin) {
-entry:
- %sum1 = add i32 %sumin, 1
- %val1 = load i32* %ptr
- %p = icmp eq i32 %sumin, 0
- br i1 %p, label %true, label %end
-true:
- %sum2 = add i32 %sum1, 1
- %ptr2 = getelementptr i32* %ptr, i32 1
- %val = load i32* %ptr2
- %val2 = add i32 %val1, %val
- br label %end
-end:
- %valmerge = phi i32 [ %val1, %entry], [ %val2, %true ]
- %summerge = phi i32 [ %sum1, %entry], [ %sum2, %true ]
- %sumout = add i32 %valmerge, %summerge
- ret i32 %sumout
-}
-
-; The prefetch gets a default latency of 3 cycles and should be hoisted
-; above the add.
-;
-; CHECK: @testprefetch
-; CHECK: %entry
-; CHECK: pld
-; CHECK: adds
-; CHECK: bx
-define i32 @testprefetch(i8 *%ptr, i32 %i) {
-entry:
- %tmp = add i32 %i, 1
- tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 3, i32 1 )
- ret i32 %tmp
-}
-declare void @llvm.prefetch(i8*, i32, i32, i32) nounwind
Copied: llvm/trunk/test/CodeGen/PowerPC/misched-inorder-latency.ll (from r171943, llvm/trunk/test/CodeGen/ARM/misched-inorder-latency.ll)
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/misched-inorder-latency.ll?p2=llvm/trunk/test/CodeGen/PowerPC/misched-inorder-latency.ll&p1=llvm/trunk/test/CodeGen/ARM/misched-inorder-latency.ll&r1=171943&r2=171946&rev=171946&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/misched-inorder-latency.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/misched-inorder-latency.ll Tue Jan 8 21:36:49 2013
@@ -1,15 +1,15 @@
-; RUN: llc < %s -enable-misched -march=thumb -mcpu=swift \
-; RUN: -pre-RA-sched=source -scheditins=false -ilp-window=0 \
+; RUN: llc < %s -enable-misched -pre-RA-sched=source -scheditins=false \
; RUN: -disable-ifcvt-triangle-false -disable-post-ra | FileCheck %s
;
-; For these tests, we set -ilp-window=0 to simulate in order processor.
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-bgq-linux"
-; %val1 is a 3-cycle load live out of %entry. It should be hoisted
+; %val1 is a load live out of %entry. It should be hoisted
; above the add.
-; CHECK: @testload
+; CHECK: testload:
; CHECK: %entry
-; CHECK: ldr
-; CHECK: adds
+; CHECK: lwz
+; CHECK: addi
; CHECK: bne
; CHECK: %true
define i32 @testload(i32 *%ptr, i32 %sumin) {
@@ -34,15 +34,22 @@
; The prefetch gets a default latency of 3 cycles and should be hoisted
; above the add.
;
-; CHECK: @testprefetch
+; CHECK: testprefetch:
; CHECK: %entry
-; CHECK: pld
-; CHECK: adds
-; CHECK: bx
+; CHECK: dcbt
+; CHECK: addi
+; CHECK: blr
define i32 @testprefetch(i8 *%ptr, i32 %i) {
entry:
- %tmp = add i32 %i, 1
+ %val1 = add i32 %i, 1
tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 3, i32 1 )
- ret i32 %tmp
+ %p = icmp eq i32 %i, 0
+ br i1 %p, label %true, label %end
+true:
+ %val2 = add i32 %val1, 1
+ br label %end
+end:
+ %valmerge = phi i32 [ %val1, %entry], [ %val2, %true ]
+ ret i32 %valmerge
}
declare void @llvm.prefetch(i8*, i32, i32, i32) nounwind
Modified: llvm/trunk/utils/TableGen/SubtargetEmitter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/utils/TableGen/SubtargetEmitter.cpp?rev=171946&r1=171945&r2=171946&view=diff
==============================================================================
--- llvm/trunk/utils/TableGen/SubtargetEmitter.cpp (original)
+++ llvm/trunk/utils/TableGen/SubtargetEmitter.cpp Tue Jan 8 21:36:49 2013
@@ -1108,6 +1108,7 @@
EmitProcessorProp(OS, PI->ModelDef, "MinLatency", ',');
EmitProcessorProp(OS, PI->ModelDef, "LoadLatency", ',');
EmitProcessorProp(OS, PI->ModelDef, "HighLatency", ',');
+ EmitProcessorProp(OS, PI->ModelDef, "ILPWindow", ',');
EmitProcessorProp(OS, PI->ModelDef, "MispredictPenalty", ',');
OS << " " << PI->Index << ", // Processor ID\n";
if (PI->hasInstrSchedModel())
More information about the llvm-commits
mailing list