[llvm-commits] [llvm] r115755 - in /llvm/trunk: include/llvm/Target/ lib/CodeGen/ lib/CodeGen/SelectionDAG/ lib/Target/ lib/Target/ARM/ test/CodeGen/ARM/
Evan Cheng
evan.cheng at apple.com
Tue Oct 5 23:27:31 PDT 2010
Author: evancheng
Date: Wed Oct 6 01:27:31 2010
New Revision: 115755
URL: http://llvm.org/viewvc/llvm-project?rev=115755&view=rev
Log:
- Add TargetInstrInfo::getOperandLatency() to compute operand latencies. This
allow target to correctly compute latency for cases where static scheduling
itineraries isn't sufficient. e.g. variable_ops instructions such as
ARM::ldm.
This also allows target without scheduling itineraries to compute operand
latencies. e.g. X86 can return (approximated) latencies for high latency
instructions such as division.
- Compute operand latencies for those defined by load multiple instructions,
e.g. ldm and those used by store multiple instructions, e.g. stm.
Modified:
llvm/trunk/include/llvm/Target/TargetInstrInfo.h
llvm/trunk/lib/CodeGen/ScheduleDAGInstrs.cpp
llvm/trunk/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
llvm/trunk/lib/Target/ARM/ARMBaseInstrInfo.cpp
llvm/trunk/lib/Target/ARM/ARMBaseInstrInfo.h
llvm/trunk/lib/Target/ARM/ARMInstrInfo.td
llvm/trunk/lib/Target/ARM/ARMInstrThumb.td
llvm/trunk/lib/Target/ARM/ARMInstrThumb2.td
llvm/trunk/lib/Target/ARM/ARMSchedule.td
llvm/trunk/lib/Target/ARM/ARMScheduleA8.td
llvm/trunk/lib/Target/ARM/ARMScheduleA9.td
llvm/trunk/lib/Target/ARM/ARMScheduleV6.td
llvm/trunk/lib/Target/TargetInstrInfo.cpp
llvm/trunk/test/CodeGen/ARM/arguments.ll
llvm/trunk/test/CodeGen/ARM/select.ll
Modified: llvm/trunk/include/llvm/Target/TargetInstrInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Target/TargetInstrInfo.h?rev=115755&r1=115754&r2=115755&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Target/TargetInstrInfo.h (original)
+++ llvm/trunk/include/llvm/Target/TargetInstrInfo.h Wed Oct 6 01:27:31 2010
@@ -608,6 +608,22 @@
/// instruction will be decoded to on the target cpu.
virtual unsigned getNumMicroOps(const MachineInstr *MI,
const InstrItineraryData *ItinData) const;
+
+ /// getOperandLatency - Compute and return the use operand latency of a given
+ /// itinerary class and operand index if the value is produced by an
+ /// instruction of the specified itinerary class and def operand index.
+ /// In most cases, the static scheduling itinerary was enough to determine the
+ /// operand latency. But it may not be possible for instructions with variable
+ /// number of defs / uses.
+ virtual
+ int getOperandLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *UseMI, unsigned UseIdx) const;
+
+ virtual
+ int getOperandLatency(const InstrItineraryData *ItinData,
+ SDNode *DefNode, unsigned DefIdx,
+ SDNode *UseNode, unsigned UseIdx) const;
};
/// TargetInstrInfoImpl - This is the default implementation of
Modified: llvm/trunk/lib/CodeGen/ScheduleDAGInstrs.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/ScheduleDAGInstrs.cpp?rev=115755&r1=115754&r2=115755&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/ScheduleDAGInstrs.cpp (original)
+++ llvm/trunk/lib/CodeGen/ScheduleDAGInstrs.cpp Wed Oct 6 01:27:31 2010
@@ -527,10 +527,10 @@
MachineInstr *DefMI = Def->getInstr();
int DefIdx = DefMI->findRegisterDefOperandIdx(Reg);
if (DefIdx != -1) {
- unsigned DefClass = DefMI->getDesc().getSchedClass();
- MachineInstr *UseMI = Use->getInstr();
- unsigned UseClass = UseMI->getDesc().getSchedClass();
+ const TargetInstrDesc &DefTID = DefMI->getDesc();
+ unsigned DefClass = DefTID.getSchedClass();
+ MachineInstr *UseMI = Use->getInstr();
// For all uses of the register, calculate the maxmimum latency
int Latency = -1;
for (unsigned i = 0, e = UseMI->getNumOperands(); i != e; ++i) {
@@ -541,8 +541,7 @@
if (MOReg != Reg)
continue;
- int UseCycle = InstrItins->getOperandLatency(DefClass, DefIdx,
- UseClass, i);
+ int UseCycle = TII->getOperandLatency(InstrItins, DefMI, DefIdx, UseMI, i);
Latency = std::max(Latency, UseCycle);
// If we found a latency, then replace the existing dependence latency.
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp?rev=115755&r1=115754&r2=115755&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp Wed Oct 6 01:27:31 2010
@@ -450,29 +450,11 @@
if (ForceUnitLatencies())
return;
- if (!InstrItins || InstrItins->isEmpty())
- return;
-
if (dep.getKind() != SDep::Data)
return;
unsigned DefIdx = Use->getOperand(OpIdx).getResNo();
- if (!Def->isMachineOpcode())
- return;
-
- const TargetInstrDesc &II = TII->get(Def->getMachineOpcode());
- if (DefIdx >= II.getNumDefs())
- return;
-
- int Latency = 0;
- if (!Use->isMachineOpcode()) {
- Latency = InstrItins->getOperandCycle(II.getSchedClass(), DefIdx);
- } else {
- unsigned DefClass = II.getSchedClass();
- unsigned UseClass = TII->get(Use->getMachineOpcode()).getSchedClass();
- Latency = InstrItins->getOperandLatency(DefClass, DefIdx, UseClass, OpIdx);
- }
-
+ int Latency = TII->getOperandLatency(InstrItins, Def, DefIdx, Use, OpIdx);
if (Latency >= 0)
dep.setLatency(Latency);
}
Modified: llvm/trunk/lib/Target/ARM/ARMBaseInstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMBaseInstrInfo.cpp?rev=115755&r1=115754&r2=115755&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMBaseInstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMBaseInstrInfo.cpp Wed Oct 6 01:27:31 2010
@@ -1642,3 +1642,164 @@
}
}
}
+
+int
+ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
+ const TargetInstrDesc &DefTID,
+ unsigned DefIdx, unsigned DefAlign,
+ const TargetInstrDesc &UseTID,
+ unsigned UseIdx, unsigned UseAlign) const {
+ unsigned DefClass = DefTID.getSchedClass();
+ unsigned UseClass = UseTID.getSchedClass();
+
+ if (DefIdx < DefTID.getNumDefs() && UseIdx < UseTID.getNumOperands())
+ return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
+
+ // This may be a def / use of a variable_ops instruction, the operand
+ // latency might be determinable dynamically. Let the target try to
+ // figure it out.
+ bool LdmBypass = false;
+ int DefCycle = -1;
+ switch (DefTID.getOpcode()) {
+ default:
+ DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
+ break;
+ case ARM::LDM_RET:
+ case ARM::LDM:
+ case ARM::LDM_UPD:
+ case ARM::tLDM:
+ case ARM::tLDM_UPD:
+ case ARM::tPUSH:
+ case ARM::t2LDM_RET:
+ case ARM::t2LDM:
+ case ARM::t2LDM_UPD: {
+ LdmBypass = 1;
+ unsigned RegNo = (DefIdx+1) - DefTID.getNumOperands() + 1;
+ if (Subtarget.isCortexA8()) {
+ // 4 registers would be issued: 1, 2, 1.
+ // 5 registers would be issued: 1, 2, 2.
+ DefCycle = RegNo / 2;
+ if (DefCycle < 1)
+ DefCycle = 1;
+ // Result latency is issue cycle + 2: E2.
+ DefCycle += 2;
+ } else if (Subtarget.isCortexA9()) {
+ DefCycle = (RegNo / 2);
+ // If there are odd number of registers or if it's not 64-bit aligned,
+ // then it takes an extra AGU (Address Generation Unit) cycle.
+ if ((RegNo % 2) || DefAlign < 8)
+ ++DefCycle;
+ // Result latency is AGU cycles + 2.
+ DefCycle += 2;
+ } else {
+ // Assume the worst.
+ DefCycle = RegNo + 2;
+ }
+ }
+ }
+
+ if (DefCycle == -1)
+ // We can't seem to determine the result latency of the def, assume it's 2.
+ DefCycle = 2;
+
+ int UseCycle = -1;
+ switch (UseTID.getOpcode()) {
+ default:
+ UseCycle = ItinData->getOperandCycle(UseClass, UseIdx);
+ break;
+ case ARM::STM:
+ case ARM::STM_UPD:
+ case ARM::tSTM_UPD:
+ case ARM::tPOP_RET:
+ case ARM::tPOP:
+ case ARM::t2STM:
+ case ARM::t2STM_UPD: {
+ unsigned RegNo = UseIdx - UseTID.getNumOperands() + 1;
+ if (Subtarget.isCortexA8()) {
+ // 4 registers would be issued: 1, 2, 1.
+ // 5 registers would be issued: 1, 2, 2.
+ UseCycle = RegNo / 2;
+ if (UseCycle < 2)
+ UseCycle = 2;
+ // Result latency is issue cycle + 2: E2.
+ UseCycle += 2;
+ } else if (Subtarget.isCortexA9()) {
+ UseCycle = (RegNo / 2);
+ // If there are odd number of registers or if it's not 64-bit aligned,
+ // then it takes an extra AGU (Address Generation Unit) cycle.
+ if ((RegNo % 2) || UseAlign < 8)
+ ++UseCycle;
+ // Result latency is AGU cycles + 2.
+ UseCycle += 2;
+ } else {
+ // Assume the worst.
+ UseCycle = RegNo + 2;
+ }
+ }
+ }
+
+ if (UseCycle == -1)
+ // Assume it's read in the first stage.
+ UseCycle = 1;
+
+ UseCycle = DefCycle - UseCycle + 1;
+ if (UseCycle > 0) {
+ if (LdmBypass) {
+ // It's a variable_ops instruction so we can't use DefIdx here. Just use
+ // first def operand.
+ if (ItinData->hasPipelineForwarding(DefClass, DefTID.getNumOperands()-1,
+ UseClass, UseIdx))
+ --UseCycle;
+ } else if (ItinData->hasPipelineForwarding(DefClass, DefIdx,
+ UseClass, UseIdx))
+ --UseCycle;
+ }
+
+ return UseCycle;
+}
+
+int
+ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *UseMI, unsigned UseIdx) const {
+ if (DefMI->isCopyLike() || DefMI->isInsertSubreg() ||
+ DefMI->isRegSequence() || DefMI->isImplicitDef())
+ return 1;
+
+ const TargetInstrDesc &DefTID = DefMI->getDesc();
+ if (!ItinData || ItinData->isEmpty())
+ return DefTID.mayLoad() ? 3 : 1;
+
+ const TargetInstrDesc &UseTID = UseMI->getDesc();
+ unsigned DefAlign = DefMI->hasOneMemOperand()
+ ? (*DefMI->memoperands_begin())->getAlignment() : 0;
+ unsigned UseAlign = UseMI->hasOneMemOperand()
+ ? (*UseMI->memoperands_begin())->getAlignment() : 0;
+ return getOperandLatency(ItinData, DefTID, DefIdx, DefAlign,
+ UseTID, UseIdx, UseAlign);
+}
+
+int
+ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
+ SDNode *DefNode, unsigned DefIdx,
+ SDNode *UseNode, unsigned UseIdx) const {
+ if (!DefNode->isMachineOpcode())
+ return 1;
+
+ const TargetInstrDesc &DefTID = get(DefNode->getMachineOpcode());
+ if (!ItinData || ItinData->isEmpty())
+ return DefTID.mayLoad() ? 3 : 1;
+
+ if (!UseNode->isMachineOpcode())
+ return ItinData->getOperandCycle(DefTID.getSchedClass(), DefIdx);
+
+ const TargetInstrDesc &UseTID = get(UseNode->getMachineOpcode());
+ const MachineSDNode *DefMN = dyn_cast<MachineSDNode>(DefNode);
+ unsigned DefAlign = !DefMN->memoperands_empty()
+ ? (*DefMN->memoperands_begin())->getAlignment() : 0;
+ const MachineSDNode *UseMN = dyn_cast<MachineSDNode>(UseNode);
+ unsigned UseAlign = !UseMN->memoperands_empty()
+ ? (*UseMN->memoperands_begin())->getAlignment() : 0;
+ return getOperandLatency(ItinData, DefTID, DefIdx, DefAlign,
+ UseTID, UseIdx, UseAlign);
+}
Modified: llvm/trunk/lib/Target/ARM/ARMBaseInstrInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMBaseInstrInfo.h?rev=115755&r1=115754&r2=115755&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMBaseInstrInfo.h (original)
+++ llvm/trunk/lib/Target/ARM/ARMBaseInstrInfo.h Wed Oct 6 01:27:31 2010
@@ -340,6 +340,21 @@
virtual unsigned getNumMicroOps(const MachineInstr *MI,
const InstrItineraryData *ItinData) const;
+
+ virtual
+ int getOperandLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *UseMI, unsigned UseIdx) const;
+ virtual
+ int getOperandLatency(const InstrItineraryData *ItinData,
+ SDNode *DefNode, unsigned DefIdx,
+ SDNode *UseNode, unsigned UseIdx) const;
+private:
+ int getOperandLatency(const InstrItineraryData *ItinData,
+ const TargetInstrDesc &DefTID,
+ unsigned DefIdx, unsigned DefAlign,
+ const TargetInstrDesc &UseTID,
+ unsigned UseIdx, unsigned UseAlign) const;
};
static inline
Modified: llvm/trunk/lib/Target/ARM/ARMInstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrInfo.td?rev=115755&r1=115754&r2=115755&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMInstrInfo.td (original)
+++ llvm/trunk/lib/Target/ARM/ARMInstrInfo.td Wed Oct 6 01:27:31 2010
@@ -954,7 +954,7 @@
hasExtraDefRegAllocReq = 1 in
def LDM_RET : AXI4ld<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
reglist:$dsts, variable_ops),
- IndexModeUpd, LdStMulFrm, IIC_iLoadmBr,
+ IndexModeUpd, LdStMulFrm, IIC_iLoad_mBr,
"ldm${addr:submode}${p}\t$addr!, $dsts",
"$addr.addr = $wb", []>;
@@ -1463,12 +1463,12 @@
let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
def LDM : AXI4ld<(outs), (ins addrmode4:$addr, pred:$p,
reglist:$dsts, variable_ops),
- IndexModeNone, LdStMulFrm, IIC_iLoadm,
+ IndexModeNone, LdStMulFrm, IIC_iLoad_m,
"ldm${addr:submode}${p}\t$addr, $dsts", "", []>;
def LDM_UPD : AXI4ld<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
reglist:$dsts, variable_ops),
- IndexModeUpd, LdStMulFrm, IIC_iLoadm,
+ IndexModeUpd, LdStMulFrm, IIC_iLoad_mu,
"ldm${addr:submode}${p}\t$addr!, $dsts",
"$addr.addr = $wb", []>;
} // mayLoad, neverHasSideEffects, hasExtraDefRegAllocReq
@@ -1476,12 +1476,12 @@
let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
def STM : AXI4st<(outs), (ins addrmode4:$addr, pred:$p,
reglist:$srcs, variable_ops),
- IndexModeNone, LdStMulFrm, IIC_iStorem,
+ IndexModeNone, LdStMulFrm, IIC_iStore_m,
"stm${addr:submode}${p}\t$addr, $srcs", "", []>;
def STM_UPD : AXI4st<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
reglist:$srcs, variable_ops),
- IndexModeUpd, LdStMulFrm, IIC_iStorem,
+ IndexModeUpd, LdStMulFrm, IIC_iStore_mu,
"stm${addr:submode}${p}\t$addr!, $srcs",
"$addr.addr = $wb", []>;
} // mayStore, neverHasSideEffects, hasExtraSrcRegAllocReq
Modified: llvm/trunk/lib/Target/ARM/ARMInstrThumb.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrThumb.td?rev=115755&r1=115754&r2=115755&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMInstrThumb.td (original)
+++ llvm/trunk/lib/Target/ARM/ARMInstrThumb.td Wed Oct 6 01:27:31 2010
@@ -280,7 +280,7 @@
let isReturn = 1, isTerminator = 1, isBarrier = 1, mayLoad = 1,
hasExtraDefRegAllocReq = 1 in
def tPOP_RET : T1I<(outs), (ins pred:$p, reglist:$dsts, variable_ops),
- IIC_iLoadmBr,
+ IIC_iPop_Br,
"pop${p}\t$dsts", []>,
T1Misc<{1,1,0,?,?,?,?}>;
@@ -535,13 +535,13 @@
let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
def tLDM : T1I<(outs),
(ins addrmode4:$addr, pred:$p, reglist:$dsts, variable_ops),
- IIC_iLoadm,
+ IIC_iLoad_m,
"ldm${addr:submode}${p}\t$addr, $dsts", []>,
T1Encoding<{1,1,0,0,1,?}>; // A6.2 & A8.6.53
def tLDM_UPD : T1It<(outs tGPR:$wb),
(ins addrmode4:$addr, pred:$p, reglist:$dsts, variable_ops),
- IIC_iLoadm,
+ IIC_iLoad_m,
"ldm${addr:submode}${p}\t$addr!, $dsts",
"$addr.addr = $wb", []>,
T1Encoding<{1,1,0,0,1,?}>; // A6.2 & A8.6.53
@@ -550,18 +550,20 @@
let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in
def tSTM_UPD : T1It<(outs tGPR:$wb),
(ins addrmode4:$addr, pred:$p, reglist:$srcs, variable_ops),
- IIC_iStorem,
+ IIC_iStore_mu,
"stm${addr:submode}${p}\t$addr!, $srcs",
"$addr.addr = $wb", []>,
T1Encoding<{1,1,0,0,0,?}>; // A6.2 & A8.6.189
let mayLoad = 1, Uses = [SP], Defs = [SP], hasExtraDefRegAllocReq = 1 in
-def tPOP : T1I<(outs), (ins pred:$p, reglist:$dsts, variable_ops), IIC_iLoadmBr,
+def tPOP : T1I<(outs), (ins pred:$p, reglist:$dsts, variable_ops),
+ IIC_iPop,
"pop${p}\t$dsts", []>,
T1Misc<{1,1,0,?,?,?,?}>;
let mayStore = 1, Uses = [SP], Defs = [SP], hasExtraSrcRegAllocReq = 1 in
-def tPUSH : T1I<(outs), (ins pred:$p, reglist:$srcs, variable_ops), IIC_iStorem,
+def tPUSH : T1I<(outs), (ins pred:$p, reglist:$srcs, variable_ops),
+ IIC_iStore_m,
"push${p}\t$srcs", []>,
T1Misc<{0,1,0,?,?,?,?}>;
Modified: llvm/trunk/lib/Target/ARM/ARMInstrThumb2.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrThumb2.td?rev=115755&r1=115754&r2=115755&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMInstrThumb2.td (original)
+++ llvm/trunk/lib/Target/ARM/ARMInstrThumb2.td Wed Oct 6 01:27:31 2010
@@ -1243,7 +1243,7 @@
let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
def t2LDM : T2XI<(outs), (ins addrmode4:$addr, pred:$p,
- reglist:$dsts, variable_ops), IIC_iLoadm,
+ reglist:$dsts, variable_ops), IIC_iLoad_m,
"ldm${addr:submode}${p}${addr:wide}\t$addr, $dsts", []> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b00;
@@ -1254,7 +1254,8 @@
}
def t2LDM_UPD : T2XIt<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
- reglist:$dsts, variable_ops), IIC_iLoadm,
+ reglist:$dsts, variable_ops),
+ IIC_iLoad_mu,
"ldm${addr:submode}${p}${addr:wide}\t$addr!, $dsts",
"$addr.addr = $wb", []> {
let Inst{31-27} = 0b11101;
@@ -1268,7 +1269,7 @@
let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
def t2STM : T2XI<(outs), (ins addrmode4:$addr, pred:$p,
- reglist:$srcs, variable_ops), IIC_iStorem,
+ reglist:$srcs, variable_ops), IIC_iStore_m,
"stm${addr:submode}${p}${addr:wide}\t$addr, $srcs", []> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b00;
@@ -1280,7 +1281,7 @@
def t2STM_UPD : T2XIt<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
reglist:$srcs, variable_ops),
- IIC_iStorem,
+ IIC_iStore_m,
"stm${addr:submode}${p}${addr:wide}\t$addr!, $srcs",
"$addr.addr = $wb", []> {
let Inst{31-27} = 0b11101;
@@ -2473,7 +2474,7 @@
hasExtraDefRegAllocReq = 1 in
def t2LDM_RET : T2XIt<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
reglist:$dsts, variable_ops),
- IIC_iLoadmBr,
+ IIC_iLoad_mBr,
"ldm${addr:submode}${p}${addr:wide}\t$addr!, $dsts",
"$addr.addr = $wb", []> {
let Inst{31-27} = 0b11101;
Modified: llvm/trunk/lib/Target/ARM/ARMSchedule.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMSchedule.td?rev=115755&r1=115754&r2=115755&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMSchedule.td (original)
+++ llvm/trunk/lib/Target/ARM/ARMSchedule.td Wed Oct 6 01:27:31 2010
@@ -67,8 +67,11 @@
def IIC_iLoad_d_i : InstrItinClass;
def IIC_iLoad_d_r : InstrItinClass;
def IIC_iLoad_d_ru : InstrItinClass;
-def IIC_iLoadm : InstrItinClass<0>; // micro-coded
-def IIC_iLoadmBr : InstrItinClass<0>; // micro-coded
+def IIC_iLoad_m : InstrItinClass<0>; // micro-coded
+def IIC_iLoad_mu : InstrItinClass<0>; // micro-coded
+def IIC_iLoad_mBr : InstrItinClass<0>; // micro-coded
+def IIC_iPop : InstrItinClass<0>; // micro-coded
+def IIC_iPop_Br : InstrItinClass<0>; // micro-coded
def IIC_iLoadiALU : InstrItinClass;
def IIC_iStore_i : InstrItinClass;
def IIC_iStore_r : InstrItinClass;
@@ -85,7 +88,8 @@
def IIC_iStore_d_i : InstrItinClass;
def IIC_iStore_d_r : InstrItinClass;
def IIC_iStore_d_ru : InstrItinClass;
-def IIC_iStorem : InstrItinClass<0>; // micro-coded
+def IIC_iStore_m : InstrItinClass<0>; // micro-coded
+def IIC_iStore_mu : InstrItinClass<0>; // micro-coded
def IIC_Br : InstrItinClass;
def IIC_fpSTAT : InstrItinClass;
def IIC_fpUNA32 : InstrItinClass;
Modified: llvm/trunk/lib/Target/ARM/ARMScheduleA8.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMScheduleA8.td?rev=115755&r1=115754&r2=115755&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMScheduleA8.td (original)
+++ llvm/trunk/lib/Target/ARM/ARMScheduleA8.td Wed Oct 6 01:27:31 2010
@@ -172,21 +172,44 @@
InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_LdSt0]>], [4, 3, 1, 1]>,
//
- // Load multiple
- InstrItinData<IIC_iLoadm , [InstrStage<2, [A8_Issue], 0>,
+ // Load multiple, def is the 5th operand.
+ InstrItinData<IIC_iLoad_m , [InstrStage<2, [A8_Issue], 0>,
InstrStage<2, [A8_Pipe0], 0>,
InstrStage<2, [A8_Pipe1]>,
InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
- InstrStage<1, [A8_LdSt0]>]>,
-
+ InstrStage<1, [A8_LdSt0]>], [1, 1, 1, 1, 3]>,
+ //
+ // Load multiple + update, defs are the 1st and 5th operands.
+ InstrItinData<IIC_iLoad_mu , [InstrStage<2, [A8_Issue], 0>,
+ InstrStage<2, [A8_Pipe0], 0>,
+ InstrStage<2, [A8_Pipe1]>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_LdSt0]>], [2, 1, 1, 1, 3]>,
//
// Load multiple plus branch
- InstrItinData<IIC_iLoadmBr , [InstrStage<2, [A8_Issue], 0>,
+ InstrItinData<IIC_iLoad_mBr, [InstrStage<2, [A8_Issue], 0>,
InstrStage<2, [A8_Pipe0], 0>,
InstrStage<2, [A8_Pipe1]>,
InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_LdSt0]>,
- InstrStage<1, [A8_Pipe0, A8_Pipe1]>]>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>],
+ [1, 2, 1, 1, 3]>,
+ //
+ // Pop, def is the 3rd operand.
+ InstrItinData<IIC_iPop , [InstrStage<2, [A8_Issue], 0>,
+ InstrStage<2, [A8_Pipe0], 0>,
+ InstrStage<2, [A8_Pipe1]>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_LdSt0]>], [1, 1, 3]>,
+ //
+ // Push, def is the 3th operand.
+ InstrItinData<IIC_iPop_Br, [InstrStage<2, [A8_Issue], 0>,
+ InstrStage<2, [A8_Pipe0], 0>,
+ InstrStage<2, [A8_Pipe1]>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_LdSt0]>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>],
+ [1, 1, 3]>,
//
// iLoadi + iALUr for t2LDRpci_pic.
@@ -266,11 +289,18 @@
InstrStage<1, [A8_LdSt0]>], [3, 3, 1, 1]>,
//
// Store multiple
- InstrItinData<IIC_iStorem , [InstrStage<2, [A8_Issue], 0>,
+ InstrItinData<IIC_iStore_m , [InstrStage<2, [A8_Issue], 0>,
InstrStage<2, [A8_Pipe0], 0>,
InstrStage<2, [A8_Pipe1]>,
InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_LdSt0]>]>,
+ //
+ // Store multiple + update
+ InstrItinData<IIC_iStore_mu, [InstrStage<2, [A8_Issue], 0>,
+ InstrStage<2, [A8_Pipe0], 0>,
+ InstrStage<2, [A8_Pipe1]>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_LdSt0]>], [2]>,
// Branch
//
Modified: llvm/trunk/lib/Target/ARM/ARMScheduleA9.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMScheduleA9.td?rev=115755&r1=115754&r2=115755&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMScheduleA9.td (original)
+++ llvm/trunk/lib/Target/ARM/ARMScheduleA9.td Wed Oct 6 01:27:31 2010
@@ -242,18 +242,42 @@
InstrStage<2, [A9_AGU]>],
[5, 4, 1, 1], [A9_LdBypass]>,
//
- // Load multiple
- InstrItinData<IIC_iLoadm , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ // Load multiple, def is the 5th operand.
+ InstrItinData<IIC_iLoad_m , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
InstrStage<2, [A9_AGU]>],
- [3], [A9_LdBypass]>,
-
+ [1, 1, 1, 1, 3],
+ [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass]>,
+ //
+ // Load multiple + update, defs are the 1st and 5th operands.
+ InstrItinData<IIC_iLoad_mu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU]>],
+ [2, 1, 1, 1, 3],
+ [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass]>,
//
// Load multiple plus branch
- InstrItinData<IIC_iLoadmBr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrItinData<IIC_iLoad_mBr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
InstrStage<1, [A9_AGU]>,
- InstrStage<1, [A9_Branch]>]>,
+ InstrStage<1, [A9_Branch]>],
+ [1, 2, 1, 1, 3],
+ [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass]>,
+ //
+ // Pop, def is the 3rd operand.
+ InstrItinData<IIC_iPop , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU]>],
+ [1, 1, 3],
+ [NoBypass, NoBypass, A9_LdBypass]>,
+ //
+ // Pop + branch, def is the 3rd operand.
+ InstrItinData<IIC_iPop_Br, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU]>,
+ InstrStage<1, [A9_Branch]>],
+ [1, 1, 3],
+ [NoBypass, NoBypass, A9_LdBypass]>,
//
// iLoadi + iALUr for t2LDRpci_pic.
@@ -329,13 +353,21 @@
[3, 1, 1, 1]>,
//
// Store multiple
- InstrItinData<IIC_iStorem , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrItinData<IIC_iStore_m , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
InstrStage<1, [A9_AGU]>]>,
+ //
+ // Store multiple + update
+ InstrItinData<IIC_iStore_mu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU]>], [2]>,
+
// Branch
//
// no delay slots, so the latency of a branch is unimportant
- InstrItinData<IIC_Br , [InstrStage<1, [A9_Branch]>]>,
+ InstrItinData<IIC_Br , [InstrStage<1, [A9_Issue0], 0>,
+ InstrStage<1, [A9_Issue1], 0>,
+ InstrStage<1, [A9_Branch]>]>,
// VFP and NEON shares the same register file. This means that every VFP
// instruction should wait for full completion of the consecutive NEON
Modified: llvm/trunk/lib/Target/ARM/ARMScheduleV6.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMScheduleV6.td?rev=115755&r1=115754&r2=115755&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMScheduleV6.td (original)
+++ llvm/trunk/lib/Target/ARM/ARMScheduleV6.td Wed Oct 6 01:27:31 2010
@@ -116,19 +116,29 @@
InstrItinData<IIC_iLoad_bh_siu,[InstrStage<2, [V6_Pipe]>], [5, 2, 2, 1]>,
//
- // Load multiple
- InstrItinData<IIC_iLoadm , [InstrStage<3, [V6_Pipe]>]>,
-
+ // Load multiple, def is the 5th operand.
+ InstrItinData<IIC_iLoad_m , [InstrStage<3, [V6_Pipe]>], [1, 1, 1, 1, 4]>,
+ //
+ // Load multiple + update, defs are the 1st and 5th operands.
+ InstrItinData<IIC_iLoad_mu , [InstrStage<3, [V6_Pipe]>], [2, 1, 1, 1, 4]>,
//
// Load multiple plus branch
- InstrItinData<IIC_iLoadmBr , [InstrStage<3, [V6_Pipe]>,
- InstrStage<1, [V6_Pipe]>]>,
+ InstrItinData<IIC_iLoad_mBr, [InstrStage<3, [V6_Pipe]>,
+ InstrStage<1, [V6_Pipe]>], [1, 2, 1, 1, 4]>,
//
// iLoadi + iALUr for t2LDRpci_pic.
InstrItinData<IIC_iLoadiALU, [InstrStage<1, [V6_Pipe]>,
InstrStage<1, [V6_Pipe]>], [3, 1]>,
+ //
+ // Pop, def is the 3rd operand.
+ InstrItinData<IIC_iPop , [InstrStage<3, [V6_Pipe]>], [1, 1, 4]>,
+ //
+ // Pop + branch, def is the 3rd operand.
+ InstrItinData<IIC_iPop_Br, [InstrStage<3, [V6_Pipe]>,
+ InstrStage<1, [V6_Pipe]>], [1, 2, 4]>,
+
// Integer store pipeline
//
// Immediate offset
@@ -159,7 +169,10 @@
InstrItinData<IIC_iStore_bh_siu,[InstrStage<2, [V6_Pipe]>], [2, 2, 2, 1]>,
//
// Store multiple
- InstrItinData<IIC_iStorem , [InstrStage<3, [V6_Pipe]>]>,
+ InstrItinData<IIC_iStore_m , [InstrStage<3, [V6_Pipe]>]>,
+ //
+ // Store multiple + update
+ InstrItinData<IIC_iStore_mu , [InstrStage<3, [V6_Pipe]>], [2]>,
// Branch
//
Modified: llvm/trunk/lib/Target/TargetInstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/TargetInstrInfo.cpp?rev=115755&r1=115754&r2=115755&view=diff
==============================================================================
--- llvm/trunk/lib/Target/TargetInstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/TargetInstrInfo.cpp Wed Oct 6 01:27:31 2010
@@ -12,9 +12,10 @@
//===----------------------------------------------------------------------===//
#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Target/TargetInstrItineraries.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
@@ -64,6 +65,36 @@
return 1;
}
+int
+TargetInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *UseMI, unsigned UseIdx) const {
+ if (!ItinData || ItinData->isEmpty())
+ return -1;
+
+ unsigned DefClass = DefMI->getDesc().getSchedClass();
+ unsigned UseClass = UseMI->getDesc().getSchedClass();
+ return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
+}
+
+int
+TargetInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
+ SDNode *DefNode, unsigned DefIdx,
+ SDNode *UseNode, unsigned UseIdx) const {
+ if (!ItinData || ItinData->isEmpty())
+ return -1;
+
+ if (!DefNode->isMachineOpcode())
+ return -1;
+
+ unsigned DefClass = get(DefNode->getMachineOpcode()).getSchedClass();
+ if (!UseNode->isMachineOpcode())
+ return ItinData->getOperandCycle(DefClass, DefIdx);
+ unsigned UseClass = get(UseNode->getMachineOpcode()).getSchedClass();
+ return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
+}
+
+
/// insertNoop - Insert a noop into the instruction stream at the specified
/// point.
void TargetInstrInfo::insertNoop(MachineBasicBlock &MBB,
Modified: llvm/trunk/test/CodeGen/ARM/arguments.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/arguments.ll?rev=115755&r1=115754&r2=115755&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/arguments.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/arguments.ll Wed Oct 6 01:27:31 2010
@@ -13,8 +13,8 @@
; test that allocating the double to r2/r3 makes r1 unavailable on gnueabi.
define i32 @f2() nounwind optsize {
; ELF: f2:
-; ELF: mov r0, #128
-; ELF: str r0, [sp]
+; ELF: mov [[REGISTER:(r[0-9]+)]], #128
+; ELF: str [[REGISTER]], [sp]
; DARWIN: f2:
; DARWIN: mov r3, #128
entry:
Modified: llvm/trunk/test/CodeGen/ARM/select.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/select.ll?rev=115755&r1=115754&r2=115755&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/select.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/select.ll Wed Oct 6 01:27:31 2010
@@ -79,9 +79,9 @@
; CHECK-NEON: movw [[REGISTER_1:r[0-9]+]], #1123
; CHECK-NEON-NEXT: movs [[REGISTER_2:r[0-9]+]], #0
; CHECK-NEON-NEXT: cmp r0, [[REGISTER_1]]
-; CHECK-NEON-NEXT: adr [[REGISTER_3:r[0-9]+]], #LCPI
; CHECK-NEON-NEXT: it eq
; CHECK-NEON-NEXT: moveq [[REGISTER_2]], #4
+; CHECK-NEON-NEXT: adr [[REGISTER_3:r[0-9]+]], #LCPI
; CHECK-NEON-NEXT: ldr
; CHECK-NEON: bx
More information about the llvm-commits
mailing list