[llvm-commits] CVS: llvm/lib/Target/ARM/ARMISelLowering.h ARMISelLowering.cpp
Dale Johannesen
dalej at apple.com
Thu May 17 14:31:39 PDT 2007
Changes in directory llvm/lib/Target/ARM:
ARMISelLowering.h updated: 1.15 -> 1.16
ARMISelLowering.cpp updated: 1.52 -> 1.53
---
Log message:
More effective breakdown of memcpy into repeated load/store. These are now
emitted in the order load;load;load;store;store;store, which gives the
load-store optimizer a better chance of producing ldm/stm. Ideally the
register allocator (RA) would cooperate as well, but that is not there yet.
---
Diffs of the changes: (+77 -1)
ARMISelLowering.cpp | 77 +++++++++++++++++++++++++++++++++++++++++++++++++++-
ARMISelLowering.h | 1
2 files changed, 77 insertions(+), 1 deletion(-)
Index: llvm/lib/Target/ARM/ARMISelLowering.h
diff -u llvm/lib/Target/ARM/ARMISelLowering.h:1.15 llvm/lib/Target/ARM/ARMISelLowering.h:1.16
--- llvm/lib/Target/ARM/ARMISelLowering.h:1.15 Fri Apr 27 08:54:47 2007
+++ llvm/lib/Target/ARM/ARMISelLowering.h Thu May 17 16:31:21 2007
@@ -135,6 +135,7 @@
SDOperand LowerGLOBAL_OFFSET_TABLE(SDOperand Op, SelectionDAG &DAG);
SDOperand LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG);
SDOperand LowerBR_JT(SDOperand Op, SelectionDAG &DAG);
+ SDOperand LowerMEMCPY(SDOperand Op, SelectionDAG &DAG);
};
}
Index: llvm/lib/Target/ARM/ARMISelLowering.cpp
diff -u llvm/lib/Target/ARM/ARMISelLowering.cpp:1.52 llvm/lib/Target/ARM/ARMISelLowering.cpp:1.53
--- llvm/lib/Target/ARM/ARMISelLowering.cpp:1.52 Mon May 14 18:20:21 2007
+++ llvm/lib/Target/ARM/ARMISelLowering.cpp Thu May 17 16:31:21 2007
@@ -187,7 +187,7 @@
// Expand mem operations genericly.
setOperationAction(ISD::MEMSET , MVT::Other, Expand);
- setOperationAction(ISD::MEMCPY , MVT::Other, Expand);
+ setOperationAction(ISD::MEMCPY , MVT::Other, Custom);
setOperationAction(ISD::MEMMOVE , MVT::Other, Expand);
// Use the default implementation.
@@ -255,6 +255,8 @@
setSchedulingPreference(SchedulingForRegPressure);
computeRegisterProperties();
+
+ maxStoresPerMemcpy = 1; //// temporary - rewrite interface to use type
}
@@ -1328,6 +1330,78 @@
return DAG.getNode(ISD::BUILD_PAIR, MVT::i64, Lo, Hi);
}
+SDOperand ARMTargetLowering::LowerMEMCPY(SDOperand Op, SelectionDAG &DAG) {
+ SDOperand Chain = Op.getOperand(0);
+ SDOperand Dest = Op.getOperand(1);
+ SDOperand Src = Op.getOperand(2);
+ SDOperand Count = Op.getOperand(3);
+ unsigned Align =
+ (unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue();
+ if (Align == 0) Align = 1;
+
+ ConstantSDNode *I = dyn_cast<ConstantSDNode>(Count);
+ // Fall back to a call to the "memcpy" library routine when:
+ // the alignment is not a multiple of 4,
+ // the size is not a constant (Count is not a ConstantSDNode), or
+ // the size is >= 64 bytes or is not a multiple of 4.
+ if ((Align & 3) != 0 ||
+ !I ||
+ I->getValue() >= 64 ||
+ (I->getValue() & 3) != 0) {
+ MVT::ValueType IntPtr = getPointerTy();
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Entry.Ty = getTargetData()->getIntPtrType();
+ Entry.Node = Op.getOperand(1); Args.push_back(Entry);
+ Entry.Node = Op.getOperand(2); Args.push_back(Entry);
+ Entry.Node = Op.getOperand(3); Args.push_back(Entry);
+ std::pair<SDOperand,SDOperand> CallResult =
+ LowerCallTo(Chain, Type::VoidTy, false, false, CallingConv::C, false,
+ DAG.getExternalSymbol("memcpy", IntPtr), Args, DAG);
+ return CallResult.second;
+ }
+
+ // Otherwise copy inline with 4-byte (i32) loads and stores. To be improved.
+ assert((I->getValue() & 3) == 0);
+ assert((Align & 3) == 0);
+ unsigned NumMemOps = I->getValue() >> 2;
+ unsigned EmittedNumMemOps = 0;
+ unsigned SrcOff = 0, DstOff = 0;
+ MVT::ValueType VT = MVT::i32;
+ unsigned VTSize = 4;
+ const int MAX_LOADS_IN_LDM = 6;
+ SDOperand LoadChains[MAX_LOADS_IN_LDM];
+ SDOperand Loads[MAX_LOADS_IN_LDM];
+
+ // Per batch: emit up to MAX_LOADS_IN_LDM loads, then a TokenFactor joining
+ // their chains, then the same number of stores chained one after another.
+ // The intent is that the loads and stores get combined into ldm/stm later.
+ while(EmittedNumMemOps < NumMemOps) {
+ unsigned int i;
+ for (i=0; i<MAX_LOADS_IN_LDM && EmittedNumMemOps+i < NumMemOps; i++) {
+ Loads[i] = DAG.getLoad(VT, Chain,
+ DAG.getNode(ISD::ADD, VT, Src,
+ DAG.getConstant(SrcOff, VT)),
+ NULL, 0);
+ LoadChains[i] = Loads[i].getValue(1);
+ SrcOff += VTSize;
+ }
+
+ Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, &LoadChains[0], i);
+
+ for (i=0; i<MAX_LOADS_IN_LDM && EmittedNumMemOps+i < NumMemOps; i++) {
+ Chain = DAG.getStore(Chain, Loads[i],
+ DAG.getNode(ISD::ADD, VT, Dest,
+ DAG.getConstant(DstOff, VT)),
+ NULL, 0);
+ DstOff += VTSize;
+ }
+ EmittedNumMemOps += i;
+ }
+
+ return Chain;
+}
+
SDOperand ARMTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
switch (Op.getOpcode()) {
default: assert(0 && "Don't know how to custom lower this!"); abort();
@@ -1358,6 +1432,7 @@
case ISD::RETURNADDR: break;
case ISD::FRAMEADDR: break;
case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG);
+ case ISD::MEMCPY: return LowerMEMCPY(Op, DAG);
}
return SDOperand();
}
More information about the llvm-commits
mailing list