[llvm-commits] CVS: llvm/lib/Target/X86/X86ISelLowering.cpp X86Subtarget.cpp X86Subtarget.h
Evan Cheng
evan.cheng at apple.com
Wed Feb 15 16:21:19 PST 2006
Changes in directory llvm/lib/Target/X86:
X86ISelLowering.cpp updated: 1.82 -> 1.83
X86Subtarget.cpp updated: 1.24 -> 1.25
X86Subtarget.h updated: 1.10 -> 1.11
---
Log message:
A bit more memset / memcpy optimization.
Turns them into calls to memset / memcpy if 1) the buffer(s) are not DWORD aligned, and
2) the size is not known to be greater than or equal to some minimum value (currently 128).
---
Diffs of the changes: (+53 -7)
X86ISelLowering.cpp | 49 ++++++++++++++++++++++++++++++++++++++++++-------
X86Subtarget.cpp | 2 ++
X86Subtarget.h | 9 +++++++++
3 files changed, 53 insertions(+), 7 deletions(-)
Index: llvm/lib/Target/X86/X86ISelLowering.cpp
diff -u llvm/lib/Target/X86/X86ISelLowering.cpp:1.82 llvm/lib/Target/X86/X86ISelLowering.cpp:1.83
--- llvm/lib/Target/X86/X86ISelLowering.cpp:1.82 Tue Feb 14 02:38:30 2006
+++ llvm/lib/Target/X86/X86ISelLowering.cpp Wed Feb 15 18:21:07 2006
@@ -1772,6 +1772,25 @@
(unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue();
if (Align == 0) Align = 1;
+ ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3));
+ // If not DWORD aligned, call memset if size is less than the threshold.
+ // It knows how to align to the right boundary first.
+ if ((Align & 3) != 0 &&
+ !(I && I->getValue() >= Subtarget->getMinRepStrSizeThreshold())) {
+ MVT::ValueType IntPtr = getPointerTy();
+ const Type *IntPtrTy = getTargetData().getIntPtrType();
+ std::vector<std::pair<SDOperand, const Type*> > Args;
+ Args.push_back(std::make_pair(Op.getOperand(1), IntPtrTy));
+ // Extend the ubyte argument to be an int value for the call.
+ SDOperand Val = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Op.getOperand(2));
+ Args.push_back(std::make_pair(Val, IntPtrTy));
+ Args.push_back(std::make_pair(Op.getOperand(3), IntPtrTy));
+ std::pair<SDOperand,SDOperand> CallResult =
+ LowerCallTo(Chain, Type::VoidTy, false, CallingConv::C, false,
+ DAG.getExternalSymbol("memset", IntPtr), Args, DAG);
+ return CallResult.second;
+ }
+
MVT::ValueType AVT;
SDOperand Count;
if (ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Op.getOperand(2))) {
@@ -1782,7 +1801,7 @@
switch (Align & 3) {
case 2: // WORD aligned
AVT = MVT::i16;
- if (ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3)))
+ if (I)
Count = DAG.getConstant(I->getValue() / 2, MVT::i32);
else
Count = DAG.getNode(ISD::SRL, MVT::i32, Op.getOperand(3),
@@ -1792,7 +1811,7 @@
break;
case 0: // DWORD aligned
AVT = MVT::i32;
- if (ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3)))
+ if (I)
Count = DAG.getConstant(I->getValue() / 4, MVT::i32);
else
Count = DAG.getNode(ISD::SRL, MVT::i32, Op.getOperand(3),
@@ -1812,7 +1831,7 @@
InFlag);
InFlag = Chain.getValue(1);
} else {
- AVT = MVT::i8;
+ AVT = MVT::i8;
Count = Op.getOperand(3);
Chain = DAG.getCopyToReg(Chain, X86::AL, Op.getOperand(2), InFlag);
InFlag = Chain.getValue(1);
@@ -1832,20 +1851,36 @@
(unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue();
if (Align == 0) Align = 1;
+ ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3));
+ // If not DWORD aligned, call memcpy if size is less than the threshold.
+ // It knows how to align to the right boundary first.
+ if ((Align & 3) != 0 &&
+ !(I && I->getValue() >= Subtarget->getMinRepStrSizeThreshold())) {
+ MVT::ValueType IntPtr = getPointerTy();
+ const Type *IntPtrTy = getTargetData().getIntPtrType();
+ std::vector<std::pair<SDOperand, const Type*> > Args;
+ Args.push_back(std::make_pair(Op.getOperand(1), IntPtrTy));
+ Args.push_back(std::make_pair(Op.getOperand(2), IntPtrTy));
+ Args.push_back(std::make_pair(Op.getOperand(3), IntPtrTy));
+ std::pair<SDOperand,SDOperand> CallResult =
+ LowerCallTo(Chain, Type::VoidTy, false, CallingConv::C, false,
+ DAG.getExternalSymbol("memcpy", IntPtr), Args, DAG);
+ return CallResult.second;
+ }
+
MVT::ValueType AVT;
SDOperand Count;
switch (Align & 3) {
case 2: // WORD aligned
AVT = MVT::i16;
- if (ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3)))
+ if (I)
Count = DAG.getConstant(I->getValue() / 2, MVT::i32);
else
- Count = DAG.getNode(ISD::SRL, MVT::i32, Op.getOperand(3),
- DAG.getConstant(1, MVT::i8));
+ Count = DAG.getConstant(I->getValue() / 2, MVT::i32);
break;
case 0: // DWORD aligned
AVT = MVT::i32;
- if (ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3)))
+ if (I)
Count = DAG.getConstant(I->getValue() / 4, MVT::i32);
else
Count = DAG.getNode(ISD::SRL, MVT::i32, Op.getOperand(3),
Index: llvm/lib/Target/X86/X86Subtarget.cpp
diff -u llvm/lib/Target/X86/X86Subtarget.cpp:1.24 llvm/lib/Target/X86/X86Subtarget.cpp:1.25
--- llvm/lib/Target/X86/X86Subtarget.cpp:1.24 Tue Feb 14 14:37:37 2006
+++ llvm/lib/Target/X86/X86Subtarget.cpp Wed Feb 15 18:21:07 2006
@@ -146,6 +146,8 @@
X86Subtarget::X86Subtarget(const Module &M, const std::string &FS) {
stackAlignment = 8;
+ // FIXME: this is a known good value for Yonah. Not sure about others.
+ MinRepStrSizeThreshold = 128;
indirectExternAndWeakGlobals = false;
X86SSELevel = NoMMXSSE;
X863DNowLevel = NoThreeDNow;
Index: llvm/lib/Target/X86/X86Subtarget.h
diff -u llvm/lib/Target/X86/X86Subtarget.h:1.10 llvm/lib/Target/X86/X86Subtarget.h:1.11
--- llvm/lib/Target/X86/X86Subtarget.h:1.10 Tue Jan 31 13:43:35 2006
+++ llvm/lib/Target/X86/X86Subtarget.h Wed Feb 15 18:21:07 2006
@@ -44,6 +44,9 @@
/// entry to the function and which must be maintained by every function.
unsigned stackAlignment;
+ /// Min. memset / memcpy size that is turned into rep/movs, rep/stos ops.
+ unsigned MinRepStrSizeThreshold;
+
/// Used by instruction selector
bool indirectExternAndWeakGlobals;
@@ -62,6 +65,12 @@
/// function for this subtarget.
unsigned getStackAlignment() const { return stackAlignment; }
+ /// getMinRepStrSizeThreshold - Returns the minimum memset / memcpy size
+ /// required to turn the operation into a X86 rep/movs or rep/stos
+ /// instruction. This is only used if the src / dst alignment is not DWORD
+ /// aligned.
+ unsigned getMinRepStrSizeThreshold() const { return MinRepStrSizeThreshold; }
+
/// Returns true if the instruction selector should treat global values
/// referencing external or weak symbols as indirect rather than direct
/// references.
More information about the llvm-commits mailing list