[llvm-commits] [llvm] r122952 - in /llvm/trunk: include/llvm/Target/TargetLowering.h lib/CodeGen/SelectionDAG/SelectionDAG.cpp lib/CodeGen/SelectionDAG/TargetLowering.cpp lib/Target/ARM/ARMISelLowering.cpp lib/Target/X86/X86ISelLowering.cpp lib/Target/XCore/XCoreISelLowering.cpp test/CodeGen/X86/2010-06-25-CoalescerSubRegDefDead.ll test/CodeGen/X86/memcpy.ll
Evan Cheng
evan.cheng at apple.com
Wed Jan 5 22:52:41 PST 2011
Author: evancheng
Date: Thu Jan 6 00:52:41 2011
New Revision: 122952
URL: http://llvm.org/viewvc/llvm-project?rev=122952&view=rev
Log:
Re-implement r122936 with proper target hooks. Now getMaxStoresPerMemcpy
etc. take an OptSize argument. If OptSize is true, they return
the inline limit for functions with the OptSize attribute.
Modified:
llvm/trunk/include/llvm/Target/TargetLowering.h
llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp
llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/lib/Target/XCore/XCoreISelLowering.cpp
llvm/trunk/test/CodeGen/X86/2010-06-25-CoalescerSubRegDefDead.ll
llvm/trunk/test/CodeGen/X86/memcpy.ll
Modified: llvm/trunk/include/llvm/Target/TargetLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Target/TargetLowering.h?rev=122952&r1=122951&r2=122952&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Target/TargetLowering.h (original)
+++ llvm/trunk/include/llvm/Target/TargetLowering.h Thu Jan 6 00:52:41 2011
@@ -642,21 +642,30 @@
/// This function returns the maximum number of store operations permitted
/// to replace a call to llvm.memset. The value is set by the target at the
- /// performance threshold for such a replacement.
+ /// performance threshold for such a replacement. If OptSize is true,
+ /// return the limit for functions that have OptSize attribute.
/// @brief Get maximum # of store operations permitted for llvm.memset
- unsigned getMaxStoresPerMemset() const { return maxStoresPerMemset; }
+ unsigned getMaxStoresPerMemset(bool OptSize) const {
+ return OptSize ? maxStoresPerMemsetOptSize : maxStoresPerMemset;
+ }
/// This function returns the maximum number of store operations permitted
/// to replace a call to llvm.memcpy. The value is set by the target at the
- /// performance threshold for such a replacement.
+ /// performance threshold for such a replacement. If OptSize is true,
+ /// return the limit for functions that have OptSize attribute.
/// @brief Get maximum # of store operations permitted for llvm.memcpy
- unsigned getMaxStoresPerMemcpy() const { return maxStoresPerMemcpy; }
+ unsigned getMaxStoresPerMemcpy(bool OptSize) const {
+ return OptSize ? maxStoresPerMemcpyOptSize : maxStoresPerMemcpy;
+ }
/// This function returns the maximum number of store operations permitted
/// to replace a call to llvm.memmove. The value is set by the target at the
- /// performance threshold for such a replacement.
+ /// performance threshold for such a replacement. If OptSize is true,
+ /// return the limit for functions that have OptSize attribute.
/// @brief Get maximum # of store operations permitted for llvm.memmove
- unsigned getMaxStoresPerMemmove() const { return maxStoresPerMemmove; }
+ unsigned getMaxStoresPerMemmove(bool OptSize) const {
+ return OptSize ? maxStoresPerMemmoveOptSize : maxStoresPerMemmove;
+ }
/// This function returns true if the target allows unaligned memory accesses
/// of the specified type. This is used, for example, in situations where an
@@ -1776,6 +1785,10 @@
/// @brief Specify maximum number of store instructions per memset call.
unsigned maxStoresPerMemset;
+ /// Maximum number of store operations that may be substituted for the call
+ /// to memset, used for functions with OptSize attribute.
+ unsigned maxStoresPerMemsetOptSize;
+
/// When lowering \@llvm.memcpy this field specifies the maximum number of
/// store operations that may be substituted for a call to memcpy. Targets
/// must set this value based on the cost threshold for that target. Targets
@@ -1788,6 +1801,10 @@
/// @brief Specify maximum bytes of store instructions per memcpy call.
unsigned maxStoresPerMemcpy;
+ /// Maximum number of store operations that may be substituted for a call
+ /// to memcpy, used for functions with OptSize attribute.
+ unsigned maxStoresPerMemcpyOptSize;
+
/// When lowering \@llvm.memmove this field specifies the maximum number of
/// store instructions that may be substituted for a call to memmove. Targets
/// must set this value based on the cost threshold for that target. Targets
@@ -1799,6 +1816,10 @@
/// @brief Specify maximum bytes of store instructions per memmove call.
unsigned maxStoresPerMemmove;
+ /// Maximum number of store instructions that may be substituted for a call
+ /// to memmove, used for functions with OptSize attribute.
+ unsigned maxStoresPerMemmoveOptSize;
+
/// This field specifies whether the target can benefit from code placement
/// optimization.
bool benefitFromCodePlacementOpt;
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp?rev=122952&r1=122951&r2=122952&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp Thu Jan 6 00:52:41 2011
@@ -3281,15 +3281,6 @@
VT = LVT;
}
- // If we're optimizing for size, and there is a limit, bump the maximum number
- // of operations inserted down to 4. This is a wild guess that approximates
- // the size of a call to memcpy or memset (3 arguments + call).
- if (Limit != ~0U) {
- const Function *F = DAG.getMachineFunction().getFunction();
- if (F->hasFnAttr(Attribute::OptimizeForSize))
- Limit = 4;
- }
-
unsigned NumMemOps = 0;
while (Size != 0) {
unsigned VTSize = VT.getSizeInBits() / 8;
@@ -3335,7 +3326,9 @@
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
std::vector<EVT> MemOps;
bool DstAlignCanChange = false;
- MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ bool OptSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize);
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
DstAlignCanChange = true;
@@ -3345,7 +3338,7 @@
std::string Str;
bool CopyFromStr = isMemSrcFromString(Src, Str);
bool isZeroStr = CopyFromStr && Str.empty();
- unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemcpy();
+ unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemcpy(OptSize);
if (!FindOptimalMemOpLowering(MemOps, Limit, Size,
(DstAlignCanChange ? 0 : Align),
@@ -3426,14 +3419,16 @@
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
std::vector<EVT> MemOps;
bool DstAlignCanChange = false;
- MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ bool OptSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize);
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
DstAlignCanChange = true;
unsigned SrcAlign = DAG.InferPtrAlignment(Src);
if (Align > SrcAlign)
SrcAlign = Align;
- unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemmove();
+ unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemmove(OptSize);
if (!FindOptimalMemOpLowering(MemOps, Limit, Size,
(DstAlignCanChange ? 0 : Align),
@@ -3502,13 +3497,15 @@
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
std::vector<EVT> MemOps;
bool DstAlignCanChange = false;
- MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ bool OptSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize);
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
DstAlignCanChange = true;
bool NonScalarIntSafe =
isa<ConstantSDNode>(Src) && cast<ConstantSDNode>(Src)->isNullValue();
- if (!FindOptimalMemOpLowering(MemOps, TLI.getMaxStoresPerMemset(),
+ if (!FindOptimalMemOpLowering(MemOps, TLI.getMaxStoresPerMemset(OptSize),
Size, (DstAlignCanChange ? 0 : Align), 0,
NonScalarIntSafe, false, DAG, TLI))
return SDValue();
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp?rev=122952&r1=122951&r2=122952&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp Thu Jan 6 00:52:41 2011
@@ -567,6 +567,8 @@
memset(RegClassForVT, 0,MVT::LAST_VALUETYPE*sizeof(TargetRegisterClass*));
memset(TargetDAGCombineArray, 0, array_lengthof(TargetDAGCombineArray));
maxStoresPerMemset = maxStoresPerMemcpy = maxStoresPerMemmove = 8;
+ maxStoresPerMemsetOptSize = maxStoresPerMemcpyOptSize
+ = maxStoresPerMemmoveOptSize = 4;
benefitFromCodePlacementOpt = false;
UseUnderscoreSetJmp = false;
UseUnderscoreLongJmp = false;
Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp?rev=122952&r1=122951&r2=122952&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp Thu Jan 6 00:52:41 2011
@@ -687,7 +687,8 @@
else
setSchedulingPreference(Sched::Hybrid);
- maxStoresPerMemcpy = 1; //// temporary - rewrite interface to use type
+ //// temporary - rewrite interface to use type
+ maxStoresPerMemcpy = maxStoresPerMemcpyOptSize = 1;
// On ARM arguments smaller than 4 bytes are extended, so all arguments
// are at least 4 bytes aligned.
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=122952&r1=122951&r2=122952&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Thu Jan 6 00:52:41 2011
@@ -978,11 +978,14 @@
computeRegisterProperties();
- // FIXME: These should be based on subtarget info. Plus, the values should
- // be smaller when we are in optimizing for size mode.
+ // On Darwin, -Os means optimize for size without hurting performance, so
+ // do not reduce the limit.
maxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores
+ maxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 16 : 8;
maxStoresPerMemcpy = 8; // For @llvm.memcpy -> sequence of stores
- maxStoresPerMemmove = 3; // For @llvm.memmove -> sequence of stores
+ maxStoresPerMemcpyOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
+ maxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores
+ maxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
setPrefLoopAlignment(16);
benefitFromCodePlacementOpt = true;
}
Modified: llvm/trunk/lib/Target/XCore/XCoreISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/XCore/XCoreISelLowering.cpp?rev=122952&r1=122951&r2=122952&view=diff
==============================================================================
--- llvm/trunk/lib/Target/XCore/XCoreISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/XCore/XCoreISelLowering.cpp Thu Jan 6 00:52:41 2011
@@ -149,8 +149,9 @@
setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
- maxStoresPerMemset = 4;
- maxStoresPerMemmove = maxStoresPerMemcpy = 2;
+ maxStoresPerMemset = maxStoresPerMemsetOptSize = 4;
+ maxStoresPerMemmove = maxStoresPerMemmoveOptSize
+ = maxStoresPerMemcpy = maxStoresPerMemcpyOptSize = 2;
// We have target-specific dag combine patterns for the following nodes:
setTargetDAGCombine(ISD::STORE);
Modified: llvm/trunk/test/CodeGen/X86/2010-06-25-CoalescerSubRegDefDead.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/2010-06-25-CoalescerSubRegDefDead.ll?rev=122952&r1=122951&r2=122952&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/2010-06-25-CoalescerSubRegDefDead.ll (original)
+++ llvm/trunk/test/CodeGen/X86/2010-06-25-CoalescerSubRegDefDead.ll Thu Jan 6 00:52:41 2011
@@ -1,4 +1,4 @@
-; RUN: llc -O1 -mtriple=x86_64-apple-darwin10 -relocation-model=pic -disable-fp-elim < %s | FileCheck %s
+; RUN: llc -O1 -mtriple=x86_64-unknown-linux-gnu -relocation-model=pic -disable-fp-elim < %s | FileCheck %s
; <rdar://problem/8124405>
%struct.type = type { %struct.subtype*, i32, i8, i32, i8, i32, i32, i32, i32, i32, i8, i32, i32, i32, i32, i32, [256 x i32], i32, [257 x i32], [257 x i32], i32*, i16*, i8*, i32, i32, i32, i32, i32, [256 x i8], [16 x i8], [256 x i8], [4096 x i8], [16 x i32], [18002 x i8], [18002 x i8], [6 x [258 x i8]], [6 x [258 x i32]], [6 x [258 x i32]], [6 x [258 x i32]], [6 x i32], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32*, i32*, i32* }
@@ -21,9 +21,9 @@
; statement. It can be an ADD or LEA instruction, it's not important which one
; it is.
;
-; CHECK: ## %bb
-; CHECK-NEXT: addq $64036, %rdi
-; CHECK: rep;stosl
+; CHECK: # %bb
+; CHECK: addq $64036, %rdi
+; CHECK: rep;stosl
%tmp5 = bitcast i32* %tmp4 to i8*
call void @llvm.memset.p0i8.i64(i8* %tmp5, i8 0, i64 84, i32 4, i1 false)
Modified: llvm/trunk/test/CodeGen/X86/memcpy.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/memcpy.ll?rev=122952&r1=122951&r2=122952&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/memcpy.ll (original)
+++ llvm/trunk/test/CodeGen/X86/memcpy.ll Thu Jan 6 00:52:41 2011
@@ -1,4 +1,5 @@
-; RUN: llc < %s -march=x86-64 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s -check-prefix=LINUX
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s -check-prefix=DARWIN
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
@@ -9,8 +10,8 @@
tail call void @llvm.memcpy.p0i8.p0i8.i64( i8* %a, i8* %b, i64 %n, i32 1, i1 0 )
ret i8* %a
-; CHECK: test1:
-; CHECK: memcpy
+; LINUX: test1:
+; LINUX: memcpy
}
; Variable memcpy's should lower to calls.
@@ -21,18 +22,41 @@
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp14, i8* %tmp25, i64 %n, i32 8, i1 0 )
ret i8* %tmp14
-; CHECK: test2:
-; CHECK: memcpy
+; LINUX: test2:
+; LINUX: memcpy
}
; Large constant memcpy's should lower to a call when optimizing for size.
; PR6623
+
+; On the other hand, Darwin's definition of -Os is optimizing for size without
+; hurting performance so it should just ignore optsize when expanding memcpy.
+; rdar://8821501
define void @test3(i8* nocapture %A, i8* nocapture %B) nounwind optsize noredzone {
entry:
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %A, i8* %B, i64 64, i32 1, i1 false)
ret void
-; CHECK: test3:
-; CHECK: memcpy
+; LINUX: test3:
+; LINUX: memcpy
+
+; DARWIN: test3:
+; DARWIN-NOT: memcpy
+; DARWIN: movq
+; DARWIN: movq
+; DARWIN: movq
+; DARWIN: movq
+; DARWIN: movq
+; DARWIN: movq
+; DARWIN: movq
+; DARWIN: movq
+; DARWIN: movq
+; DARWIN: movq
+; DARWIN: movq
+; DARWIN: movq
+; DARWIN: movq
+; DARWIN: movq
+; DARWIN: movq
+; DARWIN: movq
}
; Large constant memcpy's should be inlined when not optimizing for size.
@@ -40,18 +64,18 @@
entry:
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %A, i8* %B, i64 64, i32 1, i1 false)
ret void
-; CHECK: test4:
-; CHECK: movq
-; CHECK: movq
-; CHECK: movq
-; CHECK: movq
-; CHECK: movq
-; CHECK: movq
-; CHECK: movq
-; CHECK: movq
-; CHECK: movq
-; CHECK: movq
-; CHECK: movq
-; CHECK: movq
+; LINUX: test4:
+; LINUX: movq
+; LINUX: movq
+; LINUX: movq
+; LINUX: movq
+; LINUX: movq
+; LINUX: movq
+; LINUX: movq
+; LINUX: movq
+; LINUX: movq
+; LINUX: movq
+; LINUX: movq
+; LINUX: movq
}
More information about the llvm-commits
mailing list