[llvm-commits] [llvm] r122936 - in /llvm/trunk: lib/CodeGen/SelectionDAG/SelectionDAG.cpp test/CodeGen/X86/2010-06-25-CoalescerSubRegDefDead.ll test/CodeGen/X86/memcpy.ll
Evan Cheng
evan.cheng at apple.com
Wed Jan 5 17:04:47 PST 2011
Author: evancheng
Date: Wed Jan 5 19:04:47 2011
New Revision: 122936
URL: http://llvm.org/viewvc/llvm-project?rev=122936&view=rev
Log:
r105228 reduced the memcpy / memset inline limit to 4 with -Os to avoid blowing
up the FreeBSD bootloader. However, this doesn't make much sense for Darwin, whose
-Os is meant to optimize for size only if it doesn't hurt performance.
rdar://8821501
Modified:
llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
llvm/trunk/test/CodeGen/X86/2010-06-25-CoalescerSubRegDefDead.ll
llvm/trunk/test/CodeGen/X86/memcpy.ll
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp?rev=122936&r1=122935&r2=122936&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp Wed Jan 5 19:04:47 2011
@@ -50,6 +50,7 @@
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Triple.h"
#include <algorithm>
#include <cmath>
using namespace llvm;
@@ -3286,8 +3287,14 @@
// the size of a call to memcpy or memset (3 arguments + call).
if (Limit != ~0U) {
const Function *F = DAG.getMachineFunction().getFunction();
- if (F->hasFnAttr(Attribute::OptimizeForSize))
- Limit = 4;
+ if (F->hasFnAttr(Attribute::OptimizeForSize)) {
+ Triple T(((LLVMTargetMachine&)TLI.getTargetMachine()).getTargetTriple());
+ if (T.getOS() != Triple::Darwin)
+ // A pretty terrible hack to defeat the wild guess. On Darwin, -Os means
+ // optimize for size without hurting performance so we don't want to
+ // bump down the limit.
+ Limit = 4;
+ }
}
unsigned NumMemOps = 0;
Modified: llvm/trunk/test/CodeGen/X86/2010-06-25-CoalescerSubRegDefDead.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/2010-06-25-CoalescerSubRegDefDead.ll?rev=122936&r1=122935&r2=122936&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/2010-06-25-CoalescerSubRegDefDead.ll (original)
+++ llvm/trunk/test/CodeGen/X86/2010-06-25-CoalescerSubRegDefDead.ll Wed Jan 5 19:04:47 2011
@@ -1,4 +1,4 @@
-; RUN: llc -O1 -mtriple=x86_64-apple-darwin10 -relocation-model=pic -disable-fp-elim < %s | FileCheck %s
+; RUN: llc -O1 -mtriple=x86_64-unknown-linux-gnu -relocation-model=pic -disable-fp-elim < %s | FileCheck %s
; <rdar://problem/8124405>
%struct.type = type { %struct.subtype*, i32, i8, i32, i8, i32, i32, i32, i32, i32, i8, i32, i32, i32, i32, i32, [256 x i32], i32, [257 x i32], [257 x i32], i32*, i16*, i8*, i32, i32, i32, i32, i32, [256 x i8], [16 x i8], [256 x i8], [4096 x i8], [16 x i32], [18002 x i8], [18002 x i8], [6 x [258 x i8]], [6 x [258 x i32]], [6 x [258 x i32]], [6 x [258 x i32]], [6 x i32], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32*, i32*, i32* }
@@ -21,9 +21,9 @@
; statement. It can be an ADD or LEA instruction, it's not important which one
; it is.
;
-; CHECK: ## %bb
-; CHECK-NEXT: addq $64036, %rdi
-; CHECK: rep;stosl
+; CHECK: # %bb
+; CHECK: addq $64036, %rdi
+; CHECK: rep;stosl
%tmp5 = bitcast i32* %tmp4 to i8*
call void @llvm.memset.p0i8.i64(i8* %tmp5, i8 0, i64 84, i32 4, i1 false)
Modified: llvm/trunk/test/CodeGen/X86/memcpy.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/memcpy.ll?rev=122936&r1=122935&r2=122936&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/memcpy.ll (original)
+++ llvm/trunk/test/CodeGen/X86/memcpy.ll Wed Jan 5 19:04:47 2011
@@ -1,4 +1,5 @@
-; RUN: llc < %s -march=x86-64 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s -check-prefix=LINUX
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s -check-prefix=DARWIN
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
@@ -9,8 +10,8 @@
tail call void @llvm.memcpy.p0i8.p0i8.i64( i8* %a, i8* %b, i64 %n, i32 1, i1 0 )
ret i8* %a
-; CHECK: test1:
-; CHECK: memcpy
+; LINUX: test1:
+; LINUX: memcpy
}
; Variable memcpy's should lower to calls.
@@ -21,18 +22,41 @@
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp14, i8* %tmp25, i64 %n, i32 8, i1 0 )
ret i8* %tmp14
-; CHECK: test2:
-; CHECK: memcpy
+; LINUX: test2:
+; LINUX: memcpy
}
; Large constant memcpy's should lower to a call when optimizing for size.
; PR6623
+
+; On the other hand, Darwin's definition of -Os is optimizing for size without
+; hurting performance so it should just ignore optsize when expanding memcpy.
+; rdar://8821501
define void @test3(i8* nocapture %A, i8* nocapture %B) nounwind optsize noredzone {
entry:
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %A, i8* %B, i64 64, i32 1, i1 false)
ret void
-; CHECK: test3:
-; CHECK: memcpy
+; LINUX: test3:
+; LINUX: memcpy
+
+; DARWIN: test3:
+; DARWIN-NOT: memcpy
+; DARWIN: movq
+; DARWIN: movq
+; DARWIN: movq
+; DARWIN: movq
+; DARWIN: movq
+; DARWIN: movq
+; DARWIN: movq
+; DARWIN: movq
+; DARWIN: movq
+; DARWIN: movq
+; DARWIN: movq
+; DARWIN: movq
+; DARWIN: movq
+; DARWIN: movq
+; DARWIN: movq
+; DARWIN: movq
}
; Large constant memcpy's should be inlined when not optimizing for size.
@@ -40,18 +64,18 @@
entry:
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %A, i8* %B, i64 64, i32 1, i1 false)
ret void
-; CHECK: test4:
-; CHECK: movq
-; CHECK: movq
-; CHECK: movq
-; CHECK: movq
-; CHECK: movq
-; CHECK: movq
-; CHECK: movq
-; CHECK: movq
-; CHECK: movq
-; CHECK: movq
-; CHECK: movq
-; CHECK: movq
+; LINUX: test4:
+; LINUX movq
+; LINUX movq
+; LINUX movq
+; LINUX movq
+; LINUX movq
+; LINUX movq
+; LINUX movq
+; LINUX movq
+; LINUX movq
+; LINUX movq
+; LINUX movq
+; LINUX movq
}
More information about the llvm-commits
mailing list