[llvm] r271678 - Code size optimisation: do not inline memcpy if this expansion results

Fri Jun 3 08:39:00 PDT 2016

Author: sjoerdmeijer
Date: Fri Jun  3 10:38:55 2016
New Revision: 271678

URL: http://llvm.org/viewvc/llvm-project?rev=271678&view=rev
Log:
Code size optimisation: do not inline memcpy if this expansion results
in more instructions than the library call.

Differential Revision: http://reviews.llvm.org/D20958

Added:
    llvm/trunk/test/CodeGen/ARM/memcpy-no-inline.ll
Modified:
    llvm/trunk/lib/Target/ARM/ARMSelectionDAGInfo.cpp

Modified: llvm/trunk/lib/Target/ARM/ARMSelectionDAGInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMSelectionDAGInfo.cpp?rev=271678&r1=271677&r2=271678&view=diff
==============================================================================

--- llvm/trunk/lib/Target/ARM/ARMSelectionDAGInfo.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMSelectionDAGInfo.cpp Fri Jun  3 10:38:55 2016
@@ -176,6 +176,12 @@ ARMSelectionDAGInfo::EmitTargetCodeForMe
   // emit.
   unsigned NumMEMCPYs = (NumMemOps + MaxLoadsInLDM - 1) / MaxLoadsInLDM;
 
+  // Code size optimisation: do not inline memcpy if expansion results in
+  // more instructions than the library call.
+  if (NumMEMCPYs > 1 && DAG.getMachineFunction().getFunction()->optForMinSize()) {
+    return SDValue();
+  }
+
   SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other, MVT::Glue);
 
   for (unsigned I = 0; I != NumMEMCPYs; ++I) {

Added: llvm/trunk/test/CodeGen/ARM/memcpy-no-inline.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/memcpy-no-inline.ll?rev=271678&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/memcpy-no-inline.ll (added)
+++ llvm/trunk/test/CodeGen/ARM/memcpy-no-inline.ll Fri Jun  3 10:38:55 2016
@@ -0,0 +1,33 @@
+; RUN: llc -mtriple=thumbv7m-arm-none-eabi -mcpu=cortex-m3 < %s | FileCheck %s
+
+%struct.mystruct = type { [31 x i8] }
+
+@.str = private unnamed_addr constant [31 x i8] c"012345678901234567890123456789\00", align 1
+@.str.1 = private unnamed_addr constant [21 x i8] c"01234567890123456789\00", align 1
+
+@myglobal = common global %struct.mystruct zeroinitializer, align 1
+
+define void @foo() #0 {
+entry:
+; CHECK-LABEL: foo:
+; CHECK:      __aeabi_memcpy
+; CHECK-NOT:  ldm
+  %mystring = alloca [31 x i8], align 1
+  %0 = getelementptr inbounds [31 x i8], [31 x i8]* %mystring, i32 0, i32 0
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %0, i8* getelementptr inbounds ([31 x i8], [31 x i8]* @.str, i32 0, i32 0), i32 31, i32 1, i1 false)
+  ret void
+}
+
+define void @bar() #0 {
+entry:
+; CHECK-LABEL: bar:
+; CHECK-NOT:   __aeabi_memcpy
+  %mystring = alloca [31 x i8], align 1
+  %0 = getelementptr inbounds [31 x i8], [31 x i8]* %mystring, i32 0, i32 0
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %0, i8* getelementptr inbounds ([21 x i8], [21 x i8]* @.str.1, i32 0, i32 0), i32 21, i32 1, i1 false)
+  ret void
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1) #1
+
+attributes #0 = { minsize noinline nounwind optsize }