[llvm] r208994 - Re-enable inline memcpy expansion for Thumb1.

James Molloy james.molloy at arm.com
Fri May 16 07:24:22 PDT 2014


Author: jamesm
Date: Fri May 16 09:24:22 2014
New Revision: 208994

URL: http://llvm.org/viewvc/llvm-project?rev=208994&view=rev
Log:
Re-enable inline memcpy expansion for Thumb1.

Patch by Moritz Roth!


Added:
    llvm/trunk/test/CodeGen/Thumb/thumb-memcpy-ldm-stm.ll
Modified:
    llvm/trunk/lib/Target/ARM/ARMSelectionDAGInfo.cpp
    llvm/trunk/lib/Target/ARM/ARMSubtarget.h
    llvm/trunk/test/CodeGen/ARM/memcpy-inline.ll

Modified: llvm/trunk/lib/Target/ARM/ARMSelectionDAGInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMSelectionDAGInfo.cpp?rev=208994&r1=208993&r2=208994&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMSelectionDAGInfo.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMSelectionDAGInfo.cpp Fri May 16 09:24:22 2014
@@ -53,9 +53,10 @@ ARMSelectionDAGInfo::EmitTargetCodeForMe
   EVT VT = MVT::i32;
   unsigned VTSize = 4;
   unsigned i = 0;
-  const unsigned MAX_LOADS_IN_LDM = 6;
-  SDValue TFOps[MAX_LOADS_IN_LDM];
-  SDValue Loads[MAX_LOADS_IN_LDM];
+  // Emit a maximum of 4 loads in Thumb1 since we have fewer registers
+  const unsigned MAX_LOADS_IN_LDM = Subtarget->isThumb1Only() ? 4 : 6;
+  SDValue TFOps[6];
+  SDValue Loads[6];
   uint64_t SrcOff = 0, DstOff = 0;
 
   // Emit up to MAX_LOADS_IN_LDM loads, then a TokenFactor barrier, then the

Modified: llvm/trunk/lib/Target/ARM/ARMSubtarget.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMSubtarget.h?rev=208994&r1=208993&r2=208994&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMSubtarget.h (original)
+++ llvm/trunk/lib/Target/ARM/ARMSubtarget.h Fri May 16 09:24:22 2014
@@ -239,9 +239,7 @@ protected:
   /// getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size
   /// that still makes it profitable to inline the call.
   unsigned getMaxInlineSizeThreshold() const {
-    // FIXME: For now, we don't lower memcpy's to loads / stores for Thumb1.
-    // Change this once Thumb1 ldmia / stmia support is added.
-    return isThumb1Only() ? 0 : 64;
+    return 64;
   }
   /// ParseSubtargetFeatures - Parses features string setting specified
   /// subtarget options.  Definition of function is auto generated by tblgen.

Modified: llvm/trunk/test/CodeGen/ARM/memcpy-inline.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/memcpy-inline.ll?rev=208994&r1=208993&r2=208994&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/memcpy-inline.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/memcpy-inline.ll Fri May 16 09:24:22 2014
@@ -1,5 +1,5 @@
 ; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 -pre-RA-sched=source -disable-post-ra | FileCheck %s
-
+; RUN: llc < %s -mtriple=thumbv6m-apple-ios -mcpu=cortex-m0 -pre-RA-sched=source -disable-post-ra | FileCheck %s -check-prefix=CHECK-T1
 %struct.x = type { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8 }
 
 @src = external global %struct.x
@@ -17,7 +17,12 @@ define i32 @t0() {
 entry:
 ; CHECK-LABEL: t0:
 ; CHECK: vldr [[REG1:d[0-9]+]],
-; CHECK: vstr [[REG1]], 
+; CHECK: vstr [[REG1]],
+; CHECK-T1-LABEL: t0:
+; CHECK-T1: ldrb [[TREG1:r[0-9]]],
+; CHECK-T1: strb [[TREG1]],
+; CHECK-T1: ldrh [[TREG2:r[0-9]]],
+; CHECK-T1: strh [[TREG2]]
   call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds (%struct.x* @dst, i32 0, i32 0), i8* getelementptr inbounds (%struct.x* @src, i32 0, i32 0), i32 11, i32 8, i1 false)
   ret i32 0
 }
@@ -83,6 +88,11 @@ entry:
 ; CHECK: movw [[REG7:r[0-9]+]], #18500
 ; CHECK: movt [[REG7:r[0-9]+]], #22866
 ; CHECK: str [[REG7]]
+; CHECK-T1-LABEL: t5:
+; CHECK-T1: movs [[TREG3:r[0-9]]],
+; CHECK-T1: strb [[TREG3]],
+; CHECK-T1: movs [[TREG4:r[0-9]]],
+; CHECK-T1: strb [[TREG4]],
   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([7 x i8]* @.str5, i64 0, i64 0), i64 7, i32 1, i1 false)
   ret void
 }
@@ -90,12 +100,17 @@ entry:
 define void @t6() nounwind {
 entry:
 ; CHECK-LABEL: t6:
-; CHECK: vld1.8 {[[REG8:d[0-9]+]]}, [r0]
-; CHECK: vstr [[REG8]], [r1]
+; CHECK: vld1.8 {[[REG9:d[0-9]+]]}, [r0]
+; CHECK: vstr [[REG9]], [r1]
 ; CHECK: adds r1, #6
 ; CHECK: adds r0, #6
 ; CHECK: vld1.8
 ; CHECK: vst1.16
+; CHECK-T1-LABEL: t6:
+; CHECK-T1: movs [[TREG5:r[0-9]]],
+; CHECK-T1: strh [[TREG5]],
+; CHECK-T1: ldr [[TREG6:r[0-9]]],
+; CHECK-T1: str [[TREG6]]
   call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds ([512 x i8]* @spool.splbuf, i64 0, i64 0), i8* getelementptr inbounds ([14 x i8]* @.str6, i64 0, i64 0), i64 14, i32 1, i1 false)
   ret void
 }
@@ -104,9 +119,12 @@ entry:
 
 define void @t7(%struct.Foo* nocapture %a, %struct.Foo* nocapture %b) nounwind {
 entry:
-; CHECK: t7
+; CHECK-LABEL: t7:
 ; CHECK: vld1.32
 ; CHECK: vst1.32
+; CHECK-T1-LABEL: t7:
+; CHECK-T1: ldr
+; CHECK-T1: str
   %0 = bitcast %struct.Foo* %a to i8*
   %1 = bitcast %struct.Foo* %b to i8*
   tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %0, i8* %1, i32 16, i32 4, i1 false)

Added: llvm/trunk/test/CodeGen/Thumb/thumb-memcpy-ldm-stm.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Thumb/thumb-memcpy-ldm-stm.ll?rev=208994&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/Thumb/thumb-memcpy-ldm-stm.ll (added)
+++ llvm/trunk/test/CodeGen/Thumb/thumb-memcpy-ldm-stm.ll Fri May 16 09:24:22 2014
@@ -0,0 +1,37 @@
+; RUN: llc -mtriple=thumbv6m-eabi %s -o - | FileCheck %s
+
+@d = external global [64 x i32]
+@s = external global [64 x i32]
+
+; Function Attrs: nounwind
+define void @t1() #0 {
+entry:
+; CHECK: ldr [[REG0:r[0-9]]],
+; CHECK: ldm [[REG0]]!,
+; CHECK: ldr [[REG1:r[0-9]]],
+; CHECK: stm [[REG1]]!,
+; CHECK: subs [[REG0]], #32
+; CHECK-NEXT: ldrb
+; CHECK: subs [[REG1]], #32
+; CHECK-NEXT: strb
+    tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* bitcast ([64 x i32]* @s to i8*), i8* bitcast ([64 x i32]* @d to i8*), i32 33, i32 4, i1 false)
+    ret void
+}
+
+; Function Attrs: nounwind
+define void @t2() #0 {
+entry:
+; CHECK: ldr [[REG0:r[0-9]]],
+; CHECK: ldm [[REG0]]!,
+; CHECK: ldr [[REG1:r[0-9]]],
+; CHECK: stm [[REG1]]!,
+; CHECK: ldrh
+; CHECK: ldrb
+; CHECK: strb
+; CHECK: strh
+    tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* bitcast ([64 x i32]* @s to i8*), i8* bitcast ([64 x i32]* @d to i8*), i32 15, i32 4, i1 false)
+    ret void
+}
+
+; Function Attrs: nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1) #1





More information about the llvm-commits mailing list