[PATCH] memcpy lowering: use "rep movs" even when esi is used as base pointer

Hans Wennborg hans at chromium.org
Wed Mar 5 10:05:56 PST 2014


Not sure who the best reviewer for this would be, so just putting it out there.

For functions where esi is used as base pointer, we fall back from lowering memcpy with "rep movs" because it clobbers that register.

If we wanted to, we could just store esi in another physical register and restore it afterwards.

For example, for test/CodeGen/X86/inline-asm-sp-clobber-memcpy.ll we currently generate:

_test1:                                 ## @test1
## BB#0:
        pushl   %ebp
        movl    %esp, %ebp
        pushl   %edi
        pushl   %esi
        andl    $-16, %esp
        subl    $96, %esp
        movl    %esp, %esi
        movl    16(%ebp), %edi
        movl    8(%ebp), %eax
        movl    84(%eax), %ecx
        movl    %ecx, 88(%esp)
        movl    80(%eax), %ecx
        movl    %ecx, 84(%esp)
        movl    76(%eax), %ecx
        movl    %ecx, 80(%esp)
        movl    72(%eax), %ecx
        movl    %ecx, 76(%esp)
        movl    68(%eax), %ecx
        movl    %ecx, 72(%esp)
        movl    64(%eax), %ecx
        movl    %ecx, 68(%esp)
        movl    60(%eax), %ecx
        movl    %ecx, 64(%esp)
        movl    56(%eax), %ecx
        movl    %ecx, 60(%esp)
        movl    52(%eax), %ecx
        movl    %ecx, 56(%esp)
        movl    48(%eax), %ecx
        movl    %ecx, 52(%esp)
        movl    44(%eax), %ecx
        movl    %ecx, 48(%esp)
        movl    40(%eax), %ecx
        movl    %ecx, 44(%esp)
        movl    36(%eax), %ecx
        movl    %ecx, 40(%esp)
        movl    32(%eax), %ecx
        movl    %ecx, 36(%esp)
        movl    28(%eax), %ecx
        movl    %ecx, 32(%esp)
        movl    24(%eax), %ecx
        movl    %ecx, 28(%esp)
        movl    20(%eax), %ecx
        movl    %ecx, 24(%esp)
        movl    16(%eax), %ecx
        movl    %ecx, 20(%esp)
        movl    12(%eax), %ecx
        movl    %ecx, 16(%esp)
        movl    8(%eax), %ecx
        movl    %ecx, 12(%esp)
        movl    (%eax), %ecx
        movl    4(%eax), %eax
        movl    %eax, 8(%esp)
        movl    %ecx, 4(%esp)
        movl    %edi, (%esp)
        calll   L_bar$stub
        ## InlineAsm Start
        xorl    %esp, %esp
        ## InlineAsm End
        leal    -8(%ebp), %esp
        popl    %esi
        popl    %edi
        popl    %ebp
        retl

But with the attached patch, we generate:

_test1:                                 ## @tes
## BB#0:
        pushl   %ebp
        movl    %esp, %ebp
        pushl   %ebx
        pushl   %edi
        pushl   %esi
        andl    $-16, %esp
        subl    $96, %esp
        movl    %esp, %esi
        movl    8(%ebp), %eax
        movl    16(%ebp), %ebx
        movl    %ebx, (%esp)
        leal    4(%esp), %edi
        movl    $22, %ecx
        movl    %esi, %edx
        movl    %eax, %esi
        rep;movsl
        movl    %edx, %esi
        calll   L_bar$stub
        ## InlineAsm Start
        xorl    %esp, %esp
        ## InlineAsm End
        leal    -12(%ebp), %esp
        popl    %esi
        popl    %edi
        popl    %ebx
        popl    %ebp
        retl


http://llvm-reviews.chandlerc.com/D2968

Files:
  lib/Target/X86/X86SelectionDAGInfo.cpp
  test/CodeGen/X86/inline-asm-sp-clobber-memcpy.ll
  test/CodeGen/X86/stack-align-memcpy.ll

Index: lib/Target/X86/X86SelectionDAGInfo.cpp
===================================================================
--- lib/Target/X86/X86SelectionDAGInfo.cpp
+++ lib/Target/X86/X86SelectionDAGInfo.cpp
@@ -200,13 +200,11 @@
       SrcPtrInfo.getAddrSpace() >= 256)
     return SDValue();
 
-  // ESI might be used as a base pointer, in that case we can't simply overwrite
-  // the register.  Fall back to generic code.
+  // If ESI is used as a base pointer, we must preserve it when doing rep movs.
   const X86RegisterInfo *TRI =
       static_cast<const X86RegisterInfo *>(DAG.getTarget().getRegisterInfo());
-  if (TRI->hasBasePointer(DAG.getMachineFunction()) &&
-      TRI->getBaseRegister() == X86::ESI)
-    return SDValue();
+  bool PreserveESI = TRI->hasBasePointer(DAG.getMachineFunction()) &&
+                     TRI->getBaseRegister() == X86::ESI;
 
   MVT AVT;
   if (Align & 1)
@@ -226,6 +224,14 @@
   unsigned BytesLeft = SizeVal % UBytes;
 
   SDValue InFlag(0, 0);
+
+  if (PreserveESI) {
+    // Save ESI to a physical register. (We cannot use a virtual register
+    // because if it is spilled we wouldn't be ablle to reload it.)
+    Chain = DAG.getCopyToReg(Chain, dl, X86::EDX,
+                             DAG.getRegister(X86::ESI, MVT::i32), InFlag);
+  }
+
   Chain  = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RCX :
                                                               X86::ECX,
                             Count, InFlag);
@@ -244,8 +250,16 @@
   SDValue RepMovs = DAG.getNode(X86ISD::REP_MOVS, dl, Tys, Ops,
                                 array_lengthof(Ops));
 
+  if (PreserveESI) {
+    InFlag = RepMovs.getValue(1);
+    RepMovs = DAG.getCopyToReg(RepMovs, dl, X86::ESI,
+                               DAG.getRegister(X86::EDX, MVT::i32), InFlag);
+  }
+
   SmallVector<SDValue, 4> Results;
   Results.push_back(RepMovs);
+
+
   if (BytesLeft) {
     // Handle the last 1 - 7 bytes.
     unsigned Offset = SizeVal - BytesLeft;
Index: test/CodeGen/X86/inline-asm-sp-clobber-memcpy.ll
===================================================================
--- test/CodeGen/X86/inline-asm-sp-clobber-memcpy.ll
+++ test/CodeGen/X86/inline-asm-sp-clobber-memcpy.ll
@@ -13,5 +13,7 @@
 
 ; CHECK-LABEL: test1:
 ; CHECK: movl %esp, %esi
-; CHECK-NOT: rep;movsl
+; CHECK: movl %esi, %edx
+; CHECK: rep;movsl
+; CHECK: movl %edx, %esi
 }
Index: test/CodeGen/X86/stack-align-memcpy.ll
===================================================================
--- test/CodeGen/X86/stack-align-memcpy.ll
+++ test/CodeGen/X86/stack-align-memcpy.ll
@@ -15,7 +15,9 @@
 ; CHECK-LABEL: test1:
 ; CHECK: andl $-16, %esp
 ; CHECK: movl %esp, %esi
-; CHECK-NOT: rep;movsl
+; CHECK: movl %esi, %edx
+; CHECK: rep;movsl
+; CHECK: movl %edx, %esi
 }
 
 ; PR19012
@@ -28,7 +30,9 @@
 
 ; CHECK-LABEL: test2:
 ; CHECK: movl %esp, %esi
-; CHECK-NOT: rep;movsl
+; CHECK: movl %esi, %edx
+; CHECK: rep;movsl
+; CHECK: movl %edx, %esi
 }
 
 ; Check that we do use rep movs if we make the alloca static.
@@ -39,5 +43,6 @@
   ret void
 
 ; CHECK-LABEL: test3:
+; CHECK-NOT: movl %esi, %edx
 ; CHECK: rep;movsl
 }
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D2968.1.patch
Type: text/x-patch
Size: 3134 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20140305/fd230c88/attachment.bin>


More information about the llvm-commits mailing list