[llvm] r204174 - X86 memcpy lowering: use "rep movs" even when esi is used as base pointer

Hans Wennborg hans at hanshq.net
Tue Mar 18 13:04:34 PDT 2014


Author: hans
Date: Tue Mar 18 15:04:34 2014
New Revision: 204174

URL: http://llvm.org/viewvc/llvm-project?rev=204174&view=rev
Log:
X86 memcpy lowering: use "rep movs" even when esi is used as base pointer

For functions where esi is used as base pointer, we would previously fall back
from lowering memcpy with "rep movs" because that clobbers esi.

With this patch, we just store esi in another physical register, and restore
it afterwards. This adds a little bit of register preassure, but the more
efficient memcpy should be worth it.

Differential Revision: http://llvm-reviews.chandlerc.com/D2968

Modified:
    llvm/trunk/lib/Target/X86/X86SelectionDAGInfo.cpp
    llvm/trunk/test/CodeGen/X86/inline-asm-sp-clobber-memcpy.ll
    llvm/trunk/test/CodeGen/X86/stack-align-memcpy.ll

Modified: llvm/trunk/lib/Target/X86/X86SelectionDAGInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SelectionDAGInfo.cpp?rev=204174&r1=204173&r2=204174&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SelectionDAGInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86SelectionDAGInfo.cpp Tue Mar 18 15:04:34 2014
@@ -200,13 +200,11 @@ X86SelectionDAGInfo::EmitTargetCodeForMe
       SrcPtrInfo.getAddrSpace() >= 256)
     return SDValue();
 
-  // ESI might be used as a base pointer, in that case we can't simply overwrite
-  // the register.  Fall back to generic code.
+  // If ESI is used as a base pointer, we must preserve it when doing rep movs.
   const X86RegisterInfo *TRI =
       static_cast<const X86RegisterInfo *>(DAG.getTarget().getRegisterInfo());
-  if (TRI->hasBasePointer(DAG.getMachineFunction()) &&
-      TRI->getBaseRegister() == X86::ESI)
-    return SDValue();
+  bool PreserveESI = TRI->hasBasePointer(DAG.getMachineFunction()) &&
+                     TRI->getBaseRegister() == X86::ESI;
 
   MVT AVT;
   if (Align & 1)
@@ -225,27 +223,45 @@ X86SelectionDAGInfo::EmitTargetCodeForMe
   SDValue Count = DAG.getIntPtrConstant(CountVal);
   unsigned BytesLeft = SizeVal % UBytes;
 
-  SDValue InFlag(0, 0);
+
+  if (PreserveESI) {
+    // Save ESI to a physical register. (We cannot use a virtual register
+    // because if it is spilled we wouldn't be able to reload it.)
+    // We don't glue this because the register dependencies are explicit.
+    Chain = DAG.getCopyToReg(Chain, dl, X86::EDX,
+                             DAG.getRegister(X86::ESI, MVT::i32));
+  }
+
+  SDValue InGlue(0, 0);
   Chain  = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RCX :
                                                               X86::ECX,
-                            Count, InFlag);
-  InFlag = Chain.getValue(1);
+                            Count, InGlue);
+  InGlue = Chain.getValue(1);
   Chain  = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RDI :
                                                               X86::EDI,
-                            Dst, InFlag);
-  InFlag = Chain.getValue(1);
+                            Dst, InGlue);
+  InGlue = Chain.getValue(1);
   Chain  = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RSI :
                                                               X86::ESI,
-                            Src, InFlag);
-  InFlag = Chain.getValue(1);
+                            Src, InGlue);
+  InGlue = Chain.getValue(1);
 
   SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
-  SDValue Ops[] = { Chain, DAG.getValueType(AVT), InFlag };
+  SDValue Ops[] = { Chain, DAG.getValueType(AVT), InGlue };
+  // FIXME: Make X86rep_movs explicitly use FCX, RDI, RSI instead of glue.
   SDValue RepMovs = DAG.getNode(X86ISD::REP_MOVS, dl, Tys, Ops,
                                 array_lengthof(Ops));
 
+  if (PreserveESI) {
+    InGlue = RepMovs.getValue(1);
+    RepMovs = DAG.getCopyToReg(RepMovs, dl, X86::ESI,
+                               DAG.getRegister(X86::EDX, MVT::i32), InGlue);
+  }
+
   SmallVector<SDValue, 4> Results;
   Results.push_back(RepMovs);
+
+
   if (BytesLeft) {
     // Handle the last 1 - 7 bytes.
     unsigned Offset = SizeVal - BytesLeft;

Modified: llvm/trunk/test/CodeGen/X86/inline-asm-sp-clobber-memcpy.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/inline-asm-sp-clobber-memcpy.ll?rev=204174&r1=204173&r2=204174&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/inline-asm-sp-clobber-memcpy.ll (original)
+++ llvm/trunk/test/CodeGen/X86/inline-asm-sp-clobber-memcpy.ll Tue Mar 18 15:04:34 2014
@@ -13,5 +13,7 @@ define void @test1(%struct.foo* nocaptur
 
 ; CHECK-LABEL: test1:
 ; CHECK: movl %esp, %esi
-; CHECK-NOT: rep;movsl
+; CHECK: movl %esi, %edx
+; CHECK: rep;movsl
+; CHECK: movl %edx, %esi
 }

Modified: llvm/trunk/test/CodeGen/X86/stack-align-memcpy.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/stack-align-memcpy.ll?rev=204174&r1=204173&r2=204174&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/stack-align-memcpy.ll (original)
+++ llvm/trunk/test/CodeGen/X86/stack-align-memcpy.ll Tue Mar 18 15:04:34 2014
@@ -15,7 +15,9 @@ define void @test1(%struct.foo* nocaptur
 ; CHECK-LABEL: test1:
 ; CHECK: andl $-16, %esp
 ; CHECK: movl %esp, %esi
-; CHECK-NOT: rep;movsl
+; CHECK: movl %esi, %edx
+; CHECK: rep;movsl
+; CHECK: movl %edx, %esi
 }
 
 ; PR19012
@@ -28,7 +30,9 @@ define void @test2(%struct.foo* nocaptur
 
 ; CHECK-LABEL: test2:
 ; CHECK: movl %esp, %esi
-; CHECK-NOT: rep;movsl
+; CHECK: movl %esi, %edx
+; CHECK: rep;movsl
+; CHECK: movl %edx, %esi
 }
 
 ; Check that we do use rep movs if we make the alloca static.
@@ -39,5 +43,6 @@ define void @test3(%struct.foo* nocaptur
   ret void
 
 ; CHECK-LABEL: test3:
+; CHECK-NOT: movl %esi, %edx
 ; CHECK: rep;movsl
 }





More information about the llvm-commits mailing list