[PATCH] memcpy lowering: use "rep movs" even when esi is used as base pointer
Hans Wennborg
hans at chromium.org
Wed Mar 5 10:05:56 PST 2014
Not sure who the best reviewer for this would be, so just putting it out there.
For functions where esi is used as base pointer, we fall back from lowering memcpy with "rep movs" because it clobbers that register.
If we wanted to, we could just store esi in another physical register and restore it afterwards.
For example, for test/CodeGen/X86/inline-asm-sp-clobber-memcpy.ll we currently generate:
_test1: ## @test1
## BB#0:
pushl %ebp
movl %esp, %ebp
pushl %edi
pushl %esi
andl $-16, %esp
subl $96, %esp
movl %esp, %esi
movl 16(%ebp), %edi
movl 8(%ebp), %eax
movl 84(%eax), %ecx
movl %ecx, 88(%esp)
movl 80(%eax), %ecx
movl %ecx, 84(%esp)
movl 76(%eax), %ecx
movl %ecx, 80(%esp)
movl 72(%eax), %ecx
movl %ecx, 76(%esp)
movl 68(%eax), %ecx
movl %ecx, 72(%esp)
movl 64(%eax), %ecx
movl %ecx, 68(%esp)
movl 60(%eax), %ecx
movl %ecx, 64(%esp)
movl 56(%eax), %ecx
movl %ecx, 60(%esp)
movl 52(%eax), %ecx
movl %ecx, 56(%esp)
movl 48(%eax), %ecx
movl %ecx, 52(%esp)
movl 44(%eax), %ecx
movl %ecx, 48(%esp)
movl 40(%eax), %ecx
movl %ecx, 44(%esp)
movl 36(%eax), %ecx
movl %ecx, 40(%esp)
movl 32(%eax), %ecx
movl %ecx, 36(%esp)
movl 28(%eax), %ecx
movl %ecx, 32(%esp)
movl 24(%eax), %ecx
movl %ecx, 28(%esp)
movl 20(%eax), %ecx
movl %ecx, 24(%esp)
movl 16(%eax), %ecx
movl %ecx, 20(%esp)
movl 12(%eax), %ecx
movl %ecx, 16(%esp)
movl 8(%eax), %ecx
movl %ecx, 12(%esp)
movl (%eax), %ecx
movl 4(%eax), %eax
movl %eax, 8(%esp)
movl %ecx, 4(%esp)
movl %edi, (%esp)
calll L_bar$stub
## InlineAsm Start
xorl %esp, %esp
## InlineAsm End
leal -8(%ebp), %esp
popl %esi
popl %edi
popl %ebp
retl
But with the attached patch, we generate:
_test1: ## @test1
## BB#0:
pushl %ebp
movl %esp, %ebp
pushl %ebx
pushl %edi
pushl %esi
andl $-16, %esp
subl $96, %esp
movl %esp, %esi
movl 8(%ebp), %eax
movl 16(%ebp), %ebx
movl %ebx, (%esp)
leal 4(%esp), %edi
movl $22, %ecx
movl %esi, %edx
movl %eax, %esi
rep;movsl
movl %edx, %esi
calll L_bar$stub
## InlineAsm Start
xorl %esp, %esp
## InlineAsm End
leal -12(%ebp), %esp
popl %esi
popl %edi
popl %ebx
popl %ebp
retl
http://llvm-reviews.chandlerc.com/D2968
Files:
lib/Target/X86/X86SelectionDAGInfo.cpp
test/CodeGen/X86/inline-asm-sp-clobber-memcpy.ll
test/CodeGen/X86/stack-align-memcpy.ll
Index: lib/Target/X86/X86SelectionDAGInfo.cpp
===================================================================
--- lib/Target/X86/X86SelectionDAGInfo.cpp
+++ lib/Target/X86/X86SelectionDAGInfo.cpp
@@ -200,13 +200,11 @@
SrcPtrInfo.getAddrSpace() >= 256)
return SDValue();
- // ESI might be used as a base pointer, in that case we can't simply overwrite
- // the register. Fall back to generic code.
+ // If ESI is used as a base pointer, we must preserve it when doing rep movs.
const X86RegisterInfo *TRI =
static_cast<const X86RegisterInfo *>(DAG.getTarget().getRegisterInfo());
- if (TRI->hasBasePointer(DAG.getMachineFunction()) &&
- TRI->getBaseRegister() == X86::ESI)
- return SDValue();
+ bool PreserveESI = TRI->hasBasePointer(DAG.getMachineFunction()) &&
+ TRI->getBaseRegister() == X86::ESI;
MVT AVT;
if (Align & 1)
@@ -226,6 +224,14 @@
unsigned BytesLeft = SizeVal % UBytes;
SDValue InFlag(0, 0);
+
+ if (PreserveESI) {
+ // Save ESI to a physical register. (We cannot use a virtual register
+ // because if it is spilled we wouldn't be able to reload it.)
+ Chain = DAG.getCopyToReg(Chain, dl, X86::EDX,
+ DAG.getRegister(X86::ESI, MVT::i32), InFlag);
+ }
+
Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RCX :
X86::ECX,
Count, InFlag);
@@ -244,8 +250,16 @@
SDValue RepMovs = DAG.getNode(X86ISD::REP_MOVS, dl, Tys, Ops,
array_lengthof(Ops));
+ if (PreserveESI) {
+ InFlag = RepMovs.getValue(1);
+ RepMovs = DAG.getCopyToReg(RepMovs, dl, X86::ESI,
+ DAG.getRegister(X86::EDX, MVT::i32), InFlag);
+ }
+
SmallVector<SDValue, 4> Results;
Results.push_back(RepMovs);
+
+
if (BytesLeft) {
// Handle the last 1 - 7 bytes.
unsigned Offset = SizeVal - BytesLeft;
Index: test/CodeGen/X86/inline-asm-sp-clobber-memcpy.ll
===================================================================
--- test/CodeGen/X86/inline-asm-sp-clobber-memcpy.ll
+++ test/CodeGen/X86/inline-asm-sp-clobber-memcpy.ll
@@ -13,5 +13,7 @@
; CHECK-LABEL: test1:
; CHECK: movl %esp, %esi
-; CHECK-NOT: rep;movsl
+; CHECK: movl %esi, %edx
+; CHECK: rep;movsl
+; CHECK: movl %edx, %esi
}
Index: test/CodeGen/X86/stack-align-memcpy.ll
===================================================================
--- test/CodeGen/X86/stack-align-memcpy.ll
+++ test/CodeGen/X86/stack-align-memcpy.ll
@@ -15,7 +15,9 @@
; CHECK-LABEL: test1:
; CHECK: andl $-16, %esp
; CHECK: movl %esp, %esi
-; CHECK-NOT: rep;movsl
+; CHECK: movl %esi, %edx
+; CHECK: rep;movsl
+; CHECK: movl %edx, %esi
}
; PR19012
@@ -28,7 +30,9 @@
; CHECK-LABEL: test2:
; CHECK: movl %esp, %esi
-; CHECK-NOT: rep;movsl
+; CHECK: movl %esi, %edx
+; CHECK: rep;movsl
+; CHECK: movl %edx, %esi
}
; Check that we do use rep movs if we make the alloca static.
@@ -39,5 +43,6 @@
ret void
; CHECK-LABEL: test3:
+; CHECK-NOT: movl %esi, %edx
; CHECK: rep;movsl
}
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D2968.1.patch
Type: text/x-patch
Size: 3134 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20140305/fd230c88/attachment.bin>
More information about the llvm-commits
mailing list