[llvm-commits] [llvm] r144578 - in /llvm/trunk: lib/Target/ARM/ARMFastISel.cpp test/CodeGen/ARM/fast-isel-intrinsic.ll
Chad Rosier
mcrosier at apple.com
Mon Nov 14 14:46:17 PST 2011
Author: mcrosier
Date: Mon Nov 14 16:46:17 2011
New Revision: 144578
URL: http://llvm.org/viewvc/llvm-project?rev=144578&view=rev
Log:
Add support for inlining small memcpys.
rdar://10412592
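In IR terms, a memcpy with a constant length of at most 16 bytes, such as this
10-byte copy taken verbatim from the new t4 test added below, is now expanded
inline into integer load/store pairs instead of a call to memcpy:

  call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 16), i32 10, i32 1, i1 false)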
Modified:
llvm/trunk/lib/Target/ARM/ARMFastISel.cpp
llvm/trunk/test/CodeGen/ARM/fast-isel-intrinsic.ll
Modified: llvm/trunk/lib/Target/ARM/ARMFastISel.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMFastISel.cpp?rev=144578&r1=144577&r2=144578&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMFastISel.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMFastISel.cpp Mon Nov 14 16:46:17 2011
@@ -185,6 +185,9 @@
bool ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr);
bool ARMComputeAddress(const Value *Obj, Address &Addr);
void ARMSimplifyAddress(Address &Addr, EVT VT, bool useAM3);
+  bool ARMIsMemXferSmall(uint64_t Len);
+  bool ARMTryEmitSmallMemXfer(Address Dest, Address Src, uint64_t Len,
+                              bool isMemCpy);
unsigned ARMEmitIntExt(EVT SrcVT, unsigned SrcReg, EVT DestVT, bool isZExt);
unsigned ARMMaterializeFP(const ConstantFP *CFP, EVT VT);
unsigned ARMMaterializeInt(const Constant *C, EVT VT);
@@ -2193,18 +2196,76 @@
return true;
}
+bool ARMFastISel::ARMIsMemXferSmall(uint64_t Len) {
+  return Len <= 16;
+}
+
+bool ARMFastISel::ARMTryEmitSmallMemXfer(Address Dest, Address Src,
+                                         uint64_t Len, bool isMemCpy) {
+  // FIXME: Memmoves require a little more care because their source and
+  // destination may overlap.
+  if (!isMemCpy)
+    return false;
+
+  // Make sure we don't bloat code by inlining very large memcpys.
+  if (!ARMIsMemXferSmall(Len))
+    return false;
+
+  // We don't care about alignment here since we just emit integer accesses.
+  while (Len) {
+    MVT VT;
+    if (Len >= 4)
+      VT = MVT::i32;
+    else if (Len >= 2)
+      VT = MVT::i16;
+    else {
+      assert(Len == 1 && "Expected a length of 1!");
+      VT = MVT::i8;
+    }
+
+    unsigned ResultReg;
+    bool RV = ARMEmitLoad(VT, ResultReg, Src);
+    assert(RV && "Should be able to handle this load.");
+    RV = ARMEmitStore(VT, ResultReg, Dest);
+    assert(RV && "Should be able to handle this store.");
+    (void)RV; // RV is only used by the asserts above.
+
+    unsigned Size = VT.getSizeInBits()/8;
+    Len -= Size;
+    Dest.Offset += Size;
+    Src.Offset += Size;
+  }
+
+  return true;
+}
+
bool ARMFastISel::SelectIntrinsicCall(const IntrinsicInst &I) {
// FIXME: Handle more intrinsics.
switch (I.getIntrinsicID()) {
default: return false;
case Intrinsic::memcpy:
case Intrinsic::memmove: {
- // FIXME: Small memcpy/memmove's are common enough that we want to do them
- // without a call if possible.
const MemTransferInst &MTI = cast<MemTransferInst>(I);
// Don't handle volatile.
if (MTI.isVolatile())
return false;
+
+    // Check that this is a memcpy before calling ComputeAddress; otherwise
+    // we would emit dead address-computation code, because memmoves are not
+    // inlined yet.
+    bool isMemCpy = (I.getIntrinsicID() == Intrinsic::memcpy);
+    if (isa<ConstantInt>(MTI.getLength()) && isMemCpy) {
+      // Small memcpys and memmoves are common enough that we want to do
+      // them without a call if possible.
+      uint64_t Len = cast<ConstantInt>(MTI.getLength())->getZExtValue();
+      if (ARMIsMemXferSmall(Len)) {
+        Address Dest, Src;
+        if (!ARMComputeAddress(MTI.getRawDest(), Dest) ||
+            !ARMComputeAddress(MTI.getRawSource(), Src))
+          return false;
+        if (ARMTryEmitSmallMemXfer(Dest, Src, Len, isMemCpy))
+          return true;
+      }
+    }
if (!MTI.getLength()->getType()->isIntegerTy(32))
return false;
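To see the decomposition concretely, here is a minimal standalone C++ sketch
(illustrative only, not part of the patch) of the width-selection loop in
ARMTryEmitSmallMemXfer above: it greedily picks the widest integer access (4,
then 2, then 1 bytes) until the length is exhausted.

  // Illustrative sketch only, not LLVM code: mirrors the greedy width
  // selection in ARMTryEmitSmallMemXfer above.
  #include <cstdint>
  #include <cstdio>

  static void sketchMemXferWidths(uint64_t Len) {
    while (Len) {
      // Widest access first: i32, then i16, then i8.
      unsigned Size = Len >= 4 ? 4 : (Len >= 2 ? 2 : 1);
      std::printf("emit %u-byte load/store pair\n", Size);
      Len -= Size;
    }
  }

  int main() {
    sketchMemXferWidths(10); // prints 4, 4, 2
    return 0;
  }

For Len = 10 this yields 4+4+2, matching the ldr/str, ldr/str, ldrh/strh
sequence the new t4 test below checks for.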
Modified: llvm/trunk/test/CodeGen/ARM/fast-isel-intrinsic.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/fast-isel-intrinsic.ll?rev=144578&r1=144577&r2=144578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/fast-isel-intrinsic.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/fast-isel-intrinsic.ll Mon Nov 14 16:46:17 2011
@@ -33,7 +33,7 @@
; ARM: ldr r0, [r0]
; ARM: add r1, r0, #4
; ARM: add r0, r0, #16
-; ARM: movw r2, #10
+; ARM: movw r2, #17
; ARM: str r0, [sp] @ 4-byte Spill
; ARM: mov r0, r1
; ARM: ldr r1, [sp] @ 4-byte Reload
@@ -43,11 +43,11 @@
; THUMB: ldr r0, [r0]
; THUMB: adds r1, r0, #4
; THUMB: adds r0, #16
-; THUMB: movs r2, #10
+; THUMB: movs r2, #17
; THUMB: movt r2, #0
; THUMB: mov r0, r1
; THUMB: bl _memcpy
- call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 16), i32 10, i32 1, i1 false)
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 16), i32 17, i32 1, i1 false)
ret void
}
@@ -75,4 +75,32 @@
ret void
}
+define void @t4() nounwind ssp {
+; ARM: t4
+; ARM: ldr r0, LCPI3_0
+; ARM: ldr r0, [r0]
+; ARM: ldr r1, LCPI3_1
+; ARM: ldr r1, [r1]
+; ARM: ldr r2, [r1, #16]
+; ARM: str r2, [r0, #4]
+; ARM: ldr r2, [r1, #20]
+; ARM: str r2, [r0, #8]
+; ARM: ldrh r1, [r1, #24]
+; ARM: strh r1, [r0, #12]
+; ARM: bx lr
+; THUMB: ldr.n r0, LCPI3_0
+; THUMB: ldr r0, [r0]
+; THUMB: ldr.n r1, LCPI3_1
+; THUMB: ldr r1, [r1]
+; THUMB: ldr r2, [r1, #16]
+; THUMB: str r2, [r0, #4]
+; THUMB: ldr r2, [r1, #20]
+; THUMB: str r2, [r0, #8]
+; THUMB: ldrh r1, [r1, #24]
+; THUMB: strh r1, [r0, #12]
+; THUMB: bx lr
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 16), i32 10, i32 1, i1 false)
+ ret void
+}
+
declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
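A note on the test changes: ARMIsMemXferSmall(Len) is Len <= 16, so the length
in the pre-existing memcpy test was bumped from 10 to 17 to keep it above the
inlining threshold (17 > 16) and still exercise the bl _memcpy call path,
while the new t4 test covers the inlined path with a 10-byte copy (10 <= 16).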