[llvm-commits] [llvm] r42864 - in /llvm/trunk: lib/Transforms/Scalar/InstructionCombining.cpp test/Transforms/InstCombine/2007-10-10-EliminateMemCpy.ll
Chris Lattner
clattner at apple.com
Fri Oct 12 10:30:55 PDT 2007
>>> In general, memcpy handling is highly target-specific. I'd be wary
>>> of doing too much in target-independent code, although I think the
>>> general idea here is OK.
>>
>> I'm expecting the target-specific code generator to handle this if it
>> is handling unaligned memcpy.
>> No?
>
> Might well work, I'm not sure what order things are done in. Could
> you try it? PPC for example.
Heh, ouch. :) We are doing the safe thing, but generating horrible
horrible code (below).
The DAG combiner should be able to see through the masking/shifting
to allow each byte of the store to directly use the corresponding
load. This is probably just a matter of having
DAGCombiner::visitSTORE call TLI.SimplifyDemandedBits, indicating
which bits it is actually reading.
Can someone look into adding this enhancement to the DAG combiner?
-Chris
void foo(char *P) {
strcpy(P, "abc");
}
void bar(char *P) {
strcpy(P, "a");
}
ARM:
_foo:
ldr r3, LCPI0__foo
ldrb r2, [r3, #+3]
ldrb r1, [r3, #+2]
orr r2, r1, r2, lsl #8
ldrb r1, [r3, #+1]
ldrb r3, [r3]
orr r3, r3, r1, lsl #8
orr r3, r3, r2, lsl #16
strb r3, [r0]
mov r2, r3, lsr #24
strb r2, [r0, #+3]
mov r2, r3, lsr #16
strb r2, [r0, #+2]
mov r3, r3, lsr #8
strb r3, [r0, #+1]
bx lr
_bar:
ldr r3, LCPI0__bar
ldrb r2, [r3, #+1]
ldrb r3, [r3]
orr r3, r3, r2, lsl #8
strb r3, [r0]
mov r3, r3, lsr #8
strb r3, [r0, #+1]
bx lr
PPC:
_foo:
lis r2, ha16(_.str)
lbz r4, lo16(_.str)(r2)
la r2, lo16(_.str)(r2)
lbz r5, 2(r2)
lbz r6, 1(r2)
lbz r2, 3(r2)
slwi r4, r4, 8
slwi r5, r5, 8
rlwimi r4, r6, 0, 24, 31
rlwimi r5, r2, 0, 24, 31
slwi r2, r4, 16
rlwimi r2, r5, 0, 16, 31
srwi r4, r2, 24
srwi r5, r2, 8
srwi r6, r2, 16
stb r4, 0(r3)
stb r6, 1(r3)
stb r5, 2(r3)
stb r2, 3(r3)
blr
_bar:
lis r2, ha16(_.str1)
lbz r4, lo16(_.str1)(r2)
la r2, lo16(_.str1)(r2)
lbz r2, 1(r2)
slwi r4, r4, 8
rlwimi r4, r2, 0, 24, 31
srwi r2, r4, 8
stb r2, 0(r3)
stb r4, 1(r3)
blr
More information about the llvm-commits
mailing list