[llvm-commits] [llvm] r42864 - in /llvm/trunk: lib/Transforms/Scalar/InstructionCombining.cpp test/Transforms/InstCombine/2007-10-10-EliminateMemCpy.ll

Fri Oct 12 10:30:55 PDT 2007

>>> In general memcpy handling is highly target specific.  I'd be wary
>>> of doing too
>>> much in target-independent code, although I think the general idea
>>> here is OK.
>>
>> I'm expecting the target-specific code generator to handle this if it
>> is handling unaligned memcpy.
>> No?
>
> Might well work, I'm not sure what order things are done in.  Could
> you try it?  PPC for example.

Heh, ouch. :)  We are doing the safe thing, but generating horrible  
horrible code (below).

The DAG combiner should be able to see through the masking/shifting  
to allow each byte of the store to directly use the corresponding  
load.  This is probably just a matter of having  
DAGCombiner::visitSTORE call TLI.SimplifyDemandedBits, indicating  
which bits it is actually reading.

Can someone look into adding this enhancement to the DAG combiner?

-Chris

void foo(char *P) {
   strcpy(P, "abc");
}

void bar(char *P) {
   strcpy(P, "a");
}

ARM:

_foo:
         ldr r3, LCPI0__foo
         ldrb r2, [r3, #+3]
         ldrb r1, [r3, #+2]
         orr r2, r1, r2, lsl #8
         ldrb r1, [r3, #+1]
         ldrb r3, [r3]
         orr r3, r3, r1, lsl #8
         orr r3, r3, r2, lsl #16
         strb r3, [r0]
         mov r2, r3, lsr #24
         strb r2, [r0, #+3]
         mov r2, r3, lsr #16
         strb r2, [r0, #+2]
         mov r3, r3, lsr #8
         strb r3, [r0, #+1]
         bx lr

_bar:
         ldr r3, LCPI0__bar
         ldrb r2, [r3, #+1]
         ldrb r3, [r3]
         orr r3, r3, r2, lsl #8
         strb r3, [r0]
         mov r3, r3, lsr #8
         strb r3, [r0, #+1]
         bx lr

PPC:

_foo:
         lis r2, ha16(_.str)
         lbz r4, lo16(_.str)(r2)
         la r2, lo16(_.str)(r2)
         lbz r5, 2(r2)
         lbz r6, 1(r2)
         lbz r2, 3(r2)
         slwi r4, r4, 8
         slwi r5, r5, 8
         rlwimi r4, r6, 0, 24, 31
         rlwimi r5, r2, 0, 24, 31
         slwi r2, r4, 16
         rlwimi r2, r5, 0, 16, 31
         srwi r4, r2, 24
         srwi r5, r2, 8
         srwi r6, r2, 16
         stb r4, 0(r3)
         stb r6, 1(r3)
         stb r5, 2(r3)
         stb r2, 3(r3)
         blr
_bar:
         lis r2, ha16(_.str1)
         lbz r4, lo16(_.str1)(r2)
         la r2, lo16(_.str1)(r2)
         lbz r2, 1(r2)
         slwi r4, r4, 8
         rlwimi r4, r2, 0, 24, 31
         srwi r2, r4, 8
         stb r2, 0(r3)
         stb r4, 1(r3)
         blr