[llvm-commits] [llvm] r43176 - in /llvm/trunk/lib/Target/ARM: ARMISelLowering.cpp ARMISelLowering.h

Evan Cheng evan.cheng at apple.com
Mon Oct 22 14:31:06 PDT 2007


Hi Rafael,

There are a couple of problems with this patch.

1. This breaks a number of tests for me:
llc-beta /MultiSource/Applications/JM/ldecod/ldecod
llc /MultiSource/Applications/JM/ldecod/ldecod
llc /External/SPEC/CINT2000/254.gap/254.gap
llc /MultiSource/Benchmarks/MiBench/office-ispell/office-ispell
llc-beta /MultiSource/Benchmarks/MiBench/office-ispell/office-ispell
llc-beta /External/SPEC/CINT2000/254.gap/254.gap

LowerMEMCPYInline() is asserting when the memcpy is 4-byte aligned but
the size isn't a multiple of 4. I'm going to remove the assertion and
issue byte / halfword loads / stores to handle the trailing bytes. Please
let me know if that's undesirable.

2. The stores are unnecessarily serialized. Each of them should use
the load TokenFactor as its input chain. All the chains produced by the
stores should then be fed into a new TokenFactor. I'll fix this.

Thanks,

Evan

On Oct 19, 2007, at 7:35 AM, Rafael Espindola wrote:

> Author: rafael
> Date: Fri Oct 19 09:35:17 2007
> New Revision: 43176
>
> URL: http://llvm.org/viewvc/llvm-project?rev=43176&view=rev
> Log:
> split LowerMEMCPY into LowerMEMCPYCall and LowerMEMCPYInline in the  
> ARM backend.
>
> Modified:
>    llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
>    llvm/trunk/lib/Target/ARM/ARMISelLowering.h
>
> Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp?rev=43176&r1=43175&r2=43176&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp (original)
> +++ llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp Fri Oct 19  
> 09:35:17 2007
> @@ -1288,40 +1288,73 @@
> }
>
> SDOperand ARMTargetLowering::LowerMEMCPY(SDOperand Op, SelectionDAG  
> &DAG) {
> -  SDOperand Chain = Op.getOperand(0);
> -  SDOperand Dest = Op.getOperand(1);
> -  SDOperand Src = Op.getOperand(2);
> -  SDOperand Count = Op.getOperand(3);
> -  unsigned Align =
> -    (unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue();
> +  SDOperand ChainOp = Op.getOperand(0);
> +  SDOperand DestOp = Op.getOperand(1);
> +  SDOperand SourceOp = Op.getOperand(2);
> +  SDOperand CountOp = Op.getOperand(3);
> +  SDOperand AlignOp = Op.getOperand(4);
> +  SDOperand AlwaysInlineOp = Op.getOperand(5);
> +
> +  bool AlwaysInline = (bool)cast<ConstantSDNode>(AlwaysInlineOp)- 
> >getValue();
> +  unsigned Align = (unsigned)cast<ConstantSDNode>(AlignOp)- 
> >getValue();
>   if (Align == 0) Align = 1;
>
> -  ConstantSDNode *I = dyn_cast<ConstantSDNode>(Count);
> -  // Just call memcpy if:
> -  // not 4-byte aligned
> -  // size is unknown
> -  // size is >= the threshold.
> -  if ((Align & 3) != 0 ||
> -       !I ||
> -       I->getValue() >= 64 ||
> -       (I->getValue() & 3) != 0) {
> -    MVT::ValueType IntPtr = getPointerTy();
> -    TargetLowering::ArgListTy Args;
> -    TargetLowering::ArgListEntry Entry;
> -    Entry.Ty = getTargetData()->getIntPtrType();
> -    Entry.Node = Op.getOperand(1); Args.push_back(Entry);
> -    Entry.Node = Op.getOperand(2); Args.push_back(Entry);
> -    Entry.Node = Op.getOperand(3); Args.push_back(Entry);
> -    std::pair<SDOperand,SDOperand> CallResult =
> +  // If size is unknown, call memcpy.
> +  ConstantSDNode *I = dyn_cast<ConstantSDNode>(CountOp);
> +  if (!I) {
> +    assert(!AlwaysInline && "Cannot inline copy of unknown size");
> +    return LowerMEMCPYCall(ChainOp, DestOp, SourceOp, CountOp, DAG);
> +  }
> +  unsigned Size = I->getValue();
> +
> +  if (AlwaysInline)
> +    return LowerMEMCPYInline(ChainOp, DestOp, SourceOp, Size,  
> Align, DAG);
> +
> +  // The libc version is likely to be faster for the following  
> cases. It can
> +  // use the address value and run time information about the CPU.
> +  // With glibc 2.6.1 on a core 2, coping an array of 100M longs  
> was 30% faster
> +
> +  // If not DWORD aligned, call memcpy.
> +  if ((Align & 3) != 0)
> +    return LowerMEMCPYCall(ChainOp, DestOp, SourceOp, CountOp, DAG);
> +
> +  // If size is more than the threshold, call memcpy.
> +  //  if (Size > Subtarget->getMinRepStrSizeThreshold())
> +  if (Size >= 64)
> +    return LowerMEMCPYCall(ChainOp, DestOp, SourceOp, CountOp, DAG);
> +
> +  return LowerMEMCPYInline(ChainOp, DestOp, SourceOp, Size, Align,  
> DAG);
> +}
> +
> +SDOperand ARMTargetLowering::LowerMEMCPYCall(SDOperand Chain,
> +                                             SDOperand Dest,
> +                                             SDOperand Source,
> +                                             SDOperand Count,
> +                                             SelectionDAG &DAG) {
> +  MVT::ValueType IntPtr = getPointerTy();
> +  TargetLowering::ArgListTy Args;
> +  TargetLowering::ArgListEntry Entry;
> +  Entry.Ty = getTargetData()->getIntPtrType();
> +  Entry.Node = Dest; Args.push_back(Entry);
> +  Entry.Node = Source; Args.push_back(Entry);
> +  Entry.Node = Count; Args.push_back(Entry);
> +  std::pair<SDOperand,SDOperand> CallResult =
>       LowerCallTo(Chain, Type::VoidTy, false, false, CallingConv::C,  
> false,
>                   DAG.getExternalSymbol("memcpy", IntPtr), Args, DAG);
> -    return CallResult.second;
> -  }
> +  return CallResult.second;
> +}
> +
> +SDOperand ARMTargetLowering::LowerMEMCPYInline(SDOperand Chain,
> +                                               SDOperand Dest,
> +                                               SDOperand Source,
> +                                               unsigned Size,
> +                                               unsigned Align,
> +                                               SelectionDAG &DAG) {
>
> -  // Otherwise do repeated 4-byte loads and stores.  To be improved.
> -  assert((I->getValue() & 3) == 0);
> +  // Do repeated 4-byte loads and stores.  To be improved.
> +  assert((Size& 3) == 0);
>   assert((Align & 3) == 0);
> -  unsigned NumMemOps = I->getValue() >> 2;
> +  unsigned NumMemOps = Size >> 2;
>   unsigned EmittedNumMemOps = 0;
>   unsigned SrcOff = 0, DstOff = 0;
>   MVT::ValueType VT = MVT::i32;
> @@ -1337,7 +1370,7 @@
>     unsigned i;
>     for (i=0; i<MAX_LOADS_IN_LDM && EmittedNumMemOps+i < NumMemOps; i 
> ++) {
>       Loads[i] = DAG.getLoad(VT, Chain,
> -                             DAG.getNode(ISD::ADD, VT, Src,
> +                             DAG.getNode(ISD::ADD, VT, Source,
>                                          DAG.getConstant(SrcOff, VT)),
>                              NULL, 0);
>       LoadChains[i] = Loads[i].getValue(1);
>
> Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.h?rev=43176&r1=43175&r2=43176&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/ARM/ARMISelLowering.h (original)
> +++ llvm/trunk/lib/Target/ARM/ARMISelLowering.h Fri Oct 19 09:35:17  
> 2007
> @@ -135,6 +135,14 @@
>     SDOperand LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG);
>     SDOperand LowerBR_JT(SDOperand Op, SelectionDAG &DAG);
>     SDOperand LowerMEMCPY(SDOperand Op, SelectionDAG &DAG);
> +    SDOperand LowerMEMCPYCall(SDOperand Chain, SDOperand Dest,
> +                              SDOperand Source, SDOperand Count,
> +                              SelectionDAG &DAG);
> +    SDOperand LowerMEMCPYInline(SDOperand Chain, SDOperand Dest,
> +                                SDOperand Source, unsigned Size,
> +                                unsigned Align, SelectionDAG &DAG);
> +
> +
>   };
> }
>
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits




More information about the llvm-commits mailing list