[llvm-commits] [llvm] r157793 - in /llvm/trunk/lib/Target/ARM: ARMISelLowering.cpp ARMISelLowering.h ARMInstrInfo.td

Evan Cheng evan.cheng at apple.com
Thu May 31 23:11:51 PDT 2012


Hi Manman,

Sorry about the late review. Some comments below:

1) We seem to have lost the byval alignment info? Without alignment info, it's not guaranteed to be safe to issue ldr / str. iOS has support for unaligned loads, but they may cause an exception.
2) With alignment info, it might be possible to use NEON loads and stores when they are available; see the sketch after this list.
3) Please factor out the expansion code into a function.
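
For 1) and 2), here is roughly what I have in mind. This is only a sketch: it assumes the pseudo carries the byval alignment as an extra immediate operand (it doesn't today), and the NEON choice is illustrative, not something I've verified:

  // Sketch: pick the widest copy unit the known alignment makes safe,
  // instead of unconditionally emitting ldr / str.
  unsigned Align = MI->getOperand(3).getImm(); // hypothetical operand
  unsigned UnitSize;
  if (Align & 1)
    UnitSize = 1;   // byte-aligned only: ldrb / strb
  else if (Align & 2)
    UnitSize = 2;   // halfword-aligned: ldrh / strh
  else if (Align >= 16 && Subtarget->hasNEON())
    UnitSize = 16;  // 16-byte aligned with NEON: vld1 / vst1
  else
    UnitSize = 4;   // word-aligned: ldr / str as in this patch

The unrolled copies, the loop, and the epilogue below would then step by UnitSize rather than a hard-coded 4.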

I can't help but feel we should do this in a target-independent way. It's annoying to me that codegen has a couple of places that expand memcpys. Perhaps we can add a target-neutral memcpy instruction, and each target can supply a routine to expand it during the pseudo-expansion pass. That seems cleaner to me and would allow us to handle memcpy calls from LLVM IR and byval copies the same way.
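
Concretely (hypothetical names; none of this exists yet), something like a generic pseudo plus a per-target hook:

  // Hypothetical target-neutral pseudo, emitted by SelectionDAG for
  // both @llvm.memcpy calls and byval argument copies:
  //
  //   MEMCPY %dst, %src, <size>, <align>
  //
  // with a hook on TargetLowering that each target overrides to
  // expand it during the ExpandISelPseudos pass:
  virtual MachineBasicBlock *
  EmitMemCpyPseudo(MachineInstr *MI, MachineBasicBlock *MBB) const;

The ARM expansion in this patch would then just become our implementation of that hook, and other targets would pick up byval and memcpy expansion the same way.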

Evan

On May 31, 2012, at 7:44 PM, Manman Ren <mren at apple.com> wrote:

> Author: mren
> Date: Thu May 31 21:44:42 2012
> New Revision: 157793
> 
> URL: http://llvm.org/viewvc/llvm-project?rev=157793&view=rev
> Log:
> ARM: support struct byval in llvm
> 
> We handle struct byval by inserting a pseudo op, which is expanded into a
> loop during ExpandISelPseudos.
> A separate patch for clang will be submitted to enable struct byval.
> 
> rdar://9877866
> 
> Modified:
>    llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
>    llvm/trunk/lib/Target/ARM/ARMISelLowering.h
>    llvm/trunk/lib/Target/ARM/ARMInstrInfo.td
> 
> Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp?rev=157793&r1=157792&r2=157793&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp (original)
> +++ llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp Thu May 31 21:44:42 2012
> @@ -52,6 +52,7 @@
> 
> STATISTIC(NumTailCalls, "Number of tail calls");
> STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");
> +STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments");
> 
> // This option should go away when tail calls fully work.
> static cl::opt<bool>
> @@ -1424,21 +1425,21 @@
>         CCInfo.clearFirstByValReg();
>       }
> 
> -      unsigned LocMemOffset = VA.getLocMemOffset();
> -      SDValue StkPtrOff = DAG.getIntPtrConstant(LocMemOffset);
> -      SDValue Dst = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr,
> -                                StkPtrOff);
> -      SDValue SrcOffset = DAG.getIntPtrConstant(4*offset);
> -      SDValue Src = DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg, SrcOffset);
> -      SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset,
> -                                         MVT::i32);
> -      MemOpChains.push_back(DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode,
> -                                          Flags.getByValAlign(),
> -                                          /*isVolatile=*/false,
> -                                          /*AlwaysInline=*/false,
> -                                          MachinePointerInfo(0),
> -                                          MachinePointerInfo(0)));
> -
> +      if (Flags.getByValSize() - 4*offset > 0) {
> +        unsigned LocMemOffset = VA.getLocMemOffset();
> +        SDValue StkPtrOff = DAG.getIntPtrConstant(LocMemOffset);
> +        SDValue Dst = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr,
> +                                  StkPtrOff);
> +        SDValue SrcOffset = DAG.getIntPtrConstant(4*offset);
> +        SDValue Src = DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg, SrcOffset);
> +        SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset,
> +                                           MVT::i32);
> +
> +        SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
> +        SDValue Ops[] = { Chain, Dst, Src, SizeNode};
> +        MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs,
> +                                          Ops, array_lengthof(Ops)));
> +      }
>     } else if (!IsSibCall) {
>       assert(VA.isMemLoc());
> 
> @@ -6593,6 +6594,252 @@
>     // return last added BB
>     return SinkBB;
>   }
> +  case ARM::COPY_STRUCT_BYVAL_I32: {
> +    ++NumLoopByVals;
> +    // This pseudo instruction has 3 operands: dst, src, size
> +    // We expand it to a loop if size > Subtarget->getMaxInlineSizeThreshold().
> +    // Otherwise, we will generate unrolled scalar copies.
> +    const BasicBlock *LLVM_BB = BB->getBasicBlock();
> +    MachineFunction::iterator It = BB;
> +    ++It;
> +
> +    unsigned dest = MI->getOperand(0).getReg();
> +    unsigned src = MI->getOperand(1).getReg();
> +    unsigned size = MI->getOperand(2).getImm();
> +    DebugLoc dl = MI->getDebugLoc();
> +    unsigned BytesLeft = size & 3;
> +    unsigned LoopSize = size - BytesLeft;
> +
> +    bool isThumb2 = Subtarget->isThumb2();
> +    MachineFunction *MF = BB->getParent();
> +    MachineRegisterInfo &MRI = MF->getRegInfo();
> +    unsigned ldrOpc = isThumb2 ? ARM::t2LDR_POST : ARM::LDR_POST_IMM;
> +    unsigned strOpc = isThumb2 ? ARM::t2STR_POST : ARM::STR_POST_IMM;
> +
> +    const TargetRegisterClass *TRC = isThumb2 ?
> +      (const TargetRegisterClass*)&ARM::tGPRRegClass :
> +      (const TargetRegisterClass*)&ARM::GPRRegClass;
> +
> +    if (size <= Subtarget->getMaxInlineSizeThreshold()) {
> +      // Use LDR and STR to copy.
> +      // [scratch, srcOut] = LDR_POST(srcIn, 4)
> +      // [destOut] = STR_POST(scratch, destIn, 4)
> +      unsigned srcIn = src;
> +      unsigned destIn = dest;
> +      for (unsigned i = 0; i < LoopSize; i+=4) {
> +        unsigned scratch = MRI.createVirtualRegister(TRC);
> +        unsigned srcOut = MRI.createVirtualRegister(TRC);
> +        unsigned destOut = MRI.createVirtualRegister(TRC);
> +        if (isThumb2) {
> +          AddDefaultPred(BuildMI(*BB, MI, dl,
> +            TII->get(ldrOpc), scratch)
> +            .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(4));
> +
> +          AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut)
> +            .addReg(scratch).addReg(destIn)
> +            .addImm(4));
> +        } else {
> +          AddDefaultPred(BuildMI(*BB, MI, dl,
> +            TII->get(ldrOpc), scratch)
> +            .addReg(srcOut, RegState::Define).addReg(srcIn).addReg(0).addImm(4));
> +
> +          AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut)
> +            .addReg(scratch).addReg(destIn)
> +            .addReg(0).addImm(4));
> +        }
> +        srcIn = srcOut;
> +        destIn = destOut;
> +      }
> +
> +      // Handle the leftover bytes with LDRB and STRB.
> +      // [scratch, srcOut] = LDRB_POST(srcIn, 1)
> +      // [destOut] = STRB_POST(scratch, destIn, 1)
> +      ldrOpc = isThumb2 ? ARM::t2LDRB_POST : ARM::LDRB_POST_IMM;
> +      strOpc = isThumb2 ? ARM::t2STRB_POST : ARM::STRB_POST_IMM;
> +      for (unsigned i = 0; i < BytesLeft; i++) {
> +        unsigned scratch = MRI.createVirtualRegister(TRC);
> +        unsigned srcOut = MRI.createVirtualRegister(TRC);
> +        unsigned destOut = MRI.createVirtualRegister(TRC);
> +        if (isThumb2) {
> +          AddDefaultPred(BuildMI(*BB, MI, dl,
> +            TII->get(ldrOpc),scratch)
> +            .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(1));
> +
> +          AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut)
> +            .addReg(scratch).addReg(destIn)
> +            .addReg(0).addImm(1));
> +        } else {
> +          AddDefaultPred(BuildMI(*BB, MI, dl,
> +            TII->get(ldrOpc),scratch)
> +            .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(1));
> +
> +          AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut)
> +            .addReg(scratch).addReg(destIn)
> +            .addReg(0).addImm(1));
> +       }
> +        srcIn = srcOut;
> +        destIn = destOut;
> +      }
> +      MI->eraseFromParent();   // The instruction is gone now.
> +      return BB;
> +    }
> +
> +    // Expand the pseudo op to a loop.
> +    // thisMBB:
> +    //   ...
> +    //   movw varEnd, # --> with thumb2
> +    //   movt varEnd, #
> +    //   ldrcp varEnd, idx --> without thumb2
> +    //   fallthrough --> loopMBB
> +    // loopMBB:
> +    //   PHI varPhi, varEnd, varLoop
> +    //   PHI srcPhi, src, srcLoop
> +    //   PHI destPhi, dst, destLoop
> +    //   [scratch, srcLoop] = LDR_POST(srcPhi, 4)
> +    //   [destLoop] = STR_POST(scratch, destPhi, 4)
> +    //   subs varLoop, varPhi, #4
> +    //   bne loopMBB
> +    //   fallthrough --> exitMBB
> +    // exitMBB:
> +    //   epilogue to handle left-over bytes
> +    //   [scratch, srcOut] = LDRB_POST(srcLoop, 1)
> +    //   [destOut] = STRB_POST(scratch, destLoop, 1)
> +    MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
> +    MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
> +    MF->insert(It, loopMBB);
> +    MF->insert(It, exitMBB);
> +
> +    // Transfer the remainder of BB and its successor edges to exitMBB.
> +    exitMBB->splice(exitMBB->begin(), BB,
> +                    llvm::next(MachineBasicBlock::iterator(MI)),
> +                    BB->end());
> +    exitMBB->transferSuccessorsAndUpdatePHIs(BB);
> +
> +    // Load an immediate to varEnd.
> +    unsigned varEnd = MRI.createVirtualRegister(TRC);
> +    if (isThumb2) {
> +      unsigned VReg1 = varEnd;
> +      if ((LoopSize & 0xFFFF0000) != 0)
> +        VReg1 = MRI.createVirtualRegister(TRC);
> +      AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2MOVi16), VReg1)
> +                     .addImm(LoopSize & 0xFFFF));
> +
> +      if ((LoopSize & 0xFFFF0000) != 0)
> +        AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2MOVTi16), varEnd)
> +                       .addReg(VReg1)
> +                       .addImm(LoopSize >> 16));
> +    } else {
> +      MachineConstantPool *ConstantPool = MF->getConstantPool();
> +      Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext());
> +      const Constant *C = ConstantInt::get(Int32Ty, LoopSize);
> +
> +      // MachineConstantPool wants an explicit alignment.
> +      unsigned Align = getTargetData()->getPrefTypeAlignment(Int32Ty);
> +      if (Align == 0)
> +        Align = getTargetData()->getTypeAllocSize(C->getType());
> +      unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align);
> +
> +      AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::LDRcp))
> +                     .addReg(varEnd, RegState::Define)
> +                     .addConstantPoolIndex(Idx)
> +                     .addImm(0));
> +    }
> +    BB->addSuccessor(loopMBB);
> +
> +    // Generate the loop body:
> +    //   varPhi = PHI(varLoop, varEnd)
> +    //   srcPhi = PHI(srcLoop, src)
> +    //   destPhi = PHI(destLoop, dst)
> +    MachineBasicBlock *entryBB = BB;
> +    BB = loopMBB;
> +    unsigned varLoop = MRI.createVirtualRegister(TRC);
> +    unsigned varPhi = MRI.createVirtualRegister(TRC);
> +    unsigned srcLoop = MRI.createVirtualRegister(TRC);
> +    unsigned srcPhi = MRI.createVirtualRegister(TRC);
> +    unsigned destLoop = MRI.createVirtualRegister(TRC);
> +    unsigned destPhi = MRI.createVirtualRegister(TRC);
> +
> +    BuildMI(*BB, BB->begin(), dl, TII->get(ARM::PHI), varPhi)
> +      .addReg(varLoop).addMBB(loopMBB)
> +      .addReg(varEnd).addMBB(entryBB);
> +    BuildMI(BB, dl, TII->get(ARM::PHI), srcPhi)
> +      .addReg(srcLoop).addMBB(loopMBB)
> +      .addReg(src).addMBB(entryBB);
> +    BuildMI(BB, dl, TII->get(ARM::PHI), destPhi)
> +      .addReg(destLoop).addMBB(loopMBB)
> +      .addReg(dest).addMBB(entryBB);
> +
> +    //   [scratch, srcLoop] = LDR_POST(srcPhi, 4)
> +    //   [destLoop] = STR_POST(scratch, destPhi, 4)
> +    unsigned scratch = MRI.createVirtualRegister(TRC);
> +    if (isThumb2) {
> +      AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), scratch)
> +        .addReg(srcLoop, RegState::Define).addReg(srcPhi).addImm(4));
> +
> +      AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), destLoop)
> +        .addReg(scratch).addReg(destPhi)
> +        .addImm(4));
> +    } else {
> +      AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), scratch)
> +        .addReg(srcLoop, RegState::Define).addReg(srcPhi).addReg(0).addImm(4));
> +
> +      AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), destLoop)
> +        .addReg(scratch).addReg(destPhi)
> +        .addReg(0).addImm(4));
> +    }
> +
> +    // Decrement loop variable by 4.
> +    MachineInstrBuilder MIB = BuildMI(BB, dl,
> +      TII->get(isThumb2 ? ARM::t2SUBri : ARM::SUBri), varLoop);
> +    AddDefaultCC(AddDefaultPred(MIB.addReg(varPhi).addImm(4)));
> +    MIB->getOperand(5).setReg(ARM::CPSR);
> +    MIB->getOperand(5).setIsDef(true);
> +
> +    BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
> +      .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
> +
> +    // loopMBB can loop back to loopMBB or fall through to exitMBB.
> +    BB->addSuccessor(loopMBB);
> +    BB->addSuccessor(exitMBB);
> +
> +    // Add epilogue to handle BytesLeft.
> +    BB = exitMBB;
> +    MachineInstr *StartOfExit = exitMBB->begin();
> +    ldrOpc = isThumb2 ? ARM::t2LDRB_POST : ARM::LDRB_POST_IMM;
> +    strOpc = isThumb2 ? ARM::t2STRB_POST : ARM::STRB_POST_IMM;
> +
> +    //   [scratch, srcOut] = LDRB_POST(srcLoop, 1)
> +    //   [destOut] = STRB_POST(scratch, destLoop, 1)
> +    unsigned srcIn = srcLoop;
> +    unsigned destIn = destLoop;
> +    for (unsigned i = 0; i < BytesLeft; i++) {
> +      unsigned scratch = MRI.createVirtualRegister(TRC);
> +      unsigned srcOut = MRI.createVirtualRegister(TRC);
> +      unsigned destOut = MRI.createVirtualRegister(TRC);
> +      if (isThumb2) {
> +        AddDefaultPred(BuildMI(*BB, StartOfExit, dl,
> +          TII->get(ldrOpc),scratch)
> +          .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(1));
> +
> +        AddDefaultPred(BuildMI(*BB, StartOfExit, dl, TII->get(strOpc), destOut)
> +          .addReg(scratch).addReg(destIn)
> +          .addImm(1));
> +      } else {
> +        AddDefaultPred(BuildMI(*BB, StartOfExit, dl,
> +          TII->get(ldrOpc),scratch)
> +          .addReg(srcOut, RegState::Define).addReg(srcIn).addReg(0).addImm(1));
> +
> +        AddDefaultPred(BuildMI(*BB, StartOfExit, dl, TII->get(strOpc), destOut)
> +          .addReg(scratch).addReg(destIn)
> +          .addReg(0).addImm(1));
> +      }
> +      srcIn = srcOut;
> +      destIn = destOut;
> +    }
> +
> +    MI->eraseFromParent();   // The instruction is gone now.
> +    return BB;
> +  }
>   }
> }
> 
> 
> Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.h?rev=157793&r1=157792&r2=157793&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/ARM/ARMISelLowering.h (original)
> +++ llvm/trunk/lib/Target/ARM/ARMISelLowering.h Thu May 31 21:44:42 2012
> @@ -41,6 +41,9 @@
>                     // PIC mode.
>       WrapperJT,    // WrapperJT - A wrapper node for TargetJumpTable
> 
> +      // Add pseudo op to model memcpy for struct byval.
> +      COPY_STRUCT_BYVAL,
> +
>       CALL,         // Function call.
>       CALL_PRED,    // Function call that's predicable.
>       CALL_NOLINK,  // Function call with branch not branch-and-link.
> 
> Modified: llvm/trunk/lib/Target/ARM/ARMInstrInfo.td
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrInfo.td?rev=157793&r1=157792&r2=157793&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/ARM/ARMInstrInfo.td (original)
> +++ llvm/trunk/lib/Target/ARM/ARMInstrInfo.td Thu May 31 21:44:42 2012
> @@ -18,6 +18,9 @@
> // Type profiles.
> def SDT_ARMCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>;
> def SDT_ARMCallSeqEnd   : SDCallSeqEnd<[ SDTCisVT<0, i32>, SDTCisVT<1, i32> ]>;
> +def SDT_ARMStructByVal : SDTypeProfile<0, 3,
> +                                       [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
> +                                        SDTCisVT<2, i32>]>;
> 
> def SDT_ARMSaveCallPC : SDTypeProfile<0, 1, []>;
> 
> @@ -90,6 +93,10 @@
>                               [SDNPHasChain, SDNPOutGlue]>;
> def ARMcallseq_end   : SDNode<"ISD::CALLSEQ_END",   SDT_ARMCallSeqEnd,
>                               [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
> +def ARMcopystructbyval : SDNode<"ARMISD::COPY_STRUCT_BYVAL" ,
> +                                SDT_ARMStructByVal,
> +                                [SDNPHasChain, SDNPInGlue, SDNPOutGlue,
> +                                 SDNPMayStore, SDNPMayLoad]>;
> 
> def ARMcall          : SDNode<"ARMISD::CALL", SDT_ARMcall,
>                               [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
> @@ -4165,6 +4172,13 @@
> }
> }
> 
> +let usesCustomInserter = 1 in {
> +    def COPY_STRUCT_BYVAL_I32 : PseudoInst<
> +      (outs), (ins GPR:$dst, GPR:$src, i32imm:$size),
> +      NoItinerary,
> +      [(ARMcopystructbyval GPR:$dst, GPR:$src, imm:$size)]>;
> +}
> +
> let mayLoad = 1 in {
> def LDREXB : AIldrex<0b10, (outs GPR:$Rt), (ins addr_offset_none:$addr),
>                      NoItinerary,
> 
> 