[llvm-commits] [llvm] r157793 - in /llvm/trunk/lib/Target/ARM: ARMISelLowering.cpp ARMISelLowering.h ARMInstrInfo.td

Manman Ren mren at apple.com
Fri Jun 1 08:20:19 PDT 2012


Thanks a lot for reviewing.

Manman

On May 31, 2012, at 11:11 PM, Evan Cheng wrote:

> 1) We seem to have lost byval alignment? Without alignment info, it's not guaranteed to be safe to issue ldr / str. iOS has support for unaligned loads, but they may cause an exception. 
Yes, I had some problem with the alignment info and forgot to add it.
It seems that I need to add a function in FastISel to handle FastEmitInst_rrii.
I will do that.

> 2) With alignment info, then it might be possible use NEON loads and stores if they are available. 
Not familiar with NEON instructions, but I will try to add those.
> 3) Please factor out the expansion code into a function. 
Will do.
> I can't help but feel that we could do this in a target independent way. It's annoying to me that codegen has a couple of places that expand memcpy's.  Perhaps we can add a target neutral memcpy instruction and each target can supply a routine to expand memcpy during the pseudo expansion pass. That seems cleaner to me and allows us to handle memcpy calls from llvm ir and byval copy the same way. 
This requires changes to all targets. Most of the targets right now handle memcpy and byval copy at SelectionDAG.
I will think about this more and talk to you later about this :)
> 
> Evan
> 
> On May 31, 2012, at 7:44 PM, Manman Ren <mren at apple.com> wrote:
> 
>> Author: mren
>> Date: Thu May 31 21:44:42 2012
>> New Revision: 157793
>> 
>> URL: http://llvm.org/viewvc/llvm-project?rev=157793&view=rev
>> Log:
>> ARM: support struct byval in llvm
>> 
>> We handle struct byval by inserting a pseudo op, which will be expanded to a
>> loop at ExpandISelPseudos.
>> A separate patch for clang will be submitted to enable struct byval.
>> 
>> rdar://9877866
>> 
>> Modified:
>>   llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
>>   llvm/trunk/lib/Target/ARM/ARMISelLowering.h
>>   llvm/trunk/lib/Target/ARM/ARMInstrInfo.td
>> 
>> Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp?rev=157793&r1=157792&r2=157793&view=diff
>> ==============================================================================
>> --- llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp (original)
>> +++ llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp Thu May 31 21:44:42 2012
>> @@ -52,6 +52,7 @@
>> 
>> STATISTIC(NumTailCalls, "Number of tail calls");
>> STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");
>> +STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments");
>> 
>> // This option should go away when tail calls fully work.
>> static cl::opt<bool>
>> @@ -1424,21 +1425,21 @@
>>        CCInfo.clearFirstByValReg();
>>      }
>> 
>> -      unsigned LocMemOffset = VA.getLocMemOffset();
>> -      SDValue StkPtrOff = DAG.getIntPtrConstant(LocMemOffset);
>> -      SDValue Dst = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr,
>> -                                StkPtrOff);
>> -      SDValue SrcOffset = DAG.getIntPtrConstant(4*offset);
>> -      SDValue Src = DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg, SrcOffset);
>> -      SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset,
>> -                                         MVT::i32);
>> -      MemOpChains.push_back(DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode,
>> -                                          Flags.getByValAlign(),
>> -                                          /*isVolatile=*/false,
>> -                                          /*AlwaysInline=*/false,
>> -                                          MachinePointerInfo(0),
>> -                                          MachinePointerInfo(0)));
>> -
>> +      if (Flags.getByValSize() - 4*offset > 0) {
>> +        unsigned LocMemOffset = VA.getLocMemOffset();
>> +        SDValue StkPtrOff = DAG.getIntPtrConstant(LocMemOffset);
>> +        SDValue Dst = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr,
>> +                                  StkPtrOff);
>> +        SDValue SrcOffset = DAG.getIntPtrConstant(4*offset);
>> +        SDValue Src = DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg, SrcOffset);
>> +        SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset,
>> +                                           MVT::i32);
>> +
>> +        SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
>> +        SDValue Ops[] = { Chain, Dst, Src, SizeNode};
>> +        MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs,
>> +                                          Ops, array_lengthof(Ops)));
>> +      }
>>    } else if (!IsSibCall) {
>>      assert(VA.isMemLoc());
>> 
>> @@ -6593,6 +6594,252 @@
>>    // return last added BB
>>    return SinkBB;
>>  }
>> +  case ARM::COPY_STRUCT_BYVAL_I32: {
>> +    ++NumLoopByVals;
>> +    // This pseudo instruction has 3 operands: dst, src, size
>> +    // We expand it to a loop if size > Subtarget->getMaxInlineSizeThreshold().
>> +    // Otherwise, we will generate unrolled scalar copies.
>> +    const BasicBlock *LLVM_BB = BB->getBasicBlock();
>> +    MachineFunction::iterator It = BB;
>> +    ++It;
>> +
>> +    unsigned dest = MI->getOperand(0).getReg();
>> +    unsigned src = MI->getOperand(1).getReg();
>> +    unsigned size = MI->getOperand(2).getImm();
>> +    DebugLoc dl = MI->getDebugLoc();
>> +    unsigned BytesLeft = size & 3;
>> +    unsigned LoopSize = size - BytesLeft;
>> +
>> +    bool isThumb2 = Subtarget->isThumb2();
>> +    MachineFunction *MF = BB->getParent();
>> +    MachineRegisterInfo &MRI = MF->getRegInfo();
>> +    unsigned ldrOpc = isThumb2 ? ARM::t2LDR_POST : ARM::LDR_POST_IMM;
>> +    unsigned strOpc = isThumb2 ? ARM::t2STR_POST : ARM::STR_POST_IMM;
>> +
>> +    const TargetRegisterClass *TRC = isThumb2 ?
>> +      (const TargetRegisterClass*)&ARM::tGPRRegClass :
>> +      (const TargetRegisterClass*)&ARM::GPRRegClass;
>> +
>> +    if (size <= Subtarget->getMaxInlineSizeThreshold()) {
>> +      // Use LDR and STR to copy.
>> +      // [scratch, srcOut] = LDR_POST(srcIn, 4)
>> +      // [destOut] = STR_POST(scratch, destIn, 4)
>> +      unsigned srcIn = src;
>> +      unsigned destIn = dest;
>> +      for (unsigned i = 0; i < LoopSize; i+=4) {
>> +        unsigned scratch = MRI.createVirtualRegister(TRC);
>> +        unsigned srcOut = MRI.createVirtualRegister(TRC);
>> +        unsigned destOut = MRI.createVirtualRegister(TRC);
>> +        if (isThumb2) {
>> +          AddDefaultPred(BuildMI(*BB, MI, dl,
>> +            TII->get(ldrOpc), scratch)
>> +            .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(4));
>> +
>> +          AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut)
>> +            .addReg(scratch).addReg(destIn)
>> +            .addImm(4));
>> +        } else {
>> +          AddDefaultPred(BuildMI(*BB, MI, dl,
>> +            TII->get(ldrOpc), scratch)
>> +            .addReg(srcOut, RegState::Define).addReg(srcIn).addReg(0).addImm(4));
>> +
>> +          AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut)
>> +            .addReg(scratch).addReg(destIn)
>> +            .addReg(0).addImm(4));
>> +        }
>> +        srcIn = srcOut;
>> +        destIn = destOut;
>> +      }
>> +
>> +      // Handle the leftover bytes with LDRB and STRB.
>> +      // [scratch, srcOut] = LDRB_POST(srcIn, 1)
>> +      // [destOut] = STRB_POST(scratch, destIn, 1)
>> +      ldrOpc = isThumb2 ? ARM::t2LDRB_POST : ARM::LDRB_POST_IMM;
>> +      strOpc = isThumb2 ? ARM::t2STRB_POST : ARM::STRB_POST_IMM;
>> +      for (unsigned i = 0; i < BytesLeft; i++) {
>> +        unsigned scratch = MRI.createVirtualRegister(TRC);
>> +        unsigned srcOut = MRI.createVirtualRegister(TRC);
>> +        unsigned destOut = MRI.createVirtualRegister(TRC);
>> +        if (isThumb2) {
>> +          AddDefaultPred(BuildMI(*BB, MI, dl,
>> +            TII->get(ldrOpc),scratch)
>> +            .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(1));
>> +
>> +          AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut)
>> +            .addReg(scratch).addReg(destIn)
>> +            .addReg(0).addImm(1));
>> +        } else {
>> +          AddDefaultPred(BuildMI(*BB, MI, dl,
>> +            TII->get(ldrOpc),scratch)
>> +            .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(1));
>> +
>> +          AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut)
>> +            .addReg(scratch).addReg(destIn)
>> +            .addReg(0).addImm(1));
>> +       }
>> +        srcIn = srcOut;
>> +        destIn = destOut;
>> +      }
>> +      MI->eraseFromParent();   // The instruction is gone now.
>> +      return BB;
>> +    }
>> +
>> +    // Expand the pseudo op to a loop.
>> +    // thisMBB:
>> +    //   ...
>> +    //   movw varEnd, # --> with thumb2
>> +    //   movt varEnd, #
>> +    //   ldrcp varEnd, idx --> without thumb2
>> +    //   fallthrough --> loopMBB
>> +    // loopMBB:
>> +    //   PHI varPhi, varEnd, varLoop
>> +    //   PHI srcPhi, src, srcLoop
>> +    //   PHI destPhi, dst, destLoop
>> +    //   [scratch, srcLoop] = LDR_POST(srcPhi, 4)
>> +    //   [destLoop] = STR_POST(scratch, destPhi, 4)
>> +    //   subs varLoop, varPhi, #4
>> +    //   bne loopMBB
>> +    //   fallthrough --> exitMBB
>> +    // exitMBB:
>> +    //   epilogue to handle left-over bytes
>> +    //   [scratch, srcOut] = LDRB_POST(srcLoop, 1)
>> +    //   [destOut] = STRB_POST(scratch, destLoop, 1)
>> +    MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
>> +    MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
>> +    MF->insert(It, loopMBB);
>> +    MF->insert(It, exitMBB);
>> +
>> +    // Transfer the remainder of BB and its successor edges to exitMBB.
>> +    exitMBB->splice(exitMBB->begin(), BB,
>> +                    llvm::next(MachineBasicBlock::iterator(MI)),
>> +                    BB->end());
>> +    exitMBB->transferSuccessorsAndUpdatePHIs(BB);
>> +
>> +    // Load an immediate to varEnd.
>> +    unsigned varEnd = MRI.createVirtualRegister(TRC);
>> +    if (isThumb2) {
>> +      unsigned VReg1 = varEnd;
>> +      if ((LoopSize & 0xFFFF0000) != 0)
>> +        VReg1 = MRI.createVirtualRegister(TRC);
>> +      AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2MOVi16), VReg1)
>> +                     .addImm(LoopSize & 0xFFFF));
>> +
>> +      if ((LoopSize & 0xFFFF0000) != 0)
>> +        AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2MOVTi16), varEnd)
>> +                       .addReg(VReg1)
>> +                       .addImm(LoopSize >> 16));
>> +    } else {
>> +      MachineConstantPool *ConstantPool = MF->getConstantPool();
>> +      Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext());
>> +      const Constant *C = ConstantInt::get(Int32Ty, LoopSize);
>> +
>> +      // MachineConstantPool wants an explicit alignment.
>> +      unsigned Align = getTargetData()->getPrefTypeAlignment(Int32Ty);
>> +      if (Align == 0)
>> +        Align = getTargetData()->getTypeAllocSize(C->getType());
>> +      unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align);
>> +
>> +      AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::LDRcp))
>> +                     .addReg(varEnd, RegState::Define)
>> +                     .addConstantPoolIndex(Idx)
>> +                     .addImm(0));
>> +    }
>> +    BB->addSuccessor(loopMBB);
>> +
>> +    // Generate the loop body:
>> +    //   varPhi = PHI(varLoop, varEnd)
>> +    //   srcPhi = PHI(srcLoop, src)
>> +    //   destPhi = PHI(destLoop, dst)
>> +    MachineBasicBlock *entryBB = BB;
>> +    BB = loopMBB;
>> +    unsigned varLoop = MRI.createVirtualRegister(TRC);
>> +    unsigned varPhi = MRI.createVirtualRegister(TRC);
>> +    unsigned srcLoop = MRI.createVirtualRegister(TRC);
>> +    unsigned srcPhi = MRI.createVirtualRegister(TRC);
>> +    unsigned destLoop = MRI.createVirtualRegister(TRC);
>> +    unsigned destPhi = MRI.createVirtualRegister(TRC);
>> +
>> +    BuildMI(*BB, BB->begin(), dl, TII->get(ARM::PHI), varPhi)
>> +      .addReg(varLoop).addMBB(loopMBB)
>> +      .addReg(varEnd).addMBB(entryBB);
>> +    BuildMI(BB, dl, TII->get(ARM::PHI), srcPhi)
>> +      .addReg(srcLoop).addMBB(loopMBB)
>> +      .addReg(src).addMBB(entryBB);
>> +    BuildMI(BB, dl, TII->get(ARM::PHI), destPhi)
>> +      .addReg(destLoop).addMBB(loopMBB)
>> +      .addReg(dest).addMBB(entryBB);
>> +
>> +    //   [scratch, srcLoop] = LDR_POST(srcPhi, 4)
>> +    //   [destLoop] = STR_POST(scratch, destPhi, 4)
>> +    unsigned scratch = MRI.createVirtualRegister(TRC);
>> +    if (isThumb2) {
>> +      AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), scratch)
>> +        .addReg(srcLoop, RegState::Define).addReg(srcPhi).addImm(4));
>> +
>> +      AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), destLoop)
>> +        .addReg(scratch).addReg(destPhi)
>> +        .addImm(4));
>> +    } else {
>> +      AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), scratch)
>> +        .addReg(srcLoop, RegState::Define).addReg(srcPhi).addReg(0).addImm(4));
>> +
>> +      AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), destLoop)
>> +        .addReg(scratch).addReg(destPhi)
>> +        .addReg(0).addImm(4));
>> +    }
>> +
>> +    // Decrement loop variable by 4.
>> +    MachineInstrBuilder MIB = BuildMI(BB, dl,
>> +      TII->get(isThumb2 ? ARM::t2SUBri : ARM::SUBri), varLoop);
>> +    AddDefaultCC(AddDefaultPred(MIB.addReg(varPhi).addImm(4)));
>> +    MIB->getOperand(5).setReg(ARM::CPSR);
>> +    MIB->getOperand(5).setIsDef(true);
>> +
>> +    BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
>> +      .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
>> +
>> +    // loopMBB can loop back to loopMBB or fall through to exitMBB.
>> +    BB->addSuccessor(loopMBB);
>> +    BB->addSuccessor(exitMBB);
>> +
>> +    // Add epilogue to handle BytesLeft.
>> +    BB = exitMBB;
>> +    MachineInstr *StartOfExit = exitMBB->begin();
>> +    ldrOpc = isThumb2 ? ARM::t2LDRB_POST : ARM::LDRB_POST_IMM;
>> +    strOpc = isThumb2 ? ARM::t2STRB_POST : ARM::STRB_POST_IMM;
>> +
>> +    //   [scratch, srcOut] = LDRB_POST(srcLoop, 1)
>> +    //   [destOut] = STRB_POST(scratch, destLoop, 1)
>> +    unsigned srcIn = srcLoop;
>> +    unsigned destIn = destLoop;
>> +    for (unsigned i = 0; i < BytesLeft; i++) {
>> +      unsigned scratch = MRI.createVirtualRegister(TRC);
>> +      unsigned srcOut = MRI.createVirtualRegister(TRC);
>> +      unsigned destOut = MRI.createVirtualRegister(TRC);
>> +      if (isThumb2) {
>> +        AddDefaultPred(BuildMI(*BB, StartOfExit, dl,
>> +          TII->get(ldrOpc),scratch)
>> +          .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(1));
>> +
>> +        AddDefaultPred(BuildMI(*BB, StartOfExit, dl, TII->get(strOpc), destOut)
>> +          .addReg(scratch).addReg(destIn)
>> +          .addImm(1));
>> +      } else {
>> +        AddDefaultPred(BuildMI(*BB, StartOfExit, dl,
>> +          TII->get(ldrOpc),scratch)
>> +          .addReg(srcOut, RegState::Define).addReg(srcIn).addReg(0).addImm(1));
>> +
>> +        AddDefaultPred(BuildMI(*BB, StartOfExit, dl, TII->get(strOpc), destOut)
>> +          .addReg(scratch).addReg(destIn)
>> +          .addReg(0).addImm(1));
>> +      }
>> +      srcIn = srcOut;
>> +      destIn = destOut;
>> +    }
>> +
>> +    MI->eraseFromParent();   // The instruction is gone now.
>> +    return BB;
>> +  }
>>  }
>> }
>> 
>> 
>> Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.h
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.h?rev=157793&r1=157792&r2=157793&view=diff
>> ==============================================================================
>> --- llvm/trunk/lib/Target/ARM/ARMISelLowering.h (original)
>> +++ llvm/trunk/lib/Target/ARM/ARMISelLowering.h Thu May 31 21:44:42 2012
>> @@ -41,6 +41,9 @@
>>                    // PIC mode.
>>      WrapperJT,    // WrapperJT - A wrapper node for TargetJumpTable
>> 
>> +      // Add pseudo op to model memcpy for struct byval.
>> +      COPY_STRUCT_BYVAL,
>> +
>>      CALL,         // Function call.
>>      CALL_PRED,    // Function call that's predicable.
>>      CALL_NOLINK,  // Function call with branch not branch-and-link.
>> 
>> Modified: llvm/trunk/lib/Target/ARM/ARMInstrInfo.td
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrInfo.td?rev=157793&r1=157792&r2=157793&view=diff
>> ==============================================================================
>> --- llvm/trunk/lib/Target/ARM/ARMInstrInfo.td (original)
>> +++ llvm/trunk/lib/Target/ARM/ARMInstrInfo.td Thu May 31 21:44:42 2012
>> @@ -18,6 +18,9 @@
>> // Type profiles.
>> def SDT_ARMCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>;
>> def SDT_ARMCallSeqEnd   : SDCallSeqEnd<[ SDTCisVT<0, i32>, SDTCisVT<1, i32> ]>;
>> +def SDT_ARMStructByVal : SDTypeProfile<0, 3,
>> +                                       [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
>> +                                        SDTCisVT<2, i32>]>;
>> 
>> def SDT_ARMSaveCallPC : SDTypeProfile<0, 1, []>;
>> 
>> @@ -90,6 +93,10 @@
>>                              [SDNPHasChain, SDNPOutGlue]>;
>> def ARMcallseq_end   : SDNode<"ISD::CALLSEQ_END",   SDT_ARMCallSeqEnd,
>>                              [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
>> +def ARMcopystructbyval : SDNode<"ARMISD::COPY_STRUCT_BYVAL" ,
>> +                                SDT_ARMStructByVal,
>> +                                [SDNPHasChain, SDNPInGlue, SDNPOutGlue,
>> +                                 SDNPMayStore, SDNPMayLoad]>;
>> 
>> def ARMcall          : SDNode<"ARMISD::CALL", SDT_ARMcall,
>>                              [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
>> @@ -4165,6 +4172,13 @@
>> }
>> }
>> 
>> +let usesCustomInserter = 1 in {
>> +    def COPY_STRUCT_BYVAL_I32 : PseudoInst<
>> +      (outs), (ins GPR:$dst, GPR:$src, i32imm:$size),
>> +      NoItinerary,
>> +      [(ARMcopystructbyval GPR:$dst, GPR:$src, imm:$size)]>;
>> +}
>> +
>> let mayLoad = 1 in {
>> def LDREXB : AIldrex<0b10, (outs GPR:$Rt), (ins addr_offset_none:$addr),
>>                     NoItinerary,
>> 
>> 
>> _______________________________________________
>> llvm-commits mailing list
>> llvm-commits at cs.uiuc.edu
>> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits




More information about the llvm-commits mailing list