[llvm-commits] [llvm] r157830 - in /llvm/trunk: include/llvm/CodeGen/FastISel.h lib/CodeGen/SelectionDAG/FastISel.cpp lib/Target/ARM/ARMISelLowering.cpp lib/Target/ARM/ARMISelLowering.h lib/Target/ARM/ARMInstrInfo.td

Manman Ren mren at apple.com
Fri Jun 1 12:33:18 PDT 2012


Author: mren
Date: Fri Jun  1 14:33:18 2012
New Revision: 157830

URL: http://llvm.org/viewvc/llvm-project?rev=157830&view=rev
Log:
ARM: properly handle alignment for struct byval.

Factor out the expansion code into a function.
This change is to be enabled in clang.

rdar://9877866

Modified:
    llvm/trunk/include/llvm/CodeGen/FastISel.h
    llvm/trunk/lib/CodeGen/SelectionDAG/FastISel.cpp
    llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
    llvm/trunk/lib/Target/ARM/ARMISelLowering.h
    llvm/trunk/lib/Target/ARM/ARMInstrInfo.td

Modified: llvm/trunk/include/llvm/CodeGen/FastISel.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/FastISel.h?rev=157830&r1=157829&r2=157830&view=diff
==============================================================================
--- llvm/trunk/include/llvm/CodeGen/FastISel.h (original)
+++ llvm/trunk/include/llvm/CodeGen/FastISel.h Fri Jun  1 14:33:18 2012
@@ -299,6 +299,15 @@
                             unsigned Op1, bool Op1IsKill,
                             uint64_t Imm);
 
+  /// FastEmitInst_rrii - Emit a MachineInstr with two register operands,
+  /// two immediate operands, and a result register in the given register
+  /// class.
+  unsigned FastEmitInst_rrii(unsigned MachineInstOpcode,
+                             const TargetRegisterClass *RC,
+                             unsigned Op0, bool Op0IsKill,
+                             unsigned Op1, bool Op1IsKill,
+                             uint64_t Imm1, uint64_t Imm2);
+
   /// FastEmitInst_i - Emit a MachineInstr with a single immediate
   /// operand, and a result register in the given register class.
   unsigned FastEmitInst_i(unsigned MachineInstrOpcode,

Modified: llvm/trunk/lib/CodeGen/SelectionDAG/FastISel.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/FastISel.cpp?rev=157830&r1=157829&r2=157830&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/FastISel.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/FastISel.cpp Fri Jun  1 14:33:18 2012
@@ -1306,6 +1306,30 @@
   return ResultReg;
 }
 
+unsigned FastISel::FastEmitInst_rrii(unsigned MachineInstOpcode,
+                                     const TargetRegisterClass *RC,
+                                     unsigned Op0, bool Op0IsKill,
+                                     unsigned Op1, bool Op1IsKill,
+                                     uint64_t Imm1, uint64_t Imm2) {
+  unsigned ResultReg = createResultReg(RC);
+  const MCInstrDesc &II = TII.get(MachineInstOpcode);
+  // Build the instruction with operands Op0, Op1, Imm1, Imm2 (in that order).
+  if (II.getNumDefs() >= 1)  // Explicit def: ResultReg is the destination.
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+      .addReg(Op0, Op0IsKill * RegState::Kill)
+      .addReg(Op1, Op1IsKill * RegState::Kill)
+      .addImm(Imm1).addImm(Imm2);
+  else {  // No explicit def: copy the first implicit def into ResultReg.
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+      .addReg(Op0, Op0IsKill * RegState::Kill)
+      .addReg(Op1, Op1IsKill * RegState::Kill)
+      .addImm(Imm1).addImm(Imm2);
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+            ResultReg).addReg(II.ImplicitDefs[0]);
+  }
+  return ResultReg;
+}
+
 unsigned FastISel::FastEmitInst_i(unsigned MachineInstOpcode,
                                   const TargetRegisterClass *RC,
                                   uint64_t Imm) {

Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp?rev=157830&r1=157829&r2=157830&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp Fri Jun  1 14:33:18 2012
@@ -1434,9 +1434,10 @@
         SDValue Src = DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg, SrcOffset);
         SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset,
                                            MVT::i32);
+        SDValue AlignNode = DAG.getConstant(Flags.getByValAlign(), MVT::i32);
 
         SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
-        SDValue Ops[] = { Chain, Dst, Src, SizeNode};
+        SDValue Ops[] = { Chain, Dst, Src, SizeNode, AlignNode};
         MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs,
                                           Ops, array_lengthof(Ops)));
       }
@@ -6239,6 +6240,270 @@
   llvm_unreachable("Expecting a BB with two successors!");
 }
 
+/// EmitStructByval - Expand the COPY_STRUCT_BYVAL_I32 pseudo into post-indexed
+/// load/store pairs, unrolled for small sizes and as a loop otherwise.
+MachineBasicBlock *ARMTargetLowering::
+EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const {
+  // This pseudo instruction has 4 operands: dst, src, size, alignment.
+  // We expand it to a loop if size > Subtarget->getMaxInlineSizeThreshold().
+  // Otherwise, we will generate unrolled scalar copies.
+  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+  const BasicBlock *LLVM_BB = BB->getBasicBlock();
+  MachineFunction::iterator It = BB;
+  ++It;
+
+  unsigned dest = MI->getOperand(0).getReg();
+  unsigned src = MI->getOperand(1).getReg();
+  unsigned SizeVal = MI->getOperand(2).getImm();
+  unsigned Align = MI->getOperand(3).getImm();
+  DebugLoc dl = MI->getDebugLoc();
+
+  bool isThumb2 = Subtarget->isThumb2();
+  MachineFunction *MF = BB->getParent();
+  MachineRegisterInfo &MRI = MF->getRegInfo();
+  unsigned ldrOpc, strOpc, UnitSize;
+
+  const TargetRegisterClass *TRC = isThumb2 ?
+    (const TargetRegisterClass*)&ARM::tGPRRegClass :
+    (const TargetRegisterClass*)&ARM::GPRRegClass;
+
+  // Pick the access width from the byval alignment: byte, halfword, or word.
+  // The bulk is copied UnitSize bytes at a time; the remainder byte by byte.
+  if (Align & 1) {
+    ldrOpc = isThumb2 ? ARM::t2LDRB_POST : ARM::LDRB_POST_IMM;
+    strOpc = isThumb2 ? ARM::t2STRB_POST : ARM::STRB_POST_IMM;
+    UnitSize = 1;
+  } else if (Align & 2) {
+    ldrOpc = isThumb2 ? ARM::t2LDRH_POST : ARM::LDRH_POST;
+    strOpc = isThumb2 ? ARM::t2STRH_POST : ARM::STRH_POST;
+    UnitSize = 2;
+  } else {
+    ldrOpc = isThumb2 ? ARM::t2LDR_POST : ARM::LDR_POST_IMM;
+    strOpc = isThumb2 ? ARM::t2STR_POST : ARM::STR_POST_IMM;
+    UnitSize = 4;
+  }
+  unsigned BytesLeft = SizeVal % UnitSize;
+  unsigned LoopSize = SizeVal - BytesLeft;
+
+  if (SizeVal <= Subtarget->getMaxInlineSizeThreshold()) {
+    // Use LDR and STR to copy.
+    // [scratch, srcOut] = LDR_POST(srcIn, UnitSize)
+    // [destOut] = STR_POST(scratch, destIn, UnitSize)
+    unsigned srcIn = src;
+    unsigned destIn = dest;
+    for (unsigned i = 0; i < LoopSize; i+=UnitSize) {
+      unsigned scratch = MRI.createVirtualRegister(TRC);
+      unsigned srcOut = MRI.createVirtualRegister(TRC);
+      unsigned destOut = MRI.createVirtualRegister(TRC);
+      if (isThumb2) {
+        AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(ldrOpc), scratch)
+          .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(UnitSize));
+
+        AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut)
+          .addReg(scratch).addReg(destIn)
+          .addImm(UnitSize));
+      } else {
+        AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(ldrOpc), scratch)
+          .addReg(srcOut, RegState::Define).addReg(srcIn).addReg(0)
+          .addImm(UnitSize));
+
+        AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut)
+          .addReg(scratch).addReg(destIn)
+          .addReg(0).addImm(UnitSize));
+      }
+      srcIn = srcOut;
+      destIn = destOut;
+    }
+
+    // Handle the leftover bytes with LDRB and STRB.
+    // [scratch, srcOut] = LDRB_POST(srcIn, 1)
+    // [destOut] = STRB_POST(scratch, destIn, 1)
+    // As above, only the ARM-mode forms take the extra register operand (0).
+    ldrOpc = isThumb2 ? ARM::t2LDRB_POST : ARM::LDRB_POST_IMM;
+    strOpc = isThumb2 ? ARM::t2STRB_POST : ARM::STRB_POST_IMM;
+    for (unsigned i = 0; i < BytesLeft; i++) {
+      unsigned scratch = MRI.createVirtualRegister(TRC);
+      unsigned srcOut = MRI.createVirtualRegister(TRC);
+      unsigned destOut = MRI.createVirtualRegister(TRC);
+      if (isThumb2) {
+        AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(ldrOpc), scratch)
+          .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(1));
+
+        AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut)
+          .addReg(scratch).addReg(destIn)
+          .addImm(1));
+      } else {
+        AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(ldrOpc), scratch)
+          .addReg(srcOut, RegState::Define).addReg(srcIn).addReg(0).addImm(1));
+
+        AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut)
+          .addReg(scratch).addReg(destIn)
+          .addReg(0).addImm(1));
+      }
+      srcIn = srcOut;
+      destIn = destOut;
+    }
+    MI->eraseFromParent();   // The instruction is gone now.
+    return BB;
+  }
+
+  // Expand the pseudo op to a loop.
+  // thisMBB:
+  //   ...
+  //   movw varEnd, # --> with thumb2
+  //   movt varEnd, #
+  //   ldrcp varEnd, idx --> without thumb2
+  //   fallthrough --> loopMBB
+  // loopMBB:
+  //   PHI varPhi, varEnd, varLoop
+  //   PHI srcPhi, src, srcLoop
+  //   PHI destPhi, dst, destLoop
+  //   [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize)
+  //   [destLoop] = STR_POST(scratch, destPhi, UnitSize)
+  //   subs varLoop, varPhi, #UnitSize
+  //   bne loopMBB
+  //   fallthrough --> exitMBB
+  // exitMBB:
+  //   epilogue to handle left-over bytes
+  //   [scratch, srcOut] = LDRB_POST(srcLoop, 1)
+  //   [destOut] = STRB_POST(scratch, destLoop, 1)
+  MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+  MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+  MF->insert(It, loopMBB);
+  MF->insert(It, exitMBB);
+
+  // Transfer the remainder of BB and its successor edges to exitMBB.
+  exitMBB->splice(exitMBB->begin(), BB,
+                  llvm::next(MachineBasicBlock::iterator(MI)),
+                  BB->end());
+  exitMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+  // Load an immediate to varEnd.
+  unsigned varEnd = MRI.createVirtualRegister(TRC);
+  if (isThumb2) {
+    unsigned VReg1 = varEnd;
+    if ((LoopSize & 0xFFFF0000) != 0)
+      VReg1 = MRI.createVirtualRegister(TRC);
+    AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2MOVi16), VReg1)
+                   .addImm(LoopSize & 0xFFFF));
+
+    if ((LoopSize & 0xFFFF0000) != 0)
+      AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2MOVTi16), varEnd)
+                     .addReg(VReg1)
+                     .addImm(LoopSize >> 16));
+  } else {
+    MachineConstantPool *ConstantPool = MF->getConstantPool();
+    Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext());
+    const Constant *C = ConstantInt::get(Int32Ty, LoopSize);
+
+    // MachineConstantPool wants an explicit alignment.
+    unsigned CPAlign = getTargetData()->getPrefTypeAlignment(Int32Ty);
+    if (CPAlign == 0)
+      CPAlign = getTargetData()->getTypeAllocSize(C->getType());
+    unsigned Idx = ConstantPool->getConstantPoolIndex(C, CPAlign);
+
+    AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::LDRcp))
+                   .addReg(varEnd, RegState::Define)
+                   .addConstantPoolIndex(Idx)
+                   .addImm(0));
+  }
+  BB->addSuccessor(loopMBB);
+
+  // Generate the loop body:
+  //   varPhi = PHI(varLoop, varEnd)
+  //   srcPhi = PHI(srcLoop, src)
+  //   destPhi = PHI(destLoop, dst)
+  MachineBasicBlock *entryBB = BB;
+  BB = loopMBB;
+  unsigned varLoop = MRI.createVirtualRegister(TRC);
+  unsigned varPhi = MRI.createVirtualRegister(TRC);
+  unsigned srcLoop = MRI.createVirtualRegister(TRC);
+  unsigned srcPhi = MRI.createVirtualRegister(TRC);
+  unsigned destLoop = MRI.createVirtualRegister(TRC);
+  unsigned destPhi = MRI.createVirtualRegister(TRC);
+
+  BuildMI(*BB, BB->begin(), dl, TII->get(ARM::PHI), varPhi)
+    .addReg(varLoop).addMBB(loopMBB)
+    .addReg(varEnd).addMBB(entryBB);
+  BuildMI(BB, dl, TII->get(ARM::PHI), srcPhi)
+    .addReg(srcLoop).addMBB(loopMBB)
+    .addReg(src).addMBB(entryBB);
+  BuildMI(BB, dl, TII->get(ARM::PHI), destPhi)
+    .addReg(destLoop).addMBB(loopMBB)
+    .addReg(dest).addMBB(entryBB);
+
+  //   [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize)
+  //   [destLoop] = STR_POST(scratch, destPhi, UnitSize)
+  unsigned scratch = MRI.createVirtualRegister(TRC);
+  if (isThumb2) {
+    AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), scratch)
+      .addReg(srcLoop, RegState::Define).addReg(srcPhi).addImm(UnitSize));
+
+    AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), destLoop)
+      .addReg(scratch).addReg(destPhi)
+      .addImm(UnitSize));
+  } else {
+    AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), scratch)
+      .addReg(srcLoop, RegState::Define).addReg(srcPhi).addReg(0)
+      .addImm(UnitSize));
+
+    AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), destLoop)
+      .addReg(scratch).addReg(destPhi)
+      .addReg(0).addImm(UnitSize));
+  }
+
+  // Decrement loop variable by UnitSize.
+  MachineInstrBuilder MIB = BuildMI(BB, dl,
+    TII->get(isThumb2 ? ARM::t2SUBri : ARM::SUBri), varLoop);
+  AddDefaultCC(AddDefaultPred(MIB.addReg(varPhi).addImm(UnitSize)));
+  // Make operand 5 (from AddDefaultCC) define CPSR: the Bcc below reads it.
+  MIB->getOperand(5).setReg(ARM::CPSR);
+  MIB->getOperand(5).setIsDef(true);
+
+  BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
+    .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
+
+  // loopMBB can loop back to loopMBB or fall through to exitMBB.
+  BB->addSuccessor(loopMBB);
+  BB->addSuccessor(exitMBB);
+
+  // Add epilogue to handle BytesLeft.
+  BB = exitMBB;
+  MachineInstr *StartOfExit = exitMBB->begin();
+  ldrOpc = isThumb2 ? ARM::t2LDRB_POST : ARM::LDRB_POST_IMM;
+  strOpc = isThumb2 ? ARM::t2STRB_POST : ARM::STRB_POST_IMM;
+
+  //   [scratch, srcOut] = LDRB_POST(srcLoop, 1)
+  //   [destOut] = STRB_POST(scratch, destLoop, 1)
+  unsigned srcIn = srcLoop;
+  unsigned destIn = destLoop;
+  for (unsigned i = 0; i < BytesLeft; i++) {
+    unsigned scratch = MRI.createVirtualRegister(TRC);
+    unsigned srcOut = MRI.createVirtualRegister(TRC);
+    unsigned destOut = MRI.createVirtualRegister(TRC);
+    if (isThumb2) {
+      AddDefaultPred(BuildMI(*BB, StartOfExit, dl, TII->get(ldrOpc), scratch)
+        .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(1));
+
+      AddDefaultPred(BuildMI(*BB, StartOfExit, dl, TII->get(strOpc), destOut)
+        .addReg(scratch).addReg(destIn)
+        .addImm(1));
+    } else {
+      AddDefaultPred(BuildMI(*BB, StartOfExit, dl, TII->get(ldrOpc), scratch)
+        .addReg(srcOut, RegState::Define).addReg(srcIn).addReg(0).addImm(1));
+
+      AddDefaultPred(BuildMI(*BB, StartOfExit, dl, TII->get(strOpc), destOut)
+        .addReg(scratch).addReg(destIn)
+        .addReg(0).addImm(1));
+    }
+    srcIn = srcOut;
+    destIn = destOut;
+  }
+
+  MI->eraseFromParent();   // The instruction is gone now.
+  return BB;
+}
+
 MachineBasicBlock *
 ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
                                                MachineBasicBlock *BB) const {
@@ -6594,252 +6859,9 @@
     // return last added BB
     return SinkBB;
   }
-  case ARM::COPY_STRUCT_BYVAL_I32: {
+  case ARM::COPY_STRUCT_BYVAL_I32:
     ++NumLoopByVals;
-    // This pseudo instruction has 3 operands: dst, src, size
-    // We expand it to a loop if size > Subtarget->getMaxInlineSizeThreshold().
-    // Otherwise, we will generate unrolled scalar copies.
-    const BasicBlock *LLVM_BB = BB->getBasicBlock();
-    MachineFunction::iterator It = BB;
-    ++It;
-
-    unsigned dest = MI->getOperand(0).getReg();
-    unsigned src = MI->getOperand(1).getReg();
-    unsigned size = MI->getOperand(2).getImm();
-    DebugLoc dl = MI->getDebugLoc();
-    unsigned BytesLeft = size & 3;
-    unsigned LoopSize = size - BytesLeft;
-
-    bool isThumb2 = Subtarget->isThumb2();
-    MachineFunction *MF = BB->getParent();
-    MachineRegisterInfo &MRI = MF->getRegInfo();
-    unsigned ldrOpc = isThumb2 ? ARM::t2LDR_POST : ARM::LDR_POST_IMM;
-    unsigned strOpc = isThumb2 ? ARM::t2STR_POST : ARM::STR_POST_IMM;
-
-    const TargetRegisterClass *TRC = isThumb2 ?
-      (const TargetRegisterClass*)&ARM::tGPRRegClass :
-      (const TargetRegisterClass*)&ARM::GPRRegClass;
-
-    if (size <= Subtarget->getMaxInlineSizeThreshold()) {
-      // Use LDR and STR to copy.
-      // [scratch, srcOut] = LDR_POST(srcIn, 4)
-      // [destOut] = STR_POST(scratch, destIn, 4)
-      unsigned srcIn = src;
-      unsigned destIn = dest;
-      for (unsigned i = 0; i < LoopSize; i+=4) {
-        unsigned scratch = MRI.createVirtualRegister(TRC);
-        unsigned srcOut = MRI.createVirtualRegister(TRC);
-        unsigned destOut = MRI.createVirtualRegister(TRC);
-        if (isThumb2) {
-          AddDefaultPred(BuildMI(*BB, MI, dl,
-            TII->get(ldrOpc), scratch)
-            .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(4));
-
-          AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut)
-            .addReg(scratch).addReg(destIn)
-            .addImm(4));
-        } else {
-          AddDefaultPred(BuildMI(*BB, MI, dl,
-            TII->get(ldrOpc), scratch)
-            .addReg(srcOut, RegState::Define).addReg(srcIn).addReg(0).addImm(4));
-
-          AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut)
-            .addReg(scratch).addReg(destIn)
-            .addReg(0).addImm(4));
-        }
-        srcIn = srcOut;
-        destIn = destOut;
-      }
-
-      // Handle the leftover bytes with LDRB and STRB.
-      // [scratch, srcOut] = LDRB_POST(srcIn, 1)
-      // [destOut] = STRB_POST(scratch, destIn, 1)
-      ldrOpc = isThumb2 ? ARM::t2LDRB_POST : ARM::LDRB_POST_IMM;
-      strOpc = isThumb2 ? ARM::t2STRB_POST : ARM::STRB_POST_IMM;
-      for (unsigned i = 0; i < BytesLeft; i++) {
-        unsigned scratch = MRI.createVirtualRegister(TRC);
-        unsigned srcOut = MRI.createVirtualRegister(TRC);
-        unsigned destOut = MRI.createVirtualRegister(TRC);
-        if (isThumb2) {
-          AddDefaultPred(BuildMI(*BB, MI, dl,
-            TII->get(ldrOpc),scratch)
-            .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(1));
-
-          AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut)
-            .addReg(scratch).addReg(destIn)
-            .addReg(0).addImm(1));
-        } else {
-          AddDefaultPred(BuildMI(*BB, MI, dl,
-            TII->get(ldrOpc),scratch)
-            .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(1));
-
-          AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut)
-            .addReg(scratch).addReg(destIn)
-            .addReg(0).addImm(1));
-       }
-        srcIn = srcOut;
-        destIn = destOut;
-      }
-      MI->eraseFromParent();   // The instruction is gone now.
-      return BB;
-    }
-
-    // Expand the pseudo op to a loop.
-    // thisMBB:
-    //   ...
-    //   movw varEnd, # --> with thumb2
-    //   movt varEnd, #
-    //   ldrcp varEnd, idx --> without thumb2
-    //   fallthrough --> loopMBB
-    // loopMBB:
-    //   PHI varPhi, varEnd, varLoop
-    //   PHI srcPhi, src, srcLoop
-    //   PHI destPhi, dst, destLoop
-    //   [scratch, srcLoop] = LDR_POST(srcPhi, 4)
-    //   [destLoop] = STR_POST(scratch, destPhi, 4)
-    //   subs varLoop, varPhi, #4
-    //   bne loopMBB
-    //   fallthrough --> exitMBB
-    // exitMBB:
-    //   epilogue to handle left-over bytes
-    //   [scratch, srcOut] = LDRB_POST(srcLoop, 1)
-    //   [destOut] = STRB_POST(scratch, destLoop, 1)
-    MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
-    MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
-    MF->insert(It, loopMBB);
-    MF->insert(It, exitMBB);
-
-    // Transfer the remainder of BB and its successor edges to exitMBB.
-    exitMBB->splice(exitMBB->begin(), BB,
-                    llvm::next(MachineBasicBlock::iterator(MI)),
-                    BB->end());
-    exitMBB->transferSuccessorsAndUpdatePHIs(BB);
-
-    // Load an immediate to varEnd.
-    unsigned varEnd = MRI.createVirtualRegister(TRC);
-    if (isThumb2) {
-      unsigned VReg1 = varEnd;
-      if ((LoopSize & 0xFFFF0000) != 0)
-        VReg1 = MRI.createVirtualRegister(TRC);
-      AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2MOVi16), VReg1)
-                     .addImm(LoopSize & 0xFFFF));
-
-      if ((LoopSize & 0xFFFF0000) != 0)
-        AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2MOVTi16), varEnd)
-                       .addReg(VReg1)
-                       .addImm(LoopSize >> 16));
-    } else {
-      MachineConstantPool *ConstantPool = MF->getConstantPool();
-      Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext());
-      const Constant *C = ConstantInt::get(Int32Ty, LoopSize);
-
-      // MachineConstantPool wants an explicit alignment.
-      unsigned Align = getTargetData()->getPrefTypeAlignment(Int32Ty);
-      if (Align == 0)
-        Align = getTargetData()->getTypeAllocSize(C->getType());
-      unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align);
-
-      AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::LDRcp))
-                     .addReg(varEnd, RegState::Define)
-                     .addConstantPoolIndex(Idx)
-                     .addImm(0));
-    }
-    BB->addSuccessor(loopMBB);
-
-    // Generate the loop body:
-    //   varPhi = PHI(varLoop, varEnd)
-    //   srcPhi = PHI(srcLoop, src)
-    //   destPhi = PHI(destLoop, dst)
-    MachineBasicBlock *entryBB = BB;
-    BB = loopMBB;
-    unsigned varLoop = MRI.createVirtualRegister(TRC);
-    unsigned varPhi = MRI.createVirtualRegister(TRC);
-    unsigned srcLoop = MRI.createVirtualRegister(TRC);
-    unsigned srcPhi = MRI.createVirtualRegister(TRC);
-    unsigned destLoop = MRI.createVirtualRegister(TRC);
-    unsigned destPhi = MRI.createVirtualRegister(TRC);
-
-    BuildMI(*BB, BB->begin(), dl, TII->get(ARM::PHI), varPhi)
-      .addReg(varLoop).addMBB(loopMBB)
-      .addReg(varEnd).addMBB(entryBB);
-    BuildMI(BB, dl, TII->get(ARM::PHI), srcPhi)
-      .addReg(srcLoop).addMBB(loopMBB)
-      .addReg(src).addMBB(entryBB);
-    BuildMI(BB, dl, TII->get(ARM::PHI), destPhi)
-      .addReg(destLoop).addMBB(loopMBB)
-      .addReg(dest).addMBB(entryBB);
-
-    //   [scratch, srcLoop] = LDR_POST(srcPhi, 4)
-    //   [destLoop] = STR_POST(scratch, destPhi, 4)
-    unsigned scratch = MRI.createVirtualRegister(TRC);
-    if (isThumb2) {
-      AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), scratch)
-        .addReg(srcLoop, RegState::Define).addReg(srcPhi).addImm(4));
-
-      AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), destLoop)
-        .addReg(scratch).addReg(destPhi)
-        .addImm(4));
-    } else {
-      AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), scratch)
-        .addReg(srcLoop, RegState::Define).addReg(srcPhi).addReg(0).addImm(4));
-
-      AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), destLoop)
-        .addReg(scratch).addReg(destPhi)
-        .addReg(0).addImm(4));
-    }
-
-    // Decrement loop variable by 4.
-    MachineInstrBuilder MIB = BuildMI(BB, dl,
-      TII->get(isThumb2 ? ARM::t2SUBri : ARM::SUBri), varLoop);
-    AddDefaultCC(AddDefaultPred(MIB.addReg(varPhi).addImm(4)));
-    MIB->getOperand(5).setReg(ARM::CPSR);
-    MIB->getOperand(5).setIsDef(true);
-
-    BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
-      .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
-
-    // loopMBB can loop back to loopMBB or fall through to exitMBB.
-    BB->addSuccessor(loopMBB);
-    BB->addSuccessor(exitMBB);
-
-    // Add epilogue to handle BytesLeft.
-    BB = exitMBB;
-    MachineInstr *StartOfExit = exitMBB->begin();
-    ldrOpc = isThumb2 ? ARM::t2LDRB_POST : ARM::LDRB_POST_IMM;
-    strOpc = isThumb2 ? ARM::t2STRB_POST : ARM::STRB_POST_IMM;
-
-    //   [scratch, srcOut] = LDRB_POST(srcLoop, 1)
-    //   [destOut] = STRB_POST(scratch, destLoop, 1)
-    unsigned srcIn = srcLoop;
-    unsigned destIn = destLoop;
-    for (unsigned i = 0; i < BytesLeft; i++) {
-      unsigned scratch = MRI.createVirtualRegister(TRC);
-      unsigned srcOut = MRI.createVirtualRegister(TRC);
-      unsigned destOut = MRI.createVirtualRegister(TRC);
-      if (isThumb2) {
-        AddDefaultPred(BuildMI(*BB, StartOfExit, dl,
-          TII->get(ldrOpc),scratch)
-          .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(1));
-
-        AddDefaultPred(BuildMI(*BB, StartOfExit, dl, TII->get(strOpc), destOut)
-          .addReg(scratch).addReg(destIn)
-          .addImm(1));
-      } else {
-        AddDefaultPred(BuildMI(*BB, StartOfExit, dl,
-          TII->get(ldrOpc),scratch)
-          .addReg(srcOut, RegState::Define).addReg(srcIn).addReg(0).addImm(1));
-
-        AddDefaultPred(BuildMI(*BB, StartOfExit, dl, TII->get(strOpc), destOut)
-          .addReg(scratch).addReg(destIn)
-          .addReg(0).addImm(1));
-      }
-      srcIn = srcOut;
-      destIn = destOut;
-    }
-
-    MI->eraseFromParent();   // The instruction is gone now.
-    return BB;
-  }
+    return EmitStructByval(MI, BB);
   }
 }
 

Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.h?rev=157830&r1=157829&r2=157830&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMISelLowering.h (original)
+++ llvm/trunk/lib/Target/ARM/ARMISelLowering.h Fri Jun  1 14:33:18 2012
@@ -530,6 +530,9 @@
                                              MachineBasicBlock *MBB) const;
 
     bool RemapAddSubWithFlags(MachineInstr *MI, MachineBasicBlock *BB) const;
+
+    MachineBasicBlock *EmitStructByval(MachineInstr *MI,
+                                       MachineBasicBlock *MBB) const;
   };
 
   enum NEONModImmType {

Modified: llvm/trunk/lib/Target/ARM/ARMInstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrInfo.td?rev=157830&r1=157829&r2=157830&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMInstrInfo.td (original)
+++ llvm/trunk/lib/Target/ARM/ARMInstrInfo.td Fri Jun  1 14:33:18 2012
@@ -18,9 +18,9 @@
 // Type profiles.
 def SDT_ARMCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>;
 def SDT_ARMCallSeqEnd   : SDCallSeqEnd<[ SDTCisVT<0, i32>, SDTCisVT<1, i32> ]>;
-def SDT_ARMStructByVal : SDTypeProfile<0, 3,
+def SDT_ARMStructByVal : SDTypeProfile<0, 4,
                                        [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
-                                        SDTCisVT<2, i32>]>;
+                                        SDTCisVT<2, i32>, SDTCisVT<3, i32>]>;
 
 def SDT_ARMSaveCallPC : SDTypeProfile<0, 1, []>;
 
@@ -4174,9 +4174,9 @@
 
 let usesCustomInserter = 1 in {
     def COPY_STRUCT_BYVAL_I32 : PseudoInst<
-      (outs), (ins GPR:$dst, GPR:$src, i32imm:$size),
+      (outs), (ins GPR:$dst, GPR:$src, i32imm:$size, i32imm:$alignment),
       NoItinerary,
-      [(ARMcopystructbyval GPR:$dst, GPR:$src, imm:$size)]>;
+      [(ARMcopystructbyval GPR:$dst, GPR:$src, imm:$size, imm:$alignment)]>;
 }
 
 let mayLoad = 1 in {





More information about the llvm-commits mailing list