Thu Jan 18 23:51:57 PST 2007

Changes in directory llvm/lib/Target/ARM:

ARMAddressingModes.h added (r1.1)
ARMConstantIslandPass.cpp added (r1.1)
ARMConstantPoolValue.cpp added (r1.1)
ARMConstantPoolValue.h added (r1.1)
ARMISelLowering.cpp added (r1.1)
ARMISelLowering.h added (r1.1)
ARMInstrThumb.td added (r1.1)
ARMInstrVFP.td added (r1.1)
ARMLoadStoreOptimizer.cpp added (r1.1)
ARMMachineFunctionInfo.h added (r1.1)
ARMSubtarget.cpp added (r1.1)
ARMSubtarget.h added (r1.1)
README-Thumb.txt added (r1.1)
ARM.h updated: 1.7 -> 1.8
ARM.td updated: 1.3 -> 1.4
ARMAsmPrinter.cpp updated: 1.42 -> 1.43
ARMFrameInfo.h updated: 1.2 -> 1.3
ARMISelDAGToDAG.cpp updated: 1.98 -> 1.99
ARMInstrInfo.cpp updated: 1.10 -> 1.11
ARMInstrInfo.h updated: 1.4 -> 1.5
ARMInstrInfo.td updated: 1.83 -> 1.84
ARMRegisterInfo.cpp updated: 1.33 -> 1.34
ARMRegisterInfo.h updated: 1.4 -> 1.5
ARMRegisterInfo.td updated: 1.6 -> 1.7
ARMTargetAsmInfo.cpp updated: 1.3 -> 1.4
ARMTargetMachine.cpp updated: 1.13 -> 1.14
ARMTargetMachine.h updated: 1.6 -> 1.7
Makefile updated: 1.1 -> 1.2
README.txt updated: 1.9 -> 1.10
ARMCommon.cpp (r1.1) removed
ARMCommon.h (r1.1) removed
ARMMul.cpp (r1.4) removed
---
Log message:

ARM backend contribution from Apple.

---
Diffs of the changes:  (+8698 -1790)

 ARM.h                     |   98 ++-
 ARM.td                    |   77 ++
 ARMAddressingModes.h      |  394 ++++++++++++
 ARMAsmPrinter.cpp         |  854 +++++++++++++++++++++------
 ARMConstantIslandPass.cpp |  490 +++++++++++++++
 ARMConstantPoolValue.cpp  |   55 +
 ARMConstantPoolValue.h    |   50 +
 ARMFrameInfo.h            |   10 
 ARMISelDAGToDAG.cpp       | 1438 ++++++++++++++--------------------------------
 ARMISelLowering.cpp       | 1414 +++++++++++++++++++++++++++++++++++++++++++++
 ARMISelLowering.h         |  134 ++++
 ARMInstrInfo.cpp          |  405 ++++++++++++
 ARMInstrInfo.h            |   69 ++
 ARMInstrInfo.td           | 1355 ++++++++++++++++++++++++++++++++++---------
 ARMInstrThumb.td          |  513 ++++++++++++++++
 ARMInstrVFP.td            |  359 +++++++++++
 ARMLoadStoreOptimizer.cpp |  628 ++++++++++++++++++++
 ARMMachineFunctionInfo.h  |  136 ++++
 ARMRegisterInfo.cpp       | 1030 ++++++++++++++++++++++++++++----
 ARMRegisterInfo.h         |   35 -
 ARMRegisterInfo.td        |  250 ++++---
 ARMSubtarget.cpp          |   52 +
 ARMSubtarget.h            |   82 ++
 ARMTargetAsmInfo.cpp      |   48 +
 ARMTargetMachine.cpp      |   31 
 ARMTargetMachine.h        |   17 
 Makefile                  |    4 
 README-Thumb.txt          |   17 
 README.txt                |  436 ++++++++++++-
 29 files changed, 8698 insertions(+), 1783 deletions(-)


Index: llvm/lib/Target/ARM/ARMAddressingModes.h
diff -c /dev/null llvm/lib/Target/ARM/ARMAddressingModes.h:1.1
*** /dev/null	Fri Jan 19 01:51:52 2007

--- llvm/lib/Target/ARM/ARMAddressingModes.h	Fri Jan 19 01:51:42 2007
***************
*** 0 ****
--- 1,394 ----
+ //===- ARMAddressingModes.h - ARM Addressing Modes --------------*- C++ -*-===//
+ //
+ //                     The LLVM Compiler Infrastructure
+ //
+ // This file was developed by Chris Lattner and is distributed under the
+ // University of Illinois Open Source License. See LICENSE.TXT for details.
+ //
+ //===----------------------------------------------------------------------===//
+ //
+ // This file contains the ARM addressing mode implementation stuff.
+ //
+ //===----------------------------------------------------------------------===//
+ 
+ #ifndef LLVM_TARGET_ARM_ARMADDRESSINGMODES_H
+ #define LLVM_TARGET_ARM_ARMADDRESSINGMODES_H
+ 
+ #include "llvm/CodeGen/SelectionDAGNodes.h"
+ #include "llvm/Support/MathExtras.h"
+ #include <cassert>
+ 
+ namespace llvm {
+   
+ /// ARM_AM - ARM Addressing Mode Stuff
+ namespace ARM_AM {
+   enum ShiftOpc {
+     no_shift = 0,
+     asr,
+     lsl,
+     lsr,
+     ror,
+     rrx
+   };
+   
+   enum AddrOpc {
+     add = '+', sub = '-'
+   };
+   
+   static inline const char *getShiftOpcStr(ShiftOpc Op) {
+     switch (Op) {
+     default: assert(0 && "Unknown shift opc!");
+     case ARM_AM::asr: return "asr";
+     case ARM_AM::lsl: return "lsl";
+     case ARM_AM::lsr: return "lsr";
+     case ARM_AM::ror: return "ror";
+     case ARM_AM::rrx: return "rrx";
+     }
+   }
+   
+   static inline ShiftOpc getShiftOpcForNode(SDOperand N) {
+     switch (N.getOpcode()) {
+     default:          return ARM_AM::no_shift;
+     case ISD::SHL:    return ARM_AM::lsl;
+     case ISD::SRL:    return ARM_AM::lsr;
+     case ISD::SRA:    return ARM_AM::asr;
+     case ISD::ROTR:   return ARM_AM::ror;
+     //case ISD::ROTL:  // Only if imm -> turn into ROTR.
+     // Can't handle RRX here, because it would require folding a flag into
+     // the addressing mode.  :(  This causes us to miss certain things.
+     //case ARMISD::RRX: return ARM_AM::rrx;
+     }
+   }
+ 
+   enum AMSubMode {
+     bad_am_submode = 0,
+     ia,
+     ib,
+     da,
+     db
+   };
+ 
+   static inline const char *getAMSubModeStr(AMSubMode Mode) {
+     switch (Mode) {
+     default: assert(0 && "Unknown addressing sub-mode!");
+     case ARM_AM::ia: return "ia";
+     case ARM_AM::ib: return "ib";
+     case ARM_AM::da: return "da";
+     case ARM_AM::db: return "db";
+     }
+   }
+ 
+   static inline const char *getAMSubModeAltStr(AMSubMode Mode, bool isLD) {
+     switch (Mode) {
+     default: assert(0 && "Unknown addressing sub-mode!");
+     case ARM_AM::ia: return isLD ? "fd" : "ea";
+     case ARM_AM::ib: return isLD ? "ed" : "fa";
+     case ARM_AM::da: return isLD ? "fa" : "ed";
+     case ARM_AM::db: return isLD ? "ea" : "fd";
+     }
+   }
+ 
+   /// rotr32 - Rotate a 32-bit unsigned value right by a specified # bits.
+   ///
+   static inline unsigned rotr32(unsigned Val, unsigned Amt) {
+     assert(Amt < 32 && "Invalid rotate amount");
+     return (Val >> Amt) | (Val << ((32-Amt)&31));
+   }
+   
+   /// rotl32 - Rotate a 32-bit unsigned value left by a specified # bits.
+   ///
+   static inline unsigned rotl32(unsigned Val, unsigned Amt) {
+     assert(Amt < 32 && "Invalid rotate amount");
+     return (Val << Amt) | (Val >> ((32-Amt)&31));
+   }
+   
+   //===--------------------------------------------------------------------===//
+   // Addressing Mode #1: shift_operand with registers
+   //===--------------------------------------------------------------------===//
+   //
+   // This 'addressing mode' is used for arithmetic instructions.  It can
+   // represent things like:
+   //   reg
+   //   reg [asr|lsl|lsr|ror|rrx] reg
+   //   reg [asr|lsl|lsr|ror|rrx] imm
+   //
+   // This is stored as three operands [rega, regb, opc].  The first is the base
+   // reg, the second is the shift amount (or reg0 if not present or imm).  The
+   // third operand encodes the shift opcode and the imm if a reg isn't present.
+   //
+   static inline unsigned getSORegOpc(ShiftOpc ShOp, unsigned Imm) {
+     return ShOp | (Imm << 3);
+   }
+   static inline unsigned getSORegOffset(unsigned Op) {
+     return Op >> 3;
+   }
+   static inline ShiftOpc getSORegShOp(unsigned Op) {
+     return (ShiftOpc)(Op & 7);
+   }
+ 
+   /// getSOImmValImm - Given an encoded imm field for the reg/imm form, return
+   /// the 8-bit imm value.
+   static inline unsigned getSOImmValImm(unsigned Imm) {
+     return Imm & 0xFF;
+   }
+   /// getSOImmValRot - Given an encoded imm field for the reg/imm form, return
+   /// the rotate amount, i.e. twice the field stored above the low 8 bits.
+   static inline unsigned getSOImmValRot(unsigned Imm) {
+     return (Imm >> 8) * 2;
+   }
+   
+   /// getSOImmValRotate - Try to handle Imm with an immediate shifter operand,
+   /// computing the rotate amount to use.  If this immediate value cannot be
+   /// handled with a single shifter-op, determine a good rotate amount that will
+   /// take a maximal chunk of bits out of the immediate.
+   static inline unsigned getSOImmValRotate(unsigned Imm) {
+     // 8-bit (or less) immediates are trivially shifter_operands with a rotate
+     // of zero.
+     if ((Imm & ~255U) == 0) return 0;
+     
+     // Use CTZ to compute the rotate amount.
+     unsigned TZ = CountTrailingZeros_32(Imm);
+     
+     // Rotate amount must be even.  Something like 0x200 must be rotated 8 bits,
+     // not 9.
+     unsigned RotAmt = TZ & ~1;
+     
+     // If we can handle this spread, return it.
+     if ((rotr32(Imm, RotAmt) & ~255U) == 0)
+       return (32-RotAmt)&31;  // HW rotates right, not left.
+ 
+     // For values like 0xF000000F, we should skip the first run of ones, then
+     // retry the hunt.
+     if (Imm & 1) {
+       unsigned TrailingOnes = CountTrailingZeros_32(~Imm);
+       if (TrailingOnes != 32) {  // Avoid overflow on 0xFFFFFFFF
+         // Restart the search above the initial run of ones.  The mask is built
+         // unsigned so a run of 31 ones (0x7FFFFFFF) cannot overflow.
+         unsigned TZ2 = CountTrailingZeros_32(Imm & ~((1U << TrailingOnes)-1));
+       
+         // Rotate amount must be even.
+         unsigned RotAmt2 = TZ2 & ~1;
+         
+         // If this fits, use it.
+         if (RotAmt2 != 32 && (rotr32(Imm, RotAmt2) & ~255U) == 0)
+           return (32-RotAmt2)&31;  // HW rotates right, not left.
+       }
+     }
+     
+     // Otherwise, we have no way to cover this span of bits with a single
+     // shifter_op immediate.  Return a chunk of bits that will be useful to
+     // handle.
+     return (32-RotAmt)&31;  // HW rotates right, not left.
+   }
+ 
+   /// getSOImmVal - Given a 32-bit immediate, if it is something that can fit
+   /// into a shifter_operand immediate operand, return the 12-bit encoding for
+   /// it.  If not, return -1.
+   static inline int getSOImmVal(unsigned Arg) {
+     // 8-bit (or less) immediates are trivially shifter_operands with a rotate
+     // of zero.
+     if ((Arg & ~255U) == 0) return Arg;
+     
+     unsigned RotAmt = getSOImmValRotate(Arg);
+ 
+     // If this cannot be handled with a single shifter_op, bail out.
+     if (rotr32(~255U, RotAmt) & Arg)
+       return -1;
+       
+     // Encode this correctly.
+     return rotl32(Arg, RotAmt) | ((RotAmt>>1) << 8);
+   }
+   
+   /// isSOImmTwoPartVal - Return true if the specified value can be obtained by
+   /// or'ing together two SOImmVal's.
+   static inline bool isSOImmTwoPartVal(unsigned V) {
+     // If this can be handled with a single shifter_op, bail out.
+     V = rotr32(~255U, getSOImmValRotate(V)) & V;
+     if (V == 0)
+       return false;
+     
+     // If this can be handled with two shifter_op's, accept.
+     V = rotr32(~255U, getSOImmValRotate(V)) & V;
+     return V == 0;
+   }
+   
+   /// getSOImmTwoPartFirst - If V is a value that satisfies isSOImmTwoPartVal,
+   /// return the first chunk of it.
+   static inline unsigned getSOImmTwoPartFirst(unsigned V) {
+     return rotr32(255U, getSOImmValRotate(V)) & V;
+   }
+ 
+   /// getSOImmTwoPartSecond - If V is a value that satisfies isSOImmTwoPartVal,
+   /// return the second chunk of it.
+   static inline unsigned getSOImmTwoPartSecond(unsigned V) {
+     // Mask out the first hunk.  
+     V = rotr32(~255U, getSOImmValRotate(V)) & V;
+     
+     // Take what's left.
+     assert(V == (rotr32(255U, getSOImmValRotate(V)) & V));
+     return V;
+   }
+   
+   /// getThumbImmValShift - Try to handle Imm with an 8-bit immediate followed
+   /// by a left shift. Returns the shift amount to use.
+   static inline unsigned getThumbImmValShift(unsigned Imm) {
+     // 8-bit (or less) immediates are trivially immediate operand with a shift
+     // of zero.
+     if ((Imm & ~255U) == 0) return 0;
+ 
+     // Use CTZ to compute the shift amount.
+     return CountTrailingZeros_32(Imm);
+   }
+ 
+   /// isThumbImmShiftedVal - Return true if the specified value can be obtained
+   /// by left shifting an 8-bit immediate.
+   static inline bool isThumbImmShiftedVal(unsigned V) {
+     // Clear the shifted 8-bit window; any bits left over mean V doesn't fit.
+     V = (~255U << getThumbImmValShift(V)) & V;
+     return V == 0;
+   }
+ 
+   /// getThumbImmNonShiftedVal - If V is a value that satisfies
+   /// isThumbImmShiftedVal, return the non-shifted value.
+   static inline unsigned getThumbImmNonShiftedVal(unsigned V) {
+     return V >> getThumbImmValShift(V);
+   }
+ 
+   //===--------------------------------------------------------------------===//
+   // Addressing Mode #2
+   //===--------------------------------------------------------------------===//
+   //
+   // This is used for most simple load/store instructions.
+   //
+   // addrmode2 := reg +/- reg shop imm
+   // addrmode2 := reg +/- imm12
+   //
+   // The first operand is always a Reg.  The second operand is a reg if in
+   // reg/reg form, otherwise it's reg#0.  The third field encodes the operation
+   // in bit 12, the immediate in bits 0-11, and the shift op in 13-15.
+   //
+   // If this addressing mode is a frame index (before prolog/epilog insertion
+   // and code rewriting), this operand will have the form:  FI#, reg0, <offs>
+   // with no shift amount for the frame offset.
+   // 
+   static inline unsigned getAM2Opc(AddrOpc Opc, unsigned Imm12, ShiftOpc SO) {
+     assert(Imm12 < (1 << 12) && "Imm too large!");
+     bool isSub = Opc == sub;
+     return Imm12 | ((int)isSub << 12) | (SO << 13);
+   }
+   static inline unsigned getAM2Offset(unsigned AM2Opc) {
+     return AM2Opc & ((1 << 12)-1);
+   }
+   static inline AddrOpc getAM2Op(unsigned AM2Opc) {
+     return ((AM2Opc >> 12) & 1) ? sub : add;
+   }
+   static inline ShiftOpc getAM2ShiftOpc(unsigned AM2Opc) {
+     return (ShiftOpc)(AM2Opc >> 13);
+   }
+   
+   
+   //===--------------------------------------------------------------------===//
+   // Addressing Mode #3
+   //===--------------------------------------------------------------------===//
+   //
+   // This is used for sign-extending loads, and load/store-pair instructions.
+   //
+   // addrmode3 := reg +/- reg
+   // addrmode3 := reg +/- imm8
+   //
+   // The first operand is always a Reg.  The second operand is a reg if in
+   // reg/reg form, otherwise it's reg#0.  The third field encodes the operation
+   // in bit 8, the immediate in bits 0-7.
+   
+   /// getAM3Opc - This function encodes the addrmode3 opc field.
+   static inline unsigned getAM3Opc(AddrOpc Opc, unsigned char Offset) {
+     bool isSub = Opc == sub;
+     return ((int)isSub << 8) | Offset;
+   }
+   static inline unsigned char getAM3Offset(unsigned AM3Opc) {
+     return AM3Opc & 0xFF;
+   }
+   static inline AddrOpc getAM3Op(unsigned AM3Opc) {
+     return ((AM3Opc >> 8) & 1) ? sub : add;
+   }
+   
+   //===--------------------------------------------------------------------===//
+   // Addressing Mode #4
+   //===--------------------------------------------------------------------===//
+   //
+   // This is used for load / store multiple instructions.
+   //
+   // addrmode4 := reg, <mode>
+   //
+   // The four modes are:
+   //    IA - Increment after
+   //    IB - Increment before
+   //    DA - Decrement after
+   //    DB - Decrement before
+   //
+   // If bit 3 (the writeback flag) is set, then the base register is updated
+   // after the memory transfer.
+ 
+   static inline AMSubMode getAM4SubMode(unsigned Mode) {
+     return (AMSubMode)(Mode & 0x7);
+   }
+ 
+   static inline unsigned getAM4ModeImm(AMSubMode SubMode, bool WB = false) {
+     return (int)SubMode | ((int)WB << 3);
+   }
+ 
+   static inline bool getAM4WBFlag(unsigned Mode) {
+     return (Mode >> 3) & 1;
+   }
+ 
+   //===--------------------------------------------------------------------===//
+   // Addressing Mode #5
+   //===--------------------------------------------------------------------===//
+   //
+   // This is used for coprocessor instructions, such as FP load/stores.
+   //
+   // addrmode5 := reg +/- imm8*4
+   //
+   // The first operand is always a Reg.  The third field encodes the operation
+   // in bit 8, the immediate in bits 0-7.
+   //
+   // This can also be used for FP load/store multiple ops. The third field encodes
+   // writeback mode in bit 8, the number of registers (or 2 times the number of
+   // registers for DPR ops) in bits 0-7. In addition, bits 9-11 encode one of the
+   // following two sub-modes:
+   //
+   //    IA - Increment after
+   //    DB - Decrement before
+   
+   /// getAM5Opc - This function encodes the addrmode5 opc field.
+   static inline unsigned getAM5Opc(AddrOpc Opc, unsigned char Offset) {
+     bool isSub = Opc == sub;
+     return ((int)isSub << 8) | Offset;
+   }
+   static inline unsigned char getAM5Offset(unsigned AM5Opc) {
+     return AM5Opc & 0xFF;
+   }
+   static inline AddrOpc getAM5Op(unsigned AM5Opc) {
+     return ((AM5Opc >> 8) & 1) ? sub : add;
+   }
+ 
+   /// getAM5Opc - This function encodes the addrmode5 opc field for FLDM and
+   /// FSTM instructions.
+   static inline unsigned getAM5Opc(AMSubMode SubMode, bool WB,
+                                    unsigned char Offset) {
+     assert((SubMode == ia || SubMode == db) &&
+            "Illegal addressing mode 5 sub-mode!");
+     return ((int)SubMode << 9) | ((int)WB << 8) | Offset;
+   }
+   static inline AMSubMode getAM5SubMode(unsigned AM5Opc) {
+     return (AMSubMode)((AM5Opc >> 9) & 0x7);
+   }
+   static inline bool getAM5WBFlag(unsigned AM5Opc) {
+     return ((AM5Opc >> 8) & 1);
+   }
+   
+ } // end namespace ARM_AM
+ } // end namespace llvm
+ 
+ #endif
+ 


Index: llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
diff -c /dev/null llvm/lib/Target/ARM/ARMConstantIslandPass.cpp:1.1
*** /dev/null	Fri Jan 19 01:51:57 2007
--- llvm/lib/Target/ARM/ARMConstantIslandPass.cpp	Fri Jan 19 01:51:42 2007
***************
*** 0 ****
--- 1,490 ----
+ //===-- ARMConstantIslandPass.cpp - ARM constant islands --------*- C++ -*-===//
+ //
+ //                     The LLVM Compiler Infrastructure
+ //
+ // This file was developed by Chris Lattner and is distributed under the
+ // University of Illinois Open Source License. See LICENSE.TXT for details.
+ //
+ //===----------------------------------------------------------------------===//
+ //
+ // This file contains a pass that splits the constant pool up into 'islands'
+ // which are scattered throughout the function.  This is required due to the
+ // limited pc-relative displacements that ARM has.
+ //
+ //===----------------------------------------------------------------------===//
+ 
+ #define DEBUG_TYPE "arm-cp-islands"
+ #include "ARM.h"
+ #include "ARMInstrInfo.h"
+ #include "llvm/CodeGen/MachineConstantPool.h"
+ #include "llvm/CodeGen/MachineFunctionPass.h"
+ #include "llvm/CodeGen/MachineInstrBuilder.h"
+ #include "llvm/CodeGen/MachineJumpTableInfo.h"
+ #include "llvm/Target/TargetAsmInfo.h"
+ #include "llvm/Target/TargetData.h"
+ #include "llvm/Target/TargetMachine.h"
+ #include "llvm/Support/Compiler.h"
+ #include "llvm/Support/Debug.h"
+ #include "llvm/ADT/STLExtras.h"
+ #include "llvm/ADT/Statistic.h"
+ #include <iostream>
+ using namespace llvm;
+ 
+ STATISTIC(NumSplit, "Number of uncond branches inserted");
+ 
+ namespace {
+   /// ARMConstantIslands - Due to limited pc-relative displacements, ARM
+   /// requires constant pool entries to be scattered among the instructions
+   /// inside a function.  To do this, it completely ignores the normal LLVM
+   /// constant pool; instead, it places constants wherever it feels like with
+   /// special instructions.
+   ///
+   /// The terminology used in this pass includes:
+   ///   Islands - Clumps of constants placed in the function.
+   ///   Water   - Potential places where an island could be formed.
+   ///   CPE     - A constant pool entry that has been placed somewhere, which
+   ///             tracks a list of users.
+   class VISIBILITY_HIDDEN ARMConstantIslands : public MachineFunctionPass {
+     /// NextUID - Assign unique ID's to CPE's.
+     unsigned NextUID;
+     
+     /// BBSizes - The size of each MachineBasicBlock in bytes of code, indexed
+     /// by MBB Number.
+     std::vector<unsigned> BBSizes;
+     
+     /// WaterList - A sorted list of basic blocks where islands could be placed
+     /// (i.e. blocks that don't fall through to the following block, due
+     /// to a return, unreachable, or unconditional branch).
+     std::vector<MachineBasicBlock*> WaterList;
+     
+     /// CPUser - One user of a constant pool, keeping the machine instruction
+     /// pointer, the constant pool being referenced, and the max displacement
+     /// allowed from the instruction to the CP.
+     struct CPUser {
+       MachineInstr *MI;
+       MachineInstr *CPEMI;
+       unsigned MaxDisp;
+       CPUser(MachineInstr *mi, MachineInstr *cpemi, unsigned maxdisp)
+         : MI(mi), CPEMI(cpemi), MaxDisp(maxdisp) {}
+     };
+     
+     /// CPUsers - Keep track of all of the machine instructions that use various
+     /// constant pools and their max displacement.
+     std::vector<CPUser> CPUsers;
+     
+     const TargetInstrInfo *TII;
+     const TargetAsmInfo   *TAI;
+   public:
+     virtual bool runOnMachineFunction(MachineFunction &Fn);
+ 
+     virtual const char *getPassName() const {
+       return "ARM constant island placement pass";
+     }
+     
+   private:
+     void DoInitialPlacement(MachineFunction &Fn,
+                             std::vector<MachineInstr*> &CPEMIs);
+     void InitialFunctionScan(MachineFunction &Fn,
+                              const std::vector<MachineInstr*> &CPEMIs);
+     void SplitBlockBeforeInstr(MachineInstr *MI);
+     bool HandleConstantPoolUser(MachineFunction &Fn, CPUser &U);
+     void UpdateForInsertedWaterBlock(MachineBasicBlock *NewBB);
+ 
+     unsigned GetInstSize(MachineInstr *MI) const;
+     unsigned GetOffsetOf(MachineInstr *MI) const;
+   };
+ }
+ 
+ /// createARMConstantIslandPass - returns an instance of the constant island
+ /// placement pass.
+ FunctionPass *llvm::createARMConstantIslandPass() {
+   return new ARMConstantIslands();
+ }
+ 
+ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &Fn) {
+   // If there are no constants, there is nothing to do.
+   MachineConstantPool &MCP = *Fn.getConstantPool();
+   if (MCP.isEmpty()) return false;
+   
+   TII = Fn.getTarget().getInstrInfo();
+   TAI = Fn.getTarget().getTargetAsmInfo();
+   
+   // Renumber all of the machine basic blocks in the function, guaranteeing that
+   // the numbers agree with the position of the block in the function.
+   Fn.RenumberBlocks();
+ 
+   // Perform the initial placement of the constant pool entries.  To start with,
+   // we put them all at the end of the function.
+   std::vector<MachineInstr*> CPEMIs;
+   DoInitialPlacement(Fn, CPEMIs);
+   
+   /// The next UID to take is the first unused one.
+   NextUID = CPEMIs.size();
+   
+   // Do the initial scan of the function, building up information about the
+   // sizes of each block, the location of all the water, and finding all of the
+   // constant pool users.
+   InitialFunctionScan(Fn, CPEMIs);
+   CPEMIs.clear();
+   
+   // Iteratively place constant pool entries until there is no change.
+   bool MadeChange;
+   do {
+     MadeChange = false;
+     for (unsigned i = 0, e = CPUsers.size(); i != e; ++i)
+       MadeChange |= HandleConstantPoolUser(Fn, CPUsers[i]);
+   } while (MadeChange);
+   
+   BBSizes.clear();
+   WaterList.clear();
+   CPUsers.clear();
+     
+   return true;
+ }
+ 
+ /// DoInitialPlacement - Perform the initial placement of the constant pool
+ /// entries.  To start with, we put them all at the end of the function.
+ void ARMConstantIslands::DoInitialPlacement(MachineFunction &Fn,
+                                             std::vector<MachineInstr*> &CPEMIs){
+   // Create the basic block to hold the CPE's.
+   MachineBasicBlock *BB = new MachineBasicBlock();
+   Fn.getBasicBlockList().push_back(BB);
+   
+   // Add all of the constants from the constant pool to the end block, use an
+   // identity mapping of CPI's to CPE's.
+   const std::vector<MachineConstantPoolEntry> &CPs =
+     Fn.getConstantPool()->getConstants();
+   
+   const TargetData &TD = *Fn.getTarget().getTargetData();
+   for (unsigned i = 0, e = CPs.size(); i != e; ++i) {
+     unsigned Size = TD.getTypeSize(CPs[i].getType());
+     // Verify that all constant pool entries are a multiple of 4 bytes.  If not,
+     // we would have to pad them out or something so that instructions stay
+     // aligned.
+     assert((Size & 3) == 0 && "CP Entry not multiple of 4 bytes!");
+     MachineInstr *CPEMI =
+       BuildMI(BB, TII->get(ARM::CONSTPOOL_ENTRY))
+                            .addImm(i).addConstantPoolIndex(i).addImm(Size);
+     CPEMIs.push_back(CPEMI);
+     DEBUG(std::cerr << "Moved CPI#" << i << " to end of function as #"
+                     << i << "\n");
+   }
+ }
+ 
+ /// BBHasFallthrough - Return true if the specified basic block can fallthrough
+ /// into the block immediately after it.
+ static bool BBHasFallthrough(MachineBasicBlock *MBB) {
+   // Get the next machine basic block in the function.
+   MachineFunction::iterator MBBI = MBB;
+   if (next(MBBI) == MBB->getParent()->end())  // Can't fall off end of function.
+     return false;
+   
+   MachineBasicBlock *NextBB = next(MBBI);
+   for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(),
+        E = MBB->succ_end(); I != E; ++I)
+     if (*I == NextBB)
+       return true;
+   
+   return false;
+ }
+ 
+ /// InitialFunctionScan - Do the initial scan of the function, building up
+ /// information about the sizes of each block, the location of all the water,
+ /// and finding all of the constant pool users.
+ void ARMConstantIslands::InitialFunctionScan(MachineFunction &Fn,
+                                      const std::vector<MachineInstr*> &CPEMIs) {
+   for (MachineFunction::iterator MBBI = Fn.begin(), E = Fn.end();
+        MBBI != E; ++MBBI) {
+     MachineBasicBlock &MBB = *MBBI;
+     
+     // If this block doesn't fall through into the next MBB, then this is
+     // 'water' that a constant pool island could be placed.
+     if (!BBHasFallthrough(&MBB))
+       WaterList.push_back(&MBB);
+     
+     unsigned MBBSize = 0;
+     for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+          I != E; ++I) {
+       // Add instruction size to MBBSize.
+       MBBSize += GetInstSize(I);
+ 
+       // Scan the instructions for constant pool operands.
+       for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op)
+         if (I->getOperand(op).isConstantPoolIndex()) {
+           // We found one.  The addressing mode tells us the max displacement
+           // from the PC that this instruction permits.
+           unsigned MaxOffs = 0;
+           
+           // Basic size info comes from the TSFlags field.
+           unsigned TSFlags = I->getInstrDescriptor()->TSFlags;
+           switch (TSFlags & ARMII::AddrModeMask) {
+           default: 
+             // Constant pool entries can reach anything.
+             if (I->getOpcode() == ARM::CONSTPOOL_ENTRY)
+               continue;
+             assert(0 && "Unknown addressing mode for CP reference!");
+           case ARMII::AddrMode1: // AM1: 8 bits << 2
+             MaxOffs = 1 << (8+2);   // Taking the address of a CP entry.
+             break;
+           case ARMII::AddrMode2:
+             MaxOffs = 1 << 12;   // +-offset_12
+             break;
+           case ARMII::AddrMode3:
+             MaxOffs = 1 << 8;   // +-offset_8
+             break;
+             // addrmode4 has no immediate offset.
+           case ARMII::AddrMode5:
+             MaxOffs = 1 << (8+2);   // +-(offset_8*4)
+             break;
+           case ARMII::AddrModeT1:
+             MaxOffs = 1 << 5;
+             break;
+           case ARMII::AddrModeT2:
+             MaxOffs = 1 << (5+1);
+             break;
+           case ARMII::AddrModeT4:
+             MaxOffs = 1 << (5+2);
+             break;
+           }
+           
+           // Remember that this is a user of a CP entry.
+           MachineInstr *CPEMI =CPEMIs[I->getOperand(op).getConstantPoolIndex()];
+           CPUsers.push_back(CPUser(I, CPEMI, MaxOffs));
+           
+           // Instructions can only use one CP entry, don't bother scanning the
+           // rest of the operands.
+           break;
+         }
+     }
+     BBSizes.push_back(MBBSize);
+   }
+ }
+ 
+ /// FIXME: Works around a gcc miscompilation with -fstrict-aliasing
+ static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT,
+                                 unsigned JTI) DISABLE_INLINE;
+ static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT,
+                                 unsigned JTI) {
+   return JT[JTI].MBBs.size();
+ }
+ 
+ /// GetInstSize - Return the size in bytes of the specified MachineInstr.
+ /// Pseudo-instructions, and unknown size fields in NDEBUG builds, report 0.
+ unsigned ARMConstantIslands::GetInstSize(MachineInstr *MI) const {
+   // Basic size info comes from the TSFlags field.
+   unsigned TSFlags = MI->getInstrDescriptor()->TSFlags;
+   
+   switch ((TSFlags & ARMII::SizeMask) >> ARMII::SizeShift) {
+   default:
+     // If this machine instr is an inline asm, measure it.
+     if (MI->getOpcode() == ARM::INLINEASM)
+       return TAI->getInlineAsmLength(MI->getOperand(0).getSymbolName());
+     assert(0 && "Unknown or unset size field for instr!");
+     return 0;  // Don't run off the end of the function when asserts are off.
+   case ARMII::Size8Bytes: return 8;          // Arm instruction x 2.
+   case ARMII::Size4Bytes: return 4;          // Arm instruction.
+   case ARMII::Size2Bytes: return 2;          // Thumb instruction.
+   case ARMII::SizeSpecial: {
+     switch (MI->getOpcode()) {
+     case ARM::CONSTPOOL_ENTRY:
+       // If this machine instr is a constant pool entry, its size is recorded as
+       // operand #2.
+       return MI->getOperand(2).getImm();
+     case ARM::BR_JTr:
+     case ARM::BR_JTm:
+     case ARM::BR_JTadd: {
+       // These are jumptable branches, i.e. a branch followed by an inlined
+       // jumptable. The size is 4 + 4 * number of entries.
+       unsigned JTI = MI->getOperand(MI->getNumOperands()-2).getJumpTableIndex();
+       const MachineFunction *MF = MI->getParent()->getParent();
+       MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
+       const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+       assert(JTI < JT.size());
+       return getNumJTEntries(JT, JTI) * 4 + 4;
+     }
+     default:
+       // Otherwise, pseudo-instruction sizes are zero.
+       return 0;
+     }
+   }
+   }
+ }
+ 
+ /// GetOffsetOf - Return the current offset of the specified machine instruction
+ /// from the start of the function.  This offset changes as stuff is moved
+ /// around inside the function.
+ unsigned ARMConstantIslands::GetOffsetOf(MachineInstr *MI) const {
+   MachineBasicBlock *MBB = MI->getParent();
+   
+   // The offset is composed of two things: the sum of the sizes of all MBB's
+   // before this instruction's block, and the offset from the start of the block
+   // it is in.
+   unsigned Offset = 0;
+   
+   // Sum block sizes before MBB.
+   for (unsigned BB = 0, e = MBB->getNumber(); BB != e; ++BB)
+     Offset += BBSizes[BB];
+ 
+   // Sum instructions before MI in MBB.
+   for (MachineBasicBlock::iterator I = MBB->begin(); ; ++I) {
+     assert(I != MBB->end() && "Didn't find MI in its own basic block?");
+     if (&*I == MI) return Offset;
+     Offset += GetInstSize(I);
+   }
+ }
+ 
+ /// CompareMBBNumbers - Little predicate function to sort the WaterList by MBB
+ /// ID.
+ static bool CompareMBBNumbers(const MachineBasicBlock *LHS,
+                               const MachineBasicBlock *RHS) {
+   return LHS->getNumber() < RHS->getNumber();
+ }
+ 
+ /// UpdateForInsertedWaterBlock - When a block is newly inserted into the
+ /// machine function, it upsets all of the block numbers.  Renumber the blocks
+ /// and update the arrays that parallel this numbering.
+ void ARMConstantIslands::UpdateForInsertedWaterBlock(MachineBasicBlock *NewBB) {
+   // Renumber the MBB's to keep them consecutive.
+   NewBB->getParent()->RenumberBlocks(NewBB);
+   
+   // Insert a size into BBSizes to align it properly with the (newly
+   // renumbered) block numbers.
+   BBSizes.insert(BBSizes.begin()+NewBB->getNumber(), 0);
+   
+   // Next, update WaterList.  Specifically, we need to add NewBB as having
+   // available water after it, keeping the list sorted by block number.
+   std::vector<MachineBasicBlock*>::iterator IP =
+     std::lower_bound(WaterList.begin(), WaterList.end(), NewBB,
+                      CompareMBBNumbers);
+   WaterList.insert(IP, NewBB);
+ }
+ 
+ 
+ /// SplitBlockBeforeInstr - Break the basic block containing MI in two,
+ /// immediately ahead of MI.  MI and everything after it move into a new block
+ /// placed right after the original one, and the original block is terminated
+ /// with an unconditional branch to the new block.  The CFG, the block
+ /// numbering and the BBSizes/WaterList bookkeeping are updated to match.
+ void ARMConstantIslands::SplitBlockBeforeInstr(MachineInstr *MI) {
+   MachineBasicBlock *OrigBB = MI->getParent();
+   MachineFunction *MF = OrigBB->getParent();
+ 
+   // Create the block that receives the tail of OrigBB and link it into the
+   // function directly after OrigBB.
+   MachineBasicBlock *NewBB = new MachineBasicBlock(OrigBB->getBasicBlock());
+   MachineFunction::iterator InsertPt = OrigBB;
+   ++InsertPt;
+   MF->getBasicBlockList().insert(InsertPt, NewBB);
+ 
+   // Move the instruction range [MI, end of OrigBB) over to NewBB.
+   NewBB->splice(NewBB->end(), OrigBB, MI, OrigBB->end());
+ 
+   // Terminate OrigBB with an unconditional branch to NewBB.
+   BuildMI(OrigBB, TII->get(ARM::B)).addMBB(NewBB);
+   NumSplit++;
+ 
+   // Update the CFG: every successor edge of OrigBB now leaves from NewBB.
+   while (!OrigBB->succ_empty()) {
+     MachineBasicBlock *Succ = *OrigBB->succ_begin();
+ 
+     // This pass should be run after register allocation, so there should be no
+     // PHI nodes that would need their incoming block rewritten.
+     assert((Succ->empty() || Succ->begin()->getOpcode() != TargetInstrInfo::PHI)
+            && "PHI nodes should be eliminated by now!");
+ 
+     OrigBB->removeSuccessor(Succ);
+     NewBB->addSuccessor(Succ);
+   }
+ 
+   // The only successor OrigBB has left is NewBB, via the branch added above.
+   OrigBB->addSuccessor(NewBB);
+ 
+   // Renumber blocks and update the arrays that parallel the numbering.
+   UpdateForInsertedWaterBlock(NewBB);
+ 
+   // Measure NewBB by summing the sizes of the instructions it now holds.
+   unsigned TailSize = 0;
+   for (MachineBasicBlock::iterator I = NewBB->begin(); I != NewBB->end(); ++I)
+     TailSize += GetInstSize(I);
+   BBSizes[NewBB->getNumber()] = TailSize;
+ 
+   // OrigBB shrank by the size of the instructions that moved out of it, and
+   // grew by 4 to count the unconditional branch added to it.
+   BBSizes[OrigBB->getNumber()] -= TailSize;
+   BBSizes[OrigBB->getNumber()] += 4;
+ }
+ 
+ /// HandleConstantPoolUser - Check whether the constant pool entry referenced
+ /// by user U lies within U.MaxDisp bytes of the using instruction.  If it
+ /// does, nothing is changed and false is returned.  Otherwise a clone of the
+ /// entry is placed in a new island block inserted directly in front of the
+ /// user (splitting the user's block first when that is required), the user is
+ /// redirected to the clone, and true is returned.
+ bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &Fn, CPUser &U){
+   MachineInstr *UserMI = U.MI;
+   MachineInstr *CPEMI  = U.CPEMI;
+ 
+   unsigned UserOff  = GetOffsetOf(UserMI);
+   unsigned EntryOff = GetOffsetOf(CPEMI);
+ 
+   DEBUG(std::cerr << "User of CPE#" << CPEMI->getOperand(0).getImm()
+                   << " max delta=" << U.MaxDisp
+                   << " at offset " << int(UserOff-EntryOff) << "\t"
+                   << *UserMI);
+ 
+   // The entry is already in range when the distance between user and entry,
+   // in whichever direction, does not exceed the maximum displacement.
+   unsigned Distance = UserOff < EntryOff ? EntryOff-UserOff : UserOff-EntryOff;
+   if (Distance <= U.MaxDisp)
+     return false;
+ 
+   // Solution guaranteed to work: make sure the user starts a block that is
+   // never entered by falling through, then put a clone of the CPE into a new
+   // island block right in front of it.
+   MachineBasicBlock *UserMBB = UserMI->getParent();
+ 
+   // A split is needed when the user is not the first instruction of its
+   // block, when its block is the function's entry block, or when the
+   // previous block falls through into it.  The fallthrough query is only
+   // made when a previous block is known to exist.
+   bool NeedSplit = &UserMBB->front() != UserMI || UserMBB == &Fn.front();
+   if (!NeedSplit)
+     NeedSplit = BBHasFallthrough(prior(MachineFunction::iterator(UserMBB)));
+ 
+   if (NeedSplit) {
+     // TODO: Search for the best place to split the code.  In practice, using
+     // loop nesting information to insert these guys outside of loops would be
+     // sufficient.
+     SplitBlockBeforeInstr(UserMI);
+ 
+     // The split may have moved UserMI into a different block.
+     UserMBB = UserMI->getParent();
+   }
+ 
+   // An island can now safely be placed before UserMBB.
+   MachineBasicBlock *NewIsland = new MachineBasicBlock();
+   Fn.getBasicBlockList().insert(UserMBB, NewIsland);
+ 
+   // Renumber blocks and update the arrays that parallel the numbering.
+   UpdateForInsertedWaterBlock(NewIsland);
+ 
+   // Clone the original CPE into the island under a fresh unique ID, reusing
+   // the original's constant pool index and size operands.
+   unsigned ID   = NextUID++;
+   unsigned CPI  = CPEMI->getOperand(1).getConstantPoolIndex();
+   unsigned Size = CPEMI->getOperand(2).getImm();
+   U.CPEMI = BuildMI(NewIsland, TII->get(ARM::CONSTPOOL_ENTRY))
+                 .addImm(ID).addConstantPoolIndex(CPI).addImm(Size);
+ 
+   // The island block grows by the size of the entry it now contains.
+   BBSizes[NewIsland->getNumber()] += Size;
+ 
+   // Redirect the user: its first constant-pool-index operand now names ID.
+   unsigned NumOps = UserMI->getNumOperands();
+   for (unsigned OpNo = 0; OpNo != NumOps; ++OpNo) {
+     if (!UserMI->getOperand(OpNo).isConstantPoolIndex())
+       continue;
+     UserMI->getOperand(OpNo).setConstantPoolIndex(ID);
+     break;
+   }
+ 
+   DEBUG(std::cerr << "  Moved CPE to #" << ID << " CPI=" << CPI << "\t"
+                   << *UserMI);
+ 
+   return true;
+ }
+ 


Index: llvm/lib/Target/ARM/ARMConstantPoolValue.cpp
diff -c /dev/null llvm/lib/Target/ARM/ARMConstantPoolValue.cpp:1.1
*** /dev/null	Fri Jan 19 01:51:57 2007
--- llvm/lib/Target/ARM/ARMConstantPoolValue.cpp	Fri Jan 19 01:51:42 2007
***************
*** 0 ****
--- 1,55 ----
+ //===- ARMConstantPoolValue.cpp - ARM constantpool value --------*- C++ -*-===//
+ //
+ //                     The LLVM Compiler Infrastructure
+ //
+ // This file was developed by Evan Cheng and is distributed under the
+ // University of Illinois Open Source License. See LICENSE.TXT for details.
+ //
+ //===----------------------------------------------------------------------===//
+ //
+ // This file implements the ARM specific constantpool value class.
+ //
+ //===----------------------------------------------------------------------===//
+ 
+ #include "ARMConstantPoolValue.h"
+ #include "llvm/ADT/FoldingSet.h"
+ #include "llvm/GlobalValue.h"
+ using namespace llvm;
+ 
+ /// ARMConstantPoolValue - Build a constant pool value for global gv.  The
+ /// type of gv is handed to the MachineConstantPoolValue base class; id,
+ /// isNonLazy and PCAdj are stored unchanged in LabelId, isNonLazyPtr and
+ /// PCAdjust.
+ ARMConstantPoolValue::ARMConstantPoolValue(GlobalValue *gv, unsigned id,
+                                          bool isNonLazy, unsigned char PCAdj)
+   : MachineConstantPoolValue((const Type*)gv->getType()),
+     GV(gv), LabelId(id), isNonLazyPtr(isNonLazy), PCAdjust(PCAdj) {}
+ 
+ /// getExistingMachineCPValue - Look through the constants already in CP for
+ /// a machine constant pool entry that is equivalent to this value and whose
+ /// offset is a multiple of (1 << Alignment).  Returns the index of the first
+ /// such entry, or -1 if there is none.
+ ///
+ /// Two values are equivalent only if the global, the label id, the
+ /// non-lazy-pointer flag and the PC adjustment all agree.  The PC adjustment
+ /// must take part in the comparison because it is part of what print() emits
+ /// and of the CSE id: two entries that differ only in PCAdjust are different
+ /// constants and must not share a slot.
+ int ARMConstantPoolValue::getExistingMachineCPValue(MachineConstantPool *CP,
+                                                     unsigned Alignment) {
+   unsigned AlignMask = (1 << Alignment)-1;
+ 
+   // Bind by reference; copying the whole constant list for every lookup is
+   // wasted work.
+   const std::vector<MachineConstantPoolEntry> &Constants = CP->getConstants();
+   for (unsigned i = 0, e = Constants.size(); i != e; ++i) {
+     // Only suitably aligned machine-specific entries are candidates.
+     if (!Constants[i].isMachineConstantPoolEntry() ||
+         (Constants[i].Offset & AlignMask) != 0)
+       continue;
+ 
+     // NOTE(review): this cast assumes every machine constant pool entry in
+     // this pool is an ARMConstantPoolValue.
+     ARMConstantPoolValue *CPV =
+       (ARMConstantPoolValue *)Constants[i].Val.MachineCPVal;
+     if (CPV->GV == GV && CPV->LabelId == LabelId &&
+         CPV->isNonLazyPtr == isNonLazyPtr &&
+         CPV->PCAdjust == PCAdjust)
+       return i;
+   }
+ 
+   return -1;
+ }
+ 
+ /// AddSelectionDAGCSEId - Feed every field that identifies this value (the
+ /// global, the label id, the non-lazy-pointer flag and the PC adjustment)
+ /// into the folding set ID, in that order.
+ void ARMConstantPoolValue::AddSelectionDAGCSEId(FoldingSetNodeID &ID) {
+   ID.AddPointer(GV);
+   ID.AddInteger(LabelId);
+   // The flag is widened to unsigned before it is added.
+   ID.AddInteger(static_cast<unsigned>(isNonLazyPtr));
+   ID.AddInteger(PCAdjust);
+ }
+ 
+ /// print - Write this value to O: the global's name, then "$non_lazy_ptr"
+ /// when the non-lazy-pointer flag is set, then "-(LPIC<id>+<adjust>)" when
+ /// the PC adjustment is nonzero.
+ void ARMConstantPoolValue::print(std::ostream &O) const {
+   O << GV->getName();
+ 
+   if (isNonLazyPtr) {
+     O << "$non_lazy_ptr";
+   }
+ 
+   if (PCAdjust != 0) {
+     // PCAdjust is an unsigned char; widen it so it prints as a number.
+     O << "-(LPIC" << LabelId << "+"
+       << (unsigned)PCAdjust << ")";
+   }
+ }


Index: llvm/lib/Target/ARM/ARMConstantPoolValue.h
diff -c /dev/null llvm/lib/Target/ARM/ARMConstantPoolValue.h:1.1
*** /dev/null	Fri Jan 19 01:51:57 2007
--- llvm/lib/Target/ARM/ARMConstantPoolValue.h	Fri Jan 19 01:51:42 2007
***************
*** 0 ****
--- 1,50 ----
+ //===- ARMConstantPoolValue.h - ARM constantpool value ----------*- C++ -*-===//
+ //
+ //                     The LLVM Compiler Infrastructure
+ //
+ // This file was developed by Evan Cheng and is distributed under the
+ // University of Illinois Open Source License. See LICENSE.TXT for details.
+ //
+ //===----------------------------------------------------------------------===//
+ //
+ // This file declares the ARM specific constantpool value class.
+ //
+ //===----------------------------------------------------------------------===//
+ 
+ #ifndef LLVM_TARGET_ARM_CONSTANTPOOLVALUE_H
+ #define LLVM_TARGET_ARM_CONSTANTPOOLVALUE_H
+ 
+ #include "llvm/CodeGen/MachineConstantPool.h"
+ 
+ namespace llvm {
+ 
+ /// ARMConstantPoolValue - ARM specific constantpool value. This is used to
+ /// represent PC relative displacement between the address of the load
+ /// instruction and the global value being loaded, i.e. (&GV-(LPIC+8)).
+ class ARMConstantPoolValue : public MachineConstantPoolValue {
+   GlobalValue *GV;         // GlobalValue being loaded.
+   unsigned LabelId;        // Label id of the load.
+   bool isNonLazyPtr;       // True if loading a Mac OS X non_lazy_ptr stub.
+   unsigned char PCAdjust;  // Extra adjustment if constantpool is pc relative.
+                            // 8 for ARM, 4 for Thumb.
+ 
+ public:
+   /// Construct a value for global gv with label id `id`.  By default the
+   /// value is not a non-lazy pointer and carries no PC adjustment.
+   ARMConstantPoolValue(GlobalValue *gv, unsigned id, bool isNonLazy = false,
+                        unsigned char PCAdj = 0);
+ 
+   // Plain accessors for the fields above.
+   GlobalValue *getGV() const { return GV; }
+   unsigned getLabelId() const { return LabelId; }
+   bool isNonLazyPointer() const { return isNonLazyPtr; }
+   unsigned char getPCAdjustment() const { return PCAdjust; }
+ 
+   /// Search CP for an existing, suitably aligned machine constant pool entry
+   /// equivalent to this value.  Returns its index, or -1 if none is found.
+   virtual int getExistingMachineCPValue(MachineConstantPool *CP,
+                                         unsigned Alignment);
+ 
+   /// Add the fields identifying this value to the folding set ID.
+   virtual void AddSelectionDAGCSEId(FoldingSetNodeID &ID);
+ 
+   /// Write a textual form of this value to O.
+   virtual void print(std::ostream &O) const;
+ };
+   
+ }
+ 
+ #endif


Index: llvm/lib/Target/ARM/ARMISelLowering.cpp
diff -c /dev/null llvm/lib/Target/ARM/ARMISelLowering.cpp:1.1
*** /dev/null	Fri Jan 19 01:51:57 2007
--- llvm/lib/Target/ARM/ARMISelLowering.cpp	Fri Jan 19 01:51:42 2007
***************
*** 0 ****
--- 1,1414 ----
+ //===-- ARMISelLowering.cpp - ARM DAG Lowering Implementation -------------===//
+ //
+ //                     The LLVM Compiler Infrastructure
+ //
+ // This file was developed by Evan Cheng and is distributed under
+ // the University of Illinois Open Source License. See LICENSE.TXT for details.
+ //
+ //===----------------------------------------------------------------------===//
+ //
+ // This file defines the interfaces that ARM uses to lower LLVM code into a
+ // selection DAG.
+ //
+ //===----------------------------------------------------------------------===//
+ 
+ #include "ARM.h"
+ #include "ARMAddressingModes.h"
+ #include "ARMConstantPoolValue.h"
+ #include "ARMISelLowering.h"
+ #include "ARMMachineFunctionInfo.h"
+ #include "ARMRegisterInfo.h"
+ #include "ARMSubtarget.h"
+ #include "ARMTargetMachine.h"
+ #include "llvm/CallingConv.h"
+ #include "llvm/Constants.h"
+ #include "llvm/CodeGen/MachineBasicBlock.h"
+ #include "llvm/CodeGen/MachineFrameInfo.h"
+ #include "llvm/CodeGen/MachineFunction.h"
+ #include "llvm/CodeGen/MachineInstrBuilder.h"
+ #include "llvm/CodeGen/SelectionDAG.h"
+ #include "llvm/CodeGen/SSARegMap.h"
+ #include "llvm/ADT/VectorExtras.h"
+ using namespace llvm;
+ 
+ /// ARMTargetLowering - Configure DAG lowering for the ARM target described
+ /// by TM's subtarget: libcall names, register classes, and the legalization
+ /// action (Legal / Expand / Custom) of each operation and value type.
+ ///
+ /// Each (operation, type) pair is configured exactly once below.  Setting a
+ /// pair twice only keeps the last action, which makes the earlier line
+ /// misleading to read.
+ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
+     : TargetLowering(TM), ARMPCLabelIndex(0) {
+   Subtarget = &TM.getSubtarget<ARMSubtarget>();
+ 
+   // Uses VFP for Thumb libfuncs if available.
+   if (Subtarget->isThumb() && Subtarget->hasVFP2()) {
+     // Single-precision floating-point arithmetic.
+     setLibcallName(RTLIB::ADD_F32, "__addsf3vfp");
+     setLibcallName(RTLIB::SUB_F32, "__subsf3vfp");
+     setLibcallName(RTLIB::MUL_F32, "__mulsf3vfp");
+     setLibcallName(RTLIB::DIV_F32, "__divsf3vfp");
+ 
+     // Double-precision floating-point arithmetic.
+     setLibcallName(RTLIB::ADD_F64, "__adddf3vfp");
+     setLibcallName(RTLIB::SUB_F64, "__subdf3vfp");
+     setLibcallName(RTLIB::MUL_F64, "__muldf3vfp");
+     setLibcallName(RTLIB::DIV_F64, "__divdf3vfp");
+ 
+     // Single-precision comparisons.
+     setLibcallName(RTLIB::OEQ_F32, "__eqsf2vfp");
+     setLibcallName(RTLIB::UNE_F32, "__nesf2vfp");
+     setLibcallName(RTLIB::OLT_F32, "__ltsf2vfp");
+     setLibcallName(RTLIB::OLE_F32, "__lesf2vfp");
+     setLibcallName(RTLIB::OGE_F32, "__gesf2vfp");
+     setLibcallName(RTLIB::OGT_F32, "__gtsf2vfp");
+     setLibcallName(RTLIB::UO_F32,  "__unordsf2vfp");
+ 
+     // Double-precision comparisons.
+     setLibcallName(RTLIB::OEQ_F64, "__eqdf2vfp");
+     setLibcallName(RTLIB::UNE_F64, "__nedf2vfp");
+     setLibcallName(RTLIB::OLT_F64, "__ltdf2vfp");
+     setLibcallName(RTLIB::OLE_F64, "__ledf2vfp");
+     setLibcallName(RTLIB::OGE_F64, "__gedf2vfp");
+     setLibcallName(RTLIB::OGT_F64, "__gtdf2vfp");
+     setLibcallName(RTLIB::UO_F64,  "__unorddf2vfp");
+ 
+     // Floating-point to integer conversions.
+     // i64 conversions are done via library routines even when generating VFP
+     // instructions, so use the same ones.
+     setLibcallName(RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp");
+     setLibcallName(RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp");
+     setLibcallName(RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp");
+     setLibcallName(RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp");
+ 
+     // Conversions between floating types.
+     setLibcallName(RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp");
+     setLibcallName(RTLIB::FPEXT_F32_F64,   "__extendsfdf2vfp");
+ 
+     // Integer to floating-point conversions.
+     // i64 conversions are done via library routines even when generating VFP
+     // instructions, so use the same ones.
+     // FIXME: There appears to be some naming inconsistency in ARM libgcc: e.g.
+     // __floatunsidf vs. __floatunssidfvfp.
+     setLibcallName(RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp");
+     setLibcallName(RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp");
+     setLibcallName(RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp");
+     setLibcallName(RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp");
+   }
+ 
+   // i32 always lives in GPRs; the FP register classes are only added when
+   // VFP2 is available and we are not generating Thumb code.
+   addRegisterClass(MVT::i32, ARM::GPRRegisterClass);
+   if (Subtarget->hasVFP2() && !Subtarget->isThumb()) {
+     addRegisterClass(MVT::f32, ARM::SPRRegisterClass);
+     addRegisterClass(MVT::f64, ARM::DPRRegisterClass);
+   }
+ 
+   // ARM does not have f32 extending load.
+   setLoadXAction(ISD::EXTLOAD, MVT::f32, Expand);
+ 
+   // ARM supports all 4 flavors of integer indexed load / store.
+   for (unsigned im = (unsigned)ISD::PRE_INC;
+        im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
+     setIndexedLoadAction(im,  MVT::i1,  Legal);
+     setIndexedLoadAction(im,  MVT::i8,  Legal);
+     setIndexedLoadAction(im,  MVT::i16, Legal);
+     setIndexedLoadAction(im,  MVT::i32, Legal);
+     setIndexedStoreAction(im, MVT::i1,  Legal);
+     setIndexedStoreAction(im, MVT::i8,  Legal);
+     setIndexedStoreAction(im, MVT::i16, Legal);
+     setIndexedStoreAction(im, MVT::i32, Legal);
+   }
+ 
+   // i64 operation support.
+   if (Subtarget->isThumb()) {
+     setOperationAction(ISD::MUL,     MVT::i64, Expand);
+     setOperationAction(ISD::MULHU,   MVT::i32, Expand);
+     setOperationAction(ISD::MULHS,   MVT::i32, Expand);
+   } else {
+     setOperationAction(ISD::MUL,     MVT::i64, Custom);
+     setOperationAction(ISD::MULHU,   MVT::i32, Custom);
+     if (!Subtarget->hasV6Ops())
+       setOperationAction(ISD::MULHS, MVT::i32, Custom);
+   }
+   setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand);
+   setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand);
+   setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand);
+   setOperationAction(ISD::SRL,       MVT::i64, Custom);
+   setOperationAction(ISD::SRA,       MVT::i64, Custom);
+ 
+   // ARM does not have ROTL.
+   setOperationAction(ISD::ROTL,  MVT::i32, Expand);
+   setOperationAction(ISD::CTTZ , MVT::i32, Expand);
+   setOperationAction(ISD::CTPOP, MVT::i32, Expand);
+   if (!Subtarget->hasV5TOps())
+     setOperationAction(ISD::CTLZ, MVT::i32, Expand);
+ 
+   // These are expanded into libcalls.
+   setOperationAction(ISD::SDIV,  MVT::i32, Expand);
+   setOperationAction(ISD::UDIV,  MVT::i32, Expand);
+   setOperationAction(ISD::SREM,  MVT::i32, Expand);
+   setOperationAction(ISD::UREM,  MVT::i32, Expand);
+ 
+   // Support label based line numbers.
+   setOperationAction(ISD::LOCATION, MVT::Other, Expand);
+   setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
+   // FIXME - use subtarget debug flags
+   if (Subtarget->isDarwin())
+     setOperationAction(ISD::DEBUG_LABEL, MVT::Other, Expand);
+ 
+   setOperationAction(ISD::RET,           MVT::Other, Custom);
+   setOperationAction(ISD::GlobalAddress, MVT::i32,   Custom);
+   setOperationAction(ISD::ConstantPool,  MVT::i32,   Custom);
+ 
+   // Expand mem operations generically.
+   setOperationAction(ISD::MEMSET          , MVT::Other, Expand);
+   setOperationAction(ISD::MEMCPY          , MVT::Other, Expand);
+   setOperationAction(ISD::MEMMOVE         , MVT::Other, Expand);
+ 
+   // Use the default implementation for these.
+   setOperationAction(ISD::VAARG             , MVT::Other, Expand);
+   setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
+   setOperationAction(ISD::VAEND             , MVT::Other, Expand);
+   setOperationAction(ISD::STACKSAVE,          MVT::Other, Expand);
+   setOperationAction(ISD::STACKRESTORE,       MVT::Other, Expand);
+   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Expand);
+ 
+   // va_start, unlike the other vararg operations, is custom lowered.
+   setOperationAction(ISD::VASTART,            MVT::Other, Custom);
+ 
+   if (!Subtarget->hasV6Ops()) {
+     setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
+     setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8,  Expand);
+   }
+   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
+ 
+   if (Subtarget->hasVFP2() && !Subtarget->isThumb())
+     // Turn f64->i64 into FMRRD iff target supports vfp2.
+     setOperationAction(ISD::BIT_CONVERT, MVT::i64, Custom);
+ 
+   // SETCC and SELECT are expanded; SELECT_CC is custom lowered.
+   setOperationAction(ISD::SETCC    , MVT::i32, Expand);
+   setOperationAction(ISD::SETCC    , MVT::f32, Expand);
+   setOperationAction(ISD::SETCC    , MVT::f64, Expand);
+   setOperationAction(ISD::SELECT   , MVT::i32, Expand);
+   setOperationAction(ISD::SELECT   , MVT::f32, Expand);
+   setOperationAction(ISD::SELECT   , MVT::f64, Expand);
+   setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
+   setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
+   setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
+ 
+   // BRCOND is expanded; BR_CC and BR_JT are custom lowered.
+   setOperationAction(ISD::BRCOND   , MVT::Other, Expand);
+   setOperationAction(ISD::BR_CC    , MVT::i32,   Custom);
+   setOperationAction(ISD::BR_CC    , MVT::f32,   Custom);
+   setOperationAction(ISD::BR_CC    , MVT::f64,   Custom);
+   setOperationAction(ISD::BR_JT    , MVT::Other, Custom);
+ 
+   // FP Constants can't be immediates.
+   setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
+   setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
+ 
+   // We don't support sin/cos/fmod natively; copysign is custom lowered.
+   setOperationAction(ISD::FSIN     , MVT::f64, Expand);
+   setOperationAction(ISD::FSIN     , MVT::f32, Expand);
+   setOperationAction(ISD::FCOS     , MVT::f32, Expand);
+   setOperationAction(ISD::FCOS     , MVT::f64, Expand);
+   setOperationAction(ISD::FREM     , MVT::f64, Expand);
+   setOperationAction(ISD::FREM     , MVT::f32, Expand);
+   setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
+   setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
+ 
+   // int <-> fp are custom expanded into bit_convert + ARMISD ops.
+   setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
+   setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
+   setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
+   setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
+ 
+   setStackPointerRegisterToSaveRestore(ARM::SP);
+ 
+   setSchedulingPreference(SchedulingForRegPressure);
+   computeRegisterProperties();
+ }
+ 
+ 
+ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
+   switch (Opcode) {
+   default: return 0;
+   case ARMISD::Wrapper:       return "ARMISD::Wrapper";
+   case ARMISD::WrapperCall:   return "ARMISD::WrapperCall";
+   case ARMISD::WrapperJT:     return "ARMISD::WrapperJT";
+   case ARMISD::CALL:          return "ARMISD::CALL";
+   case ARMISD::CALL_NOLINK:   return "ARMISD::CALL_NOLINK";
+   case ARMISD::tCALL:         return "ARMISD::tCALL";
+   case ARMISD::BRCOND:        return "ARMISD::BRCOND";
+   case ARMISD::BR_JT:         return "ARMISD::BR_JT";
+   case ARMISD::RET_FLAG:      return "ARMISD::RET_FLAG";
+   case ARMISD::PIC_ADD:       return "ARMISD::PIC_ADD";
+   case ARMISD::CMP:           return "ARMISD::CMP";
+   case ARMISD::CMPFP:         return "ARMISD::CMPFP";
+   case ARMISD::CMPFPw0:       return "ARMISD::CMPFPw0";
+   case ARMISD::FMSTAT:        return "ARMISD::FMSTAT";
+   case ARMISD::CMOV:          return "ARMISD::CMOV";
+   case ARMISD::CNEG:          return "ARMISD::CNEG";
+     
+   case ARMISD::FTOSI:         return "ARMISD::FTOSI";
+   case ARMISD::FTOUI:         return "ARMISD::FTOUI";
+   case ARMISD::SITOF:         return "ARMISD::SITOF";
+   case ARMISD::UITOF:         return "ARMISD::UITOF";
+   case ARMISD::MULHILOU:      return "ARMISD::MULHILOU";
+   case ARMISD::MULHILOS:      return "ARMISD::MULHILOS";
+ 
+   case ARMISD::SRL_FLAG:      return "ARMISD::SRL_FLAG";
+   case ARMISD::SRA_FLAG:      return "ARMISD::SRA_FLAG";
+   case ARMISD::RRX:           return "ARMISD::RRX";
+       
+   case ARMISD::FMRRD:         return "ARMISD::FMRRD";
+   case ARMISD::FMDRR:         return "ARMISD::FMDRR";
+   }
+ }
+ 
+ //===----------------------------------------------------------------------===//
+ // Lowering Code
+ //===----------------------------------------------------------------------===//
+ 
+ 
+ /// IntCCToARMCC - Map a DAG integer condition code onto the ARM condition
+ /// code that tests the same relation.  Any other code trips the assertion.
+ static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) {
+   switch (CC) {
+   // The default label must stay directly ahead of SETNE: when assertions are
+   // compiled out, control falls through into that case.
+   default: assert(0 && "Unknown condition code!");
+ 
+   // Equality.
+   case ISD::SETNE:  return ARMCC::NE;
+   case ISD::SETEQ:  return ARMCC::EQ;
+ 
+   // Signed relations.
+   case ISD::SETLT:  return ARMCC::LT;
+   case ISD::SETLE:  return ARMCC::LE;
+   case ISD::SETGT:  return ARMCC::GT;
+   case ISD::SETGE:  return ARMCC::GE;
+ 
+   // Unsigned relations.
+   case ISD::SETULT: return ARMCC::LO;
+   case ISD::SETULE: return ARMCC::LS;
+   case ISD::SETUGT: return ARMCC::HI;
+   case ISD::SETUGE: return ARMCC::HS;
+   }
+ }
+ 
+ /// FPCCToARMCC - Convert a DAG fp condition code to the ARM condition
+ /// code(s) that test it on the flags of a floating-point compare.
+ ///
+ /// CondCode receives the primary condition.  CondCode2 is ARMCC::AL unless a
+ /// second condition is needed (SETONE and SETUEQ); presumably the predicate
+ /// holds when either condition passes -- confirm against the callers.
+ ///
+ /// The return value tells the caller whether the comparison has to be
+ /// inverted to form the proper predicate.  Every predicate handled here maps
+ /// directly onto ARM conditions, so the function always returns false.
+ static bool FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
+                         ARMCC::CondCodes &CondCode2) {
+   CondCode2 = ARMCC::AL;
+   switch (CC) {
+   // When assertions are compiled out, control falls through into SETEQ.
+   default: assert(0 && "Unknown FP condition!");
+   case ISD::SETEQ:
+   case ISD::SETOEQ: CondCode = ARMCC::EQ; break;
+   case ISD::SETGT:
+   case ISD::SETOGT: CondCode = ARMCC::GT; break;
+   case ISD::SETGE:
+   case ISD::SETOGE: CondCode = ARMCC::GE; break;
+   case ISD::SETOLT: CondCode = ARMCC::MI; break;
+   // Ordered less-or-equal.  LS passes when C is clear or Z is set, which an
+   // FP compare produces for "less than" and "equal" but not for "greater" or
+   // "unordered".  Testing GT and inverting is not equivalent: the negation
+   // of GT is LE, which also passes for unordered operands.
+   case ISD::SETOLE: CondCode = ARMCC::LS; break;
+   case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break;
+   case ISD::SETO:   CondCode = ARMCC::VC; break;
+   case ISD::SETUO:  CondCode = ARMCC::VS; break;
+   case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break;
+   case ISD::SETUGT: CondCode = ARMCC::HI; break;
+   case ISD::SETUGE: CondCode = ARMCC::PL; break;
+   case ISD::SETLT:
+   case ISD::SETULT: CondCode = ARMCC::LT; break;
+   case ISD::SETLE:
+   case ISD::SETULE: CondCode = ARMCC::LE; break;
+   case ISD::SETNE:
+   case ISD::SETUNE: CondCode = ARMCC::NE; break;
+   }
+   return false;
+ }
+ 
+ /// HowToPassArgument - Decide how an argument of type ObjectVT is passed,
+ /// given that NumGPRs argument registers are already in use.  On return
+ /// ObjGPRs is the number of GPRs the argument takes and ObjSize is the number
+ /// of stack bytes it takes; an argument may be split between the two.
+ static void
+ HowToPassArgument(MVT::ValueType ObjectVT,
+                   unsigned NumGPRs, unsigned &ObjSize, unsigned &ObjGPRs) {
+   ObjSize = 0;
+   ObjGPRs = 0;
+ 
+   switch (ObjectVT) {
+   // When assertions are compiled out, control falls through into the
+   // 32-bit case.
+   default: assert(0 && "Unhandled argument type!");
+   case MVT::i32:
+   case MVT::f32:
+     // One word: the next free register if any of the four is left,
+     // otherwise four bytes of stack.
+     if (NumGPRs >= 4)
+       ObjSize = 4;
+     else
+       ObjGPRs = 1;
+     break;
+   case MVT::i64:
+   case MVT::f64:
+     // Two words: entirely on the stack, split between the last register and
+     // the stack, or entirely in registers.
+     if (NumGPRs >= 4) {
+       ObjSize = 8;
+     } else if (NumGPRs == 3) {
+       ObjGPRs = 1;
+       ObjSize = 4;
+     } else {
+       ObjGPRs = 2;
+     }
+     break;
+   }
+ }
+ 
+ // This transforms an ISD::CALL node into a
+ // callseq_start <- ARMISD::CALL <- callseq_end
+ // chain.
+ SDOperand ARMTargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG) {
+   MVT::ValueType RetVT= Op.Val->getValueType(0);
+   SDOperand Chain    = Op.getOperand(0);
+   unsigned CallConv  = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
+   assert((CallConv == CallingConv::C ||
+           CallConv == CallingConv::CSRet ||
+           CallConv == CallingConv::Fast) && "unknown calling convention");
+   SDOperand Callee   = Op.getOperand(4);
+   unsigned NumOps    = (Op.getNumOperands() - 5) / 2;
+   unsigned ArgOffset = 0;   // Frame mechanisms handle retaddr slot
+   unsigned NumGPRs = 0;     // GPRs used for parameter passing.
+ 
+   // Count how many bytes are to be pushed on the stack.
+   unsigned NumBytes = 0;
+ 
+   // Add up all the space actually used.
+   for (unsigned i = 0; i < NumOps; ++i) {
+     unsigned ObjSize = 0;
+     unsigned ObjGPRs = 0;
+     MVT::ValueType ObjectVT = Op.getOperand(5+2*i).getValueType();
+     HowToPassArgument(ObjectVT, NumGPRs, ObjSize, ObjGPRs);
+     NumBytes += ObjSize;
+     NumGPRs += ObjGPRs;
+   }
+ 
+   // Adjust the stack pointer for the new arguments...
+   // These operations are automatically eliminated by the prolog/epilog pass
+   Chain = DAG.getCALLSEQ_START(Chain,
+                                DAG.getConstant(NumBytes, MVT::i32));
+ 
+   SDOperand StackPtr = DAG.getRegister(ARM::SP, MVT::i32);
+ 
+   static const unsigned GPRArgRegs[] = {
+     ARM::R0, ARM::R1, ARM::R2, ARM::R3
+   };
+ 
+   NumGPRs = 0;
+   std::vector<std::pair<unsigned, SDOperand> > RegsToPass;
+   std::vector<SDOperand> MemOpChains;
+   for (unsigned i = 0; i != NumOps; ++i) {
+     SDOperand Arg = Op.getOperand(5+2*i);
+     MVT::ValueType ArgVT = Arg.getValueType();
+ 
+     unsigned ObjSize = 0;
+     unsigned ObjGPRs = 0;
+     HowToPassArgument(ArgVT, NumGPRs, ObjSize, ObjGPRs);
+     if (ObjGPRs > 0) {
+       switch (ArgVT) {
+       default: assert(0 && "Unexpected ValueType for argument!");
+       case MVT::i32:
+         RegsToPass.push_back(std::make_pair(GPRArgRegs[NumGPRs], Arg));
+         break;
+       case MVT::f32: 
+         RegsToPass.push_back(std::make_pair(GPRArgRegs[NumGPRs],
+                                  DAG.getNode(ISD::BIT_CONVERT, MVT::i32, Arg)));
+         break;
+       case MVT::i64: {
+         SDOperand Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Arg,
+                                    DAG.getConstant(0, getPointerTy()));
+         SDOperand Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Arg,
+                                    DAG.getConstant(1, getPointerTy()));
+         RegsToPass.push_back(std::make_pair(GPRArgRegs[NumGPRs], Lo));
+         if (ObjGPRs == 2)
+           RegsToPass.push_back(std::make_pair(GPRArgRegs[NumGPRs+1], Hi));
+         else {
+           SDOperand PtrOff= DAG.getConstant(ArgOffset, StackPtr.getValueType());
+           PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff);
+           MemOpChains.push_back(DAG.getStore(Chain, Hi, PtrOff, NULL, 0));
+         }
+         break;
+       } 
+       case MVT::f64: {
+         SDOperand Cvt = DAG.getNode(ARMISD::FMRRD,
+                                     DAG.getVTList(MVT::i32, MVT::i32),
+                                     &Arg, 1);
+         RegsToPass.push_back(std::make_pair(GPRArgRegs[NumGPRs], Cvt));
+         if (ObjGPRs == 2)
+           RegsToPass.push_back(std::make_pair(GPRArgRegs[NumGPRs+1],
+                                               Cvt.getValue(1)));
+         else {
+           SDOperand PtrOff= DAG.getConstant(ArgOffset, StackPtr.getValueType());
+           PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff);
+           MemOpChains.push_back(DAG.getStore(Chain, Cvt.getValue(1), PtrOff,
+                                              NULL, 0));
+         }
+         break;
+       }
+       }
+     } else {
+       assert(ObjSize != 0);
+       SDOperand PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
+       PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff);
+       MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
+     }
+ 
+     NumGPRs += ObjGPRs;
+     ArgOffset += ObjSize;
+   }
+ 
+   if (!MemOpChains.empty())
+     Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
+                         &MemOpChains[0], MemOpChains.size());
+ 
+   // Build a sequence of copy-to-reg nodes chained together with token chain
+   // and flag operands which copy the outgoing args into the appropriate regs.
+   SDOperand InFlag;
+   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+     Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
+                              InFlag);
+     InFlag = Chain.getValue(1);
+   }
+ 
+   // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
+   // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
+   // node so that legalize doesn't hack it.
+   bool isDirect = false;
+   bool isARMFunc = false;
+   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+     GlobalValue *GV = G->getGlobal();
+     Callee = DAG.getTargetGlobalAddress(GV, getPointerTy());
+     isDirect = true;
+     bool isExt = (GV->isExternal() || GV->hasWeakLinkage() ||
+                   GV->hasLinkOnceLinkage());
+     bool isStub = (isExt && Subtarget->isDarwin()) &&
+                    getTargetMachine().getRelocationModel() != Reloc::Static;
+     isARMFunc = !Subtarget->isThumb() || isStub;
+     // Wrap it since tBX takes a register source operand.
+     if (isARMFunc && Subtarget->isThumb() && !Subtarget->hasV5TOps())
+       Callee = DAG.getNode(ARMISD::WrapperCall, MVT::i32, Callee);
+   } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
+     Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
+     isDirect = true;
+     bool isStub = Subtarget->isDarwin() &&
+                   getTargetMachine().getRelocationModel() != Reloc::Static;
+     isARMFunc = !Subtarget->isThumb() || isStub;
+     // Wrap it since tBX takes a register source operand.
+     if (!Subtarget->hasV5TOps() && Subtarget->isThumb())
+       Callee = DAG.getNode(ARMISD::WrapperCall, MVT::i32, Callee);
+   }
+ 
+   std::vector<MVT::ValueType> NodeTys;
+   NodeTys.push_back(MVT::Other);   // Returns a chain
+   NodeTys.push_back(MVT::Flag);    // Returns a flag for retval copy to use.
+ 
+   std::vector<SDOperand> Ops;
+   Ops.push_back(Chain);
+   Ops.push_back(Callee);
+ 
+   // Add argument registers to the end of the list so that they are known live
+   // into the call.
+   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
+     Ops.push_back(DAG.getRegister(RegsToPass[i].first,
+                                   RegsToPass[i].second.getValueType()));
+ 
+   // FIXME: handle tail calls differently.
+   unsigned CallOpc;
+   if (Subtarget->isThumb()) {
+     if (!Subtarget->hasV5TOps() && (!isDirect || isARMFunc))
+       CallOpc = ARMISD::CALL_NOLINK;
+     else
+       CallOpc = isARMFunc ? ARMISD::CALL : ARMISD::tCALL;
+   } else {
+     CallOpc = (isDirect || Subtarget->hasV5TOps())
+       ? ARMISD::CALL : ARMISD::CALL_NOLINK;
+   }
+   if (InFlag.Val)
+     Ops.push_back(InFlag);
+   Chain = DAG.getNode(CallOpc, NodeTys, &Ops[0], Ops.size());
+   InFlag = Chain.getValue(1);
+ 
+   SDOperand CSOps[] = { Chain, DAG.getConstant(NumBytes, MVT::i32), InFlag };
+   Chain = DAG.getNode(ISD::CALLSEQ_END, 
+                       DAG.getNodeValueTypes(MVT::Other, MVT::Flag),
+                       ((RetVT != MVT::Other) ? 2 : 1), CSOps, 3);
+   if (RetVT != MVT::Other)
+     InFlag = Chain.getValue(1);
+ 
+   std::vector<SDOperand> ResultVals;
+   NodeTys.clear();
+ 
+   // If the call has results, copy the values out of the ret val registers.
+   switch (RetVT) {
+   default: assert(0 && "Unexpected ret value!");
+   case MVT::Other:
+     break;
+   case MVT::i32:
+     Chain = DAG.getCopyFromReg(Chain, ARM::R0, MVT::i32, InFlag).getValue(1);
+     ResultVals.push_back(Chain.getValue(0));
+     if (Op.Val->getValueType(1) == MVT::i32) {
+       // Returns a i64 value.
+       Chain = DAG.getCopyFromReg(Chain, ARM::R1, MVT::i32,
+                                  Chain.getValue(2)).getValue(1);
+       ResultVals.push_back(Chain.getValue(0));
+       NodeTys.push_back(MVT::i32);
+     }
+     NodeTys.push_back(MVT::i32);
+     break;
+   case MVT::f32:
+     Chain = DAG.getCopyFromReg(Chain, ARM::R0, MVT::i32, InFlag).getValue(1);
+     ResultVals.push_back(DAG.getNode(ISD::BIT_CONVERT, MVT::f32,
+                                      Chain.getValue(0)));
+     NodeTys.push_back(MVT::f32);
+     break;
+   case MVT::f64: {
+     SDOperand Lo = DAG.getCopyFromReg(Chain, ARM::R0, MVT::i32, InFlag);
+     SDOperand Hi = DAG.getCopyFromReg(Lo, ARM::R1, MVT::i32, Lo.getValue(2));
+     ResultVals.push_back(DAG.getNode(ARMISD::FMDRR, MVT::f64, Lo, Hi));
+     NodeTys.push_back(MVT::f64);
+     break;
+   }
+   }
+ 
+   NodeTys.push_back(MVT::Other);
+ 
+   if (ResultVals.empty())
+     return Chain;
+ 
+   ResultVals.push_back(Chain);
+   SDOperand Res = DAG.getNode(ISD::MERGE_VALUES, NodeTys, &ResultVals[0],
+                               ResultVals.size());
+   return Res.getValue(Op.ResNo);
+ }
+ 
+ /// LowerRET - Lower an ISD::RET node into ARMISD::RET_FLAG.  The operand count
+ /// of the RET selects the form handled here:
+ ///   1 operand  (chain only)  - nothing is copied, RET_FLAG takes the chain.
+ ///   3 operands               - operand 1 is copied into R0.
+ ///   5 operands               - operand 1 goes to R0 and operand 3 to R1.
+ /// Any other operand count asserts and aborts.
+ static SDOperand LowerRET(SDOperand Op, SelectionDAG &DAG) {
+   SDOperand Copy;
+   SDOperand Chain = Op.getOperand(0);
+   switch(Op.getNumOperands()) {
+   default:
+     assert(0 && "Do not know how to return this many arguments!");
+     abort();
+   case 1:
+     // No return value: there is nothing to copy, so no flag is needed.
+     return DAG.getNode(ARMISD::RET_FLAG, MVT::Other, Chain);
+   case 3:
+     Op = Op.getOperand(1);
+     if (Op.getValueType() == MVT::f32) {
+       // An f32 is returned in R0 as its raw 32-bit pattern.
+       Op = DAG.getNode(ISD::BIT_CONVERT, MVT::i32, Op);
+     } else if (Op.getValueType() == MVT::f64) {
+       // Recursively legalize f64 -> i64: re-emit the RET with the value
+       // bit-converted to i64 instead of lowering it here.
+       Op = DAG.getNode(ISD::BIT_CONVERT, MVT::i64, Op);
+       return DAG.getNode(ISD::RET, MVT::Other, Chain, Op,
+                          DAG.getConstant(0, MVT::i32));
+     }
+     Copy = DAG.getCopyToReg(Chain, ARM::R0, Op, SDOperand());
+     // If we haven't noted that R0 is live out, do so now.
+     if (DAG.getMachineFunction().liveout_empty())
+       DAG.getMachineFunction().addLiveOut(ARM::R0);
+     break;
+   case 5:
+     // Copy R1 first; the R0 copy is then linked to it through the flag.
+     Copy = DAG.getCopyToReg(Chain, ARM::R1, Op.getOperand(3), SDOperand());
+     Copy = DAG.getCopyToReg(Copy, ARM::R0, Op.getOperand(1), Copy.getValue(1));
+     // If we haven't noted the R0+R1 are live out, do so now.
+     if (DAG.getMachineFunction().liveout_empty()) {
+       DAG.getMachineFunction().addLiveOut(ARM::R0);
+       DAG.getMachineFunction().addLiveOut(ARM::R1);
+     }
+     break;
+   }
+ 
+   // We must use RET_FLAG instead of BRIND because BRIND doesn't have a flag.
+   return DAG.getNode(ARMISD::RET_FLAG, MVT::Other, Copy, Copy.getValue(1));
+ }
+ 
+ // ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
+ // their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is
+ // one of the above mentioned nodes. It has to be wrapped because otherwise
+ // Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
+ // be used to form addressing mode. These wrapped nodes will be selected
+ // into MOVri.
+ //
+ // LowerConstantPool - Build the TargetConstantPool node matching Op (either a
+ // machine constant pool value or a plain constant, keeping its alignment) and
+ // return it wrapped in ARMISD::Wrapper.
+ static SDOperand LowerConstantPool(SDOperand Op, SelectionDAG &DAG) {
+   ConstantPoolSDNode *PoolNode = cast<ConstantPoolSDNode>(Op);
+   MVT::ValueType PtrVT = Op.getValueType();
+ 
+   SDOperand TargetCP = PoolNode->isMachineConstantPoolEntry()
+     ? DAG.getTargetConstantPool(PoolNode->getMachineCPVal(), PtrVT,
+                                 PoolNode->getAlignment())
+     : DAG.getTargetConstantPool(PoolNode->getConstVal(), PtrVT,
+                                 PoolNode->getAlignment());
+ 
+   return DAG.getNode(ARMISD::Wrapper, MVT::i32, TargetCP);
+ }
+ 
+ /// GVIsIndirectSymbol - true if the GV will be accessed via an indirect symbol
+ /// even in dynamic-no-pic mode.  That is the case for weak and linkonce
+ /// globals, and for external globals for which hasNotBeenReadFromBytecode()
+ /// is false.
+ static bool GVIsIndirectSymbol(GlobalValue *GV) {
+   if (GV->hasWeakLinkage() || GV->hasLinkOnceLinkage())
+     return true;
+   return GV->isExternal() && !GV->hasNotBeenReadFromBytecode();
+ }
+ 
+ /// LowerGlobalAddress - Materialize the address of a global by loading it from
+ /// a constant pool entry.  In PIC mode the loaded value is additionally run
+ /// through ARMISD::PIC_ADD with a fresh PC label index; on Darwin, globals
+ /// for which GVIsIndirectSymbol() holds need one more load to reach the
+ /// final address.
+ SDOperand ARMTargetLowering::LowerGlobalAddress(SDOperand Op,
+                                                 SelectionDAG &DAG) {
+   MVT::ValueType PtrVT = getPointerTy();
+   GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+   const Reloc::Model RelocM = getTargetMachine().getRelocationModel();
+   const bool isPIC = (RelocM == Reloc::PIC_);
+   const bool IsIndirect = Subtarget->isDarwin() && GVIsIndirectSymbol(GV);
+ 
+   // Pick the constant pool entry: a plain reference to GV for static code,
+   // otherwise an ARM-specific entry carrying the label index and PC offset.
+   SDOperand PoolEntry;
+   if (RelocM != Reloc::Static) {
+     unsigned PCAdj = 0;
+     if (isPIC)
+       PCAdj = Subtarget->isThumb() ? 4 : 8;
+     ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, ARMPCLabelIndex,
+                                                          IsIndirect, PCAdj);
+     PoolEntry = DAG.getTargetConstantPool(CPV, PtrVT, 2);
+   } else {
+     PoolEntry = DAG.getTargetConstantPool(GV, PtrVT, 2);
+   }
+   SDOperand CPAddr = DAG.getNode(ARMISD::Wrapper, MVT::i32, PoolEntry);
+ 
+   SDOperand Addr = DAG.getLoad(PtrVT, DAG.getEntryNode(), CPAddr, NULL, 0);
+   SDOperand LoadChain = Addr.getValue(1);
+ 
+   if (isPIC) {
+     // The label index is consumed here; the next global gets a new one.
+     SDOperand PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32);
+     Addr = DAG.getNode(ARMISD::PIC_ADD, PtrVT, Addr, PICLabel);
+   }
+ 
+   if (!IsIndirect)
+     return Addr;
+   return DAG.getLoad(PtrVT, LoadChain, Addr, NULL, 0);
+ }
+ 
+ /// LowerVASTART - Lower ISD::VASTART to a store of the address of the
+ /// VarArgsFrameIndex frame slot into the memory location given by operand 1.
+ /// Operand 0 is the incoming chain and operand 2 the SrcValue describing the
+ /// destination.
+ static SDOperand LowerVASTART(SDOperand Op, SelectionDAG &DAG,
+                               unsigned VarArgsFrameIndex) {
+   MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+   SDOperand VarArgsSlot = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
+ 
+   SDOperand InChain = Op.getOperand(0);
+   SDOperand DestPtr = Op.getOperand(1);
+   SrcValueSDNode *DestSV = cast<SrcValueSDNode>(Op.getOperand(2));
+ 
+   return DAG.getStore(InChain, VarArgsSlot, DestPtr, DestSV->getValue(),
+                       DestSV->getOffset());
+ }
+ 
+ /// LowerFORMAL_ARGUMENT - Produce the value of incoming argument ArgNo of the
+ /// FORMAL_ARGUMENTS node Op.
+ ///
+ /// HowToPassArgument() reports how many of R0-R3 the argument uses (ObjGPRs)
+ /// and how many bytes of it are on the stack (ObjSize).  Register parts are
+ /// read through fresh virtual registers that are marked live-in and recorded
+ /// in vRegs[]; stack parts are loaded from a fixed frame object at ArgOffset.
+ /// NumGPRs and ArgOffset are advanced past this argument on return.
+ static SDOperand LowerFORMAL_ARGUMENT(SDOperand Op, SelectionDAG &DAG,
+                                       unsigned *vRegs, unsigned ArgNo,
+                                       unsigned &NumGPRs, unsigned &ArgOffset) {
+   MachineFunction &MF = DAG.getMachineFunction();
+   MVT::ValueType ObjectVT = Op.getValue(ArgNo).getValueType();
+   SDOperand Root = Op.getOperand(0);
+   SSARegMap *RegMap = MF.getSSARegMap();
+ 
+   static const unsigned GPRArgRegs[] = {
+     ARM::R0, ARM::R1, ARM::R2, ARM::R3
+   };
+ 
+   unsigned ObjSize = 0;
+   unsigned ObjGPRs = 0;
+   HowToPassArgument(ObjectVT, NumGPRs, ObjSize, ObjGPRs);
+ 
+   SDOperand ArgValue;
+   if (ObjGPRs == 1) {
+     unsigned VReg = RegMap->createVirtualRegister(&ARM::GPRRegClass);
+     MF.addLiveIn(GPRArgRegs[NumGPRs], VReg);
+     vRegs[NumGPRs] = VReg;
+     ArgValue = DAG.getCopyFromReg(Root, VReg, MVT::i32);
+     // An f32 arrives in a GPR as its raw bit pattern.
+     if (ObjectVT == MVT::f32)
+       ArgValue = DAG.getNode(ISD::BIT_CONVERT, MVT::f32, ArgValue);
+   } else if (ObjGPRs == 2) {
+     unsigned VReg = RegMap->createVirtualRegister(&ARM::GPRRegClass);
+     MF.addLiveIn(GPRArgRegs[NumGPRs], VReg);
+     vRegs[NumGPRs] = VReg;
+     ArgValue = DAG.getCopyFromReg(Root, VReg, MVT::i32);
+ 
+     VReg = RegMap->createVirtualRegister(&ARM::GPRRegClass);
+     MF.addLiveIn(GPRArgRegs[NumGPRs+1], VReg);
+     vRegs[NumGPRs+1] = VReg;
+     SDOperand ArgValue2 = DAG.getCopyFromReg(Root, VReg, MVT::i32);
+ 
+     // Reassemble the two 32-bit halves into the 64-bit argument.
+     if (ObjectVT == MVT::i64)
+       ArgValue = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, ArgValue, ArgValue2);
+     else
+       ArgValue = DAG.getNode(ARMISD::FMDRR, MVT::f64, ArgValue, ArgValue2);
+   }
+   NumGPRs += ObjGPRs;
+ 
+   if (ObjSize) {
+     // If the argument is actually used, emit a load from the right stack
+     // slot.
+     if (!Op.Val->hasNUsesOfValue(0, ArgNo)) {
+       MachineFrameInfo *MFI = MF.getFrameInfo();
+       int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
+       SDOperand FIN = DAG.getFrameIndex(FI, MVT::i32);
+       if (ObjGPRs == 0)
+         ArgValue = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0);
+       else {
+         // Split argument: the first half came from a register above, the
+         // second half is loaded from the stack.
+         SDOperand ArgValue2 =
+           DAG.getLoad(MVT::i32, Root, FIN, NULL, 0);
+         if (ObjectVT == MVT::i64)
+           ArgValue= DAG.getNode(ISD::BUILD_PAIR, MVT::i64, ArgValue, ArgValue2);
+         else
+           ArgValue= DAG.getNode(ARMISD::FMDRR, MVT::f64, ArgValue, ArgValue2);
+       }
+     } else {
+       // Don't emit a dead load.
+       ArgValue = DAG.getNode(ISD::UNDEF, ObjectVT);
+     }
+ 
+     ArgOffset += ObjSize;   // Move on to the next argument.
+   }
+ 
+   return ArgValue;
+ }
+ 
+ /// LowerFORMAL_ARGUMENTS - Lower the FORMAL_ARGUMENTS node for the current
+ /// function.  Each incoming argument is lowered by LowerFORMAL_ARGUMENT, which
+ /// advances NumGPRs and ArgOffset as it goes.  For vararg functions (operand 2
+ /// non-zero) the argument registers not consumed by named arguments are
+ /// stored to a fixed stack object and VarArgsFrameIndex is set.  The result is
+ /// a MERGE_VALUES of the argument values followed by the chain.
+ SDOperand
+ ARMTargetLowering::LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG) {
+   std::vector<SDOperand> ArgValues;
+   SDOperand Root = Op.getOperand(0);
+   unsigned ArgOffset = 0;   // Frame mechanisms handle retaddr slot
+   unsigned NumGPRs = 0;     // GPRs used for parameter passing.
+   // Filled in by LowerFORMAL_ARGUMENT; not read back in this function.
+   unsigned VRegs[4];
+ 
+   // The last value of the node is the chain, hence the -1.
+   unsigned NumArgs = Op.Val->getNumValues()-1;
+   for (unsigned ArgNo = 0; ArgNo < NumArgs; ++ArgNo)
+     ArgValues.push_back(LowerFORMAL_ARGUMENT(Op, DAG, VRegs, ArgNo,
+                                              NumGPRs, ArgOffset));
+ 
+   bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
+   if (isVarArg) {
+     static const unsigned GPRArgRegs[] = {
+       ARM::R0, ARM::R1, ARM::R2, ARM::R3
+     };
+ 
+     MachineFunction &MF = DAG.getMachineFunction();
+     SSARegMap *RegMap = MF.getSSARegMap();
+     MachineFrameInfo *MFI = MF.getFrameInfo();
+     ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+     // Four bytes for every argument register not used by a named argument.
+     unsigned VARegSaveSize = (4 - NumGPRs) * 4;
+     if (VARegSaveSize) {
+       // If this function is vararg, store any remaining integer argument regs
+       // to their spots on the stack so that they may be loaded by dereferencing
+       // the result of va_next.
+       AFI->setVarArgsRegSaveSize(VARegSaveSize);
+       VarArgsFrameIndex = MFI->CreateFixedObject(VARegSaveSize, ArgOffset);
+       SDOperand FIN = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy());
+ 
+       SmallVector<SDOperand, 4> MemOps;
+       for (; NumGPRs < 4; ++NumGPRs) {
+         unsigned VReg = RegMap->createVirtualRegister(&ARM::GPRRegClass);
+         MF.addLiveIn(GPRArgRegs[NumGPRs], VReg);
+         SDOperand Val = DAG.getCopyFromReg(Root, VReg, MVT::i32);
+         SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
+         MemOps.push_back(Store);
+         // Advance to the slot for the next register.
+         FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
+                           DAG.getConstant(4, getPointerTy()));
+       }
+       // Join the individual stores so the returned chain depends on all of them.
+       if (!MemOps.empty())
+         Root = DAG.getNode(ISD::TokenFactor, MVT::Other,
+                            &MemOps[0], MemOps.size());
+     } else
+       // This will point to the next argument passed via stack.
+       VarArgsFrameIndex = MFI->CreateFixedObject(4, ArgOffset);
+   }
+ 
+   ArgValues.push_back(Root);
+ 
+   // Return the new list of results.
+   std::vector<MVT::ValueType> RetVT(Op.Val->value_begin(),
+                                     Op.Val->value_end());
+   return DAG.getNode(ISD::MERGE_VALUES, RetVT, &ArgValues[0], ArgValues.size());
+ }
+ 
+ /// isFloatingPointZero - Return true if this is +0.0.  Recognizes both a
+ /// ConstantFP node and a load whose address is an ARMISD::Wrapper around a
+ /// constant pool entry holding that constant.
+ static bool isFloatingPointZero(SDOperand Op) {
+   if (ConstantFPSDNode *FPNode = dyn_cast<ConstantFPSDNode>(Op))
+     return FPNode->isExactlyValue(0.0);
+ 
+   if (!ISD::isEXTLoad(Op.Val) && !ISD::isNON_EXTLoad(Op.Val))
+     return false;
+ 
+   // Maybe this has already been legalized into the constant pool?
+   SDOperand Addr = Op.getOperand(1);
+   if (Addr.getOpcode() != ARMISD::Wrapper)
+     return false;
+ 
+   SDOperand Wrapped = Addr.getOperand(0);
+   ConstantPoolSDNode *PoolNode = dyn_cast<ConstantPoolSDNode>(Wrapped);
+   if (!PoolNode)
+     return false;
+ 
+   ConstantFP *PoolFP = dyn_cast<ConstantFP>(PoolNode->getConstVal());
+   return PoolFP && PoolFP->isExactlyValue(0.0);
+ }
+ 
+ /// isLegalCmpImmediate - Return true if C can be used directly as the
+ /// immediate of a compare: in Thumb mode it must fit in the low 8 bits, in
+ /// ARM mode ARM_AM::getSOImmVal() must accept it.
+ static bool isLegalCmpImmediate(int C, bool isThumb) {
+   if (isThumb)
+     return (C & ~255U) == 0;
+   return ARM_AM::getSOImmVal(C) != -1;
+ }
+ 
+ /// Returns appropriate ARM CMP (cmp) and corresponding condition code for
+ /// the given operands.
+ ///
+ /// If RHS is a constant that is not a legal compare immediate, the constant
+ /// is nudged by one and the condition code adjusted to match (e.g. "x < C"
+ /// becomes "x <= C-1") when that yields a legal immediate.  The adjustment is
+ /// skipped when C-1 / C+1 would wrap around in the signedness of the
+ /// comparison, since the rewritten compare would then mean something else
+ /// (e.g. "x <=u 0xFFFFFFFF" is always true but "x <u 0" is always false).
+ ///
+ /// The ARM condition code is returned through ARMCC as an i32 constant; the
+ /// function result is the ARMISD::CMP node producing the flag.
+ static SDOperand getARMCmp(SDOperand LHS, SDOperand RHS, ISD::CondCode CC,
+                            SDOperand &ARMCC, SelectionDAG &DAG, bool isThumb) {
+   if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.Val)) {
+     // Work in unsigned arithmetic so that C-1 / C+1 never overflow a signed
+     // int; the wraparound cases are rejected explicitly below.
+     unsigned C = (unsigned)RHSC->getValue();
+     if (!isLegalCmpImmediate((int)C, isThumb)) {
+       // Constant does not fit, try adjusting it by one?
+       switch (CC) {
+       default: break;
+       case ISD::SETLT:
+       case ISD::SETGE:
+         // Signed: C-1 wraps when C is the most negative value.
+         if (C != 0x80000000U && isLegalCmpImmediate((int)(C-1), isThumb)) {
+           CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
+           RHS = DAG.getConstant(C-1, MVT::i32);
+         }
+         break;
+       case ISD::SETULT:
+       case ISD::SETUGE:
+         // Unsigned: C-1 wraps when C is zero.
+         if (C != 0 && isLegalCmpImmediate((int)(C-1), isThumb)) {
+           CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
+           RHS = DAG.getConstant(C-1, MVT::i32);
+         }
+         break;
+       case ISD::SETLE:
+       case ISD::SETGT:
+         // Signed: C+1 wraps when C is the most positive value.
+         if (C != 0x7FFFFFFFU && isLegalCmpImmediate((int)(C+1), isThumb)) {
+           CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
+           RHS = DAG.getConstant(C+1, MVT::i32);
+         }
+         break;
+       case ISD::SETULE:
+       case ISD::SETUGT:
+         // Unsigned: C+1 wraps when C is all ones.
+         if (C != 0xFFFFFFFFU && isLegalCmpImmediate((int)(C+1), isThumb)) {
+           CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
+           RHS = DAG.getConstant(C+1, MVT::i32);
+         }
+         break;
+       }
+     }
+   }
+ 
+   ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
+   ARMCC = DAG.getConstant(CondCode, MVT::i32);
+   return DAG.getNode(ARMISD::CMP, MVT::Flag, LHS, RHS);
+ }
+ 
+ /// Returns an appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
+ /// When RHS is floating-point zero the single-operand CMPFPw0 form is used
+ /// instead of CMPFP; either way the result is passed through FMSTAT.
+ static SDOperand getVFPCmp(SDOperand LHS, SDOperand RHS, SelectionDAG &DAG) {
+   SDOperand FPCmp = isFloatingPointZero(RHS)
+     ? DAG.getNode(ARMISD::CMPFPw0, MVT::Flag, LHS)
+     : DAG.getNode(ARMISD::CMPFP, MVT::Flag, LHS, RHS);
+   return DAG.getNode(ARMISD::FMSTAT, MVT::Flag, FPCmp);
+ }
+ 
+ /// LowerSELECT_CC - Lower ISD::SELECT_CC to ARMISD::CMOV.  Operands 0/1 are
+ /// the values compared, 2/3 the true/false results and 4 the condition code.
+ /// i32 compares use getARMCmp(); otherwise a VFP compare is emitted, and a
+ /// second CMOV is chained on when FPCCToARMCC() reports a second condition.
+ static SDOperand LowerSELECT_CC(SDOperand Op, SelectionDAG &DAG,
+                                 const ARMSubtarget *ST) {
+   MVT::ValueType VT = Op.getValueType();
+   SDOperand LHS = Op.getOperand(0);
+   SDOperand RHS = Op.getOperand(1);
+   SDOperand TrueVal = Op.getOperand(2);
+   SDOperand FalseVal = Op.getOperand(3);
+   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
+ 
+   // Integer compare: one CMP feeding one CMOV.
+   if (LHS.getValueType() == MVT::i32) {
+     SDOperand IntCC;
+     SDOperand IntCmp = getARMCmp(LHS, RHS, CC, IntCC, DAG, ST->isThumb());
+     return DAG.getNode(ARMISD::CMOV, VT, FalseVal, TrueVal, IntCC, IntCmp);
+   }
+ 
+   // NOTE(review): here a true result from FPCCToARMCC swaps the selected
+   // values, whereas LowerBR_CC swaps the compare operands - confirm against
+   // FPCCToARMCC that both uses are intended.
+   ARMCC::CondCodes CondCode, CondCode2;
+   bool SwapValues = FPCCToARMCC(CC, CondCode, CondCode2);
+   if (SwapValues)
+     std::swap(TrueVal, FalseVal);
+ 
+   SDOperand FirstCC = DAG.getConstant(CondCode, MVT::i32);
+   SDOperand FirstCmp = getVFPCmp(LHS, RHS, DAG);
+   SDOperand Result = DAG.getNode(ARMISD::CMOV, VT, FalseVal, TrueVal,
+                                  FirstCC, FirstCmp);
+   if (CondCode2 == ARMCC::AL)
+     return Result;
+ 
+   SDOperand SecondCC = DAG.getConstant(CondCode2, MVT::i32);
+   // FIXME: Needs another CMP because flag can have but one use.
+   SDOperand SecondCmp = getVFPCmp(LHS, RHS, DAG);
+   return DAG.getNode(ARMISD::CMOV, VT, Result, TrueVal, SecondCC, SecondCmp);
+ }
+ 
+ /// LowerBR_CC - Lower ISD::BR_CC to ARMISD::BRCOND.  Operand 0 is the chain,
+ /// 1 the condition code, 2/3 the values compared and 4 the destination.
+ /// i32 compares use getARMCmp(); f32/f64 compares use a VFP compare and emit
+ /// a second BRCOND, reusing the first one's flag, when FPCCToARMCC() reports
+ /// a second condition.
+ static SDOperand LowerBR_CC(SDOperand Op, SelectionDAG &DAG,
+                             const ARMSubtarget *ST) {
+   SDOperand Chain = Op.getOperand(0);
+   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
+   SDOperand LHS = Op.getOperand(2);
+   SDOperand RHS = Op.getOperand(3);
+   SDOperand Dest = Op.getOperand(4);
+ 
+   if (LHS.getValueType() == MVT::i32) {
+     SDOperand IntCC;
+     SDOperand IntCmp = getARMCmp(LHS, RHS, CC, IntCC, DAG, ST->isThumb());
+     return DAG.getNode(ARMISD::BRCOND, MVT::Other, Chain, Dest, IntCC, IntCmp);
+   }
+ 
+   assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);
+   ARMCC::CondCodes CondCode, CondCode2;
+   bool SwapOperands = FPCCToARMCC(CC, CondCode, CondCode2);
+   if (SwapOperands) {
+     // Swap the LHS/RHS of the comparison if needed.
+     std::swap(LHS, RHS);
+   }
+ 
+   SDOperand FPCmp = getVFPCmp(LHS, RHS, DAG);
+   SDOperand FirstCC = DAG.getConstant(CondCode, MVT::i32);
+   SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Flag);
+   SDOperand FirstOps[] = { Chain, Dest, FirstCC, FPCmp };
+   SDOperand Res = DAG.getNode(ARMISD::BRCOND, VTList, FirstOps, 4);
+   if (CondCode2 == ARMCC::AL)
+     return Res;
+ 
+   // Second branch on the other condition, chained and flagged off the first.
+   SDOperand SecondCC = DAG.getConstant(CondCode2, MVT::i32);
+   SDOperand SecondOps[] = { Res, Dest, SecondCC, Res.getValue(1) };
+   return DAG.getNode(ARMISD::BRCOND, VTList, SecondOps, 4);
+ }
+ 
+ /// LowerBR_JT - Lower ISD::BR_JT to ARMISD::BR_JT.  The jump table is wrapped
+ /// in ARMISD::WrapperJT together with a fresh jump-table unique id, the entry
+ /// at Index*4 is loaded, and in PIC mode the table address is added to the
+ /// loaded value before branching.
+ SDOperand ARMTargetLowering::LowerBR_JT(SDOperand Op, SelectionDAG &DAG) {
+   SDOperand Chain = Op.getOperand(0);
+   SDOperand Table = Op.getOperand(1);
+   SDOperand Index = Op.getOperand(2);
+ 
+   MVT::ValueType PTy = getPointerTy();
+   JumpTableSDNode *JTNode = cast<JumpTableSDNode>(Table);
+   ARMFunctionInfo *FuncInfo =
+     DAG.getMachineFunction().getInfo<ARMFunctionInfo>();
+   SDOperand UId = DAG.getConstant(FuncInfo->createJumpTableUId(), PTy);
+   SDOperand JTI = DAG.getTargetJumpTable(JTNode->getIndex(), PTy);
+   SDOperand WrappedTable = DAG.getNode(ARMISD::WrapperJT, MVT::i32, JTI, UId);
+ 
+   // Each table entry is four bytes wide.
+   SDOperand EntrySize = DAG.getConstant(4, PTy);
+   SDOperand ByteOffset = DAG.getNode(ISD::MUL, PTy, Index, EntrySize);
+   SDOperand EntryAddr = DAG.getNode(ISD::ADD, PTy, ByteOffset, WrappedTable);
+ 
+   bool isPIC = getTargetMachine().getRelocationModel() == Reloc::PIC_;
+   SDOperand Target =
+     DAG.getLoad(isPIC ? MVT::i32 : PTy, Chain, EntryAddr, NULL, 0);
+   SDOperand LoadChain = Target.getValue(1);
+   if (isPIC)
+     Target = DAG.getNode(ISD::ADD, PTy, Target, WrappedTable);
+ 
+   return DAG.getNode(ARMISD::BR_JT, MVT::Other, LoadChain, Target, JTI, UId);
+ }
+ 
+ /// LowerFP_TO_INT - Lower FP_TO_SINT / FP_TO_UINT to ARMISD::FTOSI / FTOUI.
+ /// The conversion node is typed f32 and its result is bit-converted to i32.
+ static SDOperand LowerFP_TO_INT(SDOperand Op, SelectionDAG &DAG) {
+   unsigned Opc = ARMISD::FTOUI;
+   if (Op.getOpcode() == ISD::FP_TO_SINT)
+     Opc = ARMISD::FTOSI;
+ 
+   SDOperand Converted = DAG.getNode(Opc, MVT::f32, Op.getOperand(0));
+   return DAG.getNode(ISD::BIT_CONVERT, MVT::i32, Converted);
+ }
+ 
+ /// LowerINT_TO_FP - Lower SINT_TO_FP / UINT_TO_FP to ARMISD::SITOF / UITOF.
+ /// The integer source is first bit-converted to f32 and then converted to
+ /// the result type of Op.
+ static SDOperand LowerINT_TO_FP(SDOperand Op, SelectionDAG &DAG) {
+   MVT::ValueType ResultVT = Op.getValueType();
+   unsigned Opc = ARMISD::UITOF;
+   if (Op.getOpcode() == ISD::SINT_TO_FP)
+     Opc = ARMISD::SITOF;
+ 
+   SDOperand SrcBits = DAG.getNode(ISD::BIT_CONVERT, MVT::f32, Op.getOperand(0));
+   return DAG.getNode(Opc, ResultVT, SrcBits);
+ }
+ 
+ /// LowerFCOPYSIGN - Implement fcopysign with a fabs and a conditional fneg:
+ /// take |operand 0| and negate it (ARMISD::CNEG) when operand 1 compares
+ /// less than 0.0.
+ /// NOTE(review): the sign comes from an LT compare against 0.0, so a -0.0
+ /// sign operand presumably produces a positive result - confirm.
+ static SDOperand LowerFCOPYSIGN(SDOperand Op, SelectionDAG &DAG) {
+   SDOperand Magnitude = Op.getOperand(0);
+   SDOperand SignSrc = Op.getOperand(1);
+   MVT::ValueType VT = Op.getValueType();
+   MVT::ValueType SignVT = SignSrc.getValueType();
+ 
+   SDOperand AbsVal = DAG.getNode(ISD::FABS, VT, Magnitude);
+   SDOperand Zero = DAG.getConstantFP(0.0, SignVT);
+   SDOperand SignCmp = getVFPCmp(SignSrc, Zero, DAG);
+   SDOperand NegCC = DAG.getConstant(ARMCC::LT, MVT::i32);
+   return DAG.getNode(ARMISD::CNEG, VT, AbsVal, AbsVal, NegCC, SignCmp);
+ }
+ 
+ /// LowerBIT_CONVERT - Turn an f64 -> i64 bit-convert into ARMISD::FMRRD, which
+ /// yields two i32 values, and pair them back up as a single i64.  No other
+ /// type combination is accepted.
+ static SDOperand LowerBIT_CONVERT(SDOperand Op, SelectionDAG &DAG) {
+   assert(Op.getValueType() == MVT::i64 &&
+          Op.getOperand(0).getValueType() == MVT::f64);
+ 
+   SDOperand Src = Op.getOperand(0);
+   SDVTList HalvesVT = DAG.getVTList(MVT::i32, MVT::i32);
+   SDOperand Halves = DAG.getNode(ARMISD::FMRRD, HalvesVT, &Src, 1);
+ 
+   // Merge the pieces into a single i64 value.
+   return DAG.getNode(ISD::BUILD_PAIR, MVT::i64, Halves, Halves.getValue(1));
+ }
+ 
+ /// LowerMUL - Expand an i64 multiply into 32-bit pieces.  Three strategies:
+ ///   - both inputs known sign-extended from 32 bits: MUL + MULHS;
+ ///   - both inputs known zero-extended from 32 bits: MUL + MULHU;
+ ///   - otherwise: an unsigned 32x32->64 multiply of the low halves
+ ///     (ARMISD::MULHILOU) plus the two cross products added into the high
+ ///     half.
+ static SDOperand LowerMUL(SDOperand Op, SelectionDAG &DAG) {
+   // FIXME: All this code is target-independent.  Create a new target-indep
+   // MULHILO node and move this code to the legalizer.
+   //
+   assert(Op.getValueType() == MVT::i64 && "Only handles i64 expand right now!");
+ 
+   SDOperand LHS = Op.getOperand(0);
+   SDOperand RHS = Op.getOperand(1);
+ 
+   // Low 32-bit halves of both inputs.
+   SDOperand LL = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, LHS,
+                              DAG.getConstant(0, MVT::i32));
+   SDOperand RL = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, RHS,
+                              DAG.getConstant(0, MVT::i32));
+ 
+   const TargetLowering &TL = DAG.getTargetLoweringInfo();
+   unsigned LHSSB = TL.ComputeNumSignBits(LHS);
+   unsigned RHSSB = TL.ComputeNumSignBits(RHS);
+   const uint64_t HighHalfMask = 0xFFFFFFFF00000000ULL;
+ 
+   // Figure out how to lower this multiply.
+   SDOperand Lo, Hi;
+   if (LHSSB >= 33 && RHSSB >= 33) {
+     // If the input values are both sign extended, we can emit a mulhs+mul.
+     Lo = DAG.getNode(ISD::MUL, MVT::i32, LL, RL);
+     Hi = DAG.getNode(ISD::MULHS, MVT::i32, LL, RL);
+   } else if (LHSSB == 32 && RHSSB == 32 &&
+              TL.MaskedValueIsZero(LHS, HighHalfMask) &&
+              TL.MaskedValueIsZero(RHS, HighHalfMask)) {
+     // If the inputs are zero extended, use mulhu.
+     Lo = DAG.getNode(ISD::MUL, MVT::i32, LL, RL);
+     Hi = DAG.getNode(ISD::MULHU, MVT::i32, LL, RL);
+   } else {
+     // High 32-bit halves of both inputs.
+     SDOperand LH = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, LHS,
+                                DAG.getConstant(1, MVT::i32));
+     SDOperand RH = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, RHS,
+                                DAG.getConstant(1, MVT::i32));
+ 
+     // Lo,Hi = umul LHS, RHS.
+     SDOperand LowHalves[] = { LL, RL };
+     SDOperand UMul64 = DAG.getNode(ARMISD::MULHILOU,
+                                    DAG.getVTList(MVT::i32, MVT::i32),
+                                    LowHalves, 2);
+     Lo = UMul64;
+     Hi = UMul64.getValue(1);
+ 
+     // Fold the cross products LL*RH and LH*RL into the high half.
+     SDOperand CrossLR = DAG.getNode(ISD::MUL, MVT::i32, LL, RH);
+     SDOperand CrossRL = DAG.getNode(ISD::MUL, MVT::i32, LH, RL);
+     Hi = DAG.getNode(ISD::ADD, MVT::i32, Hi, CrossLR);
+     Hi = DAG.getNode(ISD::ADD, MVT::i32, Hi, CrossRL);
+   }
+ 
+   // Merge the pieces into a single i64 value.
+   return DAG.getNode(ISD::BUILD_PAIR, MVT::i64, Lo, Hi);
+ }
+ 
+ /// LowerMULHU - Lower ISD::MULHU to the second result of an
+ /// ARMISD::MULHILOU node built from the same two operands.
+ static SDOperand LowerMULHU(SDOperand Op, SelectionDAG &DAG) {
+   SDOperand MulOps[2] = { Op.getOperand(0), Op.getOperand(1) };
+   SDVTList HiLoVT = DAG.getVTList(MVT::i32, MVT::i32);
+   SDOperand HiLo = DAG.getNode(ARMISD::MULHILOU, HiLoVT, MulOps, 2);
+   return HiLo.getValue(1);
+ }
+ 
+ /// LowerMULHS - Lower ISD::MULHS to the second result of an
+ /// ARMISD::MULHILOS node built from the same two operands.
+ static SDOperand LowerMULHS(SDOperand Op, SelectionDAG &DAG) {
+   SDOperand MulOps[2] = { Op.getOperand(0), Op.getOperand(1) };
+   SDVTList HiLoVT = DAG.getVTList(MVT::i32, MVT::i32);
+   SDOperand HiLo = DAG.getNode(ARMISD::MULHILOS, HiLoVT, MulOps, 2);
+   return HiLo.getValue(1);
+ }
+ 
+ /// LowerSRx - Custom-lower a 64-bit SRL/SRA by exactly one bit, in ARM mode,
+ /// into a flag-setting shift of the high word followed by an RRX of the low
+ /// word.  Returns an empty SDOperand for every other case (non-constant or
+ /// non-1 shift amount, or Thumb mode).
+ static SDOperand LowerSRx(SDOperand Op, SelectionDAG &DAG,
+                           const ARMSubtarget *ST) {
+   assert(Op.getValueType() == MVT::i64 &&
+          (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SRA) &&
+          "Unknown shift to lower!");
+ 
+   // We only lower SRA, SRL of 1 here, all others use generic lowering.
+   ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+   if (!ShAmt || ShAmt->getValue() != 1)
+     return SDOperand();
+ 
+   // If we are in thumb mode, we don't have RRX.
+   if (ST->isThumb())
+     return SDOperand();
+ 
+   // Okay, we have a 64-bit SRA or SRL of 1.  Lower this to an RRX expr.
+   SDOperand Src = Op.getOperand(0);
+   SDOperand Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Src,
+                              DAG.getConstant(0, MVT::i32));
+   SDOperand Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Src,
+                              DAG.getConstant(1, MVT::i32));
+ 
+   // First, build a SRA_FLAG/SRL_FLAG op, which shifts the top part by one and
+   // captures the result into a carry flag.
+   unsigned ShiftOpc = ARMISD::SRA_FLAG;
+   if (Op.getOpcode() == ISD::SRL)
+     ShiftOpc = ARMISD::SRL_FLAG;
+   Hi = DAG.getNode(ShiftOpc, DAG.getVTList(MVT::i32, MVT::Flag), &Hi, 1);
+ 
+   // The low part is an ARMISD::RRX operand, which shifts the carry in.
+   Lo = DAG.getNode(ARMISD::RRX, MVT::i32, Lo, Hi.getValue(1));
+ 
+   // Merge the pieces into a single i64 value.
+   return DAG.getNode(ISD::BUILD_PAIR, MVT::i64, Lo, Hi);
+ }
+ 
+ /// LowerOperation - Dispatch a custom-lowered node to the matching Lower*
+ /// routine based on its opcode.  An opcode without a handler asserts and
+ /// aborts.
+ SDOperand ARMTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
+   switch (Op.getOpcode()) {
+   // Addresses and constants.
+   case ISD::ConstantPool:
+     return LowerConstantPool(Op, DAG);
+   case ISD::GlobalAddress:
+     return LowerGlobalAddress(Op, DAG);
+ 
+   // Calls, returns and argument handling.
+   case ISD::CALL:
+     return LowerCALL(Op, DAG);
+   case ISD::RET:
+     return LowerRET(Op, DAG);
+   case ISD::VASTART:
+     return LowerVASTART(Op, DAG, VarArgsFrameIndex);
+   case ISD::FORMAL_ARGUMENTS:
+     return LowerFORMAL_ARGUMENTS(Op, DAG);
+ 
+   // Control flow and selects.
+   case ISD::SELECT_CC:
+     return LowerSELECT_CC(Op, DAG, Subtarget);
+   case ISD::BR_CC:
+     return LowerBR_CC(Op, DAG, Subtarget);
+   case ISD::BR_JT:
+     return LowerBR_JT(Op, DAG);
+ 
+   // Floating point.
+   case ISD::SINT_TO_FP:
+   case ISD::UINT_TO_FP:
+     return LowerINT_TO_FP(Op, DAG);
+   case ISD::FP_TO_SINT:
+   case ISD::FP_TO_UINT:
+     return LowerFP_TO_INT(Op, DAG);
+   case ISD::FCOPYSIGN:
+     return LowerFCOPYSIGN(Op, DAG);
+   case ISD::BIT_CONVERT:
+     return LowerBIT_CONVERT(Op, DAG);
+ 
+   // Integer arithmetic.
+   case ISD::MUL:
+     return LowerMUL(Op, DAG);
+   case ISD::MULHU:
+     return LowerMULHU(Op, DAG);
+   case ISD::MULHS:
+     return LowerMULHS(Op, DAG);
+   case ISD::SRL:
+   case ISD::SRA:
+     return LowerSRx(Op, DAG, Subtarget);
+ 
+   default:
+     assert(0 && "Don't know how to custom lower this!");
+     abort();
+   }
+ }
+ 
+ //===----------------------------------------------------------------------===//
+ //                           ARM Scheduler Hooks
+ //===----------------------------------------------------------------------===//
+ 
+ /// InsertAtEndOfBasicBlock - Expand a pseudo instruction that needs custom
+ /// control flow.  Only ARM::tMOVCCr is handled: it is replaced by a diamond
+ /// (conditional branch around a fallthrough block, joined by a PHI in a new
+ /// sink block).  MI is deleted and the sink block, where code emission should
+ /// continue, is returned.  Any other opcode asserts and aborts.
+ MachineBasicBlock *
+ ARMTargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI,
+                                            MachineBasicBlock *BB) {
+   const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+   switch (MI->getOpcode()) {
+   default:
+     assert(false && "Unexpected instr type to insert");
+     // Without this, builds that compile out the assert would fall through
+     // and expand an arbitrary instruction as if it were tMOVCCr.
+     abort();
+   case ARM::tMOVCCr: {
+     // To "insert" a SELECT_CC instruction, we actually have to insert the
+     // diamond control-flow pattern.  The incoming instruction knows the
+     // destination vreg to set, the condition code register to branch on, the
+     // true/false values to select between, and a branch opcode to use.
+     const BasicBlock *LLVM_BB = BB->getBasicBlock();
+     ilist<MachineBasicBlock>::iterator It = BB;
+     ++It;
+ 
+     //  thisMBB:
+     //  ...
+     //   TrueVal = ...
+     //   cmpTY ccX, r1, r2
+     //   bCC sinkMBB
+     //   fallthrough --> copy0MBB
+     MachineBasicBlock *thisMBB  = BB;
+     MachineBasicBlock *copy0MBB = new MachineBasicBlock(LLVM_BB);
+     MachineBasicBlock *sinkMBB  = new MachineBasicBlock(LLVM_BB);
+     // Operand 3 of the pseudo carries the condition for the branch.
+     BuildMI(BB, TII->get(ARM::tBcc)).addMBB(sinkMBB)
+       .addImm(MI->getOperand(3).getImm());
+     MachineFunction *F = BB->getParent();
+     // Both new blocks go right after the current one: copy0MBB, then sinkMBB.
+     F->getBasicBlockList().insert(It, copy0MBB);
+     F->getBasicBlockList().insert(It, sinkMBB);
+     // Update machine-CFG edges by first adding all successors of the current
+     // block to the new block which will contain the Phi node for the select.
+     for(MachineBasicBlock::succ_iterator i = BB->succ_begin(),
+         e = BB->succ_end(); i != e; ++i)
+       sinkMBB->addSuccessor(*i);
+     // Next, remove all successors of the current block, and add the true
+     // and fallthrough blocks as its successors.
+     while(!BB->succ_empty())
+       BB->removeSuccessor(BB->succ_begin());
+     BB->addSuccessor(copy0MBB);
+     BB->addSuccessor(sinkMBB);
+ 
+     //  copy0MBB:
+     //   %FalseValue = ...
+     //   # fallthrough to sinkMBB
+     BB = copy0MBB;
+ 
+     // Update machine-CFG edges
+     BB->addSuccessor(sinkMBB);
+ 
+     //  sinkMBB:
+     //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
+     //  ...
+     BB = sinkMBB;
+     BuildMI(BB, TII->get(ARM::PHI), MI->getOperand(0).getReg())
+       .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB)
+       .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
+ 
+     delete MI;   // The pseudo instruction is gone now.
+     return BB;
+   }
+   }
+ }
+ 
+ //===----------------------------------------------------------------------===//
+ //                           ARM Optimization Hooks
+ //===----------------------------------------------------------------------===//
+ 
+ /// isLegalAddressImmediate - Return true if the integer value or
+ /// GlobalValue can be used as the offset of the target addressing mode.
+ /// For an integer that means it fits in an unsigned 12-bit field, i.e. V is
+ /// in the range [0, 4095].
+ bool ARMTargetLowering::isLegalAddressImmediate(int64_t V) const {
+   // ARM allows a 12-bit immediate field.
+   // The parentheses are required: == binds tighter than &, so without them
+   // the expression is (V == V) & mask, which is true for every V.
+   return V == (V & ((1LL << 12) - 1));
+ }
+ 
+ /// isLegalAddressImmediate - GlobalValue overload.  Always returns false: no
+ /// global is reported as usable as an addressing-mode offset.
+ bool ARMTargetLowering::isLegalAddressImmediate(GlobalValue *GV) const {
+   return false;
+ }
+ 
+ /// getIndexedAddressParts - Split the ADD/SUB address computation Ptr into a
+ /// Base and an Offset for an indexed load / store of type VT, and report in
+ /// isInc whether the offset is added (true) or subtracted (false).  Returns
+ /// false if Ptr is not an ADD/SUB or VT is not one of i1/i8/i16/i32.
+ ///
+ /// i16 accesses and sign-extending i8/i1 loads use addressing mode 3; other
+ /// i32/i8/i1 accesses use addressing mode 2.  A small negative constant
+ /// operand (above -256 for mode 3, above -0x1000 for mode 2) is turned into
+ /// a positive offset with isInc = false.
+ static bool getIndexedAddressParts(SDNode *Ptr, MVT::ValueType VT,
+                                    bool isSEXTLoad, SDOperand &Base,
+                                    SDOperand &Offset, bool &isInc,
+                                    SelectionDAG &DAG) {
+   const bool isAdd = (Ptr->getOpcode() == ISD::ADD);
+   if (!isAdd && Ptr->getOpcode() != ISD::SUB)
+     return false;
+ 
+   const bool isByteOrBit = (VT == MVT::i8 || VT == MVT::i1);
+   const bool useAM3 = (VT == MVT::i16) || (isByteOrBit && isSEXTLoad);
+   const bool useAM2 = !useAM3 && (VT == MVT::i32 || isByteOrBit);
+   if (!useAM3 && !useAM2) {
+     // FIXME: Use FLDM / FSTM to emulate indexed FP load / store.
+     return false;
+   }
+ 
+   // Negative constants within the mode's range become a decrement by the
+   // positive amount.
+   const int NegLimit = useAM3 ? -256 : -0x1000;
+   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
+     int RHSC = (int)RHS->getValue();
+     if (RHSC < 0 && RHSC > NegLimit) {
+       isInc = false;
+       Offset = DAG.getConstant(-RHSC, RHS->getValueType(0));
+       Base = Ptr->getOperand(0);
+       return true;
+     }
+   }
+ 
+   if (useAM2 && isAdd) {
+     // AddressingMode 2 add: if the first operand is a shift, use it as the
+     // offset and the other operand as the base.
+     isInc = true;
+     ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(Ptr->getOperand(0));
+     const bool FirstIsShift = (ShOpcVal != ARM_AM::no_shift);
+     Base   = Ptr->getOperand(FirstIsShift ? 1 : 0);
+     Offset = Ptr->getOperand(FirstIsShift ? 0 : 1);
+     return true;
+   }
+ 
+   // AddressingMode 3 (add or sub), or AddressingMode 2 sub.
+   isInc = isAdd;
+   Base = Ptr->getOperand(0);
+   Offset = Ptr->getOperand(1);
+   return true;
+ }
+ 
+ /// getPreIndexedAddressParts - returns true by value, base pointer and
+ /// offset pointer and addressing mode by reference if the node's address
+ /// can be legally represented as pre-indexed load / store address.
+ /// Always returns false in Thumb mode and for nodes that are neither loads
+ /// nor stores.
+ bool
+ ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDOperand &Base,
+                                              SDOperand &Offset,
+                                              ISD::MemIndexedMode &AM,
+                                              SelectionDAG &DAG) {
+   if (Subtarget->isThumb())
+     return false;
+ 
+   // Pull the address and memory type out of the load or store.
+   MVT::ValueType MemVT;
+   SDOperand Addr;
+   bool isSEXTLoad = false;
+   if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+     Addr  = ST->getBasePtr();
+     MemVT = ST->getStoredVT();
+   } else if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+     Addr  = LD->getBasePtr();
+     MemVT = LD->getLoadedVT();
+     isSEXTLoad = (LD->getExtensionType() == ISD::SEXTLOAD);
+   } else {
+     return false;
+   }
+ 
+   bool isInc;
+   if (!getIndexedAddressParts(Addr.Val, MemVT, isSEXTLoad, Base, Offset,
+                               isInc, DAG))
+     return false;
+ 
+   AM = isInc ? ISD::PRE_INC : ISD::PRE_DEC;
+   return true;
+ }
+ 
+ /// getPostIndexedAddressParts - returns true by value, base pointer and
+ /// offset pointer and addressing mode by reference if this node can be
+ /// combined with a load / store to form a post-indexed load / store.
+ /// N is the load / store and Op is the node (an ADD or SUB, otherwise the
+ /// query fails) that would be folded into it.  AM is set to POST_INC or
+ /// POST_DEC.  Always fails in Thumb mode.
+ bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
+                                                    SDOperand &Base,
+                                                    SDOperand &Offset,
+                                                    ISD::MemIndexedMode &AM,
+                                                    SelectionDAG &DAG) {
+   if (Subtarget->isThumb())
+     return false;
+ 
+   // Only the memory type and the extension kind of N matter here; the
+   // address arithmetic comes from Op, not from N's own base pointer.
+   MVT::ValueType VT;
+   bool isSEXTLoad = false;
+   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+     VT  = LD->getLoadedVT();
+     isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
+   } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+     VT  = ST->getStoredVT();
+   } else
+     return false;
+ 
+   bool isInc;
+   bool isLegal = getIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
+                                         isInc, DAG);
+   if (isLegal) {
+     AM = isInc ? ISD::POST_INC : ISD::POST_DEC;
+     return true;
+   }
+   return false;
+ }
+ 
+ /// computeMaskedBitsForTargetNode - Report which of the bits selected by Mask
+ /// are known to be zero / one in the result of the ARM-specific node Op.
+ /// Only ARMISD::CMOV is analyzed; every other opcode reports nothing known.
+ void ARMTargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
+                                                        uint64_t Mask,
+                                                        uint64_t &KnownZero, 
+                                                        uint64_t &KnownOne,
+                                                        unsigned Depth) const {
+   // Start from "nothing known".
+   KnownOne = 0;
+   KnownZero = 0;
+   if (Op.getOpcode() != ARMISD::CMOV)
+     return;
+ 
+   // A bit of the CMOV result is known only if operands 0 and 1 agree on it.
+   ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero, KnownOne, Depth+1);
+   if ((KnownZero | KnownOne) == 0)
+     return;   // Nothing known about operand 0; no need to query operand 1.
+ 
+   uint64_t OtherZero, OtherOne;
+   ComputeMaskedBits(Op.getOperand(1), Mask, OtherZero, OtherOne, Depth+1);
+   KnownZero &= OtherZero;
+   KnownOne  &= OtherOne;
+ }
+ 
+ //===----------------------------------------------------------------------===//
+ //                           ARM Inline Assembly Support
+ //===----------------------------------------------------------------------===//
+ 
+ /// getConstraintType - Classify an inline asm constraint letter for this
+ /// target.  'l' is a register-class constraint; any other letter is
+ /// classified by the generic TargetLowering implementation.
+ ARMTargetLowering::ConstraintType
+ ARMTargetLowering::getConstraintType(char ConstraintLetter) const {
+   if (ConstraintLetter == 'l')
+     return C_RegisterClass;
+   return TargetLowering::getConstraintType(ConstraintLetter);
+ }
+ 
+ /// getRegForInlineAsmConstraint - Map an inline asm register constraint to a
+ /// (register, register class) pair.  The single-letter constraints 'l' and
+ /// 'r' both yield register number 0 paired with the GPR register class; every
+ /// other constraint is forwarded to the generic TargetLowering implementation.
+ std::pair<unsigned, const TargetRegisterClass*> 
+ ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
+                                                 MVT::ValueType VT) const {
+   if (Constraint.size() == 1) {
+     // GCC ARM Constraint Letters
+     switch (Constraint[0]) {
+       default: break;
+       case 'l':
+       // FIXME: in thumb mode, 'l' is only low-regs.
+       // FALL THROUGH.
+       case 'r':
+         return std::make_pair(0U, ARM::GPRRegisterClass);
+     }
+   }
+   return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+ }
+ 
+ /// getRegClassForInlineAsmConstraint - Return the list of registers that can
+ /// satisfy an inline asm constraint.  The single-letter constraints 'l' and
+ /// 'r' yield R0-R12 and LR; anything else yields an empty list.
+ std::vector<unsigned> ARMTargetLowering::
+ getRegClassForInlineAsmConstraint(const std::string &Constraint,
+                                   MVT::ValueType VT) const {
+   // GCC ARM Constraint Letters: only single-letter constraints are handled.
+   if (Constraint.size() == 1) {
+     const char Letter = Constraint[0];
+     if (Letter == 'l' || Letter == 'r')
+       return make_vector<unsigned>(ARM::R0, ARM::R1, ARM::R2, ARM::R3,
+                                    ARM::R4, ARM::R5, ARM::R6, ARM::R7,
+                                    ARM::R8, ARM::R9, ARM::R10, ARM::R11,
+                                    ARM::R12, ARM::LR, 0);
+   }
+ 
+   return std::vector<unsigned>();
+ }


Index: llvm/lib/Target/ARM/ARMISelLowering.h
diff -c /dev/null llvm/lib/Target/ARM/ARMISelLowering.h:1.1
*** /dev/null	Fri Jan 19 01:51:57 2007
--- llvm/lib/Target/ARM/ARMISelLowering.h	Fri Jan 19 01:51:42 2007
***************
*** 0 ****
--- 1,134 ----
+ //===-- ARMISelLowering.h - ARM DAG Lowering Interface ----------*- C++ -*-===//
+ //
+ //                     The LLVM Compiler Infrastructure
+ //
+ // This file was developed by Evan Cheng and is distributed under
+ // the University of Illinois Open Source License. See LICENSE.TXT for details.
+ //
+ //===----------------------------------------------------------------------===//
+ //
+ // This file defines the interfaces that ARM uses to lower LLVM code into a
+ // selection DAG.
+ //
+ //===----------------------------------------------------------------------===//
+ 
+ #ifndef ARMISELLOWERING_H
+ #define ARMISELLOWERING_H
+ 
+ #include "llvm/Target/TargetLowering.h"
+ #include "llvm/CodeGen/SelectionDAG.h"
+ #include <vector>
+ 
+ namespace llvm {
+   class ARMConstantPoolValue;
+   class ARMSubtarget;
+ 
+   namespace ARMISD {
+     // ARM Specific DAG Nodes
+     enum NodeType {
+       // Start the numbering where the builtin ops and target ops leave off.
+       FIRST_NUMBER = ISD::BUILTIN_OP_END+ARM::INSTRUCTION_LIST_END,
+ 
+       Wrapper,      // Wrapper - A wrapper node for TargetConstantPool,
+                     // TargetExternalSymbol, and TargetGlobalAddress.
+       WrapperCall,  // WrapperCall - Same as wrapper, but mark the wrapped
+                     // node as call operand.
+       WrapperJT,    // WrapperJT - A wrapper node for TargetJumpTable
+       
+       CALL,         // Function call.
+       CALL_NOLINK,  // Function call with branch not branch-and-link.
+       tCALL,        // Thumb function call.
+       BRCOND,       // Conditional branch.
+       BR_JT,        // Jumptable branch.
+       RET_FLAG,     // Return with a flag operand.
+ 
+       PIC_ADD,      // Add with a PC operand and a PIC label.
+ 
+       CMP,          // ARM compare instructions.
+       CMPFP,        // ARM VFP compare instruction, sets FPSCR.
+       CMPFPw0,      // ARM VFP compare against zero instruction, sets FPSCR.
+       FMSTAT,       // ARM fmstat instruction.
+       CMOV,         // ARM conditional move instructions.
+       CNEG,         // ARM conditional negate instructions.
+       
+       FTOSI,        // FP to sint within a FP register.
+       FTOUI,        // FP to uint within a FP register.
+       SITOF,        // sint to FP within a FP register.
+       UITOF,        // uint to FP within a FP register.
+ 
+       MULHILOU,     // Lo,Hi = umul LHS, RHS.
+       MULHILOS,     // Lo,Hi = smul LHS, RHS.
+       
+       SRL_FLAG,     // V,Flag = srl_flag X -> srl X, 1 + save carry out.
+       SRA_FLAG,     // V,Flag = sra_flag X -> sra X, 1 + save carry out.
+       RRX,          // V = RRX X, Flag     -> srl X, 1 + shift in carry flag.
+       
+       FMRRD,        // double to two gprs.
+       FMDRR         // Two gprs to double.
+     };
+   }
+ 
+   //===----------------------------------------------------------------------===//
+   //  ARMTargetLowering - ARM Implementation of the TargetLowering interface
+   
+   class ARMTargetLowering : public TargetLowering {
+     int VarArgsFrameIndex;            // FrameIndex for start of varargs area.
+   public:
+     ARMTargetLowering(TargetMachine &TM);
+ 
+     virtual SDOperand LowerOperation(SDOperand Op, SelectionDAG &DAG);
+     virtual const char *getTargetNodeName(unsigned Opcode) const;
+ 
+     virtual MachineBasicBlock *InsertAtEndOfBasicBlock(MachineInstr *MI,
+                                                        MachineBasicBlock *MBB);
+ 
+     /// isLegalAddressImmediate - Return true if the integer value or
+     /// GlobalValue can be used as the offset of the target addressing mode.
+     virtual bool isLegalAddressImmediate(int64_t V) const;
+     virtual bool isLegalAddressImmediate(GlobalValue *GV) const;
+ 
+     /// getPreIndexedAddressParts - returns true by value, base pointer and
+     /// offset pointer and addressing mode by reference if the node's address
+     /// can be legally represented as pre-indexed load / store address.
+     virtual bool getPreIndexedAddressParts(SDNode *N, SDOperand &Base,
+                                            SDOperand &Offset,
+                                            ISD::MemIndexedMode &AM,
+                                            SelectionDAG &DAG);
+ 
+     /// getPostIndexedAddressParts - returns true by value, base pointer and
+     /// offset pointer and addressing mode by reference if this node can be
+     /// combined with a load / store to form a post-indexed load / store.
+     virtual bool getPostIndexedAddressParts(SDNode *N, SDNode *Op,
+                                             SDOperand &Base, SDOperand &Offset,
+                                             ISD::MemIndexedMode &AM,
+                                             SelectionDAG &DAG);
+ 
+     /// computeMaskedBitsForTargetNode - Report through KnownZero / KnownOne
+     /// which bits of the ARM-specific node Op are known to be zero / one.
+     virtual void computeMaskedBitsForTargetNode(const SDOperand Op,
+                                                 uint64_t Mask,
+                                                 uint64_t &KnownZero, 
+                                                 uint64_t &KnownOne,
+                                                 unsigned Depth) const;
+ 
+     // Inline assembly support.
+ 
+     /// getConstraintType - Given a constraint letter, return the type of
+     /// constraint it is for this target.
+     ConstraintType getConstraintType(char ConstraintLetter) const;
+     /// getRegForInlineAsmConstraint - Return the (register, register class)
+     /// pair to use for the given constraint string and value type.
+     std::pair<unsigned, const TargetRegisterClass*> 
+       getRegForInlineAsmConstraint(const std::string &Constraint,
+                                    MVT::ValueType VT) const;
+     /// getRegClassForInlineAsmConstraint - Return the list of registers that
+     /// can be used to satisfy the given constraint string.
+     std::vector<unsigned>
+     getRegClassForInlineAsmConstraint(const std::string &Constraint,
+                                       MVT::ValueType VT) const;
+   private:
+     /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
+     /// make the right decision when generating code for different targets.
+     const ARMSubtarget *Subtarget;
+ 
+     /// ARMPCLabelIndex - Keep track the number of ARM PC labels created.
+     ///
+     unsigned ARMPCLabelIndex;
+ 
+     // Per-operation lowering helpers.
+     // NOTE(review): presumably dispatched from LowerOperation - confirm in
+     // ARMISelLowering.cpp.
+     SDOperand LowerCALL(SDOperand Op, SelectionDAG &DAG);
+     SDOperand LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG);
+     SDOperand LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG);
+     SDOperand LowerBR_JT(SDOperand Op, SelectionDAG &DAG);
+   };
+ }
+ 
+ #endif  // ARMISELLOWERING_H


Index: llvm/lib/Target/ARM/ARMInstrThumb.td
diff -c /dev/null llvm/lib/Target/ARM/ARMInstrThumb.td:1.1
*** /dev/null	Fri Jan 19 01:51:57 2007
--- llvm/lib/Target/ARM/ARMInstrThumb.td	Fri Jan 19 01:51:42 2007
***************
*** 0 ****
--- 1,513 ----
+ //===- ARMInstrThumb.td - Thumb support for ARM ---------------------------===//
+ //
+ //                     The LLVM Compiler Infrastructure
+ //
+ // This file was developed by Chris Lattner and is distributed under the
+ // University of Illinois Open Source License. See LICENSE.TXT for details.
+ //
+ //===----------------------------------------------------------------------===//
+ //
+ // This file describes the Thumb instruction set.
+ //
+ //===----------------------------------------------------------------------===//
+ 
+ //===----------------------------------------------------------------------===//
+ // Thumb specific DAG Nodes.
+ //
+ 
+ // ARMtcall - DAG node for ARMISD::tCALL.  It carries a chain, an optional
+ // incoming flag and an outgoing flag.
+ def ARMtcall : SDNode<"ARMISD::tCALL", SDT_ARMcall,
+                       [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+ 
+ // TI - Thumb instruction.
+ 
+ // ThumbPat - Same as Pat<>, but requires that the compiler be in Thumb mode.
+ class ThumbPat<dag pattern, dag result> : Pat<pattern, result> {
+   list<Predicate> Predicates = [IsThumb];
+ }
+ 
+ // ThumbV5Pat - Same as ThumbPat, but additionally requires HasV5T.
+ class ThumbV5Pat<dag pattern, dag result> : Pat<pattern, result> {
+   list<Predicate> Predicates = [IsThumb, HasV5T];
+ }
+ 
+ // ThumbI - Common base of the Thumb instruction classes below: an InstARM
+ // with IndexModeNone that is predicated on IsThumb.
+ class ThumbI<dag ops, AddrMode am, SizeFlagVal sz,
+              string asm, string cstr, list<dag> pattern>
+   // FIXME: Set all opcodes to 0 for now.
+   : InstARM<0, am, sz, IndexModeNone, ops, asm, cstr> {
+   let Pattern = pattern;
+   list<Predicate> Predicates = [IsThumb];
+ }
+ 
+ // Two-byte instruction classes.  They differ only in the addressing mode
+ // handed to ThumbI; none of them sets a constraint string.
+ class TI<dag ops, string asm, list<dag> pattern>
+   : ThumbI<ops, AddrModeNone, Size2Bytes, asm, "", pattern>;
+ class TI1<dag ops, string asm, list<dag> pattern>
+   : ThumbI<ops, AddrModeT1, Size2Bytes, asm, "", pattern>;
+ class TI2<dag ops, string asm, list<dag> pattern>
+   : ThumbI<ops, AddrModeT2, Size2Bytes, asm, "", pattern>;
+ class TI4<dag ops, string asm, list<dag> pattern>
+   : ThumbI<ops, AddrModeT4, Size2Bytes, asm, "", pattern>;
+ class TIs<dag ops, string asm, list<dag> pattern>
+   : ThumbI<ops, AddrModeTs, Size2Bytes, asm, "", pattern>;
+ 
+ // Two-address instructions
+ // The "$lhs = $dst" constraint ties the $lhs operand to $dst, so users of
+ // this class must name their operands accordingly.
+ class TIt<dag ops, string asm, list<dag> pattern>
+   : ThumbI<ops, AddrModeNone, Size2Bytes, asm, "$lhs = $dst", pattern>;
+ 
+ // BL, BLX(1) are translated by assembler into two instructions
+ // (hence Size4Bytes rather than Size2Bytes).
+ class TIx2<dag ops, string asm, list<dag> pattern>
+   : ThumbI<ops, AddrModeNone, Size4Bytes, asm, "", pattern>;
+ 
+ // imm_neg_XFORM - Replace an immediate by the target constant holding its
+ // negation (computed as int).
+ def imm_neg_XFORM : SDNodeXForm<imm, [{
+   return CurDAG->getTargetConstant(-(int)N->getValue(), MVT::i32);
+ }]>;
+ // imm_comp_XFORM - Replace an immediate by the target constant holding the
+ // bitwise complement of its low 32 bits.
+ def imm_comp_XFORM : SDNodeXForm<imm, [{
+   return CurDAG->getTargetConstant(~((uint32_t)N->getValue()), MVT::i32);
+ }]>;
+ 
+ 
+ /// imm0_7 predicate - True if the 32-bit immediate is in the range [0,7].
+ def imm0_7 : PatLeaf<(i32 imm), [{
+   return (uint32_t)N->getValue() < 8;
+ }]>;
+ /// imm0_7_neg predicate - True if the negated 32-bit immediate is in the
+ /// range [0,7].  The matched immediate is emitted through imm_neg_XFORM.
+ def imm0_7_neg : PatLeaf<(i32 imm), [{
+   return (uint32_t)-N->getValue() < 8;
+ }], imm_neg_XFORM>;
+ 
+ /// imm0_255 predicate - True if the 32-bit immediate is in the range [0,255].
+ def imm0_255 : PatLeaf<(i32 imm), [{
+   return (uint32_t)N->getValue() < 256;
+ }]>;
+ /// imm0_255_comp predicate - True if the bitwise complement of the 32-bit
+ /// immediate is in the range [0,255].  No transform is attached here; users
+ /// apply imm_comp_XFORM explicitly in their result pattern.
+ def imm0_255_comp : PatLeaf<(i32 imm), [{
+   return ~((uint32_t)N->getValue()) < 256;
+ }]>;
+ 
+ /// imm8_255 predicate - True if the 32-bit immediate is in the range [8,255].
+ def imm8_255 : PatLeaf<(i32 imm), [{
+   return (uint32_t)N->getValue() >= 8 && (uint32_t)N->getValue() < 256;
+ }]>;
+ /// imm8_255_neg predicate - True if the negated 32-bit immediate is in the
+ /// range [8,255].  The matched immediate is emitted through imm_neg_XFORM.
+ def imm8_255_neg : PatLeaf<(i32 imm), [{
+   unsigned Val = -N->getValue();
+   return Val >= 8 && Val < 256;
+ }], imm_neg_XFORM>;
+ 
+ // Break imm's up into two pieces: an immediate + a left shift.
+ // This uses thumb_immshifted to match and thumb_immshifted_val and
+ // thumb_immshifted_shamt to get the val/shift pieces.
+ def thumb_immshifted : PatLeaf<(imm), [{
+   return ARM_AM::isThumbImmShiftedVal((unsigned)N->getValue());
+ }]>;
+ 
+ // thumb_immshifted_val - The non-shifted value piece, as reported by
+ // ARM_AM::getThumbImmNonShiftedVal.
+ def thumb_immshifted_val : SDNodeXForm<imm, [{
+   unsigned V = ARM_AM::getThumbImmNonShiftedVal((unsigned)N->getValue());
+   return CurDAG->getTargetConstant(V, MVT::i32);
+ }]>;
+ 
+ // thumb_immshifted_shamt - The shift amount piece, as reported by
+ // ARM_AM::getThumbImmValShift.
+ def thumb_immshifted_shamt : SDNodeXForm<imm, [{
+   unsigned V = ARM_AM::getThumbImmValShift((unsigned)N->getValue());
+   return CurDAG->getTargetConstant(V, MVT::i32);
+ }]>;
+ 
+ // Define Thumb specific addressing modes.
+ 
+ // Each addressing mode below is both an i32 Operand and a ComplexPattern
+ // producing two operands; the string names the selection routine that
+ // matches it, and PrintMethod names the routine that prints it.
+ 
+ // t_addrmode_rr := reg + reg
+ //
+ def t_addrmode_rr : Operand<i32>,
+                     ComplexPattern<i32, 2, "SelectThumbAddrModeRR", []> {
+   let PrintMethod = "printThumbAddrModeRROperand";
+   let MIOperandInfo = (ops GPR:$base, GPR:$offsreg);
+ }
+ 
+ // t_addrmode_ri5_{1|2|4} := reg + imm5 * {1|2|4}
+ //
+ def t_addrmode_ri5_1 : Operand<i32>,
+                        ComplexPattern<i32, 2, "SelectThumbAddrModeRI5_1", []> {
+   let PrintMethod = "printThumbAddrModeRI5_1Operand";
+   let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm);
+ }
+ def t_addrmode_ri5_2 : Operand<i32>,
+                        ComplexPattern<i32, 2, "SelectThumbAddrModeRI5_2", []> {
+   let PrintMethod = "printThumbAddrModeRI5_2Operand";
+   let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm);
+ }
+ def t_addrmode_ri5_4 : Operand<i32>,
+                        ComplexPattern<i32, 2, "SelectThumbAddrModeRI5_4", []> {
+   let PrintMethod = "printThumbAddrModeRI5_4Operand";
+   let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm);
+ }
+ 
+ // t_addrmode_sp := sp + imm8 * 4
+ //
+ // NOTE(review): the base operand is declared as a generic GPR; presumably
+ // SelectThumbAddrModeSP only ever produces SP here - confirm.
+ def t_addrmode_sp : Operand<i32>,
+                     ComplexPattern<i32, 2, "SelectThumbAddrModeSP", []> {
+   let PrintMethod = "printThumbAddrModeSPOperand";
+   let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm);
+ }
+ 
+ //===----------------------------------------------------------------------===//
+ //  Miscellaneous Instructions.
+ //
+ 
+ // tPICADD - Selected for ARMpic_add.  Prints the label $cp on its own line
+ // and then adds pc into $dst ($lhs is tied to $dst by TIt).
+ def tPICADD : TIt<(ops GPR:$dst, GPR:$lhs, pclabel:$cp),
+                   "\n$cp:\n\tadd $dst, pc",
+                   [(set GPR:$dst, (ARMpic_add GPR:$lhs, imm:$cp))]>;
+ 
+ //===----------------------------------------------------------------------===//
+ //  Control Flow Instructions.
+ //
+ 
+ // tBX_RET - Return, selected for ARMretflag and printed as "bx lr".
+ let isReturn = 1, isTerminator = 1 in
+   def tBX_RET : TI<(ops), "bx lr", [(ARMretflag)]>;
+ 
+ // tPOP_RET - A "pop" of a register list that is also marked as a return and
+ // terminator.  It has no selection pattern.
+ // FIXME: remove when we have a way to marking a MI with these properties.
+ let isLoad = 1, isReturn = 1, isTerminator = 1 in
+ def tPOP_RET : TI<(ops reglist:$dst1, variable_ops),
+                    "pop $dst1", []>;
+ 
+ // Call instructions.  All of them are marked as clobbering R0-R3, LR and
+ // D0-D7.
+ let isCall = 1, noResults = 1, 
+   Defs = [R0, R1, R2, R3, LR,
+           D0, D1, D2, D3, D4, D5, D6, D7] in {
+   // Direct Thumb call.
+   def tBL  : TIx2<(ops i32imm:$func, variable_ops),
+                    "bl ${func:call}",
+                    [(ARMtcall tglobaladdr:$func)]>;
+   // ARMv5T and above
+   // Requires<> replaces the Predicates list inherited from ThumbI, so
+   // IsThumb has to be restated next to HasV5T (as ThumbV5Pat does).
+   def tBLXi : TIx2<(ops i32imm:$func, variable_ops),
+                     "blx ${func:call}",
+                     [(ARMcall tglobaladdr:$func)]>,
+               Requires<[IsThumb, HasV5T]>;
+   def tBLXr : TI<(ops GPR:$dst, variable_ops),
+                   "blx $dst",
+                   [(ARMtcall GPR:$dst)]>,
+               Requires<[IsThumb, HasV5T]>;
+   // ARMv4T
+   // Indirect call without blx: copy pc into lr, then bx to the target.
+   def tBX : TIx2<(ops GPR:$dst, variable_ops),
+                   "cpy lr, pc\n\tbx $dst",
+                   [(ARMcall_nolink GPR:$dst)]>;
+ }
+ 
+ // tB - Unconditional branch to a basic block.
+ let isBranch = 1, isTerminator = 1, isBarrier = 1 in
+   def tB   : TI<(ops brtarget:$dst), "b $dst", [(br bb:$dst)]>;
+ 
+ // tBcc - Conditional branch, selected for ARMbrcond; the condition code $cc
+ // is printed as a suffix of the mnemonic.
+ let isBranch = 1, isTerminator = 1, noResults = 1, isBarrier = 1 in
+   def tBcc : TI<(ops brtarget:$dst, CCOp:$cc), "b$cc $dst",
+                  [(ARMbrcond bb:$dst, imm:$cc)]>;
+ 
+ //===----------------------------------------------------------------------===//
+ //  Load Store Instructions.
+ //
+ 
+ let isLoad = 1 in {
+ // Word loads: reg + imm5 * 4, reg + reg, and sp-relative forms.
+ def tLDRri : TI4<(ops GPR:$dst, t_addrmode_ri5_4:$addr),
+                  "ldr $dst, $addr",
+                  [(set GPR:$dst, (load t_addrmode_ri5_4:$addr))]>;
+ 
+ def tLDRrr : TI<(ops GPR:$dst, t_addrmode_rr:$addr),
+                 "ldr $dst, $addr",
+                 [(set GPR:$dst, (load t_addrmode_rr:$addr))]>;
+ // def tLDRpci
+ def tLDRspi : TIs<(ops GPR:$dst, t_addrmode_sp:$addr),
+                   "ldr $dst, $addr",
+                   [(set GPR:$dst, (load t_addrmode_sp:$addr))]>;
+ 
+ // Zero-extending byte loads.
+ def tLDRBri : TI1<(ops GPR:$dst, t_addrmode_ri5_1:$addr),
+                   "ldrb $dst, $addr",
+                   [(set GPR:$dst, (zextloadi8 t_addrmode_ri5_1:$addr))]>;
+ 
+ def tLDRBrr : TI1<(ops GPR:$dst, t_addrmode_rr:$addr),
+                   "ldrb $dst, $addr",
+                   [(set GPR:$dst, (zextloadi8 t_addrmode_rr:$addr))]>;
+ 
+ // Zero-extending halfword loads.
+ def tLDRHri : TI2<(ops GPR:$dst, t_addrmode_ri5_2:$addr),
+                   "ldrh $dst, $addr",
+                   [(set GPR:$dst, (zextloadi16 t_addrmode_ri5_2:$addr))]>;
+ 
+ def tLDRHrr : TI2<(ops GPR:$dst, t_addrmode_rr:$addr),
+                   "ldrh $dst, $addr",
+                   [(set GPR:$dst, (zextloadi16 t_addrmode_rr:$addr))]>;
+ 
+ // Sign-extending loads; only the reg + reg form is defined here.
+ def tLDRSBrr : TI1<(ops GPR:$dst, t_addrmode_rr:$addr),
+                    "ldrsb $dst, $addr",
+                    [(set GPR:$dst, (sextloadi8 t_addrmode_rr:$addr))]>;
+ 
+ def tLDRSHrr : TI2<(ops GPR:$dst, t_addrmode_rr:$addr),
+                    "ldrsh $dst, $addr",
+                    [(set GPR:$dst, (sextloadi16 t_addrmode_rr:$addr))]>;
+ } // isLoad
+ 
+ let isStore = 1 in {
+ // Word stores: reg + imm5 * 4, reg + reg, and sp-relative forms.
+ def tSTRri : TI4<(ops GPR:$src, t_addrmode_ri5_4:$addr),
+                  "str $src, $addr",
+                  [(store GPR:$src, t_addrmode_ri5_4:$addr)]>;
+ 
+ def tSTRrr : TI<(ops GPR:$src, t_addrmode_rr:$addr),
+                  "str $src, $addr",
+                  [(store GPR:$src, t_addrmode_rr:$addr)]>;
+ 
+ def tSTRspi : TIs<(ops GPR:$src, t_addrmode_sp:$addr),
+                    "str $src, $addr",
+                    [(store GPR:$src, t_addrmode_sp:$addr)]>;
+ 
+ // Truncating byte stores.
+ def tSTRBri : TI1<(ops GPR:$src, t_addrmode_ri5_1:$addr),
+                    "strb $src, $addr",
+                    [(truncstorei8 GPR:$src, t_addrmode_ri5_1:$addr)]>;
+ 
+ def tSTRBrr : TI1<(ops GPR:$src, t_addrmode_rr:$addr),
+                    "strb $src, $addr",
+                    [(truncstorei8 GPR:$src, t_addrmode_rr:$addr)]>;
+ 
+ // Truncating halfword stores.  The pattern must use the same imm5 * 2
+ // addressing mode as the declared operand (and as tLDRHri).
+ def tSTRHri : TI2<(ops GPR:$src, t_addrmode_ri5_2:$addr),
+                    "strh $src, $addr",
+                    [(truncstorei16 GPR:$src, t_addrmode_ri5_2:$addr)]>;
+ 
+ def tSTRHrr : TI2<(ops GPR:$src, t_addrmode_rr:$addr),
+                    "strh $src, $addr",
+                    [(truncstorei16 GPR:$src, t_addrmode_rr:$addr)]>;
+ } // isStore
+ 
+ //===----------------------------------------------------------------------===//
+ //  Load / store multiple Instructions.
+ //
+ 
+ // TODO: A7-44: LDMIA - load multiple
+ 
+ // tPOP / tPUSH - Pop / push a register list.  Neither has a selection
+ // pattern.
+ let isLoad = 1 in
+ def tPOP : TI<(ops reglist:$dst1, variable_ops),
+                "pop $dst1", []>;
+ 
+ let isStore = 1 in
+ def tPUSH : TI<(ops reglist:$src1, variable_ops),
+                 "push $src1", []>;
+ 
+ //===----------------------------------------------------------------------===//
+ //  Arithmetic Instructions.
+ //
+ 
+ // Add of a register and an immediate in [0,7] (three-operand form).
+ def tADDi3 : TI<(ops GPR:$dst, GPR:$lhs, i32imm:$rhs),
+                 "add $dst, $lhs, $rhs",
+                 [(set GPR:$dst, (add GPR:$lhs, imm0_7:$rhs))]>;
+ 
+ // Add of an immediate in [8,255] (two-address form).
+ def tADDi8 : TIt<(ops GPR:$dst, GPR:$lhs, i32imm:$rhs),
+                  "add $dst, $rhs",
+                  [(set GPR:$dst, (add GPR:$lhs, imm8_255:$rhs))]>;
+ 
+ def tADDrr : TI<(ops GPR:$dst, GPR:$lhs, GPR:$rhs),
+                 "add $dst, $lhs, $rhs",
+                 [(set GPR:$dst, (add GPR:$lhs, GPR:$rhs))]>;
+ 
+ // The remaining add forms have no selection patterns.
+ def tADDhirr : TIt<(ops GPR:$dst, GPR:$lhs, GPR:$rhs),
+                    "add $dst, $rhs", []>;
+ 
+ def tADDrPCi : TI<(ops GPR:$dst, i32imm:$rhs),
+                   "add $dst, pc, $rhs * 4", []>;
+ def tADDrSPi : TI<(ops GPR:$dst, GPR:$sp, i32imm:$rhs),
+                   "add $dst, $sp, $rhs * 4", []>;
+ def tADDspi : TI<(ops GPR:$sp, i32imm:$rhs),
+                  "add $sp, $rhs * 4", []>;
+ 
+ 
+ def tAND : TIt<(ops GPR:$dst, GPR:$lhs, GPR:$rhs),
+                 "and $dst, $rhs",
+                 [(set GPR:$dst, (and GPR:$lhs, GPR:$rhs))]>;
+ 
+ def tASRri : TI<(ops GPR:$dst, GPR:$lhs, i32imm:$rhs),
+                 "asr $dst, $lhs, $rhs",
+                 [(set GPR:$dst, (sra GPR:$lhs, imm:$rhs))]>;
+ 
+ def tASRrr : TIt<(ops GPR:$dst, GPR:$lhs, GPR:$rhs),
+                  "asr $dst, $rhs",
+                  [(set GPR:$dst, (sra GPR:$lhs, GPR:$rhs))]>;
+ 
+ // Bit clear: $lhs and (not $rhs).
+ def tBIC : TIt<(ops GPR:$dst, GPR:$lhs, GPR:$rhs),
+                "bic $dst, $rhs",
+                [(set GPR:$dst, (and GPR:$lhs, (not GPR:$rhs)))]>;
+ 
+ 
+ // Compare $lhs against the negation of $rhs.
+ def tCMN : TI<(ops GPR:$lhs, GPR:$rhs),
+               "cmn $lhs, $rhs",
+               [(ARMcmp GPR:$lhs, (ineg GPR:$rhs))]>;
+ 
+ // Compare against an immediate in [0,255].
+ def tCMPi8 : TI<(ops GPR:$lhs, i32imm:$rhs),
+                "cmp $lhs, $rhs",
+                [(ARMcmp GPR:$lhs, imm0_255:$rhs)]>;
+ 
+ def tCMPr : TI<(ops GPR:$lhs, GPR:$rhs),
+                "cmp $lhs, $rhs",
+                [(ARMcmp GPR:$lhs, GPR:$rhs)]>;
+                
+ // TODO: A7-37: CMP(3) - cmp hi regs
+ 
+ def tEOR : TIt<(ops GPR:$dst, GPR:$lhs, GPR:$rhs),
+                "eor $dst, $rhs",
+                [(set GPR:$dst, (xor GPR:$lhs, GPR:$rhs))]>;
+ 
+ // Shifts: the immediate forms are three-operand, the register forms are
+ // two-address.
+ def tLSLri : TI<(ops GPR:$dst, GPR:$lhs, i32imm:$rhs),
+                 "lsl $dst, $lhs, $rhs",
+                 [(set GPR:$dst, (shl GPR:$lhs, imm:$rhs))]>;
+ 
+ def tLSLrr : TIt<(ops GPR:$dst, GPR:$lhs, GPR:$rhs),
+                  "lsl $dst, $rhs",
+                  [(set GPR:$dst, (shl GPR:$lhs, GPR:$rhs))]>;
+ 
+ def tLSRri : TI<(ops GPR:$dst, GPR:$lhs, i32imm:$rhs),
+                 "lsr $dst, $lhs, $rhs",
+                 [(set GPR:$dst, (srl GPR:$lhs, imm:$rhs))]>;
+ 
+ def tLSRrr : TIt<(ops GPR:$dst, GPR:$lhs, GPR:$rhs),
+                  "lsr $dst, $rhs",
+                  [(set GPR:$dst, (srl GPR:$lhs, GPR:$rhs))]>;
+ 
+ // Materialize an immediate in [0,255].
+ def tMOVri8 : TI<(ops GPR:$dst, i32imm:$src),
+                  "mov $dst, $src",
+                  [(set GPR:$dst, imm0_255:$src)]>;
+ 
+ // TODO: A7-73: MOV(2) - mov setting flag.
+ 
+ 
+ // Note: MOV(2) of two low regs updates the flags, so we emit this as 'cpy',
+ // which is MOV(3).  This also supports high registers.
+ def tMOVrr  : TI<(ops GPR:$dst, GPR:$src),
+                  "cpy $dst, $src", []>;
+ 
+ def tMUL : TIt<(ops GPR:$dst, GPR:$lhs, GPR:$rhs),
+                "mul $dst, $rhs",
+                [(set GPR:$dst, (mul GPR:$lhs, GPR:$rhs))]>;
+ 
+ // Bitwise not.
+ def tMVN : TI<(ops GPR:$dst, GPR:$src),
+               "mvn $dst, $src",
+               [(set GPR:$dst, (not GPR:$src))]>;
+ 
+ // Integer negate.
+ def tNEG : TI<(ops GPR:$dst, GPR:$src),
+               "neg $dst, $src",
+               [(set GPR:$dst, (ineg GPR:$src))]>;
+ 
+ def tORR : TIt<(ops GPR:$dst, GPR:$lhs, GPR:$rhs),
+                "orr $dst, $rhs",
+                [(set GPR:$dst, (or GPR:$lhs, GPR:$rhs))]>;
+ 
+ 
+ // Byte-reversal instructions.  These, like the extend instructions further
+ // down, are restricted to Thumb mode with HasV6.
+ def tREV : TI<(ops GPR:$dst, GPR:$src),
+               "rev $dst, $src",
+               [(set GPR:$dst, (bswap GPR:$src))]>, 
+               Requires<[IsThumb, HasV6]>;
+ 
+ // rev16: swap the two bytes within each halfword.
+ def tREV16 : TI<(ops GPR:$dst, GPR:$src),
+                 "rev16 $dst, $src",
+                 [(set GPR:$dst,
+                     (or (and (srl GPR:$src, 8), 0xFF),
+                         (or (and (shl GPR:$src, 8), 0xFF00),
+                             (or (and (srl GPR:$src, 8), 0xFF0000),
+                                 (and (shl GPR:$src, 8), 0xFF000000)))))]>,
+                 Requires<[IsThumb, HasV6]>;
+ 
+ // revsh: swap the bytes of the low halfword and sign-extend the result
+ // from 16 bits.
+ def tREVSH : TI<(ops GPR:$dst, GPR:$src),
+                 "revsh $dst, $src",
+                 [(set GPR:$dst,
+                    (sext_inreg
+                      (or (srl (and GPR:$src, 0xFFFF), 8),
+                          (shl GPR:$src, 8)), i16))]>,
+                 Requires<[IsThumb, HasV6]>;
+ 
+ def tROR : TIt<(ops GPR:$dst, GPR:$lhs, GPR:$rhs),
+                 "ror $dst, $rhs",
+                 [(set GPR:$dst, (rotr GPR:$lhs, GPR:$rhs))]>;
+ 
+ def tSBC : TIt<(ops GPR:$dst, GPR:$lhs, GPR:$rhs),
+                 "sbc $dst, $rhs",
+                 [(set GPR:$dst, (sube GPR:$lhs, GPR:$rhs))]>;
+ 
+ // TODO: A7-96: STMIA - store multiple.
+ 
+ // tSUBi3 / tSUBi8 match an add of an immediate whose negation is in range;
+ // the *_neg predicates negate the immediate, and it is printed as a sub.
+ def tSUBi3 : TI<(ops GPR:$dst, GPR:$lhs, i32imm:$rhs),
+                 "sub $dst, $lhs, $rhs",
+                 [(set GPR:$dst, (add GPR:$lhs, imm0_7_neg:$rhs))]>;
+                 
+ def tSUBi8 : TIt<(ops GPR:$dst, GPR:$lhs, i32imm:$rhs),
+                   "sub $dst, $rhs",
+                   [(set GPR:$dst, (add GPR:$lhs, imm8_255_neg:$rhs))]>;
+                 
+ def tSUBrr : TI<(ops GPR:$dst, GPR:$lhs, GPR:$rhs),
+                 "sub $dst, $lhs, $rhs",
+                 [(set GPR:$dst, (sub GPR:$lhs, GPR:$rhs))]>;
+ 
+ // No selection pattern.
+ def tSUBspi : TI<(ops GPR:$sp, i32imm:$rhs),
+                  "sub $sp, $rhs * 4", []>;
+ 
+ // Sign extension from 8 / 16 bits.
+ def tSXTB  : TI<(ops GPR:$dst, GPR:$src),
+                 "sxtb $dst, $src",
+                 [(set GPR:$dst, (sext_inreg GPR:$src, i8))]>,
+                 Requires<[IsThumb, HasV6]>;
+ def tSXTH  : TI<(ops GPR:$dst, GPR:$src),
+                 "sxth $dst, $src",
+                 [(set GPR:$dst, (sext_inreg GPR:$src, i16))]>,
+                 Requires<[IsThumb, HasV6]>;
+ 
+ // TODO: A7-122: TST - test.
+ 
+ // Zero extension from 8 / 16 bits, matched as an and with 0xFF / 0xFFFF.
+ def tUXTB  : TI<(ops GPR:$dst, GPR:$src),
+                 "uxtb $dst, $src",
+                 [(set GPR:$dst, (and GPR:$src, 0xFF))]>,
+                 Requires<[IsThumb, HasV6]>;
+ def tUXTH  : TI<(ops GPR:$dst, GPR:$src),
+                 "uxth $dst, $src",
+                 [(set GPR:$dst, (and GPR:$src, 0xFFFF))]>, 
+                 Requires<[IsThumb, HasV6]>;
+ 
+ 
+ // Conditional move tMOVCCr - Used to implement the Thumb SELECT_CC DAG operation.
+ // Expanded by the scheduler into a branch sequence.
+ // It is a pseudo instruction selected for ARMcmov; its asm string is only
+ // an "@" comment line.
+ let usesCustomDAGSchedInserter = 1 in  // Expanded by the scheduler.
+   def tMOVCCr :
+   PseudoInst<(ops GPR:$dst, GPR:$false, GPR:$true, CCOp:$cc),
+               "@ tMOVCCr $cc",
+               [(set GPR:$dst, (ARMcmov GPR:$false, GPR:$true, imm:$cc))]>;
+ 
+ // tLEApcrel - Load a pc-relative address into a register without offending the
+ // assembler.
+ // The emitted text first .set's PCRELV<uid> to $label - (PCRELL<uid> + 4),
+ // then defines the local label PCRELL<uid> and adds that value to pc.
+ def tLEApcrel : TI<(ops GPR:$dst, i32imm:$label),
+                     !strconcat(!strconcat(".set PCRELV${:uid}, ($label-(",
+                                           "${:private}PCRELL${:uid}+4))\n"),
+                                !strconcat("${:private}PCRELL${:uid}:\n\t",
+                                           "add $dst, pc, #PCRELV${:uid}")),
+                     []>;
+ 
+ // tLEApcrelCall - Same as tLEApcrel, except that the label is printed with
+ // the "call" modifier.
+ def tLEApcrelCall : TI<(ops GPR:$dst, i32imm:$label),
+                    !strconcat(!strconcat(".set PCRELV${:uid}, (${label:call}-(",
+                                          "${:private}PCRELL${:uid}+4))\n"),
+                               !strconcat("${:private}PCRELL${:uid}:\n\t",
+                                          "add $dst, pc, #PCRELV${:uid}")),
+                    []>;
+ 
+ //===----------------------------------------------------------------------===//
+ // Non-Instruction Patterns
+ //
+ 
+ // ConstantPool, GlobalAddress
+ // Wrapped addresses are materialized pc-relatively; the call flavour of the
+ // wrapper uses the call flavour of the pseudo.
+ def : ThumbPat<(ARMWrapper  tglobaladdr :$dst), (tLEApcrel tglobaladdr :$dst)>;
+ def : ThumbPat<(ARMWrapper  tconstpool  :$dst), (tLEApcrel tconstpool  :$dst)>;
+ def : ThumbPat<(ARMWrapperCall tglobaladdr :$dst),
+                (tLEApcrelCall  tglobaladdr :$dst)>;
+ def : ThumbPat<(ARMWrapperCall texternalsym:$dst),
+                (tLEApcrelCall  texternalsym:$dst)>;
+ 
+ // Direct calls
+ def : ThumbPat<(ARMtcall texternalsym:$func), (tBL texternalsym:$func)>;
+ def : ThumbV5Pat<(ARMcall texternalsym:$func), (tBLXi texternalsym:$func)>;
+ 
+ // Indirect calls to ARM routines
+ def : ThumbV5Pat<(ARMcall GPR:$dst), (tBLXr GPR:$dst)>;
+ 
+ // zextload i1 -> zextload i8
+ // Each addressing mode must select the byte load that takes that same
+ // addressing mode: reg + imm5 -> tLDRBri, reg + reg -> tLDRBrr.
+ def : ThumbPat<(zextloadi1 t_addrmode_ri5_1:$addr),
+                (tLDRBri t_addrmode_ri5_1:$addr)>;
+ def : ThumbPat<(zextloadi1 t_addrmode_rr:$addr),
+                (tLDRBrr t_addrmode_rr:$addr)>;
+                   
+ // truncstore i1 -> truncstore i8
+ def : ThumbPat<(truncstorei1 GPR:$src, t_addrmode_ri5_1:$dst), 
+                (tSTRBri GPR:$src, t_addrmode_ri5_1:$dst)>;
+ def : ThumbPat<(truncstorei1 GPR:$src, t_addrmode_rr:$dst), 
+                (tSTRBrr GPR:$src, t_addrmode_rr:$dst)>;
+ 
+ // Large immediate handling.
+ 
+ // Two piece imms.
+ // Materialize the non-shifted value with a mov, then shift it left.
+ def : ThumbPat<(i32 thumb_immshifted:$src),
+                (tLSLri (tMOVri8 (thumb_immshifted_val imm:$src)),
+                        (thumb_immshifted_shamt imm:$src))>;
+ 
+ // Immediates whose complement fits in [0,255]: mov the complement, then mvn.
+ def : ThumbPat<(i32 imm0_255_comp:$src),
+                (tMVN (tMOVri8 (imm_comp_XFORM imm:$src)))>;


Index: llvm/lib/Target/ARM/ARMInstrVFP.td
diff -c /dev/null llvm/lib/Target/ARM/ARMInstrVFP.td:1.1
*** /dev/null	Fri Jan 19 01:51:57 2007
--- llvm/lib/Target/ARM/ARMInstrVFP.td	Fri Jan 19 01:51:42 2007
***************
*** 0 ****
--- 1,359 ----
+ //===- ARMInstrVFP.td - VFP support for ARM -------------------------------===//
+ //
+ //                     The LLVM Compiler Infrastructure
+ //
+ // This file was developed by Chris Lattner and is distributed under the
+ // University of Illinois Open Source License. See LICENSE.TXT for details.
+ //
+ //===----------------------------------------------------------------------===//
+ //
+ // This file describes the ARM VFP instruction set.
+ //
+ //===----------------------------------------------------------------------===//
+ 
+ //===----------------------------------------------------------------------===//
+ // ARM VFP Instruction templates.
+ //
+ 
+ // ARM Float Instruction
+ class ASI<dag ops, string asm, list<dag> pattern> : AI<ops, asm, pattern> {
+   // TODO: Mark the instructions with the appropriate subtarget info.
+ }
+ 
+ class ASI5<dag ops, string asm, list<dag> pattern>
+   : I<ops, AddrMode5, Size4Bytes, IndexModeNone, asm, "", pattern> {
+   // TODO: Mark the instructions with the appropriate subtarget info.
+ }
+ 
+ // ARM Double Instruction
+ class ADI<dag ops, string asm, list<dag> pattern> : AI<ops, asm, pattern> {
+   // TODO: Mark the instructions with the appropriate subtarget info.
+ }
+ 
+ class ADI5<dag ops, string asm, list<dag> pattern>
+   : I<ops, AddrMode5, Size4Bytes, IndexModeNone, asm, "", pattern> {
+   // TODO: Mark the instructions with the appropriate subtarget info.
+ }
+ 
+ def SDT_FTOI :
+ SDTypeProfile<1, 1, [SDTCisVT<0, f32>, SDTCisFP<1>]>;
+ def SDT_ITOF :
+ SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVT<1, f32>]>;
+ def SDT_CMPFP0 :
+ SDTypeProfile<0, 1, [SDTCisFP<0>]>;
+ def SDT_FMDRR :
+ SDTypeProfile<1, 2, [SDTCisVT<0, f64>, SDTCisVT<1, i32>,
+                      SDTCisSameAs<1, 2>]>;
+ 
+ def arm_ftoui  : SDNode<"ARMISD::FTOUI", SDT_FTOI>;
+ def arm_ftosi  : SDNode<"ARMISD::FTOSI", SDT_FTOI>;
+ def arm_sitof  : SDNode<"ARMISD::SITOF", SDT_ITOF>;
+ def arm_uitof  : SDNode<"ARMISD::UITOF", SDT_ITOF>;
+ def arm_fmstat : SDNode<"ARMISD::FMSTAT", SDTRet, [SDNPInFlag,SDNPOutFlag]>;
+ def arm_cmpfp  : SDNode<"ARMISD::CMPFP", SDT_ARMCmp, [SDNPOutFlag]>;
+ def arm_cmpfp0 : SDNode<"ARMISD::CMPFPw0", SDT_CMPFP0, [SDNPOutFlag]>;
+ def arm_fmdrr  : SDNode<"ARMISD::FMDRR", SDT_FMDRR>;
+ 
+ //===----------------------------------------------------------------------===//
+ //  Load / store Instructions.
+ //
+ 
+ let isLoad = 1 in {
+ def FLDD  : ADI5<(ops DPR:$dst, addrmode5:$addr),
+                  "fldd $dst, $addr",
+                  [(set DPR:$dst, (load addrmode5:$addr))]>;
+ 
+ def FLDS  : ASI5<(ops SPR:$dst, addrmode5:$addr),
+                  "flds $dst, $addr",
+                  [(set SPR:$dst, (load addrmode5:$addr))]>;
+ } // isLoad
+ 
+ let isStore = 1 in {
+ def FSTD  : ADI5<(ops DPR:$src, addrmode5:$addr),
+                  "fstd $src, $addr",
+                  [(store DPR:$src, addrmode5:$addr)]>;
+ 
+ def FSTS  : ASI5<(ops SPR:$src, addrmode5:$addr),
+                  "fsts $src, $addr",
+                  [(store SPR:$src, addrmode5:$addr)]>;
+ } // isStore
+ 
+ //===----------------------------------------------------------------------===//
+ //  Load / store multiple Instructions.
+ //
+ 
+ let isLoad = 1 in {
+ def FLDMD : ADI5<(ops addrmode5:$addr, reglist:$dst1, variable_ops),
+                  "fldm${addr:submode}d ${addr:base}, $dst1",
+                  []>;
+ 
+ def FLDMS : ASI5<(ops addrmode5:$addr, reglist:$dst1, variable_ops),
+                  "fldm${addr:submode}s ${addr:base}, $dst1",
+                  []>;
+ } // isLoad
+ 
+ let isStore = 1 in {
+ def FSTMD : ADI5<(ops addrmode5:$addr, reglist:$src1, variable_ops),
+                  "fstm${addr:submode}d ${addr:base}, $src1",
+                  []>;
+ 
+ def FSTMS : ASI5<(ops addrmode5:$addr, reglist:$src1, variable_ops),
+                  "fstm${addr:submode}s ${addr:base}, $src1",
+                  []>;
+ } // isStore
+ 
+ // FLDMX, FSTMX - mixing S/D registers for pre-armv6 cores
+ 
+ //===----------------------------------------------------------------------===//
+ // FP Binary Operations.
+ //
+ 
+ def FADDD  : ADI<(ops DPR:$dst, DPR:$a, DPR:$b),
+                  "faddd $dst, $a, $b",
+                  [(set DPR:$dst, (fadd DPR:$a, DPR:$b))]>;
+ 
+ def FADDS  : ASI<(ops SPR:$dst, SPR:$a, SPR:$b),
+                  "fadds $dst, $a, $b",
+                  [(set SPR:$dst, (fadd SPR:$a, SPR:$b))]>;
+ 
+ def FCMPED : ADI<(ops DPR:$a, DPR:$b),
+                  "fcmped $a, $b",
+                  [(arm_cmpfp DPR:$a, DPR:$b)]>;
+ 
+ def FCMPES : ASI<(ops SPR:$a, SPR:$b),
+                  "fcmpes $a, $b",
+                  [(arm_cmpfp SPR:$a, SPR:$b)]>;
+ 
+ def FDIVD  : ADI<(ops DPR:$dst, DPR:$a, DPR:$b),
+                  "fdivd $dst, $a, $b",
+                  [(set DPR:$dst, (fdiv DPR:$a, DPR:$b))]>;
+ 
+ def FDIVS  : ASI<(ops SPR:$dst, SPR:$a, SPR:$b),
+                  "fdivs $dst, $a, $b",
+                  [(set SPR:$dst, (fdiv SPR:$a, SPR:$b))]>;
+ 
+ def FMULD  : ADI<(ops DPR:$dst, DPR:$a, DPR:$b),
+                  "fmuld $dst, $a, $b",
+                  [(set DPR:$dst, (fmul DPR:$a, DPR:$b))]>;
+ 
+ def FMULS  : ASI<(ops SPR:$dst, SPR:$a, SPR:$b),
+                  "fmuls $dst, $a, $b",
+                  [(set SPR:$dst, (fmul SPR:$a, SPR:$b))]>;
+ 
+ 
+ def FNMULD  : ADI<(ops DPR:$dst, DPR:$a, DPR:$b),
+                   "fnmuld $dst, $a, $b",
+                   [(set DPR:$dst, (fneg (fmul DPR:$a, DPR:$b)))]>;
+ 
+ def FNMULS  : ASI<(ops SPR:$dst, SPR:$a, SPR:$b),
+                   "fnmuls $dst, $a, $b",
+                   [(set SPR:$dst, (fneg (fmul SPR:$a, SPR:$b)))]>;
+ 
+ def FSUBD  : ADI<(ops DPR:$dst, DPR:$a, DPR:$b),
+                  "fsubd $dst, $a, $b",
+                  [(set DPR:$dst, (fsub DPR:$a, DPR:$b))]>;
+ 
+ def FSUBS  : ASI<(ops SPR:$dst, SPR:$a, SPR:$b),
+                  "fsubs $dst, $a, $b",
+                  [(set SPR:$dst, (fsub SPR:$a, SPR:$b))]>;
+ 
+ //===----------------------------------------------------------------------===//
+ // FP Unary Operations.
+ //
+ 
+ def FABSD  : ADI<(ops DPR:$dst, DPR:$a),
+                  "fabsd $dst, $a",
+                  [(set DPR:$dst, (fabs DPR:$a))]>;
+ 
+ def FABSS  : ASI<(ops SPR:$dst, SPR:$a),
+                  "fabss $dst, $a",
+                  [(set SPR:$dst, (fabs SPR:$a))]>;
+ 
+ def FCMPEZD : ADI<(ops DPR:$a),
+                   "fcmpezd $a",
+                   [(arm_cmpfp0 DPR:$a)]>;
+ 
+ def FCMPEZS : ASI<(ops SPR:$a),
+                   "fcmpezs $a",
+                   [(arm_cmpfp0 SPR:$a)]>;
+ 
+ def FCVTDS : ADI<(ops DPR:$dst, SPR:$a),
+                  "fcvtds $dst, $a",
+                  [(set DPR:$dst, (fextend SPR:$a))]>;
+ 
+ def FCVTSD : ADI<(ops SPR:$dst, DPR:$a),
+                  "fcvtsd $dst, $a",
+                  [(set SPR:$dst, (fround DPR:$a))]>;
+ 
+ def FCPYD  : ADI<(ops DPR:$dst, DPR:$a),
+                  "fcpyd $dst, $a",
+                  [/*(set DPR:$dst, DPR:$a)*/]>;
+ 
+ def FCPYS  : ASI<(ops SPR:$dst, SPR:$a),
+                  "fcpys $dst, $a",
+                  [/*(set SPR:$dst, SPR:$a)*/]>;
+ 
+ def FNEGD  : ADI<(ops DPR:$dst, DPR:$a),
+                  "fnegd $dst, $a",
+                  [(set DPR:$dst, (fneg DPR:$a))]>;
+ 
+ def FNEGS  : ASI<(ops SPR:$dst, SPR:$a),
+                  "fnegs $dst, $a",
+                  [(set SPR:$dst, (fneg SPR:$a))]>;
+ 
+ def FSQRTD  : ADI<(ops DPR:$dst, DPR:$a),
+                  "fsqrtd $dst, $a",
+                  [(set DPR:$dst, (fsqrt DPR:$a))]>;
+ 
+ def FSQRTS  : ASI<(ops SPR:$dst, SPR:$a),
+                  "fsqrts $dst, $a",
+                  [(set SPR:$dst, (fsqrt SPR:$a))]>;
+ 
+ //===----------------------------------------------------------------------===//
+ // FP <-> GPR Copies.  Int <-> FP Conversions.
+ //
+ 
+ def IMPLICIT_DEF_SPR : PseudoInst<(ops SPR:$rD),
+                                   "@ IMPLICIT_DEF_SPR $rD",
+                                   [(set SPR:$rD, (undef))]>;
+ def IMPLICIT_DEF_DPR : PseudoInst<(ops DPR:$rD),
+                                   "@ IMPLICIT_DEF_DPR $rD",
+                                   [(set DPR:$rD, (undef))]>;
+ 
+ def FMRS   : ASI<(ops GPR:$dst, SPR:$src),
+                  "fmrs $dst, $src",
+                  [(set GPR:$dst, (bitconvert SPR:$src))]>;
+ 
+ def FMSR   : ASI<(ops SPR:$dst, GPR:$src),
+                  "fmsr $dst, $src",
+                  [(set SPR:$dst, (bitconvert GPR:$src))]>;
+ 
+ 
+ def FMRRD  : ADI<(ops GPR:$dst1, GPR:$dst2, DPR:$src),
+                  "fmrrd $dst1, $dst2, $src",
+                  [/* FIXME: Can't write pattern for multiple result instr*/]>;
+ 
+ // FMDHR: GPR -> DPR (high half)
+ // FMDLR: GPR -> DPR (low half)
+ 
+ def FMDRR : ADI<(ops DPR:$dst, GPR:$src1, GPR:$src2),
+                 "fmdrr $dst, $src1, $src2",
+                 [(set DPR:$dst, (arm_fmdrr GPR:$src1, GPR:$src2))]>;
+ 
+ // FMRDH: DPR (high half) -> GPR
+ // FMRDL: DPR (low half) -> GPR
+ // FMRRS: SPR -> GPR
+ // FMRX : SPR system reg -> GPR
+ 
+ // FMSRR: GPR -> SPR
+ 
+ 
+ def FMSTAT : ASI<(ops), "fmstat", [(arm_fmstat)]>;
+ 
+ // FMXR: GPR -> VFP system reg
+ 
+ 
+ // Int to FP:
+ 
+ def FSITOD : ADI<(ops DPR:$dst, SPR:$a),
+                  "fsitod $dst, $a",
+                  [(set DPR:$dst, (arm_sitof SPR:$a))]>;
+ 
+ def FSITOS : ASI<(ops SPR:$dst, SPR:$a),
+                  "fsitos $dst, $a",
+                  [(set SPR:$dst, (arm_sitof SPR:$a))]>;
+ 
+ def FUITOD : ADI<(ops DPR:$dst, SPR:$a),
+                  "fuitod $dst, $a",
+                  [(set DPR:$dst, (arm_uitof SPR:$a))]>;
+ 
+ def FUITOS : ASI<(ops SPR:$dst, SPR:$a),
+                  "fuitos $dst, $a",
+                  [(set SPR:$dst, (arm_uitof SPR:$a))]>;
+ 
+ // FP to Int:
+ // Always set Z bit in the instruction, i.e. "round towards zero" variants.
+ 
+ def FTOSIZD : ADI<(ops SPR:$dst, DPR:$a),
+                  "ftosizd $dst, $a",
+                  [(set SPR:$dst, (arm_ftosi DPR:$a))]>;
+ 
+ def FTOSIZS : ASI<(ops SPR:$dst, SPR:$a),
+                  "ftosizs $dst, $a",
+                  [(set SPR:$dst, (arm_ftosi SPR:$a))]>;
+ 
+ def FTOUIZD : ADI<(ops SPR:$dst, DPR:$a),
+                  "ftouizd $dst, $a",
+                  [(set SPR:$dst, (arm_ftoui DPR:$a))]>;
+ 
+ def FTOUIZS : ASI<(ops SPR:$dst, SPR:$a),
+                  "ftouizs $dst, $a",
+                  [(set SPR:$dst, (arm_ftoui SPR:$a))]>;
+ 
+ //===----------------------------------------------------------------------===//
+ // FP FMA Operations.
+ //
+ 
+ def FMACD : ADI<(ops DPR:$dst, DPR:$dstin, DPR:$a, DPR:$b),
+                 "fmacd $dst, $a, $b",
+                 [(set DPR:$dst, (fadd (fmul DPR:$a, DPR:$b), DPR:$dstin))]>,
+                 RegConstraint<"$dstin = $dst">;
+ 
+ def FMACS : ASI<(ops SPR:$dst, SPR:$dstin, SPR:$a, SPR:$b),
+                 "fmacs $dst, $a, $b",
+                 [(set SPR:$dst, (fadd (fmul SPR:$a, SPR:$b), SPR:$dstin))]>,
+                 RegConstraint<"$dstin = $dst">;
+ 
+ def FMSCD : ADI<(ops DPR:$dst, DPR:$dstin, DPR:$a, DPR:$b),
+                 "fmscd $dst, $a, $b",
+                 [(set DPR:$dst, (fsub (fmul DPR:$a, DPR:$b), DPR:$dstin))]>,
+                 RegConstraint<"$dstin = $dst">;
+ 
+ def FMSCS : ASI<(ops SPR:$dst, SPR:$dstin, SPR:$a, SPR:$b),
+                 "fmscs $dst, $a, $b",
+                 [(set SPR:$dst, (fsub (fmul SPR:$a, SPR:$b), SPR:$dstin))]>,
+                 RegConstraint<"$dstin = $dst">;
+ 
+ def FNMACD : ADI<(ops DPR:$dst, DPR:$dstin, DPR:$a, DPR:$b),
+                  "fnmacd $dst, $a, $b",
+              [(set DPR:$dst, (fadd (fneg (fmul DPR:$a, DPR:$b)), DPR:$dstin))]>,
+                 RegConstraint<"$dstin = $dst">;
+ 
+ def FNMACS : ASI<(ops SPR:$dst, SPR:$dstin, SPR:$a, SPR:$b),
+                 "fnmacs $dst, $a, $b",
+              [(set SPR:$dst, (fadd (fneg (fmul SPR:$a, SPR:$b)), SPR:$dstin))]>,
+                 RegConstraint<"$dstin = $dst">;
+ 
+ def FNMSCD : ADI<(ops DPR:$dst, DPR:$dstin, DPR:$a, DPR:$b),
+                  "fnmscd $dst, $a, $b",
+              [(set DPR:$dst, (fsub (fneg (fmul DPR:$a, DPR:$b)), DPR:$dstin))]>,
+                 RegConstraint<"$dstin = $dst">;
+ 
+ def FNMSCS : ASI<(ops SPR:$dst, SPR:$dstin, SPR:$a, SPR:$b),
+                 "fnmscs $dst, $a, $b",
+              [(set SPR:$dst, (fsub (fneg (fmul SPR:$a, SPR:$b)), SPR:$dstin))]>,
+                 RegConstraint<"$dstin = $dst">;
+ 
+ //===----------------------------------------------------------------------===//
+ // FP Conditional moves.
+ //
+ 
+ def FCPYDcc  : ADI<(ops DPR:$dst, DPR:$false, DPR:$true, CCOp:$cc),
+                    "fcpyd$cc $dst, $true",
+                    [(set DPR:$dst, (ARMcmov DPR:$false, DPR:$true, imm:$cc))]>,
+                    RegConstraint<"$false = $dst">;
+ 
+ def FCPYScc  : ASI<(ops SPR:$dst, SPR:$false, SPR:$true, CCOp:$cc),
+                    "fcpys$cc $dst, $true",
+                    [(set SPR:$dst, (ARMcmov SPR:$false, SPR:$true, imm:$cc))]>,
+                    RegConstraint<"$false = $dst">;
+ 
+ def FNEGDcc  : ADI<(ops DPR:$dst, DPR:$false, DPR:$true, CCOp:$cc),
+                    "fnegd$cc $dst, $true",
+                    [(set DPR:$dst, (ARMcneg DPR:$false, DPR:$true, imm:$cc))]>,
+                    RegConstraint<"$false = $dst">;
+ 
+ def FNEGScc  : ASI<(ops SPR:$dst, SPR:$false, SPR:$true, CCOp:$cc),
+                    "fnegs$cc $dst, $true",
+                    [(set SPR:$dst, (ARMcneg SPR:$false, SPR:$true, imm:$cc))]>,
+                    RegConstraint<"$false = $dst">;


Index: llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
diff -c /dev/null llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp:1.1
*** /dev/null	Fri Jan 19 01:51:57 2007
--- llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp	Fri Jan 19 01:51:42 2007
***************
*** 0 ****
--- 1,628 ----
+ //===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ----*- C++ -*-=//
+ //
+ //                     The LLVM Compiler Infrastructure
+ //
+ // This file was developed by Evan Cheng and is distributed under the
+ // University of Illinois Open Source License. See LICENSE.TXT for details.
+ //
+ //===----------------------------------------------------------------------===//
+ //
+ // This file contains a pass that performs load / store related peephole
+ // optimizations. This pass should be run after register allocation.
+ //
+ //===----------------------------------------------------------------------===//
+ 
+ #define DEBUG_TYPE "arm-ldst-opt"
+ #include "ARM.h"
+ #include "ARMAddressingModes.h"
+ #include "ARMRegisterInfo.h"
+ #include "llvm/ADT/STLExtras.h"
+ #include "llvm/ADT/SmallVector.h"
+ #include "llvm/ADT/Statistic.h"
+ #include "llvm/CodeGen/MachineBasicBlock.h"
+ #include "llvm/CodeGen/MachineFunctionPass.h"
+ #include "llvm/CodeGen/MachineInstr.h"
+ #include "llvm/CodeGen/MachineInstrBuilder.h"
+ #include "llvm/Support/Compiler.h"
+ #include "llvm/Target/TargetInstrInfo.h"
+ #include "llvm/Target/TargetMachine.h"
+ using namespace llvm;
+ 
+ STATISTIC(NumLDMGened , "Number of ldm instructions generated");
+ STATISTIC(NumSTMGened , "Number of stm instructions generated");
+ STATISTIC(NumFLDMGened, "Number of fldm instructions generated");
+ STATISTIC(NumFSTMGened, "Number of fstm instructions generated");
+ 
+ namespace {
+   /// ARMLoadStoreOpt - Machine function pass that performs load / store
+   /// related peephole optimizations (see the file header).
+   struct VISIBILITY_HIDDEN ARMLoadStoreOpt : public MachineFunctionPass {
+     /// Target instruction info used when building replacement instructions.
+     /// NOTE(review): not initialized here; presumably set in
+     /// runOnMachineFunction - confirm.
+     const TargetInstrInfo *TII;
+ 
+     virtual bool runOnMachineFunction(MachineFunction &Fn);
+ 
+     virtual const char *getPassName() const {
+       return "ARM load / store optimization pass";
+     }
+ 
+   private:
+     /// MemOpQueueEntry - One candidate load / store in the merge queue.
+     struct MemOpQueueEntry {
+       int Offset;                        // Signed offset from the base register.
+       unsigned Position;                 // Order of the instruction in the block.
+       MachineBasicBlock::iterator MBBI;  // The load / store instruction itself.
+       bool Merged;                       // Set once folded into a multiple op.
+       // 'p' is unsigned to match the Position field it initializes.
+       MemOpQueueEntry(int o, unsigned p, MachineBasicBlock::iterator i)
+         : Offset(o), Position(p), MBBI(i), Merged(false) {}
+     };
+     typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
+     typedef MemOpQueue::iterator MemOpQueueIter;
+ 
+     /// MergeLDR_STR - Try to merge the queued memory ops starting at index
+     /// SIndex; returns iterators to the merged instructions that were created.
+     SmallVector<MachineBasicBlock::iterator, 4>
+     MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
+                  int Opcode, unsigned Size, MemOpQueue &MemOps);
+ 
+     bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
+     bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
+   };
+ }
+ 
+ /// createARMLoadStoreOptimizationPass - Allocate a new ARM load / store
+ /// optimization pass and hand it back to the caller.
+ FunctionPass *llvm::createARMLoadStoreOptimizationPass() {
+   FunctionPass *LdStOptPass = new ARMLoadStoreOpt();
+   return LdStOptPass;
+ }
+ 
+ /// getLoadStoreMultipleOpcode - Map a single load / store opcode to the
+ /// matching load / store multiple opcode, bumping the corresponding
+ /// "generated" statistic as a side effect. Aborts on any other opcode.
+ static int getLoadStoreMultipleOpcode(int Opcode) {
+   int MultiOpc = 0;
+   switch (Opcode) {
+   case ARM::LDR:  NumLDMGened++;  MultiOpc = ARM::LDM;   break;
+   case ARM::STR:  NumSTMGened++;  MultiOpc = ARM::STM;   break;
+   case ARM::FLDS: NumFLDMGened++; MultiOpc = ARM::FLDMS; break;
+   case ARM::FSTS: NumFSTMGened++; MultiOpc = ARM::FSTMS; break;
+   case ARM::FLDD: NumFLDMGened++; MultiOpc = ARM::FLDMD; break;
+   case ARM::FSTD: NumFSTMGened++; MultiOpc = ARM::FSTMD; break;
+   default:
+     abort();
+   }
+   return MultiOpc;
+ }
+ 
+ /// mergeOps - Create and insert a load / store multiple (LDM, STM, FLDM{S|D}
+ /// or FSTM{S|D}) before MBBI, with Base as base register and the registers in
+ /// Regs as the register operands that would be loaded / stored.
+ ///
+ /// Offset is the offset of the first access from Base. For LDR / STR the
+ /// offsets 4, -4*N+4 and -4*N are expressed with the ib, da and db sub-modes;
+ /// any other non-zero offset requires an extra ADDri / SUBri to materialize a
+ /// new base, which is only done when more than two registers are merged.
+ ///
+ /// It returns true if the transformation is done, false if nothing was
+ /// inserted.
+ static bool mergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+                      int Offset, unsigned Base, int Opcode,
+                      SmallVector<unsigned, 8> &Regs,
+                      const TargetInstrInfo *TII) {
+   // Only a single register to load / store. Don't bother.
+   unsigned NumRegs = Regs.size();
+   if (NumRegs <= 1)
+     return false;
+ 
+   ARM_AM::AMSubMode Mode = ARM_AM::ia;
+   bool isAM4 = Opcode == ARM::LDR || Opcode == ARM::STR;
+   if (isAM4 && Offset == 4)
+     Mode = ARM_AM::ib;
+   else if (isAM4 && Offset == -4 * (int)NumRegs + 4)
+     Mode = ARM_AM::da;
+   else if (isAM4 && Offset == -4 * (int)NumRegs)
+     Mode = ARM_AM::db;
+   else if (Offset != 0) {
+     // If starting offset isn't zero, insert a MI to materialize a new base.
+     // But only do so if it is cost effective, i.e. merging more than two
+     // loads / stores.
+     if (NumRegs <= 2)
+       return false;
+ 
+     unsigned NewBase;
+     if (Opcode == ARM::LDR)
+       // If it is a load, then just use one of the destination register to
+       // use as the new base.
+       NewBase = Regs[NumRegs-1];
+     else {
+       // FIXME: Try scavenging a register to use as a new base.
+       NewBase = ARM::R12;
+     }
+     int BaseOpc = ARM::ADDri;
+     if (Offset < 0) {
+       BaseOpc = ARM::SUBri;
+       Offset = - Offset;
+     }
+     int ImmedOffset = ARM_AM::getSOImmVal(Offset);
+     if (ImmedOffset == -1)
+       return false;  // Probably not worth it then.
+     BuildMI(MBB, MBBI, TII->get(BaseOpc), NewBase).addReg(Base).addImm(ImmedOffset);
+     Base = NewBase;
+   }
+ 
+   // Both flags must be computed from the original single load / store opcode,
+   // before Opcode is remapped to the multiple form below.
+   bool isDPR = Opcode == ARM::FLDD || Opcode == ARM::FSTD;
+   bool isDef = Opcode == ARM::LDR || Opcode == ARM::FLDS || Opcode == ARM::FLDD;
+   Opcode = getLoadStoreMultipleOpcode(Opcode);
+   MachineInstrBuilder MIB = (isAM4)
+     ? BuildMI(MBB, MBBI, TII->get(Opcode)).addReg(Base)
+         .addImm(ARM_AM::getAM4ModeImm(Mode))
+     : BuildMI(MBB, MBBI, TII->get(Opcode)).addReg(Base)
+         .addImm(ARM_AM::getAM5Opc(Mode, false, isDPR ? NumRegs<<1 : NumRegs));
+   // Registers of a load multiple are definitions; those of a store are uses.
+   for (unsigned i = 0; i != NumRegs; ++i)
+     MIB = MIB.addReg(Regs[i], isDef);
+ 
+   return true;
+ }
+ 
+ /// MergeLDR_STR - Starting at MemOps[SIndex], greedily collect queue entries
+ /// whose offset is exactly Size past the previous one and whose register
+ /// number is acceptable for a load / store multiple, then try to replace them
+ /// with a single instruction via mergeOps. When an entry cannot be added, the
+ /// entries gathered so far are merged (if possible) and the function recurses
+ /// starting from the offending entry. Returns iterators to every merged
+ /// instruction that was created.
+ SmallVector<MachineBasicBlock::iterator, 4>
+ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB,
+                               unsigned SIndex, unsigned Base, int Opcode,
+                               unsigned Size, MemOpQueue &MemOps) {
+   bool isAM4 = Opcode == ARM::LDR || Opcode == ARM::STR;
+   SmallVector<MachineBasicBlock::iterator, 4> Merges;
+   // Offset tracks the last accepted offset; SOffset remembers the first one,
+   // which is what mergeOps needs.
+   int Offset = MemOps[SIndex].Offset;
+   int SOffset = Offset;
+   // Pos / Loc track the entry with the highest Position seen so far; the
+   // merged instruction is inserted right after it.
+   unsigned Pos = MemOps[SIndex].Position;
+   MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
+   SmallVector<unsigned, 8> Regs;
+   unsigned PReg = MemOps[SIndex].MBBI->getOperand(0).getReg();
+   unsigned PRegNum = ARMRegisterInfo::getRegisterNumbering(PReg);
+   Regs.push_back(PReg);
+   for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
+     int NewOffset = MemOps[i].Offset;
+     unsigned Reg = MemOps[i].MBBI->getOperand(0).getReg();
+     unsigned RegNum = ARMRegisterInfo::getRegisterNumbering(Reg);
+     // AM4 - register numbers in ascending order.
+     // AM5 - consecutive register numbers in ascending order.
+     if (NewOffset == Offset + (int)Size &&
+         ((isAM4 && RegNum > PRegNum) || RegNum == PRegNum+1)) {
+       Offset += Size;
+       Regs.push_back(Reg);
+       PRegNum = RegNum;
+     } else {
+       // Can't merge this in. Try merge the earlier ones first.
+       if (mergeOps(MBB, ++Loc, SOffset, Base, Opcode, Regs, TII)) {
+         // mergeOps inserted before Loc, so the new instruction is prior(Loc).
+         Merges.push_back(prior(Loc));
+         // Remove the originals [SIndex, i) that were folded into the merge.
+         for (unsigned j = SIndex; j < i; ++j) {
+           MBB.erase(MemOps[j].MBBI);
+           MemOps[j].Merged = true;
+         }
+       }
+       // Continue with a fresh chain starting at the entry that did not fit.
+       SmallVector<MachineBasicBlock::iterator, 4> Merges2 =
+         MergeLDR_STR(MBB, i, Base, Opcode, Size, MemOps);
+       Merges.append(Merges2.begin(), Merges2.end());
+       return Merges;
+     }
+ 
+     if (MemOps[i].Position > Pos) {
+       Pos = MemOps[i].Position;
+       Loc = MemOps[i].MBBI;
+     }
+   }
+ 
+   // Every remaining entry fit into the chain; merge them all at once.
+   if (mergeOps(MBB, ++Loc, SOffset, Base, Opcode, Regs, TII)) {
+     Merges.push_back(prior(Loc));
+     for (unsigned i = SIndex, e = MemOps.size(); i != e; ++i) {
+       MBB.erase(MemOps[i].MBBI);
+       MemOps[i].Merged = true;
+     }
+   }
+ 
+   return Merges;
+ }
+ 
+ /// isMatchingDecrement - Return true if MI is a SUBri whose destination and
+ /// source register are both Base and whose immediate operand, decoded with
+ /// getAM2Offset, equals Bytes. A null MI never matches.
+ static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
+                                        unsigned Bytes) {
+   if (!MI || MI->getOpcode() != ARM::SUBri)
+     return false;
+   if (MI->getOperand(0).getReg() != Base ||
+       MI->getOperand(1).getReg() != Base)
+     return false;
+   return ARM_AM::getAM2Offset(MI->getOperand(2).getImm()) == Bytes;
+ }
+ 
+ /// isMatchingIncrement - Return true if MI is an ADDri whose destination and
+ /// source register are both Base and whose immediate operand, decoded with
+ /// getAM2Offset, equals Bytes. A null MI never matches.
+ static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
+                                        unsigned Bytes) {
+   if (!MI || MI->getOpcode() != ARM::ADDri)
+     return false;
+   if (MI->getOperand(0).getReg() != Base ||
+       MI->getOperand(1).getReg() != Base)
+     return false;
+   return ARM_AM::getAM2Offset(MI->getOperand(2).getImm()) == Bytes;
+ }
+ 
+ /// getLSMultipleTransferSize - Number of bytes transferred by the load /
+ /// store (single or multiple) MI, or 0 if MI is not one of the handled
+ /// opcodes.
+ static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
+   switch (MI->getOpcode()) {
+   case ARM::LDR:
+   case ARM::STR:
+   case ARM::FLDS:
+   case ARM::FSTS:
+     // Single word transfer.
+     return 4;
+   case ARM::FLDD:
+   case ARM::FSTD:
+     // Single double word transfer.
+     return 8;
+   case ARM::LDM:
+   case ARM::STM:
+     // Every operand after the first two is a transferred register.
+     return (MI->getNumOperands() - 2) * 4;
+   case ARM::FLDMS:
+   case ARM::FSTMS:
+   case ARM::FLDMD:
+   case ARM::FSTMD:
+     // The AM5 offset field is scaled by 4 to get bytes.
+     return ARM_AM::getAM5Offset(MI->getOperand(1).getImm()) * 4;
+   default:
+     break;
+   }
+   return 0;
+ }
+ 
+ /// mergeBaseUpdateLSMultiple - Fold preceding/trailing inc/dec of base
+ /// register into the LDM/STM/FLDM{D|S}/FSTM{D|S} op when possible:
+ ///
+ /// stmia rn, <ra, rb, rc>
+ /// rn := rn + 4 * 3;
+ /// =>
+ /// stmia rn!, <ra, rb, rc>
+ ///
+ /// rn := rn - 4 * 3;
+ /// ldmia rn, <ra, rb, rc>
+ /// =>
+ /// ldmdb rn!, <ra, rb, rc>
+ ///
+ /// Returns true only if an adjacent add / sub was actually folded into the
+ /// instruction at MBBI (and erased); returns false otherwise.
+ static bool mergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
+                                       MachineBasicBlock::iterator MBBI) {
+   MachineInstr *MI = MBBI;
+   unsigned Base = MI->getOperand(0).getReg();
+   unsigned Bytes = getLSMultipleTransferSize(MI);
+   int Opcode = MI->getOpcode();
+   bool isAM4 = Opcode == ARM::LDM || Opcode == ARM::STM;
+ 
+   if (isAM4) {
+     // Already a writeback form; nothing to fold.
+     if (ARM_AM::getAM4WBFlag(MI->getOperand(1).getImm()))
+       return false;
+ 
+     // Can't use the updating AM4 sub-mode if the base register is also a dest
+     // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
+     for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i) {
+       if (MI->getOperand(i).getReg() == Base)
+         return false;
+     }
+ 
+     ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MI->getOperand(1).getImm());
+     if (MBBI != MBB.begin()) {
+       MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
+       if (Mode == ARM_AM::ia &&
+           isMatchingDecrement(PrevMBBI, Base, Bytes)) {
+         MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(ARM_AM::db, true));
+         MBB.erase(PrevMBBI);
+         return true;
+       } else if (Mode == ARM_AM::ib &&
+                  isMatchingDecrement(PrevMBBI, Base, Bytes)) {
+         MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(ARM_AM::da, true));
+         MBB.erase(PrevMBBI);
+         return true;
+       }
+     }
+ 
+     // Only look at the following instruction if there is one; next(MBBI) may
+     // be the end of the block.
+     MachineBasicBlock::iterator NextMBBI = next(MBBI);
+     if (NextMBBI != MBB.end()) {
+       if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
+           isMatchingIncrement(NextMBBI, Base, Bytes)) {
+         MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(Mode, true));
+         MBB.erase(NextMBBI);
+         return true;
+       } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
+                  isMatchingDecrement(NextMBBI, Base, Bytes)) {
+         MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(Mode, true));
+         MBB.erase(NextMBBI);
+         return true;
+       }
+     }
+   } else {
+     // FLDM{D|S}, FSTM{D|S} addressing mode 5 ops.
+     if (ARM_AM::getAM5WBFlag(MI->getOperand(1).getImm()))
+       return false;
+ 
+     ARM_AM::AMSubMode Mode = ARM_AM::getAM5SubMode(MI->getOperand(1).getImm());
+     unsigned Offset = ARM_AM::getAM5Offset(MI->getOperand(1).getImm());
+     if (MBBI != MBB.begin()) {
+       MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
+       if (Mode == ARM_AM::ia &&
+           isMatchingDecrement(PrevMBBI, Base, Bytes)) {
+         MI->getOperand(1).setImm(ARM_AM::getAM5Opc(ARM_AM::db, true, Offset));
+         MBB.erase(PrevMBBI);
+         return true;
+       }
+     }
+ 
+     MachineBasicBlock::iterator NextMBBI = next(MBBI);
+     if (NextMBBI != MBB.end()) {
+       if (Mode == ARM_AM::ia &&
+           isMatchingIncrement(NextMBBI, Base, Bytes)) {
+         MI->getOperand(1).setImm(ARM_AM::getAM5Opc(ARM_AM::ia, true, Offset));
+         MBB.erase(NextMBBI);
+         // Report success only when the increment was really folded.
+         return true;
+       }
+     }
+   }
+ 
+   return false;
+ }
+ 
+ /// getPreIndexedLoadStoreOpcode - Map a plain load / store opcode to the
+ /// opcode used when a preceding base update is folded in. LDR / STR map to
+ /// their pre-indexed forms; the VFP ops map to the load / store multiple
+ /// opcodes. Aborts on any other opcode.
+ static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc) {
+   unsigned PreOpc = 0;
+   switch (Opc) {
+   case ARM::LDR:  PreOpc = ARM::LDR_PRE; break;
+   case ARM::STR:  PreOpc = ARM::STR_PRE; break;
+   case ARM::FLDS: PreOpc = ARM::FLDMS;   break;
+   case ARM::FLDD: PreOpc = ARM::FLDMD;   break;
+   case ARM::FSTS: PreOpc = ARM::FSTMS;   break;
+   case ARM::FSTD: PreOpc = ARM::FSTMD;   break;
+   default:
+     abort();
+   }
+   return PreOpc;
+ }
+ 
+ /// getPostIndexedLoadStoreOpcode - Map a plain load / store opcode to the
+ /// opcode used when a trailing base update is folded in. LDR / STR map to
+ /// their post-indexed forms; the VFP ops map to the load / store multiple
+ /// opcodes. Aborts on any other opcode.
+ static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc) {
+   unsigned PostOpc = 0;
+   switch (Opc) {
+   case ARM::LDR:  PostOpc = ARM::LDR_POST; break;
+   case ARM::STR:  PostOpc = ARM::STR_POST; break;
+   case ARM::FLDS: PostOpc = ARM::FLDMS;    break;
+   case ARM::FLDD: PostOpc = ARM::FLDMD;    break;
+   case ARM::FSTS: PostOpc = ARM::FSTMS;    break;
+   case ARM::FSTD: PostOpc = ARM::FSTMD;    break;
+   default:
+     abort();
+   }
+   return PostOpc;
+ }
+ 
+ /// mergeBaseUpdateLoadStore - Fold preceding/trailing inc/dec of base
+ /// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
+ ///
+ /// The load / store at MBBI must have a zero offset. If the instruction
+ /// before or after it adds / subtracts exactly the transfer size to / from
+ /// the base register, that instruction is erased and the load / store is
+ /// replaced by a pre- or post-indexed LDR / STR, or by a writeback
+ /// FLDM / FSTM of a single register. Returns true if the merge was done.
+ static bool mergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
+                                      MachineBasicBlock::iterator MBBI,
+                                      const TargetInstrInfo *TII) {
+   MachineInstr *MI = MBBI;
+   unsigned Base = MI->getOperand(1).getReg();
+   unsigned Bytes = getLSMultipleTransferSize(MI);
+   int Opcode = MI->getOpcode();
+   bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR;
+   // Only loads / stores with a zero offset can absorb a base update.
+   if ((isAM2 && ARM_AM::getAM2Offset(MI->getOperand(3).getImm()) != 0) ||
+       (!isAM2 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0))
+     return false;
+ 
+   bool isLd = Opcode == ARM::LDR || Opcode == ARM::FLDS || Opcode == ARM::FLDD;
+   // Can't do the merge if the destination register is the same as the would-be
+   // writeback register.
+   if (isLd && MI->getOperand(0).getReg() == Base)
+     return false;
+ 
+   bool DoMerge = false;
+   ARM_AM::AddrOpc AddSub = ARM_AM::add;
+   unsigned NewOpc = 0;
+   // First try the instruction before the load / store (pre-indexed form).
+   // A preceding increment is only accepted for LDR / STR.
+   if (MBBI != MBB.begin()) {
+     MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
+     if (isMatchingDecrement(PrevMBBI, Base, Bytes)) {
+       DoMerge = true;
+       AddSub = ARM_AM::sub;
+       NewOpc = getPreIndexedLoadStoreOpcode(Opcode);
+     } else if (isAM2 && isMatchingIncrement(PrevMBBI, Base, Bytes)) {
+       DoMerge = true;
+       NewOpc = getPreIndexedLoadStoreOpcode(Opcode);
+     }
+     if (DoMerge)
+       MBB.erase(PrevMBBI);
+   }
+ 
+   // Otherwise try the instruction after it (post-indexed form). A trailing
+   // decrement is only accepted for LDR / STR.
+   // NOTE(review): MBBI != MBB.end() always holds here since MBBI was already
+   // dereferenced; next(MBBI) may still be end() - confirm this is safe.
+   if (!DoMerge && MBBI != MBB.end()) {
+     MachineBasicBlock::iterator NextMBBI = next(MBBI);
+     if (isAM2 && isMatchingDecrement(NextMBBI, Base, Bytes)) {
+       DoMerge = true;
+       AddSub = ARM_AM::sub;
+       NewOpc = getPostIndexedLoadStoreOpcode(Opcode);
+     } else if (isMatchingIncrement(NextMBBI, Base, Bytes)) {
+       DoMerge = true;
+       NewOpc = getPostIndexedLoadStoreOpcode(Opcode);
+     }
+     if (DoMerge)
+       MBB.erase(NextMBBI);
+   }
+ 
+   if (!DoMerge)
+     return false;
+ 
+   // Encode the addressing operand: an AM2 add / sub of Bytes for LDR / STR, or
+   // an AM5 writeback ia / db of one register (a count of 2 for a double,
+   // 1 for a single) for the VFP ops.
+   bool isDPR = NewOpc == ARM::FLDMD || NewOpc == ARM::FSTMD;
+   unsigned Offset = isAM2 ? ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift)
+     : ARM_AM::getAM5Opc((AddSub == ARM_AM::sub) ? ARM_AM::db : ARM_AM::ia,
+                         true, isDPR ? 2 : 1);
+   if (isLd) {
+     if (isAM2)
+       // Loaded register and the written-back base are both definitions.
+       BuildMI(MBB, MBBI, TII->get(NewOpc), MI->getOperand(0).getReg())
+         .addReg(Base, true).addReg(Base).addReg(0).addImm(Offset);
+     else
+       BuildMI(MBB, MBBI, TII->get(NewOpc)).addReg(Base)
+         .addImm(Offset).addReg(MI->getOperand(0).getReg(), true);
+   } else {
+     if (isAM2)
+       // The written-back base is the definition; the stored register is a use.
+       BuildMI(MBB, MBBI, TII->get(NewOpc), Base).addReg(MI->getOperand(0).getReg())
+         .addReg(Base).addReg(0).addImm(Offset);
+     else
+       BuildMI(MBB, MBBI, TII->get(NewOpc)).addReg(Base)
+         .addImm(Offset).addReg(MI->getOperand(0).getReg(), false);
+   }
+   // The original load / store has been replaced by the instruction built above.
+   MBB.erase(MBBI);
+ 
+   return true;
+ }
+ 
+ /// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
+ /// ops of the same base and incrementing offset into LDM / STM ops.
+ ///
+ /// The block is scanned once. Consecutive memory ops that share an opcode and
+ /// a base register are queued in MemOps, which is kept sorted by ascending
+ /// offset. Whenever the chain is broken, or the end of the block is reached,
+ /// the queued ops are handed to MergeLDR_STR and base register inc / dec
+ /// folding is attempted on both the merged and the unmerged ops.
+ ///
+ /// Returns true if at least one merge or fold was performed.
+ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
+   unsigned NumMerges = 0;
+   unsigned NumMemOps = 0;
+   MemOpQueue MemOps;
+   unsigned CurrBase = 0;   // Base register of the current chain, 0 if none.
+   int CurrOpc = -1;        // Opcode of the current chain, -1 if none.
+   unsigned CurrSize = 0;   // Access size in bytes of the current chain.
+   unsigned Position = 0;   // Index of the instruction within the block.
+   MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+   while (MBBI != E) {
+     bool Advance  = false;
+     bool TryMerge = false;
+     bool Clobber  = false;
+ 
+     int Opcode = MBBI->getOpcode();
+     bool isMemOp = false;
+     bool isAM2 = false;
+     unsigned Size = 4;
+     switch (Opcode) {
+     case ARM::LDR:
+     case ARM::STR:
+       // Only the register base + immediate form qualifies (no offset
+       // register).
+       isMemOp =
+         (MBBI->getOperand(1).isRegister() && MBBI->getOperand(2).getReg() == 0);
+       isAM2 = true;
+       break;
+     case ARM::FLDS:
+     case ARM::FSTS:
+       isMemOp = MBBI->getOperand(1).isRegister();
+       break;
+     case ARM::FLDD:
+     case ARM::FSTD:
+       isMemOp = MBBI->getOperand(1).isRegister();
+       Size = 8;
+       break;
+     }
+     if (isMemOp) {
+       unsigned Base = MBBI->getOperand(1).getReg();
+       // The encoded offset field is always the last operand.
+       unsigned OffIdx = MBBI->getNumOperands()-1;
+       unsigned OffField = MBBI->getOperand(OffIdx).getImm();
+       int Offset = isAM2
+         ? ARM_AM::getAM2Offset(OffField) : ARM_AM::getAM5Offset(OffField) * 4;
+       if (isAM2) {
+         if (ARM_AM::getAM2Op(OffField) == ARM_AM::sub)
+           Offset = -Offset;
+       } else {
+         if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
+           Offset = -Offset;
+       }
+       // Watch out for:
+       // r4 := ldr [r5]
+       // r5 := ldr [r5, #4]
+       // r6 := ldr [r5, #8]
+       //
+       // The second ldr has effectively broken the chain even though it
+       // looks like the later ldr(s) use the same base register. Try to
+       // merge the ldr's so far, including this one. But don't try to
+       // combine the following ldr(s).
+       Clobber = (Opcode == ARM::LDR && Base == MBBI->getOperand(0).getReg());
+       if (CurrBase == 0 && !Clobber) {
+         // Start of a new chain.
+         CurrBase = Base;
+         CurrOpc  = Opcode;
+         CurrSize = Size;
+         MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI));
+         NumMemOps++;
+         Advance = true;
+       } else {
+         if (Clobber) {
+           TryMerge = true;
+           Advance = true;
+         }
+ 
+         if (CurrOpc == Opcode && CurrBase == Base) {
+           // Continue adding to the queue.
+           if (Offset > MemOps.back().Offset) {
+             MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI));
+             NumMemOps++;
+             Advance = true;
+           } else {
+             // Insert in front of the first entry with a larger offset so the
+             // queue stays sorted.
+             for (MemOpQueueIter I = MemOps.begin(), IE = MemOps.end();
+                  I != IE; ++I) {
+               if (Offset < I->Offset) {
+                 MemOps.insert(I, MemOpQueueEntry(Offset, Position, MBBI));
+                 NumMemOps++;
+                 Advance = true;
+                 break;
+               } else if (Offset == I->Offset) {
+                 // Collision! This can't be merged!
+                 break;
+               }
+             }
+           }
+         }
+       }
+     }
+ 
+     if (Advance) {
+       ++Position;
+       ++MBBI;
+       // Reached the end of the block: nothing else is going to break the
+       // chain, so flush the queued memory ops now. Otherwise the loop would
+       // exit and they would never be merged.
+       if (MBBI == E)
+         TryMerge = true;
+     } else
+       TryMerge = true;
+ 
+     if (TryMerge) {
+       if (NumMemOps > 1) {
+         SmallVector<MachineBasicBlock::iterator,4> MBBII =
+           MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,MemOps);
+         // Try folding preceding/trailing base inc/dec into the generated
+         // LDM/STM ops.
+         for (unsigned i = 0, e = MBBII.size(); i < e; ++i)
+           if (mergeBaseUpdateLSMultiple(MBB, MBBII[i]))
+             NumMerges++;
+         NumMerges += MBBII.size();
+       }
+ 
+       // Try folding preceding/trailing base inc/dec into those load/store
+       // that were not merged to form LDM/STM ops.
+       for (unsigned i = 0; i != NumMemOps; ++i)
+         if (!MemOps[i].Merged)
+           if (mergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII))
+             NumMerges++;
+ 
+       // Reset the chain state.
+       CurrBase = 0;
+       CurrOpc = -1;
+       if (NumMemOps) {
+         MemOps.clear();
+         NumMemOps = 0;
+       }
+ 
+       // If iterator hasn't been advanced and this is not a memory op, skip it.
+       // It can't start a new chain anyway.
+       if (!Advance && !isMemOp && MBBI != E) {
+         ++Position;
+         ++MBBI;
+       }
+     }
+   }
+   return NumMerges > 0;
+ }
+ 
+ /// MergeReturnIntoLDM - If this is an exit BB, try merging the return op
+ /// (bx lr) into the preceding stack restore so that it restores the value
+ /// of LR directly into pc.
+ ///   ldmfd sp!, {r7, lr}
+ ///   bx lr
+ /// =>
+ ///   ldmfd sp!, {r7, pc}
+ /// Returns true if the block was changed.
+ bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
+   if (MBB.empty()) return false;
+ 
+   // The block must end in a return that has an instruction in front of it.
+   MachineBasicBlock::iterator RetI = prior(MBB.end());
+   if (RetI->getOpcode() != ARM::BX_RET || RetI == MBB.begin())
+     return false;
+ 
+   MachineInstr *Restore = prior(RetI);
+   if (Restore->getOpcode() != ARM::LDM)
+     return false;
+ 
+   // Only the last operand of the LDM is compared against LR.
+   MachineOperand &LastReg = Restore->getOperand(Restore->getNumOperands()-1);
+   if (LastReg.getReg() != ARM::LR)
+     return false;
+ 
+   // Load straight into pc and drop the now redundant return.
+   Restore->setInstrDescriptor(TII->get(ARM::LDM_RET));
+   LastReg.setReg(ARM::PC);
+   MBB.erase(RetI);
+   return true;
+ }
+ 
+ /// runOnMachineFunction - Run the load / store multiple optimization and the
+ /// return-into-LDM merge on every basic block of the function. Returns true
+ /// if any block was modified.
+ bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
+   TII = Fn.getTarget().getInstrInfo();
+ 
+   bool Changed = false;
+   for (MachineFunction::iterator BB = Fn.begin(), BBEnd = Fn.end();
+        BB != BBEnd; ++BB) {
+     // Both transformations always run, whether or not the other one fired.
+     if (LoadStoreMultipleOpti(*BB))
+       Changed = true;
+     if (MergeReturnIntoLDM(*BB))
+       Changed = true;
+   }
+   return Changed;
+ }


Index: llvm/lib/Target/ARM/ARMMachineFunctionInfo.h
diff -c /dev/null llvm/lib/Target/ARM/ARMMachineFunctionInfo.h:1.1
*** /dev/null	Fri Jan 19 01:51:57 2007
--- llvm/lib/Target/ARM/ARMMachineFunctionInfo.h	Fri Jan 19 01:51:42 2007
***************
*** 0 ****
--- 1,136 ----
+ //====- ARMMachineFunctionInfo.h - ARM machine function info ----*- C++ -*-===//
+ // 
+ //                     The LLVM Compiler Infrastructure
+ //
+ // This file was developed by Evan Cheng and is distributed under
+ // the University of Illinois Open Source License. See LICENSE.TXT for details.
+ // 
+ //===----------------------------------------------------------------------===//
+ //
+ // This file declares ARM-specific per-machine-function information.
+ //
+ //===----------------------------------------------------------------------===//
+ 
+ #ifndef ARMMACHINEFUNCTIONINFO_H
+ #define ARMMACHINEFUNCTIONINFO_H
+ 
+ #include "ARMSubtarget.h"
+ #include "llvm/CodeGen/MachineFunction.h"
+ #include "llvm/Target/TargetMachine.h"
+ 
+ namespace llvm {
+ 
+ /// ARMFunctionInfo - This class is derived from MachineFunctionInfo and holds
+ /// private ARM target-specific information for each MachineFunction.
+ class ARMFunctionInfo : public MachineFunctionInfo {
+ 
+   /// isThumb - True if this function is compiled under Thumb mode. Taken from
+   /// the subtarget when constructed from a MachineFunction, false otherwise.
+   bool isThumb;
+ 
+   /// VarArgsRegSaveSize - Size of the register save area for vararg functions.
+   ///
+   unsigned VarArgsRegSaveSize;
+ 
+   /// FramePtrSpilled - True if FP register is spilled. Set by
+   /// processFunctionBeforeCalleeSavedScan().
+   bool FramePtrSpilled;
+ 
+   /// FramePtrSpillOffset - The frame pointer spill stack offset. Only
+   /// meaningful when FramePtrSpilled is set.
+   unsigned FramePtrSpillOffset;
+ 
+   /// GPRCS1Offset, GPRCS2Offset, DPRCSOffset - Starting offsets of the callee
+   /// saved register spill areas. For Mac OS X:
+   ///
+   /// GPR callee-saved (1) : r4, r5, r6, r7, lr
+   /// --------------------------------------------
+   /// GPR callee-saved (2) : r8, r10, r11
+   /// --------------------------------------------
+   /// DPR callee-saved : d8 - d15
+   unsigned GPRCS1Offset;
+   unsigned GPRCS2Offset;
+   unsigned DPRCSOffset;
+ 
+   /// GPRCS1Size, GPRCS2Size, DPRCSSize - Sizes of the callee saved register
+   /// spill areas.
+   unsigned GPRCS1Size;
+   unsigned GPRCS2Size;
+   unsigned DPRCSSize;
+ 
+   /// GPRCS1Frames, GPRCS2Frames, DPRCSFrames - Keep track of the frame indices
+   /// which belong to these spill areas.
+   std::set<int> GPRCS1Frames;
+   std::set<int> GPRCS2Frames;
+   std::set<int> DPRCSFrames;
+ 
+   /// JumpTableUId - Next unique id to hand out for a jumptable.
+   ///
+   unsigned JumpTableUId;
+ 
+ public:
+   /// Default state: ARM mode, every size, offset and counter zero.
+   ARMFunctionInfo()
+     : isThumb(false),
+       VarArgsRegSaveSize(0),
+       FramePtrSpilled(false), FramePtrSpillOffset(0),
+       GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
+       GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0),
+       JumpTableUId(0) {}
+ 
+   /// Same as the default state, except that the Thumb flag is read from the
+   /// ARM subtarget of MF's target.
+   ARMFunctionInfo(MachineFunction &MF)
+     : isThumb(MF.getTarget().getSubtarget<ARMSubtarget>().isThumb()),
+       VarArgsRegSaveSize(0),
+       FramePtrSpilled(false), FramePtrSpillOffset(0),
+       GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
+       GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0),
+       JumpTableUId(0) {}
+ 
+   bool isThumbFunction() const { return isThumb; }
+ 
+   // Vararg register save area.
+   unsigned getVarArgsRegSaveSize() const { return VarArgsRegSaveSize; }
+   void setVarArgsRegSaveSize(unsigned s) { VarArgsRegSaveSize = s; }
+ 
+   // Frame pointer spill.
+   bool isFramePtrSpilled() const { return FramePtrSpilled; }
+   void setFramePtrSpilled(bool s) { FramePtrSpilled = s; }
+   unsigned getFramePtrSpillOffset() const { return FramePtrSpillOffset; }
+   void setFramePtrSpillOffset(unsigned o) { FramePtrSpillOffset = o; }
+ 
+   // Callee saved spill area offsets.
+   unsigned getGPRCalleeSavedArea1Offset() const { return GPRCS1Offset; }
+   unsigned getGPRCalleeSavedArea2Offset() const { return GPRCS2Offset; }
+   unsigned getDPRCalleeSavedAreaOffset()  const { return DPRCSOffset; }
+ 
+   void setGPRCalleeSavedArea1Offset(unsigned o) { GPRCS1Offset = o; }
+   void setGPRCalleeSavedArea2Offset(unsigned o) { GPRCS2Offset = o; }
+   void setDPRCalleeSavedAreaOffset(unsigned o)  { DPRCSOffset = o; }
+ 
+   // Callee saved spill area sizes.
+   unsigned getGPRCalleeSavedArea1Size() const { return GPRCS1Size; }
+   unsigned getGPRCalleeSavedArea2Size() const { return GPRCS2Size; }
+   unsigned getDPRCalleeSavedAreaSize()  const { return DPRCSSize; }
+ 
+   void setGPRCalleeSavedArea1Size(unsigned s) { GPRCS1Size = s; }
+   void setGPRCalleeSavedArea2Size(unsigned s) { GPRCS2Size = s; }
+   void setDPRCalleeSavedAreaSize(unsigned s)  { DPRCSSize = s; }
+ 
+   // Membership tests: true if frame index fi was recorded for the area.
+   bool isGPRCalleeSavedArea1Frame(unsigned fi) const {
+     return GPRCS1Frames.count(fi) != 0;
+   }
+   bool isGPRCalleeSavedArea2Frame(unsigned fi) const {
+     return GPRCS2Frames.count(fi) != 0;
+   }
+   bool isDPRCalleeSavedAreaFrame(unsigned fi) const {
+     return DPRCSFrames.count(fi) != 0;
+   }
+ 
+   // Record frame index fi as belonging to the given spill area.
+   void addGPRCalleeSavedArea1Frame(unsigned fi) { GPRCS1Frames.insert(fi); }
+   void addGPRCalleeSavedArea2Frame(unsigned fi) { GPRCS2Frames.insert(fi); }
+   void addDPRCalleeSavedAreaFrame(unsigned fi)  { DPRCSFrames.insert(fi); }
+ 
+   /// createJumpTableUId - Return the current jumptable id and bump the
+   /// counter for the next caller.
+   unsigned createJumpTableUId() {
+     unsigned Id = JumpTableUId;
+     ++JumpTableUId;
+     return Id;
+   }
+ };
+ } // End llvm namespace
+ 
+ #endif // ARMMACHINEFUNCTIONINFO_H


Index: llvm/lib/Target/ARM/ARMSubtarget.cpp
diff -c /dev/null llvm/lib/Target/ARM/ARMSubtarget.cpp:1.1
*** /dev/null	Fri Jan 19 01:51:57 2007
--- llvm/lib/Target/ARM/ARMSubtarget.cpp	Fri Jan 19 01:51:42 2007
***************
*** 0 ****
--- 1,52 ----
+ //===-- ARMSubtarget.cpp - ARM Subtarget Information ------------*- C++ -*-===//
+ //
+ //                     The LLVM Compiler Infrastructure
+ //
+ // This file was developed by Evan Cheng and is distributed under the
+ // University of Illinois Open Source License. See LICENSE.TXT for details.
+ //
+ //===----------------------------------------------------------------------===//
+ //
+ // This file implements the ARM specific subclass of TargetSubtarget.
+ //
+ //===----------------------------------------------------------------------===//
+ 
+ #include "ARMSubtarget.h"
+ #include "ARMGenSubtarget.inc"
+ #include "llvm/Module.h"
+ #include "llvm/Support/CommandLine.h"
+ using namespace llvm;
+ 
+ // FIXME: this is temporary.
+ // Command line switch that selects Thumb mode; it is read by the
+ // ARMSubtarget constructor.
+ static cl::opt<bool> Thumb("enable-thumb",
+                            cl::desc("Switch to thumb mode in ARM backend"));
+ 
+ /// ARMSubtarget - Start from the V4T / no VFP2 / non-Darwin defaults, apply
+ /// the feature string FS, then pick up the Thumb switch and the Darwin
+ /// specific settings implied by the target triple of module M.
+ ARMSubtarget::ARMSubtarget(const Module &M, const std::string &FS)
+   : ARMArchVersion(V4T), HasVFP2(false), IsDarwin(false),
+     UseThumbBacktraces(false), IsR9Reserved(false), stackAlignment(8) {
+   // The feature string is parsed against the "generic" CPU.
+   std::string CPU = "generic";
+   ParseSubtargetFeatures(FS, CPU);
+ 
+   IsThumb = Thumb;
+ 
+   // Decide whether we are targeting Darwin. A triple longer than five
+   // characters is searched for "-darwin". An empty triple selects Darwin only
+   // when this code was itself compiled with __APPLE__ defined.
+   const std::string &Triple = M.getTargetTriple();
+   if (Triple.length() > 5)
+     IsDarwin = Triple.find("-darwin") != std::string::npos;
+ #if defined(__APPLE__)
+   else if (Triple.empty())
+     IsDarwin = true;
+ #endif
+ 
+   // Everything below is Darwin specific.
+   if (!IsDarwin)
+     return;
+   UseThumbBacktraces = true;
+   IsR9Reserved = true;
+   stackAlignment = 4;
+ }


Index: llvm/lib/Target/ARM/ARMSubtarget.h
diff -c /dev/null llvm/lib/Target/ARM/ARMSubtarget.h:1.1
*** /dev/null	Fri Jan 19 01:51:57 2007
--- llvm/lib/Target/ARM/ARMSubtarget.h	Fri Jan 19 01:51:42 2007
***************
*** 0 ****
--- 1,82 ----
+ //=====---- ARMSubtarget.h - Define Subtarget for the ARM -----*- C++ -*--====//
+ //
+ //                     The LLVM Compiler Infrastructure
+ //
+ // This file was developed by Evan Cheng and is distributed under the
+ // University of Illinois Open Source License. See LICENSE.TXT for details.
+ //
+ //===----------------------------------------------------------------------===//
+ //
+ // This file declares the ARM specific subclass of TargetSubtarget.
+ //
+ //===----------------------------------------------------------------------===//
+ 
+ #ifndef ARMSUBTARGET_H
+ #define ARMSUBTARGET_H
+ 
+ #include "llvm/Target/TargetSubtarget.h"
+ #include <string>
+ 
+ namespace llvm {
+ class Module;
+ 
+ class ARMSubtarget : public TargetSubtarget {
+ protected:
+   enum ARMArchEnum {
+     V4T,
+     V5T,
+     V5TE,
+     V6
+   };
+ 
+   /// ARMArchVersion - ARM architecture version: V4T (base), V5T, V5TE,
+   /// and V6.
+   ARMArchEnum ARMArchVersion;
+ 
+   /// HasVFP2 - True if the processor supports Vector Floating Point (VFP) V2
+   /// instructions.
+   bool HasVFP2;
+ 
+   /// IsThumb - True if we are in thumb mode, false if in ARM mode.
+   bool IsThumb;
+ 
+   /// IsDarwin - True if the target is Darwin.
+   bool IsDarwin;
+ 
+   /// UseThumbBacktraces - True if we use thumb style backtraces.
+   bool UseThumbBacktraces;
+ 
+   /// IsR9Reserved - True if R9 is not available as a general purpose register.
+   bool IsR9Reserved;
+ 
+   /// stackAlignment - The minimum alignment known to hold of the stack frame on
+   /// entry to the function and which must be maintained by every function.
+   unsigned stackAlignment;
+ 
+ public:
+   /// This constructor initializes the data members to match that
+   /// of the specified module.
+   ///
+   ARMSubtarget(const Module &M, const std::string &FS);
+ 
+   /// ParseSubtargetFeatures - Parses features string setting specified
+   /// subtarget options.  Definition of function is auto generated by tblgen.
+   void ParseSubtargetFeatures(const std::string &FS, const std::string &CPU);
+ 
+   // Architecture level queries. Each level includes the ones before it; V4T
+   // is the lowest enumerator, so hasV4TOps() holds for every version.
+   bool hasV4TOps() const {
+     return ARMArchVersion >= V4T;
+   }
+   bool hasV5TOps() const {
+     return ARMArchVersion >= V5T;
+   }
+   bool hasV5TEOps() const {
+     return ARMArchVersion >= V5TE;
+   }
+   bool hasV6Ops() const {
+     return ARMArchVersion >= V6;
+   }
+ 
+   bool hasVFP2() const { return HasVFP2; }
+ 
+   bool isDarwin() const { return IsDarwin; }
+   bool isThumb() const { return IsThumb; }
+ 
+   bool useThumbBacktraces() const { return UseThumbBacktraces; }
+   bool isR9Reserved() const { return IsR9Reserved; }
+ 
+   /// getStackAlignment - Returns the minimum alignment known to hold of the
+   /// stack frame on entry to the function and which must be maintained by every
+   /// function for this subtarget.
+   unsigned getStackAlignment() const { return stackAlignment; }
+ };
+ } // End llvm namespace
+ 
+ #endif  // ARMSUBTARGET_H


Index: llvm/lib/Target/ARM/README-Thumb.txt
diff -c /dev/null llvm/lib/Target/ARM/README-Thumb.txt:1.1
*** /dev/null	Fri Jan 19 01:51:57 2007
--- llvm/lib/Target/ARM/README-Thumb.txt	Fri Jan 19 01:51:42 2007
***************
*** 0 ****
--- 1,17 ----
+ //===---------------------------------------------------------------------===//
+ // Random ideas for the ARM backend (Thumb specific).
+ //===---------------------------------------------------------------------===//
+ 
+ * Add support for compiling functions in both ARM and Thumb mode, then keeping
+   the smaller of the two.
+ * Add support for compiling individual basic blocks in thumb mode, when in a 
+   larger ARM function.  This can be used for presumed cold code, like paths
+   to abort (failure path of asserts), EH handling code, etc.
+ 
+ * Thumb doesn't have normal pre/post increment addressing modes, but you can
+   load/store 32-bit integers with pre/postinc by using load/store multiple
+   instrs with a single register.
+ 
+ * Make better use of high registers r8, r10, r11, r12 (ip). Some variants of add
+   and cmp instructions can use high registers. Also, we can use them as
+   temporaries to spill values into.


Index: llvm/lib/Target/ARM/ARM.h
diff -u llvm/lib/Target/ARM/ARM.h:1.7 llvm/lib/Target/ARM/ARM.h:1.8
--- llvm/lib/Target/ARM/ARM.h:1.7	Thu Nov  2 09:00:02 2006
+++ llvm/lib/Target/ARM/ARM.h	Fri Jan 19 01:51:42 2007
@@ -20,43 +20,77 @@
 #include <cassert>
 
 namespace llvm {
-  // Enums corresponding to ARM condition codes
-  namespace ARMCC {
-    enum CondCodes {
-      EQ,
-      NE,
-      CS,
-      CC,
-      MI,
-      PL,
-      VS,
-      VC,
-      HI,
-      LS,
-      GE,
-      LT,
-      GT,
-      LE,
-      AL
-    };
+
+class ARMTargetMachine;
+class FunctionPass;
+
+// Enums corresponding to ARM condition codes
+namespace ARMCC {
+  enum CondCodes {
+    EQ,
+    NE,
+    HS,
+    LO,
+    MI,
+    PL,
+    VS,
+    VC,
+    HI,
+    LS,
+    GE,
+    LT,
+    GT,
+    LE,
+    AL
+  };
+  
+  inline static CondCodes getOppositeCondition(CondCodes CC){
+    switch (CC) {
+    default: assert(0 && "Unknown condition code");
+    case EQ: return NE;
+    case NE: return EQ;
+    case HS: return LO;
+    case LO: return HS;
+    case MI: return PL;
+    case PL: return MI;
+    case VS: return VC;
+    case VC: return VS;
+    case HI: return LS;
+    case LS: return HI;
+    case GE: return LT;
+    case LT: return GE;
+    case GT: return LE;
+    case LE: return GT;
+    }
   }
+}
 
-  namespace ARMShift {
-    enum ShiftTypes {
-      LSL,
-      LSR,
-      ASR,
-      ROR,
-      RRX
-    };
+inline static const char *ARMCondCodeToString(ARMCC::CondCodes CC) {
+  switch (CC) {
+  default: assert(0 && "Unknown condition code");
+  case ARMCC::EQ:  return "eq";
+  case ARMCC::NE:  return "ne";
+  case ARMCC::HS:  return "hs";
+  case ARMCC::LO:  return "lo";
+  case ARMCC::MI:  return "mi";
+  case ARMCC::PL:  return "pl";
+  case ARMCC::VS:  return "vs";
+  case ARMCC::VC:  return "vc";
+  case ARMCC::HI:  return "hi";
+  case ARMCC::LS:  return "ls";
+  case ARMCC::GE:  return "ge";
+  case ARMCC::LT:  return "lt";
+  case ARMCC::GT:  return "gt";
+  case ARMCC::LE:  return "le";
+  case ARMCC::AL:  return "al";
   }
+}
 
-  class FunctionPass;
-  class TargetMachine;
+FunctionPass *createARMISelDag(ARMTargetMachine &TM);
+FunctionPass *createARMCodePrinterPass(std::ostream &O, ARMTargetMachine &TM);
+FunctionPass *createARMLoadStoreOptimizationPass();
+FunctionPass *createARMConstantIslandPass();
 
-  FunctionPass *createARMISelDag(TargetMachine &TM);
-  FunctionPass *createARMCodePrinterPass(std::ostream &OS, TargetMachine &TM);
-  FunctionPass *createARMFixMulPass();
 } // end namespace llvm;
 
 // Defines symbolic names for ARM registers.  This defines a mapping from


Index: llvm/lib/Target/ARM/ARM.td
diff -u llvm/lib/Target/ARM/ARM.td:1.3 llvm/lib/Target/ARM/ARM.td:1.4
--- llvm/lib/Target/ARM/ARM.td:1.3	Wed May 17 19:11:26 2006
+++ llvm/lib/Target/ARM/ARM.td	Fri Jan 19 01:51:42 2007
@@ -18,6 +18,73 @@
 include "../Target.td"
 
 //===----------------------------------------------------------------------===//
+// ARM Subtarget features.
+//
+
+def ArchV4T     : SubtargetFeature<"v4t", "ARMArchVersion", "V4T",
+                                   "ARM v4T">;
+def ArchV5T     : SubtargetFeature<"v5t", "ARMArchVersion", "V5T",
+                                   "ARM v5T">;
+def ArchV5TE    : SubtargetFeature<"v5te", "ARMArchVersion", "V5TE",
+                                   "ARM v5TE, v5TEj, v5TExp">;
+def ArchV6      : SubtargetFeature<"v6", "ARMArchVersion", "V6",
+                                   "ARM v6">;
+def FeatureVFP2 : SubtargetFeature<"vfp2", "HasVFP2", "true",
+                                   "Enable VFP2 instructions ">;
+
+//===----------------------------------------------------------------------===//
+// ARM Processors supported.
+//
+
+class Proc<string Name, list<SubtargetFeature> Features>
+ : Processor<Name, NoItineraries, Features>;
+
+// V4 Processors.
+def : Proc<"generic",         []>;
+def : Proc<"arm8",            []>;
+def : Proc<"arm810",          []>;
+def : Proc<"strongarm",       []>;
+def : Proc<"strongarm110",    []>;
+def : Proc<"strongarm1100",   []>;
+def : Proc<"strongarm1110",   []>;
+
+// V4T Processors.
+def : Proc<"arm7tdmi",        [ArchV4T]>;
+def : Proc<"arm7tdmi-s",      [ArchV4T]>;
+def : Proc<"arm710t",         [ArchV4T]>;
+def : Proc<"arm720t",         [ArchV4T]>;
+def : Proc<"arm9",            [ArchV4T]>;
+def : Proc<"arm9tdmi",        [ArchV4T]>;
+def : Proc<"arm920",          [ArchV4T]>;
+def : Proc<"arm920t",         [ArchV4T]>;
+def : Proc<"arm922t",         [ArchV4T]>;
+def : Proc<"arm940t",         [ArchV4T]>;
+def : Proc<"ep9312",          [ArchV4T]>;
+
+// V5T Processors.
+def : Proc<"arm10tdmi",       [ArchV5T]>;
+def : Proc<"arm1020t",        [ArchV5T]>;
+
+// V5TE Processors.
+def : Proc<"arm9e",           [ArchV5TE]>;
+def : Proc<"arm946e-s",       [ArchV5TE]>;
+def : Proc<"arm966e-s",       [ArchV5TE]>;
+def : Proc<"arm968e-s",       [ArchV5TE]>;
+def : Proc<"arm10e",          [ArchV5TE]>;
+def : Proc<"arm1020e",        [ArchV5TE]>;
+def : Proc<"arm1022e",        [ArchV5TE]>;
+def : Proc<"xscale",          [ArchV5TE]>;
+def : Proc<"iwmmxt",          [ArchV5TE]>;
+
+// V6 Processors.
+def : Proc<"arm1136j-s",      [ArchV6]>;
+def : Proc<"arm1136jf-s",     [ArchV6, FeatureVFP2]>;
+def : Proc<"arm1176jz-s",     [ArchV6]>;
+def : Proc<"arm1176jzf-s",    [ArchV6, FeatureVFP2]>;
+def : Proc<"mpcorenovfp",     [ArchV6]>;
+def : Proc<"mpcore",          [ArchV6, FeatureVFP2]>;
+
+//===----------------------------------------------------------------------===//
 // Register File Description
 //===----------------------------------------------------------------------===//
 
@@ -31,8 +98,14 @@
 
 def ARMInstrInfo : InstrInfo {
   // Define how we want to layout our target-specific information field.
-  let TSFlagsFields = [];
-  let TSFlagsShifts = [];
+  let TSFlagsFields = ["AddrModeBits",
+                       "SizeFlag",
+                       "IndexModeBits",
+                       "Opcode"];
+  let TSFlagsShifts = [0,
+                       4,
+                       7,
+                       9];
 }
 
 //===----------------------------------------------------------------------===//


Index: llvm/lib/Target/ARM/ARMAsmPrinter.cpp
diff -u llvm/lib/Target/ARM/ARMAsmPrinter.cpp:1.42 llvm/lib/Target/ARM/ARMAsmPrinter.cpp:1.43
--- llvm/lib/Target/ARM/ARMAsmPrinter.cpp:1.42	Thu Dec 21 16:59:58 2006
+++ llvm/lib/Target/ARM/ARMAsmPrinter.cpp	Fri Jan 19 01:51:42 2007
@@ -15,54 +15,49 @@
 
 #define DEBUG_TYPE "asm-printer"
 #include "ARM.h"
-#include "ARMInstrInfo.h"
+#include "ARMTargetMachine.h"
+#include "ARMAddressingModes.h"
+#include "ARMConstantPoolValue.h"
+#include "ARMMachineFunctionInfo.h"
 #include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
 #include "llvm/Module.h"
 #include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/DwarfWriter.h"
+#include "llvm/CodeGen/MachineDebugInfo.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineConstantPool.h"
-#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
 #include "llvm/Target/TargetAsmInfo.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Target/TargetMachine.h"
-#include "llvm/Support/Mangler.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/StringExtras.h"
-#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Mangler.h"
 #include "llvm/Support/MathExtras.h"
 #include <cctype>
+#include <iostream>
+#include <set>
 using namespace llvm;
 
 STATISTIC(EmittedInsts, "Number of machine instrs printed");
 
 namespace {
-  static const char *ARMCondCodeToString(ARMCC::CondCodes CC) {
-    switch (CC) {
-    default: assert(0 && "Unknown condition code");
-    case ARMCC::EQ:  return "eq";
-    case ARMCC::NE:  return "ne";
-    case ARMCC::CS:  return "cs";
-    case ARMCC::CC:  return "cc";
-    case ARMCC::MI:  return "mi";
-    case ARMCC::PL:  return "pl";
-    case ARMCC::VS:  return "vs";
-    case ARMCC::VC:  return "vc";
-    case ARMCC::HI:  return "hi";
-    case ARMCC::LS:  return "ls";
-    case ARMCC::GE:  return "ge";
-    case ARMCC::LT:  return "lt";
-    case ARMCC::GT:  return "gt";
-    case ARMCC::LE:  return "le";
-    case ARMCC::AL:  return "al";
-    }
-  }
-
   struct VISIBILITY_HIDDEN ARMAsmPrinter : public AsmPrinter {
     ARMAsmPrinter(std::ostream &O, TargetMachine &TM, const TargetAsmInfo *T)
-      : AsmPrinter(O, TM, T) {
+      : AsmPrinter(O, TM, T), DW(O, this, T), AFI(NULL), InCPMode(false) {
+      Subtarget = &TM.getSubtarget<ARMSubtarget>();
     }
 
+    DwarfWriter DW;
+
+    /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
+    /// make the right decision when printing asm code for different targets.
+    const ARMSubtarget *Subtarget;
+
+    /// AFI - Keep a pointer to ARMFunctionInfo for the current
+    /// MachineFunction
+    ARMFunctionInfo *AFI;
+
     /// We name each basic block in a Function with a unique number, so
     /// that we can consistently refer to them later. This is cleared
     /// at the beginning of each call to runOnMachineFunction().
@@ -70,22 +65,79 @@
     typedef std::map<const Value *, unsigned> ValueMapTy;
     ValueMapTy NumberForBB;
 
+    /// Keeps the set of GlobalValues that require non-lazy-pointers for
+    /// indirect access.
+    std::set<std::string> GVNonLazyPtrs;
+
+    /// Keeps the set of external function GlobalAddresses that the asm
+    /// printer should generate stubs for.
+    std::set<std::string> FnStubs;
+
+    /// True if asm printer is printing a series of CONSTPOOL_ENTRY.
+    bool InCPMode;
+    
     virtual const char *getPassName() const {
       return "ARM Assembly Printer";
     }
 
-    void printAddrMode1(const MachineInstr *MI, int opNum);
-    void printAddrMode2(const MachineInstr *MI, int opNum);
-    void printAddrMode5(const MachineInstr *MI, int opNum);
-    void printOperand(const MachineInstr *MI, int opNum);
-    void printMemOperand(const MachineInstr *MI, int opNum,
-                         const char *Modifier = 0);
+    void printOperand(const MachineInstr *MI, int opNum,
+                      const char *Modifier = 0);
+    void printSOImmOperand(const MachineInstr *MI, int opNum);
+    void printSORegOperand(const MachineInstr *MI, int opNum);
+    void printAddrMode2Operand(const MachineInstr *MI, int OpNo);
+    void printAddrMode2OffsetOperand(const MachineInstr *MI, int OpNo);
+    void printAddrMode3Operand(const MachineInstr *MI, int OpNo);
+    void printAddrMode3OffsetOperand(const MachineInstr *MI, int OpNo);
+    void printAddrMode4Operand(const MachineInstr *MI, int OpNo,
+                               const char *Modifier = 0);
+    void printAddrMode5Operand(const MachineInstr *MI, int OpNo,
+                               const char *Modifier = 0);
+    void printAddrModePCOperand(const MachineInstr *MI, int OpNo,
+                                const char *Modifier = 0);
+    void printThumbAddrModeRROperand(const MachineInstr *MI, int OpNo);
+    void printThumbAddrModeRI5Operand(const MachineInstr *MI, int OpNo,
+                                      unsigned Scale);
+    void printThumbAddrModeRI5_1Operand(const MachineInstr *MI, int OpNo);
+    void printThumbAddrModeRI5_2Operand(const MachineInstr *MI, int OpNo);
+    void printThumbAddrModeRI5_4Operand(const MachineInstr *MI, int OpNo);
+    void printThumbAddrModeSPOperand(const MachineInstr *MI, int OpNo);
     void printCCOperand(const MachineInstr *MI, int opNum);
+    void printPCLabel(const MachineInstr *MI, int opNum);
+    void printRegisterList(const MachineInstr *MI, int opNum);
+    void printCPInstOperand(const MachineInstr *MI, int opNum,
+                            const char *Modifier);
+    void printJTBlockOperand(const MachineInstr *MI, int opNum);
+
+    virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+                                 unsigned AsmVariant, const char *ExtraCode);
 
     bool printInstruction(const MachineInstr *MI);  // autogenerated.
+    void printMachineInstruction(const MachineInstr *MI);
     bool runOnMachineFunction(MachineFunction &F);
     bool doInitialization(Module &M);
     bool doFinalization(Module &M);
+
+    virtual void EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) {
+      printDataDirective(MCPV->getType());
+
+      ARMConstantPoolValue *ACPV = (ARMConstantPoolValue*)MCPV;
+      std::string Name = Mang->getValueName(ACPV->getGV());
+      if (ACPV->isNonLazyPointer()) {
+        GVNonLazyPtrs.insert(Name);
+        O << TAI->getPrivateGlobalPrefix() << Name << "$non_lazy_ptr";
+      } else
+        O << Name;
+      if (ACPV->getPCAdjustment() != 0)
+        O << "-(" << TAI->getPrivateGlobalPrefix() << "PC"
+          << utostr(ACPV->getLabelId())
+          << "+" << (unsigned)ACPV->getPCAdjustment() << ")";
+      O << "\n";
+    }
+    
+    void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.setPreservesAll();
+      AU.addRequired<MachineDebugInfo>();
+    }
   };
 } // end of anonymous namespace
 
@@ -97,55 +149,64 @@
 /// regardless of whether the function is in SSA form.
 ///
 FunctionPass *llvm::createARMCodePrinterPass(std::ostream &o,
-                                               TargetMachine &tm) {
+                                             ARMTargetMachine &tm) {
   return new ARMAsmPrinter(o, tm, tm.getTargetAsmInfo());
 }
 
-/// runOnMachineFunction - This uses the printMachineInstruction()
+/// runOnMachineFunction - This uses the printInstruction()
 /// method to print assembly for each instruction.
 ///
 bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
-  SetupMachineFunction(MF);
-  O << "\n\n";
+  AFI = MF.getInfo<ARMFunctionInfo>();
 
-  // Print out constants referenced by the function
-  EmitConstantPool(MF.getConstantPool());
-
-  const std::vector<MachineConstantPoolEntry>
-    &CP = MF.getConstantPool()->getConstants();
-  for (unsigned i = 0, e = CP.size(); i != e; ++i) {
-    MachineConstantPoolEntry CPE = CP[i];
-    if (!CPE.isMachineConstantPoolEntry()){
-      Constant *CV = CPE.Val.ConstVal;
-      if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) {
-        if (GV->hasExternalWeakLinkage()) {
-          ExtWeakSymbols.insert(GV);
-        }
-      }
-    }
+  if (Subtarget->isDarwin()) {
+    DW.SetDebugInfo(&getAnalysis<MachineDebugInfo>());
   }
 
-  // Print out jump tables referenced by the function
-  EmitJumpTableInfo(MF.getJumpTableInfo(), MF);
+  SetupMachineFunction(MF);
+  O << "\n";
+
+  // NOTE: we don't print out constant pools here, they are handled as
+  // instructions.
 
+  O << "\n";
   // Print out labels for the function.
   const Function *F = MF.getFunction();
-  SwitchToTextSection(getSectionForFunction(*F).c_str(), F);
-
   switch (F->getLinkage()) {
   default: assert(0 && "Unknown linkage type!");
   case Function::InternalLinkage:
+    SwitchToTextSection("\t.text", F);
     break;
   case Function::ExternalLinkage:
+    SwitchToTextSection("\t.text", F);
     O << "\t.globl\t" << CurrentFnName << "\n";
     break;
   case Function::WeakLinkage:
   case Function::LinkOnceLinkage:
-    O << TAI->getWeakRefDirective() << CurrentFnName << "\n";
+    if (Subtarget->isDarwin()) {
+      SwitchToTextSection(
+                ".section __TEXT,__textcoal_nt,coalesced,pure_instructions", F);
+      O << "\t.globl\t" << CurrentFnName << "\n";
+      O << "\t.weak_definition\t" << CurrentFnName << "\n";
+    } else {
+      O << TAI->getWeakRefDirective() << CurrentFnName << "\n";
+    }
     break;
   }
-  EmitAlignment(2, F);
+
+  if (AFI->isThumbFunction()) {
+    EmitAlignment(1, F);
+    O << "\t.code\t16\n";
+    O << "\t.thumb_func\t" << CurrentFnName << "\n";
+    InCPMode = false;
+  } else
+    EmitAlignment(2, F);
+
   O << CurrentFnName << ":\n";
+  if (Subtarget->isDarwin()) {
+    // Emit pre-function debug information.
+    DW.BeginFunction(&MF);
+  }
 
   // Print out code for the function.
   for (MachineFunction::const_iterator I = MF.begin(), E = MF.end();
@@ -158,127 +219,340 @@
     for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end();
          II != E; ++II) {
       // Print the assembly for the instruction.
-      O << "\t";
-      ++EmittedInsts;
-      printInstruction(II);
+      printMachineInstruction(II);
     }
   }
 
-  return false;
-}
+  if (TAI->hasDotTypeDotSizeDirective())
+    O << "\t.size " << CurrentFnName << ", .-" << CurrentFnName << "\n";
 
-void ARMAsmPrinter::printAddrMode1(const MachineInstr *MI, int opNum) {
-  const MachineOperand &Arg       = MI->getOperand(opNum);
-  const MachineOperand &Shift     = MI->getOperand(opNum + 1);
-  const MachineOperand &ShiftType = MI->getOperand(opNum + 2);
-
-  if(Arg.isImmediate()) {
-    assert(Shift.getImmedValue() == 0);
-    printOperand(MI, opNum);
-  } else {
-    assert(Arg.isRegister());
-    printOperand(MI, opNum);
-    if(Shift.isRegister() || Shift.getImmedValue() != 0) {
-      const char *s = NULL;
-      switch(ShiftType.getImmedValue()) {
-      case ARMShift::LSL:
-	s = ", lsl ";
-	break;
-      case ARMShift::LSR:
-	s = ", lsr ";
-	break;
-      case ARMShift::ASR:
-	s = ", asr ";
-	break;
-      case ARMShift::ROR:
-	s = ", ror ";
-	break;
-      case ARMShift::RRX:
-	s = ", rrx ";
-	break;
-      }
-      O << s;
-      printOperand(MI, opNum + 1);
-    }
+  if (Subtarget->isDarwin()) {
+    // Emit post-function debug information.
+    DW.EndFunction();
   }
-}
-
-void ARMAsmPrinter::printAddrMode2(const MachineInstr *MI, int opNum) {
-  const MachineOperand &Arg    = MI->getOperand(opNum);
-  const MachineOperand &Offset = MI->getOperand(opNum + 1);
-  assert(Offset.isImmediate());
 
-  if (Arg.isConstantPoolIndex()) {
-    assert(Offset.getImmedValue() == 0);
-    printOperand(MI, opNum);
-  } else {
-    assert(Arg.isRegister());
-    O << '[';
-    printOperand(MI, opNum);
-    O << ", ";
-    printOperand(MI, opNum + 1);
-    O << ']';
-  }
-}
-
-void ARMAsmPrinter::printAddrMode5(const MachineInstr *MI, int opNum) {
-  const MachineOperand &Arg    = MI->getOperand(opNum);
-  const MachineOperand &Offset = MI->getOperand(opNum + 1);
-  assert(Offset.isImmediate());
-
-  if (Arg.isConstantPoolIndex()) {
-    assert(Offset.getImmedValue() == 0);
-    printOperand(MI, opNum);
-  } else {
-    assert(Arg.isRegister());
-    O << '[';
-    printOperand(MI, opNum);
-    O << ", ";
-    printOperand(MI, opNum + 1);
-    O << ']';
-  }
+  return false;
 }
 
-void ARMAsmPrinter::printOperand(const MachineInstr *MI, int opNum) {
-  const MachineOperand &MO = MI->getOperand (opNum);
-  const MRegisterInfo &RI = *TM.getRegisterInfo();
+void ARMAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
+                                 const char *Modifier) {
+  const MachineOperand &MO = MI->getOperand(opNum);
   switch (MO.getType()) {
   case MachineOperand::MO_Register:
     if (MRegisterInfo::isPhysicalRegister(MO.getReg()))
-      O << LowercaseString (RI.get(MO.getReg()).Name);
+      O << TM.getRegisterInfo()->get(MO.getReg()).Name;
     else
       assert(0 && "not implemented");
     break;
-  case MachineOperand::MO_Immediate:
-    O << "#" << (int)MO.getImmedValue();
+  case MachineOperand::MO_Immediate: {
+    if (!Modifier || strcmp(Modifier, "no_hash") != 0)
+      O << "#";
+
+    O << (int)MO.getImmedValue();
     break;
+  }
   case MachineOperand::MO_MachineBasicBlock:
     printBasicBlockLabel(MO.getMachineBasicBlock());
     return;
   case MachineOperand::MO_GlobalAddress: {
+    bool isCallOp = Modifier && !strcmp(Modifier, "call");
     GlobalValue *GV = MO.getGlobal();
     std::string Name = Mang->getValueName(GV);
-    O << Name;
-    if (GV->hasExternalWeakLinkage()) {
+    bool isExt = (GV->isExternal() || GV->hasWeakLinkage() ||
+                  GV->hasLinkOnceLinkage());
+    if (isExt && isCallOp && Subtarget->isDarwin() &&
+        TM.getRelocationModel() != Reloc::Static) {
+      O << TAI->getPrivateGlobalPrefix() << Name << "$stub";
+      FnStubs.insert(Name);
+    } else
+      O << Name;
+
+    if (GV->hasExternalWeakLinkage())
       ExtWeakSymbols.insert(GV);
-    }
-  }
     break;
-  case MachineOperand::MO_ExternalSymbol:
-    O << TAI->getGlobalPrefix() << MO.getSymbolName();
+  }
+  case MachineOperand::MO_ExternalSymbol: {
+    bool isCallOp = Modifier && !strcmp(Modifier, "call");
+    std::string Name(TAI->getGlobalPrefix());
+    Name += MO.getSymbolName();
+    if (isCallOp && Subtarget->isDarwin() &&
+        TM.getRelocationModel() != Reloc::Static) {
+      O << TAI->getPrivateGlobalPrefix() << Name << "$stub";
+      FnStubs.insert(Name);
+    } else
+      O << Name;
     break;
+  }
   case MachineOperand::MO_ConstantPoolIndex:
     O << TAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber()
       << '_' << MO.getConstantPoolIndex();
     break;
+  case MachineOperand::MO_JumpTableIndex:
+    O << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
+      << '_' << MO.getJumpTableIndex();
+    break;
   default:
     O << "<unknown operand type>"; abort (); break;
   }
 }
 
-void ARMAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum,
-                                      const char *Modifier) {
-  assert(0 && "not implemented");
+/// printSOImmOperand - SOImm is 4-bit rotate amount in bits 8-11 with 8-bit
+/// immediate in bits 0-7.
+void ARMAsmPrinter::printSOImmOperand(const MachineInstr *MI, int OpNum) {
+  const MachineOperand &MO = MI->getOperand(OpNum);
+  assert(MO.isImmediate() && (MO.getImmedValue() < (1 << 12)) &&
+         "Not a valid so_imm value!");
+  unsigned Imm = ARM_AM::getSOImmValImm(MO.getImmedValue());
+  unsigned Rot = ARM_AM::getSOImmValRot(MO.getImmedValue());
+  
+  // Print low-level immediate formation info, per
+  // A5.1.3: "Data-processing operands - Immediate".
+  if (Rot) {
+    O << "#" << Imm << ", " << Rot;
+    // Pretty printed version.
+    O << ' ' << TAI->getCommentString() << ' ' << (int)ARM_AM::rotr32(Imm, Rot);
+  } else {
+    O << "#" << Imm;
+  }
+}
+
+// so_reg is a 4-operand unit corresponding to register forms of the A5.1
+// "Addressing Mode 1 - Data-processing operands" forms.  This includes:
+//    REG 0   0    - e.g. R5
+//    REG REG 0,SH_OPC     - e.g. R5, ROR R3
+//    REG 0   IMM,SH_OPC  - e.g. R5, LSL #3
+void ARMAsmPrinter::printSORegOperand(const MachineInstr *MI, int Op) {
+  const MachineOperand &MO1 = MI->getOperand(Op);
+  const MachineOperand &MO2 = MI->getOperand(Op+1);
+  const MachineOperand &MO3 = MI->getOperand(Op+2);
+
+  assert(MRegisterInfo::isPhysicalRegister(MO1.getReg()));
+  O << TM.getRegisterInfo()->get(MO1.getReg()).Name;
+
+  // Print the shift opc.
+  O << ", "
+    << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(MO3.getImmedValue()))
+    << " ";
+
+  if (MO2.getReg()) {
+    assert(MRegisterInfo::isPhysicalRegister(MO2.getReg()));
+    O << TM.getRegisterInfo()->get(MO2.getReg()).Name;
+    assert(ARM_AM::getSORegOffset(MO3.getImm()) == 0);
+  } else {
+    O << "#" << ARM_AM::getSORegOffset(MO3.getImm());
+  }
+}
+
+void ARMAsmPrinter::printAddrMode2Operand(const MachineInstr *MI, int Op) {
+  const MachineOperand &MO1 = MI->getOperand(Op);
+  const MachineOperand &MO2 = MI->getOperand(Op+1);
+  const MachineOperand &MO3 = MI->getOperand(Op+2);
+
+  if (!MO1.isRegister()) {   // FIXME: This is for CP entries, but isn't right.
+    printOperand(MI, Op);
+    return;
+  }
+
+  O << "[" << TM.getRegisterInfo()->get(MO1.getReg()).Name;
+
+  if (!MO2.getReg()) {
+    if (ARM_AM::getAM2Offset(MO3.getImm()))  // Don't print +0.
+      O << ", #"
+        << (char)ARM_AM::getAM2Op(MO3.getImm())
+        << ARM_AM::getAM2Offset(MO3.getImm());
+    O << "]";
+    return;
+  }
+
+  O << ", "
+    << (char)ARM_AM::getAM2Op(MO3.getImm())
+    << TM.getRegisterInfo()->get(MO2.getReg()).Name;
+  
+  if (unsigned ShImm = ARM_AM::getAM2Offset(MO3.getImm()))
+    O << ", "
+      << ARM_AM::getShiftOpcStr(ARM_AM::getAM2ShiftOpc(MO3.getImmedValue()))
+      << " #" << ShImm;
+  O << "]";
+}
+
+void ARMAsmPrinter::printAddrMode2OffsetOperand(const MachineInstr *MI, int Op){
+  const MachineOperand &MO1 = MI->getOperand(Op);
+  const MachineOperand &MO2 = MI->getOperand(Op+1);
+
+  if (!MO1.getReg()) {
+    if (ARM_AM::getAM2Offset(MO2.getImm()))  // Don't print +0.
+      O << "#"
+        << (char)ARM_AM::getAM2Op(MO2.getImm())
+        << ARM_AM::getAM2Offset(MO2.getImm());
+    return;
+  }
+
+  O << (char)ARM_AM::getAM2Op(MO2.getImm())
+    << TM.getRegisterInfo()->get(MO1.getReg()).Name;
+  
+  if (unsigned ShImm = ARM_AM::getAM2Offset(MO2.getImm()))
+    O << ", "
+      << ARM_AM::getShiftOpcStr(ARM_AM::getAM2ShiftOpc(MO2.getImmedValue()))
+      << " #" << ShImm;
+}
+
+void ARMAsmPrinter::printAddrMode3Operand(const MachineInstr *MI, int Op) {
+  const MachineOperand &MO1 = MI->getOperand(Op);
+  const MachineOperand &MO2 = MI->getOperand(Op+1);
+  const MachineOperand &MO3 = MI->getOperand(Op+2);
+  
+  assert(MRegisterInfo::isPhysicalRegister(MO1.getReg()));
+  O << "[" << TM.getRegisterInfo()->get(MO1.getReg()).Name;
+
+  if (MO2.getReg()) {
+    O << ", "
+      << (char)ARM_AM::getAM3Op(MO3.getImm())
+      << TM.getRegisterInfo()->get(MO2.getReg()).Name
+      << "]";
+    return;
+  }
+  
+  if (unsigned ImmOffs = ARM_AM::getAM3Offset(MO3.getImm()))
+    O << ", #"
+      << (char)ARM_AM::getAM3Op(MO3.getImm())
+      << ImmOffs;
+  O << "]";
+}
+
+void ARMAsmPrinter::printAddrMode3OffsetOperand(const MachineInstr *MI, int Op){
+  const MachineOperand &MO1 = MI->getOperand(Op);
+  const MachineOperand &MO2 = MI->getOperand(Op+1);
+
+  if (MO1.getReg()) {
+    O << (char)ARM_AM::getAM3Op(MO2.getImm())
+      << TM.getRegisterInfo()->get(MO1.getReg()).Name;
+    return;
+  }
+
+  unsigned ImmOffs = ARM_AM::getAM3Offset(MO2.getImm());
+  O << "#"
+  << (char)ARM_AM::getAM3Op(MO2.getImm())
+    << ImmOffs;
+}
+  
+void ARMAsmPrinter::printAddrMode4Operand(const MachineInstr *MI, int Op,
+                                          const char *Modifier) {
+  const MachineOperand &MO1 = MI->getOperand(Op);
+  const MachineOperand &MO2 = MI->getOperand(Op+1);
+  ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MO2.getImm());
+  if (Modifier && strcmp(Modifier, "submode") == 0) {
+    if (MO1.getReg() == ARM::SP) {
+      bool isLDM = (MI->getOpcode() == ARM::LDM ||
+                    MI->getOpcode() == ARM::LDM_RET);
+      O << ARM_AM::getAMSubModeAltStr(Mode, isLDM);
+    } else
+      O << ARM_AM::getAMSubModeStr(Mode);
+  } else {
+    printOperand(MI, Op);
+    if (ARM_AM::getAM4WBFlag(MO2.getImm()))
+      O << "!";
+  }
+}
+
+void ARMAsmPrinter::printAddrMode5Operand(const MachineInstr *MI, int Op,
+                                          const char *Modifier) {
+  const MachineOperand &MO1 = MI->getOperand(Op);
+  const MachineOperand &MO2 = MI->getOperand(Op+1);
+
+  if (!MO1.isRegister()) {   // FIXME: This is for CP entries, but isn't right.
+    printOperand(MI, Op);
+    return;
+  }
+  
+  assert(MRegisterInfo::isPhysicalRegister(MO1.getReg()));
+
+  if (Modifier && strcmp(Modifier, "submode") == 0) {
+    ARM_AM::AMSubMode Mode = ARM_AM::getAM5SubMode(MO2.getImm());
+    if (MO1.getReg() == ARM::SP) {
+      bool isFLDM = (MI->getOpcode() == ARM::FLDMD ||
+                     MI->getOpcode() == ARM::FLDMS);
+      O << ARM_AM::getAMSubModeAltStr(Mode, isFLDM);
+    } else
+      O << ARM_AM::getAMSubModeStr(Mode);
+    return;
+  } else if (Modifier && strcmp(Modifier, "base") == 0) {
+    // Used for FSTM{D|S} and LSTM{D|S} operations.
+    O << TM.getRegisterInfo()->get(MO1.getReg()).Name;
+    if (ARM_AM::getAM5WBFlag(MO2.getImm()))
+      O << "!";
+    return;
+  }
+  
+  O << "[" << TM.getRegisterInfo()->get(MO1.getReg()).Name;
+  
+  if (unsigned ImmOffs = ARM_AM::getAM5Offset(MO2.getImm())) {
+    O << ", #"
+      << (char)ARM_AM::getAM5Op(MO2.getImm())
+      << ImmOffs*4;
+  }
+  O << "]";
+}
+
+void ARMAsmPrinter::printAddrModePCOperand(const MachineInstr *MI, int Op,
+                                           const char *Modifier) {
+  if (Modifier && strcmp(Modifier, "label") == 0) {
+    printPCLabel(MI, Op+1);
+    return;
+  }
+
+  const MachineOperand &MO1 = MI->getOperand(Op);
+  assert(MRegisterInfo::isPhysicalRegister(MO1.getReg()));
+  O << "[pc, +" << TM.getRegisterInfo()->get(MO1.getReg()).Name << "]";
+}
+
+void
+ARMAsmPrinter::printThumbAddrModeRROperand(const MachineInstr *MI, int Op) {
+  const MachineOperand &MO1 = MI->getOperand(Op);
+  const MachineOperand &MO2 = MI->getOperand(Op+1);
+  O << "[" << TM.getRegisterInfo()->get(MO1.getReg()).Name;
+  O << ", " << TM.getRegisterInfo()->get(MO2.getReg()).Name << "]";
+}
+
+void
+ARMAsmPrinter::printThumbAddrModeRI5Operand(const MachineInstr *MI, int Op,
+                                            unsigned Scale) {
+  const MachineOperand &MO1 = MI->getOperand(Op);
+  const MachineOperand &MO2 = MI->getOperand(Op+1);
+
+  if (!MO1.isRegister()) {   // FIXME: This is for CP entries, but isn't right.
+    printOperand(MI, Op);
+    return;
+  }
+
+  O << "[" << TM.getRegisterInfo()->get(MO1.getReg()).Name;
+  if (unsigned ImmOffs = MO2.getImm()) {
+    O << ", #" << ImmOffs;
+    if (Scale > 1)
+      O << " * " << Scale;
+  }
+  O << "]";
+}
+
+void
+ARMAsmPrinter::printThumbAddrModeRI5_1Operand(const MachineInstr *MI, int Op) {
+  printThumbAddrModeRI5Operand(MI, Op, 1);
+}
+void
+ARMAsmPrinter::printThumbAddrModeRI5_2Operand(const MachineInstr *MI, int Op) {
+  printThumbAddrModeRI5Operand(MI, Op, 2);
+}
+void
+ARMAsmPrinter::printThumbAddrModeRI5_4Operand(const MachineInstr *MI, int Op) {
+  printThumbAddrModeRI5Operand(MI, Op, 4);
+}
+
+void ARMAsmPrinter::printThumbAddrModeSPOperand(const MachineInstr *MI,int Op) {
+  const MachineOperand &MO1 = MI->getOperand(Op);
+  const MachineOperand &MO2 = MI->getOperand(Op+1);
+  O << "[" << TM.getRegisterInfo()->get(MO1.getReg()).Name;
+  if (unsigned ImmOffs = MO2.getImm())
+    O << ", #" << ImmOffs << " * 4";
+  O << "]";
 }
 
 void ARMAsmPrinter::printCCOperand(const MachineInstr *MI, int opNum) {
@@ -286,9 +560,140 @@
   O << ARMCondCodeToString((ARMCC::CondCodes)CC);
 }
 
+void ARMAsmPrinter::printPCLabel(const MachineInstr *MI, int opNum) {
+  int Id = (int)MI->getOperand(opNum).getImmedValue();
+  O << TAI->getPrivateGlobalPrefix() << "PC" << Id;
+}
+
+void ARMAsmPrinter::printRegisterList(const MachineInstr *MI, int opNum) {
+  O << "{";
+  for (unsigned i = opNum, e = MI->getNumOperands(); i != e; ++i) {
+    printOperand(MI, i);
+    if (i != e-1) O << ", ";
+  }
+  O << "}";
+}
+
+void ARMAsmPrinter::printCPInstOperand(const MachineInstr *MI, int OpNo,
+                                       const char *Modifier) {
+  assert(Modifier && "This operand only works with a modifier!");
+  // There are two aspects to a CONSTANTPOOL_ENTRY operand, the label and the
+  // data itself.
+  if (!strcmp(Modifier, "label")) {
+    unsigned ID = MI->getOperand(OpNo).getImm();
+    O << TAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber()
+      << '_' << ID << ":\n";
+  } else {
+    assert(!strcmp(Modifier, "cpentry") && "Unknown modifier for CPE");
+    unsigned CPI = MI->getOperand(OpNo).getConstantPoolIndex();
+
+    const MachineConstantPoolEntry &MCPE =  // Chasing pointers is fun?
+      MI->getParent()->getParent()->getConstantPool()->getConstants()[CPI];
+    
+    if (MCPE.isMachineConstantPoolEntry())
+      EmitMachineConstantPoolValue(MCPE.Val.MachineCPVal);
+    else
+      EmitGlobalConstant(MCPE.Val.ConstVal);
+  }
+}
+
+void ARMAsmPrinter::printJTBlockOperand(const MachineInstr *MI, int OpNo) {
+  const MachineOperand &MO1 = MI->getOperand(OpNo);
+  const MachineOperand &MO2 = MI->getOperand(OpNo+1); // Unique Id
+  unsigned JTI = MO1.getJumpTableIndex();
+  O << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
+    << '_' << JTI << '_' << MO2.getImmedValue() << ":\n";
+
+  const char *JTEntryDirective = TAI->getJumpTableDirective();
+  if (!JTEntryDirective)
+    JTEntryDirective = TAI->getData32bitsDirective();
+
+  const MachineFunction *MF = MI->getParent()->getParent();
+  MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
+  const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+  const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
+  bool UseSet= TAI->getSetDirective() && TM.getRelocationModel() == Reloc::PIC_;
+  std::set<MachineBasicBlock*> JTSets;
+  for (unsigned i = 0, e = JTBBs.size(); i != e; ++i) {
+    MachineBasicBlock *MBB = JTBBs[i];
+    if (UseSet && JTSets.insert(MBB).second)
+      printSetLabel(JTI, MO2.getImmedValue(), MBB);
+
+    O << JTEntryDirective << ' ';
+    if (UseSet)
+      O << TAI->getPrivateGlobalPrefix() << getFunctionNumber()
+        << '_' << JTI << '_' << MO2.getImmedValue()
+        << "_set_" << MBB->getNumber();
+    else if (TM.getRelocationModel() == Reloc::PIC_) {
+      printBasicBlockLabel(MBB, false, false);
+      // If the arch uses custom Jump Table directives, don't calc relative to JT
+      if (!TAI->getJumpTableDirective()) 
+        O << '-' << TAI->getPrivateGlobalPrefix() << "JTI"
+          << getFunctionNumber() << '_' << JTI << '_' << MO2.getImmedValue();
+    } else
+      printBasicBlockLabel(MBB, false, false);
+    O << '\n';
+  }
+}
+
+
+bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+                                    unsigned AsmVariant, const char *ExtraCode){
+  // Does this asm operand have a single letter operand modifier?
+  if (ExtraCode && ExtraCode[0]) {
+    if (ExtraCode[1] != 0) return true; // Unknown modifier.
+    
+    switch (ExtraCode[0]) {
+    default: return true;  // Unknown modifier.
+    case 'Q':
+      if (TM.getTargetData()->isLittleEndian())
+        break;
+      // Fallthrough
+    case 'R':
+      if (TM.getTargetData()->isBigEndian())
+        break;
+      // Fallthrough
+    case 'H': // Write second word of DI / DF reference.  
+      // Verify that this operand has two consecutive registers.
+      if (!MI->getOperand(OpNo).isRegister() ||
+          OpNo+1 == MI->getNumOperands() ||
+          !MI->getOperand(OpNo+1).isRegister())
+        return true;
+      ++OpNo;   // Return the high-part.
+    }
+  }
+  
+  printOperand(MI, OpNo);
+  return false;
+}
+
+void ARMAsmPrinter::printMachineInstruction(const MachineInstr *MI) {
+  ++EmittedInsts;
+
+  if (MI->getOpcode() == ARM::CONSTPOOL_ENTRY) {
+    if (!InCPMode && AFI->isThumbFunction()) {
+      EmitAlignment(2);
+      InCPMode = true;
+    }
+  } else {
+    if (InCPMode && AFI->isThumbFunction()) {
+      EmitAlignment(1);
+      InCPMode = false;
+    }
+    O << "\t";
+  }
+
+  // Call the autogenerated instruction printer routines.
+  printInstruction(MI);
+}
+
 bool ARMAsmPrinter::doInitialization(Module &M) {
-  AsmPrinter::doInitialization(M);
-  return false; // success
+  if (Subtarget->isDarwin()) {
+    // Emit initial debug information.
+    DW.BeginModule(&M);
+  }
+  
+  return AsmPrinter::doInitialization(M);
 }
 
 bool ARMAsmPrinter::doFinalization(Module &M) {
@@ -302,53 +707,154 @@
     if (EmitSpecialLLVMGlobal(I))
       continue;
 
-    O << "\n\n";
     std::string name = Mang->getValueName(I);
     Constant *C = I->getInitializer();
     unsigned Size = TD->getTypeSize(C->getType());
-    unsigned Align = Log2_32(TD->getTypeAlignment(C->getType()));
+    unsigned Align = TD->getPreferredAlignmentLog(I);
 
     if (C->isNullValue() &&
         !I->hasSection() &&
-        (I->hasLinkOnceLinkage() || I->hasInternalLinkage() ||
-         I->hasWeakLinkage())) {
-      SwitchToDataSection(".data", I);
-      if (I->hasInternalLinkage())
-        O << "\t.local " << name << "\n";
-
-      O << "\t.comm " << name << "," << Size
-        << "," << (unsigned) (1 << Align);
-      O << "\n";
+        (I->hasInternalLinkage() || I->hasWeakLinkage() ||
+         I->hasLinkOnceLinkage() ||
+         (Subtarget->isDarwin() &&  I->hasExternalLinkage()))) {
+      if (Size == 0) Size = 1;   // .comm Foo, 0 is undefined, avoid it.
+      if (I->hasExternalLinkage()) {
+          O << "\t.globl\t" << name << "\n";
+          O << "\t.zerofill __DATA__, __common, " << name << ", "
+            << Size << ", " << Align;
+      } else {
+        SwitchToDataSection(TAI->getDataSection(), I);
+        if (TAI->getLCOMMDirective() != NULL) {
+          if (I->hasInternalLinkage()) {
+            O << TAI->getLCOMMDirective() << name << "," << Size;
+            if (Subtarget->isDarwin())
+              O << "," << Align;
+          } else
+            O << TAI->getCOMMDirective()  << name << "," << Size;
+        } else {
+          if (I->hasInternalLinkage())
+            O << "\t.local\t" << name << "\n";
+          O << TAI->getCOMMDirective()  << name << "," << Size;
+          if (TAI->getCOMMDirectiveTakesAlignment())
+            O << "," << (TAI->getAlignmentIsInBytes() ? (1 << Align) : Align);
+        }
+      }
+      O << "\t\t" << TAI->getCommentString() << " " << I->getName() << "\n";
+      continue;
     } else {
       switch (I->getLinkage()) {
       default:
         assert(0 && "Unknown linkage type!");
         break;
+      case GlobalValue::LinkOnceLinkage:
+      case GlobalValue::WeakLinkage:
+        if (Subtarget->isDarwin()) {
+          O << "\t.globl " << name << "\n"
+            << "\t.weak_definition " << name << "\n";
+          SwitchToDataSection("\t.section __DATA,__const_coal,coalesced", I);
+        } else {
+          O << "\t.section\t.llvm.linkonce.d." << name << ",\"aw\",@progbits\n"
+            << "\t.weak " << name << "\n";
+        }
+        break;
       case GlobalValue::ExternalLinkage:
         O << "\t.globl " << name << "\n";
-        break;
+        // FALL THROUGH
       case GlobalValue::InternalLinkage:
+        if (I->isConstant()) {
+          const ConstantArray *CVA = dyn_cast<ConstantArray>(C);
+          if (TAI->getCStringSection() && CVA && CVA->isCString()) {
+            SwitchToDataSection(TAI->getCStringSection(), I);
+            break;
+          }
+        }
+
+        if (I->hasSection() &&
+            (I->getSection() == ".ctors" ||
+             I->getSection() == ".dtors")) {
+          assert(!Subtarget->isDarwin());
+          std::string SectionName = ".section " + I->getSection();
+          SectionName += ",\"aw\",@progbits";
+          SwitchToDataSection(SectionName.c_str());
+        } else {
+          SwitchToDataSection(TAI->getDataSection(), I);
+        }
+
         break;
       }
+    }
 
-      if (I->hasSection() &&
-          (I->getSection() == ".ctors" ||
-           I->getSection() == ".dtors")) {
-        std::string SectionName = ".section " + I->getSection();
+    EmitAlignment(Align, I);
+    if (TAI->hasDotTypeDotSizeDirective()) {
+      O << "\t.type " << name << ", %object\n";
+      O << "\t.size " << name << ", " << Size << "\n";
+    }
+    O << name << ":\n";
+    
+    // If the initializer is a extern weak symbol, remember to emit the weak
+    // reference!
+    if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
+      if (GV->hasExternalWeakLinkage())
+      ExtWeakSymbols.insert(GV);
 
-        SectionName += ",\"aw\",%progbits";
+    EmitGlobalConstant(C);
+    O << '\n';
+  }
 
-        SwitchToDataSection(SectionName.c_str());
-      } else {
-        SwitchToDataSection(TAI->getDataSection(), I);
+  if (Subtarget->isDarwin()) {
+    // Output stubs for dynamically-linked functions
+    unsigned j = 1;
+    for (std::set<std::string>::iterator i = FnStubs.begin(), e = FnStubs.end();
+         i != e; ++i, ++j) {
+      if (TM.getRelocationModel() == Reloc::PIC_)
+        SwitchToTextSection(".section __TEXT,__picsymbolstub4,symbol_stubs,"
+                            "none,16", 0);
+      else
+        SwitchToTextSection(".section __TEXT,__symbol_stub4,symbol_stubs,"
+                            "none,12", 0);
+
+      EmitAlignment(2);
+      O << "\t.code\t32\n";
+
+      O << "L" << *i << "$stub:\n";
+      O << "\t.indirect_symbol " << *i << "\n";
+      O << "\tldr ip, L" << *i << "$slp\n";
+      if (TM.getRelocationModel() == Reloc::PIC_) {
+        O << "L" << *i << "$scv:\n";
+        O << "\tadd ip, pc, ip\n";
       }
+      O << "\tldr pc, [ip, #0]\n";
+      O << "L" << *i << "$slp:\n";
+      if (TM.getRelocationModel() == Reloc::PIC_)
+        O << "\t.long\tL" << *i << "$lazy_ptr-(L" << *i << "$scv+8)\n";
+      else
+        O << "\t.long\tL" << *i << "$lazy_ptr\n";
+      SwitchToDataSection(".lazy_symbol_pointer", 0);
+      O << "L" << *i << "$lazy_ptr:\n";
+      O << "\t.indirect_symbol " << *i << "\n";
+      O << "\t.long\tdyld_stub_binding_helper\n";
+    }
+    O << "\n";
 
-      EmitAlignment(Align, I);
-      O << "\t.type " << name << ", %object\n";
-      O << "\t.size " << name << ", " << Size << "\n";
-      O << name << ":\n";
-      EmitGlobalConstant(C);
+    // Output non-lazy-pointers for external and common global variables.
+    if (GVNonLazyPtrs.begin() != GVNonLazyPtrs.end())
+      SwitchToDataSection(".non_lazy_symbol_pointer", 0);
+    for (std::set<std::string>::iterator i = GVNonLazyPtrs.begin(),
+           e = GVNonLazyPtrs.end(); i != e; ++i) {
+      O << "L" << *i << "$non_lazy_ptr:\n";
+      O << "\t.indirect_symbol " << *i << "\n";
+      O << "\t.long\t0\n";
     }
+
+    // Emit initial debug information.
+    DW.EndModule();
+
+    // Funny Darwin hack: This flag tells the linker that no global symbols
+    // contain code that falls through to other global symbols (e.g. the obvious
+    // implementation of multiple entry points).  If this doesn't occur, the
+    // linker can safely perform dead code stripping.  Since LLVM never
+    // generates code that does this, it is always safe to set.
+    O << "\t.subsections_via_symbols\n";
   }
 
   AsmPrinter::doFinalization(M);


Index: llvm/lib/Target/ARM/ARMFrameInfo.h
diff -u llvm/lib/Target/ARM/ARMFrameInfo.h:1.2 llvm/lib/Target/ARM/ARMFrameInfo.h:1.3
--- llvm/lib/Target/ARM/ARMFrameInfo.h:1.2	Fri Aug 25 12:55:16 2006
+++ llvm/lib/Target/ARM/ARMFrameInfo.h	Fri Jan 19 01:51:42 2007
@@ -17,17 +17,15 @@
 
 #include "ARM.h"
 #include "llvm/Target/TargetFrameInfo.h"
-#include "llvm/Target/TargetMachine.h"
+#include "ARMSubtarget.h"
 
 namespace llvm {
 
-class ARMFrameInfo: public TargetFrameInfo {
-
+class ARMFrameInfo : public TargetFrameInfo {
 public:
-  ARMFrameInfo()
-    : TargetFrameInfo(TargetFrameInfo::StackGrowsDown, 8, 0) {
+  ARMFrameInfo(const ARMSubtarget &ST)
+    : TargetFrameInfo(StackGrowsDown, ST.getStackAlignment(), 0) {
   }
-
 };
 
 } // End llvm namespace


Index: llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
diff -u llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp:1.98 llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp:1.99
--- llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp:1.98	Fri Jan 12 14:35:49 2007
+++ llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp	Fri Jan 19 01:51:42 2007
@@ -12,14 +12,14 @@
 //===----------------------------------------------------------------------===//
 
 #include "ARM.h"
+#include "ARMISelLowering.h"
 #include "ARMTargetMachine.h"
-#include "ARMCommon.h"
+#include "ARMAddressingModes.h"
 #include "llvm/CallingConv.h"
+#include "llvm/Constants.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/Function.h"
-#include "llvm/Constants.h"
 #include "llvm/Intrinsics.h"
-#include "llvm/ADT/VectorExtras.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -28,1081 +28,545 @@
 #include "llvm/CodeGen/SSARegMap.h"
 #include "llvm/Target/TargetLowering.h"
 #include "llvm/Support/Debug.h"
-#include "llvm/Support/MathExtras.h"
-#include <vector>
+#include <iostream>
 using namespace llvm;
 
+//===--------------------------------------------------------------------===//
+/// ARMDAGToDAGISel - ARM specific code to select ARM machine
+/// instructions for SelectionDAG operations.
+///
 namespace {
-  class ARMTargetLowering : public TargetLowering {
-    int VarArgsFrameIndex;            // FrameIndex for start of varargs area.
-  public:
-    ARMTargetLowering(TargetMachine &TM);
-    virtual SDOperand LowerOperation(SDOperand Op, SelectionDAG &DAG);
-    virtual const char *getTargetNodeName(unsigned Opcode) const;
-    std::vector<unsigned>
-    getRegClassForInlineAsmConstraint(const std::string &Constraint,
-				      MVT::ValueType VT) const;
-  };
-
-}
-
-ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
-  : TargetLowering(TM) {
-  addRegisterClass(MVT::i32, ARM::IntRegsRegisterClass);
-  addRegisterClass(MVT::f32, ARM::FPRegsRegisterClass);
-  addRegisterClass(MVT::f64, ARM::DFPRegsRegisterClass);
-
-  setLoadXAction(ISD::EXTLOAD, MVT::f32, Expand);
-
-  setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
-  setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
-
-  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
-  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
-
-  setOperationAction(ISD::RET,           MVT::Other, Custom);
-  setOperationAction(ISD::GlobalAddress, MVT::i32,   Custom);
-  setOperationAction(ISD::ConstantPool,  MVT::i32,   Custom);
-
-  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
-  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Expand);
-  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand);
-
-  setOperationAction(ISD::SELECT, MVT::i32, Expand);
-  setOperationAction(ISD::SELECT, MVT::f32, Expand);
-  setOperationAction(ISD::SELECT, MVT::f64, Expand);
-
-  setOperationAction(ISD::SETCC, MVT::i32, Expand);
-  setOperationAction(ISD::SETCC, MVT::f32, Expand);
-  setOperationAction(ISD::SETCC, MVT::f64, Expand);
-
-  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
-  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
-  setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
-
-  setOperationAction(ISD::MEMMOVE, MVT::Other, Expand);
-  setOperationAction(ISD::MEMSET, MVT::Other, Expand);
-  setOperationAction(ISD::MEMCPY, MVT::Other, Expand);
-
-  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
-  setOperationAction(ISD::BRIND, MVT::Other, Expand);
-  setOperationAction(ISD::BR_CC, MVT::i32, Custom);
-  setOperationAction(ISD::BR_CC, MVT::f32, Custom);
-  setOperationAction(ISD::BR_CC, MVT::f64, Custom);
-
-  setOperationAction(ISD::BRCOND,        MVT::Other, Expand);
-
-  setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand);
-  setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand);
-  setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand);
-  setOperationAction(ISD::SDIV,      MVT::i32, Expand);
-  setOperationAction(ISD::UDIV,      MVT::i32, Expand);
-  setOperationAction(ISD::SREM,      MVT::i32, Expand);
-  setOperationAction(ISD::UREM,      MVT::i32, Expand);
-
-  setOperationAction(ISD::VASTART,       MVT::Other, Custom);
-  setOperationAction(ISD::VACOPY,            MVT::Other, Expand);
-  setOperationAction(ISD::VAEND,         MVT::Other, Expand);
-  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
-
-  setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
-  setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
-
-  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
-  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
-
-  setStackPointerRegisterToSaveRestore(ARM::R13);
-
-  setSchedulingPreference(SchedulingForRegPressure);
-  computeRegisterProperties();
-}
-
-namespace llvm {
-  namespace ARMISD {
-    enum NodeType {
-      // Start the numbering where the builting ops and target ops leave off.
-      FIRST_NUMBER = ISD::BUILTIN_OP_END+ARM::INSTRUCTION_LIST_END,
-      /// CALL - A direct function call.
-      CALL,
-
-      /// Return with a flag operand.
-      RET_FLAG,
-
-      CMP,
-
-      SELECT,
-
-      BR,
-
-      FSITOS,
-      FTOSIS,
-
-      FSITOD,
-      FTOSID,
-
-      FUITOS,
-      FTOUIS,
-
-      FUITOD,
-      FTOUID,
-
-      FMRRD,
+class ARMDAGToDAGISel : public SelectionDAGISel {
+  ARMTargetLowering Lowering;
 
-      FMDRR,
+  /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
+  /// make the right decision when generating code for different targets.
+  const ARMSubtarget *Subtarget;
 
-      FMSTAT
-    };
+public:
+  ARMDAGToDAGISel(ARMTargetMachine &TM)
+    : SelectionDAGISel(Lowering), Lowering(TM),
+    Subtarget(&TM.getSubtarget<ARMSubtarget>()) {
   }
-}
 
-/// DAGFPCCToARMCC - Convert a DAG fp condition code to an ARM CC
-// Unordered = !N & !Z & C & V = V
-// Ordered   =  N | Z | !C | !V = N | Z | !V
-static std::vector<unsigned> DAGFPCCToARMCC(ISD::CondCode CC) {
-  switch (CC) {
-  default:
-    assert(0 && "Unknown fp condition code!");
-// SETOEQ = (N | Z | !V) & Z = Z                               = EQ
-  case ISD::SETEQ:
-  case ISD::SETOEQ: return make_vector<unsigned>(ARMCC::EQ, 0);
-// SETOGT = (N | Z | !V) & !N & !Z = !V &!N &!Z = (N = V) & !Z = GT
-  case ISD::SETGT:
-  case ISD::SETOGT: return make_vector<unsigned>(ARMCC::GT, 0);
-// SETOGE = (N | Z | !V) & !N = (Z | !V) & !N = !V & !N        = GE
-  case ISD::SETGE:
-  case ISD::SETOGE: return make_vector<unsigned>(ARMCC::GE, 0);
-// SETOLT = (N | Z | !V) & N = N                               = MI
-  case ISD::SETLT:
-  case ISD::SETOLT: return make_vector<unsigned>(ARMCC::MI, 0);
-// SETOLE = (N | Z | !V) & (N | Z) = N | Z = !C | Z            = LS
-  case ISD::SETLE:
-  case ISD::SETOLE: return make_vector<unsigned>(ARMCC::LS, 0);
-// SETONE = OGT | OLT 
-  case ISD::SETONE: return make_vector<unsigned>(ARMCC::GT, ARMCC::MI, 0);
-// SETO   = N | Z | !V = Z | !V = !V                           = VC
-  case ISD::SETO:   return make_vector<unsigned>(ARMCC::VC, 0);
-// SETUO  = V                                                  = VS
-  case ISD::SETUO:  return make_vector<unsigned>(ARMCC::VS, 0);
-// SETUEQ = V | Z  (need two instructions)                     = EQ/VS
-  case ISD::SETUEQ: return make_vector<unsigned>(ARMCC::EQ, ARMCC::VS, 0);
-// SETUGT = V | (!Z & !N) = !Z & !N = !Z & C                   = HI
-  case ISD::SETUGT: return make_vector<unsigned>(ARMCC::HI, 0);
-// SETUGE = V | !N = !N                                        = PL
-  case ISD::SETUGE: return make_vector<unsigned>(ARMCC::PL, 0);
-// SETULT = V | N                                              = LT
-  case ISD::SETULT: return make_vector<unsigned>(ARMCC::LT, 0);
-// SETULE = V | Z | N                                          = LE
-  case ISD::SETULE: return make_vector<unsigned>(ARMCC::LE, 0);
-// SETUNE = V | !Z = !Z                                        = NE
-  case ISD::SETNE:
-  case ISD::SETUNE: return make_vector<unsigned>(ARMCC::NE, 0);
-  }
-}
+  virtual const char *getPassName() const {
+    return "ARM Instruction Selection";
+  } 
+  
+  SDNode *Select(SDOperand Op);
+  virtual void InstructionSelectBasicBlock(SelectionDAG &DAG);
+  bool SelectAddrMode2(SDOperand Op, SDOperand N, SDOperand &Base,
+                       SDOperand &Offset, SDOperand &Opc);
+  bool SelectAddrMode2Offset(SDOperand Op, SDOperand N,
+                             SDOperand &Offset, SDOperand &Opc);
+  bool SelectAddrMode3(SDOperand Op, SDOperand N, SDOperand &Base,
+                       SDOperand &Offset, SDOperand &Opc);
+  bool SelectAddrMode3Offset(SDOperand Op, SDOperand N,
+                             SDOperand &Offset, SDOperand &Opc);
+  bool SelectAddrMode5(SDOperand Op, SDOperand N, SDOperand &Base,
+                       SDOperand &Offset);
 
-/// DAGIntCCToARMCC - Convert a DAG integer condition code to an ARM CC
-static std::vector<unsigned> DAGIntCCToARMCC(ISD::CondCode CC) {
-  switch (CC) {
-  default:
-    assert(0 && "Unknown integer condition code!");
-  case ISD::SETEQ:  return make_vector<unsigned>(ARMCC::EQ, 0);
-  case ISD::SETNE:  return make_vector<unsigned>(ARMCC::NE, 0);
-  case ISD::SETLT:  return make_vector<unsigned>(ARMCC::LT, 0);
-  case ISD::SETLE:  return make_vector<unsigned>(ARMCC::LE, 0);
-  case ISD::SETGT:  return make_vector<unsigned>(ARMCC::GT, 0);
-  case ISD::SETGE:  return make_vector<unsigned>(ARMCC::GE, 0);
-  case ISD::SETULT: return make_vector<unsigned>(ARMCC::CC, 0);
-  case ISD::SETULE: return make_vector<unsigned>(ARMCC::LS, 0);
-  case ISD::SETUGT: return make_vector<unsigned>(ARMCC::HI, 0);
-  case ISD::SETUGE: return make_vector<unsigned>(ARMCC::CS, 0);
-  }
+  bool SelectAddrModePC(SDOperand Op, SDOperand N, SDOperand &Offset,
+                         SDOperand &Label);
+
+  bool SelectThumbAddrModeRR(SDOperand Op, SDOperand N, SDOperand &Base,
+                             SDOperand &Offset);
+  bool SelectThumbAddrModeRI5_1(SDOperand Op, SDOperand N, SDOperand &Base,
+                                SDOperand &Offset);
+  bool SelectThumbAddrModeRI5_2(SDOperand Op, SDOperand N, SDOperand &Base,
+                                SDOperand &Offset);
+  bool SelectThumbAddrModeRI5_4(SDOperand Op, SDOperand N, SDOperand &Base,
+                                SDOperand &Offset);
+  bool SelectThumbAddrModeSP(SDOperand Op, SDOperand N, SDOperand &Base,
+                             SDOperand &Offset);
+
+  bool SelectShifterOperandReg(SDOperand Op, SDOperand N, SDOperand &A,
+                               SDOperand &B, SDOperand &C);
+  
+  // Include the pieces autogenerated from the target description.
+#include "ARMGenDAGISel.inc"
+};
 }
 
-std::vector<unsigned> ARMTargetLowering::
-getRegClassForInlineAsmConstraint(const std::string &Constraint,
-                                  MVT::ValueType VT) const {
-  if (Constraint.size() == 1) {
-    // FIXME: handling only r regs
-    switch (Constraint[0]) {
-    default: break;  // Unknown constraint letter
-
-    case 'r':   // GENERAL_REGS
-    case 'R':   // LEGACY_REGS
-      if (VT == MVT::i32)
-        return make_vector<unsigned>(ARM::R0,  ARM::R1,  ARM::R2,  ARM::R3,
-                                     ARM::R4,  ARM::R5,  ARM::R6,  ARM::R7,
-                                     ARM::R8,  ARM::R9,  ARM::R10, ARM::R11,
-                                     ARM::R12, ARM::R13, ARM::R14, 0);
-      break;
+void ARMDAGToDAGISel::InstructionSelectBasicBlock(SelectionDAG &DAG) {
+  DEBUG(BB->dump());
 
-    }
-  }
+  DAG.setRoot(SelectRoot(DAG.getRoot()));
+  DAG.RemoveDeadNodes();
 
-  return std::vector<unsigned>();
+  ScheduleAndEmitDAG(DAG);
 }
 
-const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
-  switch (Opcode) {
-  default: return 0;
-  case ARMISD::CALL:          return "ARMISD::CALL";
-  case ARMISD::RET_FLAG:      return "ARMISD::RET_FLAG";
-  case ARMISD::SELECT:        return "ARMISD::SELECT";
-  case ARMISD::CMP:           return "ARMISD::CMP";
-  case ARMISD::BR:            return "ARMISD::BR";
-  case ARMISD::FSITOS:        return "ARMISD::FSITOS";
-  case ARMISD::FTOSIS:        return "ARMISD::FTOSIS";
-  case ARMISD::FSITOD:        return "ARMISD::FSITOD";
-  case ARMISD::FTOSID:        return "ARMISD::FTOSID";
-  case ARMISD::FUITOS:        return "ARMISD::FUITOS";
-  case ARMISD::FTOUIS:        return "ARMISD::FTOUIS";
-  case ARMISD::FUITOD:        return "ARMISD::FUITOD";
-  case ARMISD::FTOUID:        return "ARMISD::FTOUID";
-  case ARMISD::FMRRD:         return "ARMISD::FMRRD";
-  case ARMISD::FMDRR:         return "ARMISD::FMDRR";
-  case ARMISD::FMSTAT:        return "ARMISD::FMSTAT";
+bool ARMDAGToDAGISel::SelectAddrMode2(SDOperand Op, SDOperand N,
+                                      SDOperand &Base, SDOperand &Offset,
+                                      SDOperand &Opc) {
+  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB) {
+    Base = N;
+    if (N.getOpcode() == ISD::FrameIndex) {
+      int FI = cast<FrameIndexSDNode>(N)->getIndex();
+      Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+    } else if (N.getOpcode() == ARMISD::Wrapper) {
+      Base = N.getOperand(0);
+    }
+    Offset = CurDAG->getRegister(0, MVT::i32);
+    Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::add, 0,
+                                                      ARM_AM::no_shift),
+                                    MVT::i32);
+    return true;
   }
-}
-
-class ArgumentLayout {
-  std::vector<bool>           is_reg;
-  std::vector<unsigned>       pos;
-  std::vector<MVT::ValueType> types;
-public:
-  ArgumentLayout(const std::vector<MVT::ValueType> &Types) {
-    types = Types;
-
-    unsigned      RegNum = 0;
-    unsigned StackOffset = 0;
-    for(std::vector<MVT::ValueType>::const_iterator I = Types.begin();
-        I != Types.end();
-        ++I) {
-      MVT::ValueType VT = *I;
-      assert(VT == MVT::i32 || VT == MVT::f32 || VT == MVT::f64);
-      unsigned     size = MVT::getSizeInBits(VT)/32;
-
-      RegNum = ((RegNum + size - 1) / size) * size;
-      if (RegNum < 4) {
-        pos.push_back(RegNum);
-        is_reg.push_back(true);
-        RegNum += size;
-      } else {
-        unsigned bytes = size * 32/8;
-        StackOffset = ((StackOffset + bytes - 1) / bytes) * bytes;
-        pos.push_back(StackOffset);
-        is_reg.push_back(false);
-        StackOffset += bytes;
+  
+  // Match simple R +/- imm12 operands.
+  if (N.getOpcode() == ISD::ADD)
+    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+      int RHSC = (int)RHS->getValue();
+      if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits.
+        Base = N.getOperand(0);
+        Offset = CurDAG->getRegister(0, MVT::i32);
+        Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::add, RHSC,
+                                                          ARM_AM::no_shift),
+                                        MVT::i32);
+        return true;
+      } else if (RHSC < 0 && RHSC > -0x1000) {
+        Base = N.getOperand(0);
+        Offset = CurDAG->getRegister(0, MVT::i32);
+        Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::sub, -RHSC,
+                                                          ARM_AM::no_shift),
+                                        MVT::i32);
+        return true;
       }
     }
-  }
-  unsigned getRegisterNum(unsigned argNum) {
-    assert(isRegister(argNum));
-    return pos[argNum];
-  }
-  unsigned getOffset(unsigned argNum) {
-    assert(isOffset(argNum));
-    return pos[argNum];
-  }
-  unsigned isRegister(unsigned argNum) {
-    assert(argNum < is_reg.size());
-    return is_reg[argNum];
-  }
-  unsigned isOffset(unsigned argNum) {
-    return !isRegister(argNum);
-  }
-  MVT::ValueType getType(unsigned argNum) {
-    assert(argNum < types.size());
-    return types[argNum];
-  }
-  unsigned getStackSize(void) {
-    int last = is_reg.size() - 1;
-    if (last < 0)
-      return 0;
-    if (isRegister(last))
-      return 0;
-    return getOffset(last) + MVT::getSizeInBits(getType(last))/8;
-  }
-  int lastRegArg(void) {
-    int size = is_reg.size();
-    int last = 0;
-    while(last < size && isRegister(last))
-      last++;
-    last--;
-    return last;
-  }
-  int lastRegNum(void) {
-    int            l = lastRegArg();
-    if (l < 0)
-      return -1;
-    unsigned       r = getRegisterNum(l);
-    MVT::ValueType t = getType(l);
-    assert(t == MVT::i32 || t == MVT::f32 || t == MVT::f64);
-    if (t == MVT::f64)
-      return r + 1;
-    return r;
-  }
-};
-
-// This transforms a ISD::CALL node into a
-// callseq_star <- ARMISD:CALL <- callseq_end
-// chain
-static SDOperand LowerCALL(SDOperand Op, SelectionDAG &DAG) {
-  SDOperand Chain    = Op.getOperand(0);
-  unsigned CallConv  = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
-  assert((CallConv == CallingConv::C ||
-          CallConv == CallingConv::Fast)
-         && "unknown calling convention");
-  SDOperand Callee   = Op.getOperand(4);
-  unsigned NumOps    = (Op.getNumOperands() - 5) / 2;
-  SDOperand StackPtr = DAG.getRegister(ARM::R13, MVT::i32);
-  static const unsigned regs[] = {
-    ARM::R0, ARM::R1, ARM::R2, ARM::R3
-  };
-
-  std::vector<MVT::ValueType> Types;
-  for (unsigned i = 0; i < NumOps; ++i) {
-    MVT::ValueType VT = Op.getOperand(5+2*i).getValueType();
-    Types.push_back(VT);
-  }
-  ArgumentLayout Layout(Types);
-
-  unsigned NumBytes = Layout.getStackSize();
-
-  Chain = DAG.getCALLSEQ_START(Chain,
-                               DAG.getConstant(NumBytes, MVT::i32));
-
-  //Build a sequence of stores
-  std::vector<SDOperand> MemOpChains;
-  for (unsigned i = Layout.lastRegArg() + 1; i < NumOps; ++i) {
-    SDOperand      Arg = Op.getOperand(5+2*i);
-    unsigned ArgOffset = Layout.getOffset(i);
-    SDOperand   PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
-    PtrOff             = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff);
-    MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
-  }
-  if (!MemOpChains.empty())
-    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
-                        &MemOpChains[0], MemOpChains.size());
-
-  // If the callee is a GlobalAddress node (quite common, every direct call is)
-  // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
-  // Likewise ExternalSymbol -> TargetExternalSymbol.
-  assert(Callee.getValueType() == MVT::i32);
-  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
-    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), MVT::i32);
-  else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee))
-    Callee = DAG.getTargetExternalSymbol(E->getSymbol(), MVT::i32);
-
-  // If this is a direct call, pass the chain and the callee.
-  assert (Callee.Val);
-  std::vector<SDOperand> Ops;
-  Ops.push_back(Chain);
-  Ops.push_back(Callee);
-
-  // Build a sequence of copy-to-reg nodes chained together with token chain
-  // and flag operands which copy the outgoing args into the appropriate regs.
-  SDOperand InFlag;
-  for (int i = 0, e = Layout.lastRegArg(); i <= e; ++i) {
-    SDOperand     Arg = Op.getOperand(5+2*i);
-    unsigned   RegNum = Layout.getRegisterNum(i);
-    unsigned     Reg1 = regs[RegNum];
-    MVT::ValueType VT = Layout.getType(i);
-    assert(VT == Arg.getValueType());
-    assert(VT == MVT::i32 || VT == MVT::f32 || VT == MVT::f64);
-
-    // Add argument register to the end of the list so that it is known live
-    // into the call.
-    Ops.push_back(DAG.getRegister(Reg1, MVT::i32));
-    if (VT == MVT::f64) {
-      unsigned    Reg2 = regs[RegNum + 1];
-      SDOperand SDReg1 = DAG.getRegister(Reg1, MVT::i32);
-      SDOperand SDReg2 = DAG.getRegister(Reg2, MVT::i32);
-
-      Ops.push_back(DAG.getRegister(Reg2, MVT::i32));
-      SDVTList    VTs = DAG.getVTList(MVT::Other, MVT::Flag);
-      SDOperand Ops[] = {Chain, SDReg1, SDReg2, Arg, InFlag};
-      Chain = DAG.getNode(ARMISD::FMRRD, VTs, Ops, InFlag.Val ? 5 : 4);
+  
+  // Otherwise this is R +/- [possibly shifted] R
+  ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::ADD ? ARM_AM::add:ARM_AM::sub;
+  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(1));
+  unsigned ShAmt = 0;
+  
+  Base   = N.getOperand(0);
+  Offset = N.getOperand(1);
+  
+  if (ShOpcVal != ARM_AM::no_shift) {
+    // Check to see if the RHS of the shift is a constant, if not, we can't fold
+    // it.
+    if (ConstantSDNode *Sh =
+           dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
+      ShAmt = Sh->getValue();
+      Offset = N.getOperand(1).getOperand(0);
     } else {
-      if (VT == MVT::f32)
-        Arg = DAG.getNode(ISD::BIT_CONVERT, MVT::i32, Arg);
-      Chain = DAG.getCopyToReg(Chain, Reg1, Arg, InFlag);
+      ShOpcVal = ARM_AM::no_shift;
     }
-    InFlag = Chain.getValue(1);
   }
-
-  std::vector<MVT::ValueType> NodeTys;
-  NodeTys.push_back(MVT::Other);   // Returns a chain
-  NodeTys.push_back(MVT::Flag);    // Returns a flag for retval copy to use.
-
-  unsigned CallOpc = ARMISD::CALL;
-  if (InFlag.Val)
-    Ops.push_back(InFlag);
-  Chain = DAG.getNode(CallOpc, NodeTys, &Ops[0], Ops.size());
-  InFlag = Chain.getValue(1);
-
-  std::vector<SDOperand> ResultVals;
-  NodeTys.clear();
-
-  // If the call has results, copy the values out of the ret val registers.
-  MVT::ValueType VT = Op.Val->getValueType(0);
-  if (VT != MVT::Other) {
-    assert(VT == MVT::i32 || VT == MVT::f32 || VT == MVT::f64);
-
-    SDOperand Value1 = DAG.getCopyFromReg(Chain, ARM::R0, MVT::i32, InFlag);
-    Chain            = Value1.getValue(1);
-    InFlag           = Value1.getValue(2);
-    NodeTys.push_back(VT);
-    if (VT == MVT::i32) {
-      ResultVals.push_back(Value1);
-      if (Op.Val->getValueType(1) == MVT::i32) {
-        SDOperand Value2 = DAG.getCopyFromReg(Chain, ARM::R1, MVT::i32, InFlag);
-        Chain            = Value2.getValue(1);
-        ResultVals.push_back(Value2);
-        NodeTys.push_back(VT);
+  
+  // Try matching (R shl C) + (R).
+  if (N.getOpcode() == ISD::ADD && ShOpcVal == ARM_AM::no_shift) {
+    ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0));
+    if (ShOpcVal != ARM_AM::no_shift) {
+      // Check to see if the RHS of the shift is a constant, if not, we can't
+      // fold it.
+      if (ConstantSDNode *Sh =
+          dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
+        ShAmt = Sh->getValue();
+        Offset = N.getOperand(0).getOperand(0);
+        Base = N.getOperand(1);
+      } else {
+        ShOpcVal = ARM_AM::no_shift;
       }
     }
-    if (VT == MVT::f32) {
-      SDOperand Value = DAG.getNode(ISD::BIT_CONVERT, MVT::f32, Value1);
-      ResultVals.push_back(Value);
-    }
-    if (VT == MVT::f64) {
-      SDOperand Value2 = DAG.getCopyFromReg(Chain, ARM::R1, MVT::i32, InFlag);
-      Chain            = Value2.getValue(1);
-      SDOperand Value  = DAG.getNode(ARMISD::FMDRR, MVT::f64, Value1, Value2);
-      ResultVals.push_back(Value);
-    }
   }
-
-  Chain = DAG.getNode(ISD::CALLSEQ_END, MVT::Other, Chain,
-                      DAG.getConstant(NumBytes, MVT::i32));
-  NodeTys.push_back(MVT::Other);
-
-  if (ResultVals.empty())
-    return Chain;
-
-  ResultVals.push_back(Chain);
-  SDOperand Res = DAG.getNode(ISD::MERGE_VALUES, NodeTys, &ResultVals[0],
-                              ResultVals.size());
-  return Res.getValue(Op.ResNo);
+  
+  Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
+                                  MVT::i32);
+  return true;
 }
 
-static SDOperand LowerRET(SDOperand Op, SelectionDAG &DAG) {
-  SDOperand Copy;
-  SDOperand Chain = Op.getOperand(0);
-  SDOperand    R0 = DAG.getRegister(ARM::R0, MVT::i32);
-  SDOperand    R1 = DAG.getRegister(ARM::R1, MVT::i32);
-
-  switch(Op.getNumOperands()) {
-  default:
-    assert(0 && "Do not know how to return this many arguments!");
-    abort();
-  case 1: {
-    SDOperand LR = DAG.getRegister(ARM::R14, MVT::i32);
-    return DAG.getNode(ARMISD::RET_FLAG, MVT::Other, Chain);
-  }
-  case 3: {
-    SDOperand Val = Op.getOperand(1);
-    assert(Val.getValueType() == MVT::i32 ||
-	   Val.getValueType() == MVT::f32 ||
-	   Val.getValueType() == MVT::f64);
-
-    if (Val.getValueType() == MVT::f64) {
-      SDVTList    VTs = DAG.getVTList(MVT::Other, MVT::Flag);
-      SDOperand Ops[] = {Chain, R0, R1, Val};
-      Copy  = DAG.getNode(ARMISD::FMRRD, VTs, Ops, 4);
-    } else {
-      if (Val.getValueType() == MVT::f32)
-	Val = DAG.getNode(ISD::BIT_CONVERT, MVT::i32, Val);
-      Copy = DAG.getCopyToReg(Chain, R0, Val, SDOperand());
-    }
-
-    if (DAG.getMachineFunction().liveout_empty()) {
-      DAG.getMachineFunction().addLiveOut(ARM::R0);
-      if (Val.getValueType() == MVT::f64)
-        DAG.getMachineFunction().addLiveOut(ARM::R1);
+bool ARMDAGToDAGISel::SelectAddrMode2Offset(SDOperand Op, SDOperand N,
+                                            SDOperand &Offset, SDOperand &Opc) {
+  unsigned Opcode = Op.getOpcode();
+  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
+    ? cast<LoadSDNode>(Op)->getAddressingMode()
+    : cast<StoreSDNode>(Op)->getAddressingMode();
+  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
+    ? ARM_AM::add : ARM_AM::sub;
+  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) {
+    int Val = (int)C->getValue();
+    if (Val >= 0 && Val < 0x1000) { // 12 bits.
+      Offset = CurDAG->getRegister(0, MVT::i32);
+      Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val,
+                                                        ARM_AM::no_shift),
+                                      MVT::i32);
+      return true;
     }
-    break;
   }
-  case 5:
-    Copy = DAG.getCopyToReg(Chain, ARM::R1, Op.getOperand(3), SDOperand());
-    Copy = DAG.getCopyToReg(Copy, ARM::R0, Op.getOperand(1), Copy.getValue(1));
-    // If we haven't noted the R0+R1 are live out, do so now.
-    if (DAG.getMachineFunction().liveout_empty()) {
-      DAG.getMachineFunction().addLiveOut(ARM::R0);
-      DAG.getMachineFunction().addLiveOut(ARM::R1);
+
+  Offset = N;
+  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N);
+  unsigned ShAmt = 0;
+  if (ShOpcVal != ARM_AM::no_shift) {
+    // Check to see if the RHS of the shift is a constant, if not, we can't fold
+    // it.
+    if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+      ShAmt = Sh->getValue();
+      Offset = N.getOperand(0);
+    } else {
+      ShOpcVal = ARM_AM::no_shift;
     }
-    break;
   }
 
-  //We must use RET_FLAG instead of BRIND because BRIND doesn't have a flag
-  return DAG.getNode(ARMISD::RET_FLAG, MVT::Other, Copy, Copy.getValue(1));
+  Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
+                                  MVT::i32);
+  return true;
 }
 
-static SDOperand LowerConstantPool(SDOperand Op, SelectionDAG &DAG) {
-  MVT::ValueType PtrVT = Op.getValueType();
-  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
-  Constant *C = CP->getConstVal();
-  SDOperand CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
 
-  return CPI;
-}
-
-SDOperand LegalizeImmediate(uint32_t immediate, SelectionDAG &DAG,
-                            bool canReturnConstant){
-  SDOperand Shift = DAG.getTargetConstant(0, MVT::i32);
-  SDOperand ShiftType = DAG.getTargetConstant(ARMShift::LSL, MVT::i32);
-  std::vector<unsigned>immediatePieces = splitImmediate(immediate);
-  if (immediatePieces.size()>1){
-    unsigned movInst = ARM::MOV;
-    unsigned orInst = ARM::ORR;
-    SDNode *node;
-    //try mvn
-    std::vector<unsigned>immediateNegPieces = splitImmediate(~immediate);
-    if (immediatePieces.size() > immediateNegPieces.size()) {
-      //use mvn/eor
-      movInst = ARM::MVN;
-      orInst = ARM::EOR;
-      immediatePieces = immediateNegPieces;
-    }
-    SDOperand n = DAG.getTargetConstant(immediatePieces[0], MVT::i32);
-    node = DAG.getTargetNode(movInst, MVT::i32, n, Shift, ShiftType);
-    std::vector<unsigned>::iterator it;
-    for (it=immediatePieces.begin()+1; it != immediatePieces.end(); ++it){
-      n = DAG.getTargetConstant(*it, MVT::i32);
-      SDOperand ops[] = {SDOperand(node, 0), n, Shift, ShiftType};
-      node = DAG.getTargetNode(orInst, MVT::i32, ops, 4);
+bool ARMDAGToDAGISel::SelectAddrMode3(SDOperand Op, SDOperand N,
+                                      SDOperand &Base, SDOperand &Offset,
+                                      SDOperand &Opc) {
+  if (N.getOpcode() == ISD::SUB) {
+    // X - C is canonicalized to X + -C, so no need to handle it here.
+    Base = N.getOperand(0);
+    Offset = N.getOperand(1);
+    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0),MVT::i32);
+    return true;
+  }
+  
+  if (N.getOpcode() != ISD::ADD) {
+    Base = N;
+    if (N.getOpcode() == ISD::FrameIndex) {
+      int FI = cast<FrameIndexSDNode>(N)->getIndex();
+      Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
     }
-    return SDOperand(node, 0);
-  } else {
-    if (canReturnConstant)
-      return DAG.getTargetConstant(immediate, MVT::i32);
-    else {
-      SDOperand n = DAG.getTargetConstant(immediate, MVT::i32);
-      SDNode *node = DAG.getTargetNode(ARM::MOV,  MVT::i32, n, Shift,
-                                       ShiftType);
-      return SDOperand(node, 0);
+    Offset = CurDAG->getRegister(0, MVT::i32);
+    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0),MVT::i32);
+    return true;
+  }
+  
+  // If the RHS is +/- imm8, fold into addr mode.
+  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+    int RHSC = (int)RHS->getValue();
+    if (RHSC >= 0 && RHSC < 256) {
+      Base = N.getOperand(0);
+      Offset = CurDAG->getRegister(0, MVT::i32);
+      Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, RHSC),
+                                      MVT::i32);
+      return true;
+    } else if (RHSC < 0 && RHSC > -256) { // note -256 itself isn't allowed.
+      Base = N.getOperand(0);
+      Offset = CurDAG->getRegister(0, MVT::i32);
+      Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, -RHSC),
+                                      MVT::i32);
+      return true;
     }
   }
+  
+  Base = N.getOperand(0);
+  Offset = N.getOperand(1);
+  Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), MVT::i32);
+  return true;
 }
 
-static SDOperand LowerConstantFP(SDOperand Op, SelectionDAG &DAG) {
-  MVT::ValueType VT = Op.getValueType();
-  SDOperand Shift     = DAG.getTargetConstant(0, MVT::i32);
-  SDOperand ShiftType = DAG.getTargetConstant(ARMShift::LSL, MVT::i32);
-  SDNode *node;
-  switch (VT) {
-  default: assert(0 && "VT!=f32 && VT!=f64");
-  case MVT::f32: {
-    float val = cast<ConstantFPSDNode>(Op)->getValue();
-    uint32_t i32_val = FloatToBits(val);
-    SDOperand c = LegalizeImmediate(i32_val, DAG, false);
-    node = DAG.getTargetNode(ARM::FMSR, MVT::f32, c);
-    break;
-  }
-  case MVT::f64: {
-    double val = cast<ConstantFPSDNode>(Op)->getValue();
-    uint64_t i64_val = DoubleToBits(val);
-    SDOperand hi = LegalizeImmediate(Hi_32(i64_val), DAG, false);
-    SDOperand lo = LegalizeImmediate(Lo_32(i64_val), DAG, false);
-    node = DAG.getTargetNode(ARM::FMDRR, MVT::f64, lo, hi);
-    break;
-  }
+bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDOperand Op, SDOperand N,
+                                            SDOperand &Offset, SDOperand &Opc) {
+  unsigned Opcode = Op.getOpcode();
+  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
+    ? cast<LoadSDNode>(Op)->getAddressingMode()
+    : cast<StoreSDNode>(Op)->getAddressingMode();
+  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
+    ? ARM_AM::add : ARM_AM::sub;
+  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) {
+    int Val = (int)C->getValue();
+    if (Val >= 0 && Val < 256) {
+      Offset = CurDAG->getRegister(0, MVT::i32);
+      Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), MVT::i32);
+      return true;
+    }
   }
-  return SDOperand(node, 0);
-}
 
-static SDOperand LowerGlobalAddress(SDOperand Op,
-				    SelectionDAG &DAG) {
-  GlobalValue  *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
-  int alignment = 2;
-  SDOperand CPAddr = DAG.getConstantPool(GV, MVT::i32, alignment);
-  return DAG.getLoad(MVT::i32, DAG.getEntryNode(), CPAddr, NULL, 0);
+  Offset = N;
+  Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), MVT::i32);
+  return true;
 }
 
-static SDOperand LowerVASTART(SDOperand Op, SelectionDAG &DAG,
-                              unsigned VarArgsFrameIndex) {
-  // vastart just stores the address of the VarArgsFrameIndex slot into the
-  // memory location argument.
-  MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
-  SDOperand FR = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
-  SrcValueSDNode *SV = cast<SrcValueSDNode>(Op.getOperand(2));
-  return DAG.getStore(Op.getOperand(0), FR, Op.getOperand(1), SV->getValue(),
-                      SV->getOffset());
-}
 
-static SDOperand LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG,
-				       int &VarArgsFrameIndex) {
-  MachineFunction   &MF = DAG.getMachineFunction();
-  MachineFrameInfo *MFI = MF.getFrameInfo();
-  SSARegMap     *RegMap = MF.getSSARegMap();
-  unsigned      NumArgs = Op.Val->getNumValues()-1;
-  SDOperand        Root = Op.getOperand(0);
-  bool         isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
-  static const unsigned REGS[] = {
-    ARM::R0, ARM::R1, ARM::R2, ARM::R3
-  };
-
-  std::vector<MVT::ValueType> Types(Op.Val->value_begin(), Op.Val->value_end() - 1);
-  ArgumentLayout Layout(Types);
-
-  std::vector<SDOperand> ArgValues;
-  for (unsigned ArgNo = 0; ArgNo < NumArgs; ++ArgNo) {
-    MVT::ValueType VT = Types[ArgNo];
-
-    SDOperand Value;
-    if (Layout.isRegister(ArgNo)) {
-      assert(VT == MVT::i32 || VT == MVT::f32 || VT == MVT::f64);
-      unsigned  RegNum = Layout.getRegisterNum(ArgNo);
-      unsigned    Reg1 = REGS[RegNum];
-      unsigned   VReg1 = RegMap->createVirtualRegister(&ARM::IntRegsRegClass);
-      SDOperand Value1 = DAG.getCopyFromReg(Root, VReg1, MVT::i32);
-      MF.addLiveIn(Reg1, VReg1);
-      if (VT == MVT::f64) {
-        unsigned    Reg2 = REGS[RegNum + 1];
-        unsigned   VReg2 = RegMap->createVirtualRegister(&ARM::IntRegsRegClass);
-        SDOperand Value2 = DAG.getCopyFromReg(Root, VReg2, MVT::i32);
-        MF.addLiveIn(Reg2, VReg2);
-        Value            = DAG.getNode(ARMISD::FMDRR, MVT::f64, Value1, Value2);
-      } else {
-        Value = Value1;
-        if (VT == MVT::f32)
-          Value = DAG.getNode(ISD::BIT_CONVERT, VT, Value);
-      }
-    } else {
-      // If the argument is actually used, emit a load from the right stack
-      // slot.
-      if (!Op.Val->hasNUsesOfValue(0, ArgNo)) {
-        unsigned Offset = Layout.getOffset(ArgNo);
-        unsigned   Size = MVT::getSizeInBits(VT)/8;
-        int          FI = MFI->CreateFixedObject(Size, Offset);
-        SDOperand   FIN = DAG.getFrameIndex(FI, VT);
-        Value = DAG.getLoad(VT, Root, FIN, NULL, 0);
-      } else {
-        Value = DAG.getNode(ISD::UNDEF, VT);
-      }
+bool ARMDAGToDAGISel::SelectAddrMode5(SDOperand Op, SDOperand N,
+                                      SDOperand &Base, SDOperand &Offset) {
+  if (N.getOpcode() != ISD::ADD) {
+    Base = N;
+    if (N.getOpcode() == ISD::FrameIndex) {
+      int FI = cast<FrameIndexSDNode>(N)->getIndex();
+      Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+    } else if (N.getOpcode() == ARMISD::Wrapper) {
+      Base = N.getOperand(0);
     }
-    ArgValues.push_back(Value);
+    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
+                                       MVT::i32);
+    return true;
   }
-
-  unsigned NextRegNum = Layout.lastRegNum() + 1;
-
-  if (isVarArg) {
-    //If this function is vararg we must store the remaing
-    //registers so that they can be acessed with va_start
-    VarArgsFrameIndex = MFI->CreateFixedObject(MVT::getSizeInBits(MVT::i32)/8,
-                                               -16 + NextRegNum * 4);
-
-    SmallVector<SDOperand, 4> MemOps;
-    for (unsigned RegNo = NextRegNum; RegNo < 4; ++RegNo) {
-      int RegOffset = - (4 - RegNo) * 4;
-      int FI = MFI->CreateFixedObject(MVT::getSizeInBits(MVT::i32)/8,
-				      RegOffset);
-      SDOperand FIN = DAG.getFrameIndex(FI, MVT::i32);
-
-      unsigned VReg = RegMap->createVirtualRegister(&ARM::IntRegsRegClass);
-      MF.addLiveIn(REGS[RegNo], VReg);
-
-      SDOperand Val = DAG.getCopyFromReg(Root, VReg, MVT::i32);
-      SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
-      MemOps.push_back(Store);
+  
+  // If the RHS is +/- imm8, fold into addr mode.
+  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+    int RHSC = (int)RHS->getValue();
+    if ((RHSC & 3) == 0) {  // The constant is implicitly multiplied by 4.
+      RHSC >>= 2;
+      if (RHSC >= 0 && RHSC < 256) {
+        Base = N.getOperand(0);
+        Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, RHSC),
+                                           MVT::i32);
+        return true;
+      } else if (RHSC < 0 && RHSC > -256) { // note -256 itself isn't allowed.
+        Base = N.getOperand(0);
+        Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::sub,-RHSC),
+                                           MVT::i32);
+        return true;
+      }
     }
-    Root = DAG.getNode(ISD::TokenFactor, MVT::Other,&MemOps[0],MemOps.size());
   }
-
-  ArgValues.push_back(Root);
-
-  // Return the new list of results.
-  std::vector<MVT::ValueType> RetVT(Op.Val->value_begin(),
-                                    Op.Val->value_end());
-  return DAG.getNode(ISD::MERGE_VALUES, RetVT, &ArgValues[0], ArgValues.size());
-}
-
-static SDOperand GetCMP(ISD::CondCode CC, SDOperand LHS, SDOperand RHS,
-                        SelectionDAG &DAG) {
-  MVT::ValueType vt = LHS.getValueType();
-  assert(vt == MVT::i32 || vt == MVT::f32 || vt == MVT::f64);
-
-  SDOperand Cmp = DAG.getNode(ARMISD::CMP,  MVT::Flag, LHS, RHS);
-
-  if (vt != MVT::i32)
-    Cmp = DAG.getNode(ARMISD::FMSTAT, MVT::Flag, Cmp);
-  return Cmp;
-}
-
-static std::vector<SDOperand> GetARMCC(ISD::CondCode CC, MVT::ValueType vt,
-                          SelectionDAG &DAG) {
-  assert(vt == MVT::i32 || vt == MVT::f32 || vt == MVT::f64);
-  std::vector<unsigned> vcc;
-  if (vt == MVT::i32)
-    vcc = DAGIntCCToARMCC(CC);
-  else
-    vcc = DAGFPCCToARMCC(CC);
-
-  std::vector<unsigned>::iterator it;
-  std::vector<SDOperand> result;
-  for( it = vcc.begin(); it != vcc.end(); it++ )
-    result.push_back(DAG.getConstant(*it,MVT::i32));
-  return result;
-}
-
-static bool isUInt8Immediate(uint32_t x) {
-  return x < (1 << 8);
-}
-
-static uint32_t rotateL(uint32_t x) {
-  uint32_t bit31 = (x & (1 << 31)) >> 31;
-  uint32_t     t = x << 1;
-  return t | bit31;
+  
+  Base = N;
+  Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
+                                     MVT::i32);
+  return true;
 }
 
-static bool isRotInt8Immediate(uint32_t x) {
-  int r;
-  for (r = 0; r < 16; r++) {
-    if (isUInt8Immediate(x))
-      return true;
-    x = rotateL(rotateL(x));
+bool ARMDAGToDAGISel::SelectAddrModePC(SDOperand Op, SDOperand N,
+                                        SDOperand &Offset, SDOperand &Label) {
+  if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
+    Offset = N.getOperand(0);
+    SDOperand N1 = N.getOperand(1);
+    Label  = CurDAG->getTargetConstant(cast<ConstantSDNode>(N1)->getValue(),
+                                       MVT::i32);
+    return true;
   }
   return false;
 }
 
-static void LowerCMP(SDOperand &Cmp, std::vector<SDOperand> &ARMCC,
-                     SDOperand LHS, SDOperand RHS, ISD::CondCode CC,
-                     SelectionDAG &DAG) {
-  MVT::ValueType vt = LHS.getValueType();
-  if (vt == MVT::i32) {
-    assert(!isa<ConstantSDNode>(LHS));
-    if (ConstantSDNode *SD_C = dyn_cast<ConstantSDNode>(RHS.Val)) {
-      uint32_t C = SD_C->getValue();
-
-      uint32_t NC;
-      switch(CC) {
-      default:
-        NC = C; break;
-      case ISD::SETLT:
-      case ISD::SETULT:
-      case ISD::SETGE:
-      case ISD::SETUGE:
-        NC = C - 1; break;
-      case ISD::SETLE:
-      case ISD::SETULE:
-      case ISD::SETGT:
-      case ISD::SETUGT:
-        NC = C + 1; break;
-      }
-
-      ISD::CondCode NCC;
-      switch(CC) {
-      default:
-        NCC = CC; break;
-      case ISD::SETLT:
-        NCC = ISD::SETLE; break;
-      case ISD::SETULT:
-        NCC = ISD::SETULE; break;
-      case ISD::SETGE:
-        NCC = ISD::SETGT; break;
-      case ISD::SETUGE:
-        NCC = ISD::SETUGT; break;
-      case ISD::SETLE:
-        NCC = ISD::SETLT; break;
-      case ISD::SETULE:
-        NCC = ISD::SETULT; break;
-      case ISD::SETGT:
-        NCC = ISD::SETGE; break;
-      case ISD::SETUGT:
-        NCC = ISD::SETUGE; break;
-      }
-
-      if (!isRotInt8Immediate(C) && isRotInt8Immediate(NC)) {
-        RHS = DAG.getConstant(NC, MVT::i32);
-        CC  = NCC;
-      }
-    }
-  }
-  Cmp   = GetCMP(CC, LHS, RHS, DAG);
-  ARMCC = GetARMCC(CC, vt, DAG);
+bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDOperand Op, SDOperand N,
+                                            SDOperand &Base, SDOperand &Offset){
+  if (N.getOpcode() != ISD::ADD)
+    return false;
+  Base = N.getOperand(0);
+  Offset = N.getOperand(1);
+  return true;
 }
 
-static SDOperand LowerSELECT_CC(SDOperand Op, SelectionDAG &DAG) {
-  SDOperand LHS = Op.getOperand(0);
-  SDOperand RHS = Op.getOperand(1);
-  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
-  SDOperand TrueVal = Op.getOperand(2);
-  SDOperand FalseVal = Op.getOperand(3);
-  SDOperand Cmp;
-  std::vector<SDOperand> ARMCC;
-  LowerCMP(Cmp, ARMCC, LHS, RHS, CC, DAG);
-
-  SDOperand Aux = FalseVal;
-  SDVTList  VTs = DAG.getVTList(Op.getValueType(), MVT::Flag);
-  std::vector<SDOperand>::iterator it;
-  for (it = ARMCC.begin(); it != ARMCC.end(); ++it){
-    SDOperand  Flag = it == ARMCC.begin() ? Cmp : Aux.getValue(1);
-    SDOperand Ops[] = {TrueVal, Aux, *it, Flag};
-    Aux  = DAG.getNode(ARMISD::SELECT, VTs, Ops, 4);
+static bool SelectThumbAddrModeRI5(SDOperand N, unsigned Scale,
+                                   TargetLowering &TLI, SelectionDAG *CurDAG,
+                                   SDOperand &Base, SDOperand &Offset) {
+  if (N.getOpcode() == ISD::FrameIndex)
+    return false;
+    
+  if (N.getOpcode() != ISD::ADD) {
+    Base = (N.getOpcode() == ARMISD::Wrapper) ? N.getOperand(0) : N;
+    Offset = CurDAG->getTargetConstant(0, MVT::i32);
+    return true;
   }
-  return Aux;
-}
 
-static SDOperand LowerBR_CC(SDOperand Op, SelectionDAG &DAG) {
-  SDOperand  Chain = Op.getOperand(0);
-  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
-  SDOperand    LHS = Op.getOperand(2);
-  SDOperand    RHS = Op.getOperand(3);
-  SDOperand   Dest = Op.getOperand(4);
-  SDOperand Cmp;
-  std::vector<SDOperand> ARMCC;
-  LowerCMP(Cmp, ARMCC, LHS, RHS, CC, DAG);
-
-  SDOperand Aux = Chain;
-  SDVTList  VTs = DAG.getVTList(MVT::Other, MVT::Flag);
-  std::vector<SDOperand>::iterator it;
-  for (it = ARMCC.begin(); it != ARMCC.end(); it++){
-    SDOperand  Flag = it == ARMCC.begin() ? Cmp : Aux.getValue(1);
-    SDOperand Ops[] = {Aux, Dest, *it, Flag};
-    Aux = DAG.getNode(ARMISD::BR, VTs, Ops, 4);
+  // If the RHS is + imm5 * scale, fold into addr mode.
+  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+    int RHSC = (int)RHS->getValue();
+    if ((RHSC & (Scale-1)) == 0) {  // The constant is implicitly multiplied.
+      RHSC /= Scale;
+      if (RHSC >= 0 && RHSC < 32) {
+        Base = N.getOperand(0);
+        Offset = CurDAG->getTargetConstant(RHSC, MVT::i32);
+        return true;
+      }
+    }
   }
-  return Aux;
-}
 
-static SDOperand LowerSINT_TO_FP(SDOperand Op, SelectionDAG &DAG) {
-  SDOperand IntVal  = Op.getOperand(0);
-  assert(IntVal.getValueType() == MVT::i32);
-  MVT::ValueType vt = Op.getValueType();
-  assert(vt == MVT::f32 ||
-         vt == MVT::f64);
-
-  SDOperand Tmp = DAG.getNode(ISD::BIT_CONVERT, MVT::f32, IntVal);
-  ARMISD::NodeType op = vt == MVT::f32 ? ARMISD::FSITOS : ARMISD::FSITOD;
-  return DAG.getNode(op, vt, Tmp);
-}
-
-static SDOperand LowerFP_TO_SINT(SDOperand Op, SelectionDAG &DAG) {
-  assert(Op.getValueType() == MVT::i32);
-  SDOperand FloatVal = Op.getOperand(0);
-  MVT::ValueType  vt = FloatVal.getValueType();
-  assert(vt == MVT::f32 || vt == MVT::f64);
-
-  ARMISD::NodeType op = vt == MVT::f32 ? ARMISD::FTOSIS : ARMISD::FTOSID;
-  SDOperand Tmp = DAG.getNode(op, MVT::f32, FloatVal);
-  return DAG.getNode(ISD::BIT_CONVERT, MVT::i32, Tmp);
+  return false;
 }
 
-static SDOperand LowerUINT_TO_FP(SDOperand Op, SelectionDAG &DAG) {
-  SDOperand IntVal  = Op.getOperand(0);
-  assert(IntVal.getValueType() == MVT::i32);
-  MVT::ValueType vt = Op.getValueType();
-  assert(vt == MVT::f32 ||
-         vt == MVT::f64);
-
-  SDOperand Tmp = DAG.getNode(ISD::BIT_CONVERT, MVT::f32, IntVal);
-  ARMISD::NodeType op = vt == MVT::f32 ? ARMISD::FUITOS : ARMISD::FUITOD;
-  return DAG.getNode(op, vt, Tmp);
+bool ARMDAGToDAGISel::SelectThumbAddrModeRI5_1(SDOperand Op, SDOperand N,
+                                            SDOperand &Base, SDOperand &Offset){
+  return SelectThumbAddrModeRI5(N, 1, TLI, CurDAG, Base, Offset);
 }
 
-static SDOperand LowerFP_TO_UINT(SDOperand Op, SelectionDAG &DAG) {
-  assert(Op.getValueType() == MVT::i32);
-  SDOperand FloatVal = Op.getOperand(0);
-  MVT::ValueType  vt = FloatVal.getValueType();
-  assert(vt == MVT::f32 || vt == MVT::f64);
-
-  ARMISD::NodeType op = vt == MVT::f32 ? ARMISD::FTOUIS : ARMISD::FTOUID;
-  SDOperand Tmp = DAG.getNode(op, MVT::f32, FloatVal);
-  return DAG.getNode(ISD::BIT_CONVERT, MVT::i32, Tmp);
+bool ARMDAGToDAGISel::SelectThumbAddrModeRI5_2(SDOperand Op, SDOperand N,
+                                            SDOperand &Base, SDOperand &Offset){
+  return SelectThumbAddrModeRI5(N, 2, TLI, CurDAG, Base, Offset);
 }
 
-SDOperand ARMTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
-  switch (Op.getOpcode()) {
-  default:
-    assert(0 && "Should not custom lower this!");
-    abort();
-  case ISD::ConstantPool:
-    return LowerConstantPool(Op, DAG);
-  case ISD::ConstantFP:
-    return LowerConstantFP(Op, DAG);
-  case ISD::GlobalAddress:
-    return LowerGlobalAddress(Op, DAG);
-  case ISD::FP_TO_SINT:
-    return LowerFP_TO_SINT(Op, DAG);
-  case ISD::SINT_TO_FP:
-    return LowerSINT_TO_FP(Op, DAG);
-  case ISD::FP_TO_UINT:
-    return LowerFP_TO_UINT(Op, DAG);
-  case ISD::UINT_TO_FP:
-    return LowerUINT_TO_FP(Op, DAG);
-  case ISD::FORMAL_ARGUMENTS:
-    return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
-  case ISD::CALL:
-    return LowerCALL(Op, DAG);
-  case ISD::RET:
-    return LowerRET(Op, DAG);
-  case ISD::SELECT_CC:
-    return LowerSELECT_CC(Op, DAG);
-  case ISD::BR_CC:
-    return LowerBR_CC(Op, DAG);
-  case ISD::VASTART:
-    return LowerVASTART(Op, DAG, VarArgsFrameIndex);
-  }
+bool ARMDAGToDAGISel::SelectThumbAddrModeRI5_4(SDOperand Op, SDOperand N,
+                                            SDOperand &Base, SDOperand &Offset){
+  return SelectThumbAddrModeRI5(N, 4, TLI, CurDAG, Base, Offset);
 }
 
-//===----------------------------------------------------------------------===//
-// Instruction Selector Implementation
-//===----------------------------------------------------------------------===//
-
-//===--------------------------------------------------------------------===//
-/// ARMDAGToDAGISel - ARM specific code to select ARM machine
-/// instructions for SelectionDAG operations.
-///
-namespace {
-class ARMDAGToDAGISel : public SelectionDAGISel {
-  ARMTargetLowering Lowering;
-
-public:
-  ARMDAGToDAGISel(TargetMachine &TM)
-    : SelectionDAGISel(Lowering), Lowering(TM) {
-  }
-
-  SDNode *Select(SDOperand Op);
-  virtual void InstructionSelectBasicBlock(SelectionDAG &DAG);
-  bool SelectAddrMode1(SDOperand Op, SDOperand N, SDOperand &Arg,
-                       SDOperand &Shift, SDOperand &ShiftType);
-  bool SelectAddrMode1a(SDOperand Op, SDOperand N, SDOperand &Arg,
-			SDOperand &Shift, SDOperand &ShiftType);
-  bool SelectAddrMode2(SDOperand Op, SDOperand N, SDOperand &Arg,
-                       SDOperand &Offset);
-  bool SelectAddrMode5(SDOperand Op, SDOperand N, SDOperand &Arg,
-                       SDOperand &Offset);
-
-  // Include the pieces autogenerated from the target description.
-#include "ARMGenDAGISel.inc"
-};
-
-void ARMDAGToDAGISel::InstructionSelectBasicBlock(SelectionDAG &DAG) {
-  DEBUG(BB->dump());
-
-  DAG.setRoot(SelectRoot(DAG.getRoot()));
-  DAG.RemoveDeadNodes();
-
-  ScheduleAndEmitDAG(DAG);
-}
-
-static bool isInt12Immediate(SDNode *N, short &Imm) {
-  if (N->getOpcode() != ISD::Constant)
-    return false;
-
-  int32_t t = cast<ConstantSDNode>(N)->getValue();
-  int max = 1<<12;
-  int min = -max;
-  if (t > min && t < max) {
-    Imm = t;
-    return true;
-  }
-  else
-    return false;
-}
-
-static bool isInt12Immediate(SDOperand Op, short &Imm) {
-  return isInt12Immediate(Op.Val, Imm);
-}
-
-bool ARMDAGToDAGISel::SelectAddrMode1(SDOperand Op,
-                                      SDOperand N,
-				      SDOperand &Arg,
-				      SDOperand &Shift,
-				      SDOperand &ShiftType) {
-  switch(N.getOpcode()) {
-  case ISD::Constant: {
-    uint32_t val = cast<ConstantSDNode>(N)->getValue();
-    Shift        = CurDAG->getTargetConstant(0, MVT::i32);
-    ShiftType    = CurDAG->getTargetConstant(ARMShift::LSL, MVT::i32);
-    Arg = LegalizeImmediate(val, *CurDAG, true);
-    return true;
-  }
-
-  case ISD::SRA:
-    Arg       = N.getOperand(0);
-    Shift     = N.getOperand(1);
-    ShiftType = CurDAG->getTargetConstant(ARMShift::ASR, MVT::i32);
-    return true;
-  case ISD::SRL:
-    Arg       = N.getOperand(0);
-    Shift     = N.getOperand(1);
-    ShiftType = CurDAG->getTargetConstant(ARMShift::LSR, MVT::i32);
-    return true;
-  case ISD::SHL:
-    Arg       = N.getOperand(0);
-    Shift     = N.getOperand(1);
-    ShiftType = CurDAG->getTargetConstant(ARMShift::LSL, MVT::i32);
+bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDOperand Op, SDOperand N,
+                                           SDOperand &Base, SDOperand &Offset) {
+  if (N.getOpcode() == ISD::FrameIndex) {
+    int FI = cast<FrameIndexSDNode>(N)->getIndex();
+    Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+    Offset = CurDAG->getTargetConstant(0, MVT::i32);
     return true;
   }
-
-  Arg       = N;
-  Shift     = CurDAG->getTargetConstant(0, MVT::i32);
-  ShiftType = CurDAG->getTargetConstant(ARMShift::LSL, MVT::i32);
-  return true;
+  
+  return false;
 }
 
-bool ARMDAGToDAGISel::SelectAddrMode2(SDOperand Op, SDOperand N,
-                                      SDOperand &Arg, SDOperand &Offset) {
-  //TODO: complete and cleanup!
-  SDOperand Zero = CurDAG->getTargetConstant(0, MVT::i32);
-  if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(N)) {
-    Arg    = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
-    Offset = Zero;
-    return true;
-  }
-  if (N.getOpcode() == ISD::ADD) {
-    short imm = 0;
-    if (isInt12Immediate(N.getOperand(1), imm)) {
-      Offset = CurDAG->getTargetConstant(imm, MVT::i32);
-      if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
-	Arg = CurDAG->getTargetFrameIndex(FI->getIndex(), N.getValueType());
-      } else {
-	Arg = N.getOperand(0);
-      }
-      return true; // [r+i]
-    }
+bool ARMDAGToDAGISel::SelectShifterOperandReg(SDOperand Op,
+                                              SDOperand N, 
+                                              SDOperand &BaseReg,
+                                              SDOperand &ShReg,
+                                              SDOperand &Opc) {
+  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N);
+
+  // Don't match base register only case. That is matched to a separate
+  // lower complexity pattern with explicit register operand.
+  if (ShOpcVal == ARM_AM::no_shift) return false;
+  
+  BaseReg = N.getOperand(0);
+  unsigned ShImmVal = 0;
+  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+    ShReg = CurDAG->getRegister(0, MVT::i32);
+    ShImmVal = RHS->getValue() & 31;
+  } else {
+    ShReg = N.getOperand(1);
   }
-  Offset = Zero;
-  if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N))
-    Arg = CurDAG->getTargetFrameIndex(FI->getIndex(), N.getValueType());
-  else
-    Arg = N;
+  Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
+                                  MVT::i32);
   return true;
 }
 
-bool ARMDAGToDAGISel::SelectAddrMode5(SDOperand Op,
-                                      SDOperand N, SDOperand &Arg,
-                                      SDOperand &Offset) {
-  //TODO: detect offset
-  Offset = CurDAG->getTargetConstant(0, MVT::i32);
-  Arg    = N;
-  return true;
-}
 
 SDNode *ARMDAGToDAGISel::Select(SDOperand Op) {
   SDNode *N = Op.Val;
+  unsigned Opcode = N->getOpcode();
+
+  if (Opcode >= ISD::BUILTIN_OP_END && Opcode < ARMISD::FIRST_NUMBER)
+    return NULL;   // Already selected.
 
   switch (N->getOpcode()) {
-  default:
-    return SelectCode(Op);
+  default: break;
+  case ISD::Constant: {
+    unsigned Val = cast<ConstantSDNode>(N)->getValue();
+    bool UseCP = true;
+    if (Subtarget->isThumb())
+      UseCP = (Val > 255 &&                          // MOV
+               ~Val > 255 &&                         // MOV + MVN
+               !ARM_AM::isThumbImmShiftedVal(Val));  // MOV + LSL
+    else
+      UseCP = (ARM_AM::getSOImmVal(Val) == -1 &&     // MOV
+               ARM_AM::getSOImmVal(~Val) == -1 &&    // MVN
+               !ARM_AM::isSOImmTwoPartVal(Val));     // two instrs.
+    if (UseCP) {
+      SDOperand CPIdx =
+        CurDAG->getTargetConstantPool(ConstantInt::get(Type::Int32Ty, Val),
+                                      TLI.getPointerTy());
+      SDOperand Ops[] = {
+        CPIdx, 
+        CurDAG->getRegister(0, MVT::i32),
+        CurDAG->getTargetConstant(0, MVT::i32),
+        CurDAG->getEntryNode()
+      };
+      SDNode *ResNode = 
+        CurDAG->getTargetNode(ARM::LDR, MVT::i32, MVT::Other, Ops, 4);
+      ReplaceUses(Op, SDOperand(ResNode, 0));
+      return NULL;
+    }
+      
+    // Other cases are autogenerated.
     break;
+  }
   case ISD::FrameIndex: {
+    // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm.
     int FI = cast<FrameIndexSDNode>(N)->getIndex();
-    SDOperand Ops[] = {CurDAG->getTargetFrameIndex(FI, MVT::i32),
-                       CurDAG->getTargetConstant(0, MVT::i32),
-                       CurDAG->getTargetConstant(0, MVT::i32),
-                       CurDAG->getTargetConstant(ARMShift::LSL, MVT::i32)};
+    unsigned Opc = Subtarget->isThumb() ? ARM::tADDrSPi : ARM::ADDri;
+    SDOperand TFI = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+    return CurDAG->SelectNodeTo(N, Opc, MVT::i32, TFI,
+                                CurDAG->getTargetConstant(0, MVT::i32));
+  }
+  case ISD::MUL:
+    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+      unsigned RHSV = C->getValue();
+      if (!RHSV) break;
+      if (isPowerOf2_32(RHSV-1)) {  // 2^n+1?
+        SDOperand V = Op.getOperand(0);
+        AddToISelQueue(V);
+        unsigned ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, Log2_32(RHSV-1));
+        SDOperand Ops[] = { V, V, CurDAG->getRegister(0, MVT::i32),
+          CurDAG->getTargetConstant(ShImm, MVT::i32)
+        };
+        return CurDAG->SelectNodeTo(N, ARM::ADDrs, MVT::i32, Ops, 4);
+      }
+      if (isPowerOf2_32(RHSV+1)) {  // 2^n-1?
+        SDOperand V = Op.getOperand(0);
+        AddToISelQueue(V);
+        unsigned ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, Log2_32(RHSV+1));
+        SDOperand Ops[] = { V, V, CurDAG->getRegister(0, MVT::i32),
+          CurDAG->getTargetConstant(ShImm, MVT::i32)
+        };
+        return CurDAG->SelectNodeTo(N, ARM::RSBrs, MVT::i32, Ops, 4);
+      }
+    }
+    break;
+  case ARMISD::FMRRD:
+    AddToISelQueue(Op.getOperand(0));
+    return CurDAG->getTargetNode(ARM::FMRRD, MVT::i32, MVT::i32,
+                                 Op.getOperand(0));
+  case ARMISD::MULHILOU:
+    AddToISelQueue(Op.getOperand(0));
+    AddToISelQueue(Op.getOperand(1));
+    return CurDAG->getTargetNode(ARM::UMULL, MVT::i32, MVT::i32,
+                                 Op.getOperand(0), Op.getOperand(1));
+  case ARMISD::MULHILOS:
+    AddToISelQueue(Op.getOperand(0));
+    AddToISelQueue(Op.getOperand(1));
+    return CurDAG->getTargetNode(ARM::SMULL, MVT::i32, MVT::i32,
+                                 Op.getOperand(0), Op.getOperand(1));
+  case ISD::LOAD: {
+    LoadSDNode *LD = cast<LoadSDNode>(Op);
+    ISD::MemIndexedMode AM = LD->getAddressingMode();
+    MVT::ValueType LoadedVT = LD->getLoadedVT();
+    if (AM != ISD::UNINDEXED) {
+      SDOperand Offset, AMOpc;
+      bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
+      unsigned Opcode = 0;
+      bool Match = false;
+      if (LoadedVT == MVT::i32 &&
+          SelectAddrMode2Offset(Op, LD->getOffset(), Offset, AMOpc)) {
+        Opcode = isPre ? ARM::LDR_PRE : ARM::LDR_POST;
+        Match = true;
+      } else if (LoadedVT == MVT::i16 &&
+                 SelectAddrMode3Offset(Op, LD->getOffset(), Offset, AMOpc)) {
+        Match = true;
+        Opcode = (LD->getExtensionType() == ISD::SEXTLOAD)
+          ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST)
+          : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST);
+      } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) {
+        if (LD->getExtensionType() == ISD::SEXTLOAD) {
+          if (SelectAddrMode3Offset(Op, LD->getOffset(), Offset, AMOpc)) {
+            Match = true;
+            Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST;
+          }
+        } else {
+          if (SelectAddrMode2Offset(Op, LD->getOffset(), Offset, AMOpc)) {
+            Match = true;
+            Opcode = isPre ? ARM::LDRB_PRE : ARM::LDRB_POST;
+          }
+        }
+      }
 
-    return CurDAG->SelectNodeTo(N, ARM::ADD, MVT::i32, Ops,
-                                sizeof(Ops)/sizeof(SDOperand));
+      if (Match) {
+        SDOperand Chain = LD->getChain();
+        SDOperand Base = LD->getBasePtr();
+        AddToISelQueue(Chain);
+        AddToISelQueue(Base);
+        AddToISelQueue(Offset);
+        SDOperand Ops[] = { Base, Offset, AMOpc, Chain };
+        return CurDAG->getTargetNode(Opcode, MVT::i32, MVT::i32,
+                                     MVT::Other, Ops, 4);
+      }
+    }
+    // Other cases are autogenerated.
     break;
   }
   }
-}
 
-}  // end anonymous namespace
+  return SelectCode(Op);
+}
 
 /// createARMISelDag - This pass converts a legalized DAG into a
 /// ARM-specific DAG, ready for instruction scheduling.
 ///
-FunctionPass *llvm::createARMISelDag(TargetMachine &TM) {
+FunctionPass *llvm::createARMISelDag(ARMTargetMachine &TM) {
   return new ARMDAGToDAGISel(TM);
 }


Index: llvm/lib/Target/ARM/ARMInstrInfo.cpp
diff -u llvm/lib/Target/ARM/ARMInstrInfo.cpp:1.10 llvm/lib/Target/ARM/ARMInstrInfo.cpp:1.11
--- llvm/lib/Target/ARM/ARMInstrInfo.cpp:1.10	Mon Nov 27 17:37:22 2006
+++ llvm/lib/Target/ARM/ARMInstrInfo.cpp	Fri Jan 19 01:51:42 2007
@@ -14,46 +14,409 @@
 
 #include "ARMInstrInfo.h"
 #include "ARM.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "ARMAddressingModes.h"
 #include "ARMGenInstrInfo.inc"
+#include "ARMMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/Support/CommandLine.h"
 using namespace llvm;
 
-ARMInstrInfo::ARMInstrInfo()
+static cl::opt<bool> EnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden,
+                                  cl::desc("Enable ARM 2-addr to 3-addr conv"));
+
+ARMInstrInfo::ARMInstrInfo(const ARMSubtarget &STI)
   : TargetInstrInfo(ARMInsts, sizeof(ARMInsts)/sizeof(ARMInsts[0])),
-    RI(*this) {
+    RI(*this, STI) {
+}
+
+unsigned ARMInstrInfo::getDWARF_LABELOpcode() const {
+  return ARM::DWARF_LABEL;
 }
 
 const TargetRegisterClass *ARMInstrInfo::getPointerRegClass() const {
-  return &ARM::IntRegsRegClass;
+  return &ARM::GPRRegClass;
 }
 
 /// Return true if the instruction is a register to register move and
 /// leave the source and dest operands in the passed parameters.
 ///
 bool ARMInstrInfo::isMoveInstr(const MachineInstr &MI,
-                                 unsigned &SrcReg, unsigned &DstReg) const {
+                               unsigned &SrcReg, unsigned &DstReg) const {
   MachineOpCode oc = MI.getOpcode();
   switch (oc) {
-  case ARM::MOV: {
-    assert(MI.getNumOperands() == 4 &&
-	   MI.getOperand(0).isRegister() &&
+  default:
+    return false;
+  case ARM::FCPYS:
+  case ARM::FCPYD:
+    SrcReg = MI.getOperand(1).getReg();
+    DstReg = MI.getOperand(0).getReg();
+    return true;
+  case ARM::MOVrr:
+  case ARM::tMOVrr:
+    assert(MI.getNumOperands() == 2 && MI.getOperand(0).isRegister() &&
+	   MI.getOperand(1).isRegister() &&
 	   "Invalid ARM MOV instruction");
-    const MachineOperand   &Arg = MI.getOperand(1);
-    const MachineOperand &Shift = MI.getOperand(2);
-    if (Arg.isRegister() && Shift.isImmediate() && Shift.getImmedValue() == 0) {
-      SrcReg = MI.getOperand(1).getReg();
-      DstReg = MI.getOperand(0).getReg();
-      return true;
+    SrcReg = MI.getOperand(1).getReg();
+    DstReg = MI.getOperand(0).getReg();
+    return true;
+  }
+}
+
+unsigned ARMInstrInfo::isLoadFromStackSlot(MachineInstr *MI, int &FrameIndex) const{
+  switch (MI->getOpcode()) {
+  default: break;
+  case ARM::LDR:
+    if (MI->getOperand(1).isFrameIndex() &&
+        MI->getOperand(2).isReg() &&
+        MI->getOperand(3).isImmediate() && 
+        MI->getOperand(2).getReg() == 0 &&
+        MI->getOperand(3).getImmedValue() == 0) {
+      FrameIndex = MI->getOperand(1).getFrameIndex();
+      return MI->getOperand(0).getReg();
+    }
+    break;
+  case ARM::FLDD:
+  case ARM::FLDS:
+    if (MI->getOperand(1).isFrameIndex() &&
+        MI->getOperand(2).isImmediate() && 
+        MI->getOperand(2).getImmedValue() == 0) {
+      FrameIndex = MI->getOperand(1).getFrameIndex();
+      return MI->getOperand(0).getReg();
+    }
+    break;
+  case ARM::tLDRspi:
+    if (MI->getOperand(1).isFrameIndex() &&
+        MI->getOperand(2).isImmediate() && 
+        MI->getOperand(2).getImmedValue() == 0) {
+      FrameIndex = MI->getOperand(1).getFrameIndex();
+      return MI->getOperand(0).getReg();
+    }
+    break;
+  }
+  return 0;
+}
+
+unsigned ARMInstrInfo::isStoreToStackSlot(MachineInstr *MI, int &FrameIndex) const {
+  switch (MI->getOpcode()) {
+  default: break;
+  case ARM::STR:
+    if (MI->getOperand(1).isFrameIndex() &&
+        MI->getOperand(2).isReg() &&
+        MI->getOperand(3).isImmediate() && 
+        MI->getOperand(2).getReg() == 0 &&
+        MI->getOperand(3).getImmedValue() == 0) {
+      FrameIndex = MI->getOperand(1).getFrameIndex();
+      return MI->getOperand(0).getReg();
+    }
+    break;
+  case ARM::FSTD:
+  case ARM::FSTS:
+    if (MI->getOperand(1).isFrameIndex() &&
+        MI->getOperand(2).isImmediate() && 
+        MI->getOperand(2).getImmedValue() == 0) {
+      FrameIndex = MI->getOperand(1).getFrameIndex();
+      return MI->getOperand(0).getReg();
     }
+    break;
+  case ARM::tSTRspi:
+    if (MI->getOperand(1).isFrameIndex() &&
+        MI->getOperand(2).isImmediate() && 
+        MI->getOperand(2).getImmedValue() == 0) {
+      FrameIndex = MI->getOperand(1).getFrameIndex();
+      return MI->getOperand(0).getReg();
+    }
+    break;
   }
+  return 0;
+}
+
+static unsigned getUnindexedOpcode(unsigned Opc) {
+  switch (Opc) {
+  default: break;
+  case ARM::LDR_PRE:
+  case ARM::LDR_POST:
+    return ARM::LDR;
+  case ARM::LDRH_PRE:
+  case ARM::LDRH_POST:
+    return ARM::LDRH;
+  case ARM::LDRB_PRE:
+  case ARM::LDRB_POST:
+    return ARM::LDRB;
+  case ARM::LDRSH_PRE:
+  case ARM::LDRSH_POST:
+    return ARM::LDRSH;
+  case ARM::LDRSB_PRE:
+  case ARM::LDRSB_POST:
+    return ARM::LDRSB;
+  case ARM::STR_PRE:
+  case ARM::STR_POST:
+    return ARM::STR;
+  case ARM::STRH_PRE:
+  case ARM::STRH_POST:
+    return ARM::STRH;
+  case ARM::STRB_PRE:
+  case ARM::STRB_POST:
+    return ARM::STRB;
   }
-  return false;
+  return 0;
 }
 
-void ARMInstrInfo::InsertBranch(MachineBasicBlock &MBB,MachineBasicBlock *TBB,
-                                  MachineBasicBlock *FBB,
-                                  const std::vector<MachineOperand> &Cond)const{
-  // Can only insert uncond branches so far.
-  assert(Cond.empty() && !FBB && TBB && "Can only handle uncond branches!");
-  BuildMI(&MBB, get(ARM::b)).addMBB(TBB);
+MachineInstr *
+ARMInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
+                                    MachineBasicBlock::iterator &MBBI,
+                                    LiveVariables &LV) const {
+  if (!EnableARM3Addr)
+    return NULL;
+
+  MachineInstr *MI = MBBI;
+  unsigned TSFlags = MI->getInstrDescriptor()->TSFlags;
+  bool isPre = false;
+  switch ((TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift) {
+  default: return NULL;
+  case ARMII::IndexModePre:
+    isPre = true;
+    break;
+  case ARMII::IndexModePost:
+    break;
+  }
+
+  // Try splitting an indexed load / store into an un-indexed one plus an
+  // add/sub operation.
+  unsigned MemOpc = getUnindexedOpcode(MI->getOpcode());
+  if (MemOpc == 0)
+    return NULL;
+
+  MachineInstr *UpdateMI = NULL;
+  MachineInstr *MemMI = NULL;
+  unsigned AddrMode = (TSFlags & ARMII::AddrModeMask);
+  unsigned NumOps = MI->getNumOperands();
+  bool isLoad = (MI->getInstrDescriptor()->Flags & M_LOAD_FLAG) != 0;
+  const MachineOperand &WB = isLoad ? MI->getOperand(1) : MI->getOperand(0);
+  const MachineOperand &Base = MI->getOperand(2);
+  const MachineOperand &Offset = MI->getOperand(NumOps-2);
+  unsigned WBReg = WB.getReg();
+  unsigned BaseReg = Base.getReg();
+  unsigned OffReg = Offset.getReg();
+  unsigned OffImm = MI->getOperand(NumOps-1).getImm();
+  switch (AddrMode) {
+  default:
+    assert(false && "Unknown indexed op!");
+    return NULL;
+  case ARMII::AddrMode2: {
+    bool isSub = ARM_AM::getAM2Op(OffImm) == ARM_AM::sub;
+    unsigned Amt = ARM_AM::getAM2Offset(OffImm);
+    if (OffReg == 0) {
+      int SOImmVal = ARM_AM::getSOImmVal(Amt);
+      if (SOImmVal == -1)
+        // Can't encode it in a so_imm operand. This transformation will
+        // add more than 1 instruction. Abandon!
+        return NULL;
+      UpdateMI = BuildMI(get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
+        .addReg(BaseReg).addImm(SOImmVal);
+    } else if (Amt != 0) {
+      ARM_AM::ShiftOpc ShOpc = ARM_AM::getAM2ShiftOpc(OffImm);
+      unsigned SOOpc = ARM_AM::getSORegOpc(ShOpc, Amt);
+      UpdateMI = BuildMI(get(isSub ? ARM::SUBrs : ARM::ADDrs), WBReg)
+        .addReg(BaseReg).addReg(OffReg).addReg(0).addImm(SOOpc);
+    } else 
+      UpdateMI = BuildMI(get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
+        .addReg(BaseReg).addReg(OffReg);
+    break;
+  }
+  case ARMII::AddrMode3 : {
+    bool isSub = ARM_AM::getAM3Op(OffImm) == ARM_AM::sub;
+    unsigned Amt = ARM_AM::getAM3Offset(OffImm);
+    if (OffReg == 0)
+      // Immediate is 8-bits. It's guaranteed to fit in a so_imm operand.
+      UpdateMI = BuildMI(get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
+        .addReg(BaseReg).addImm(Amt);
+    else
+      UpdateMI = BuildMI(get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
+        .addReg(BaseReg).addReg(OffReg);
+    break;
+  }
+  }
+
+  std::vector<MachineInstr*> NewMIs;
+  if (isPre) {
+    if (isLoad)
+      MemMI = BuildMI(get(MemOpc), MI->getOperand(0).getReg())
+        .addReg(WBReg).addReg(0).addImm(0);
+    else
+      MemMI = BuildMI(get(MemOpc)).addReg(MI->getOperand(1).getReg())
+        .addReg(WBReg).addReg(0).addImm(0);
+    NewMIs.push_back(MemMI);
+    NewMIs.push_back(UpdateMI);
+  } else {
+    if (isLoad)
+      MemMI = BuildMI(get(MemOpc), MI->getOperand(0).getReg())
+        .addReg(BaseReg).addReg(0).addImm(0);
+    else
+      MemMI = BuildMI(get(MemOpc)).addReg(MI->getOperand(1).getReg())
+        .addReg(BaseReg).addReg(0).addImm(0);
+    if (WB.isDead())
+      UpdateMI->getOperand(0).setIsDead();
+    NewMIs.push_back(UpdateMI);
+    NewMIs.push_back(MemMI);
+  }
+  
+  // Transfer LiveVariables states, kill / dead info.
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = MI->getOperand(i);
+    if (MO.isRegister() && MO.getReg() &&
+        MRegisterInfo::isVirtualRegister(MO.getReg())) {
+      unsigned Reg = MO.getReg();
+      LiveVariables::VarInfo &VI = LV.getVarInfo(Reg);
+      if (MO.isDef()) {
+        MachineInstr *NewMI = (Reg == WBReg) ? UpdateMI : MemMI;
+        if (MO.isDead())
+          LV.addVirtualRegisterDead(Reg, NewMI);
+        // Update the defining instruction.
+        if (VI.DefInst == MI)
+          VI.DefInst = NewMI;
+      }
+      if (MO.isUse() && MO.isKill()) {
+        for (unsigned j = 0; j < 2; ++j) {
+          // Look at the two new MI's in reverse order.
+          MachineInstr *NewMI = NewMIs[j];
+          MachineOperand *NMO = NewMI->findRegisterUseOperand(Reg);
+          if (!NMO)
+            continue;
+          LV.addVirtualRegisterKilled(Reg, NewMI);
+          if (VI.removeKill(MI))
+            VI.Kills.push_back(NewMI);
+          break;
+        }
+      }
+    }
+  }
+
+  MFI->insert(MBBI, NewMIs[1]);
+  MFI->insert(MBBI, NewMIs[0]);
+  return NewMIs[0];
+}
+
+// Branch analysis.
+bool ARMInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
+                                 MachineBasicBlock *&FBB,
+                                 std::vector<MachineOperand> &Cond) const {
+  // If the block has no terminators, it just falls into the block after it.
+  MachineBasicBlock::iterator I = MBB.end();
+  if (I == MBB.begin() || !isTerminatorInstr((--I)->getOpcode()))
+    return false;
+  
+  // Get the last instruction in the block.
+  MachineInstr *LastInst = I;
+  
+  // If there is only one terminator instruction, process it.
+  unsigned LastOpc = LastInst->getOpcode();
+  if (I == MBB.begin() || !isTerminatorInstr((--I)->getOpcode())) {
+    if (LastOpc == ARM::B || LastOpc == ARM::tB) {
+      TBB = LastInst->getOperand(0).getMachineBasicBlock();
+      return false;
+    }
+    if (LastOpc == ARM::Bcc || LastOpc == ARM::tBcc) {
+      // Block ends with fall-through condbranch.
+      TBB = LastInst->getOperand(0).getMachineBasicBlock();
+      Cond.push_back(LastInst->getOperand(1));
+      return false;
+    }
+    return true;  // Can't handle indirect branch.
+  }
+  
+  // Get the instruction before it if it is a terminator.
+  MachineInstr *SecondLastInst = I;
+  
+  // If there are three terminators, we don't know what sort of block this is.
+  if (SecondLastInst && I != MBB.begin() &&
+      isTerminatorInstr((--I)->getOpcode()))
+    return true;
+  
+  // If the block ends with ARM::B/ARM::tB and a ARM::Bcc/ARM::tBcc, handle it.
+  unsigned SecondLastOpc = SecondLastInst->getOpcode();
+  if ((SecondLastOpc == ARM::Bcc && LastOpc == ARM::B) ||
+      (SecondLastOpc == ARM::tBcc && LastOpc == ARM::tB)) {
+    TBB =  SecondLastInst->getOperand(0).getMachineBasicBlock();
+    Cond.push_back(SecondLastInst->getOperand(1));
+    FBB = LastInst->getOperand(0).getMachineBasicBlock();
+    return false;
+  }
+  
+  // Otherwise, can't handle this.
+  return true;
+}
+
+
+void ARMInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+  MachineFunction &MF = *MBB.getParent();
+  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+  int BOpc   = AFI->isThumbFunction() ? ARM::tB : ARM::B;
+  int BccOpc = AFI->isThumbFunction() ? ARM::tBcc : ARM::Bcc;
+
+  MachineBasicBlock::iterator I = MBB.end();
+  if (I == MBB.begin()) return;
+  --I;
+  if (I->getOpcode() != BOpc && I->getOpcode() != BccOpc)
+    return;
+  
+  // Remove the branch.
+  I->eraseFromParent();
+  
+  I = MBB.end();
+  
+  if (I == MBB.begin()) return;
+  --I;
+  if (I->getOpcode() != BccOpc)
+    return;
+  
+  // Remove the branch.
+  I->eraseFromParent();
+}
+
+void ARMInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+                                MachineBasicBlock *FBB,
+                                const std::vector<MachineOperand> &Cond) const {
+  MachineFunction &MF = *MBB.getParent();
+  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+  int BOpc   = AFI->isThumbFunction() ? ARM::tB : ARM::B;
+  int BccOpc = AFI->isThumbFunction() ? ARM::tBcc : ARM::Bcc;
+
+  // A target is mandatory, and Cond holds at most one condition-code operand.
+  assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+  assert((Cond.size() == 1 || Cond.size() == 0) &&
+         "ARM branch conditions have one component!");
+
+  if (FBB == 0) {
+    if (Cond.empty()) // Unconditional branch?
+      BuildMI(&MBB, get(BOpc)).addMBB(TBB);
+    else
+      BuildMI(&MBB, get(BccOpc)).addMBB(TBB).addImm(Cond[0].getImm());
+    return;
+  }
+  // Two-way conditional branch: Bcc to TBB followed by B to FBB.
+  assert(!Cond.empty() && "Two-way branch requires a condition!");
+  BuildMI(&MBB, get(BccOpc)).addMBB(TBB).addImm(Cond[0].getImm());
+  BuildMI(&MBB, get(BOpc)).addMBB(FBB);
+}
+
+bool ARMInstrInfo::BlockHasNoFallThrough(MachineBasicBlock &MBB) const {
+  if (MBB.empty()) return false;
+  
+  switch (MBB.back().getOpcode()) {
+  case ARM::B:
+  case ARM::tB:       // Uncond branch.
+  case ARM::BR_JTr:   // Jumptable branch.
+  case ARM::BR_JTm:   // Jumptable branch through mem.
+  case ARM::BR_JTadd: // Jumptable branch add to pc.
+    return true;
+  default: return false;
+  }
+}
+
+bool ARMInstrInfo::
+ReverseBranchCondition(std::vector<MachineOperand> &Cond) const {
+  ARMCC::CondCodes CC = (ARMCC::CondCodes)(int)Cond[0].getImm();
+  Cond[0].setImm(ARMCC::getOppositeCondition(CC));
+  return false;
 }


Index: llvm/lib/Target/ARM/ARMInstrInfo.h
diff -u llvm/lib/Target/ARM/ARMInstrInfo.h:1.4 llvm/lib/Target/ARM/ARMInstrInfo.h:1.5
--- llvm/lib/Target/ARM/ARMInstrInfo.h:1.4	Tue Oct 24 11:47:57 2006
+++ llvm/lib/Target/ARM/ARMInstrInfo.h	Fri Jan 19 01:51:42 2007
@@ -1,4 +1,4 @@
-//===- ARMInstrInfo.h - ARM Instruction Information --------------*- C++ -*-===//
+//===- ARMInstrInfo.h - ARM Instruction Information -------------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -19,11 +19,56 @@
 #include "ARMRegisterInfo.h"
 
 namespace llvm {
+  class ARMSubtarget;
+
+/// ARMII - This namespace holds all of the target specific flags that
+/// instruction info tracks.
+///
+namespace ARMII {
+  enum {
+    //===------------------------------------------------------------------===//
+    // Instruction Flags.
+
+    //===------------------------------------------------------------------===//
+    // This four-bit field describes the addressing mode used.  Zero is unused
+    // so that we can tell if we forgot to set a value.
+
+    AddrModeMask  = 0xf,
+    AddrMode1     = 1,
+    AddrMode2     = 2,
+    AddrMode3     = 3,
+    AddrMode4     = 4,
+    AddrMode5     = 5,
+    AddrModeT1    = 6,
+    AddrModeT2    = 7,
+    AddrModeT4    = 8,
+    AddrModeTs    = 9,   // i8 * 4 for pc and sp relative data
+
+    // Size* - Flags to keep track of the size of an instruction.
+    SizeShift     = 4,
+    SizeMask      = 7 << SizeShift,
+    SizeSpecial   = 1,   // 0 byte pseudo or special case.
+    Size8Bytes    = 2,
+    Size4Bytes    = 3,
+    Size2Bytes    = 4,
+    
+    // IndexMode - Unindexed, pre-indexed, or post-indexed. Only valid for load
+    // and store ops.
+    IndexModeShift = 7,
+    IndexModeMask  = 3 << IndexModeShift,
+    IndexModePre   = 1,
+    IndexModePost  = 2,
+    
+    // Opcode
+    OpcodeShift   = 9,
+    OpcodeMask    = 0xf << OpcodeShift
+  };
+}
 
 class ARMInstrInfo : public TargetInstrInfo {
   const ARMRegisterInfo RI;
 public:
-  ARMInstrInfo();
+  ARMInstrInfo(const ARMSubtarget &STI);
 
   /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info.  As
   /// such, whenever a client has an instance of instruction info, it should
@@ -35,15 +80,33 @@
   /// This is used for addressing modes.
   virtual const TargetRegisterClass *getPointerRegClass() const;
 
+  /// getDWARF_LABELOpcode - Return the opcode of the target's DWARF_LABEL
+  /// instruction if it has one.  This is used by codegen passes that update
+  /// DWARF line number info as they modify the code.
+  virtual unsigned getDWARF_LABELOpcode() const;
+  
   /// Return true if the instruction is a register to register move and
   /// leave the source and dest operands in the passed parameters.
   ///
   virtual bool isMoveInstr(const MachineInstr &MI,
                            unsigned &SrcReg, unsigned &DstReg) const;
-
+  virtual unsigned isLoadFromStackSlot(MachineInstr *MI, int &FrameIndex) const;
+  virtual unsigned isStoreToStackSlot(MachineInstr *MI, int &FrameIndex) const;
+  
+  virtual MachineInstr *convertToThreeAddress(MachineFunction::iterator &MFI,
+                                              MachineBasicBlock::iterator &MBBI,
+                                              LiveVariables &LV) const;
+
+  // Branch analysis.
+  virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+                             MachineBasicBlock *&FBB,
+                             std::vector<MachineOperand> &Cond) const;
+  virtual void RemoveBranch(MachineBasicBlock &MBB) const;
   virtual void InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
                             MachineBasicBlock *FBB,
                             const std::vector<MachineOperand> &Cond) const;
+  virtual bool BlockHasNoFallThrough(MachineBasicBlock &MBB) const;
+  virtual bool ReverseBranchCondition(std::vector<MachineOperand> &Cond) const;
 };
 
 }


Index: llvm/lib/Target/ARM/ARMInstrInfo.td
diff -u llvm/lib/Target/ARM/ARMInstrInfo.td:1.83 llvm/lib/Target/ARM/ARMInstrInfo.td:1.84
--- llvm/lib/Target/ARM/ARMInstrInfo.td:1.83	Sun Dec 31 12:52:39 2006
+++ llvm/lib/Target/ARM/ARMInstrInfo.td	Fri Jan 19 01:51:42 2007
@@ -1,4 +1,4 @@
-//===- ARMInstrInfo.td - Target Description for ARM Target ----------------===//
+//===- ARMInstrInfo.td - Target Description for ARM Target -*- tablegen -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -12,351 +12,1184 @@
 //
 //===----------------------------------------------------------------------===//
 
-// Address operands
-def op_addr_mode1 : Operand<iPTR> {
-  let PrintMethod = "printAddrMode1";
-  let MIOperandInfo = (ops ptr_rc, ptr_rc, i32imm);
-}
+//===----------------------------------------------------------------------===//
+// ARM specific DAG Nodes.
+//
 
-def op_addr_mode2 : Operand<iPTR> {
-  let PrintMethod = "printAddrMode2";
-  let MIOperandInfo = (ops ptr_rc, i32imm);
-}
+// Type profiles.
+def SDT_ARMCallSeq : SDTypeProfile<0, 1, [ SDTCisVT<0, i32> ]>;
 
-def op_addr_mode5 : Operand<iPTR> {
-  let PrintMethod = "printAddrMode5";
-  let MIOperandInfo = (ops ptr_rc, i32imm);
-}
+def SDT_ARMSaveCallPC : SDTypeProfile<0, 1, []>;
+
+def SDT_ARMcall    : SDTypeProfile<0, -1, [SDTCisInt<0>]>;
+
+def SDT_ARMCMov    : SDTypeProfile<1, 3,
+                                   [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
+                                    SDTCisVT<3, i32>]>;
+
+def SDT_ARMBrcond  : SDTypeProfile<0, 2,
+                                   [SDTCisVT<0, OtherVT>, SDTCisVT<1, i32>]>;
+
+def SDT_ARMBrJT    : SDTypeProfile<0, 3,
+                                  [SDTCisPtrTy<0>, SDTCisVT<1, i32>,
+                                   SDTCisVT<2, i32>]>;
+
+def SDT_ARMCmp     : SDTypeProfile<0, 2, [SDTCisSameAs<0, 1>]>;
+
+def SDT_ARMPICAdd  : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>,
+                                          SDTCisPtrTy<1>, SDTCisVT<2, i32>]>;
+
+// Node definitions.
+def ARMWrapper       : SDNode<"ARMISD::Wrapper",     SDTIntUnaryOp>;
+def ARMWrapperCall   : SDNode<"ARMISD::WrapperCall", SDTIntUnaryOp>;
+def ARMWrapperJT     : SDNode<"ARMISD::WrapperJT",   SDTIntBinOp>;
+
+def ARMcallseq_start : SDNode<"ISD::CALLSEQ_START", SDT_ARMCallSeq,
+    		              [SDNPHasChain, SDNPOutFlag]>;
+def ARMcallseq_end   : SDNode<"ISD::CALLSEQ_END",   SDT_ARMCallSeq,
+    		              [SDNPHasChain, SDNPOutFlag]>;
 
-// Define ARM specific addressing mode.
-//Addressing Mode 1: data processing operands
-def addr_mode1 : ComplexPattern<iPTR, 3, "SelectAddrMode1", [imm, sra, shl, srl],
-                                []>;
+def ARMcall          : SDNode<"ARMISD::CALL", SDT_ARMcall,
+                              [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+def ARMcall_nolink   : SDNode<"ARMISD::CALL_NOLINK", SDT_ARMcall,
+                              [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
 
-//Addressing Mode 2: Load and Store Word or Unsigned Byte
-def addr_mode2 : ComplexPattern<iPTR, 2, "SelectAddrMode2", [], []>;
+def ARMretflag       : SDNode<"ARMISD::RET_FLAG", SDTRet,
+                              [SDNPHasChain, SDNPOptInFlag]>;
 
-//Addressing Mode 5: VFP load/store
-def addr_mode5 : ComplexPattern<iPTR, 2, "SelectAddrMode5", [], []>;
+def ARMcmov          : SDNode<"ARMISD::CMOV", SDT_ARMCMov,
+                              [SDNPInFlag]>;
+def ARMcneg          : SDNode<"ARMISD::CNEG", SDT_ARMCMov,
+                              [SDNPInFlag]>;
+
+def ARMbrcond        : SDNode<"ARMISD::BRCOND", SDT_ARMBrcond,
+                              [SDNPHasChain, SDNPInFlag, SDNPOutFlag]>;
+
+def ARMbrjt          : SDNode<"ARMISD::BR_JT", SDT_ARMBrJT,
+                              [SDNPHasChain]>;
+
+def ARMcmp           : SDNode<"ARMISD::CMP", SDT_ARMCmp,
+                              [SDNPOutFlag]>;
+
+def ARMpic_add       : SDNode<"ARMISD::PIC_ADD", SDT_ARMPICAdd>;
+
+def ARMsrl_flag      : SDNode<"ARMISD::SRL_FLAG", SDTIntUnaryOp, [SDNPOutFlag]>;
+def ARMsra_flag      : SDNode<"ARMISD::SRA_FLAG", SDTIntUnaryOp, [SDNPOutFlag]>;
+def ARMrrx           : SDNode<"ARMISD::RRX"     , SDTIntUnaryOp, [SDNPInFlag ]>;
 
 //===----------------------------------------------------------------------===//
-// Instruction Class Templates
+// ARM Instruction Predicate Definitions.
+//
+def HasV5T  : Predicate<"Subtarget->hasV5TOps()">;
+def HasV5TE : Predicate<"Subtarget->hasV5TEOps()">;
+def HasV6   : Predicate<"Subtarget->hasV6Ops()">;
+def IsThumb : Predicate<"Subtarget->isThumb()">;
+def IsARM   : Predicate<"!Subtarget->isThumb()">;
+
 //===----------------------------------------------------------------------===//
-class InstARM<dag ops, string asmstr, list<dag> pattern> : Instruction {
-  let Namespace = "ARM";
+// ARM Flag Definitions.
 
-  dag OperandList = ops;
-  let AsmString   = asmstr;
-  let Pattern = pattern;
+class RegConstraint<string C> {
+  string Constraints = C;
 }
 
-class IntBinOp<string OpcStr, SDNode OpNode> :
-        InstARM<(ops IntRegs:$dst, IntRegs:$a, IntRegs:$b),
-                 !strconcat(OpcStr, " $dst, $a, $b"),
-                 [(set IntRegs:$dst, (OpNode IntRegs:$a, IntRegs:$b))]>;
-
-class FPBinOp<string OpcStr, SDNode OpNode> :
-        InstARM<(ops FPRegs:$dst, FPRegs:$a, FPRegs:$b),
-                 !strconcat(OpcStr, " $dst, $a, $b"),
-                 [(set FPRegs:$dst, (OpNode FPRegs:$a, FPRegs:$b))]>;
-
-class DFPBinOp<string OpcStr, SDNode OpNode> :
-        InstARM<(ops DFPRegs:$dst, DFPRegs:$a, DFPRegs:$b),
-                 !strconcat(OpcStr, " $dst, $a, $b"),
-                 [(set DFPRegs:$dst, (OpNode DFPRegs:$a, DFPRegs:$b))]>;
-
-class FPUnaryOp<string OpcStr, SDNode OpNode> :
-        InstARM<(ops FPRegs:$dst, FPRegs:$src),
-                 !strconcat(OpcStr, " $dst, $src"),
-                 [(set FPRegs:$dst, (OpNode FPRegs:$src))]>;
-
-class DFPUnaryOp<string OpcStr, SDNode OpNode> :
-        InstARM<(ops DFPRegs:$dst, DFPRegs:$src),
-                 !strconcat(OpcStr, " $dst, $src"),
-                 [(set DFPRegs:$dst, (OpNode DFPRegs:$src))]>;
-
-class Addr1BinOp<string OpcStr, SDNode OpNode> :
-        InstARM<(ops IntRegs:$dst, IntRegs:$a, op_addr_mode1:$b),
-                 !strconcat(OpcStr, " $dst, $a, $b"),
-                 [(set IntRegs:$dst, (OpNode IntRegs:$a, addr_mode1:$b))]>;
-
 //===----------------------------------------------------------------------===//
-// Instructions
+//  ARM specific transformation functions and pattern fragments.
+//
+
+// so_imm_XFORM - Return a so_imm value packed into the format described for
+// so_imm def below.
+def so_imm_XFORM : SDNodeXForm<imm, [{
+  return CurDAG->getTargetConstant(ARM_AM::getSOImmVal(N->getValue()),
+                                   MVT::i32);
+}]>;
+
+// so_imm_neg_XFORM - Return a so_imm value packed into the format described for
+// so_imm_neg def below.
+def so_imm_neg_XFORM : SDNodeXForm<imm, [{
+  return CurDAG->getTargetConstant(ARM_AM::getSOImmVal(-(int)N->getValue()),
+                                   MVT::i32);
+}]>;
+
+// so_imm_not_XFORM - Return a so_imm value packed into the format described for
+// so_imm_not def below.
+def so_imm_not_XFORM : SDNodeXForm<imm, [{
+  return CurDAG->getTargetConstant(ARM_AM::getSOImmVal(~(int)N->getValue()),
+                                   MVT::i32);
+}]>;
+
+// rot_imm predicate - True if the 32-bit immediate is equal to 8, 16, or 24.
+def rot_imm : PatLeaf<(i32 imm), [{
+  int32_t v = (int32_t)N->getValue();
+  return v == 8 || v == 16 || v == 24;
+}]>;
+
+/// imm1_15 predicate - True if the 32-bit immediate is in the range [1,15].
+def imm1_15 : PatLeaf<(i32 imm), [{
+  return (int32_t)N->getValue() >= 1 && (int32_t)N->getValue() < 16;
+}]>;
+
+/// imm16_31 predicate - True if the 32-bit immediate is in the range [16,31].
+def imm16_31 : PatLeaf<(i32 imm), [{
+  return (int32_t)N->getValue() >= 16 && (int32_t)N->getValue() < 32;
+}]>;
+
+def so_imm_neg : 
+  PatLeaf<(imm), [{ return ARM_AM::getSOImmVal(-(int)N->getValue()) != -1; }],
+          so_imm_neg_XFORM>;
+
+def so_imm_not : 
+  PatLeaf<(imm), [{ return ARM_AM::getSOImmVal(~(int)N->getValue()) != -1; }],
+          so_imm_not_XFORM>;
+
+// sext_16_node predicate - True if the SDNode is sign-extended 16 or more bits.
+def sext_16_node : PatLeaf<(i32 GPR:$a), [{
+  return TLI.ComputeNumSignBits(SDOperand(N,0)) >= 17;
+}]>;
+
+
+// Break so_imm's up into two pieces.  This handles immediates with up to 16
+// bits set in them.  This uses so_imm2part to match and so_imm2part_[12] to
+// get the first/second pieces.
+def so_imm2part : PatLeaf<(imm), [{
+  return ARM_AM::isSOImmTwoPartVal((unsigned)N->getValue());
+}]>;
+
+def so_imm2part_1 : SDNodeXForm<imm, [{
+  unsigned V = ARM_AM::getSOImmTwoPartFirst((unsigned)N->getValue());
+  return CurDAG->getTargetConstant(ARM_AM::getSOImmVal(V), MVT::i32);
+}]>;
+
+def so_imm2part_2 : SDNodeXForm<imm, [{
+  unsigned V = ARM_AM::getSOImmTwoPartSecond((unsigned)N->getValue());
+  return CurDAG->getTargetConstant(ARM_AM::getSOImmVal(V), MVT::i32);
+}]>;
+
+
+
 //===----------------------------------------------------------------------===//
+// Operand Definitions.
+//
 
+// Branch target.
 def brtarget : Operand<OtherVT>;
 
 // Operand for printing out a condition code.
-let PrintMethod = "printCCOperand" in
-  def CCOp : Operand<i32>;
+def CCOp : Operand<i32> {
+  let PrintMethod = "printCCOperand";
+}
 
-def SDT_ARMCallSeq : SDTypeProfile<0, 1, [ SDTCisVT<0, i32> ]>;
-def callseq_start  : SDNode<"ISD::CALLSEQ_START", SDT_ARMCallSeq,
-    		             [SDNPHasChain, SDNPOutFlag]>;
-def callseq_end    : SDNode<"ISD::CALLSEQ_END",   SDT_ARMCallSeq,
-    		             [SDNPHasChain, SDNPOutFlag]>;
+// A list of registers separated by comma. Used by load/store multiple.
+def reglist : Operand<i32> {
+  let PrintMethod = "printRegisterList";
+}
 
-def SDT_ARMcall    : SDTypeProfile<0, -1, [SDTCisInt<0>]>;
-def ARMcall        : SDNode<"ARMISD::CALL", SDT_ARMcall,
-                           [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
-def retflag        : SDNode<"ARMISD::RET_FLAG", SDTRet,
-	                   [SDNPHasChain, SDNPOptInFlag]>;
+// An operand for the CONSTPOOL_ENTRY pseudo-instruction.
+def cpinst_operand : Operand<i32> {
+  let PrintMethod = "printCPInstOperand";
+}
 
-def SDTarmselect   : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>]>;
-def armselect      : SDNode<"ARMISD::SELECT", SDTarmselect, [SDNPInFlag, SDNPOutFlag]>;
+def jtblock_operand : Operand<i32> {
+  let PrintMethod = "printJTBlockOperand";
+}
 
-def SDTarmfmstat   : SDTypeProfile<0, 0, []>;
-def armfmstat      : SDNode<"ARMISD::FMSTAT", SDTarmfmstat, [SDNPInFlag, SDNPOutFlag]>;
+// Local PC labels.
+def pclabel : Operand<i32> {
+  let PrintMethod = "printPCLabel";
+}
 
-def SDTarmbr       : SDTypeProfile<0, 2, [SDTCisVT<0, OtherVT>, SDTCisVT<1, i32>]>;
-def armbr          : SDNode<"ARMISD::BR", SDTarmbr, [SDNPHasChain, SDNPInFlag, SDNPOutFlag]>;
+// shifter_operand operands: so_reg and so_imm.
+def so_reg : Operand<i32>,    // reg reg imm
+            ComplexPattern<i32, 3, "SelectShifterOperandReg",
+                            [shl,srl,sra,rotr]> {
+  let PrintMethod = "printSORegOperand";
+  let MIOperandInfo = (ops GPR, GPR, i32imm);
+}
 
-def SDTVoidBinOp : SDTypeProfile<0, 2, [SDTCisSameAs<0, 1>]>;
-def armcmp       : SDNode<"ARMISD::CMP",  SDTVoidBinOp, [SDNPOutFlag]>;
+// so_imm - Match a 32-bit shifter_operand immediate operand, which is an
+// 8-bit immediate rotated by an arbitrary number of bits.  so_imm values are
+// represented in the imm field in the same 12-bit form that they are encoded
+// into so_imm instructions: the 8-bit immediate is the least significant bits
+// [bits 0-7], the 4-bit shift amount is the next 4 bits [bits 8-11].
+def so_imm : Operand<i32>,
+             PatLeaf<(imm),
+                     [{ return ARM_AM::getSOImmVal(N->getValue()) != -1; }],
+                     so_imm_XFORM> {
+  let PrintMethod = "printSOImmOperand";
+}
 
-def armfsitos      : SDNode<"ARMISD::FSITOS", SDTUnaryOp>;
-def armftosis      : SDNode<"ARMISD::FTOSIS", SDTUnaryOp>;
-def armfsitod      : SDNode<"ARMISD::FSITOD", SDTUnaryOp>;
-def armftosid      : SDNode<"ARMISD::FTOSID", SDTUnaryOp>;
-def armfuitos      : SDNode<"ARMISD::FUITOS", SDTUnaryOp>;
-def armftouis      : SDNode<"ARMISD::FTOUIS", SDTUnaryOp>;
-def armfuitod      : SDNode<"ARMISD::FUITOD", SDTUnaryOp>;
-def armftouid      : SDNode<"ARMISD::FTOUID", SDTUnaryOp>;
 
-def SDTarmfmrrd    : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>, SDTCisFP<2>]>;
-def armfmrrd       : SDNode<"ARMISD::FMRRD", SDTarmfmrrd,
-                            [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+// Define ARM specific addressing modes.
 
-def SDTarmfmdrr    : SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisInt<1>, SDTCisInt<2>]>;
-def armfmdrr       : SDNode<"ARMISD::FMDRR", SDTarmfmdrr, []>;
+// addrmode2 := reg +/- reg shop imm
+// addrmode2 := reg +/- imm12
+//
+def addrmode2 : Operand<i32>,
+                ComplexPattern<i32, 3, "SelectAddrMode2", []> {
+  let PrintMethod = "printAddrMode2Operand";
+  let MIOperandInfo = (ops GPR:$base, GPR:$offsreg, i32imm:$offsimm);
+}
 
-def ADJCALLSTACKUP : InstARM<(ops i32imm:$amt),
-                            "!ADJCALLSTACKUP $amt",
-                            [(callseq_end imm:$amt)]>, Imp<[R13],[R13]>;
+def am2offset : Operand<i32>,
+                ComplexPattern<i32, 2, "SelectAddrMode2Offset", []> {
+  let PrintMethod = "printAddrMode2OffsetOperand";
+  let MIOperandInfo = (ops GPR, i32imm);
+}
 
-def ADJCALLSTACKDOWN : InstARM<(ops i32imm:$amt),
-                               "!ADJCALLSTACKDOWN $amt",
-                               [(callseq_start imm:$amt)]>, Imp<[R13],[R13]>;
+// addrmode3 := reg +/- reg
+// addrmode3 := reg +/- imm8
+//
+def addrmode3 : Operand<i32>,
+                ComplexPattern<i32, 3, "SelectAddrMode3", []> {
+  let PrintMethod = "printAddrMode3Operand";
+  let MIOperandInfo = (ops GPR:$base, GPR:$offsreg, i32imm:$offsimm);
+}
 
-def IMPLICIT_DEF_Int : InstARM<(ops IntRegs:$dst),
-                               "@IMPLICIT_DEF $dst",
-                               [(set IntRegs:$dst, (undef))]>;
-def IMPLICIT_DEF_FP  : InstARM<(ops FPRegs:$dst), "@IMPLICIT_DEF $dst",
-                               [(set FPRegs:$dst, (undef))]>;
-def IMPLICIT_DEF_DFP : InstARM<(ops DFPRegs:$dst), "@IMPLICIT_DEF $dst",
-                               [(set DFPRegs:$dst, (undef))]>;
+def am3offset : Operand<i32>,
+                ComplexPattern<i32, 2, "SelectAddrMode3Offset", []> {
+  let PrintMethod = "printAddrMode3OffsetOperand";
+  let MIOperandInfo = (ops GPR, i32imm);
+}
 
-let isReturn = 1 in {
-  def bx: InstARM<(ops), "bx r14", [(retflag)]>;
+// addrmode4 := reg, <mode|W>
+//
+def addrmode4 : Operand<i32>,
+                ComplexPattern<i32, 2, "", []> {
+  let PrintMethod = "printAddrMode4Operand";
+  let MIOperandInfo = (ops GPR, i32imm);
 }
 
-let noResults = 1, Defs = [R0, R1, R2, R3, R14] in {
-  def bl: InstARM<(ops i32imm:$func, variable_ops), "bl $func", []>;
-  def blx     : InstARM<(ops IntRegs:$func, variable_ops), "blx $func", [(ARMcall IntRegs:$func)]>;
+// addrmode5 := reg +/- imm8*4
+//
+def addrmode5 : Operand<i32>,
+                ComplexPattern<i32, 2, "SelectAddrMode5", []> {
+  let PrintMethod = "printAddrMode5Operand";
+  let MIOperandInfo = (ops GPR, i32imm);
 }
 
-def LDR     : InstARM<(ops IntRegs:$dst, op_addr_mode2:$addr),
-                     "ldr $dst, $addr",
-                     [(set IntRegs:$dst, (load addr_mode2:$addr))]>;
+// addrmodepc := pc + reg
+//
+def addrmodepc : Operand<i32>,
+                 ComplexPattern<i32, 2, "SelectAddrModePC", []> {
+  let PrintMethod = "printAddrModePCOperand";
+  let MIOperandInfo = (ops GPR, i32imm);
+}
 
-def LDRB    : InstARM<(ops IntRegs:$dst, IntRegs:$addr),
-                       "ldrb $dst, [$addr]",
-                       [(set IntRegs:$dst, (zextloadi8 IntRegs:$addr))]>;
+//===----------------------------------------------------------------------===//
+// ARM Instruction flags.  These need to match ARMInstrInfo.h.
+//
 
-def LDRSB   : InstARM<(ops IntRegs:$dst, IntRegs:$addr),
-                       "ldrsb $dst, [$addr]",
-                       [(set IntRegs:$dst, (sextloadi8 IntRegs:$addr))]>;
+// Addressing mode.
+class AddrMode<bits<4> val> {
+  bits<4> Value = val;
+}
+def AddrModeNone : AddrMode<0>;
+def AddrMode1    : AddrMode<1>;
+def AddrMode2    : AddrMode<2>;
+def AddrMode3    : AddrMode<3>;
+def AddrMode4    : AddrMode<4>;
+def AddrMode5    : AddrMode<5>;
+def AddrModeT1   : AddrMode<6>;
+def AddrModeT2   : AddrMode<7>;
+def AddrModeT4   : AddrMode<8>;
+def AddrModeTs   : AddrMode<9>;
+
+// Instruction size.
+class SizeFlagVal<bits<3> val> {
+  bits<3> Value = val;
+}
+def SizeInvalid  : SizeFlagVal<0>;  // Unset.
+def SizeSpecial  : SizeFlagVal<1>;  // Pseudo or special.
+def Size8Bytes   : SizeFlagVal<2>;
+def Size4Bytes   : SizeFlagVal<3>;
+def Size2Bytes   : SizeFlagVal<4>;
+
+// Load / store index mode.
+class IndexMode<bits<2> val> {
+  bits<2> Value = val;
+}
+def IndexModeNone : IndexMode<0>;
+def IndexModePre  : IndexMode<1>;
+def IndexModePost : IndexMode<2>;
 
-def LDRH    : InstARM<(ops IntRegs:$dst, IntRegs:$addr),
-                       "ldrh $dst, [$addr]",
-                       [(set IntRegs:$dst, (zextloadi16 IntRegs:$addr))]>;
+//===----------------------------------------------------------------------===//
+// ARM Instruction templates.
+//
 
-def LDRSH   : InstARM<(ops IntRegs:$dst, IntRegs:$addr),
-                       "ldrsh $dst, [$addr]",
-                       [(set IntRegs:$dst, (sextloadi16 IntRegs:$addr))]>;
+// ARMPat - Same as Pat<>, but requires that the compiler be in ARM mode.
+class ARMPat<dag pattern, dag result> : Pat<pattern, result> {
+  list<Predicate> Predicates = [IsARM];
+}
+class ARMV6Pat<dag pattern, dag result> : Pat<pattern, result> {
+  list<Predicate> Predicates = [IsARM, HasV6];
+}
 
-def STR     : InstARM<(ops IntRegs:$src, op_addr_mode2:$addr),
-                     "str $src, $addr",
-                     [(store IntRegs:$src, addr_mode2:$addr)]>;
 
-def STRB    : InstARM<(ops IntRegs:$src, IntRegs:$addr),
-                       "strb $src, [$addr]",
-                       [(truncstorei8 IntRegs:$src, IntRegs:$addr)]>;
+class InstARM<bits<4> opcod, AddrMode am, SizeFlagVal sz, IndexMode im,
+              dag ops, string asmstr, string cstr>
+  : Instruction {
+  let Namespace = "ARM";
 
-def STRH    : InstARM<(ops IntRegs:$src, IntRegs:$addr),
-                       "strh $src, [$addr]",
-                       [(truncstorei16 IntRegs:$src, IntRegs:$addr)]>;
+  bits<4> Opcode = opcod;
+  AddrMode AM = am;
+  bits<4> AddrModeBits = AM.Value;
+  
+  SizeFlagVal SZ = sz;
+  bits<3> SizeFlag = SZ.Value;
+
+  IndexMode IM = im;
+  bits<2> IndexModeBits = IM.Value;
+  
+  dag OperandList = ops;
+  let AsmString   = asmstr;
+  let Constraints = cstr;
+}
 
-def MOV   : InstARM<(ops IntRegs:$dst, op_addr_mode1:$src),
-                    "mov $dst, $src", [(set IntRegs:$dst, addr_mode1:$src)]>;
+class PseudoInst<dag ops, string asm, list<dag> pattern>
+  : InstARM<0, AddrModeNone, SizeSpecial, IndexModeNone, ops, asm, ""> {
+  let Pattern = pattern;
+}
 
-def MVN     : InstARM<(ops IntRegs:$dst, op_addr_mode1:$src),
-                       "mvn $dst, $src", [(set IntRegs:$dst, (not addr_mode1:$src))]>;
+class I<dag ops, AddrMode am, SizeFlagVal sz, IndexMode im,
+        string asm, string cstr, list<dag> pattern>
+  // FIXME: Set all opcodes to 0 for now.
+  : InstARM<0, am, sz, im, ops, asm, cstr> {
+  let Pattern = pattern;
+  list<Predicate> Predicates = [IsARM];
+}
 
-def ADD     : Addr1BinOp<"add",  add>;
-def ADCS    : Addr1BinOp<"adcs", adde>;
-def ADDS    : Addr1BinOp<"adds", addc>;
-def SUB     : Addr1BinOp<"sub",  sub>;
-def SBCS    : Addr1BinOp<"sbcs", sube>;
-def SUBS    : Addr1BinOp<"subs", subc>;
-def AND     : Addr1BinOp<"and",  and>;
-def EOR     : Addr1BinOp<"eor",  xor>;
-def ORR     : Addr1BinOp<"orr",  or>;
+class AI<dag ops, string asm, list<dag> pattern>
+  : I<ops, AddrModeNone, Size4Bytes, IndexModeNone, asm, "", pattern>;
+class AI1<dag ops, string asm, list<dag> pattern>
+  : I<ops, AddrMode1, Size4Bytes, IndexModeNone, asm, "", pattern>;
+class AI2<dag ops, string asm, list<dag> pattern>
+  : I<ops, AddrMode2, Size4Bytes, IndexModeNone, asm, "", pattern>;
+class AI3<dag ops, string asm, list<dag> pattern>
+  : I<ops, AddrMode3, Size4Bytes, IndexModeNone, asm, "", pattern>;
+class AI4<dag ops, string asm, list<dag> pattern>
+  : I<ops, AddrMode4, Size4Bytes, IndexModeNone, asm, "", pattern>;
+class AIx2<dag ops, string asm, list<dag> pattern>
+  : I<ops, AddrModeNone, Size8Bytes, IndexModeNone, asm, "", pattern>;
+
+// Pre-indexed ops
+class AI2pr<dag ops, string asm, string cstr, list<dag> pattern>
+  : I<ops, AddrMode2, Size4Bytes, IndexModePre, asm, cstr, pattern>;
+class AI3pr<dag ops, string asm, string cstr, list<dag> pattern>
+  : I<ops, AddrMode3, Size4Bytes, IndexModePre, asm, cstr, pattern>;
+
+// Post-indexed ops
+class AI2po<dag ops, string asm, string cstr, list<dag> pattern>
+  : I<ops, AddrMode2, Size4Bytes, IndexModePost, asm, cstr, pattern>;
+class AI3po<dag ops, string asm, string cstr, list<dag> pattern>
+  : I<ops, AddrMode3, Size4Bytes, IndexModePost, asm, cstr, pattern>;
+
+// BR_JT instructions
+class JTI<dag ops, string asm, list<dag> pattern>
+  : I<ops, AddrModeNone, SizeSpecial, IndexModeNone, asm, "", pattern>;
+class JTI1<dag ops, string asm, list<dag> pattern>
+  : I<ops, AddrMode1, SizeSpecial, IndexModeNone, asm, "", pattern>;
+class JTI2<dag ops, string asm, list<dag> pattern>
+  : I<ops, AddrMode2, SizeSpecial, IndexModeNone, asm, "", pattern>;
+
+
+class BinOpFrag<dag res> : PatFrag<(ops node:$LHS, node:$RHS), res>;
+class UnOpFrag <dag res> : PatFrag<(ops node:$Src), res>;
+
+
+/// AI1_bin_irs - Defines a set of (op r, {so_imm|r|so_reg}) patterns for a
+/// binop that produces a value.
+multiclass AI1_bin_irs<string opc, PatFrag opnode> {
+  def ri : AI1<(ops GPR:$dst, GPR:$a, so_imm:$b),
+               !strconcat(opc, " $dst, $a, $b"),
+               [(set GPR:$dst, (opnode GPR:$a, so_imm:$b))]>;
+  def rr : AI1<(ops GPR:$dst, GPR:$a, GPR:$b),
+               !strconcat(opc, " $dst, $a, $b"),
+               [(set GPR:$dst, (opnode GPR:$a, GPR:$b))]>;
+  def rs : AI1<(ops GPR:$dst, GPR:$a, so_reg:$b),
+               !strconcat(opc, " $dst, $a, $b"),
+               [(set GPR:$dst, (opnode GPR:$a, so_reg:$b))]>;
+}
 
-let isTwoAddress = 1 in {
-  def movcond : InstARM<(ops IntRegs:$dst, IntRegs:$false,
-			 op_addr_mode1:$true, CCOp:$cc),
-	                 "mov$cc $dst, $true",
-		         [(set IntRegs:$dst, (armselect addr_mode1:$true,
-			   IntRegs:$false, imm:$cc))]>;
+/// AI1_bin0_irs - Defines a set of (op r, {so_imm|r|so_reg}) patterns.
+/// Similar to AI1_bin_irs except the instruction does not produce a result.
+multiclass AI1_bin0_irs<string opc, PatFrag opnode> {
+  def ri : AI1<(ops GPR:$a, so_imm:$b),
+               !strconcat(opc, " $a, $b"),
+               [(opnode GPR:$a, so_imm:$b)]>;
+  def rr : AI1<(ops GPR:$a, GPR:$b),
+               !strconcat(opc, " $a, $b"),
+               [(opnode GPR:$a, GPR:$b)]>;
+  def rs : AI1<(ops GPR:$a, so_reg:$b),
+               !strconcat(opc, " $a, $b"),
+               [(opnode GPR:$a, so_reg:$b)]>;
+}
 
-  def fcpyscond : InstARM<(ops FPRegs:$dst, FPRegs:$false,
-			 FPRegs:$true, CCOp:$cc),
-	                 "fcpys$cc $dst, $true",
-		         [(set FPRegs:$dst, (armselect FPRegs:$true,
-			   FPRegs:$false, imm:$cc))]>;
+/// AI1_bin_is - Defines a set of (op r, {so_imm|so_reg}) patterns for a binop.
+multiclass AI1_bin_is<string opc, PatFrag opnode> {
+  def ri : AI1<(ops GPR:$dst, GPR:$a, so_imm:$b),
+               !strconcat(opc, " $dst, $a, $b"),
+               [(set GPR:$dst, (opnode GPR:$a, so_imm:$b))]>;
+  def rs : AI1<(ops GPR:$dst, GPR:$a, so_reg:$b),
+               !strconcat(opc, " $dst, $a, $b"),
+               [(set GPR:$dst, (opnode GPR:$a, so_reg:$b))]>;
+}
 
-  def fcpydcond : InstARM<(ops DFPRegs:$dst, DFPRegs:$false,
-			 DFPRegs:$true, CCOp:$cc),
-	                 "fcpyd$cc $dst, $true",
-		         [(set DFPRegs:$dst, (armselect DFPRegs:$true,
-			   DFPRegs:$false, imm:$cc))]>;
+/// AI1_unary_irs - Defines a set of (op {so_imm|r|so_reg}) patterns for unary
+/// ops.
+multiclass AI1_unary_irs<string opc, PatFrag opnode> {
+  def i : AI1<(ops GPR:$dst, so_imm:$a),
+              !strconcat(opc, " $dst, $a"),
+              [(set GPR:$dst, (opnode so_imm:$a))]>;
+  def r : AI1<(ops GPR:$dst, GPR:$a),
+              !strconcat(opc, " $dst, $a"),
+              [(set GPR:$dst, (opnode GPR:$a))]>;
+  def s : AI1<(ops GPR:$dst, so_reg:$a),
+              !strconcat(opc, " $dst, $a"),
+              [(set GPR:$dst, (opnode so_reg:$a))]>;
 }
 
-def MUL     : IntBinOp<"mul", mul>;
+/// AI_unary_rrot - A unary operation with two forms: one whose operand is a
+/// register and one whose operand is a register rotated by 8/16/24.
+multiclass AI_unary_rrot<string opc, PatFrag opnode> {
+  def r     : AI<(ops GPR:$dst, GPR:$Src),
+                 !strconcat(opc, " $dst, $Src"),
+                 [(set GPR:$dst, (opnode GPR:$Src))]>, Requires<[IsARM, HasV6]>;
+  def r_rot : AI<(ops GPR:$dst, GPR:$Src, i32imm:$rot),
+                 !strconcat(opc, " $dst, $Src, ror $rot"),
+                 [(set GPR:$dst, (opnode (rotr GPR:$Src, rot_imm:$rot)))]>,
+              Requires<[IsARM, HasV6]>;
+}
 
-let Defs = [R0] in {
-  def SMULL   : IntBinOp<"smull r12,", mulhs>;
-  def UMULL   : IntBinOp<"umull r12,", mulhu>;
+/// AI_bin_rrot - A binary operation with two forms: one whose RHS operand is
+/// a register and one whose RHS operand is a register rotated by 8/16/24.
+multiclass AI_bin_rrot<string opc, PatFrag opnode> {
+  def rr     : AI<(ops GPR:$dst, GPR:$LHS, GPR:$RHS),
+                  !strconcat(opc, " $dst, $LHS, $RHS"),
+                  [(set GPR:$dst, (opnode GPR:$LHS, GPR:$RHS))]>,
+                  Requires<[IsARM, HasV6]>;
+  def rr_rot : AI<(ops GPR:$dst, GPR:$LHS, GPR:$RHS, i32imm:$rot),
+                  !strconcat(opc, " $dst, $LHS, $RHS, ror $rot"),
+                  [(set GPR:$dst, (opnode GPR:$LHS,
+                                          (rotr GPR:$RHS, rot_imm:$rot)))]>,
+                  Requires<[IsARM, HasV6]>;
 }
 
-let isTerminator = 1, isBranch = 1 in {
-  def bcond   : InstARM<(ops brtarget:$dst, CCOp:$cc),
-	                    "b$cc $dst",
-		            [(armbr bb:$dst, imm:$cc)]>;
 
-  def b       : InstARM<(ops brtarget:$dst),
-                        "b $dst",
-                        [(br bb:$dst)]>;
+//===----------------------------------------------------------------------===//
+// Instructions
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//  Miscellaneous Instructions.
+//
+def IMPLICIT_DEF_GPR : 
+PseudoInst<(ops GPR:$rD),
+           "@ IMPLICIT_DEF_GPR $rD",
+           [(set GPR:$rD, (undef))]>;
+
+
+/// CONSTPOOL_ENTRY - This instruction represents a floating constant pool in
+/// the function.  The first operand is the ID# for this instruction, the second
+/// is the index of this entry in the MachineConstantPool, and the third is the
+/// size in bytes of this constant pool entry.
+def CONSTPOOL_ENTRY :
+PseudoInst<(ops cpinst_operand:$instid, cpinst_operand:$cpidx, i32imm:$size),
+           "${instid:label} ${cpidx:cpentry}", []>;
+
+def ADJCALLSTACKUP :
+PseudoInst<(ops i32imm:$amt),
+           "@ ADJCALLSTACKUP $amt",
+           [(ARMcallseq_end imm:$amt)]>, Imp<[SP],[SP]>;
+
+def ADJCALLSTACKDOWN : 
+PseudoInst<(ops i32imm:$amt),
+           "@ ADJCALLSTACKDOWN $amt",
+           [(ARMcallseq_start imm:$amt)]>, Imp<[SP],[SP]>;
+
+def DWARF_LOC :
+PseudoInst<(ops i32imm:$line, i32imm:$col, i32imm:$file),
+           ".loc $file, $line, $col",
+           [(dwarf_loc (i32 imm:$line), (i32 imm:$col), (i32 imm:$file))]>;
+
+def DWARF_LABEL :
+PseudoInst<(ops i32imm:$id),
+           "\nLdebug_loc${id:no_hash}:",
+           [(dwarf_label (i32 imm:$id))]>;
+
+def PICADD : AI1<(ops GPR:$dst, GPR:$a, pclabel:$cp),
+                  "\n$cp:\n\tadd $dst, pc, $a",
+                  [(set GPR:$dst, (ARMpic_add GPR:$a, imm:$cp))]>;
+let AddedComplexity = 10 in
+def PICLD : AI2<(ops GPR:$dst, addrmodepc:$addr),
+                  "\n${addr:label}:\n\tldr $dst, $addr",
+                  [(set GPR:$dst, (load addrmodepc:$addr))]>;
+
+//===----------------------------------------------------------------------===//
+//  Control Flow Instructions.
+//
+
+let isReturn = 1, isTerminator = 1 in
+  def BX_RET : AI<(ops), "bx lr", [(ARMretflag)]>;
+
+// FIXME: Remove when we have a way to mark an MI with these properties.
+let isLoad = 1, isReturn = 1, isTerminator = 1 in
+  def LDM_RET : AI4<(ops addrmode4:$addr, reglist:$dst1, variable_ops),
+                    "ldm${addr:submode} $addr, $dst1",
+                    []>;
+
+let isCall = 1, noResults = 1, 
+  Defs = [R0, R1, R2, R3, R12, LR,
+          D0, D1, D2, D3, D4, D5, D6, D7] in {
+  def BL  : AI<(ops i32imm:$func, variable_ops),
+               "bl ${func:call}",
+               [(ARMcall tglobaladdr:$func)]>;
+  // ARMv5T and above
+  def BLX : AI<(ops GPR:$dst, variable_ops),
+               "blx $dst",
+               [(ARMcall GPR:$dst)]>, Requires<[IsARM, HasV5T]>;
+  // ARMv4T
+  def BX : AIx2<(ops GPR:$dst, variable_ops),
+                "mov lr, pc\n\tbx $dst",
+                [(ARMcall_nolink GPR:$dst)]>;
+}
+
+let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
+  def B : AI<(ops brtarget:$dst), "b $dst",
+             [(br bb:$dst)]>;
+
+  def BR_JTr : JTI<(ops GPR:$dst, jtblock_operand:$jt, i32imm:$id),
+                    "mov pc, $dst \n$jt",
+                    [(ARMbrjt GPR:$dst, tjumptable:$jt, imm:$id)]>;
+  def BR_JTm : JTI2<(ops addrmode2:$dst, jtblock_operand:$jt, i32imm:$id),
+                     "ldr pc, $dst \n$jt",
+                     [(ARMbrjt (i32 (load addrmode2:$dst)), tjumptable:$jt,
+                       imm:$id)]>;
+  def BR_JTadd : JTI1<(ops GPR:$dst, GPR:$idx, jtblock_operand:$jt, i32imm:$id),
+                       "add pc, $dst, $idx \n$jt",
+                       [(ARMbrjt (add GPR:$dst, GPR:$idx), tjumptable:$jt,
+                         imm:$id)]>;
 }
 
-def cmp      : InstARM<(ops IntRegs:$a, op_addr_mode1:$b),
-	               "cmp $a, $b",
-		       [(armcmp IntRegs:$a, addr_mode1:$b)]>;
+let isBranch = 1, isTerminator = 1, noResults = 1 in // may fall through
+  def Bcc : AI<(ops brtarget:$dst, CCOp:$cc), "b$cc $dst",
+                [(ARMbrcond bb:$dst, imm:$cc)]>;
+
+//===----------------------------------------------------------------------===//
+//  Load / store Instructions.
+//
+
+// Load
+let isLoad = 1 in {
+def LDR  : AI2<(ops GPR:$dst, addrmode2:$addr),
+               "ldr $dst, $addr",
+               [(set GPR:$dst, (load addrmode2:$addr))]>;
+
+// Loads with zero extension
+def LDRH  : AI3<(ops GPR:$dst, addrmode3:$addr),
+                 "ldrh $dst, $addr",
+                [(set GPR:$dst, (zextloadi16 addrmode3:$addr))]>;
+
+def LDRB  : AI2<(ops GPR:$dst, addrmode2:$addr),
+                 "ldrb $dst, $addr",
+                [(set GPR:$dst, (zextloadi8 addrmode2:$addr))]>;
+
+// Loads with sign extension
+def LDRSH : AI3<(ops GPR:$dst, addrmode3:$addr),
+                 "ldrsh $dst, $addr",
+                [(set GPR:$dst, (sextloadi16 addrmode3:$addr))]>;
+
+def LDRSB : AI3<(ops GPR:$dst, addrmode3:$addr),
+                 "ldrsb $dst, $addr",
+                [(set GPR:$dst, (sextloadi8 addrmode3:$addr))]>;
+
+// Load doubleword
+def LDRD  : AI3<(ops GPR:$dst, addrmode3:$addr),
+                 "ldrd $dst, $addr",
+                []>, Requires<[IsARM, HasV5T]>;
+
+// Indexed loads
+def LDR_PRE  : AI2pr<(ops GPR:$dst, GPR:$base_wb, addrmode2:$addr),
+                    "ldr $dst, $addr!", "$addr.base = $base_wb", []>;
+
+def LDR_POST : AI2po<(ops GPR:$dst, GPR:$base_wb, GPR:$base, am2offset:$offset),
+                    "ldr $dst, [$base], $offset", "$base = $base_wb", []>;
+
+def LDRH_PRE  : AI3pr<(ops GPR:$dst, GPR:$base_wb, addrmode3:$addr),
+                     "ldrh $dst, $addr!", "$addr.base = $base_wb", []>;
+
+def LDRH_POST : AI3po<(ops GPR:$dst, GPR:$base_wb, GPR:$base,am3offset:$offset),
+                     "ldrh $dst, [$base], $offset", "$base = $base_wb", []>;
+
+def LDRB_PRE  : AI2pr<(ops GPR:$dst, GPR:$base_wb, addrmode2:$addr),
+                     "ldrb $dst, $addr!", "$addr.base = $base_wb", []>;
+
+def LDRB_POST : AI2po<(ops GPR:$dst, GPR:$base_wb, GPR:$base,am2offset:$offset),
+                     "ldrb $dst, [$base], $offset", "$base = $base_wb", []>;
+
+def LDRSH_PRE : AI3pr<(ops GPR:$dst, GPR:$base_wb, addrmode3:$addr),
+                      "ldrsh $dst, $addr!", "$addr.base = $base_wb", []>;
+
+def LDRSH_POST: AI3po<(ops GPR:$dst, GPR:$base_wb, GPR:$base,am3offset:$offset),
+                      "ldrsh $dst, [$base], $offset", "$base = $base_wb", []>;
+
+def LDRSB_PRE : AI3pr<(ops GPR:$dst, GPR:$base_wb, addrmode3:$addr),
+                      "ldrsb $dst, $addr!", "$addr.base = $base_wb", []>;
+
+def LDRSB_POST: AI3po<(ops GPR:$dst, GPR:$base_wb, GPR:$base,am3offset:$offset),
+                      "ldrsb $dst, [$base], $offset", "$base = $base_wb", []>;
+} // isLoad
+
+// Store
+let isStore = 1 in {
+def STR  : AI2<(ops GPR:$src, addrmode2:$addr),
+               "str $src, $addr",
+               [(store GPR:$src, addrmode2:$addr)]>;
+
+// Stores with truncate
+def STRH : AI3<(ops GPR:$src, addrmode3:$addr),
+               "strh $src, $addr",
+               [(truncstorei16 GPR:$src, addrmode3:$addr)]>;
+
+def STRB : AI2<(ops GPR:$src, addrmode2:$addr),
+               "strb $src, $addr",
+               [(truncstorei8 GPR:$src, addrmode2:$addr)]>;
+
+// Store doubleword
+def STRD : AI3<(ops GPR:$src, addrmode3:$addr),
+               "strd $src, $addr",
+               []>, Requires<[IsARM, HasV5T]>;
+
+// Indexed stores
+def STR_PRE  : AI2pr<(ops GPR:$base_wb, GPR:$src, GPR:$base, am2offset:$offset),
+                    "str $src, [$base, $offset]!", "$base = $base_wb",
+                    [(set GPR:$base_wb,
+                      (pre_store GPR:$src, GPR:$base, am2offset:$offset))]>;
+
+def STR_POST : AI2po<(ops  GPR:$base_wb, GPR:$src, GPR:$base,am2offset:$offset),
+                    "str $src, [$base], $offset", "$base = $base_wb",
+                    [(set GPR:$base_wb,
+                      (post_store GPR:$src, GPR:$base, am2offset:$offset))]>;
+
+def STRH_PRE : AI3pr<(ops  GPR:$base_wb, GPR:$src, GPR:$base,am3offset:$offset),
+                     "strh $src, [$base, $offset]!", "$base = $base_wb",
+                    [(set GPR:$base_wb,
+                      (pre_truncsti16 GPR:$src, GPR:$base,am3offset:$offset))]>;
+
+def STRH_POST: AI3po<(ops  GPR:$base_wb, GPR:$src, GPR:$base,am3offset:$offset),
+                     "strh $src, [$base], $offset", "$base = $base_wb",
+                    [(set GPR:$base_wb, (post_truncsti16 GPR:$src,
+                                         GPR:$base, am3offset:$offset))]>;
+
+def STRB_PRE : AI2pr<(ops  GPR:$base_wb, GPR:$src, GPR:$base,am2offset:$offset),
+                     "strb $src, [$base, $offset]!", "$base = $base_wb",
+                    [(set GPR:$base_wb, (pre_truncsti8 GPR:$src,
+                                         GPR:$base, am2offset:$offset))]>;
+
+def STRB_POST: AI2po<(ops  GPR:$base_wb, GPR:$src, GPR:$base,am2offset:$offset),
+                     "strb $src, [$base], $offset", "$base = $base_wb",
+                    [(set GPR:$base_wb, (post_truncsti8 GPR:$src,
+                                         GPR:$base, am2offset:$offset))]>;
+} // isStore
 
-// Floating Point Compare
-def fcmps   : InstARM<(ops FPRegs:$a, FPRegs:$b),
-	               "fcmps $a, $b",
-		       [(armcmp FPRegs:$a, FPRegs:$b)]>;
+//===----------------------------------------------------------------------===//
+//  Load / store multiple Instructions.
+//
 
-def fcmpd   : InstARM<(ops DFPRegs:$a, DFPRegs:$b),
-	               "fcmpd $a, $b",
-		       [(armcmp DFPRegs:$a, DFPRegs:$b)]>;
+let isLoad = 1 in
+def LDM : AI4<(ops addrmode4:$addr, reglist:$dst1, variable_ops),
+              "ldm${addr:submode} $addr, $dst1",
+              []>;
+
+let isStore = 1 in
+def STM : AI4<(ops addrmode4:$addr, reglist:$src1, variable_ops),
+              "stm${addr:submode} $addr, $src1",
+              []>;
 
-// Floating Point Copy
-def FCPYS   : InstARM<(ops FPRegs:$dst, FPRegs:$src), "fcpys $dst, $src", []>;
+//===----------------------------------------------------------------------===//
+//  Move Instructions.
+//
 
-def FCPYD   : InstARM<(ops DFPRegs:$dst, DFPRegs:$src), "fcpyd $dst, $src", []>;
+def MOVrr : AI1<(ops GPR:$dst, GPR:$src),
+                "mov $dst, $src", []>;
+def MOVrs : AI1<(ops GPR:$dst, so_reg:$src),
+                "mov $dst, $src", [(set GPR:$dst, so_reg:$src)]>;
+def MOVri : AI1<(ops GPR:$dst, so_imm:$src),
+                "mov $dst, $src", [(set GPR:$dst, so_imm:$src)]>;
+
+// These aren't really mov instructions, but we have to define them this way
+// due to flag operands.
+
+def MOVsrl_flag : AI1<(ops GPR:$dst, GPR:$src),
+                      "movs $dst, $src, lsr #1",
+                      [(set GPR:$dst, (ARMsrl_flag GPR:$src))]>;
+def MOVsra_flag : AI1<(ops GPR:$dst, GPR:$src),
+                      "movs $dst, $src, asr #1",
+                      [(set GPR:$dst, (ARMsra_flag GPR:$src))]>;
+def MOVrrx      : AI1<(ops GPR:$dst, GPR:$src),
+                      "mov $dst, $src, rrx",
+                      [(set GPR:$dst, (ARMrrx GPR:$src))]>;
 
-// Floating Point Conversion
-// We use bitconvert for moving the data between the register classes.
-// The format conversion is done with ARM specific nodes
 
-def FMSR    : InstARM<(ops FPRegs:$dst, IntRegs:$src),
-                       "fmsr $dst, $src", [(set FPRegs:$dst, (bitconvert IntRegs:$src))]>;
+//===----------------------------------------------------------------------===//
+//  Extend Instructions.
+//
 
-def FMRS    : InstARM<(ops IntRegs:$dst, FPRegs:$src),
-                       "fmrs $dst, $src", [(set IntRegs:$dst, (bitconvert FPRegs:$src))]>;
+// Sign extenders
 
-def FMRRD   : InstARM<(ops IntRegs:$i0, IntRegs:$i1, DFPRegs:$src),
-                       "fmrrd $i0, $i1, $src", [(armfmrrd IntRegs:$i0, IntRegs:$i1, DFPRegs:$src)]>;
+defm SXTB  : AI_unary_rrot<"sxtb", UnOpFrag<(sext_inreg node:$Src, i8)>>;
+defm SXTH  : AI_unary_rrot<"sxth", UnOpFrag<(sext_inreg node:$Src, i16)>>;
 
-def FMDRR   : InstARM<(ops DFPRegs:$dst, IntRegs:$i0, IntRegs:$i1),
-                       "fmdrr $dst, $i0, $i1", [(set DFPRegs:$dst, (armfmdrr IntRegs:$i0, IntRegs:$i1))]>;
+defm SXTAB : AI_bin_rrot<"sxtab",
+                        BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS, i8))>>;
+defm SXTAH : AI_bin_rrot<"sxtah",
+                        BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS,i16))>>;
+
+// TODO: SXT(A){B|H}16
+
+// Zero extenders
+
+let AddedComplexity = 16 in {
+defm UXTB   : AI_unary_rrot<"uxtb"  , UnOpFrag<(and node:$Src, 0x000000FF)>>;
+defm UXTH   : AI_unary_rrot<"uxth"  , UnOpFrag<(and node:$Src, 0x0000FFFF)>>;
+defm UXTB16 : AI_unary_rrot<"uxtb16", UnOpFrag<(and node:$Src, 0x00FF00FF)>>;
+
+def : ARMV6Pat<(and (shl GPR:$Src, 8), 0xFF00FF),
+               (UXTB16r_rot GPR:$Src, 24)>;
+def : ARMV6Pat<(and (srl GPR:$Src, 8), 0xFF00FF),
+               (UXTB16r_rot GPR:$Src, 8)>;
+
+defm UXTAB : AI_bin_rrot<"uxtab",
+                        BinOpFrag<(add node:$LHS, (and node:$RHS, 0x00FF))>>;
+defm UXTAH : AI_bin_rrot<"uxtah",
+                        BinOpFrag<(add node:$LHS, (and node:$RHS, 0xFFFF))>>;
+}
 
-def FSITOS  : InstARM<(ops FPRegs:$dst, FPRegs:$src),
-                       "fsitos $dst, $src", [(set FPRegs:$dst, (armfsitos FPRegs:$src))]>;
+// This isn't safe in general, the add is two 16-bit units, not a 32-bit add.
+//defm UXTAB16 : xxx<"uxtab16", 0xff00ff>;
 
-def FTOSIS  : InstARM<(ops FPRegs:$dst, FPRegs:$src),
-                       "ftosis $dst, $src", [(set FPRegs:$dst, (armftosis FPRegs:$src))]>;
+// TODO: UXT(A){B|H}16
+
+//===----------------------------------------------------------------------===//
+//  Arithmetic Instructions.
+//
+
+defm ADD  : AI1_bin_irs<"add" , BinOpFrag<(add  node:$LHS, node:$RHS)>>;
+defm ADDS : AI1_bin_irs<"adds", BinOpFrag<(addc node:$LHS, node:$RHS)>>;
+defm ADC  : AI1_bin_irs<"adc" , BinOpFrag<(adde node:$LHS, node:$RHS)>>;
+defm SUB  : AI1_bin_irs<"sub" , BinOpFrag<(sub  node:$LHS, node:$RHS)>>;
+defm SUBS : AI1_bin_irs<"subs", BinOpFrag<(subc node:$LHS, node:$RHS)>>;
+defm SBC  : AI1_bin_irs<"sbc" , BinOpFrag<(sube node:$LHS, node:$RHS)>>;
+
+// These don't define reg/reg forms, because they are handled above.
+defm RSB  : AI1_bin_is <"rsb" , BinOpFrag<(sub  node:$RHS, node:$LHS)>>;
+defm RSBS : AI1_bin_is <"rsbs", BinOpFrag<(subc node:$RHS, node:$LHS)>>;
+defm RSC  : AI1_bin_is <"rsc" , BinOpFrag<(sube node:$RHS, node:$LHS)>>;
+
+// (sub X, imm) gets canonicalized to (add X, -imm).  Match this form.
+def : ARMPat<(add    GPR:$src, so_imm_neg:$imm),
+             (SUBri  GPR:$src, so_imm_neg:$imm)>;
+
+//def : ARMPat<(addc   GPR:$src, so_imm_neg:$imm),
+//             (SUBSri GPR:$src, so_imm_neg:$imm)>;
+//def : ARMPat<(adde   GPR:$src, so_imm_neg:$imm),
+//             (SBCri  GPR:$src, so_imm_neg:$imm)>;
+
+// Note: These are implemented in C++ code, because they have to generate
+// ADD/SUBrs instructions, which use a complex pattern that a xform function
+// cannot produce.
+// (mul X, 2^n+1) -> (add (X << n), X)
+// (mul X, 2^n-1) -> (rsb X, (X << n))
+
+
+//===----------------------------------------------------------------------===//
+//  Bitwise Instructions.
+//
 
-def FSITOD  : InstARM<(ops DFPRegs:$dst, FPRegs:$src),
-                       "fsitod $dst, $src", [(set DFPRegs:$dst, (armfsitod FPRegs:$src))]>;
+defm AND   : AI1_bin_irs<"and", BinOpFrag<(and node:$LHS, node:$RHS)>>;
+defm ORR   : AI1_bin_irs<"orr", BinOpFrag<(or  node:$LHS, node:$RHS)>>;
+defm EOR   : AI1_bin_irs<"eor", BinOpFrag<(xor node:$LHS, node:$RHS)>>;
+defm BIC   : AI1_bin_irs<"bic", BinOpFrag<(and node:$LHS, (not node:$RHS))>>;
 
-def FTOSID  : InstARM<(ops FPRegs:$dst, DFPRegs:$src),
-                       "ftosid $dst, $src", [(set FPRegs:$dst, (armftosid DFPRegs:$src))]>;
+defm MVN   : AI1_unary_irs<"mvn", not>;
 
-def FUITOS  : InstARM<(ops FPRegs:$dst, FPRegs:$src),
-                       "fuitos $dst, $src", [(set FPRegs:$dst, (armfuitos FPRegs:$src))]>;
+def : ARMPat<(i32  so_imm_not:$imm),
+             (MVNi so_imm_not:$imm)>;
 
-def FTOUIS  : InstARM<(ops FPRegs:$dst, FPRegs:$src),
-                       "ftouis $dst, $src", [(set FPRegs:$dst, (armftouis FPRegs:$src))]>;
+def : ARMPat<(and   GPR:$src, so_imm_not:$imm),
+             (BICri GPR:$src, so_imm_not:$imm)>;
 
-def FUITOD  : InstARM<(ops DFPRegs:$dst, FPRegs:$src),
-                       "fuitod $dst, $src", [(set DFPRegs:$dst, (armfuitod FPRegs:$src))]>;
+//===----------------------------------------------------------------------===//
+//  Multiply Instructions.
+//
 
-def FTOUID  : InstARM<(ops FPRegs:$dst, DFPRegs:$src),
-                       "ftouid $dst, $src", [(set FPRegs:$dst, (armftouid DFPRegs:$src))]>;
+// AI_orr - Defines a (op r, r) pattern.
+class AI_orr<string opc, SDNode opnode>
+  : AI<(ops GPR:$dst, GPR:$a, GPR:$b),
+       !strconcat(opc, " $dst, $a, $b"),
+       [(set GPR:$dst, (opnode GPR:$a, GPR:$b))]>;
+
+// AI_oorr - Defines a (op (op r, r), r) pattern.
+class AI_oorr<string opc, SDNode opnode1, SDNode opnode2>
+  : AI<(ops GPR:$dst, GPR:$a, GPR:$b, GPR:$c),
+       !strconcat(opc, " $dst, $a, $b, $c"),
+       [(set GPR:$dst, (opnode1 (opnode2 GPR:$a, GPR:$b), GPR:$c))]>;
+
+def MUL  : AI_orr<"mul", mul>;
+def MLA  : AI_oorr<"mla", add, mul>;
+
+// Extra precision multiplies with low / high results
+def SMULL : AI<(ops GPR:$ldst, GPR:$hdst, GPR:$a, GPR:$b),
+               "smull $ldst, $hdst, $a, $b",
+               []>;
+
+def UMULL : AI<(ops GPR:$ldst, GPR:$hdst, GPR:$a, GPR:$b),
+               "umull $ldst, $hdst, $a, $b",
+               []>;
+
+// Multiply + accumulate
+def SMLAL : AI<(ops GPR:$ldst, GPR:$hdst, GPR:$a, GPR:$b),
+               "smlal $ldst, $hdst, $a, $b",
+               []>;
+
+def UMLAL : AI<(ops GPR:$ldst, GPR:$hdst, GPR:$a, GPR:$b),
+               "umlal $ldst, $hdst, $a, $b",
+               []>;
+
+def UMAAL : AI<(ops GPR:$ldst, GPR:$hdst, GPR:$a, GPR:$b),
+               "umaal $ldst, $hdst, $a, $b",
+               []>, Requires<[IsARM, HasV6]>;
+
+// Most significant word multiply
+def SMMUL : AI_orr<"smmul", mulhs>, Requires<[IsARM, HasV6]>;
+def SMMLA : AI_oorr<"smmla", add, mulhs>, Requires<[IsARM, HasV6]>;
+
+
+def SMMLS : AI<(ops GPR:$dst, GPR:$a, GPR:$b, GPR:$c),
+               "smmls $dst, $a, $b, $c",
+               [(set GPR:$dst, (sub GPR:$c, (mulhs GPR:$a, GPR:$b)))]>,
+               Requires<[IsARM, HasV6]>;
+
+multiclass AI_smul<string opc, PatFrag opnode> {
+  def BB1 : AI<(ops GPR:$dst, GPR:$a, GPR:$b),
+               !strconcat(opc, "bb $dst, $a, $b"),
+               [(set GPR:$dst, (opnode (sext_inreg GPR:$a, i16),
+                                       (sext_inreg GPR:$b, i16)))]>,
+            Requires<[IsARM, HasV5TE]>;
+  def BB2 : AI<(ops GPR:$dst, GPR:$a, GPR:$b),
+               !strconcat(opc, "bb $dst, $a, $b"),
+               [(set GPR:$dst, (opnode (sra (shl GPR:$a, 16), 16),
+                                       (sra (shl GPR:$b, 16), 16)))]>,
+            Requires<[IsARM, HasV5TE]>;
+  def BB3 : AI<(ops GPR:$dst, GPR:$a, GPR:$b),
+               !strconcat(opc, "bb $dst, $a, $b"),
+               [(set GPR:$dst, (opnode sext_16_node:$a, sext_16_node:$b))]>,
+            Requires<[IsARM, HasV5TE]>;
+
+  def BT1 : AI<(ops GPR:$dst, GPR:$a, GPR:$b),
+               !strconcat(opc, "bt $dst, $a, $b"),
+               [(set GPR:$dst, (opnode (sext_inreg GPR:$a, i16),
+                                       (sra GPR:$b, 16)))]>,
+            Requires<[IsARM, HasV5TE]>;
+  def BT2 : AI<(ops GPR:$dst, GPR:$a, GPR:$b),
+               !strconcat(opc, "bt $dst, $a, $b"),
+               [(set GPR:$dst, (opnode (sra (shl GPR:$a, 16), 16),
+                                       (sra GPR:$b, 16)))]>,
+            Requires<[IsARM, HasV5TE]>;
+  def BT3 : AI<(ops GPR:$dst, GPR:$a, GPR:$b),
+               !strconcat(opc, "bt $dst, $a, $b"),
+               [(set GPR:$dst, (opnode sext_16_node:$a, (sra GPR:$b, 16)))]>,
+            Requires<[IsARM, HasV5TE]>;
+
+  def TB1 : AI<(ops GPR:$dst, GPR:$a, GPR:$b),
+               !strconcat(opc, "tb $dst, $a, $b"),
+               [(set GPR:$dst, (opnode (sra GPR:$a, 16),
+                                       (sext_inreg GPR:$b, i16)))]>,
+            Requires<[IsARM, HasV5TE]>;
+  def TB2 : AI<(ops GPR:$dst, GPR:$a, GPR:$b),
+               !strconcat(opc, "tb $dst, $a, $b"),
+               [(set GPR:$dst, (opnode (sra GPR:$a, 16),
+                                       (sra (shl GPR:$b, 16), 16)))]>,
+            Requires<[IsARM, HasV5TE]>;
+  def TB3 : AI<(ops GPR:$dst, GPR:$a, GPR:$b),
+               !strconcat(opc, "tb $dst, $a, $b"),
+               [(set GPR:$dst, (opnode (sra GPR:$a, 16), sext_16_node:$b))]>,
+            Requires<[IsARM, HasV5TE]>;
+
+  def TT : AI<(ops GPR:$dst, GPR:$a, GPR:$b),
+              !strconcat(opc, "tt $dst, $a, $b"),
+              [(set GPR:$dst, (opnode (sra GPR:$a, 16),
+                                      (sra GPR:$b, 16)))]>,
+            Requires<[IsARM, HasV5TE]>;
+
+  def WB1 : AI<(ops GPR:$dst, GPR:$a, GPR:$b),
+               !strconcat(opc, "wb $dst, $a, $b"),
+               [(set GPR:$dst, (sra (opnode GPR:$a,
+                                     (sext_inreg GPR:$b, i16)), 16))]>,
+            Requires<[IsARM, HasV5TE]>;
+  def WB2 : AI<(ops GPR:$dst, GPR:$a, GPR:$b),
+               !strconcat(opc, "wb $dst, $a, $b"),
+               [(set GPR:$dst, (sra (opnode GPR:$a,
+                                     (sra (shl GPR:$b, 16), 16)), 16))]>,
+            Requires<[IsARM, HasV5TE]>;
+  def WB3 : AI<(ops GPR:$dst, GPR:$a, GPR:$b),
+               !strconcat(opc, "wb $dst, $a, $b"),
+               [(set GPR:$dst, (sra (opnode GPR:$a, sext_16_node:$b), 16))]>,
+            Requires<[IsARM, HasV5TE]>;
+
+  def WT : AI<(ops GPR:$dst, GPR:$a, GPR:$b),
+              !strconcat(opc, "wt $dst, $a, $b"),
+              [(set GPR:$dst, (sra (opnode GPR:$a,
+                                    (sra GPR:$b, 16)), 16))]>,
+            Requires<[IsARM, HasV5TE]>;
+}
 
-def FCVTDS  : InstARM<(ops DFPRegs:$dst, FPRegs:$src),
-                       "fcvtds $dst, $src", [(set DFPRegs:$dst, (fextend FPRegs:$src))]>;
+multiclass AI_smla<string opc, PatFrag opnode> {
+  def BB1 : AI<(ops GPR:$dst, GPR:$a, GPR:$b, GPR:$acc),
+               !strconcat(opc, "bb $dst, $a, $b, $acc"),
+               [(set GPR:$dst, (add GPR:$acc,
+                                (opnode (sext_inreg GPR:$a, i16),
+                                        (sext_inreg GPR:$b, i16))))]>,
+            Requires<[IsARM, HasV5TE]>;
+  def BB2 : AI<(ops GPR:$dst, GPR:$a, GPR:$b, GPR:$acc),
+               !strconcat(opc, "bb $dst, $a, $b, $acc"),
+               [(set GPR:$dst, (add GPR:$acc,
+                                (opnode (sra (shl GPR:$a, 16), 16),
+                                        (sra (shl GPR:$b, 16), 16))))]>,
+            Requires<[IsARM, HasV5TE]>;
+  def BB3 : AI<(ops GPR:$dst, GPR:$a, GPR:$b, GPR:$acc),
+               !strconcat(opc, "bb $dst, $a, $b, $acc"),
+               [(set GPR:$dst, (add GPR:$acc,
+                                (opnode sext_16_node:$a, sext_16_node:$b)))]>,
+            Requires<[IsARM, HasV5TE]>;
+
+  def BT1 : AI<(ops GPR:$dst, GPR:$a, GPR:$b, GPR:$acc),
+               !strconcat(opc, "bt $dst, $a, $b, $acc"),
+               [(set GPR:$dst, (add GPR:$acc, (opnode (sext_inreg GPR:$a, i16),
+                                                      (sra GPR:$b, 16))))]>,
+            Requires<[IsARM, HasV5TE]>;
+  def BT2 : AI<(ops GPR:$dst, GPR:$a, GPR:$b, GPR:$acc),
+               !strconcat(opc, "bt $dst, $a, $b, $acc"),
+              [(set GPR:$dst, (add GPR:$acc, (opnode (sra (shl GPR:$a, 16), 16),
+                                                     (sra GPR:$b, 16))))]>,
+            Requires<[IsARM, HasV5TE]>;
+  def BT3 : AI<(ops GPR:$dst, GPR:$a, GPR:$b, GPR:$acc),
+               !strconcat(opc, "bt $dst, $a, $b, $acc"),
+               [(set GPR:$dst, (add GPR:$acc, (opnode sext_16_node:$a,
+                                                      (sra GPR:$b, 16))))]>,
+            Requires<[IsARM, HasV5TE]>;
+
+  def TB1 : AI<(ops GPR:$dst, GPR:$a, GPR:$b, GPR:$acc),
+               !strconcat(opc, "tb $dst, $a, $b, $acc"),
+               [(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, 16),
+                                                  (sext_inreg GPR:$b, i16))))]>,
+            Requires<[IsARM, HasV5TE]>;
+  def TB2 : AI<(ops GPR:$dst, GPR:$a, GPR:$b, GPR:$acc),
+               !strconcat(opc, "tb $dst, $a, $b, $acc"),
+             [(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, 16),
+                                                (sra (shl GPR:$b, 16), 16))))]>,
+            Requires<[IsARM, HasV5TE]>;
+  def TB3 : AI<(ops GPR:$dst, GPR:$a, GPR:$b, GPR:$acc),
+               !strconcat(opc, "tb $dst, $a, $b, $acc"),
+               [(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, 16),
+                                                      sext_16_node:$b)))]>,
+            Requires<[IsARM, HasV5TE]>;
+
+  def TT : AI<(ops GPR:$dst, GPR:$a, GPR:$b, GPR:$acc),
+              !strconcat(opc, "tt $dst, $a, $b, $acc"),
+              [(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, 16),
+                                                     (sra GPR:$b, 16))))]>,
+            Requires<[IsARM, HasV5TE]>;
+
+  def WB1 : AI<(ops GPR:$dst, GPR:$a, GPR:$b, GPR:$acc),
+               !strconcat(opc, "wb $dst, $a, $b, $acc"),
+               [(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a,
+                                             (sext_inreg GPR:$b, i16)), 16)))]>,
+            Requires<[IsARM, HasV5TE]>;
+  def WB2 : AI<(ops GPR:$dst, GPR:$a, GPR:$b, GPR:$acc),
+               !strconcat(opc, "wb $dst, $a, $b, $acc"),
+             [(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a,
+                                           (sra (shl GPR:$b, 16), 16)), 16)))]>,
+            Requires<[IsARM, HasV5TE]>;
+  def WB3 : AI<(ops GPR:$dst, GPR:$a, GPR:$b, GPR:$acc),
+               !strconcat(opc, "wb $dst, $a, $b, $acc"),
+               [(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a,
+                                              sext_16_node:$b), 16)))]>,
+            Requires<[IsARM, HasV5TE]>;
+
+  def WT : AI<(ops GPR:$dst, GPR:$a, GPR:$b, GPR:$acc),
+              !strconcat(opc, "wt $dst, $a, $b, $acc"),
+              [(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a,
+                                                   (sra GPR:$b, 16)), 16)))]>,
+            Requires<[IsARM, HasV5TE]>;
+}
 
-def FCVTSD  : InstARM<(ops FPRegs:$dst, DFPRegs:$src),
-                       "fcvtsd $dst, $src", [(set FPRegs:$dst, (fround DFPRegs:$src))]>;
+defm SMUL : AI_smul<"smul", BinOpFrag<(mul node:$LHS, node:$RHS)>>;
+defm SMLA : AI_smla<"smla", BinOpFrag<(mul node:$LHS, node:$RHS)>>;
 
-def FMSTAT  : InstARM<(ops ), "fmstat", [(armfmstat)]>;
+// TODO: Halfword multiply accumulate long: SMLAL<x><y>
+// TODO: Dual halfword multiply: SMUAD, SMUSD, SMLAD, SMLSD, SMLALD, SMLSLD
 
-// Floating Point Arithmetic
-def FADDS   : FPBinOp<"fadds",  fadd>;
-def FADDD   : DFPBinOp<"faddd", fadd>;
-def FSUBS   : FPBinOp<"fsubs",  fsub>;
-def FSUBD   : DFPBinOp<"fsubd", fsub>;
+//===----------------------------------------------------------------------===//
+//  Misc. Arithmetic Instructions.
+//
 
-def FNEGS   : FPUnaryOp<"fnegs",  fneg>;
-def FNEGD   : DFPUnaryOp<"fnegd", fneg>;
-def FABSS   : FPUnaryOp<"fabss",  fabs>;
-def FABSD   : DFPUnaryOp<"fabsd", fabs>;
+def CLZ  : AI<(ops GPR:$dst, GPR:$src),
+              "clz $dst, $src",
+              [(set GPR:$dst, (ctlz GPR:$src))]>, Requires<[IsARM, HasV5T]>;
+
+def REV  : AI<(ops GPR:$dst, GPR:$src),
+              "rev $dst, $src",
+              [(set GPR:$dst, (bswap GPR:$src))]>, Requires<[IsARM, HasV6]>;
+
+def REV16 : AI<(ops GPR:$dst, GPR:$src),
+               "rev16 $dst, $src",
+               [(set GPR:$dst,
+                   (or (and (srl GPR:$src, 8), 0xFF),
+                       (or (and (shl GPR:$src, 8), 0xFF00),
+                           (or (and (srl GPR:$src, 8), 0xFF0000),
+                               (and (shl GPR:$src, 8), 0xFF000000)))))]>,
+               Requires<[IsARM, HasV6]>;
+
+def REVSH : AI<(ops GPR:$dst, GPR:$src),
+               "revsh $dst, $src",
+               [(set GPR:$dst,
+                  (sext_inreg
+                    (or (srl (and GPR:$src, 0xFFFF), 8),
+                        (shl GPR:$src, 8)), i16))]>,
+               Requires<[IsARM, HasV6]>;
+
+def PKHBT : AI<(ops GPR:$dst, GPR:$src1, GPR:$src2, i32imm:$shamt),
+               "pkhbt $dst, $src1, $src2, LSL $shamt",
+               [(set GPR:$dst, (or (and GPR:$src1, 0xFFFF),
+                                   (and (shl GPR:$src2, (i32 imm:$shamt)),
+                                        0xFFFF0000)))]>,
+               Requires<[IsARM, HasV6]>;
+
+// Alternate cases for PKHBT where identities eliminate some nodes.
+def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF), (and GPR:$src2, 0xFFFF0000)),
+               (PKHBT GPR:$src1, GPR:$src2, 0)>;
+def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF), (shl GPR:$src2, imm16_31:$shamt)),
+               (PKHBT GPR:$src1, GPR:$src2, imm16_31:$shamt)>;
+
+
+def PKHTB : AI<(ops  GPR:$dst, GPR:$src1, GPR:$src2, i32imm:$shamt),
+               "pkhtb $dst, $src1, $src2, ASR $shamt",
+               [(set GPR:$dst, (or (and GPR:$src1, 0xFFFF0000),
+                                   (and (sra GPR:$src2, imm16_31:$shamt),
+                                        0xFFFF)))]>, Requires<[IsARM, HasV6]>;
+
+// Alternate cases for PKHTB where identities eliminate some nodes.  Note that
+// a shift amount of 0 is *not legal* here, it is PKHBT instead.
+def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF0000), (srl GPR:$src2, 16)),
+               (PKHTB GPR:$src1, GPR:$src2, 16)>;
+def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF0000),
+                   (and (srl GPR:$src2, imm1_15:$shamt), 0xFFFF)),
+               (PKHTB GPR:$src1, GPR:$src2, imm1_15:$shamt)>;
 
-def FMULS   : FPBinOp<"fmuls", fmul>;
-def FMULD   : DFPBinOp<"fmuld", fmul>;
-def FDIVS   : FPBinOp<"fdivs", fdiv>;
-def FDIVD   : DFPBinOp<"fdivd", fdiv>;
 
-// Floating Point Load
-def FLDS  : InstARM<(ops FPRegs:$dst, op_addr_mode5:$addr),
-                     "flds $dst, $addr",
-                     [(set FPRegs:$dst, (load addr_mode5:$addr))]>;
+//===----------------------------------------------------------------------===//
+//  Comparison Instructions...
+//
 
-def FLDD  : InstARM<(ops DFPRegs:$dst, op_addr_mode5:$addr),
-                     "fldd $dst, $addr",
-                     [(set DFPRegs:$dst, (load addr_mode5:$addr))]>;
+defm CMP  : AI1_bin0_irs<"cmp", BinOpFrag<(ARMcmp node:$LHS, node:$RHS)>>;
+defm CMN  : AI1_bin0_irs<"cmn", BinOpFrag<(ARMcmp node:$LHS,(ineg node:$RHS))>>;
 
-// Floating Point Store
-def FSTS    : InstARM<(ops FPRegs:$src, op_addr_mode5:$addr),
-                       "fsts $src, $addr",
-                       [(store FPRegs:$src, addr_mode5:$addr)]>;
+def : ARMPat<(ARMcmp GPR:$src, so_imm_neg:$imm),
+             (CMNri  GPR:$src, so_imm_neg:$imm)>;
 
-def FSTD    : InstARM<(ops DFPRegs:$src, op_addr_mode5:$addr),
-                       "fstd $src, $addr",
-                       [(store DFPRegs:$src, addr_mode5:$addr)]>;
+// Note that TST/TEQ don't set all the same flags that CMP does!
+def TSTrr : AI1<(ops GPR:$a, so_reg:$b), "tst $a, $b", []>;
+def TSTri : AI1<(ops GPR:$a, so_imm:$b), "tst $a, $b", []>;
+def TEQrr : AI1<(ops GPR:$a, so_reg:$b), "teq $a, $b", []>;
+def TEQri : AI1<(ops GPR:$a, so_imm:$b), "teq $a, $b", []>;
+
+// Conditional moves
+def MOVCCr : AI<(ops GPR:$dst, GPR:$false, GPR:$true, CCOp:$cc),
+                "mov$cc $dst, $true",
+                [(set GPR:$dst, (ARMcmov GPR:$false, GPR:$true, imm:$cc))]>,
+                RegConstraint<"$false = $dst">;
+
+def MOVCCs : AI<(ops GPR:$dst, GPR:$false, so_reg:$true, CCOp:$cc),
+                "mov$cc $dst, $true",
+                [(set GPR:$dst, (ARMcmov GPR:$false, so_reg:$true,imm:$cc))]>,
+                RegConstraint<"$false = $dst">;
+
+def MOVCCi : AI<(ops GPR:$dst, GPR:$false, so_imm:$true, CCOp:$cc),
+                "mov$cc $dst, $true",
+                [(set GPR:$dst, (ARMcmov GPR:$false, so_imm:$true,imm:$cc))]>,
+                RegConstraint<"$false = $dst">;
+
+
+// LEApcrel - Load a pc-relative address into a register without offending the
+// assembler.
+def LEApcrel : AI1<(ops GPR:$dst, i32imm:$label),
+                   !strconcat(!strconcat(".set PCRELV${:uid}, ($label-(",
+                                         "${:private}PCRELL${:uid}+8))\n"),
+                              !strconcat("${:private}PCRELL${:uid}:\n\t",
+                                         "add $dst, pc, #PCRELV${:uid}")),
+                   []>;
+
+def LEApcrelJT : AI1<(ops GPR:$dst, i32imm:$label, i32imm:$id),
+          !strconcat(!strconcat(".set PCRELV${:uid}, (${label}_${id:no_hash}-(",
+                                         "${:private}PCRELL${:uid}+8))\n"),
+                              !strconcat("${:private}PCRELL${:uid}:\n\t",
+                                         "add $dst, pc, #PCRELV${:uid}")),
+                   []>;
 
-def : Pat<(ARMcall tglobaladdr:$dst),
-          (bl tglobaladdr:$dst)>;
+//===----------------------------------------------------------------------===//
+// Non-Instruction Patterns
+//
 
-def : Pat<(ARMcall texternalsym:$dst),
-          (bl texternalsym:$dst)>;
+// ConstantPool, GlobalAddress, and JumpTable
+def : ARMPat<(ARMWrapper  tglobaladdr :$dst), (LEApcrel tglobaladdr :$dst)>;
+def : ARMPat<(ARMWrapper  tconstpool  :$dst), (LEApcrel tconstpool  :$dst)>;
+def : ARMPat<(ARMWrapperJT tjumptable:$dst, imm:$id),
+          (LEApcrelJT tjumptable:$dst, imm:$id)>;
+
+// Large immediate handling.
+
+// Two piece so_imms.
+def : ARMPat<(i32 so_imm2part:$src),
+             (ORRri (MOVri (so_imm2part_1 imm:$src)),
+                    (so_imm2part_2 imm:$src))>;
+
+def : ARMPat<(or GPR:$LHS, so_imm2part:$RHS),
+              (ORRri (ORRri GPR:$LHS, (so_imm2part_1 imm:$RHS)),
+                     (so_imm2part_2 imm:$RHS))>;
+def : ARMPat<(xor GPR:$LHS, so_imm2part:$RHS),
+              (EORri (EORri GPR:$LHS, (so_imm2part_1 imm:$RHS)),
+                     (so_imm2part_2 imm:$RHS))>;
+
+// TODO: add,sub,and, 3-instr forms?
+
+
+// Direct calls
+def : ARMPat<(ARMcall texternalsym:$func), (BL texternalsym:$func)>;
+
+// zextload i1 -> zextload i8
+def : ARMPat<(zextloadi1 addrmode2:$addr),  (LDRB addrmode2:$addr)>;
+
+// extload -> zextload
+def : ARMPat<(extloadi1  addrmode2:$addr),  (LDRB addrmode2:$addr)>;
+def : ARMPat<(extloadi8  addrmode2:$addr),  (LDRB addrmode2:$addr)>;
+def : ARMPat<(extloadi16 addrmode3:$addr),  (LDRH addrmode3:$addr)>;
+
+// truncstore i1 -> truncstore i8.  Uses ARMPat, like the load patterns above
+// (presumably so these STRB forms are only selected in ARM mode).
+def : ARMPat<(truncstorei1 GPR:$src, addrmode2:$dst),
+             (STRB GPR:$src, addrmode2:$dst)>;
+def : ARMPat<(pre_truncsti1 GPR:$src, GPR:$base, am2offset:$offset),
+             (STRB_PRE GPR:$src, GPR:$base, am2offset:$offset)>;
+def : ARMPat<(post_truncsti1 GPR:$src, GPR:$base, am2offset:$offset),
 
-def : Pat<(extloadi8 IntRegs:$addr),
-          (LDRB IntRegs:$addr)>;
-def : Pat<(extloadi16 IntRegs:$addr),
-          (LDRH IntRegs:$addr)>;
+//===----------------------------------------------------------------------===//
+// Thumb Support
+//
 
-// extload bool -> extload byte
-def : Pat<(extloadi1 IntRegs:$addr), (LDRB IntRegs:$addr)>;
+include "ARMInstrThumb.td"
 
-// zextload bool -> zextload byte
-def : Pat<(i32 (zextloadi1 IntRegs:$addr)), (LDRB IntRegs:$addr)>;
+//===----------------------------------------------------------------------===//
+// Floating Point Support
+//
 
-// truncstore bool -> truncstore byte.
-def : Pat<(truncstorei1 IntRegs:$src, IntRegs:$addr),
-          (STRB IntRegs:$src, IntRegs:$addr)>;
+include "ARMInstrVFP.td"


Index: llvm/lib/Target/ARM/ARMRegisterInfo.cpp
diff -u llvm/lib/Target/ARM/ARMRegisterInfo.cpp:1.33 llvm/lib/Target/ARM/ARMRegisterInfo.cpp:1.34
--- llvm/lib/Target/ARM/ARMRegisterInfo.cpp:1.33	Fri Jan 12 14:52:27 2007
+++ llvm/lib/Target/ARM/ARMRegisterInfo.cpp	Fri Jan 19 01:51:42 2007
@@ -13,243 +13,1023 @@
 //===----------------------------------------------------------------------===//
 
 #include "ARM.h"
+#include "ARMAddressingModes.h"
+#include "ARMInstrInfo.h"
+#include "ARMMachineFunctionInfo.h"
 #include "ARMRegisterInfo.h"
-#include "ARMCommon.h"
+#include "ARMSubtarget.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineLocation.h"
-#include "llvm/Type.h"
 #include "llvm/Target/TargetFrameInfo.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Type.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/STLExtras.h"
+#include <algorithm>
+#include <iostream>
 using namespace llvm;
 
-// hasFP - Return true if the specified function should have a dedicated frame
-// pointer register.  This is true if the function has variable sized allocas or
-// if frame pointer elimination is disabled.
-//
-static bool hasFP(const MachineFunction &MF) {
-  const MachineFrameInfo *MFI = MF.getFrameInfo();
-  return NoFramePointerElim || MFI->hasVarSizedObjects();
-}
-
-static void splitInstructionWithImmediate(MachineBasicBlock &BB,
-				       MachineBasicBlock::iterator I,
-				       const TargetInstrDescriptor &TID,
-				       unsigned DestReg,
-				       unsigned OrigReg,
-				       unsigned immediate){
-  std::vector<unsigned> immediatePieces = splitImmediate(immediate);
-  std::vector<unsigned>::iterator it;
-  for (it=immediatePieces.begin(); it != immediatePieces.end(); ++it){
-    BuildMI(BB, I, TID, DestReg).addReg(OrigReg)
-	.addImm(*it).addImm(0).addImm(ARMShift::LSL);
+unsigned ARMRegisterInfo::getRegisterNumbering(unsigned RegEnum) {
+  using namespace ARM;  // Lets the R/S/D enum names below be used unqualified.
+  switch (RegEnum) {  // Map each register enum value to its numeric index.
+  case R0:  case S0:  case D0:  return 0;
+  case R1:  case S1:  case D1:  return 1;
+  case R2:  case S2:  case D2:  return 2;
+  case R3:  case S3:  case D3:  return 3;
+  case R4:  case S4:  case D4:  return 4;
+  case R5:  case S5:  case D5:  return 5;
+  case R6:  case S6:  case D6:  return 6;
+  case R7:  case S7:  case D7:  return 7;
+  case R8:  case S8:  case D8:  return 8;
+  case R9:  case S9:  case D9:  return 9;
+  case R10: case S10: case D10: return 10;
+  case R11: case S11: case D11: return 11;
+  case R12: case S12: case D12: return 12;
+  case SP:  case S13: case D13: return 13;  // SP shares index 13.
+  case LR:  case S14: case D14: return 14;  // LR shares index 14.
+  case PC:  case S15: case D15: return 15;  // PC shares index 15.
+  case S16: return 16;  // Only the S registers continue past 15.
+  case S17: return 17;
+  case S18: return 18;
+  case S19: return 19;
+  case S20: return 20;
+  case S21: return 21;
+  case S22: return 22;
+  case S23: return 23;
+  case S24: return 24;
+  case S25: return 25;
+  case S26: return 26;
+  case S27: return 27;
+  case S28: return 28;
+  case S29: return 29;
+  case S30: return 30;
+  case S31: return 31;
+  default:  // Any other enum value is treated as a fatal error.
+    std::cerr << "Unknown ARM register!\n";
+    abort();
   }
 }
 
-ARMRegisterInfo::ARMRegisterInfo(const TargetInstrInfo &tii)
+ARMRegisterInfo::ARMRegisterInfo(const TargetInstrInfo &tii,
+                                 const ARMSubtarget &sti)
   : ARMGenRegisterInfo(ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP),
-    TII(tii) {
+    TII(tii), STI(sti),
+    FramePtr(STI.useThumbBacktraces() ? ARM::R7 : ARM::R11) {
+}
+
+bool ARMRegisterInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+                                                MachineBasicBlock::iterator MI,
+                                const std::vector<CalleeSavedInfo> &CSI) const {
+  MachineFunction &MF = *MBB.getParent();
+  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+  if (!AFI->isThumbFunction() || CSI.empty())
+    return false;  // Not a Thumb function, or nothing to save: emit nothing.
+  // One tPUSH is built before MI; it receives every CSI register, last first.
+  MachineInstrBuilder MIB = BuildMI(MBB, MI, TII.get(ARM::tPUSH));
+  for (unsigned i = CSI.size(); i != 0; --i)
+    MIB.addReg(CSI[i-1].getReg());
+  return true;  // The push was emitted here.
+}
+
+bool ARMRegisterInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+                                                 MachineBasicBlock::iterator MI,
+                                const std::vector<CalleeSavedInfo> &CSI) const {
+  MachineFunction &MF = *MBB.getParent();
+  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+  if (!AFI->isThumbFunction() || CSI.empty())
+    return false;  // Not a Thumb function, or nothing to restore.
+  // One tPOP is inserted before MI; it receives the CSI registers, last first.
+  MachineInstr *PopMI = new MachineInstr(TII.get(ARM::tPOP));
+  MBB.insert(MI, PopMI);
+  for (unsigned i = CSI.size(); i != 0; --i) {
+    unsigned Reg = CSI[i-1].getReg();
+    if (Reg == ARM::LR) {  // A saved LR is popped into PC instead.
+      Reg = ARM::PC;
+      PopMI->setInstrDescriptor(TII.get(ARM::tPOP_RET));  // Pop becomes tPOP_RET.
+      MBB.erase(MI);  // NOTE(review): presumably MI is the return it replaces.
+    }
+    PopMI->addRegOperand(Reg, true);
+  }
+  return true;  // The pop was emitted here.
 }
 
 void ARMRegisterInfo::
 storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                     unsigned SrcReg, int FI,
                     const TargetRegisterClass *RC) const {
-  assert (RC == ARM::IntRegsRegisterClass);
-  BuildMI(MBB, I, TII.get(ARM::STR)).addReg(SrcReg).addFrameIndex(FI).addImm(0);
+  if (RC == ARM::GPRRegisterClass) {
+    MachineFunction &MF = *MBB.getParent();
+    ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+    if (AFI->isThumbFunction())
+      BuildMI(MBB, I, TII.get(ARM::tSTRspi)).addReg(SrcReg)
+        .addFrameIndex(FI).addImm(0);
+    else
+      BuildMI(MBB, I, TII.get(ARM::STR)).addReg(SrcReg)
+          .addFrameIndex(FI).addReg(0).addImm(0);
+  } else if (RC == ARM::DPRRegisterClass) {
+    BuildMI(MBB, I, TII.get(ARM::FSTD)).addReg(SrcReg)
+    .addFrameIndex(FI).addImm(0);
+  } else {
+    assert(RC == ARM::SPRRegisterClass && "Unknown regclass!");
+    BuildMI(MBB, I, TII.get(ARM::FSTS)).addReg(SrcReg)
+      .addFrameIndex(FI).addImm(0);
+  }
 }
 
 void ARMRegisterInfo::
 loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                      unsigned DestReg, int FI,
                      const TargetRegisterClass *RC) const {
-  assert (RC == ARM::IntRegsRegisterClass);
-  BuildMI(MBB, I, TII.get(ARM::LDR), DestReg).addFrameIndex(FI).addImm(0);
+  if (RC == ARM::GPRRegisterClass) {
+    MachineFunction &MF = *MBB.getParent();
+    ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+    if (AFI->isThumbFunction())
+      BuildMI(MBB, I, TII.get(ARM::tLDRspi), DestReg)
+        .addFrameIndex(FI).addImm(0);
+    else
+      BuildMI(MBB, I, TII.get(ARM::LDR), DestReg)
+      .addFrameIndex(FI).addReg(0).addImm(0);
+  } else if (RC == ARM::DPRRegisterClass) {
+    BuildMI(MBB, I, TII.get(ARM::FLDD), DestReg)
+      .addFrameIndex(FI).addImm(0);
+  } else {
+    assert(RC == ARM::SPRRegisterClass && "Unknown regclass!");
+    BuildMI(MBB, I, TII.get(ARM::FLDS), DestReg)
+      .addFrameIndex(FI).addImm(0);
+  }
 }
 
 void ARMRegisterInfo::copyRegToReg(MachineBasicBlock &MBB,
-                                     MachineBasicBlock::iterator I,
-                                     unsigned DestReg, unsigned SrcReg,
-                                     const TargetRegisterClass *RC) const {
-  assert(RC == ARM::IntRegsRegisterClass ||
-         RC == ARM::FPRegsRegisterClass  ||
-         RC == ARM::DFPRegsRegisterClass);
-
-  if (RC == ARM::IntRegsRegisterClass)
-    BuildMI(MBB, I, TII.get(ARM::MOV), DestReg).addReg(SrcReg).addImm(0)
-      .addImm(ARMShift::LSL);
-  else if (RC == ARM::FPRegsRegisterClass)
+                                   MachineBasicBlock::iterator I,
+                                   unsigned DestReg, unsigned SrcReg,
+                                   const TargetRegisterClass *RC) const {
+  if (RC == ARM::GPRRegisterClass) {
+    MachineFunction &MF = *MBB.getParent();
+    ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+    BuildMI(MBB, I, TII.get(AFI->isThumbFunction() ? ARM::tMOVrr : ARM::MOVrr),
+            DestReg).addReg(SrcReg);
+  } else if (RC == ARM::SPRRegisterClass)
     BuildMI(MBB, I, TII.get(ARM::FCPYS), DestReg).addReg(SrcReg);
-  else
+  else if (RC == ARM::DPRRegisterClass)
     BuildMI(MBB, I, TII.get(ARM::FCPYD), DestReg).addReg(SrcReg);
+  else
+    abort();
 }
 
-MachineInstr *ARMRegisterInfo::foldMemoryOperand(MachineInstr* MI,
-                                                   unsigned OpNum,
-                                                   int FI) const {
-  return NULL;
+MachineInstr *ARMRegisterInfo::foldMemoryOperand(MachineInstr *MI,
+                                                 unsigned OpNum, int FI) const {
+  unsigned Opc = MI->getOpcode();
+  MachineInstr *NewMI = NULL;
+  switch (Opc) {
+  default: break;
+  case ARM::MOVrr: {
+    if (OpNum == 0) { // move -> store
+      unsigned SrcReg = MI->getOperand(1).getReg();
+      NewMI = BuildMI(TII.get(ARM::STR)).addReg(SrcReg).addFrameIndex(FI)
+        .addReg(0).addImm(0);
+    } else {          // move -> load
+      unsigned DstReg = MI->getOperand(0).getReg();
+      NewMI = BuildMI(TII.get(ARM::LDR), DstReg).addFrameIndex(FI).addReg(0)
+        .addImm(0);
+    }
+    break;
+  }
+  case ARM::tMOVrr: {
+    if (OpNum == 0) { // move -> store
+      unsigned SrcReg = MI->getOperand(1).getReg();
+      NewMI = BuildMI(TII.get(ARM::tSTRspi)).addReg(SrcReg).addFrameIndex(FI)
+        .addImm(0);
+    } else {          // move -> load
+      unsigned DstReg = MI->getOperand(0).getReg();
+      NewMI = BuildMI(TII.get(ARM::tLDRspi), DstReg).addFrameIndex(FI)
+        .addImm(0);
+    }
+    break;
+  }
+  case ARM::FCPYS: {
+    if (OpNum == 0) { // move -> store
+      unsigned SrcReg = MI->getOperand(1).getReg();
+      NewMI = BuildMI(TII.get(ARM::FSTS)).addReg(SrcReg).addFrameIndex(FI)
+        .addImm(0);
+    } else {          // move -> load
+      unsigned DstReg = MI->getOperand(0).getReg();
+      NewMI = BuildMI(TII.get(ARM::FLDS), DstReg).addFrameIndex(FI).addImm(0);
+    }
+    break;
+  }
+  case ARM::FCPYD: {
+    if (OpNum == 0) { // move -> store
+      unsigned SrcReg = MI->getOperand(1).getReg();
+      NewMI = BuildMI(TII.get(ARM::FSTD)).addReg(SrcReg).addFrameIndex(FI)
+        .addImm(0);
+    } else {          // move -> load
+      unsigned DstReg = MI->getOperand(0).getReg();
+      NewMI = BuildMI(TII.get(ARM::FLDD), DstReg).addFrameIndex(FI).addImm(0);
+    }
+    break;
+  }
+  }
+
+  if (NewMI)
+    NewMI->copyKillDeadInfo(MI);
+  return NewMI;
 }
 
 const unsigned* ARMRegisterInfo::getCalleeSavedRegs() const {
   static const unsigned CalleeSavedRegs[] = {
-    ARM::R4,  ARM::R5, ARM::R6,  ARM::R7,
-    ARM::R8,  ARM::R9, ARM::R10, ARM::R11,
-    ARM::R14, 0
+    ARM::LR, ARM::R11, ARM::R10, ARM::R9, ARM::R8,
+    ARM::R7, ARM::R6,  ARM::R5,  ARM::R4,
+
+    ARM::D15, ARM::D14, ARM::D13, ARM::D12,
+    ARM::D11, ARM::D10, ARM::D9,  ARM::D8,
+    0
+  };
+
+  static const unsigned DarwinCalleeSavedRegs[] = {
+    ARM::LR,  ARM::R7,  ARM::R6, ARM::R5, ARM::R4,
+    ARM::R11, ARM::R10, ARM::R9, ARM::R8,
+
+    ARM::D15, ARM::D14, ARM::D13, ARM::D12,
+    ARM::D11, ARM::D10, ARM::D9,  ARM::D8,
+    0
   };
-  return CalleeSavedRegs;
+  return STI.isDarwin() ? DarwinCalleeSavedRegs : CalleeSavedRegs;
 }
 
 const TargetRegisterClass* const *
 ARMRegisterInfo::getCalleeSavedRegClasses() const {
   static const TargetRegisterClass * const CalleeSavedRegClasses[] = {
-    &ARM::IntRegsRegClass, &ARM::IntRegsRegClass, &ARM::IntRegsRegClass, &ARM::IntRegsRegClass,
-    &ARM::IntRegsRegClass, &ARM::IntRegsRegClass, &ARM::IntRegsRegClass, &ARM::IntRegsRegClass,
-    &ARM::IntRegsRegClass, 0
+    &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::GPRRegClass,
+    &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::GPRRegClass,
+    &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::GPRRegClass,
+
+    &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass,
+    &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass,
+    0
   };
   return CalleeSavedRegClasses;
 }
 
+/// hasFP - Return true if the specified function should have a dedicated frame
+/// pointer register.  This is true if the function has variable sized allocas
+/// or if frame pointer elimination is disabled.
+///
+static bool hasFP(const MachineFunction &MF) {
+  return NoFramePointerElim || MF.getFrameInfo()->hasVarSizedObjects();
+}
+
+/// emitARMRegPlusImmediate - Emit ADDri / SUBri instructions before MBBI that
+/// compute DestReg = BaseReg + NumBytes, one rotated 8-bit field at a time.
+static
+void emitARMRegPlusImmediate(MachineBasicBlock &MBB,
+                             MachineBasicBlock::iterator &MBBI,
+                             unsigned DestReg, unsigned BaseReg,
+                             int NumBytes, const TargetInstrInfo &TII) {
+  bool isSub = NumBytes < 0;
+  if (isSub) NumBytes = -NumBytes;  // Work on the magnitude; isSub picks SUBri.
+  // Each iteration peels one field off NumBytes until nothing is left.
+  while (NumBytes) {
+    unsigned RotAmt = ARM_AM::getSOImmValRotate(NumBytes);
+    unsigned ThisVal = NumBytes & ARM_AM::rotr32(0xFF, RotAmt);
+    assert(ThisVal && "Didn't extract field correctly");
+    
+    // This instruction covers these bits of the offset, so clear them.
+    NumBytes &= ~ThisVal;
+    
+    // Get the properly encoded SOImmVal field.
+    int SOImmVal = ARM_AM::getSOImmVal(ThisVal);
+    assert(SOImmVal != -1 && "Bit extraction didn't work?");
+    
+    // Build the new ADD / SUB.
+    BuildMI(MBB, MBBI, TII.get(isSub ? ARM::SUBri : ARM::ADDri), DestReg)
+      .addReg(BaseReg).addImm(SOImmVal);
+    BaseReg = DestReg;  // Later pieces add onto the partial result in DestReg.
+  }
+}
+
+/// isLowRegister - Returns true if the register is low register r0-r7.
+///
+static bool isLowRegister(unsigned Reg) {
+  using namespace ARM;
+  switch (Reg) {
+  case R0:  case R1:  case R2:  case R3:
+  case R4:  case R5:  case R6:  case R7:
+    return true;
+  default:
+    return false;
+  }
+}
+
+/// emitThumbRegPlusImmediate - Emit a series of instructions to materialize
+/// a destreg = basereg + immediate in Thumb code.
+///
+/// The opcode used depends on the registers involved:
+///  - sp = sp +/- imm: tADDspi / tSUBspi, immediate counted in words.
+///  - rd = sp + imm:   tADDrSPi (immediate counted in words) followed by
+///                     tADDi8 / tADDi3 for whatever is left over.
+///  - otherwise:       a copy or tADDi3 / tSUBi3 into DestReg, followed by
+///                     tADDi8 / tSUBi8 with immediates counted in bytes.
+static
+void emitThumbRegPlusImmediate(MachineBasicBlock &MBB,
+                               MachineBasicBlock::iterator &MBBI,
+                               unsigned DestReg, unsigned BaseReg,
+                               int NumBytes, const TargetInstrInfo &TII) {
+  bool isSub = NumBytes < 0;
+  unsigned Bytes = (unsigned)NumBytes;
+  if (isSub) Bytes = -NumBytes;
+  bool isMul4 = (Bytes & 3) == 0;
+  bool isTwoAddr = false;
+  unsigned NumBits = 1;
+  unsigned Opc = 0;
+  unsigned ExtraOpc = 0;
+
+  if (DestReg == BaseReg && BaseReg == ARM::SP) {
+    assert(isMul4 && "Thumb sp inc / dec size must be multiple of 4!");
+    Bytes >>= 2;  // Implicitly multiplied by 4.
+    NumBits = 7;
+    Opc = isSub ? ARM::tSUBspi : ARM::tADDspi;
+    isTwoAddr = true;
+  } else if (!isSub && BaseReg == ARM::SP) {
+    if (!isMul4) {
+      // The low two bits cannot be expressed by tADDrSPi; add them last.
+      Bytes &= ~3;
+      ExtraOpc = ARM::tADDi3;
+    }
+    Bytes >>= 2;  // Implicitly multiplied by 4.
+    NumBits = 8;
+    Opc = ARM::tADDrSPi;
+    if (Bytes == 0) {
+      // Less than one word to add.  The loop below would emit nothing and
+      // leave DestReg undefined (the tADDi3 for the residue reads it), so
+      // copy sp into DestReg with a zero immediate.
+      BuildMI(MBB, MBBI, TII.get(Opc), DestReg).addReg(BaseReg).addImm(0);
+    }
+  } else {
+    if (DestReg != BaseReg) {
+      if (isLowRegister(DestReg) && isLowRegister(BaseReg)) {
+        // If both are low registers, emit DestReg = add BaseReg, min(Imm, 7)
+        unsigned Chunk = (1 << 3) - 1;
+        unsigned ThisVal = (Bytes > Chunk) ? Chunk : Bytes;
+        Bytes -= ThisVal;
+        BuildMI(MBB, MBBI, TII.get(isSub ? ARM::tSUBi3 : ARM::tADDi3), DestReg)
+          .addReg(BaseReg).addImm(ThisVal);
+      } else {
+        BuildMI(MBB, MBBI, TII.get(ARM::tMOVrr), DestReg).addReg(BaseReg);
+      }
+      BaseReg = DestReg;
+    }
+    NumBits = 8;
+    Opc = isSub ? ARM::tSUBi8 : ARM::tADDi8;
+    isTwoAddr = true;
+  }
+
+  unsigned Chunk = (1 << NumBits) - 1;
+  while (Bytes) {
+    unsigned ThisVal = (Bytes > Chunk) ? Chunk : Bytes;
+    Bytes -= ThisVal;
+    // Build the new tADD / tSUB.
+    if (isTwoAddr)
+      BuildMI(MBB, MBBI, TII.get(Opc), DestReg).addImm(ThisVal);
+    else {
+      BuildMI(MBB, MBBI, TII.get(Opc), DestReg).addReg(BaseReg).addImm(ThisVal);
+      BaseReg = DestReg;
+
+      if (Opc == ARM::tADDrSPi) {
+        // r4 = add sp, imm
+        // r4 = add r4, imm
+        // ...
+        // tADDrSPi counted its immediate in words, but tADDi8 / tSUBi8 take
+        // it in bytes, so scale what is left back up before continuing.
+        Bytes <<= 2;
+        NumBits = 8;
+        Chunk = (1 << NumBits) - 1;
+        Opc = isSub ? ARM::tSUBi8 : ARM::tADDi8;
+        isTwoAddr = true;
+      }
+    }
+  }
+
+  // Add the sub-word residue that tADDrSPi could not express.
+  if (ExtraOpc)
+    BuildMI(MBB, MBBI, TII.get(ExtraOpc), DestReg).addReg(DestReg)
+      .addImm(((unsigned)NumBytes) & 3);
+}
+
+/// emitSPUpdate - Emit sp = sp + NumBytes using the Thumb or the ARM
+/// expansion, as selected by isThumb.
+static
+void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
+                  int NumBytes, bool isThumb, const TargetInstrInfo &TII) {
+  if (!isThumb) {
+    emitARMRegPlusImmediate(MBB, MBBI, ARM::SP, ARM::SP, NumBytes, TII);
+    return;
+  }
+  emitThumbRegPlusImmediate(MBB, MBBI, ARM::SP, ARM::SP, NumBytes, TII);
+}
+
 void ARMRegisterInfo::
 eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator I) const {
-  if (hasFP(MF)) {
+  if (MF.getFrameInfo()->hasVarSizedObjects()) {
+    // With alloca in the function the pseudos become explicit SP updates:
+    //   ADJCALLSTACKDOWN -> sub sp, sp, amount
+    //   ADJCALLSTACKUP   -> add sp, sp, amount
     MachineInstr *Old = I;
     unsigned Amount = Old->getOperand(0).getImmedValue();
     if (Amount != 0) {
+      ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+      // We need to keep the stack aligned properly.  To do this, we round the
+      // amount of space needed for the outgoing arguments up to the next
+      // alignment boundary.
       unsigned Align = MF.getTarget().getFrameInfo()->getStackAlignment();
       Amount = (Amount+Align-1)/Align*Align;
 
+      // Replace the pseudo instruction with the actual SP adjustment.
       if (Old->getOpcode() == ARM::ADJCALLSTACKDOWN) {
-        // sub sp, sp, amount
-	splitInstructionWithImmediate(MBB, I, TII.get(ARM::SUB), ARM::R13,
-				   ARM::R13, Amount);
+        emitSPUpdate(MBB, I, -Amount, AFI->isThumbFunction(), TII);
       } else {
-        // add sp, sp, amount
         assert(Old->getOpcode() == ARM::ADJCALLSTACKUP);
-	splitInstructionWithImmediate(MBB, I, TII.get(ARM::ADD), ARM::R13,
-				   ARM::R13, Amount);
+        emitSPUpdate(MBB, I, Amount, AFI->isThumbFunction(), TII);
       }
     }
   }
   MBB.erase(I);
 }
 
-void
-ARMRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II) const {
+/// emitThumbConstant - Materialize the constant Imm in DestReg: move in up
+/// to 255, add whatever remains of the magnitude, then negate the register
+/// if Imm was negative.
+static void emitThumbConstant(MachineBasicBlock &MBB,
+                              MachineBasicBlock::iterator &MBBI,
+                              unsigned DestReg, int Imm,
+                              const TargetInstrInfo &TII) {
+  const bool isNeg = Imm < 0;
+  int Remaining = isNeg ? -Imm : Imm;
+
+  // The initial tMOVri8 takes at most 255 of the magnitude.
+  const int MaxMov = (1 << 8) - 1;
+  const int First = (Remaining > MaxMov) ? MaxMov : Remaining;
+  Remaining -= First;
+
+  BuildMI(MBB, MBBI, TII.get(ARM::tMOVri8), DestReg).addImm(First);
+  if (Remaining > 0)
+    emitThumbRegPlusImmediate(MBB, MBBI, DestReg, DestReg, Remaining, TII);
+  if (isNeg)
+    BuildMI(MBB, MBBI, TII.get(ARM::tNEG), DestReg).addReg(DestReg);
+}
+
+/// eliminateFrameIndex - Rewrite the frame index operand of the instruction
+/// at II into a base register (SP, or the frame register when the function
+/// has variable sized objects) plus an immediate offset.  Whatever part of
+/// the offset the instruction cannot encode is materialized into a scratch
+/// register by extra instructions.
+void ARMRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II) const{
+  unsigned i = 0;
   MachineInstr &MI = *II;
   MachineBasicBlock &MBB = *MI.getParent();
   MachineFunction &MF = *MBB.getParent();
+  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+  bool isThumb = AFI->isThumbFunction();
 
-  assert (MI.getOpcode() == ARM::LDR ||
-          MI.getOpcode() == ARM::STR ||
-          MI.getOpcode() == ARM::ADD);
+  // Locate the frame index operand.
+  while (!MI.getOperand(i).isFrameIndex()) {
+    ++i;
+    assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
+  }
+  
+  unsigned FrameReg = ARM::SP;
+  int FrameIndex = MI.getOperand(i).getFrameIndex();
+  int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) + 
+               MF.getFrameInfo()->getStackSize();
+
+  // Callee-save slots are addressed relative to the start of their own spill
+  // area; everything else is relative to SP, or to the frame pointer when
+  // the function has variable sized objects.
+  if (AFI->isGPRCalleeSavedArea1Frame(FrameIndex))
+    Offset -= AFI->getGPRCalleeSavedArea1Offset();
+  else if (AFI->isGPRCalleeSavedArea2Frame(FrameIndex))
+    Offset -= AFI->getGPRCalleeSavedArea2Offset();
+  else if (AFI->isDPRCalleeSavedAreaFrame(FrameIndex))
+    Offset -= AFI->getDPRCalleeSavedAreaOffset();
+  else if (MF.getFrameInfo()->hasVarSizedObjects()) {
+    // There are alloca()'s in this function, must reference off the frame
+    // pointer instead.
+    FrameReg = getFrameRegister(MF);
+    Offset -= AFI->getFramePtrSpillOffset();
+  }
 
-  unsigned FrameIdx = 1;
-  unsigned   OffIdx = 2;
+  unsigned Opcode = MI.getOpcode();
+  const TargetInstrDescriptor &Desc = TII.get(Opcode);
+  unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
+  bool isSub = false;
+  
+  if (Opcode == ARM::ADDri) {
+    Offset += MI.getOperand(i+1).getImm();
+    if (Offset == 0) {
+      // Turn it into a move.
+      MI.setInstrDescriptor(TII.get(ARM::MOVrr));
+      MI.getOperand(i).ChangeToRegister(FrameReg, false);
+      MI.RemoveOperand(i+1);
+      return;
+    } else if (Offset < 0) {
+      Offset = -Offset;
+      isSub = true;
+      MI.setInstrDescriptor(TII.get(ARM::SUBri));
+    }
+
+    // Common case: small offset, fits into instruction.
+    int ImmedOffset = ARM_AM::getSOImmVal(Offset);
+    if (ImmedOffset != -1) {
+      // Replace the FrameIndex with sp / fp
+      MI.getOperand(i).ChangeToRegister(FrameReg, false);
+      MI.getOperand(i+1).ChangeToImmediate(ImmedOffset);
+      return;
+    }
+    
+    // Otherwise, we fallback to common code below to form the imm offset with
+    // a sequence of ADDri instructions.  First though, pull as much of the imm
+    // into this ADDri as possible.  The mask is formed the same way as in
+    // emitARMRegPlusImmediate so that it lines up with the bits of Offset.
+    unsigned RotAmt = ARM_AM::getSOImmValRotate(Offset);
+    unsigned ThisImmVal = Offset & ARM_AM::rotr32(0xFF, RotAmt);
+    
+    // We will handle these bits from offset, clear them.
+    Offset &= ~ThisImmVal;
+    
+    // Get the properly encoded SOImmVal field.
+    int ThisSOImmVal = ARM_AM::getSOImmVal(ThisImmVal);
+    assert(ThisSOImmVal != -1 && "Bit extraction didn't work?");    
+    MI.getOperand(i+1).ChangeToImmediate(ThisSOImmVal);
+  } else if (Opcode == ARM::tADDrSPi) {
+    Offset += MI.getOperand(i+1).getImm();
+    assert((Offset & 3) == 0 &&
+           "add/sub sp, #imm immediate must be multiple of 4!");
+    Offset >>= 2;
+    if (Offset == 0) {
+      // Turn it into a move.
+      MI.setInstrDescriptor(TII.get(ARM::tMOVrr));
+      MI.getOperand(i).ChangeToRegister(FrameReg, false);
+      MI.RemoveOperand(i+1);
+      return;
+    }
 
-  int FrameIndex = MI.getOperand(FrameIdx).getFrameIndex();
+    // Common case: small offset, fits into instruction.
+    if ((Offset & ~255U) == 0) {
+      // Replace the FrameIndex with sp / fp
+      MI.getOperand(i).ChangeToRegister(FrameReg, false);
+      MI.getOperand(i+1).ChangeToImmediate(Offset);
+      return;
+    }
 
-  int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) +
-               MI.getOperand(OffIdx).getImmedValue();
+    unsigned DestReg = MI.getOperand(0).getReg();
+    if (Offset > 0) {
+      // Translate r0 = add sp, imm to
+      // r0 = add sp, 255*4
+      // r0 = add r0, (imm - 255*4)
+      MI.getOperand(i).ChangeToRegister(FrameReg, false);
+      MI.getOperand(i+1).ChangeToImmediate(255);
+      Offset = (Offset - 255) << 2;
+      MachineBasicBlock::iterator NII = next(II);
+      emitThumbRegPlusImmediate(MBB, NII, DestReg, DestReg, Offset, TII);
+    } else {
+      // Translate r0 = add sp, -imm to
+      // r0 = -imm (this is then translated into a series of instructions)
+      // r0 = add r0, sp
+      Offset <<= 2;
+      emitThumbConstant(MBB, II, DestReg, Offset, TII);
+      MI.setInstrDescriptor(TII.get(ARM::tADDhirr));
+      MI.getOperand(i).ChangeToRegister(DestReg, false);
+      MI.getOperand(i+1).ChangeToRegister(FrameReg, false);
+    }
+    return;
+  } else {
+    // Load / store: pick the immediate operand, its width and its scale
+    // from the addressing mode of the instruction.
+    unsigned ImmIdx = 0;
+    int InstrOffs = 0;
+    unsigned NumBits = 0;
+    unsigned Scale = 1;
+    switch (AddrMode) {
+    case ARMII::AddrMode2: {
+      ImmIdx = i+2;
+      InstrOffs = ARM_AM::getAM2Offset(MI.getOperand(ImmIdx).getImm());
+      if (ARM_AM::getAM2Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
+        InstrOffs *= -1;
+      NumBits = 12;
+      break;
+    }
+    case ARMII::AddrMode3: {
+      ImmIdx = i+2;
+      InstrOffs = ARM_AM::getAM3Offset(MI.getOperand(ImmIdx).getImm());
+      if (ARM_AM::getAM3Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
+        InstrOffs *= -1;
+      NumBits = 8;
+      break;
+    }
+    case ARMII::AddrMode5: {
+      ImmIdx = i+1;
+      InstrOffs = ARM_AM::getAM5Offset(MI.getOperand(ImmIdx).getImm());
+      if (ARM_AM::getAM5Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
+        InstrOffs *= -1;
+      NumBits = 8;
+      Scale = 4;
+      break;
+    }
+    case ARMII::AddrModeTs: {
+      ImmIdx = i+1;
+      InstrOffs = MI.getOperand(ImmIdx).getImm();
+      NumBits = 8;
+      Scale = 4;
+      break;
+    }
+    default:
+      std::cerr << "Unsupported addressing mode!\n";
+      abort();
+      break;
+    }
 
-  unsigned StackSize = MF.getFrameInfo()->getStackSize();
+    Offset += InstrOffs * Scale;
+    assert((Scale == 1 || (Offset & (Scale-1)) == 0) &&
+           "Can't encode this offset!");
+    if (Offset < 0) {
+      Offset = -Offset;
+      isSub = true;
+    }
 
-  Offset += StackSize;
+    MachineOperand &ImmOp = MI.getOperand(ImmIdx);
+    int ImmedOffset = Offset / Scale;
+    unsigned Mask = (1 << NumBits) - 1;
+    if ((unsigned)Offset <= Mask * Scale) {
+      // Replace the FrameIndex with sp
+      MI.getOperand(i).ChangeToRegister(FrameReg, false);
+      if (isSub)
+        ImmedOffset |= 1 << NumBits;
+      ImmOp.ChangeToImmediate(ImmedOffset);
+      return;
+    }
 
-  assert (Offset >= 0);
-  unsigned BaseRegister = hasFP(MF) ? ARM::R11 : ARM::R13;
-  if (Offset < 4096) {
-    // Replace the FrameIndex with r13
-    MI.getOperand(FrameIdx).ChangeToRegister(BaseRegister, false);
-    // Replace the ldr offset with Offset
-    MI.getOperand(OffIdx).ChangeToImmediate(Offset);
+    // Otherwise, it didn't fit.  Pull in what we can to simplify the immediate.
+    ImmedOffset = ImmedOffset & Mask;
+    if (isSub)
+      ImmedOffset |= 1 << NumBits;
+    ImmOp.ChangeToImmediate(ImmedOffset);
+    Offset &= ~(Mask*Scale);
+  }
+  
+  // If we get here, the immediate doesn't fit into the instruction.  We folded
+  // as much as possible above, handle the rest, providing a register that is
+  // SP+LargeImm.
+  assert(Offset && "This code isn't needed if offset already handled!");
+
+  if (isThumb) {
+    if (TII.isLoad(Opcode)) {
+      // Use the destination register to materialize sp + offset.
+      unsigned TmpReg = MI.getOperand(0).getReg();
+      emitThumbRegPlusImmediate(MBB, II, TmpReg, FrameReg,
+                                isSub ? -Offset : Offset, TII);
+      MI.getOperand(i).ChangeToRegister(TmpReg, false);
+    } else if (TII.isStore(Opcode)) {
+      // FIXME! This is horrific!!! We need register scavenging.
+      // Our temporary workaround has marked r3 unavailable. Of course, r3 is
+      // also an ABI register so it's possible that it is the register that is
+      // being stored here. If that's the case, we do the following:
+      // r12 = r2
+      // Use r2 to materialize sp + offset
+      // str r3, r2
+      // r2 = r12
+      unsigned DestReg = MI.getOperand(0).getReg();
+      unsigned TmpReg = ARM::R3;
+      if (DestReg == ARM::R3) {
+        BuildMI(MBB, II, TII.get(ARM::tMOVrr), ARM::R12).addReg(ARM::R2);
+        TmpReg = ARM::R2;
+      }
+      emitThumbRegPlusImmediate(MBB, II, TmpReg, FrameReg,
+                                isSub ? -Offset : Offset, TII);
+      // The address was formed in TmpReg; DestReg holds the value being
+      // stored and must not be used as the base.
+      MI.getOperand(i).ChangeToRegister(TmpReg, false);
+      if (DestReg == ARM::R3) {
+        // Restore r2 only after the store has used it as its base register.
+        MachineBasicBlock::iterator NII = next(II);
+        BuildMI(MBB, NII, TII.get(ARM::tMOVrr), ARM::R2).addReg(ARM::R12);
+      }
+    } else
+      assert(false && "Unexpected opcode!");
   } else {
-    // Insert a set of r12 with the full address
-    // r12 = r13 + offset
-    MachineBasicBlock *MBB2 = MI.getParent();
-    splitInstructionWithImmediate(*MBB2, II, TII.get(ARM::ADD), ARM::R12,
-			       BaseRegister, Offset);
-
-    // Replace the FrameIndex with r12
-    MI.getOperand(FrameIdx).ChangeToRegister(ARM::R12, false);
+    // Insert a set of r12 with the full address: r12 = sp + offset
+    // If the offset we have is too large to fit into the instruction, we need
+    // to form it with a series of ADDri's.  Do this by taking 8-bit chunks
+    // out of 'Offset'.
+    emitARMRegPlusImmediate(MBB, II, ARM::R12, FrameReg,
+                            isSub ? -Offset : Offset, TII);
+    MI.getOperand(i).ChangeToRegister(ARM::R12, false);
   }
 }
 
 void ARMRegisterInfo::
-processFunctionBeforeFrameFinalized(MachineFunction &MF) const {}
+processFunctionBeforeCalleeSavedScan(MachineFunction &MF) const {
+  // This tells PEI to spill the FP as if it is any other callee-save register to
+  // take advantage the eliminateFrameIndex machinery. This also ensures it is
+  // spilled in the order specified by getCalleeSavedRegs() to make it easier
+  // to combine multiple loads / stores.
+  bool FramePtrSpilled = MF.getFrameInfo()->hasVarSizedObjects();
+  bool CS1Spilled = false;
+  bool LRSpilled = false;
+  unsigned NumGPRSpills = 0;
+  SmallVector<unsigned, 4> UnspilledCS1GPRs;
+  SmallVector<unsigned, 4> UnspilledCS2GPRs;
+  if (!FramePtrSpilled && NoFramePointerElim) {
+    // Don't spill FP if the frame can be eliminated. This is determined
+    // by scanning the callee-save registers to see if any is used.
+    const unsigned *CSRegs = getCalleeSavedRegs();
+    const TargetRegisterClass* const *CSRegClasses = getCalleeSavedRegClasses();
+    for (unsigned i = 0; CSRegs[i]; ++i) {
+      unsigned Reg = CSRegs[i];
+      bool Spilled = false;
+      if (MF.isPhysRegUsed(Reg)) {
+        Spilled = true;
+        FramePtrSpilled = true;
+      } else {
+        // Check alias registers too.
+        for (const unsigned *Aliases = getAliasSet(Reg); *Aliases; ++Aliases) {
+          if (MF.isPhysRegUsed(*Aliases)) {
+            Spilled = true;
+            FramePtrSpilled = true;
+          }
+        }
+      }
+
+      if (CSRegClasses[i] == &ARM::GPRRegClass) {
+        if (Spilled) {
+          NumGPRSpills++;
+
+          // Keep track if LR and any of R4, R5, R6, and R7 is spilled.
+          switch (Reg) {
+          case ARM::LR:
+            LRSpilled = true;
+            // Fallthrough
+          case ARM::R4:
+          case ARM::R5:
+          case ARM::R6:
+          case ARM::R7:
+            CS1Spilled = true;
+            break;
+          default:
+            break;
+          }
+        } else { 
+          switch (Reg) {
+          case ARM::R4:
+          case ARM::R5:
+          case ARM::R6:
+          case ARM::R7:
+          case ARM::LR:
+            UnspilledCS1GPRs.push_back(Reg);
+            break;
+          default:
+            UnspilledCS2GPRs.push_back(Reg);
+            break;
+          }
+        }
+      }
+    }
+  }
+
+  if (FramePtrSpilled) {
+    ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+    AFI->setFramePtrSpilled(true);
+
+    // If LR is not spilled, but at least one of R4, R5, R6, and R7 is spilled.
+    // Spill LR as well so we can fold BX_RET to the registers restore (LDM).
+    if (!LRSpilled && CS1Spilled) {
+      MF.changePhyRegUsed(ARM::LR, true);
+      NumGPRSpills++;
+      UnspilledCS1GPRs.erase(std::find(UnspilledCS1GPRs.begin(),
+                                    UnspilledCS1GPRs.end(), (unsigned)ARM::LR));
+    }
+
+    // If stack and double are 8-byte aligned and we are spilling a odd number
+    // of GPRs. Spill one extra callee save GPR so we won't have to pad between
+    // the integer and double callee save areas.
+    unsigned TargetAlign = MF.getTarget().getFrameInfo()->getStackAlignment();
+    if (TargetAlign == 8 && (NumGPRSpills & 1)) {
+      if (CS1Spilled && !UnspilledCS1GPRs.empty())
+        MF.changePhyRegUsed(UnspilledCS1GPRs.front(), true);
+      else
+        MF.changePhyRegUsed(UnspilledCS2GPRs.front(), true);
+    }
+    MF.changePhyRegUsed(FramePtr, true);
+  }
+}
+
+/// Advance MBBI past the run of callee save load / store instructions with
+/// opcode Opc whose first operand is a register of the requested spill area
+/// (1: integer area 1, 2: integer area 2, 3: fp area, 0: don't care) and
+/// whose second operand is a frame index.
+static void movePastCSLoadStoreOps(MachineBasicBlock &MBB,
+                                   MachineBasicBlock::iterator &MBBI,
+                                   int Opc, unsigned Area,
+                                   const ARMSubtarget &STI) {
+  for (; MBBI != MBB.end(); ++MBBI) {
+    if (MBBI->getOpcode() != Opc || !MBBI->getOperand(1).isFrameIndex())
+      return;
+    if (Area == 0)
+      continue;
+
+    // Work out which area the register belongs to; 0 means none of them.
+    unsigned RegArea = 0;
+    switch (MBBI->getOperand(0).getReg()) {
+    case ARM::R4: case ARM::R5: case ARM::R6: case ARM::R7:
+    case ARM::LR:
+      RegArea = 1;
+      break;
+    case ARM::R8: case ARM::R9: case ARM::R10: case ARM::R11:
+      RegArea = STI.isDarwin() ? 2 : 1;
+      break;
+    case ARM::D8:  case ARM::D9:  case ARM::D10: case ARM::D11:
+    case ARM::D12: case ARM::D13: case ARM::D14: case ARM::D15:
+      RegArea = 3;
+      break;
+    default:
+      break;
+    }
+
+    // Stop at the first instruction that is not part of the requested area.
+    if (RegArea != Area)
+      return;
+  }
+}
 
 void ARMRegisterInfo::emitPrologue(MachineFunction &MF) const {
   MachineBasicBlock &MBB = MF.front();
   MachineBasicBlock::iterator MBBI = MBB.begin();
   MachineFrameInfo  *MFI = MF.getFrameInfo();
-  int           NumBytes = (int) MFI->getStackSize();
+  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+  bool isThumb = AFI->isThumbFunction();
+  unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize();
+  unsigned Align = MF.getTarget().getFrameInfo()->getStackAlignment();
+  unsigned NumBytes = MFI->getStackSize();
+  const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+
+  // Determine the size of each callee-save spill area and record which frame
+  // index belongs to which callee-save spill area.
+  unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0;
+  int FramePtrSpillFI = 0;
+  if (AFI->isFramePtrSpilled()) {
+    if (VARegSaveSize)
+      emitSPUpdate(MBB, MBBI, -VARegSaveSize, isThumb, TII);
+
+    for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+      unsigned Reg = CSI[i].getReg();
+      int FI = CSI[i].getFrameIdx();
+      switch (Reg) {
+      case ARM::R4:
+      case ARM::R5:
+      case ARM::R6:
+      case ARM::R7:
+      case ARM::LR:
+        if (Reg == FramePtr)
+          FramePtrSpillFI = FI;
+        AFI->addGPRCalleeSavedArea1Frame(FI);
+        GPRCS1Size += 4;
+        break;
+      case ARM::R8:
+      case ARM::R9:
+      case ARM::R10:
+      case ARM::R11:
+        if (Reg == FramePtr)
+          FramePtrSpillFI = FI;
+        if (STI.isDarwin()) {
+          AFI->addGPRCalleeSavedArea2Frame(FI);
+          GPRCS2Size += 4;
+        } else {
+          AFI->addGPRCalleeSavedArea1Frame(FI);
+          GPRCS1Size += 4;
+        }
+        break;
+      default:
+        AFI->addDPRCalleeSavedAreaFrame(FI);
+        DPRCSSize += 8;
+      }
+    }
+
+    if (!isThumb) {
+      // Build the new SUBri to adjust SP for integer callee-save spill area 1.
+      emitSPUpdate(MBB, MBBI, -GPRCS1Size, isThumb, TII);
+      movePastCSLoadStoreOps(MBB, MBBI, ARM::STR, 1, STI);
+    } else {
+      if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPUSH)
+        ++MBBI;  // Step over the tPUSH.
+    }
 
-  bool HasFP = hasFP(MF);
+    // Point FP to the stack slot that contains the previous FP.
+    BuildMI(MBB, MBBI, TII.get(isThumb ? ARM::tADDrSPi : ARM::ADDri), FramePtr)
+      .addFrameIndex(FramePtrSpillFI).addImm(0);
+
+    if (!isThumb) {
+      // Build the new SUBri to adjust SP for integer callee-save spill area 2.
+      emitSPUpdate(MBB, MBBI, -GPRCS2Size, false, TII);
+
+      // Build the new SUBri to adjust SP for FP callee-save spill area.
+      movePastCSLoadStoreOps(MBB, MBBI, ARM::STR, 2, STI);
+      emitSPUpdate(MBB, MBBI, -DPRCSSize, false, TII);
+    }
+  }
 
-  if (MFI->hasCalls()) {
+  // If necessary, add one more SUBri to account for the call frame
+  // and/or local storage, alloca area.
+  if (MFI->hasCalls())
     // We reserve argument space for call sites in the function immediately on
     // entry to the current function.  This eliminates the need for add/sub
     // brackets around call sites.
-    NumBytes += MFI->getMaxCallFrameSize();
-  }
-
-  if (HasFP)
-    // Add space for storing the FP
-    NumBytes += 4;
-
-  // Align to 8 bytes
-  NumBytes = ((NumBytes + 7) / 8) * 8;
+    if (!MF.getFrameInfo()->hasVarSizedObjects())
+      NumBytes += MFI->getMaxCallFrameSize();
 
+  // Round the size to a multiple of the alignment.
+  NumBytes = (NumBytes+Align-1)/Align*Align;
   MFI->setStackSize(NumBytes);
 
-  if (NumBytes) {
-    //sub sp, sp, #NumBytes
-    splitInstructionWithImmediate(MBB, MBBI, TII.get(ARM::SUB), ARM::R13,
-                                  ARM::R13, NumBytes);
-  }
-
+  // Determine the starting offset of each callee-save spill area.
+  if (AFI->isFramePtrSpilled()) {
+    unsigned DPRCSOffset  = NumBytes - (GPRCS1Size + GPRCS2Size + DPRCSSize);
+    unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize;
+    unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size;
+    AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) + NumBytes);
+    AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset);
+    AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset);
+    AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);
+  
+    NumBytes = DPRCSOffset;  // Stack size minus the three spill areas.
+    if (NumBytes) {
+      // Insert the SP update after all the callee-save spills.
+      if (!isThumb)
+        movePastCSLoadStoreOps(MBB, MBBI, ARM::FSTD, 3, STI);
+      emitSPUpdate(MBB, MBBI, -NumBytes, isThumb, TII);
+    }
+  } else 
+    emitSPUpdate(MBB, MBBI, -NumBytes, isThumb, TII);
 
-  if (HasFP) {
-    BuildMI(MBB, MBBI, TII.get(ARM::STR))
-      .addReg(ARM::R11).addReg(ARM::R13).addImm(0);
-    BuildMI(MBB, MBBI, TII.get(ARM::MOV), ARM::R11).addReg(ARM::R13).addImm(0).
-      addImm(ARMShift::LSL);
-  }
+  AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
+  AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
+  AFI->setDPRCalleeSavedAreaSize(DPRCSSize);
+}
+
+/// isCalleeSavedRegister - Return true if Reg appears in the zero-terminated
+/// register list CSRegs.
+static bool isCalleeSavedRegister(unsigned Reg, const unsigned *CSRegs) {
+  for (const unsigned *R = CSRegs; *R; ++R) {
+    if (*R == Reg)
+      return true;
+  }
+  return false;
+}
+
+/// isCSRestore - Return true if MI is an FLDD, LDR or tLDRspi whose second
+/// operand is a frame index and whose first operand is a register listed in
+/// CSRegs.
+static bool isCSRestore(MachineInstr *MI, const unsigned *CSRegs) {
+  switch (MI->getOpcode()) {
+  case ARM::FLDD:
+  case ARM::LDR:
+  case ARM::tLDRspi:
+    break;
+  default:
+    return false;
+  }
+  if (!MI->getOperand(1).isFrameIndex())
+    return false;
+  return isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs);
 }
 
 void ARMRegisterInfo::emitEpilogue(MachineFunction &MF,
 				   MachineBasicBlock &MBB) const {
   MachineBasicBlock::iterator MBBI = prior(MBB.end());
-  assert(MBBI->getOpcode() == ARM::bx &&
+  assert((MBBI->getOpcode() == ARM::BX_RET ||
+          MBBI->getOpcode() == ARM::tBX_RET ||
+          MBBI->getOpcode() == ARM::tPOP_RET) &&
          "Can only insert epilog into returning blocks");
 
   MachineFrameInfo *MFI = MF.getFrameInfo();
-  int          NumBytes = (int) MFI->getStackSize();
+  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+  bool isThumb = AFI->isThumbFunction();
+  unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize();
+  int NumBytes = (int)MFI->getStackSize();
+  if (AFI->isFramePtrSpilled()) {
+    // Unwind MBBI to point to first LDR / FLDD.
+    const unsigned *CSRegs = getCalleeSavedRegs();
+    if (MBBI != MBB.begin()) {
+      do
+        --MBBI;
+      while (MBBI != MBB.begin() && isCSRestore(MBBI, CSRegs));
+      if (!isCSRestore(MBBI, CSRegs))
+        ++MBBI;
+    }
 
-  if (hasFP(MF)) {
-    BuildMI(MBB, MBBI, TII.get(ARM::MOV), ARM::R13).addReg(ARM::R11).addImm(0).
-      addImm(ARMShift::LSL);
-    BuildMI(MBB, MBBI, TII.get(ARM::LDR), ARM::R11).addReg(ARM::R13).addImm(0);
-  }
+    // Move SP to start of FP callee save spill area.
+    NumBytes -= (AFI->getGPRCalleeSavedArea1Size() +
+                 AFI->getGPRCalleeSavedArea2Size() +
+                 AFI->getDPRCalleeSavedAreaSize());
+    if (isThumb)
+      emitSPUpdate(MBB, MBBI, -NumBytes, isThumb, TII);
+    else {
+      NumBytes = AFI->getFramePtrSpillOffset() - NumBytes;
+      // Reset SP based on frame pointer only if the stack frame extends beyond
+      // frame pointer stack slot.
+      if (AFI->getGPRCalleeSavedArea2Size() ||
+          AFI->getDPRCalleeSavedAreaSize()  ||
+          AFI->getDPRCalleeSavedAreaOffset())
+        if (NumBytes)
+          BuildMI(MBB, MBBI, TII.get(ARM::SUBri), ARM::SP).addReg(FramePtr)
+            .addImm(NumBytes);
+        else
+          BuildMI(MBB, MBBI, TII.get(isThumb ? ARM::tMOVrr : ARM::MOVrr),
+                  ARM::SP).addReg(FramePtr);
+
+      // Move SP to start of integer callee save spill area 2.
+      movePastCSLoadStoreOps(MBB, MBBI, ARM::FLDD, 3, STI);
+      emitSPUpdate(MBB, MBBI, AFI->getDPRCalleeSavedAreaSize(), false, TII);
+
+      // Move SP to start of integer callee save spill area 1.
+      movePastCSLoadStoreOps(MBB, MBBI, ARM::LDR, 2, STI);
+      emitSPUpdate(MBB, MBBI, AFI->getGPRCalleeSavedArea2Size(), false, TII);
+
+      // Move SP to SP upon entry to the function.
+      movePastCSLoadStoreOps(MBB, MBBI, ARM::LDR, 1, STI);
+      emitSPUpdate(MBB, MBBI, AFI->getGPRCalleeSavedArea1Size(), false, TII);
+    }
 
-  if (NumBytes){
-    //add sp, sp, #NumBytes
-    splitInstructionWithImmediate(MBB, MBBI, TII.get(ARM::ADD), ARM::R13,
-                                  ARM::R13, NumBytes);
+    if (VARegSaveSize)
+      emitSPUpdate(MBB, MBBI, VARegSaveSize, isThumb, TII);
+  } else if (NumBytes != 0) {
+    emitSPUpdate(MBB, MBBI, NumBytes, isThumb, TII);
   }
-
 }
 
 unsigned ARMRegisterInfo::getRARegister() const {
-  return ARM::R14;
+  return ARM::LR;  // LR is reported as the return address (RA) register.
 }
 
 unsigned ARMRegisterInfo::getFrameRegister(MachineFunction &MF) const {
-  return hasFP(MF) ? ARM::R11 : ARM::R13;
+  return STI.useThumbBacktraces() ? ARM::R7 : ARM::R11;  // Independent of MF.
 }
 
 #include "ARMGenRegisterInfo.inc"


Index: llvm/lib/Target/ARM/ARMRegisterInfo.h
diff -u llvm/lib/Target/ARM/ARMRegisterInfo.h:1.4 llvm/lib/Target/ARM/ARMRegisterInfo.h:1.5
--- llvm/lib/Target/ARM/ARMRegisterInfo.h:1.4	Tue Jan  2 15:31:55 2007
+++ llvm/lib/Target/ARM/ARMRegisterInfo.h	Fri Jan 19 01:51:42 2007
@@ -17,18 +17,36 @@
 
 #include "llvm/Target/MRegisterInfo.h"
 #include "ARMGenRegisterInfo.h.inc"
+#include <set>
 
 namespace llvm {
-
-class Type;
-class TargetInstrInfo;
+  class TargetInstrInfo;
+  class ARMSubtarget;
+  class Type;
 
 struct ARMRegisterInfo : public ARMGenRegisterInfo {
   const TargetInstrInfo &TII;
-
-  ARMRegisterInfo(const TargetInstrInfo &tii);
+  const ARMSubtarget &STI;
+private:
+  /// FramePtr - ARM physical register used as frame ptr.
+  unsigned FramePtr;
+
+public:
+  ARMRegisterInfo(const TargetInstrInfo &tii, const ARMSubtarget &STI);
+
+  /// getRegisterNumbering - Given the enum value for some register, e.g.
+  /// ARM::LR, return the number that it corresponds to (e.g. 14).
+  static unsigned getRegisterNumbering(unsigned RegEnum);
 
   /// Code Generation virtual methods...
+  bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+                                 MachineBasicBlock::iterator MI,
+                                 const std::vector<CalleeSavedInfo> &CSI) const;
+
+  bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+                                   MachineBasicBlock::iterator MI,
+                                 const std::vector<CalleeSavedInfo> &CSI) const;
+
   void storeRegToStackSlot(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MBBI,
                            unsigned SrcReg, int FrameIndex,
@@ -43,9 +61,8 @@
                     unsigned DestReg, unsigned SrcReg,
                     const TargetRegisterClass *RC) const;
 
-  virtual MachineInstr* foldMemoryOperand(MachineInstr* MI,
-                                          unsigned OpNum,
-                                          int FrameIndex) const;
+  MachineInstr* foldMemoryOperand(MachineInstr* MI, unsigned OpNum,
+                                  int FrameIndex) const;
 
   const unsigned *getCalleeSavedRegs() const;
 
@@ -57,7 +74,7 @@
 
   void eliminateFrameIndex(MachineBasicBlock::iterator II) const;
 
-  void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
+  void processFunctionBeforeCalleeSavedScan(MachineFunction &MF) const;
 
   void emitPrologue(MachineFunction &MF) const;
   void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;


Index: llvm/lib/Target/ARM/ARMRegisterInfo.td
diff -u llvm/lib/Target/ARM/ARMRegisterInfo.td:1.6 llvm/lib/Target/ARM/ARMRegisterInfo.td:1.7
--- llvm/lib/Target/ARM/ARMRegisterInfo.td:1.6	Thu Oct 26 08:31:25 2006
+++ llvm/lib/Target/ARM/ARMRegisterInfo.td	Fri Jan 19 01:51:42 2007
@@ -1,4 +1,4 @@
-//===- ARMRegisterInfo.td - ARM Register defs ----------*- tablegen -*-===//
+//===- ARMRegisterInfo.td - ARM Register defs -------------------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -13,129 +13,169 @@
 //===----------------------------------------------------------------------===//
 
 // Registers are identified with 4-bit ID numbers.
-class ARMReg<string n> : Register<n> {
+class ARMReg<bits<4> num, string n, list<Register> aliases = []> : Register<n> {
+  field bits<4> Num;
   let Namespace = "ARM";
+  let Aliases = aliases;
 }
 
-// Ri - 32-bit integer registers
-class Ri<bits<4> num, string n> : ARMReg<n> {
-  field bits<4> Num;
-  let Num = num;
-}
-// Rf - 32-bit floating-point registers
-class Rf<bits<5> num, string n> : ARMReg<n> {
-  field bits<5> Num;
-  let Num = num;
-}
-// Rd - Slots in the FP register file for 64-bit floating-point values.
-class Rd<bits<5> num, string n, list<Register> aliases> : ARMReg<n> {
+class ARMFReg<bits<5> num, string n> : Register<n> {
   field bits<5> Num;
-  let Num = num;
-  let Aliases = aliases;
+  let Namespace = "ARM";
 }
 
 // Integer registers
-def R0  : Ri< 0, "R0">,  DwarfRegNum<0>;
-def R1  : Ri< 1, "R1">,  DwarfRegNum<1>;
-def R2  : Ri< 2, "R2">,  DwarfRegNum<2>;
-def R3  : Ri< 3, "R3">,  DwarfRegNum<3>;
-def R4  : Ri< 4, "R4">,  DwarfRegNum<4>;
-def R5  : Ri< 5, "R5">,  DwarfRegNum<5>;
-def R6  : Ri< 6, "R6">,  DwarfRegNum<6>;
-def R7  : Ri< 7, "R7">,  DwarfRegNum<7>;
-def R8  : Ri< 8, "R8">,  DwarfRegNum<8>;
-def R9  : Ri< 9, "R9">,  DwarfRegNum<9>;
-def R10 : Ri<10, "R10">, DwarfRegNum<10>;
-def R11 : Ri<11, "R11">, DwarfRegNum<11>;
-def R12 : Ri<12, "R12">, DwarfRegNum<12>;
-def R13 : Ri<13, "R13">, DwarfRegNum<13>;
-def R14 : Ri<14, "R14">, DwarfRegNum<14>;
-def R15 : Ri<15, "R15">, DwarfRegNum<15>;
-
-// TODO: update to VFP-v3
-// Floating-point registers
-def S0  : Rf< 0,  "S0">, DwarfRegNum<64>;
-def S1  : Rf< 1,  "S1">, DwarfRegNum<65>;
-def S2  : Rf< 2,  "S2">, DwarfRegNum<66>;
-def S3  : Rf< 3,  "S3">, DwarfRegNum<67>;
-def S4  : Rf< 4,  "S4">, DwarfRegNum<68>;
-def S5  : Rf< 5,  "S5">, DwarfRegNum<69>;
-def S6  : Rf< 6,  "S6">, DwarfRegNum<70>;
-def S7  : Rf< 7,  "S7">, DwarfRegNum<71>;
-def S8  : Rf< 8,  "S8">, DwarfRegNum<72>;
-def S9  : Rf< 9,  "S9">, DwarfRegNum<73>;
-def S10 : Rf<10, "S10">, DwarfRegNum<74>;
-def S11 : Rf<11, "S11">, DwarfRegNum<75>;
-def S12 : Rf<12, "S12">, DwarfRegNum<76>;
-def S13 : Rf<13, "S13">, DwarfRegNum<77>;
-def S14 : Rf<14, "S14">, DwarfRegNum<78>;
-def S15 : Rf<15, "S15">, DwarfRegNum<79>;
-def S16 : Rf<16, "S16">, DwarfRegNum<80>;
-def S17 : Rf<17, "S17">, DwarfRegNum<81>;
-def S18 : Rf<18, "S18">, DwarfRegNum<82>;
-def S19 : Rf<19, "S19">, DwarfRegNum<83>;
-def S20 : Rf<20, "S20">, DwarfRegNum<84>;
-def S21 : Rf<21, "S21">, DwarfRegNum<85>;
-def S22 : Rf<22, "S22">, DwarfRegNum<86>;
-def S23 : Rf<23, "S23">, DwarfRegNum<87>;
-def S24 : Rf<24, "S24">, DwarfRegNum<88>;
-def S25 : Rf<25, "S25">, DwarfRegNum<89>;
-def S26 : Rf<26, "S26">, DwarfRegNum<90>;
-def S27 : Rf<27, "S27">, DwarfRegNum<91>;
-def S28 : Rf<28, "S28">, DwarfRegNum<92>;
-def S29 : Rf<29, "S29">, DwarfRegNum<93>;
-def S30 : Rf<30, "S30">, DwarfRegNum<94>;
-def S31 : Rf<31, "S31">, DwarfRegNum<95>;
-
-// Aliases of the S* registers used to hold 64-bit fp values (doubles)
-def D0  : Rd< 0,  "D0", [S0,   S1]>, DwarfRegNum<64>;
-def D1  : Rd< 2,  "D1", [S2,   S3]>, DwarfRegNum<66>;
-def D2  : Rd< 4,  "D2", [S4,   S5]>, DwarfRegNum<68>;
-def D3  : Rd< 6,  "D3", [S6,   S7]>, DwarfRegNum<70>;
-def D4  : Rd< 8,  "D4", [S8,   S9]>, DwarfRegNum<72>;
-def D5  : Rd<10,  "D5", [S10, S11]>, DwarfRegNum<74>;
-def D6  : Rd<12,  "D6", [S12, S13]>, DwarfRegNum<76>;
-def D7  : Rd<14,  "D7", [S14, S15]>, DwarfRegNum<78>;
-def D8  : Rd<16,  "D8", [S16, S17]>, DwarfRegNum<80>;
-def D9  : Rd<18,  "D9", [S18, S19]>, DwarfRegNum<82>;
-def D10 : Rd<20, "D10", [S20, S21]>, DwarfRegNum<84>;
-def D11 : Rd<22, "D11", [S22, S23]>, DwarfRegNum<86>;
-def D12 : Rd<24, "D12", [S24, S25]>, DwarfRegNum<88>;
-def D13 : Rd<26, "D13", [S26, S27]>, DwarfRegNum<90>;
-def D14 : Rd<28, "D14", [S28, S29]>, DwarfRegNum<92>;
-def D15 : Rd<30, "D15", [S30, S31]>, DwarfRegNum<94>;
+def R0  : ARMReg< 0, "r0">,  DwarfRegNum<0>;
+def R1  : ARMReg< 1, "r1">,  DwarfRegNum<1>;
+def R2  : ARMReg< 2, "r2">,  DwarfRegNum<2>;
+def R3  : ARMReg< 3, "r3">,  DwarfRegNum<3>;
+def R4  : ARMReg< 4, "r4">,  DwarfRegNum<4>;
+def R5  : ARMReg< 5, "r5">,  DwarfRegNum<5>;
+def R6  : ARMReg< 6, "r6">,  DwarfRegNum<6>;
+def R7  : ARMReg< 7, "r7">,  DwarfRegNum<7>;
+def R8  : ARMReg< 8, "r8">,  DwarfRegNum<8>;
+def R9  : ARMReg< 9, "r9">,  DwarfRegNum<9>;
+def R10 : ARMReg<10, "r10">, DwarfRegNum<10>;
+def R11 : ARMReg<11, "r11">, DwarfRegNum<11>;
+def R12 : ARMReg<12, "r12">, DwarfRegNum<12>;
+def SP  : ARMReg<13, "sp">,  DwarfRegNum<13>;
+def LR  : ARMReg<14, "lr">,  DwarfRegNum<14>;
+def PC  : ARMReg<15, "pc">,  DwarfRegNum<15>;
+
+// Float registers
+def S0  : ARMFReg< 0, "s0">;  def S1  : ARMFReg< 1, "s1">;
+def S2  : ARMFReg< 2, "s2">;  def S3  : ARMFReg< 3, "s3">;
+def S4  : ARMFReg< 4, "s4">;  def S5  : ARMFReg< 5, "s5">;
+def S6  : ARMFReg< 6, "s6">;  def S7  : ARMFReg< 7, "s7">;
+def S8  : ARMFReg< 8, "s8">;  def S9  : ARMFReg< 9, "s9">;
+def S10 : ARMFReg<10, "s10">; def S11 : ARMFReg<11, "s11">;
+def S12 : ARMFReg<12, "s12">; def S13 : ARMFReg<13, "s13">;
+def S14 : ARMFReg<14, "s14">; def S15 : ARMFReg<15, "s15">;
+def S16 : ARMFReg<16, "s16">; def S17 : ARMFReg<17, "s17">;
+def S18 : ARMFReg<18, "s18">; def S19 : ARMFReg<19, "s19">;
+def S20 : ARMFReg<20, "s20">; def S21 : ARMFReg<21, "s21">;
+def S22 : ARMFReg<22, "s22">; def S23 : ARMFReg<23, "s23">;
+def S24 : ARMFReg<24, "s24">; def S25 : ARMFReg<25, "s25">;
+def S26 : ARMFReg<26, "s26">; def S27 : ARMFReg<27, "s27">;
+def S28 : ARMFReg<28, "s28">; def S29 : ARMFReg<29, "s29">;
+def S30 : ARMFReg<30, "s30">; def S31 : ARMFReg<31, "s31">;
+
+// Aliases of the S* registers used to hold 64-bit fp values (doubles)
+def D0  : ARMReg< 0,  "d0", [S0,   S1]>;
+def D1  : ARMReg< 1,  "d1", [S2,   S3]>; 
+def D2  : ARMReg< 2,  "d2", [S4,   S5]>;
+def D3  : ARMReg< 3,  "d3", [S6,   S7]>;
+def D4  : ARMReg< 4,  "d4", [S8,   S9]>;
+def D5  : ARMReg< 5,  "d5", [S10, S11]>;
+def D6  : ARMReg< 6,  "d6", [S12, S13]>;
+def D7  : ARMReg< 7,  "d7", [S14, S15]>;
+def D8  : ARMReg< 8,  "d8", [S16, S17]>;
+def D9  : ARMReg< 9,  "d9", [S18, S19]>;
+def D10 : ARMReg<10, "d10", [S20, S21]>;
+def D11 : ARMReg<11, "d11", [S22, S23]>;
+def D12 : ARMReg<12, "d12", [S24, S25]>;
+def D13 : ARMReg<13, "d13", [S26, S27]>;
+def D14 : ARMReg<14, "d14", [S28, S29]>;
+def D15 : ARMReg<15, "d15", [S30, S31]>;
 
 // Register classes.
 //
-// FIXME: the register order should be defined in terms of the preferred
-// allocation order...
+// pc  == Program Counter
+// lr  == Link Register
+// sp  == Stack Pointer
+// r12 == ip (scratch)
+// r7  == Frame Pointer (thumb-style backtraces)
+// r11 == Frame Pointer (arm-style backtraces)
+// r10 == Stack Limit
 //
-def IntRegs : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6,
-	                                       R7, R8, R9, R10, R11, R12,
-                                               R13, R14, R15]> {
+def GPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6,
+                                           R7, R8, R9, R10, R12, R11,
+                                           LR, SP, PC]> {
   let MethodProtos = [{
+    iterator allocation_order_begin(const MachineFunction &MF) const;
     iterator allocation_order_end(const MachineFunction &MF) const;
   }];
+  // FIXME: We are reserving r12 in case the PEI needs to use it to
+  // generate a large stack offset. Make it available once we have register
+  // scavenging.
   let MethodBodies = [{
-    IntRegsClass::iterator
-    IntRegsClass::allocation_order_end(const MachineFunction &MF) const {
-      // r15 == Program Counter
-      // r14 == Link Register
-      // r13 == Stack Pointer
-      // r12 == ip (scratch)
-      // r11 == Frame Pointer
-      // r10 == Stack Limit
-      if (hasFP(MF))
-        return end() - 5;
-      else
-        return end() - 4;
+    // FP is R11, R9 is available.
+    static const unsigned ARM_GPR_AO_1[] = {
+      ARM::R0, ARM::R1, ARM::R2, ARM::R3,
+      ARM::R4, ARM::R5, ARM::R6, ARM::R7,
+      ARM::R8, ARM::R9, ARM::R10,
+      ARM::LR, ARM::R11 };
+    // FP is R11, R9 is not available.
+    static const unsigned ARM_GPR_AO_2[] = {
+      ARM::R0, ARM::R1, ARM::R2, ARM::R3,
+      ARM::R4, ARM::R5, ARM::R6, ARM::R7,
+      ARM::R8, ARM::R10,
+      ARM::LR, ARM::R11 };
+    // FP is R7, R9 is available.
+    static const unsigned ARM_GPR_AO_3[] = {
+      ARM::R0, ARM::R1, ARM::R2, ARM::R3,
+      ARM::R4, ARM::R5, ARM::R6, ARM::R8,
+      ARM::R9, ARM::R10,ARM::R11,
+      ARM::LR, ARM::R7 };
+    // FP is R7, R9 is not available.
+    static const unsigned ARM_GPR_AO_4[] = {
+      ARM::R0, ARM::R1, ARM::R2, ARM::R3,
+      ARM::R4, ARM::R5, ARM::R6, ARM::R8,
+      ARM::R10,ARM::R11,
+      ARM::LR, ARM::R7 };
+    // FP is R7, only low registers available.
+    static const unsigned THUMB_GPR_AO[] = {
+      ARM::R0, ARM::R1, ARM::R2,
+      ARM::R4, ARM::R5, ARM::R6, ARM::R7 };
+
+    GPRClass::iterator
+    GPRClass::allocation_order_begin(const MachineFunction &MF) const {
+      const TargetMachine &TM = MF.getTarget();
+      const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>();
+      if (Subtarget.isThumb())
+        return THUMB_GPR_AO;
+      if (Subtarget.useThumbBacktraces()) {
+        if (Subtarget.isR9Reserved())
+          return ARM_GPR_AO_4;
+        else
+          return ARM_GPR_AO_3;
+      } else {
+        if (Subtarget.isR9Reserved())
+          return ARM_GPR_AO_2;
+        else
+          return ARM_GPR_AO_1;
+      }
+    }
+
+    GPRClass::iterator
+    GPRClass::allocation_order_end(const MachineFunction &MF) const {
+      const TargetMachine &TM = MF.getTarget();
+      const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>();
+      GPRClass::iterator I;
+      if (Subtarget.isThumb())
+        I = THUMB_GPR_AO + (sizeof(THUMB_GPR_AO)/sizeof(unsigned));
+      else if (Subtarget.useThumbBacktraces()) {
+        if (Subtarget.isR9Reserved())
+          I = ARM_GPR_AO_4 + (sizeof(ARM_GPR_AO_4)/sizeof(unsigned));
+        else
+          I = ARM_GPR_AO_3 + (sizeof(ARM_GPR_AO_3)/sizeof(unsigned));
+      } else {
+        if (Subtarget.isR9Reserved())
+          I = ARM_GPR_AO_2 + (sizeof(ARM_GPR_AO_2)/sizeof(unsigned));
+        else
+          I = ARM_GPR_AO_1 + (sizeof(ARM_GPR_AO_1)/sizeof(unsigned));
+      }
+
+      return hasFP(MF) ? I-1 : I;
     }
   }];
 }
 
-def FPRegs : RegisterClass<"ARM", [f32], 32, [S0, S1, S2, S3, S4, S5, S6, S7, S8,
+def SPR : RegisterClass<"ARM", [f32], 32, [S0, S1, S2, S3, S4, S5, S6, S7, S8,
   S9, S10, S11, S12, S13, S14, S15, S16, S17, S18, S19, S20, S21, S22,
   S23, S24, S25, S26, S27, S28, S29, S30, S31]>;
 
-def DFPRegs : RegisterClass<"ARM", [f64], 64, [D0, D1, D2, D3, D4, D5, D6, D7,
-  D8, D9, D10, D11, D12, D13, D14, D15]>;
+// ARM requires only word alignment for double. It's more performant if it
+// is double-word aligned though.
+def DPR : RegisterClass<"ARM", [f64], 64, [D0, D1, D2, D3, D4, D5, D6, D7, D8,
+  D9, D10, D11, D12, D13, D14, D15]>;


Index: llvm/lib/Target/ARM/ARMTargetAsmInfo.cpp
diff -u llvm/lib/Target/ARM/ARMTargetAsmInfo.cpp:1.3 llvm/lib/Target/ARM/ARMTargetAsmInfo.cpp:1.4
--- llvm/lib/Target/ARM/ARMTargetAsmInfo.cpp:1.3	Thu Dec 28 07:13:00 2006
+++ llvm/lib/Target/ARM/ARMTargetAsmInfo.cpp	Fri Jan 19 01:51:42 2007
@@ -12,18 +12,50 @@
 //===----------------------------------------------------------------------===//
 
 #include "ARMTargetAsmInfo.h"
-
+#include "ARMTargetMachine.h"
 using namespace llvm;
 
 ARMTargetAsmInfo::ARMTargetAsmInfo(const ARMTargetMachine &TM) {
-  Data16bitsDirective = "\t.half\t";
-  Data32bitsDirective = "\t.word\t";
+  const ARMSubtarget *Subtarget = &TM.getSubtarget<ARMSubtarget>();
+  if (Subtarget->isDarwin()) {
+    HasDotTypeDotSizeDirective = false;
+    PrivateGlobalPrefix = "L";
+    GlobalPrefix = "_";
+    ZeroDirective = "\t.space\t";
+    SetDirective = "\t.set";
+    WeakRefDirective = "\t.weak_reference\t";
+    JumpTableDataSection = ".const";
+    CStringSection = "\t.cstring";
+    StaticCtorsSection = ".mod_init_func";
+    StaticDtorsSection = ".mod_term_func";
+    InlineAsmStart = "@ InlineAsm Start";
+    InlineAsmEnd = "@ InlineAsm End";
+    LCOMMDirective = "\t.lcomm\t";
+    COMMDirectiveTakesAlignment = false;
+    
+    NeedsSet = true;
+    DwarfAbbrevSection = ".section __DWARF,__debug_abbrev,regular,debug";
+    DwarfInfoSection = ".section __DWARF,__debug_info,regular,debug";
+    DwarfLineSection = ".section __DWARF,__debug_line,regular,debug";
+    DwarfFrameSection = ".section __DWARF,__debug_frame,regular,debug";
+    DwarfPubNamesSection = ".section __DWARF,__debug_pubnames,regular,debug";
+    DwarfPubTypesSection = ".section __DWARF,__debug_pubtypes,regular,debug";
+    DwarfStrSection = ".section __DWARF,__debug_str,regular,debug";
+    DwarfLocSection = ".section __DWARF,__debug_loc,regular,debug";
+    DwarfARangesSection = ".section __DWARF,__debug_aranges,regular,debug";
+    DwarfRangesSection = ".section __DWARF,__debug_ranges,regular,debug";
+    DwarfMacInfoSection = ".section __DWARF,__debug_macinfo,regular,debug";
+  } else {
+    Data16bitsDirective = "\t.half\t";
+    Data32bitsDirective = "\t.word\t";
+    ZeroDirective = "\t.skip\t";
+    WeakRefDirective = "\t.weak\t";
+    StaticCtorsSection = "\t.section .ctors,\"aw\",%progbits";
+    StaticDtorsSection = "\t.section .dtors,\"aw\",%progbits";
+  }
+  AlignmentIsInBytes = false; 
   Data64bitsDirective = 0;
-  ZeroDirective = "\t.skip\t";
   CommentString = "@";
+  DataSection = "\t.data";
   ConstantPoolSection = "\t.text\n";
-  AlignmentIsInBytes = false;
-  WeakRefDirective = "\t.weak\t";
-  StaticCtorsSection = "\t.section .ctors,\"aw\",%progbits";
-  StaticDtorsSection = "\t.section .dtors,\"aw\",%progbits";
 }


Index: llvm/lib/Target/ARM/ARMTargetMachine.cpp
diff -u llvm/lib/Target/ARM/ARMTargetMachine.cpp:1.13 llvm/lib/Target/ARM/ARMTargetMachine.cpp:1.14
--- llvm/lib/Target/ARM/ARMTargetMachine.cpp:1.13	Thu Nov  2 21:08:28 2006
+++ llvm/lib/Target/ARM/ARMTargetMachine.cpp	Fri Jan 19 01:51:42 2007
@@ -11,30 +11,32 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "ARMTargetAsmInfo.h"
 #include "ARMTargetMachine.h"
+#include "ARMTargetAsmInfo.h"
 #include "ARMFrameInfo.h"
 #include "ARM.h"
 #include "llvm/Module.h"
 #include "llvm/PassManager.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Target/TargetMachineRegistry.h"
+#include "llvm/Target/TargetOptions.h"
 using namespace llvm;
 
+static cl::opt<bool> DisableLdStOpti("disable-arm-loadstore-opti", cl::Hidden,
+                              cl::desc("Disable load store optimization pass"));
+
 namespace {
   // Register the target.
   RegisterTarget<ARMTargetMachine> X("arm", "  ARM");
 }
 
-
-const TargetAsmInfo *ARMTargetMachine::createTargetAsmInfo() const {
-  return new ARMTargetAsmInfo(*this);
-}
-
-
 /// TargetMachine ctor - Create an ILP32 architecture model
 ///
 ARMTargetMachine::ARMTargetMachine(const Module &M, const std::string &FS)
-  : DataLayout("e-p:32:32") {
+  : Subtarget(M, FS), DataLayout("e-p:32:32-d:32"), InstrInfo(Subtarget),
+    FrameInfo(Subtarget) {
+  if (Subtarget.isDarwin())
+    NoFramePointerElim = true;
 }
 
 unsigned ARMTargetMachine::getModuleMatchQuality(const Module &M) {
@@ -49,14 +51,23 @@
 }
 
 
+const TargetAsmInfo *ARMTargetMachine::createTargetAsmInfo() const {
+  return new ARMTargetAsmInfo(*this);
+}
+
+
 // Pass Pipeline Configuration
 bool ARMTargetMachine::addInstSelector(FunctionPassManager &PM, bool Fast) {
   PM.add(createARMISelDag(*this));
   return false;
 }
 
-bool ARMTargetMachine::addPostRegAlloc(FunctionPassManager &PM, bool Fast) {
-  PM.add(createARMFixMulPass());
+bool ARMTargetMachine::addPreEmitPass(FunctionPassManager &PM, bool Fast) {
+  // FIXME: temporarily disabling load / store optimization pass for Thumb mode.
+  if (!Fast && !DisableLdStOpti && !Subtarget.isThumb())
+    PM.add(createARMLoadStoreOptimizationPass());
+  
+  PM.add(createARMConstantIslandPass());
   return true;
 }
 


Index: llvm/lib/Target/ARM/ARMTargetMachine.h
diff -u llvm/lib/Target/ARM/ARMTargetMachine.h:1.6 llvm/lib/Target/ARM/ARMTargetMachine.h:1.7
--- llvm/lib/Target/ARM/ARMTargetMachine.h:1.6	Tue Sep 19 10:49:24 2006
+++ llvm/lib/Target/ARM/ARMTargetMachine.h	Fri Jan 19 01:51:42 2007
@@ -20,19 +20,17 @@
 #include "llvm/Target/TargetFrameInfo.h"
 #include "ARMInstrInfo.h"
 #include "ARMFrameInfo.h"
+#include "ARMSubtarget.h"
 
 namespace llvm {
 
 class Module;
 
 class ARMTargetMachine : public LLVMTargetMachine {
-  const TargetData DataLayout;       // Calculates type size & alignment
-  ARMInstrInfo InstrInfo;
-  ARMFrameInfo FrameInfo;
-  
-protected:
-  virtual const TargetAsmInfo *createTargetAsmInfo() const;
-
+  ARMSubtarget      Subtarget;
+  const TargetData  DataLayout;       // Calculates type size & alignment
+  ARMInstrInfo      InstrInfo;
+  ARMFrameInfo      FrameInfo;
 public:
   ARMTargetMachine(const Module &M, const std::string &FS);
 
@@ -42,11 +40,14 @@
     return &InstrInfo.getRegisterInfo();
   }
   virtual const TargetData       *getTargetData() const { return &DataLayout; }
+  virtual const ARMSubtarget  *getSubtargetImpl() const { return &Subtarget; }
   static unsigned getModuleMatchQuality(const Module &M);
 
+  virtual const TargetAsmInfo *createTargetAsmInfo() const;
+  
   // Pass Pipeline Configuration
   virtual bool addInstSelector(FunctionPassManager &PM, bool Fast);
-  virtual bool addPostRegAlloc(FunctionPassManager &PM, bool Fast);
+  virtual bool addPreEmitPass(FunctionPassManager &PM, bool Fast);
   virtual bool addAssemblyEmitter(FunctionPassManager &PM, bool Fast, 
                                   std::ostream &Out);
 };


Index: llvm/lib/Target/ARM/Makefile
diff -u llvm/lib/Target/ARM/Makefile:1.1 llvm/lib/Target/ARM/Makefile:1.2
--- llvm/lib/Target/ARM/Makefile:1.1	Sun May 14 17:18:28 2006
+++ llvm/lib/Target/ARM/Makefile	Fri Jan 19 01:51:42 2007
@@ -7,6 +7,7 @@
 # License. See LICENSE.TXT for details.
 #
 ##===----------------------------------------------------------------------===##
+
 LEVEL = ../../..
 LIBRARYNAME = LLVMARM
 TARGET = ARM
@@ -15,7 +16,6 @@
 BUILT_SOURCES = ARMGenRegisterInfo.h.inc ARMGenRegisterNames.inc \
                 ARMGenRegisterInfo.inc ARMGenInstrNames.inc \
                 ARMGenInstrInfo.inc ARMGenAsmWriter.inc \
-                ARMGenDAGISel.inc
+                ARMGenDAGISel.inc ARMGenSubtarget.inc
 
 include $(LEVEL)/Makefile.common
-


Index: llvm/lib/Target/ARM/README.txt
diff -u llvm/lib/Target/ARM/README.txt:1.9 llvm/lib/Target/ARM/README.txt:1.10
--- llvm/lib/Target/ARM/README.txt:1.9	Mon Dec 11 17:56:10 2006
+++ llvm/lib/Target/ARM/README.txt	Fri Jan 19 01:51:42 2007
@@ -2,69 +2,438 @@
 // Random ideas for the ARM backend.
 //===---------------------------------------------------------------------===//
 
-Consider implementing a select with two conditional moves:
+Reimplement 'select' in terms of 'SEL'.
 
-cmp x, y
-moveq dst, a
-movne dst, b
+* We would really like to support UXTAB16, but we need to prove that the
+  add doesn't need to overflow between the two 16-bit chunks.
 
-----------------------------------------------------------
+* implement predication support
+* Implement pre/post increment support.  (e.g. PR935)
+* Coalesce stack slots!
+* Implement smarter constant generation for binops with large immediates.
 
+* Consider materializing FP constants like 0.0f and 1.0f using integer 
+  immediate instructions then copy to FPU.  Slower than load into FPU?
 
-%tmp1 = shl int %b, ubyte %c
-%tmp4 = add int %a, %tmp1
+//===---------------------------------------------------------------------===//
 
-compiles to
+The constant island pass is extremely naive.  If a constant pool entry is
+out of range, it *always* splits a block and inserts a copy of the cp 
+entry inline.  It should:
 
-add r0, r0, r1, lsl r2
+1. Check to see if there is already a copy of this constant nearby.  If so, 
+   reuse it.
+2. Instead of always splitting blocks to insert the constant, insert it in 
+   nearby 'water'.
+3. Constant island references should be ref counted.  If a constant reference
+   is out-of-range, and the last reference to a constant is relocated, the
+   dead constant should be removed.
 
-but
+This pass has all the framework needed to implement this, but it hasn't 
+been done.
 
-%tmp1 = shl int %b, ubyte %c
-%tmp4 = add int %tmp1, %a
+//===---------------------------------------------------------------------===//
 
-compiles to
-mov r1, r1, lsl r2
-add r0, r1, r0
+We need to start generating predicated instructions.  The .td files have a way
+to express this now (see the PPC conditional return instruction), but the 
+branch folding pass (or a new if-cvt pass) should start producing these, at
+least in the trivial case.
 
----------------------------------------------------------
-%tmp1 = shl int %b, ubyte 4
-%tmp2 = add int %a, %tmp1
+Among the obvious wins, doing so can eliminate the need to custom expand 
+copysign (i.e. we won't need to custom expand it to get the conditional
+negate).
 
-compiles to
+//===---------------------------------------------------------------------===//
 
-mov r2, #4
-add r0, r0, r1, lsl r2
+Implement long long "X-3" with instructions that fold the immediate in.  These
+were disabled due to badness with the ARM carry flag on subtracts.
 
-should be
+//===---------------------------------------------------------------------===//
 
-add r0, r0, r1, lsl #4
+We currently compile abs:
+int foo(int p) { return p < 0 ? -p : p; }
 
-----------------------------------------------------------
+into:
 
-add an offset to FLDS/FLDD/FSTD/FSTS addressing mode
+_foo:
+        rsb r1, r0, #0
+        cmn r0, #1
+        movgt r1, r0
+        mov r0, r1
+        bx lr
 
-----------------------------------------------------------
+This is very, uh, literal.  This could be a 3 operation sequence:
+  t = (p sra 31); 
+  res = (p xor t)-t
 
-the function
+Which would be better.  This occurs in png decode.
 
-void %f() {
+//===---------------------------------------------------------------------===//
+
+More load / store optimizations:
+1) Look past instructions without side-effects (not load, store, branch, etc.)
+   when forming the list of loads / stores to optimize.
+
+2) Smarter register allocation?
+We are probably missing some opportunities to use ldm / stm. Consider:
+
+ldr r5, [r0]
+ldr r4, [r0, #4]
+
+This cannot be merged into a ldm. Perhaps we will need to do the transformation
+before register allocation. Then teach the register allocator to allocate a
+chunk of consecutive registers.
+
+3) Better representation for block transfer? This is from Olden/power:
+
+	fldd d0, [r4]
+	fstd d0, [r4, #+32]
+	fldd d0, [r4, #+8]
+	fstd d0, [r4, #+40]
+	fldd d0, [r4, #+16]
+	fstd d0, [r4, #+48]
+	fldd d0, [r4, #+24]
+	fstd d0, [r4, #+56]
+
+If we can spare the registers, it would be better to use fldm and fstm here.
+Need major register allocator enhancement though.
+
+4) Can we recognize the relative position of constantpool entries? i.e. Treat
+
+	ldr r0, LCPI17_3
+	ldr r1, LCPI17_4
+	ldr r2, LCPI17_5
+
+   as
+	ldr r0, LCPI17
+	ldr r1, LCPI17+4
+	ldr r2, LCPI17+8
+
+   Then the ldr's can be combined into a single ldm. See Olden/power.
+
+Note that for ARM v4, gcc uses ldmia to load the pair of 32-bit values that
+represent a 64-bit double FP constant:
+
+	adr	r0, L6
+	ldmia	r0, {r0-r1}
+
+	.align 2
+L6:
+	.long	-858993459
+	.long	1074318540
+
+5) Can we make use of ldrd and strd? Instead of generating ldm / stm, use
+ldrd/strd instead if there are only two destination registers that form an
+odd/even pair. However, we probably would pay a penalty if the address is not
+aligned on 8-byte boundary. This requires more information on load / store
+nodes (and MI's?) than we currently carry.
+
+//===---------------------------------------------------------------------===//
+
+* Consider this silly example:
+
+double bar(double x) {  
+  double r = foo(3.1);
+  return x+r;
+}
+
+_bar:
+	sub sp, sp, #16
+	str r4, [sp, #+12]
+	str r5, [sp, #+8]
+	str lr, [sp, #+4]
+	mov r4, r0
+	mov r5, r1
+	ldr r0, LCPI2_0
+	bl _foo
+	fmsr f0, r0
+	fcvtsd d0, f0
+	fmdrr d1, r4, r5
+	faddd d0, d0, d1
+	fmrrd r0, r1, d0
+	ldr lr, [sp, #+4]
+	ldr r5, [sp, #+8]
+	ldr r4, [sp, #+12]
+	add sp, sp, #16
+	bx lr
+
+Ignore the prologue and epilogue stuff for a second. Note 
+	mov r4, r0
+	mov r5, r1
+the copies to callee-save registers and the fact they are only being used by the
+fmdrr instruction. It would have been better had the fmdrr been scheduled
+before the call and placed its result in a callee-save DPR register. The two
+mov ops would not have been necessary.
+
+//===---------------------------------------------------------------------===//
+
+Calling convention related stuff:
+
+* gcc's parameter passing implementation is terrible and we suffer as a result:
+
+e.g.
+struct s {
+  double d1;
+  int s1;
+};
+
+void foo(struct s S) {
+  printf("%g, %d\n", S.d1, S.s1);
+}
+
+'S' is passed via registers r0, r1, r2. But gcc stores them to the stack, and
+then reloads them into r1, r2, and r3 before issuing the call (r0 contains the
+address of the format string):
+
+	stmfd	sp!, {r7, lr}
+	add	r7, sp, #0
+	sub	sp, sp, #12
+	stmia	sp, {r0, r1, r2}
+	ldmia	sp, {r1-r2}
+	ldr	r0, L5
+	ldr	r3, [sp, #8]
+L2:
+	add	r0, pc, r0
+	bl	L_printf$stub
+
+Instead of a stmia, ldmia, and a ldr, wouldn't it be better to do three moves?
+
+* Returning an aggregate type is even worse:
+
+e.g.
+struct s foo(void) {
+  struct s S = {1.1, 2};
+  return S;
+}
+
+	mov	ip, r0
+	ldr	r0, L5
+	sub	sp, sp, #12
+L2:
+	add	r0, pc, r0
+	@ lr needed for prologue
+	ldmia	r0, {r0, r1, r2}
+	stmia	sp, {r0, r1, r2}
+	stmia	ip, {r0, r1, r2}
+	mov	r0, ip
+	add	sp, sp, #12
+	bx	lr
+
+r0 (and later ip) is the hidden parameter from caller to store the value in. The
+first ldmia loads the constants into r0, r1, r2. The last stmia stores r0, r1,
+r2 into the address passed in. However, there is one additional stmia that
+stores r0, r1, and r2 to some stack location. The store is dead.
+
+The llvm-gcc generated code looks like this:
+
+csretcc void %foo(%struct.s* %agg.result) {
 entry:
-	call void %g( int 1, int 2, int 3, int 4, int 5 )
+	%S = alloca %struct.s, align 4		; <%struct.s*> [#uses=1]
+	%memtmp = alloca %struct.s		; <%struct.s*> [#uses=1]
+	cast %struct.s* %S to sbyte*		; <sbyte*>:0 [#uses=2]
+	call void %llvm.memcpy.i32( sbyte* %0, sbyte* cast ({ double, int }* %C.0.904 to sbyte*), uint 12, uint 4 )
+	cast %struct.s* %agg.result to sbyte*		; <sbyte*>:1 [#uses=2]
+	call void %llvm.memcpy.i32( sbyte* %1, sbyte* %0, uint 12, uint 0 )
+	cast %struct.s* %memtmp to sbyte*		; <sbyte*>:2 [#uses=1]
+	call void %llvm.memcpy.i32( sbyte* %2, sbyte* %1, uint 12, uint 0 )
 	ret void
 }
 
-declare void %g(int, int, int, int, int)
+llc ends up issuing two memcpy's (the first memcpy becomes 3 loads from
+constantpool). Perhaps we should 1) fix llvm-gcc so the memcpy is translated
+into a number of load and stores, or 2) custom lower memcpy (of small size) to
+be ldmia / stmia. I think option 2 is better but the current register
+allocator cannot allocate a chunk of registers at a time.
+
+A feasible temporary solution is to use specific physical registers at the
+lowering time for small (<= 4 words?) transfer size.
+
+* ARM CSRet calling convention requires the hidden argument to be returned by
+the callee.
+
+//===---------------------------------------------------------------------===//
+
+We can definitely do a better job on BB placements to eliminate some branches.
+It's very common to see llvm generated assembly code that looks like this:
+
+LBB3:
+ ...
+LBB4:
+...
+  beq LBB3
+  b LBB2
+
+If BB4 is the only predecessor of BB3, then we can emit BB3 after BB4. We can
+then eliminate the beq and turn the unconditional branch to LBB2 into a bne.
+
+See McCat/18-imp/ComputeBoundingBoxes for an example.
+
+//===---------------------------------------------------------------------===//
+
+We need register scavenging.  Currently, the 'ip' register is reserved in case
+frame indexes are too big.  This means that we generate extra code for stuff 
+like this:
+
+void foo(unsigned x, unsigned y, unsigned z, unsigned *a, unsigned *b, unsigned *c) { 
+   short Rconst = (short) (16384.0f * 1.40200 + 0.5 );
+   *a = x * Rconst;
+   *b = y * Rconst;
+   *c = z * Rconst;
+}
+
+we compile it to:
+
+_foo:
+***     stmfd sp!, {r4, r7}
+***     add r7, sp, #4
+        mov r4, #186
+        orr r4, r4, #89, 24 @ 22784
+        mul r0, r0, r4
+        str r0, [r3]
+        mul r0, r1, r4
+        ldr r1, [sp, #+8]
+        str r0, [r1]
+        mul r0, r2, r4
+        ldr r1, [sp, #+12]
+        str r0, [r1]
+***     sub sp, r7, #4
+***     ldmfd sp!, {r4, r7}
+        bx lr
+
+GCC produces:
+
+_foo:
+        ldr     ip, L4
+        mul     r0, ip, r0
+        mul     r1, ip, r1
+        str     r0, [r3, #0]
+        ldr     r3, [sp, #0]
+        mul     r2, ip, r2
+        str     r1, [r3, #0]
+        ldr     r3, [sp, #4]
+        str     r2, [r3, #0]
+        bx      lr
+L4:
+        .long   22970
+
+This is apparently all because we couldn't use ip here.
+
+//===---------------------------------------------------------------------===//
+
+Pre-/post- indexed load / stores:
+
+1) We should not make the pre/post- indexed load/store transform if the base ptr
+is guaranteed to be live beyond the load/store. This can happen if the base
+ptr is live out of the block we are performing the optimization. e.g.
+
+mov r1, r2
+ldr r3, [r1], #4
+...
+
+vs.
+
+ldr r3, [r2]
+add r1, r2, #4
+...
+
+In most cases, this is just a wasted optimization. However, sometimes it can
+negatively impact the performance because two-address code is more restrictive
+when it comes to scheduling.
+
+Unfortunately, liveout information is currently unavailable during DAG combine
+time.
+
+2) Consider splitting an indexed load / store into a pair of add/sub + load/store
+   to solve #1 (in TwoAddressInstructionPass.cpp).
+
+3) Enhance LSR to generate more opportunities for indexed ops.
+
+4) Once we have added support for multiple result patterns, write indexed load
+   patterns instead of C++ instruction selection code.
+
+5) Use FLDM / FSTM to emulate indexed FP load / store.
+
+//===---------------------------------------------------------------------===//
+
+We should add i64 support to take advantage of the 64-bit load / stores.
+We can add a pseudo i64 register class containing pseudo registers that are
+register pairs. All other ops (e.g. add, sub) would be expanded as usual.
+
+We need to add pseudo instructions (i.e. gethi / getlo) to extract i32 registers
+from the i64 register. These are single moves which can be eliminated if the
+destination register is a sub-register of the source. We should implement proper
+subreg support in the register allocator to coalesce these away.
+
+There are other minor issues such as multiple instructions for a spill / restore
+/ move.
+
+//===---------------------------------------------------------------------===//
+
+Implement support for some more tricky ways to materialize immediates.  For
+example, to get 0xffff8000, we can use:
+
+mov r9, #&3f8000
+sub r9, r9, #&400000
+
+//===---------------------------------------------------------------------===//
+
+We sometimes generate multiple add / sub instructions to update sp in prologue
+and epilogue if the inc / dec value is too large to fit in a single immediate
+operand. In some cases, perhaps it might be better to load the value from a
+constantpool instead.
+
+//===---------------------------------------------------------------------===//
+
+GCC generates significantly better code for this function.
+
+int foo(int StackPtr, unsigned char *Line, unsigned char *Stack, int LineLen) {
+    int i = 0;
+
+    if (StackPtr != 0) {
+       while (StackPtr != 0 && i < (((LineLen) < (32768))? (LineLen) : (32768)))
+          Line[i++] = Stack[--StackPtr];
+        if (LineLen > 32768)
+        {
+            while (StackPtr != 0 && i < LineLen)
+            {
+                i++;
+                --StackPtr;
+            }
+        }
+    }
+    return StackPtr;
+}
+
+//===---------------------------------------------------------------------===//
 
-Only needs 8 bytes of stack space. We currently allocate 16.
+This should compile to the mlas instruction:
+int mlas(int x, int y, int z) { return ((x * y + z) < 0) ? 7 : 13; }
 
-----------------------------------------------------------
+//===---------------------------------------------------------------------===//
 
-32 x 32 -> 64 multiplications currently uses two instructions. We
-should try to declare smull and umull as returning two values.
+At some point, we should triage these to see if they still apply to us:
 
-----------------------------------------------------------
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=19598
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=18560
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=27016
+
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11831
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11826
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11825
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11824
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11823
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11820
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=10982
+
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=10242
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=9831
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=9760
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=9759
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=9703
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=9702
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=9663
 
-Implement addressing modes 2 (ldrb) and 3 (ldrsb)
+http://www.inf.u-szeged.hu/gcc-arm/
+http://citeseer.ist.psu.edu/debus04linktime.html
 
-----------------------------------------------------------
+//===---------------------------------------------------------------------===//