[llvm-commits] [vector_llvm] CVS: llvm/lib/Target/IA64/IA64.h IA64AsmPrinter.cpp IA64ISelDAGToDAG.cpp IA64ISelLowering.cpp IA64ISelLowering.h IA64ISelPattern.cpp IA64InstrFormats.td IA64InstrInfo.td IA64RegisterInfo.cpp IA64RegisterInfo.td IA64TargetMachine.cpp IA64TargetMachine.h Makefile README

Robert Bocchino bocchino at cs.uiuc.edu
Wed Nov 16 10:32:47 PST 2005



Changes in directory llvm/lib/Target/IA64:

IA64.h updated: 1.2 -> 1.2.4.1
IA64AsmPrinter.cpp updated: 1.12 -> 1.12.4.1
IA64ISelDAGToDAG.cpp added (r1.10.2.2)
IA64ISelLowering.cpp added (r1.3.2.2)
IA64ISelLowering.h added (r1.1.4.2)
IA64ISelPattern.cpp updated: 1.66 -> 1.66.2.1
IA64InstrFormats.td updated: 1.1 -> 1.1.4.1
IA64InstrInfo.td updated: 1.15 -> 1.15.2.1
IA64RegisterInfo.cpp updated: 1.7 -> 1.7.2.1
IA64RegisterInfo.td updated: 1.8 -> 1.8.2.1
IA64TargetMachine.cpp updated: 1.5 -> 1.5.2.1
IA64TargetMachine.h updated: 1.4 -> 1.4.2.1
Makefile updated: 1.3 -> 1.3.4.1
README updated: 1.4 -> 1.4.4.1
---
Log message:

Merged mainline into Vector LLVM branch


---
Diffs of the changes:  (+1578 -184)

 IA64.h                |    5 
 IA64AsmPrinter.cpp    |   22 +
 IA64ISelDAGToDAG.cpp  |  556 +++++++++++++++++++++++++++++++++++++++++++++
 IA64ISelLowering.cpp  |  367 +++++++++++++++++++++++++++++
 IA64ISelLowering.h    |   88 +++++++
 IA64ISelPattern.cpp   |   16 -
 IA64InstrFormats.td   |   12 
 IA64InstrInfo.td      |  613 ++++++++++++++++++++++++++++++++++++++------------
 IA64RegisterInfo.cpp  |   27 --
 IA64RegisterInfo.td   |   30 +-
 IA64TargetMachine.cpp |   17 +
 IA64TargetMachine.h   |    2 
 Makefile              |    3 
 README                |    4 
 14 files changed, 1578 insertions(+), 184 deletions(-)


Index: llvm/lib/Target/IA64/IA64.h
diff -u llvm/lib/Target/IA64/IA64.h:1.2 llvm/lib/Target/IA64/IA64.h:1.2.4.1
--- llvm/lib/Target/IA64/IA64.h:1.2	Thu Apr 21 18:13:10 2005
+++ llvm/lib/Target/IA64/IA64.h	Wed Nov 16 12:32:35 2005
@@ -22,6 +22,11 @@
 class FunctionPass;
 class IntrinsicLowering;
 
+/// createIA64DAGToDAGInstructionSelector - This pass converts an LLVM
+/// function into IA64 machine code in a sane, DAG->DAG transform.
+///
+FunctionPass *createIA64DAGToDAGInstructionSelector(TargetMachine &TM);
+
 /// createIA64PatternInstructionSelector - This pass converts an LLVM function
 /// into a machine code representation in a more aggressive way.
 ///


Index: llvm/lib/Target/IA64/IA64AsmPrinter.cpp
diff -u llvm/lib/Target/IA64/IA64AsmPrinter.cpp:1.12 llvm/lib/Target/IA64/IA64AsmPrinter.cpp:1.12.4.1
--- llvm/lib/Target/IA64/IA64AsmPrinter.cpp:1.12	Fri Apr 22 12:54:15 2005
+++ llvm/lib/Target/IA64/IA64AsmPrinter.cpp	Wed Nov 16 12:32:35 2005
@@ -11,7 +11,7 @@
 // of machine-dependent LLVM code to assembly accepted by the GNU binutils 'gas'
 // assembler. The Intel 'ias' and HP-UX 'as' assemblers *may* choke on this
 // output, but if so that's a bug I'd like to hear about: please file a bug
-// report in bugzilla. FYI, the excellent 'ias' assembler is bundled with
+// report in bugzilla. FYI, the not too bad 'ias' assembler is bundled with
 // the Intel C/C++ compiler for Itanium Linux.
 //
 //===----------------------------------------------------------------------===//
@@ -249,7 +249,25 @@
     }
     void printS64ImmOperand(const MachineInstr *MI, unsigned OpNo,
                             MVT::ValueType VT) {
-      O << (int64_t)MI->getOperand(OpNo).getImmedValue();
+// XXX : nasty hack to avoid GPREL22 "relocation truncated to fit" linker
+// errors - instead of add rX = @gprel(CPI<whatever>), r1;; we now
+// emit movl rX = @gprel(CPI<whatever>);;
+//      add  rX = rX, r1; 
+// this gives us 64 bits instead of 22 (for the add long imm) to play
+// with, which shuts up the linker. The problem is that the constant
+// pool entries aren't immediates at this stage, so we check here. 
+// If it's an immediate, print it the old fashioned way. If it's
+// not, we print it as a constant pool index. 
+      if(MI->getOperand(OpNo).isImmediate()) {
+        O << (int64_t)MI->getOperand(OpNo).getImmedValue();
+      } else { // this is a constant pool reference: FIXME: assert this
+        printOp(MI->getOperand(OpNo));
+      }
+    }
+
+    void printGlobalOperand(const MachineInstr *MI, unsigned OpNo,
+                          MVT::ValueType VT) {
+      printOp(MI->getOperand(OpNo), false); // this is NOT a br.call instruction
     }
 
     void printCallOperand(const MachineInstr *MI, unsigned OpNo,


Index: llvm/lib/Target/IA64/IA64ISelDAGToDAG.cpp
diff -c /dev/null llvm/lib/Target/IA64/IA64ISelDAGToDAG.cpp:1.10.2.2
*** /dev/null	Wed Nov 16 12:32:47 2005
--- llvm/lib/Target/IA64/IA64ISelDAGToDAG.cpp	Wed Nov 16 12:32:36 2005
***************
*** 0 ****
--- 1,556 ----
+ //===---- IA64ISelDAGToDAG.cpp - IA64 pattern matching inst selector ------===//
+ //
+ //                     The LLVM Compiler Infrastructure
+ //
+ // This file was developed by Duraid Madina and is distributed under
+ // the University of Illinois Open Source License. See LICENSE.TXT for details.
+ //
+ //===----------------------------------------------------------------------===//
+ //
+ // This file defines a pattern matching instruction selector for IA64,
+ // converting a legalized dag to an IA64 dag.
+ //
+ //===----------------------------------------------------------------------===//
+ 
+ #include "IA64.h"
+ #include "IA64TargetMachine.h"
+ #include "IA64ISelLowering.h"
+ #include "llvm/CodeGen/MachineInstrBuilder.h"
+ #include "llvm/CodeGen/MachineFunction.h"
+ #include "llvm/CodeGen/SSARegMap.h"
+ #include "llvm/CodeGen/SelectionDAG.h"
+ #include "llvm/CodeGen/SelectionDAGISel.h"
+ #include "llvm/Target/TargetOptions.h"
+ #include "llvm/ADT/Statistic.h"
+ #include "llvm/Constants.h"
+ #include "llvm/GlobalValue.h"
+ #include "llvm/Support/Debug.h"
+ #include "llvm/Support/MathExtras.h"
+ using namespace llvm;
+ 
+ namespace {
+   Statistic<> FusedFP ("ia64-codegen", "Number of fused fp operations");
+   Statistic<> FrameOff("ia64-codegen", "Number of frame idx offsets collapsed");
+     
+   //===--------------------------------------------------------------------===//
+   /// IA64DAGToDAGISel - IA64 specific code to select IA64 machine
+   /// instructions for SelectionDAG operations.
+   ///
+   class IA64DAGToDAGISel : public SelectionDAGISel {
+     IA64TargetLowering IA64Lowering;
+     unsigned GlobalBaseReg;
+   public:
+     IA64DAGToDAGISel(TargetMachine &TM)
+       : SelectionDAGISel(IA64Lowering), IA64Lowering(TM) {}
+     
+     virtual bool runOnFunction(Function &Fn) {
+       // Make sure we re-emit a set of the global base reg if necessary
+       GlobalBaseReg = 0;
+       return SelectionDAGISel::runOnFunction(Fn);
+     }
+  
+     /// getI64Imm - Return a target constant with the specified value, of type
+     /// i64.
+     inline SDOperand getI64Imm(uint64_t Imm) {
+       return CurDAG->getTargetConstant(Imm, MVT::i64);
+     }
+ 
+     /// getGlobalBaseReg - insert code into the entry mbb to materialize the PIC
+     /// base register.  Return the virtual register that holds this value.
+     // SDOperand getGlobalBaseReg(); TODO: hmm
+     
+     // Select - Convert the specified operand from a target-independent to a
+     // target-specific node if it hasn't already been changed.
+     SDOperand Select(SDOperand Op);
+     
+     SDNode *SelectIntImmediateExpr(SDOperand LHS, SDOperand RHS,
+                                    unsigned OCHi, unsigned OCLo,
+                                    bool IsArithmetic = false,
+                                    bool Negate = false);
+     SDNode *SelectBitfieldInsert(SDNode *N);
+ 
+     /// SelectCC - Select a comparison of the specified values with the
+     /// specified condition code, returning the CR# of the expression.
+     SDOperand SelectCC(SDOperand LHS, SDOperand RHS, ISD::CondCode CC);
+ 
+     /// SelectAddr - Given the specified address, return the two operands for a
+     /// load/store instruction, and return true if it should be an indexed [r+r]
+     /// operation.
+     bool SelectAddr(SDOperand Addr, SDOperand &Op1, SDOperand &Op2);
+ 
+     SDOperand BuildSDIVSequence(SDNode *N);
+     SDOperand BuildUDIVSequence(SDNode *N);
+     
+     /// InstructionSelectBasicBlock - This callback is invoked by
+     /// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
+     virtual void InstructionSelectBasicBlock(SelectionDAG &DAG);
+     
+     virtual const char *getPassName() const {
+       return "IA64 (Itanium) DAG->DAG Instruction Selector";
+     } 
+ 
+ // Include the pieces autogenerated from the target description.
+ #include "IA64GenDAGISel.inc"
+     
+ private:
+     SDOperand SelectCALL(SDOperand Op);
+   };
+ }
+ 
+ /// InstructionSelectBasicBlock - This callback is invoked by
+ /// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
+ void IA64DAGToDAGISel::InstructionSelectBasicBlock(SelectionDAG &DAG) {
+   DEBUG(BB->dump());
+   
+   // The selection process is inherently a bottom-up recursive process (users
+   // select their uses before themselves).  Given infinite stack space, we
+   // could just start selecting on the root and traverse the whole graph.  In
+   // practice however, this causes us to run out of stack space on large basic
+   // blocks.  To avoid this problem, select the entry node, then all its uses,
+   // iteratively instead of recursively.
+   std::vector<SDOperand> Worklist;
+   Worklist.push_back(DAG.getEntryNode());
+   
+   // Note that we can do this in the IA64 target (scanning forward across token
+   // chain edges) because no nodes ever get folded across these edges.  On a
+   // target like X86 which supports load/modify/store operations, this would
+   // have to be more careful.
+   while (!Worklist.empty()) {
+     SDOperand Node = Worklist.back();
+     Worklist.pop_back();
+     
+     // Chose from the least deep of the top two nodes.
+     if (!Worklist.empty() &&
+         Worklist.back().Val->getNodeDepth() < Node.Val->getNodeDepth())
+       std::swap(Worklist.back(), Node);
+     
+     if ((Node.Val->getOpcode() >= ISD::BUILTIN_OP_END &&
+          Node.Val->getOpcode() < IA64ISD::FIRST_NUMBER) ||
+         CodeGenMap.count(Node)) continue;
+     
+     for (SDNode::use_iterator UI = Node.Val->use_begin(),
+          E = Node.Val->use_end(); UI != E; ++UI) {
+       // Scan the values.  If this use has a value that is a token chain, add it
+       // to the worklist.
+       SDNode *User = *UI;
+       for (unsigned i = 0, e = User->getNumValues(); i != e; ++i)
+         if (User->getValueType(i) == MVT::Other) {
+           Worklist.push_back(SDOperand(User, i));
+           break; 
+         }
+     }
+ 
+     // Finally, legalize this node.
+     Select(Node);
+   }
+     
+   // Select target instructions for the DAG.
+   DAG.setRoot(Select(DAG.getRoot()));
+   CodeGenMap.clear();
+   DAG.RemoveDeadNodes();
+   
+   // Emit machine code to BB. 
+   ScheduleAndEmitDAG(DAG);
+ }
+ 
+ 
+ SDOperand IA64DAGToDAGISel::SelectCALL(SDOperand Op) {
+   SDNode *N = Op.Val;
+   SDOperand Chain = Select(N->getOperand(0));
+   
+   unsigned CallOpcode;
+   std::vector<SDOperand> CallOperands;
+ 
+   // save the current GP, SP and RP : FIXME: do we need to do all 3 always?
+   SDOperand GPBeforeCall = CurDAG->getCopyFromReg(Chain, IA64::r1, MVT::i64);
+   Chain = GPBeforeCall.getValue(1);
+   SDOperand SPBeforeCall = CurDAG->getCopyFromReg(Chain, IA64::r12, MVT::i64);
+   Chain = SPBeforeCall.getValue(1);
+   SDOperand RPBeforeCall = CurDAG->getCopyFromReg(Chain, IA64::rp, MVT::i64);
+   Chain = RPBeforeCall.getValue(1);
+ 
+   // if we can call directly, do so
+   if (GlobalAddressSDNode *GASD =
+       dyn_cast<GlobalAddressSDNode>(N->getOperand(1))) {
+     CallOpcode = IA64::BRCALL_IPREL;
+     CallOperands.push_back(CurDAG->getTargetGlobalAddress(GASD->getGlobal(),
+                                                           MVT::i64));
+   } else if (ExternalSymbolSDNode *ESSDN = // FIXME: we currently NEED this
+ 		                         // case for correctness, to avoid
+ 					 // "non-pic code with imm reloc.n
+ 					 // against dynamic symbol" errors
+              dyn_cast<ExternalSymbolSDNode>(N->getOperand(1))) {
+     CallOpcode = IA64::BRCALL_IPREL;
+     CallOperands.push_back(N->getOperand(1));
+   } else {
+     // otherwise we need to load the function descriptor,
+     // load the branch target (function)'s entry point and GP,
+     // branch (call) then restore the GP
+     
+     SDOperand FnDescriptor = Select(N->getOperand(1));
+    
+     // load the branch target's entry point [mem] and 
+     // GP value [mem+8]
+     SDOperand targetEntryPoint=CurDAG->getTargetNode(IA64::LD8, MVT::i64,
+ 		    FnDescriptor);
+     Chain = targetEntryPoint.getValue(1);
+     SDOperand targetGPAddr=CurDAG->getTargetNode(IA64::ADDS, MVT::i64, 
+ 		    FnDescriptor, CurDAG->getConstant(8, MVT::i64));
+     Chain = targetGPAddr.getValue(1);
+     SDOperand targetGP=CurDAG->getTargetNode(IA64::LD8, MVT::i64,
+ 		    targetGPAddr);
+     Chain = targetGP.getValue(1);
+ 
+ /* FIXME? (methcall still fails)
+     SDOperand targetEntryPoint=CurDAG->getLoad(MVT::i64, Chain, FnDescriptor,
+ 	                                CurDAG->getSrcValue(0));
+     SDOperand targetGPAddr=CurDAG->getNode(ISD::ADD, MVT::i64, FnDescriptor, 
+ 	                    CurDAG->getConstant(8, MVT::i64));
+     SDOperand targetGP=CurDAG->getLoad(MVT::i64, Chain, targetGPAddr,
+ 	                               CurDAG->getSrcValue(0));
+     */
+ 
+     /* this is just the long way of writing the two lines below?
+     // Copy the callee GP into r1
+     SDOperand r1 = CurDAG->getRegister(IA64::r1, MVT::i64);
+     Chain = CurDAG->getNode(ISD::CopyToReg, MVT::i64, Chain, r1,
+ 	             targetGP);
+     
+ 
+     // Copy the callee address into the b6 branch register
+     SDOperand B6 = CurDAG->getRegister(IA64::B6, MVT::i64);
+     Chain = CurDAG->getNode(ISD::CopyToReg, MVT::i64, Chain, B6,
+ 	             targetEntryPoint);
+     */
+ 
+     Chain = CurDAG->getCopyToReg(Chain, IA64::r1, targetGP);
+     Chain = CurDAG->getCopyToReg(Chain, IA64::B6, targetEntryPoint);
+     
+     CallOperands.push_back(CurDAG->getRegister(IA64::B6, MVT::i64));
+     CallOpcode = IA64::BRCALL_INDIRECT;
+   }
+  
+   // see section 8.5.8 of the "Itanium Software Conventions and
+   // Runtime Architecture Guide" to see some examples of what's going
+   // on here. (in short: int args get mapped 1:1 'slot-wise' to out0->out7,
+   // while FP args get mapped to F8->F15 as needed)
+   
+   // TODO: support in-memory arguments
+  
+   unsigned used_FPArgs=0; // how many FP args have been used so far?
+ 
+   unsigned intArgs[] = {IA64::out0, IA64::out1, IA64::out2, IA64::out3,
+                         IA64::out4, IA64::out5, IA64::out6, IA64::out7 };
+   unsigned FPArgs[] = {IA64::F8, IA64::F9, IA64::F10, IA64::F11,
+                        IA64::F12, IA64::F13, IA64::F14, IA64::F15 };
+ 
+   SDOperand InFlag;  // Null incoming flag value.
+   
+   for (unsigned i = 2, e = N->getNumOperands(); i != e; ++i) {
+     unsigned DestReg = 0;
+     MVT::ValueType RegTy = N->getOperand(i).getValueType();
+     if (RegTy == MVT::i64) {
+       assert((i-2) < 8 && "Too many int args");
+       DestReg = intArgs[i-2];
+     } else {
+       assert(MVT::isFloatingPoint(N->getOperand(i).getValueType()) &&
+              "Unpromoted integer arg?");
+       assert(used_FPArgs < 8 && "Too many fp args");
+       DestReg = FPArgs[used_FPArgs++];
+     }
+     
+     if (N->getOperand(i).getOpcode() != ISD::UNDEF) {
+       SDOperand Val = Select(N->getOperand(i));
+       if(MVT::isInteger(N->getOperand(i).getValueType())) {
+         Chain = CurDAG->getCopyToReg(Chain, DestReg, Val, InFlag);
+         InFlag = Chain.getValue(1);
+         CallOperands.push_back(CurDAG->getRegister(DestReg, RegTy));
+       }
+       // some functions (e.g. printf) want floating point arguments
+       // *also* passed as in-memory representations in integer registers
+       // this is FORTRAN legacy junk which we don't _always_ need
+       // to do, but to be on the safe side, we do. 
+       else if(MVT::isFloatingPoint(N->getOperand(i).getValueType())) {
+         assert((i-2) < 8 && "FP args alone would fit, but no int regs left");
+ 	// first copy into the appropriate FP reg
+         Chain = CurDAG->getCopyToReg(Chain, DestReg, Val);	
+ 	// then copy into the appropriate integer reg
+ 	DestReg = intArgs[i-2];
+         // GETFD takes an FP reg and writes a GP reg	
+ 	Chain = CurDAG->getTargetNode(IA64::GETFD, MVT::i64, Val);
+         // FIXME: this next line is a bit unfortunate 
+ 	Chain = CurDAG->getCopyToReg(Chain, DestReg, Chain, InFlag); 
+         InFlag = Chain.getValue(1);
+         CallOperands.push_back(CurDAG->getRegister(DestReg, MVT::i64));
+       }
+     }
+   }
+   
+   // Finally, once everything is in registers to pass to the call, emit the
+   // call itself.
+   if (InFlag.Val)
+     CallOperands.push_back(InFlag);   // Strong dep on register copies.
+   else
+     CallOperands.push_back(Chain);    // Weak dep on whatever occurs before
+   Chain = CurDAG->getTargetNode(CallOpcode, MVT::Other, MVT::Flag,
+                                 CallOperands);
+  
+   std::vector<SDOperand> CallResults;
+   
+   // If the call has results, copy the values out of the ret val registers.
+   switch (N->getValueType(0)) {
+     default: assert(0 && "Unexpected ret value!");
+     case MVT::Other: break;
+     case MVT::i1: {
+         // bools are returned as bytes 0/1 in r8
+ 	SDOperand byteval = CurDAG->getCopyFromReg(Chain, IA64::r8, MVT::i64,
+ 	                               Chain.getValue(1));
+         Chain = byteval.getValue(1);
+ 	Chain = CurDAG->getTargetNode(IA64::CMPNE, MVT::i1, MVT::Other,
+ 	    byteval, CurDAG->getRegister(IA64::r0, MVT::i64)).getValue(1);
+ 	CallResults.push_back(Chain.getValue(0));
+ 	break;
+ 	}
+     case MVT::i64:
+         Chain = CurDAG->getCopyFromReg(Chain, IA64::r8, MVT::i64,
+                                        Chain.getValue(1)).getValue(1);
+         CallResults.push_back(Chain.getValue(0));
+       break;
+     case MVT::f64:
+       Chain = CurDAG->getCopyFromReg(Chain, IA64::F8, N->getValueType(0),
+                                      Chain.getValue(1)).getValue(1);
+       CallResults.push_back(Chain.getValue(0));
+       break;
+   }
+    
+   // restore GP, SP and RP - FIXME: this doesn't quite work (e.g.
+   // methcall / objinst both segfault on exit) and it *really*
+   // doesn't work unless you have -sched=none
+   Chain = CurDAG->getCopyToReg(Chain, IA64::r1, GPBeforeCall);
+   Chain = CurDAG->getCopyToReg(Chain, IA64::r12, SPBeforeCall);
+   Chain = CurDAG->getCopyToReg(Chain, IA64::rp, RPBeforeCall);
+   CallResults.push_back(Chain); // llc segfaults w/o this,
+                       // ary3(e.g.) SIGILLs with 3
+ 
+   for (unsigned i = 0, e = CallResults.size(); i != e; ++i)
+     CodeGenMap[Op.getValue(i)] = CallResults[i];
+  
+   return CallResults[Op.ResNo];
+ }
+ 
+ // Select - Convert the specified operand from a target-independent to a
+ // target-specific node if it hasn't already been changed.
+ SDOperand IA64DAGToDAGISel::Select(SDOperand Op) {
+   SDNode *N = Op.Val;
+   if (N->getOpcode() >= ISD::BUILTIN_OP_END &&
+       N->getOpcode() < IA64ISD::FIRST_NUMBER)
+     return Op;   // Already selected.
+ 
+   // If this has already been converted, use it.
+   std::map<SDOperand, SDOperand>::iterator CGMI = CodeGenMap.find(Op);
+   if (CGMI != CodeGenMap.end()) return CGMI->second;
+   
+   switch (N->getOpcode()) {
+   default: break;
+ 
+   case ISD::CALL:
+   case ISD::TAILCALL: return SelectCALL(Op);
+  
+ /* todo:
+  * case ISD::DYNAMIC_STACKALLOC:
+ */
+   case ISD::ConstantFP: {
+     SDOperand Chain = CurDAG->getEntryNode(); // this is a constant, so..
+ 
+     if (cast<ConstantFPSDNode>(N)->isExactlyValue(+0.0))
+       return CurDAG->getCopyFromReg(Chain, IA64::F0, MVT::f64);
+     else if (cast<ConstantFPSDNode>(N)->isExactlyValue(+1.0))
+       return CurDAG->getCopyFromReg(Chain, IA64::F1, MVT::f64);
+     else
+       assert(0 && "Unexpected FP constant!");
+   }
+ 
+   case ISD::FrameIndex: { // TODO: reduce creepyness
+     int FI = cast<FrameIndexSDNode>(N)->getIndex();
+     if (N->hasOneUse()) {
+       CurDAG->SelectNodeTo(N, IA64::MOV, MVT::i64,
+                            CurDAG->getTargetFrameIndex(FI, MVT::i64));
+       return SDOperand(N, 0);
+     }
+     return CurDAG->getTargetNode(IA64::MOV, MVT::i64,
+                                 CurDAG->getTargetFrameIndex(FI, MVT::i64));
+   }
+ 
+   case ISD::ConstantPool: {
+     Constant *C = cast<ConstantPoolSDNode>(N)->get();
+     SDOperand CPI = CurDAG->getTargetConstantPool(C, MVT::i64);
+     return CurDAG->getTargetNode(IA64::ADDL_GA, MVT::i64, // ?
+ 	                      CurDAG->getRegister(IA64::r1, MVT::i64), CPI);
+   }
+ 
+   case ISD::GlobalAddress: {
+     GlobalValue *GV = cast<GlobalAddressSDNode>(N)->getGlobal();
+     SDOperand GA = CurDAG->getTargetGlobalAddress(GV, MVT::i64);
+     SDOperand Tmp = CurDAG->getTargetNode(IA64::ADDL_GA, MVT::i64, 
+ 	                          CurDAG->getRegister(IA64::r1, MVT::i64), GA);
+     return CurDAG->getTargetNode(IA64::LD8, MVT::i64, Tmp);
+   }
+ 			   
+   case ISD::LOAD:
+   case ISD::EXTLOAD:
+   case ISD::ZEXTLOAD: {
+     SDOperand Chain = Select(N->getOperand(0));
+     SDOperand Address = Select(N->getOperand(1));
+ 
+     MVT::ValueType TypeBeingLoaded = (N->getOpcode() == ISD::LOAD) ?
+       N->getValueType(0) : cast<VTSDNode>(N->getOperand(3))->getVT();
+     unsigned Opc;
+     switch (TypeBeingLoaded) {
+     default: N->dump(); assert(0 && "Cannot load this type!");
+     case MVT::i1: { // this is a bool
+       Opc = IA64::LD1; // first we load a byte, then compare for != 0
+       CurDAG->SelectNodeTo(N, IA64::CMPNE, MVT::i1, MVT::Other, 
+ 	CurDAG->getTargetNode(Opc, MVT::i64, Address),
+ 	CurDAG->getRegister(IA64::r0, MVT::i64), Chain);
+       return SDOperand(N, Op.ResNo); // XXX: early exit
+       }
+     case MVT::i8:  Opc = IA64::LD1; break;
+     case MVT::i16: Opc = IA64::LD2; break;
+     case MVT::i32: Opc = IA64::LD4; break;
+     case MVT::i64: Opc = IA64::LD8; break;
+     
+     case MVT::f32: Opc = IA64::LDF4; break;
+     case MVT::f64: Opc = IA64::LDF8; break;
+     }
+ 
+     CurDAG->SelectNodeTo(N, Opc, N->getValueType(0), MVT::Other,
+                              Address, Chain); // TODO: comment this
+     
+     return SDOperand(N, Op.ResNo);
+   }
+   
+   case ISD::TRUNCSTORE:
+   case ISD::STORE: {
+     SDOperand Address = Select(N->getOperand(2));
+     SDOperand Chain = Select(N->getOperand(0));
+    
+     unsigned Opc;
+     if (N->getOpcode() == ISD::STORE) {
+       switch (N->getOperand(1).getValueType()) {
+       default: assert(0 && "unknown type in store");
+       case MVT::i1: { // this is a bool
+         Opc = IA64::ST1; // we store either 0 or 1 as a byte 
+         CurDAG->SelectNodeTo(N, Opc, MVT::Other, Address,
+ 	    CurDAG->getTargetNode(IA64::PADDS, MVT::i64,
+ 	      CurDAG->getRegister(IA64::r0, MVT::i64),
+ 	      CurDAG->getConstant(1, MVT::i64),
+ 	      Select(N->getOperand(1))),
+ 	    Chain);
+         return SDOperand(N, 0); // XXX: early exit
+         }
+       case MVT::i64: Opc = IA64::ST8;  break;
+       case MVT::f64: Opc = IA64::STF8; break;
+       }
+     } else { //ISD::TRUNCSTORE
+       switch(cast<VTSDNode>(N->getOperand(4))->getVT()) {
+       default: assert(0 && "unknown type in truncstore");
+       case MVT::i8:  Opc = IA64::ST1;  break;
+       case MVT::i16: Opc = IA64::ST2;  break;
+       case MVT::i32: Opc = IA64::ST4;  break;
+       case MVT::f32: Opc = IA64::STF4; break;
+       }
+     }
+     
+     CurDAG->SelectNodeTo(N, Opc, MVT::Other, Select(N->getOperand(2)),
+                          Select(N->getOperand(1)), Chain);
+     return SDOperand(N, 0);
+   }
+ 
+   case ISD::BRCOND: {
+     SDOperand Chain = Select(N->getOperand(0));
+     SDOperand CC = Select(N->getOperand(1));
+     MachineBasicBlock *Dest =
+       cast<BasicBlockSDNode>(N->getOperand(2))->getBasicBlock();
+     //FIXME - we do NOT need long branches all the time
+     CurDAG->SelectNodeTo(N, IA64::BRLCOND_NOTCALL, MVT::Other, CC, CurDAG->getBasicBlock(Dest), Chain);
+     return SDOperand(N, 0);
+   }
+ 
+   case ISD::CALLSEQ_START:
+   case ISD::CALLSEQ_END: {
+     int64_t Amt = cast<ConstantSDNode>(N->getOperand(1))->getValue();
+     unsigned Opc = N->getOpcode() == ISD::CALLSEQ_START ?
+                        IA64::ADJUSTCALLSTACKDOWN : IA64::ADJUSTCALLSTACKUP;
+     CurDAG->SelectNodeTo(N, Opc, MVT::Other,
+                          getI64Imm(Amt), Select(N->getOperand(0)));
+     return SDOperand(N, 0);
+   }
+ 
+   case ISD::RET: {
+     SDOperand Chain = Select(N->getOperand(0));     // Token chain.
+ 
+     switch (N->getNumOperands()) {
+     default:
+       assert(0 && "Unknown return instruction!");
+     case 2: {
+       SDOperand RetVal = Select(N->getOperand(1));
+       switch (RetVal.getValueType()) {
+       default: assert(0 && "I don't know how to return this type! (promote?)");
+                // FIXME: do I need to add support for bools here?
+                // (return '0' or '1' in r8, basically...)
+                //
+                // FIXME: need to round floats - 80 bits is bad, the tester
+                // told me so
+       case MVT::i64:
+         // we mark r8 as live on exit up above in LowerArguments()
+         // BuildMI(BB, IA64::MOV, 1, IA64::r8).addReg(Tmp1);
+         Chain = CurDAG->getCopyToReg(Chain, IA64::r8, RetVal);
+ 	break;
+       case MVT::f64:
+         // we mark F8 as live on exit up above in LowerArguments()
+         // BuildMI(BB, IA64::FMOV, 1, IA64::F8).addReg(Tmp1);
+         Chain = CurDAG->getCopyToReg(Chain, IA64::F8, RetVal);
+         break;
+       }
+       break;
+       }
+     case 1:
+       break;
+     }
+ 
+     // we need to copy VirtGPR (the vreg (to become a real reg)) that holds
+     // the output of this function's alloc instruction back into ar.pfs
+     // before we return. this copy must not float up above the last 
+     // outgoing call in this function!!!
+     SDOperand AR_PFSVal = CurDAG->getCopyFromReg(Chain, IA64Lowering.VirtGPR,
+ 		                                  MVT::i64);
+     Chain = AR_PFSVal.getValue(1);
+     Chain = CurDAG->getCopyToReg(Chain, IA64::AR_PFS, AR_PFSVal);
+ 
+     CurDAG->SelectNodeTo(N, IA64::RET, MVT::Other, Chain); // and then just emit a 'ret' instruction
+     
+     // before returning, restore the ar.pfs register (set by the 'alloc' up top)
+     // BuildMI(BB, IA64::MOV, 1).addReg(IA64::AR_PFS).addReg(IA64Lowering.VirtGPR);
+     //
+     return SDOperand(N, 0);
+   }
+   
+   case ISD::BR:
+ 		 // FIXME: we don't need long branches all the time!
+     CurDAG->SelectNodeTo(N, IA64::BRL_NOTCALL, MVT::Other, N->getOperand(1),
+                          Select(N->getOperand(0)));
+     return SDOperand(N, 0);
+   
+   }
+   
+   return SelectCode(Op);
+ }
+ 
+ 
+ /// createIA64DAGToDAGInstructionSelector - This pass converts a legalized DAG
+ /// into an IA64-specific DAG, ready for instruction scheduling.
+ ///
+ FunctionPass *llvm::createIA64DAGToDAGInstructionSelector(TargetMachine &TM) {
+   return new IA64DAGToDAGISel(TM);
+ }
+ 


Index: llvm/lib/Target/IA64/IA64ISelLowering.cpp
diff -c /dev/null llvm/lib/Target/IA64/IA64ISelLowering.cpp:1.3.2.2
*** /dev/null	Wed Nov 16 12:32:47 2005
--- llvm/lib/Target/IA64/IA64ISelLowering.cpp	Wed Nov 16 12:32:36 2005
***************
*** 0 ****
--- 1,367 ----
+ //===-- IA64ISelLowering.cpp - IA64 DAG Lowering Implementation -----------===//
+ //
+ //                     The LLVM Compiler Infrastructure
+ //
+ // This file was developed by Duraid Madina and is distributed under
+ // the University of Illinois Open Source License. See LICENSE.TXT for details.
+ //
+ //===----------------------------------------------------------------------===//
+ //
+ // This file implements the IA64ISelLowering class.
+ //
+ //===----------------------------------------------------------------------===//
+ 
+ #include "IA64ISelLowering.h"
+ #include "IA64MachineFunctionInfo.h"
+ #include "IA64TargetMachine.h"
+ #include "llvm/CodeGen/MachineFrameInfo.h"
+ #include "llvm/CodeGen/MachineFunction.h"
+ #include "llvm/CodeGen/MachineInstrBuilder.h"
+ #include "llvm/CodeGen/SelectionDAG.h"
+ #include "llvm/CodeGen/SSARegMap.h"
+ #include "llvm/Constants.h"
+ #include "llvm/Function.h"
+ using namespace llvm;
+ 
+ IA64TargetLowering::IA64TargetLowering(TargetMachine &TM)
+   : TargetLowering(TM) {
+  
+       // register class for general registers
+       addRegisterClass(MVT::i64, IA64::GRRegisterClass);
+ 
+       // register class for FP registers
+       addRegisterClass(MVT::f64, IA64::FPRegisterClass);
+ 
+       // register class for predicate registers
+       addRegisterClass(MVT::i1, IA64::PRRegisterClass);
+ 
+       setOperationAction(ISD::BRCONDTWOWAY     , MVT::Other, Expand);
+       setOperationAction(ISD::BRTWOWAY_CC      , MVT::Other, Expand);
+       setOperationAction(ISD::FP_ROUND_INREG   , MVT::f32  , Expand);
+ 
+       setSetCCResultType(MVT::i1);
+       setShiftAmountType(MVT::i64);
+ 
+       setOperationAction(ISD::EXTLOAD          , MVT::i1   , Promote);
+ 
+       setOperationAction(ISD::ZEXTLOAD         , MVT::i1   , Expand);
+ 
+       setOperationAction(ISD::SEXTLOAD         , MVT::i1   , Expand);
+       setOperationAction(ISD::SEXTLOAD         , MVT::i8   , Expand);
+       setOperationAction(ISD::SEXTLOAD         , MVT::i16  , Expand);
+       setOperationAction(ISD::SEXTLOAD         , MVT::i32  , Expand);
+ 
+       setOperationAction(ISD::FREM             , MVT::f32  , Expand);
+       setOperationAction(ISD::FREM             , MVT::f64  , Expand);
+ 
+       setOperationAction(ISD::UREM             , MVT::f32  , Expand);
+       setOperationAction(ISD::UREM             , MVT::f64  , Expand);
+ 
+       setOperationAction(ISD::MEMMOVE          , MVT::Other, Expand);
+       setOperationAction(ISD::MEMSET           , MVT::Other, Expand);
+       setOperationAction(ISD::MEMCPY           , MVT::Other, Expand);
+       
+       setOperationAction(ISD::SINT_TO_FP       , MVT::i1   , Promote);
+       setOperationAction(ISD::UINT_TO_FP       , MVT::i1   , Promote);
+ 
+       // We don't support sin/cos/sqrt
+       setOperationAction(ISD::FSIN , MVT::f64, Expand);
+       setOperationAction(ISD::FCOS , MVT::f64, Expand);
+       setOperationAction(ISD::FSQRT, MVT::f64, Expand);
+       setOperationAction(ISD::FSIN , MVT::f32, Expand);
+       setOperationAction(ISD::FCOS , MVT::f32, Expand);
+       setOperationAction(ISD::FSQRT, MVT::f32, Expand);
+ 
+       //IA64 has these, but they are not implemented
+       setOperationAction(ISD::CTTZ , MVT::i64  , Expand);
+       setOperationAction(ISD::CTLZ , MVT::i64  , Expand);
+ 
+       computeRegisterProperties();
+ 
+       addLegalFPImmediate(+0.0);
+       addLegalFPImmediate(+1.0);
+ }
+ 
+ /// isFloatingPointZero - Return true if this is 0.0 or -0.0.
+ static bool isFloatingPointZero(SDOperand Op) {
+   if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
+     return CFP->isExactlyValue(-0.0) || CFP->isExactlyValue(0.0);
+   else if (Op.getOpcode() == ISD::EXTLOAD || Op.getOpcode() == ISD::LOAD) {
+     // Maybe this has already been legalized into the constant pool?
+     if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
+       if (ConstantFP *CFP = dyn_cast<ConstantFP>(CP->get()))
+         return CFP->isExactlyValue(-0.0) || CFP->isExactlyValue(0.0);
+   }
+   return false;
+ }
+ 
+ /// LowerArguments - Lower F's incoming (formal) arguments into SelectionDAG
+ /// values.  The first eight arguments arrive in registers (r32..r39 for
+ /// integers/bools, f8..f15 for FP, per the args_int/args_FP tables below);
+ /// any further arguments are loaded from fixed 8-byte stack slots.  Also
+ /// emits the PSEUDO_ALLOC placeholder and, for varargs functions, spills the
+ /// incoming registers to the stack and records VarArgsFrameIndex.
+ std::vector<SDOperand>
+ IA64TargetLowering::LowerArguments(Function &F, SelectionDAG &DAG) {
+   std::vector<SDOperand> ArgValues;
+   //
+   // add beautiful description of IA64 stack frame format
+   // here (from intel 24535803.pdf most likely)
+   //
+   MachineFunction &MF = DAG.getMachineFunction();
+   MachineFrameInfo *MFI = MF.getFrameInfo();
+   
+   GP = MF.getSSARegMap()->createVirtualRegister(getRegClassFor(MVT::i64));
+   SP = MF.getSSARegMap()->createVirtualRegister(getRegClassFor(MVT::i64));
+   RP = MF.getSSARegMap()->createVirtualRegister(getRegClassFor(MVT::i64));
+   
+   MachineBasicBlock& BB = MF.front();
+ 
+   // Physical registers used for the first 8 incoming arguments.
+   unsigned args_int[] = {IA64::r32, IA64::r33, IA64::r34, IA64::r35,
+                          IA64::r36, IA64::r37, IA64::r38, IA64::r39};
+ 
+   unsigned args_FP[] = {IA64::F8, IA64::F9, IA64::F10, IA64::F11,
+                         IA64::F12,IA64::F13,IA64::F14, IA64::F15};
+ 
+   // Per-argument bookkeeping for the register-passed args (indices 0..7):
+   // virtual reg, physical reg, and the copy opcode used in the move loop
+   // further down.
+   unsigned argVreg[8];
+   unsigned argPreg[8];
+   unsigned argOpc[8];
+ 
+   unsigned used_FPArgs = 0; // how many FP args have been used so far?
+ 
+   unsigned ArgOffset = 0;
+   int count = 0;
+ 
+   for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I)
+     {
+       SDOperand newroot, argt;
+       if(count < 8) { // need to fix this logic? maybe.
+ 
+         switch (getValueType(I->getType())) {
+           default:
+             assert(0 && "ERROR in LowerArgs: can't lower this type of arg.\n"); 
+           case MVT::f32:
+             // fixme? (well, will need to for weird FP structy stuff,
+             // see intel ABI docs)
+           case MVT::f64:
+ //XXX            BuildMI(&BB, IA64::IDEF, 0, args_FP[used_FPArgs]);
+             MF.addLiveIn(args_FP[used_FPArgs]); // mark this reg as liveIn
+             // floating point args go into f8..f15 as-needed, the increment
+             argVreg[count] =                              // is below..:
+             MF.getSSARegMap()->createVirtualRegister(getRegClassFor(MVT::f64));
+             // FP args go into f8..f15 as needed: (hence the ++)
+             argPreg[count] = args_FP[used_FPArgs++];
+             argOpc[count] = IA64::FMOV;
+             argt = newroot = DAG.getCopyFromReg(DAG.getRoot(), argVreg[count],
+                                                 MVT::f64);
+             // f32 args are received as f64 and rounded back down.
+             if (I->getType() == Type::FloatTy)
+               argt = DAG.getNode(ISD::FP_ROUND, MVT::f32, argt);
+             break;
+           case MVT::i1: // NOTE: as far as C abi stuff goes,
+                         // bools are just boring old ints
+           case MVT::i8:
+           case MVT::i16:
+           case MVT::i32:
+           case MVT::i64:
+ //XXX            BuildMI(&BB, IA64::IDEF, 0, args_int[count]);
+             MF.addLiveIn(args_int[count]); // mark this register as liveIn
+             argVreg[count] =
+             MF.getSSARegMap()->createVirtualRegister(getRegClassFor(MVT::i64));
+             argPreg[count] = args_int[count];
+             argOpc[count] = IA64::MOV;
+             argt = newroot =
+               DAG.getCopyFromReg(DAG.getRoot(), argVreg[count], MVT::i64);
+             // Sub-64-bit integers were widened in the register; truncate
+             // back to the declared type.
+             if ( getValueType(I->getType()) != MVT::i64)
+               argt = DAG.getNode(ISD::TRUNCATE, getValueType(I->getType()),
+                   newroot);
+             break;
+         }
+       } else { // more than 8 args go into the frame
+         // Create the frame index object for this incoming parameter...
+         // Stack args live 16 bytes into the frame, one 8-byte slot each.
+         // NOTE(review): the 16-byte base presumably matches the IA64 ABI
+         // scratch area -- confirm against the Intel doc cited above.
+         ArgOffset = 16 + 8 * (count - 8);
+         int FI = MFI->CreateFixedObject(8, ArgOffset);
+ 
+         // Create the SelectionDAG nodes corresponding to a load
+         //from this parameter
+         SDOperand FIN = DAG.getFrameIndex(FI, MVT::i64);
+         argt = newroot = DAG.getLoad(getValueType(I->getType()),
+                                      DAG.getEntryNode(), FIN, DAG.getSrcValue(NULL));
+       }
+       ++count;
+       DAG.setRoot(newroot.getValue(1));
+       ArgValues.push_back(argt);
+     }
+ 
+ 
+   // Create a vreg to hold the output of (what will become)
+   // the "alloc" instruction
+   VirtGPR = MF.getSSARegMap()->createVirtualRegister(getRegClassFor(MVT::i64));
+   BuildMI(&BB, IA64::PSEUDO_ALLOC, 0, VirtGPR);
+   // we create a PSEUDO_ALLOC (pseudo)instruction for now
+ /*
+   BuildMI(&BB, IA64::IDEF, 0, IA64::r1);
+ 
+   // hmm:
+   BuildMI(&BB, IA64::IDEF, 0, IA64::r12);
+   BuildMI(&BB, IA64::IDEF, 0, IA64::rp);
+   // ..hmm.
+   
+   BuildMI(&BB, IA64::MOV, 1, GP).addReg(IA64::r1);
+ 
+   // hmm:
+   BuildMI(&BB, IA64::MOV, 1, SP).addReg(IA64::r12);
+   BuildMI(&BB, IA64::MOV, 1, RP).addReg(IA64::rp);
+   // ..hmm.
+ */
+ 
+   unsigned tempOffset=0;
+ 
+   // if this is a varargs function, we simply lower llvm.va_start by
+   // pointing to the first entry
+   if(F.isVarArg()) {
+     tempOffset=0;
+     VarArgsFrameIndex = MFI->CreateFixedObject(8, tempOffset);
+   }
+ 
+   // here we actually do the moving of args, and store them to the stack
+   // too if this is a varargs function:
+   for (int i = 0; i < count && i < 8; ++i) {
+     BuildMI(&BB, argOpc[i], 1, argVreg[i]).addReg(argPreg[i]);
+     if(F.isVarArg()) {
+       // if this is a varargs function, we copy the input registers to the stack
+       int FI = MFI->CreateFixedObject(8, tempOffset);
+       tempOffset+=8;   //XXX: is it safe to use r22 like this?
+       BuildMI(&BB, IA64::MOV, 1, IA64::r22).addFrameIndex(FI);
+       // FIXME: we should use st8.spill here, one day
+       BuildMI(&BB, IA64::ST8, 1, IA64::r22).addReg(argPreg[i]);
+     }
+   }
+ 
+   // Finally, inform the code generator which regs we return values in.
+   // (see the ISD::RET: case in the instruction selector)
+   // Integers come back in r8, FP values in f8.
+   switch (getValueType(F.getReturnType())) {
+   default: assert(0 && "i have no idea where to return this type!");
+   case MVT::isVoid: break;
+   case MVT::i1:
+   case MVT::i8:
+   case MVT::i16:
+   case MVT::i32:
+   case MVT::i64:
+     MF.addLiveOut(IA64::r8);
+     break;
+   case MVT::f32:
+   case MVT::f64:
+     MF.addLiveOut(IA64::F8);
+     break;
+   }
+ 
+   return ArgValues;
+ }
+ 
+ /// LowerCallTo - Lower an abstract call into a CALLSEQ_START / CALL /
+ /// CALLSEQ_END node sequence.  Small integer args are promoted to i64
+ /// (sign- or zero-extended by signedness) and f32 args to f64 before being
+ /// handed to the call node.  Returns the (call result, output chain) pair.
+ std::pair<SDOperand, SDOperand>
+ IA64TargetLowering::LowerCallTo(SDOperand Chain,
+                                 const Type *RetTy, bool isVarArg,
+                                 unsigned CallingConv, bool isTailCall,
+                                 SDOperand Callee, ArgListTy &Args,
+                                 SelectionDAG &DAG) {
+ 
+   MachineFunction &MF = DAG.getMachineFunction();
+ 
+   // 16 bytes of call-frame space are always reserved; each argument past
+   // the first 8 needs one additional 8-byte stack slot.
+   unsigned NumBytes = 16;
+   unsigned outRegsUsed = 0;
+ 
+   if (Args.size() > 8) {
+     NumBytes += (Args.size() - 8) * 8;
+     outRegsUsed = 8;
+   } else {
+     outRegsUsed = Args.size();
+   }
+ 
+   // FIXME? this WILL fail if we ever try to pass around an arg that
+   // consumes more than a single output slot (a 'real' double, int128
+   // some sort of aggregate etc.), as we'll underestimate how many 'outX'
+   // registers we use. Hopefully, the assembler will notice.
+   // Record the high-water mark of out-registers used by any call in this
+   // function (consumed later when the real 'alloc' is emitted).
+   MF.getInfo<IA64FunctionInfo>()->outRegsUsed=
+     std::max(outRegsUsed, MF.getInfo<IA64FunctionInfo>()->outRegsUsed);
+ 
+   Chain = DAG.getNode(ISD::CALLSEQ_START, MVT::Other, Chain,
+                         DAG.getConstant(NumBytes, getPointerTy()));
+ 
+   std::vector<SDOperand> args_to_use;
+   for (unsigned i = 0, e = Args.size(); i != e; ++i)
+     {
+       switch (getValueType(Args[i].second)) {
+       default: assert(0 && "unexpected argument type!");
+       case MVT::i1:
+       case MVT::i8:
+       case MVT::i16:
+       case MVT::i32:
+         //promote to 64-bits, sign/zero extending based on type
+         //of the argument
+         if(Args[i].second->isSigned())
+           Args[i].first = DAG.getNode(ISD::SIGN_EXTEND, MVT::i64,
+               Args[i].first);
+         else
+           Args[i].first = DAG.getNode(ISD::ZERO_EXTEND, MVT::i64,
+               Args[i].first);
+         break;
+       case MVT::f32:
+         //promote to 64-bits
+         Args[i].first = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Args[i].first);
+         // FALL THROUGH (intentional): the extended value needs no further
+         // handling, just like a native f64/i64.
+       case MVT::f64:
+       case MVT::i64:
+         break;
+       }
+       args_to_use.push_back(Args[i].first);
+     }
+ 
+   // Result types of the call node: the return value (if any), then a chain.
+   std::vector<MVT::ValueType> RetVals;
+   MVT::ValueType RetTyVT = getValueType(RetTy);
+   if (RetTyVT != MVT::isVoid)
+     RetVals.push_back(RetTyVT);
+   RetVals.push_back(MVT::Other);
+ 
+   SDOperand TheCall = SDOperand(DAG.getCall(RetVals, Chain,
+                                             Callee, args_to_use), 0);
+   // The chain is result 1 when the call produces a value, else result 0.
+   Chain = TheCall.getValue(RetTyVT != MVT::isVoid);
+   Chain = DAG.getNode(ISD::CALLSEQ_END, MVT::Other, Chain,
+                       DAG.getConstant(NumBytes, getPointerTy()));
+   return std::make_pair(TheCall, Chain);
+ }
+ 
+ /// LowerVAStart - Lower llvm.va_start by storing the address of the
+ /// VarArgsFrameIndex slot (set up in LowerArguments) into the va_list
+ /// object pointed to by VAListP.
+ SDOperand
+ IA64TargetLowering::LowerVAStart(SDOperand Chain, SDOperand VAListP,
+                                  Value *VAListV, SelectionDAG &DAG) {
+   // vastart just stores the address of the VarArgsFrameIndex slot.
+   SDOperand FR = DAG.getFrameIndex(VarArgsFrameIndex, MVT::i64);
+   return DAG.getNode(ISD::STORE, MVT::Other, Chain, FR,
+                      VAListP, DAG.getSrcValue(VAListV));
+ }
+ 
+ /// LowerVAArg - Lower llvm.va_arg: load the current pointer out of the
+ /// va_list, load the argument from it, then advance the pointer by one
+ /// 8-byte slot and store it back.  Returns (loaded value, output chain).
+ std::pair<SDOperand,SDOperand> IA64TargetLowering::
+ LowerVAArg(SDOperand Chain, SDOperand VAListP, Value *VAListV,
+            const Type *ArgTy, SelectionDAG &DAG) {
+ 
+   MVT::ValueType ArgVT = getValueType(ArgTy);
+   SDOperand Val = DAG.getLoad(MVT::i64, Chain,
+                               VAListP, DAG.getSrcValue(VAListV));
+   SDOperand Result = DAG.getLoad(ArgVT, DAG.getEntryNode(), Val,
+                                  DAG.getSrcValue(NULL));
+   // Every vararg slot is 8 bytes wide, so both branches below set Amt to 8;
+   // the else arm exists only to assert that no unexpected (unpromoted) type
+   // slipped through.
+   unsigned Amt;
+   if (ArgVT == MVT::i32 || ArgVT == MVT::f32)
+     Amt = 8;
+   else {
+     assert((ArgVT == MVT::i64 || ArgVT == MVT::f64) &&
+            "Other types should have been promoted for varargs!");
+     Amt = 8;
+   }
+   // Bump the saved pointer past the slot we just read and write it back.
+   Val = DAG.getNode(ISD::ADD, Val.getValueType(), Val,
+                     DAG.getConstant(Amt, Val.getValueType()));
+   Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain,
+                       Val, VAListP, DAG.getSrcValue(VAListV));
+   return std::make_pair(Result, Chain);
+ }
+ 
+ 
+ 
+ /// LowerFrameReturnAddress - Lower llvm.frameaddress / llvm.returnaddress.
+ /// Not implemented for IA64: always asserts and aborts.
+ std::pair<SDOperand, SDOperand> IA64TargetLowering::
+ LowerFrameReturnAddress(bool isFrameAddress, SDOperand Chain, unsigned Depth,
+                         SelectionDAG &DAG) {
+   assert(0 && "LowerFrameReturnAddress unimplemented");
+   abort();
+ }
+ 


Index: llvm/lib/Target/IA64/IA64ISelLowering.h
diff -c /dev/null llvm/lib/Target/IA64/IA64ISelLowering.h:1.1.4.2
*** /dev/null	Wed Nov 16 12:32:47 2005
--- llvm/lib/Target/IA64/IA64ISelLowering.h	Wed Nov 16 12:32:36 2005
***************
*** 0 ****
--- 1,88 ----
+ //===-- IA64ISelLowering.h - IA64 DAG Lowering Interface --------*- C++ -*-===//
+ //
+ //                     The LLVM Compiler Infrastructure
+ //
+ // This file was developed by Duraid Madina and is distributed under
+ // the University of Illinois Open Source License. See LICENSE.TXT for details.
+ //
+ //===----------------------------------------------------------------------===//
+ //
+ // This file defines the interfaces that IA64 uses to lower LLVM code into a
+ // selection DAG.
+ //
+ //===----------------------------------------------------------------------===//
+ 
+ #ifndef LLVM_TARGET_IA64_IA64ISELLOWERING_H
+ #define LLVM_TARGET_IA64_IA64ISELLOWERING_H
+ 
+ #include "llvm/Target/TargetLowering.h"
+ #include "llvm/CodeGen/SelectionDAG.h"
+ #include "IA64.h"
+ 
+ namespace llvm {
+   namespace IA64ISD {
+     enum NodeType {
+       // Start the numbering where the builtin ops and target ops leave off.
+       FIRST_NUMBER = ISD::BUILTIN_OP_END+IA64::INSTRUCTION_LIST_END,
+ 
+       // NOTE(review): the names and comments below (FSEL, FCFID,
+       // FCTIDZ/FCTIWZ) look copied from the PowerPC backend -- confirm
+       // whether the IA64 backend actually creates any of these nodes.
+ 
+       /// FSEL - Traditional three-operand fsel node.
+       ///
+       FSEL,
+       
+       /// FCFID - The FCFID instruction, taking an f64 operand and producing
+       /// an f64 value containing the FP representation of the integer that
+       /// was temporarily in the f64 operand.
+       FCFID,
+       
+       /// FCTI[D,W]Z - The FCTIDZ and FCTIWZ instructions, taking an f32 or f64 
+       /// operand, producing an f64 value containing the integer representation
+       /// of that FP value.
+       FCTIDZ, FCTIWZ,
+     };
+   }  
+   
+   /// IA64TargetLowering - TargetLowering implementation for the IA64
+   /// backend: describes calling-convention lowering (arguments, calls,
+   /// varargs) to the SelectionDAG framework.
+   class IA64TargetLowering : public TargetLowering {
+     int VarArgsFrameIndex;            // FrameIndex for start of varargs area.
+     //int ReturnAddrIndex;              // FrameIndex for return slot.
+     unsigned GP, SP, RP; // FIXME - clean this mess up
+ 	  
+   public:
+     IA64TargetLowering(TargetMachine &TM);
+ 
+     unsigned VirtGPR; // this is public so it can be accessed in the selector
+                       // for ISD::RET. add an accessor instead? FIXME
+ 	    
+     /// LowerOperation - Provide custom lowering hooks for some operations.
+     ///
+ // XXX    virtual SDOperand LowerOperation(SDOperand Op, SelectionDAG &DAG);
+     
+     /// LowerArguments - This hook must be implemented to indicate how we should
+     /// lower the arguments for the specified function, into the specified DAG.
+     virtual std::vector<SDOperand>
+       LowerArguments(Function &F, SelectionDAG &DAG);
+     
+     /// LowerCallTo - This hook lowers an abstract call to a function into an
+     /// actual call.
+     virtual std::pair<SDOperand, SDOperand>
+       LowerCallTo(SDOperand Chain, const Type *RetTy, bool isVarArg,
+                   unsigned CC,
+                   bool isTailCall, SDOperand Callee, ArgListTy &Args,
+                   SelectionDAG &DAG);
+     
+     /// LowerVAStart - Lower llvm.va_start.
+     virtual SDOperand LowerVAStart(SDOperand Chain, SDOperand VAListP,
+                                    Value *VAListV, SelectionDAG &DAG);
+     
+     /// LowerVAArg - Lower llvm.va_arg.
+     virtual std::pair<SDOperand,SDOperand>
+       LowerVAArg(SDOperand Chain, SDOperand VAListP, Value *VAListV,
+                  const Type *ArgTy, SelectionDAG &DAG);
+     
+     /// LowerFrameReturnAddress - Lower llvm.frameaddress/llvm.returnaddress
+     /// (currently unimplemented in the .cpp).
+     virtual std::pair<SDOperand, SDOperand>
+       LowerFrameReturnAddress(bool isFrameAddr, SDOperand Chain, unsigned Depth,
+                               SelectionDAG &DAG);
+     
+ // XXX    virtual MachineBasicBlock *InsertAtEndOfBasicBlock(MachineInstr *MI,
+ // XXX                                                      MachineBasicBlock *MBB);
+   };
+ }
+ 
+ #endif   // LLVM_TARGET_IA64_IA64ISELLOWERING_H


Index: llvm/lib/Target/IA64/IA64ISelPattern.cpp
diff -u llvm/lib/Target/IA64/IA64ISelPattern.cpp:1.66 llvm/lib/Target/IA64/IA64ISelPattern.cpp:1.66.2.1
--- llvm/lib/Target/IA64/IA64ISelPattern.cpp:1.66	Thu Oct  6 23:50:48 2005
+++ llvm/lib/Target/IA64/IA64ISelPattern.cpp	Wed Nov 16 12:32:36 2005
@@ -84,7 +84,7 @@
       
       setOperationAction(ISD::SINT_TO_FP       , MVT::i1   , Promote);
       setOperationAction(ISD::UINT_TO_FP       , MVT::i1   , Promote);
-
+      
       // We don't support sin/cos/sqrt
       setOperationAction(ISD::FSIN , MVT::f64, Expand);
       setOperationAction(ISD::FCOS , MVT::f64, Expand);
@@ -96,6 +96,9 @@
       //IA64 has these, but they are not implemented
       setOperationAction(ISD::CTTZ , MVT::i64  , Expand);
       setOperationAction(ISD::CTLZ , MVT::i64  , Expand);
+      // FIXME: implement mulhs (xma.h) and mulhu (xma.hu)
+      setOperationAction(ISD::MULHS , MVT::i64  , Expand);
+      setOperationAction(ISD::MULHU , MVT::i64  , Expand);
 
       computeRegisterProperties();
 
@@ -1463,7 +1466,7 @@
 */
       BuildMI(BB, IA64::PCMPEQUNC, 3, pTemp1)
         .addReg(IA64::r0).addReg(IA64::r0).addReg(pA);
-      BuildMI(BB, IA64::TPCMPEQ, 3, Result)
+      BuildMI(BB, IA64::TPCMPEQ, 4, Result)
         .addReg(pTemp1).addReg(IA64::r0).addReg(IA64::r0).addReg(pB);
       break;
     }
@@ -1954,8 +1957,13 @@
       Select(Chain);
       IA64Lowering.restoreGP(BB);
       unsigned dummy = MakeReg(MVT::i64);
-      BuildMI(BB, IA64::ADD, 2, dummy).addConstantPoolIndex(CPIdx)
-        .addReg(IA64::r1); // CPI+GP
+      unsigned dummy2 = MakeReg(MVT::i64);
+      BuildMI(BB, IA64::MOVLIMM64, 1, dummy2).addConstantPoolIndex(CPIdx);
+      BuildMI(BB, IA64::ADD, 2, dummy).addReg(dummy2).addReg(IA64::r1); //CPI+GP
+
+
+ // OLD     BuildMI(BB, IA64::ADD, 2, dummy).addConstantPoolIndex(CPIdx)
+ // (FIXME!)      .addReg(IA64::r1); // CPI+GP
       if(!isBool)
         BuildMI(BB, Opc, 1, Result).addReg(dummy);
       else { // emit a little pseudocode to load a bool (stored in one byte)


Index: llvm/lib/Target/IA64/IA64InstrFormats.td
diff -u llvm/lib/Target/IA64/IA64InstrFormats.td:1.1 llvm/lib/Target/IA64/IA64InstrFormats.td:1.1.4.1
--- llvm/lib/Target/IA64/IA64InstrFormats.td:1.1	Thu Mar 17 12:17:03 2005
+++ llvm/lib/Target/IA64/IA64InstrFormats.td	Wed Nov 16 12:32:36 2005
@@ -36,6 +36,14 @@
   let Inst{5-0} = qpReg;
 }
 
+class AForm_DAG<bits<4> opcode, bits<6> qpReg, dag OL, string asmstr,
+      list<dag> pattern> : 
+  InstIA64<opcode, OL, asmstr> {
+
+  let Pattern = pattern;
+  let Inst{5-0} = qpReg;
+}
+
 let isBranch = 1, isTerminator = 1 in
 class BForm<bits<4> opcode, bits<6> x6, bits<3> btype, dag OL, string asmstr> :
   InstIA64<opcode, OL, asmstr> {
@@ -64,4 +72,8 @@
 class PseudoInstIA64<dag OL, string nm> : InstIA64<0, OL, nm>  {
 }
 
+class PseudoInstIA64_DAG<dag OL, string nm, list<dag> pattern>
+  : InstIA64<0, OL, nm> {
+  let Pattern = pattern;
+}
 


Index: llvm/lib/Target/IA64/IA64InstrInfo.td
diff -u llvm/lib/Target/IA64/IA64InstrInfo.td:1.15 llvm/lib/Target/IA64/IA64InstrInfo.td:1.15.2.1
--- llvm/lib/Target/IA64/IA64InstrInfo.td:1.15	Wed Sep 14 16:11:13 2005
+++ llvm/lib/Target/IA64/IA64InstrInfo.td	Wed Nov 16 12:32:36 2005
@@ -15,11 +15,12 @@
 
 include "IA64InstrFormats.td"
 
+def u2imm : Operand<i8>;
 def u6imm : Operand<i8>;
 def s8imm : Operand<i8> {
   let PrintMethod = "printS8ImmOperand";
 }
-def s14imm  : Operand<i16> {
+def s14imm  : Operand<i64> {
   let PrintMethod = "printS14ImmOperand";
 }
 def s22imm  : Operand<i32> {
@@ -32,24 +33,363 @@
   let PrintMethod = "printS64ImmOperand";
 }
 
+let PrintMethod = "printGlobalOperand" in
+  def globaladdress : Operand<i64>;
+
 // the asmprinter needs to know about calls
 let PrintMethod = "printCallOperand" in
   def calltarget : Operand<i64>;
   
-def PHI : PseudoInstIA64<(ops variable_ops), "PHI">;
-def IDEF : PseudoInstIA64<(ops variable_ops), "// IDEF">;
-def IUSE : PseudoInstIA64<(ops variable_ops), "// IUSE">;
-def ADJUSTCALLSTACKUP : PseudoInstIA64<(ops variable_ops),
-                                        "// ADJUSTCALLSTACKUP">;
-def ADJUSTCALLSTACKDOWN : PseudoInstIA64<(ops variable_ops),
-                                         "// ADJUSTCALLSTACKDOWN">;
-def PSEUDO_ALLOC : PseudoInstIA64<(ops GR:$foo), "// PSEUDO_ALLOC">;
+/* new daggy action!!! */
 
-def ALLOC : AForm<0x03, 0x0b,
-  (ops GR:$dst, i8imm:$inputs, i8imm:$locals, i8imm:$outputs, i8imm:$rotating),
-    "alloc $dst = ar.pfs,$inputs,$locals,$outputs,$rotating;;">;
+def is32ones : PatLeaf<(i64 imm), [{
+  // is32ones predicate - True if the immediate is 0x00000000FFFFFFFF 
+  // Used to create ZXT4s appropriately 
+  uint64_t v = (uint64_t)N->getValue();
+  return (v == 0x00000000FFFFFFFFLL);
+}]>;
+
+// isMIXable predicates - True if the immediate is
+// 0xFF00FF00FF00FF00, 0x00FF00FF00FF00FF
+// etc, through 0x00000000FFFFFFFF
+// Used to test for the suitability of mix* 
+def isMIX1Lable: PatLeaf<(i64 imm), [{
+  return((uint64_t)N->getValue()==0xFF00FF00FF00FF00LL);
+}]>;
+def isMIX1Rable: PatLeaf<(i64 imm), [{
+  return((uint64_t)N->getValue()==0x00FF00FF00FF00FFLL);
+}]>;
+def isMIX2Lable: PatLeaf<(i64 imm), [{
+  return((uint64_t)N->getValue()==0xFFFF0000FFFF0000LL);
+}]>;
+def isMIX2Rable: PatLeaf<(i64 imm), [{
+  return((uint64_t)N->getValue()==0x0000FFFF0000FFFFLL);
+}]>;
+def isMIX4Lable: PatLeaf<(i64 imm), [{
+  return((uint64_t)N->getValue()==0xFFFFFFFF00000000LL);
+}]>;
+def isMIX4Rable: PatLeaf<(i64 imm), [{
+  return((uint64_t)N->getValue()==0x00000000FFFFFFFFLL);
+}]>;
+
+def isSHLADDimm: PatLeaf<(i64 imm), [{
+  // isSHLADDimm predicate - True if the immediate is exactly 1, 2, 3 or 4
+  // - 0 is *not* okay.
+  // Used to create shladd instructions appropriately
+  int64_t v = (int64_t)N->getValue();
+  return (v >= 1 && v <= 4);
+}]>;
+
+def immSExt14  : PatLeaf<(i64 imm), [{
+  // immSExt14 predicate - True if the immediate fits in a 14-bit sign extended
+  // field.  Used by instructions like 'adds'.
+  int64_t v = (int64_t)N->getValue();
+  return (v <= 8191 && v >= -8192);
+}]>;
+
+def imm64  : PatLeaf<(i64 imm), [{
+  // imm64 predicate - True if the immediate fits in a 64-bit 
+  // field - i.e., true. used to keep movl happy
+  return true;
+}]>;
+
+def ADD  : AForm_DAG<0x03, 0x0b, (ops GR:$dst, GR:$src1, GR:$src2),
+           "add $dst = $src1, $src2;;",
+	   [(set GR:$dst, (add GR:$src1, GR:$src2))]>;
+
+def ADD1 : AForm_DAG<0x03, 0x0b, (ops GR:$dst, GR:$src1, GR:$src2),
+           "add $dst = $src1, $src2, 1;;",
+	   [(set GR:$dst, (add (add GR:$src1, GR:$src2), 1))]>;
+
+def ADDS : AForm_DAG<0x03, 0x0b, (ops GR:$dst, GR:$src1, s14imm:$imm),
+           "adds $dst = $imm, $src1;;",
+	   [(set GR:$dst, (add GR:$src1, immSExt14:$imm))]>;
+ 
+def PADDS: AForm_DAG<0x03, 0x0b, (ops GR:$dst, GR:$src1, s14imm:$imm, PR:$qp),
+           "($qp) adds $dst = $imm, $src1;;",
+	   []>;
+
+def MOVL : AForm_DAG<0x03, 0x0b, (ops GR:$dst, s64imm:$imm),
+           "movl $dst = $imm;;",
+	   [(set GR:$dst, imm64:$imm)]>;
+
+def ADDL_GA : AForm_DAG<0x03, 0x0b, (ops GR:$dst, GR:$src1, globaladdress:$imm),
+           "addl $dst = $imm, $src1;;",
+	   []>;
+  
+def SUB  : AForm_DAG<0x03, 0x0b, (ops GR:$dst, GR:$src1, GR:$src2),
+           "sub $dst = $src1, $src2;;",
+	   [(set GR:$dst, (sub GR:$src1, GR:$src2))]>;
+
+def SUB1 : AForm_DAG<0x03, 0x0b, (ops GR:$dst, GR:$src1, GR:$src2),
+           "sub $dst = $src1, $src2, 1;;",
+	   [(set GR:$dst, (add (sub GR: $src1, GR:$src2), -1))]>;
+
+let isTwoAddress = 1 in {
+def TPCADDIMM22 : AForm<0x03, 0x0b,
+  (ops GR:$dst, GR:$src1, s22imm:$imm, PR:$qp),
+    "($qp) add $dst = $imm, $dst;;">;
+def TPCMPIMM8NE : AForm<0x03, 0x0b,
+  (ops PR:$dst, PR:$src1, s22imm:$imm, GR:$src2, PR:$qp),
+    "($qp) cmp.ne $dst , p0 = $imm, $src2;;">;
+}
+
+// zero extend a bool (predicate reg) into an integer reg
+def ZXTb : Pat<(zext PR:$src),
+          (TPCADDIMM22 (ADDS r0, 0), 1, PR:$src)>;
+
+// normal sign/zero-extends
+def SXT1 : AForm_DAG<0x03, 0x0b, (ops GR:$dst, GR:$src), "sxt1 $dst = $src;;",
+           [(set GR:$dst, (sext_inreg GR:$src, i8))]>;
+def ZXT1 : AForm_DAG<0x03, 0x0b, (ops GR:$dst, GR:$src), "zxt1 $dst = $src;;",
+           [(set GR:$dst, (and GR:$src, 255))]>;
+def SXT2 : AForm_DAG<0x03, 0x0b, (ops GR:$dst, GR:$src), "sxt2 $dst = $src;;",
+           [(set GR:$dst, (sext_inreg GR:$src, i16))]>;
+def ZXT2 : AForm_DAG<0x03, 0x0b, (ops GR:$dst, GR:$src), "zxt2 $dst = $src;;",
+           [(set GR:$dst, (and GR:$src, 65535))]>;
+def SXT4 : AForm_DAG<0x03, 0x0b, (ops GR:$dst, GR:$src), "sxt4 $dst = $src;;",
+           [(set GR:$dst, (sext_inreg GR:$src, i32))]>;
+def ZXT4 : AForm_DAG<0x03, 0x0b, (ops GR:$dst, GR:$src), "zxt4 $dst = $src;;",
+           [(set GR:$dst, (and GR:$src, is32ones))]>;
+
+// fixme: shrs vs shru?
+def MIX1L : AForm_DAG<0x03, 0x0b, (ops GR:$dst, GR:$src1, GR:$src2),
+          "mix1.l $dst = $src1, $src2;;",
+	  [(set GR:$dst, (or (and GR:$src1, isMIX1Lable),
+	                     (and (srl GR:$src2, 8), isMIX1Lable)))]>;
+
+def MIX2L : AForm_DAG<0x03, 0x0b, (ops GR:$dst, GR:$src1, GR:$src2),
+          "mix2.l $dst = $src1, $src2;;",
+	  [(set GR:$dst, (or (and GR:$src1, isMIX2Lable),
+	                     (and (srl GR:$src2, 16), isMIX2Lable)))]>;
+
+def MIX4L : AForm_DAG<0x03, 0x0b, (ops GR:$dst, GR:$src1, GR:$src2),
+          "mix4.l $dst = $src1, $src2;;",
+	  [(set GR:$dst, (or (and GR:$src1, isMIX4Lable),
+	                     (and (srl GR:$src2, 32), isMIX4Lable)))]>;
+
+def MIX1R : AForm_DAG<0x03, 0x0b, (ops GR:$dst, GR:$src1, GR:$src2),
+          "mix1.r $dst = $src1, $src2;;",
+	  [(set GR:$dst, (or (and (shl GR:$src1, 8), isMIX1Rable),
+	                     (and GR:$src2, isMIX1Rable)))]>;
+
+def MIX2R : AForm_DAG<0x03, 0x0b, (ops GR:$dst, GR:$src1, GR:$src2),
+          "mix2.r $dst = $src1, $src2;;",
+	  [(set GR:$dst, (or (and (shl GR:$src1, 16), isMIX2Rable),
+	                     (and GR:$src2, isMIX2Rable)))]>;
+
+def MIX4R : AForm_DAG<0x03, 0x0b, (ops GR:$dst, GR:$src1, GR:$src2),
+          "mix4.r $dst = $src1, $src2;;",
+	  [(set GR:$dst, (or (and (shl GR:$src1, 32), isMIX4Rable),
+	                     (and GR:$src2, isMIX4Rable)))]>;
+
+def GETFSIGD : AForm_DAG<0x03, 0x0b, (ops GR:$dst, FP:$src),
+  "getf.sig $dst = $src;;",
+  []>;
+
+def SETFSIGD : AForm_DAG<0x03, 0x0b, (ops FP:$dst, GR:$src),
+  "setf.sig $dst = $src;;",
+  []>;
+
+def XMALD : AForm_DAG<0x03, 0x0b, (ops FP:$dst, FP:$src1, FP:$src2, FP:$src3),
+  "xma.l $dst = $src1, $src2, $src3;;",
+  []>;
+def XMAHD : AForm_DAG<0x03, 0x0b, (ops FP:$dst, FP:$src1, FP:$src2, FP:$src3),
+  "xma.h $dst = $src1, $src2, $src3;;",
+  []>;
+def XMAHUD : AForm_DAG<0x03, 0x0b, (ops FP:$dst, FP:$src1, FP:$src2, FP:$src3),
+  "xma.hu $dst = $src1, $src2, $src3;;",
+  []>;
+
+// pseudocode for integer multiplication 
+def : Pat<(mul GR:$src1, GR:$src2),
+           (GETFSIGD (XMALD (SETFSIGD GR:$src1), (SETFSIGD GR:$src2), F0))>;
+def : Pat<(mulhs GR:$src1, GR:$src2),
+           (GETFSIGD (XMAHD (SETFSIGD GR:$src1), (SETFSIGD GR:$src2), F0))>;
+def : Pat<(mulhu GR:$src1, GR:$src2),
+           (GETFSIGD (XMAHUD (SETFSIGD GR:$src1), (SETFSIGD GR:$src2), F0))>;
+
+// TODO: addp4 (addp4 dst = src, r0 is a 32-bit add)
+// has imm form, too
+
+// def ADDS : AForm<0x03, 0x0b, (ops GR:$dst, GR:$src1, s14imm:$imm),
+//   "adds $dst = $imm, $src1;;">;
+
+def AND   : AForm_DAG<0x03, 0x0b, (ops GR:$dst, GR:$src1, GR:$src2),
+          "and $dst = $src1, $src2;;",
+	  [(set GR:$dst, (and GR:$src1, GR:$src2))]>;
+def ANDCM : AForm_DAG<0x03, 0x0b, (ops GR:$dst, GR:$src1, GR:$src2),
+          "andcm $dst = $src1, $src2;;",
+	  [(set GR:$dst, (and GR:$src1, (not GR:$src2)))]>;
+// TODO: and/andcm/or/xor/add/sub/shift immediate forms
+def OR    : AForm_DAG<0x03, 0x0b, (ops GR:$dst, GR:$src1, GR:$src2),
+          "or $dst = $src1, $src2;;",
+	  [(set GR:$dst, (or GR:$src1, GR:$src2))]>;
+
+def pOR   : AForm<0x03, 0x0b, (ops GR:$dst, GR:$src1, GR:$src2, PR:$qp),
+          "($qp) or $dst = $src1, $src2;;">;
+
+// the following are all a bit unfortunate: we throw away the complement
+// of the compare!
+def CMPEQ : AForm_DAG<0x03, 0x0b, (ops PR:$dst, GR:$src1, GR:$src2),
+          "cmp.eq $dst, p0 = $src1, $src2;;",
+	  [(set PR:$dst, (seteq GR:$src1, GR:$src2))]>;
+def CMPGT : AForm_DAG<0x03, 0x0b, (ops PR:$dst, GR:$src1, GR:$src2),
+          "cmp.gt $dst, p0 = $src1, $src2;;",
+	  [(set PR:$dst, (setgt GR:$src1, GR:$src2))]>;
+def CMPGE : AForm_DAG<0x03, 0x0b, (ops PR:$dst, GR:$src1, GR:$src2),
+          "cmp.ge $dst, p0 = $src1, $src2;;",
+	  [(set PR:$dst, (setge GR:$src1, GR:$src2))]>;
+def CMPLT : AForm_DAG<0x03, 0x0b, (ops PR:$dst, GR:$src1, GR:$src2),
+          "cmp.lt $dst, p0 = $src1, $src2;;",
+	  [(set PR:$dst, (setlt GR:$src1, GR:$src2))]>;
+def CMPLE : AForm_DAG<0x03, 0x0b, (ops PR:$dst, GR:$src1, GR:$src2),
+          "cmp.le $dst, p0 = $src1, $src2;;",
+	  [(set PR:$dst, (setle GR:$src1, GR:$src2))]>;
+def CMPNE : AForm_DAG<0x03, 0x0b, (ops PR:$dst, GR:$src1, GR:$src2),
+          "cmp.ne $dst, p0 = $src1, $src2;;",
+	  [(set PR:$dst, (setne GR:$src1, GR:$src2))]>;
+def CMPLTU: AForm_DAG<0x03, 0x0b, (ops PR:$dst, GR:$src1, GR:$src2),
+          "cmp.ltu $dst, p0 = $src1, $src2;;",
+	  [(set PR:$dst, (setult GR:$src1, GR:$src2))]>;
+def CMPGTU: AForm_DAG<0x03, 0x0b, (ops PR:$dst, GR:$src1, GR:$src2),
+          "cmp.gtu $dst, p0 = $src1, $src2;;",
+	  [(set PR:$dst, (setugt GR:$src1, GR:$src2))]>;
+def CMPLEU: AForm_DAG<0x03, 0x0b, (ops PR:$dst, GR:$src1, GR:$src2),
+          "cmp.leu $dst, p0 = $src1, $src2;;",
+	  [(set PR:$dst, (setule GR:$src1, GR:$src2))]>;
+def CMPGEU: AForm_DAG<0x03, 0x0b, (ops PR:$dst, GR:$src1, GR:$src2),
+          "cmp.geu $dst, p0 = $src1, $src2;;",
+	  [(set PR:$dst, (setuge GR:$src1, GR:$src2))]>;
+
+// and we do the whole thing again for FP compares!
+def FCMPEQ : AForm_DAG<0x03, 0x0b, (ops PR:$dst, FP:$src1, FP:$src2),
+          "fcmp.eq $dst, p0 = $src1, $src2;;",
+          [(set PR:$dst, (seteq FP:$src1, FP:$src2))]>;
+def FCMPGT : AForm_DAG<0x03, 0x0b, (ops PR:$dst, FP:$src1, FP:$src2),
+          "fcmp.gt $dst, p0 = $src1, $src2;;",
+	  [(set PR:$dst, (setgt FP:$src1, FP:$src2))]>;
+def FCMPGE : AForm_DAG<0x03, 0x0b, (ops PR:$dst, FP:$src1, FP:$src2),
+          "fcmp.ge $dst, p0 = $src1, $src2;;",
+	  [(set PR:$dst, (setge FP:$src1, FP:$src2))]>;
+def FCMPLT : AForm_DAG<0x03, 0x0b, (ops PR:$dst, FP:$src1, FP:$src2),
+          "fcmp.lt $dst, p0 = $src1, $src2;;",
+	  [(set PR:$dst, (setlt FP:$src1, FP:$src2))]>;
+def FCMPLE : AForm_DAG<0x03, 0x0b, (ops PR:$dst, FP:$src1, FP:$src2),
+          "fcmp.le $dst, p0 = $src1, $src2;;",
+	  [(set PR:$dst, (setle FP:$src1, FP:$src2))]>;
+def FCMPNE : AForm_DAG<0x03, 0x0b, (ops PR:$dst, FP:$src1, FP:$src2),
+          "fcmp.neq $dst, p0 = $src1, $src2;;",
+	  [(set PR:$dst, (setne FP:$src1, FP:$src2))]>;
+def FCMPLTU: AForm_DAG<0x03, 0x0b, (ops PR:$dst, FP:$src1, FP:$src2),
+          "fcmp.ltu $dst, p0 = $src1, $src2;;",
+	  [(set PR:$dst, (setult FP:$src1, FP:$src2))]>;
+def FCMPGTU: AForm_DAG<0x03, 0x0b, (ops PR:$dst, FP:$src1, FP:$src2),
+          "fcmp.gtu $dst, p0 = $src1, $src2;;",
+	  [(set PR:$dst, (setugt FP:$src1, FP:$src2))]>;
+def FCMPLEU: AForm_DAG<0x03, 0x0b, (ops PR:$dst, FP:$src1, FP:$src2),
+          "fcmp.leu $dst, p0 = $src1, $src2;;",
+	  [(set PR:$dst, (setule FP:$src1, FP:$src2))]>;
+def FCMPGEU: AForm_DAG<0x03, 0x0b, (ops PR:$dst, FP:$src1, FP:$src2),
+          "fcmp.geu $dst, p0 = $src1, $src2;;",
+	  [(set PR:$dst, (setuge FP:$src1, FP:$src2))]>;
+
+def PCMPEQUNCR0R0 : AForm<0x03, 0x0b, (ops PR:$dst, PR:$qp),
+    "($qp) cmp.eq.unc $dst, p0 = r0, r0;;">;
+
+def : Pat<(trunc GR:$src),  // truncate i64 to i1
+          (CMPNE GR:$src, r0)>; // $src!=0? If so, PR:$dst=true
+	  
+let isTwoAddress=1 in {
+  def TPCMPEQR0R0 : AForm<0x03, 0x0b, (ops PR:$dst, PR:$bogus, PR:$qp),
+    "($qp) cmp.eq $dst, p0 = r0, r0;;">;
+  def TPCMPNER0R0 : AForm<0x03, 0x0b, (ops PR:$dst, PR:$bogus, PR:$qp),
+    "($qp) cmp.ne $dst, p0 = r0, r0;;">;
+}
+
+/* our pseudocode for OR on predicates is:
+pC = pA OR pB
+-------------
+(pA) cmp.eq.unc pC,p0 = r0,r0  // pC = pA
+ ;;
+(pB) cmp.eq pC,p0 = r0,r0 // if (pB) pC = 1 */
+
+def bOR   : Pat<(or PR:$src1, PR:$src2),
+          (TPCMPEQR0R0 (PCMPEQUNCR0R0 PR:$src1), PR:$src2)>;
+
+/* our pseudocode for AND on predicates is:
+ *
+(pA) cmp.eq.unc pC,p0 = r0,r0   // pC = pA
+     cmp.eq pTemp,p0 = r0,r0    // pTemp = NOT pB
+     ;;
+(pB) cmp.ne pTemp,p0 = r0,r0
+     ;;
+(pTemp)cmp.ne pC,p0 = r0,r0    // if (NOT pB) pC = 0  */
+
+def bAND  : Pat<(and PR:$src1, PR:$src2),
+          ( TPCMPNER0R0 (PCMPEQUNCR0R0 PR:$src1),
+	    (TPCMPNER0R0 (CMPEQ r0, r0), PR:$src2) )>;
+
+/* one possible routine for XOR on predicates is:
+
+      // Compute px = py ^ pz
+        // using sum of products: px = (py & !pz) | (pz & !py)
+        // Uses 5 instructions in 3 cycles.
+        // cycle 1
+(pz)    cmp.eq.unc      px = r0, r0     // px = pz
+(py)    cmp.eq.unc      pt = r0, r0     // pt = py
+        ;;
+        // cycle 2
+(pt)    cmp.ne.and      px = r0, r0     // px = px & !pt (px = pz & !pt)
+(pz)    cmp.ne.and      pt = r0, r0     // pt = pt & !pz
+        ;;
+        } { .mmi
+        // cycle 3
+(pt)    cmp.eq.or       px = r0, r0     // px = px | pt
+
+*** Another, which we use here, requires one scratch GR. It is:
+
+        mov             rt = 0          // initialize rt off critical path
+        ;;
+
+        // cycle 1
+(pz)    cmp.eq.unc      px = r0, r0     // px = pz
+(pz)    mov             rt = 1          // rt = pz
+        ;;
+        // cycle 2
+(py)    cmp.ne          px = 1, rt      // if (py) px = !pz
+
+.. these routines kindly provided by Jim Hull
+*/
+  
+def bXOR  : Pat<(xor PR:$src1, PR:$src2),
+          (TPCMPIMM8NE (PCMPEQUNCR0R0 PR:$src2), 1,
+	               (PADDS r0, 1, PR:$src2),
+                        PR:$src1)>;
+
+def XOR   : AForm_DAG<0x03, 0x0b, (ops GR:$dst, GR:$src1, GR:$src2),
+          "xor $dst = $src1, $src2;;",
+	  [(set GR:$dst, (xor GR:$src1, GR:$src2))]>;
+
+def SHLADD: AForm_DAG<0x03, 0x0b, (ops GR:$dst,GR:$src1,s64imm:$imm,GR:$src2),
+          "shladd $dst = $src1, $imm, $src2;;",
+          [(set GR:$dst, (add GR:$src2, (shl GR:$src1, isSHLADDimm:$imm)))]>;
+
+def SHL   : AForm_DAG<0x03, 0x0b, (ops GR:$dst, GR:$src1, GR:$src2),
+          "shl $dst = $src1, $src2;;",
+	  [(set GR:$dst, (shl GR:$src1, GR:$src2))]>;
+
+def SHRU  : AForm_DAG<0x03, 0x0b, (ops GR:$dst, GR:$src1, GR:$src2),
+          "shr.u $dst = $src1, $src2;;",
+	  [(set GR:$dst, (srl GR:$src1, GR:$src2))]>;
+
+def SHRS  : AForm_DAG<0x03, 0x0b, (ops GR:$dst, GR:$src1, GR:$src2),
+          "shr $dst = $src1, $src2;;",
+	  [(set GR:$dst, (sra GR:$src1, GR:$src2))]>;
 
 def MOV : AForm<0x03, 0x0b, (ops GR:$dst, GR:$src), "mov $dst = $src;;">;
+def FMOV : AForm<0x03, 0x0b, (ops FP:$dst, FP:$src),
+  "mov $dst = $src;;">; // XXX: there _is_ no fmov
 def PMOV : AForm<0x03, 0x0b, (ops GR:$dst, GR:$src, PR:$qp),
   "($qp) mov $dst = $src;;">;
 
@@ -71,6 +411,43 @@
     "($qp) mov $dst = $src;;">;
 }
 
+// TODO: select bools
+def SELECTINT : Pat<(select PR:$which, GR:$src1, GR:$src2),
+          (CMOV (MOV GR:$src2), GR:$src1, PR:$which)>; // note order!
+def SELECTFP : Pat<(select PR:$which, FP:$src1, FP:$src2),
+          (CFMOV (FMOV FP:$src2), FP:$src1, PR:$which)>; // note order!
+
+// load constants of various sizes // FIXME: prettyprint -ve constants
+def : Pat<(i64 immSExt14:$imm), (ADDS r0, immSExt14:$imm)>;
+def : Pat<(i64 imm64:$imm), (MOVL imm64:$imm)>;
+def : Pat<(i1 -1), (CMPEQ r0, r0)>; // TODO: this should just be a ref to p0
+def : Pat<(i1  0), (CMPNE r0, r0)>; // TODO: any instruction actually *using*
+                                    //       this predicate should be killed!
+
+// TODO: support postincrement (reg, imm9) loads+stores - this needs more
+// tablegen support
+
+def PHI : PseudoInstIA64<(ops variable_ops), "PHI">;
+def IDEF : PseudoInstIA64<(ops variable_ops), "// IDEF">;
+
+def IDEF_GR_D : PseudoInstIA64_DAG<(ops GR:$reg), "// $reg = IDEF",
+    [(set GR:$reg, (undef))]>;
+def IDEF_FP_D : PseudoInstIA64_DAG<(ops FP:$reg), "// $reg = IDEF",
+    [(set FP:$reg, (undef))]>;
+def IDEF_PR_D : PseudoInstIA64_DAG<(ops PR:$reg), "// $reg = IDEF",
+    [(set PR:$reg, (undef))]>;
+
+def IUSE : PseudoInstIA64<(ops variable_ops), "// IUSE">;
+def ADJUSTCALLSTACKUP : PseudoInstIA64<(ops variable_ops),
+                                        "// ADJUSTCALLSTACKUP">;
+def ADJUSTCALLSTACKDOWN : PseudoInstIA64<(ops variable_ops),
+                                         "// ADJUSTCALLSTACKDOWN">;
+def PSEUDO_ALLOC : PseudoInstIA64<(ops GR:$foo), "// PSEUDO_ALLOC">;
+
+def ALLOC : AForm<0x03, 0x0b,
+  (ops GR:$dst, i8imm:$inputs, i8imm:$locals, i8imm:$outputs, i8imm:$rotating),
+    "alloc $dst = ar.pfs,$inputs,$locals,$outputs,$rotating;;">;
+
 let isTwoAddress = 1 in {
   def TCMPNE : AForm<0x03, 0x0b,
   (ops PR:$dst, PR:$src2, GR:$src3, GR:$src4),
@@ -96,85 +473,18 @@
 def MOVLIMM64 : AForm<0x03, 0x0b, (ops GR:$dst, s64imm:$imm),
   "movl $dst = $imm;;">;
 
-def AND : AForm<0x03, 0x0b, (ops GR:$dst, GR:$src1, GR:$src2),
-  "and $dst = $src1, $src2;;">;
-def OR : AForm<0x03, 0x0b, (ops GR:$dst, GR:$src1, GR:$src2),
-  "or $dst = $src1, $src2;;">;
-def XOR : AForm<0x03, 0x0b, (ops GR:$dst, GR:$src1, GR:$src2),
-  "xor $dst = $src1, $src2;;">;
-def SHL : AForm<0x03, 0x0b, (ops GR:$dst, GR:$src1, GR:$src2),
-  "shl $dst = $src1, $src2;;">;
 def SHLI : AForm<0x03, 0x0b, (ops GR:$dst, GR:$src1, u6imm:$imm), 
   "shl $dst = $src1, $imm;;">;
-def SHRU : AForm<0x03, 0x0b, (ops GR:$dst, GR:$src1, GR:$src2),
-  "shr.u $dst = $src1, $src2;;">;
 def SHRUI : AForm<0x03, 0x0b, (ops GR:$dst, GR:$src1, u6imm:$imm),
   "shr.u $dst = $src1, $imm;;">;
-def SHRS : AForm<0x03, 0x0b, (ops GR:$dst, GR:$src1, GR:$src2),
-  "shr $dst = $src1, $src2;;">;
 def SHRSI : AForm<0x03, 0x0b, (ops GR:$dst, GR:$src1, u6imm:$imm),
   "shr $dst = $src1, $imm;;">;
 
-def SHLADD : AForm<0x03, 0x0b, (ops GR:$dst, GR:$src1, u6imm:$imm, GR:$src2), 
-  "shladd $dst = $src1, $imm, $src2;;">;
-
 def EXTRU : AForm<0x03, 0x0b, (ops GR:$dst, GR:$src1, u6imm:$imm1, u6imm:$imm2),
   "extr.u $dst = $src1, $imm1, $imm2;;">;
 
 def DEPZ : AForm<0x03, 0x0b, (ops GR:$dst, GR:$src1, u6imm:$imm1, u6imm:$imm2),	  "dep.z $dst = $src1, $imm1, $imm2;;">;
 
-def SXT1 : AForm<0x03, 0x0b, (ops GR:$dst, GR:$src), "sxt1 $dst = $src;;">;
-def ZXT1 : AForm<0x03, 0x0b, (ops GR:$dst, GR:$src), "zxt1 $dst = $src;;">;
-def SXT2 : AForm<0x03, 0x0b, (ops GR:$dst, GR:$src), "sxt2 $dst = $src;;">;
-def ZXT2 : AForm<0x03, 0x0b, (ops GR:$dst, GR:$src), "zxt2 $dst = $src;;">;
-def SXT4 : AForm<0x03, 0x0b, (ops GR:$dst, GR:$src), "sxt4 $dst = $src;;">;
-def ZXT4 : AForm<0x03, 0x0b, (ops GR:$dst, GR:$src), "zxt4 $dst = $src;;">;
-
-// the following are all a bit unfortunate: we throw away the complement
-// of the compare!
-def CMPEQ : AForm<0x03, 0x0b, (ops PR:$dst, GR:$src1, GR:$src2),
-  "cmp.eq $dst, p0 = $src1, $src2;;">;
-def CMPGT : AForm<0x03, 0x0b, (ops PR:$dst, GR:$src1, GR:$src2),
-  "cmp.gt $dst, p0 = $src1, $src2;;">;
-def CMPGE : AForm<0x03, 0x0b, (ops PR:$dst, GR:$src1, GR:$src2),
-  "cmp.ge $dst, p0 = $src1, $src2;;">;
-def CMPLT : AForm<0x03, 0x0b, (ops PR:$dst, GR:$src1, GR:$src2),
-  "cmp.lt $dst, p0 = $src1, $src2;;">;
-def CMPLE : AForm<0x03, 0x0b, (ops PR:$dst, GR:$src1, GR:$src2),
-  "cmp.le $dst, p0 = $src1, $src2;;">;
-def CMPNE : AForm<0x03, 0x0b, (ops PR:$dst, GR:$src1, GR:$src2),
-  "cmp.ne $dst, p0 = $src1, $src2;;">;
-def CMPLTU : AForm<0x03, 0x0b, (ops PR:$dst, GR:$src1, GR:$src2),
-  "cmp.ltu $dst, p0 = $src1, $src2;;">;
-def CMPGTU : AForm<0x03, 0x0b, (ops PR:$dst, GR:$src1, GR:$src2),
-  "cmp.gtu $dst, p0 = $src1, $src2;;">;
-def CMPLEU : AForm<0x03, 0x0b, (ops PR:$dst, GR:$src1, GR:$src2),
-  "cmp.leu $dst, p0 = $src1, $src2;;">;
-def CMPGEU : AForm<0x03, 0x0b, (ops PR:$dst, GR:$src1, GR:$src2),
-  "cmp.geu $dst, p0 = $src1, $src2;;">;
-
-// and we do the whole thing again for FP compares!
-def FCMPEQ : AForm<0x03, 0x0b, (ops PR:$dst, FP:$src1, FP:$src2),
-  "fcmp.eq $dst, p0 = $src1, $src2;;">;
-def FCMPGT : AForm<0x03, 0x0b, (ops PR:$dst, FP:$src1, FP:$src2),
-  "fcmp.gt $dst, p0 = $src1, $src2;;">;
-def FCMPGE : AForm<0x03, 0x0b, (ops PR:$dst, FP:$src1, FP:$src2),
-  "fcmp.ge $dst, p0 = $src1, $src2;;">;
-def FCMPLT : AForm<0x03, 0x0b, (ops PR:$dst, FP:$src1, FP:$src2),
-  "fcmp.lt $dst, p0 = $src1, $src2;;">;
-def FCMPLE : AForm<0x03, 0x0b, (ops PR:$dst, FP:$src1, FP:$src2),
-  "fcmp.le $dst, p0 = $src1, $src2;;">;
-def FCMPNE : AForm<0x03, 0x0b, (ops PR:$dst, FP:$src1, FP:$src2),
-  "fcmp.neq $dst, p0 = $src1, $src2;;">;
-def FCMPLTU : AForm<0x03, 0x0b, (ops PR:$dst, FP:$src1, FP:$src2),
-  "fcmp.ltu $dst, p0 = $src1, $src2;;">;
-def FCMPGTU : AForm<0x03, 0x0b, (ops PR:$dst, FP:$src1, FP:$src2),
-  "fcmp.gtu $dst, p0 = $src1, $src2;;">;
-def FCMPLEU : AForm<0x03, 0x0b, (ops PR:$dst, FP:$src1, FP:$src2),
-  "fcmp.leu $dst, p0 = $src1, $src2;;">;
-def FCMPGEU : AForm<0x03, 0x0b, (ops PR:$dst, FP:$src1, FP:$src2),
-  "fcmp.geu $dst, p0 = $src1, $src2;;">;
-
 def PCMPEQOR : AForm<0x03, 0x0b, (ops PR:$dst, GR:$src1, GR:$src2, PR:$qp),
   "($qp) cmp.eq.or $dst, p0 = $src1, $src2;;">;
 def PCMPEQUNC : AForm<0x03, 0x0b, (ops PR:$dst, GR:$src1, GR:$src2, PR:$qp),
@@ -186,8 +496,6 @@
 def BCMPEQ : AForm<0x03, 0x0b, (ops PR:$dst1, PR:$dst2, GR:$src1, GR:$src2),
   "cmp.eq $dst1, dst2 = $src1, $src2;;">;
 
-def ADD : AForm<0x03, 0x0b, (ops GR:$dst, GR:$src1, GR:$src2),
-  "add $dst = $src1, $src2;;">;
 def ADDIMM14 : AForm<0x03, 0x0b, (ops GR:$dst, GR:$src1, s14imm:$imm),
   "adds $dst = $imm, $src1;;">;
 
@@ -196,63 +504,73 @@
 def CADDIMM22 : AForm<0x03, 0x0b, (ops GR:$dst, GR:$src1, s22imm:$imm, PR:$qp),
   "($qp) add $dst = $imm, $src1;;">;
 
-let isTwoAddress = 1 in {
-def TPCADDIMM22 : AForm<0x03, 0x0b,
-  (ops GR:$dst, GR:$src1, s22imm:$imm, PR:$qp),
-    "($qp) add $dst = $imm, $dst;;">;
-def TPCMPIMM8NE : AForm<0x03, 0x0b,
-  (ops PR:$dst, PR:$src1, s22imm:$imm, GR:$src2, PR:$qp),
-    "($qp) cmp.ne $dst , p0 = $imm, $src2;;">;
-}
-
-def SUB : AForm<0x03, 0x0b, (ops GR:$dst, GR:$src1, GR:$src2),
-  "sub $dst = $src1, $src2;;">;
 def SUBIMM8 : AForm<0x03, 0x0b, (ops GR:$dst, s8imm:$imm, GR:$src2),
   "sub $dst = $imm, $src2;;">;
 
-def ST1 : AForm<0x03, 0x0b, (ops GR:$dstPtr, GR:$value),
-  "st1 [$dstPtr] = $value;;">;
-def ST2 : AForm<0x03, 0x0b, (ops GR:$dstPtr, GR:$value),
-  "st2 [$dstPtr] = $value;;">;
-def ST4 : AForm<0x03, 0x0b, (ops GR:$dstPtr, GR:$value),
-  "st4 [$dstPtr] = $value;;">;
-def ST8 : AForm<0x03, 0x0b, (ops GR:$dstPtr, GR:$value),
-  "st8 [$dstPtr] = $value;;">;
-
-def LD1 : AForm<0x03, 0x0b, (ops GR:$dst, GR:$srcPtr),
-  "ld1 $dst = [$srcPtr];;">;
-def LD2 : AForm<0x03, 0x0b, (ops GR:$dst, GR:$srcPtr),
-  "ld2 $dst = [$srcPtr];;">;
-def LD4 : AForm<0x03, 0x0b, (ops GR:$dst, GR:$srcPtr),
-  "ld4 $dst = [$srcPtr];;">;
-def LD8 : AForm<0x03, 0x0b, (ops GR:$dst, GR:$srcPtr),
-  "ld8 $dst = [$srcPtr];;">;
-
-def POPCNT : AForm<0x03, 0x0b, (ops GR:$dst, GR:$src), "popcnt $dst = $src;;">;
-
-// some FP stuff:
-def FADD : AForm<0x03, 0x0b, (ops FP:$dst, FP:$src1, FP:$src2),
-  "fadd $dst = $src1, $src2;;">;
+let isStore = 1 in {
+  def ST1 : AForm<0x03, 0x0b, (ops GR:$dstPtr, GR:$value),
+    "st1 [$dstPtr] = $value;;">;
+  def ST2 : AForm<0x03, 0x0b, (ops GR:$dstPtr, GR:$value),
+    "st2 [$dstPtr] = $value;;">;
+  def ST4 : AForm<0x03, 0x0b, (ops GR:$dstPtr, GR:$value),
+    "st4 [$dstPtr] = $value;;">;
+  def ST8 : AForm<0x03, 0x0b, (ops GR:$dstPtr, GR:$value),
+    "st8 [$dstPtr] = $value;;">;
+  def STF4 : AForm<0x03, 0x0b, (ops GR:$dstPtr, FP:$value),
+    "stfs [$dstPtr] = $value;;">;
+  def STF8 : AForm<0x03, 0x0b, (ops GR:$dstPtr, FP:$value),
+    "stfd [$dstPtr] = $value;;">;
+}
+
+let isLoad = 1 in {
+  def LD1 : AForm<0x03, 0x0b, (ops GR:$dst, GR:$srcPtr),
+    "ld1 $dst = [$srcPtr];;">;
+  def LD2 : AForm<0x03, 0x0b, (ops GR:$dst, GR:$srcPtr),
+    "ld2 $dst = [$srcPtr];;">;
+  def LD4 : AForm<0x03, 0x0b, (ops GR:$dst, GR:$srcPtr),
+    "ld4 $dst = [$srcPtr];;">;
+  def LD8 : AForm<0x03, 0x0b, (ops GR:$dst, GR:$srcPtr),
+    "ld8 $dst = [$srcPtr];;">;
+  def LDF4 : AForm<0x03, 0x0b, (ops FP:$dst, GR:$srcPtr),
+    "ldfs $dst = [$srcPtr];;">;
+  def LDF8 : AForm<0x03, 0x0b, (ops FP:$dst, GR:$srcPtr),
+    "ldfd $dst = [$srcPtr];;">;
+}
+
+def POPCNT : AForm_DAG<0x03, 0x0b, (ops GR:$dst, GR:$src),
+  "popcnt $dst = $src;;",
+  [(set GR:$dst, (ctpop GR:$src))]>;
+
+// some FP stuff:  // TODO: single-precision stuff?
+def FADD : AForm_DAG<0x03, 0x0b, (ops FP:$dst, FP:$src1, FP:$src2),
+  "fadd $dst = $src1, $src2;;",
+  [(set FP:$dst, (fadd FP:$src1, FP:$src2))]>;
 def FADDS: AForm<0x03, 0x0b, (ops FP:$dst, FP:$src1, FP:$src2),
   "fadd.s $dst = $src1, $src2;;">;
-def FSUB : AForm<0x03, 0x0b, (ops FP:$dst, FP:$src1, FP:$src2),
-  "fsub $dst = $src1, $src2;;">;
-def FMPY : AForm<0x03, 0x0b, (ops FP:$dst, FP:$src1, FP:$src2),
-  "fmpy $dst = $src1, $src2;;">;
-def FMOV : AForm<0x03, 0x0b, (ops FP:$dst, FP:$src),
-  "mov $dst = $src;;">; // XXX: there _is_ no fmov
-def FMA : AForm<0x03, 0x0b, (ops FP:$dst, FP:$src1, FP:$src2, FP:$src3),
-  "fma $dst = $src1, $src2, $src3;;">;
-def FMS : AForm<0x03, 0x0b, (ops FP:$dst, FP:$src1, FP:$src2, FP:$src3),
-  "fms $dst = $src1, $src2, $src3;;">;
-def FNMA : AForm<0x03, 0x0b, (ops FP:$dst, FP:$src1, FP:$src2, FP:$src3),
-  "fnma $dst = $src1, $src2, $src3;;">;
-def FABS : AForm<0x03, 0x0b, (ops FP:$dst, FP:$src),
-  "fabs $dst = $src;;">;
-def FNEG : AForm<0x03, 0x0b, (ops FP:$dst, FP:$src),
-  "fneg $dst = $src;;">;
-def FNEGABS : AForm<0x03, 0x0b, (ops FP:$dst, FP:$src),
-  "fnegabs $dst = $src;;">;
+def FSUB : AForm_DAG<0x03, 0x0b, (ops FP:$dst, FP:$src1, FP:$src2),
+  "fsub $dst = $src1, $src2;;",
+  [(set FP:$dst, (fsub FP:$src1, FP:$src2))]>;
+def FMPY : AForm_DAG<0x03, 0x0b, (ops FP:$dst, FP:$src1, FP:$src2),
+  "fmpy $dst = $src1, $src2;;",
+  [(set FP:$dst, (fmul FP:$src1, FP:$src2))]>;
+def FMA : AForm_DAG<0x03, 0x0b, (ops FP:$dst, FP:$src1, FP:$src2, FP:$src3),
+  "fma $dst = $src1, $src2, $src3;;",
+  [(set FP:$dst, (fadd (fmul FP:$src1, FP:$src2), FP:$src3))]>;
+def FMS : AForm_DAG<0x03, 0x0b, (ops FP:$dst, FP:$src1, FP:$src2, FP:$src3),
+  "fms $dst = $src1, $src2, $src3;;",
+  [(set FP:$dst, (fsub (fmul FP:$src1, FP:$src2), FP:$src3))]>;
+def FNMA : AForm_DAG<0x03, 0x0b, (ops FP:$dst, FP:$src1, FP:$src2, FP:$src3),
+  "fnma $dst = $src1, $src2, $src3;;",
+  [(set FP:$dst, (fneg (fadd (fmul FP:$src1, FP:$src2), FP:$src3)))]>;
+def FABS : AForm_DAG<0x03, 0x0b, (ops FP:$dst, FP:$src),
+  "fabs $dst = $src;;",
+  [(set FP:$dst, (fabs FP:$src))]>;
+def FNEG : AForm_DAG<0x03, 0x0b, (ops FP:$dst, FP:$src),
+  "fneg $dst = $src;;",
+  [(set FP:$dst, (fneg FP:$src))]>;
+def FNEGABS : AForm_DAG<0x03, 0x0b, (ops FP:$dst, FP:$src),
+  "fnegabs $dst = $src;;",
+  [(set FP:$dst, (fneg (fabs FP:$src)))]>;
 
 def CFMAS1 : AForm<0x03, 0x0b,
   (ops FP:$dst, FP:$src1, FP:$src2, FP:$src3, PR:$qp),
@@ -301,17 +619,20 @@
 def SETFSIG : AForm<0x03, 0x0b, (ops FP:$dst, GR:$src),
   "setf.sig $dst = $src;;">;
 
-def LDF4 : AForm<0x03, 0x0b, (ops FP:$dst, GR:$srcPtr),
-  "ldfs $dst = [$srcPtr];;">;
-def LDF8 : AForm<0x03, 0x0b, (ops FP:$dst, GR:$srcPtr),
-  "ldfd $dst = [$srcPtr];;">;
-
-def STF4 : AForm<0x03, 0x0b, (ops GR:$dstPtr, FP:$value),
-  "stfs [$dstPtr] = $value;;">;
-def STF8 : AForm<0x03, 0x0b, (ops GR:$dstPtr, FP:$value),
-  "stfd [$dstPtr] = $value;;">;
+// these four FP<->int conversion patterns need checking/cleaning
+def SINT_TO_FP : Pat<(sint_to_fp GR:$src),
+  (FNORMD (FCVTXF (SETFSIG GR:$src)))>;
+def UINT_TO_FP : Pat<(uint_to_fp GR:$src),
+  (FNORMD (FCVTXUF (SETFSIG GR:$src)))>;
+def FP_TO_SINT : Pat<(i64 (fp_to_sint FP:$src)),
+  (GETFSIG (FCVTFXTRUNC FP:$src))>;
+def FP_TO_UINT : Pat<(i64 (fp_to_uint FP:$src)),
+  (GETFSIG (FCVTFXUTRUNC FP:$src))>;
+
 
 let isTerminator = 1, isBranch = 1 in {
+  def BRL_NOTCALL : RawForm<0x03, 0xb0, (ops i64imm:$dst),
+    "(p0) brl.cond.sptk $dst;;">;
   def BRLCOND_NOTCALL : RawForm<0x03, 0xb0, (ops PR:$qp, i64imm:$dst),
     "($qp) brl.cond.sptk $dst;;">;
   def BRCOND_NOTCALL : RawForm<0x03, 0xb0, (ops PR:$qp, GR:$dst),
@@ -334,8 +655,14 @@
   F106,F107,F108,F109,F110,F111,F112,F113,F114,F115,F116,F117,F118,F119,
   F120,F121,F122,F123,F124,F125,F126,F127,
   out0,out1,out2,out3,out4,out5,out6,out7] in {
-  def BRCALL : RawForm<0x03, 0xb0, (ops calltarget:$dst),
+// old pattern call
+  def BRCALL: RawForm<0x03, 0xb0, (ops calltarget:$dst),
+  "br.call.sptk rp = $dst;;">;       // FIXME: teach llvm about branch regs?
+// new daggy stuff!  
+  def BRCALL_IPREL : RawForm<0x03, 0xb0, (ops calltarget:$dst, variable_ops),
   "br.call.sptk rp = $dst;;">;       // FIXME: teach llvm about branch regs?
+  def BRCALL_INDIRECT : RawForm<0x03, 0xb0, (ops GR:$branchreg, variable_ops),
+  "br.call.sptk rp = $branchreg;;">; // FIXME: teach llvm about branch regs?
   def BRLCOND_CALL : RawForm<0x03, 0xb0, (ops PR:$qp, i64imm:$dst),
     "($qp) brl.cond.call.sptk $dst;;">;
   def BRCOND_CALL : RawForm<0x03, 0xb0, (ops PR:$qp, GR:$dst),


Index: llvm/lib/Target/IA64/IA64RegisterInfo.cpp
diff -u llvm/lib/Target/IA64/IA64RegisterInfo.cpp:1.7 llvm/lib/Target/IA64/IA64RegisterInfo.cpp:1.7.2.1
--- llvm/lib/Target/IA64/IA64RegisterInfo.cpp:1.7	Thu Sep 29 20:30:29 2005
+++ llvm/lib/Target/IA64/IA64RegisterInfo.cpp	Wed Nov 16 12:32:36 2005
@@ -28,38 +28,23 @@
 #include "llvm/Support/CommandLine.h"
 #include "llvm/ADT/STLExtras.h"
 #include <iostream>
-
 using namespace llvm;
 
-namespace {
-}
 
 IA64RegisterInfo::IA64RegisterInfo()
   : IA64GenRegisterInfo(IA64::ADJUSTCALLSTACKDOWN, IA64::ADJUSTCALLSTACKUP) {}
 
-static const TargetRegisterClass *getClass(unsigned SrcReg) {
-  if (IA64::FPRegisterClass->contains(SrcReg))
-    return IA64::FPRegisterClass;
-  if (IA64::PRRegisterClass->contains(SrcReg))
-    return IA64::PRRegisterClass;
-
-  assert(IA64::GRRegisterClass->contains(SrcReg) &&
-         "PROBLEM: Reg is not FP, predicate or GR!");
-  return IA64::GRRegisterClass;
-}
-
 void IA64RegisterInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
                                            MachineBasicBlock::iterator MI,
                                            unsigned SrcReg, int FrameIdx,
                                            const TargetRegisterClass *RC) const{
 
-  if (getClass(SrcReg) == IA64::FPRegisterClass) {
+  if (RC == IA64::FPRegisterClass) {
     BuildMI(MBB, MI, IA64::STF8, 2).addFrameIndex(FrameIdx).addReg(SrcReg);
-  }
-  else if (getClass(SrcReg) == IA64::GRRegisterClass) {
+  } else if (RC == IA64::GRRegisterClass) {
     BuildMI(MBB, MI, IA64::ST8, 2).addFrameIndex(FrameIdx).addReg(SrcReg);
  }
-  else if (getClass(SrcReg) == IA64::PRRegisterClass) {
+  else if (RC == IA64::PRRegisterClass) {
     /* we use IA64::r2 as a temporary register for doing this hackery. */
     // first we load 0:
     BuildMI(MBB, MI, IA64::MOV, 1, IA64::r2).addReg(IA64::r0);
@@ -77,11 +62,11 @@
                                             unsigned DestReg, int FrameIdx,
                                             const TargetRegisterClass *RC)const{
 
-  if (getClass(DestReg) == IA64::FPRegisterClass) {
+  if (RC == IA64::FPRegisterClass) {
     BuildMI(MBB, MI, IA64::LDF8, 1, DestReg).addFrameIndex(FrameIdx);
-  } else if (getClass(DestReg) == IA64::GRRegisterClass) {
+  } else if (RC == IA64::GRRegisterClass) {
     BuildMI(MBB, MI, IA64::LD8, 1, DestReg).addFrameIndex(FrameIdx);
- } else if (getClass(DestReg) == IA64::PRRegisterClass) {
+ } else if (RC == IA64::PRRegisterClass) {
    // first we load a byte from the stack into r2, our 'predicate hackery'
    // scratch reg
    BuildMI(MBB, MI, IA64::LD8, 1, IA64::r2).addFrameIndex(FrameIdx);


Index: llvm/lib/Target/IA64/IA64RegisterInfo.td
diff -u llvm/lib/Target/IA64/IA64RegisterInfo.td:1.8 llvm/lib/Target/IA64/IA64RegisterInfo.td:1.8.2.1
--- llvm/lib/Target/IA64/IA64RegisterInfo.td:1.8	Fri Aug 19 14:13:20 2005
+++ llvm/lib/Target/IA64/IA64RegisterInfo.td	Wed Nov 16 12:32:36 2005
@@ -211,7 +211,7 @@
 
 // application (special) registers:
 
-// " previous function state" application register
+// "previous function state" application register
 def AR_PFS : GR<0, "ar.pfs">;
 
 // "return pointer" (this is really branch register b0)
@@ -226,7 +226,6 @@
 //
 
 // these are the scratch (+stacked) general registers
-// ZERO (r0), GP (r1), SP (r12), ThreadP (r13) are not here... 
 // FIXME/XXX  we also reserve a frame pointer (r15)
 // FIXME/XXX  we also reserve r2 for spilling/filling predicates
 // in IA64RegisterInfo.cpp
@@ -255,7 +254,7 @@
         r104, r105, r106, r107, r108, r109, r110, r111,
         r112, r113, r114, r115, r116, r117, r118, r119,
         r120, r121, r122, r123, r124, r125, r126, r127,
-	r0, r1, r2, r12, r13, r15, r22]> // the last 15 are special (look down)
+	r0, r1, r2, r12, r13, r15, r22, rp]> // the last 16 are special (look down)
   {
     let MethodProtos = [{
     iterator allocation_order_begin(MachineFunction &MF) const;
@@ -264,13 +263,13 @@
   let MethodBodies = [{
     GRClass::iterator
     GRClass::allocation_order_begin(MachineFunction &MF) const {
-	// hide registers appropriately:
+	// hide the 8 out? registers appropriately:
 	return begin()+(8-(MF.getInfo<IA64FunctionInfo>()->outRegsUsed));
       }
 
       GRClass::iterator
       GRClass::allocation_order_end(MachineFunction &MF) const {
-	int numReservedRegs=7; // the 7 special registers r0,r1,r2,r12,r13 etc
+	int numReservedRegs=8; // the 8 special registers r0,r1,r2,r12,r13 etc
 
 	// we also can't allocate registers for use as locals if they're
 	// already required as 'out' registers
@@ -283,7 +282,6 @@
 
 
 // these are the scratch (+stacked) FP registers
-// ZERO (F0) and ONE (F1) are not here
 def FP : RegisterClass<"IA64", f64, 64, 
        [F6, F7, 
 	F8, F9, F10, F11, F12, F13, F14, F15, 
@@ -298,7 +296,25 @@
 	F96, F97, F98, F99, F100, F101, F102, F103, 
 	F104, F105, F106, F107, F108, F109, F110, F111, 
 	F112, F113, F114, F115, F116, F117, F118, F119, 
-	F120, F121, F122, F123, F124, F125, F126, F127]>;
+	F120, F121, F122, F123, F124, F125, F126, F127,
+	F0, F1]> // these last two are hidden
+  {
+    let MethodProtos = [{
+    iterator allocation_order_begin(MachineFunction &MF) const;
+    iterator allocation_order_end(MachineFunction &MF) const;
+  }];
+  let MethodBodies = [{
+    FPClass::iterator
+    FPClass::allocation_order_begin(MachineFunction &MF) const {
+	return begin(); // we don't hide any FP regs from the start
+      }
+
+      FPClass::iterator
+      FPClass::allocation_order_end(MachineFunction &MF) const {
+	return end()-2; // we hide regs F0, F1 from the end 
+      }
+  }];
+}
 
 // these are the predicate registers, p0 (1/TRUE) is not here
 def PR : RegisterClass<"IA64", i1, 64, 


Index: llvm/lib/Target/IA64/IA64TargetMachine.cpp
diff -u llvm/lib/Target/IA64/IA64TargetMachine.cpp:1.5 llvm/lib/Target/IA64/IA64TargetMachine.cpp:1.5.2.1
--- llvm/lib/Target/IA64/IA64TargetMachine.cpp:1.5	Thu Sep  1 16:38:20 2005
+++ llvm/lib/Target/IA64/IA64TargetMachine.cpp	Wed Nov 16 12:32:36 2005
@@ -37,6 +37,9 @@
                               cl::desc("Disable the IA64 asm printer, for use "
                                        "when profiling the code generator."));
 
+  cl::opt<bool> EnableDAGIsel("enable-ia64-dag-isel", cl::Hidden,
+		              cl::desc("Enable the IA64 DAG->DAG isel"));
+
   // Register the target.
   RegisterTarget<IA64TargetMachine> X("ia64", "  IA-64 (Itanium)");
 }
@@ -82,14 +85,16 @@
 // does to emit statically compiled machine code.
 bool IA64TargetMachine::addPassesToEmitFile(PassManager &PM,
                                             std::ostream &Out,
-                                                CodeGenFileType FileType) {
+                                            CodeGenFileType FileType,
+                                            bool Fast) {
   if (FileType != TargetMachine::AssemblyFile) return true;
 
   // FIXME: Implement efficient support for garbage collection intrinsics.
   PM.add(createLowerGCPass());
 
   // FIXME: Implement the invoke/unwind instructions!
-  PM.add(createLowerInvokePass());
+  PM.add(createLowerInvokePass(704, 16)); // on ia64 linux, jmpbufs are 704
+                                          // bytes and must be 16byte aligned
 
   // FIXME: Implement the switch instruction in the instruction selector!
   PM.add(createLowerSwitchPass());
@@ -97,8 +102,12 @@
   // Make sure that no unreachable blocks are instruction selected.
   PM.add(createUnreachableBlockEliminationPass());
 
-  PM.add(createIA64PatternInstructionSelector(*this));
-
+  // Add an instruction selector
+  if(EnableDAGIsel)
+    PM.add(createIA64DAGToDAGInstructionSelector(*this));
+  else
+    PM.add(createIA64PatternInstructionSelector(*this));
+  
 /* XXX not yet. ;)
   // Run optional SSA-based machine code optimizations next...
   if (!NoSSAPeephole)


Index: llvm/lib/Target/IA64/IA64TargetMachine.h
diff -u llvm/lib/Target/IA64/IA64TargetMachine.h:1.4 llvm/lib/Target/IA64/IA64TargetMachine.h:1.4.2.1
--- llvm/lib/Target/IA64/IA64TargetMachine.h:1.4	Thu Sep  1 16:38:20 2005
+++ llvm/lib/Target/IA64/IA64TargetMachine.h	Wed Nov 16 12:32:36 2005
@@ -37,7 +37,7 @@
   }
 
   virtual bool addPassesToEmitFile(PassManager &PM, std::ostream &Out,
-                                   CodeGenFileType FileType);
+                                   CodeGenFileType FileType, bool Fast);
 
   static unsigned getModuleMatchQuality(const Module &M);
   static unsigned compileTimeMatchQuality(void);


Index: llvm/lib/Target/IA64/Makefile
diff -u llvm/lib/Target/IA64/Makefile:1.3 llvm/lib/Target/IA64/Makefile:1.3.4.1
--- llvm/lib/Target/IA64/Makefile:1.3	Thu Mar 17 12:37:05 2005
+++ llvm/lib/Target/IA64/Makefile	Wed Nov 16 12:32:36 2005
@@ -11,7 +11,8 @@
 # Make sure that tblgen is run, first thing.
 BUILT_SOURCES = IA64GenRegisterInfo.h.inc IA64GenRegisterNames.inc \
                 IA64GenRegisterInfo.inc IA64GenInstrNames.inc \
-                IA64GenInstrInfo.inc IA64GenAsmWriter.inc
+                IA64GenInstrInfo.inc IA64GenAsmWriter.inc \
+		IA64GenDAGISel.inc
 
 include $(LEVEL)/Makefile.common
 


Index: llvm/lib/Target/IA64/README
diff -u llvm/lib/Target/IA64/README:1.4 llvm/lib/Target/IA64/README:1.4.4.1
--- llvm/lib/Target/IA64/README:1.4	Tue Apr 12 13:42:59 2005
+++ llvm/lib/Target/IA64/README	Wed Nov 16 12:32:36 2005
@@ -54,6 +54,8 @@
 
 TODO:
 
+  - stop passing FP args in both FP *and* integer regs when not required
+  - allocate low (nonstacked) registers more aggressively
   - clean up and thoroughly test the isel patterns.
   - fix stacked register allocation order: (for readability) we don't want
     the out? registers being the first ones used
@@ -62,7 +64,7 @@
   - bundling!
     (we will avoid the mess that is:
      http://gcc.gnu.org/ml/gcc/2003-12/msg00832.html )
-  - instruction scheduling (yep)
+  - instruction scheduling (hmmmm! ;)
   - write truly inspirational documentation
   - if-conversion (predicate database/knowledge? etc etc)
   - counted loop support






More information about the llvm-commits mailing list