[llvm-commits] [parallel] CVS: llvm/lib/Target/X86/InstSelectSimple.cpp Printer.cpp X86CodeEmitter.cpp X86InstrBuilder.h X86InstrInfo.td X86RegisterInfo.cpp X86TargetMachine.cpp X86TargetMachine.h

Wed Mar 10 19:02:00 PST 2004

Changes in directory llvm/lib/Target/X86:

InstSelectSimple.cpp updated: 1.149.2.1 -> 1.149.2.2
Printer.cpp updated: 1.76.2.1 -> 1.76.2.2
X86CodeEmitter.cpp updated: 1.46.2.1 -> 1.46.2.2
X86InstrBuilder.h updated: 1.9.4.1 -> 1.9.4.2
X86InstrInfo.td updated: 1.15.2.1 -> 1.15.2.2
X86RegisterInfo.cpp updated: 1.40.4.1 -> 1.40.4.2
X86TargetMachine.cpp updated: 1.44.2.1 -> 1.44.2.2
X86TargetMachine.h updated: 1.21.2.1 -> 1.21.2.2

---
Log message:

Merge from trunk.


---
Diffs of the changes:  (+306 -202)

Index: llvm/lib/Target/X86/InstSelectSimple.cpp
diff -u llvm/lib/Target/X86/InstSelectSimple.cpp:1.149.2.1 llvm/lib/Target/X86/InstSelectSimple.cpp:1.149.2.2

--- llvm/lib/Target/X86/InstSelectSimple.cpp:1.149.2.1	Mon Mar  1 17:58:15 2004
+++ llvm/lib/Target/X86/InstSelectSimple.cpp	Wed Mar 10 19:01:46 2004
@@ -107,6 +107,7 @@
     /// LowerUnknownIntrinsicFunctionCalls - This performs a prepass over the
     /// function, lowering any calls to unknown intrinsic functions into the
     /// equivalent LLVM code.
+    ///
     void LowerUnknownIntrinsicFunctionCalls(Function &F);
 
     /// LoadArgumentsToVirtualRegs - Load all of the arguments to this function
@@ -198,8 +199,14 @@
     ///
     void promote32(unsigned targetReg, const ValueRecord &VR);
 
-    // getGEPIndex - This is used to fold GEP instructions into X86 addressing
-    // expressions.
+    /// getAddressingMode - Compute the addressing mode used to address the
+    /// specified value.  Pass the four output operands on to addFullAddress.
+    void getAddressingMode(Value *Addr, unsigned &BaseReg, unsigned &Scale,
+                           unsigned &IndexReg, unsigned &Disp);
+
+
+    /// getGEPIndex - This is used to fold GEP instructions into X86 addressing
+    /// expressions.
     void getGEPIndex(MachineBasicBlock *MBB, MachineBasicBlock::iterator IP,
                      std::vector<Value*> &GEPOps,
                      std::vector<const Type*> &GEPTypes, unsigned &BaseReg,
@@ -221,11 +228,13 @@
 
     /// emitCastOperation - Common code shared between visitCastInst and
     /// constant expression cast support.
+    ///
     void emitCastOperation(MachineBasicBlock *BB,MachineBasicBlock::iterator IP,
                            Value *Src, const Type *DestTy, unsigned TargetReg);
 
     /// emitSimpleBinaryOperation - Common code shared between visitSimpleBinary
     /// and constant expression support.
+    ///
     void emitSimpleBinaryOperation(MachineBasicBlock *BB,
                                    MachineBasicBlock::iterator IP,
                                    Value *Op0, Value *Op1,
@@ -238,6 +247,7 @@
 
     /// emitSetCCOperation - Common code shared between visitSetCondInst and
     /// constant expression support.
+    ///
     void emitSetCCOperation(MachineBasicBlock *BB,
                             MachineBasicBlock::iterator IP,
                             Value *Op0, Value *Op1, unsigned Opcode,
@@ -245,6 +255,7 @@
 
     /// emitShiftOperation - Common code shared between visitShiftInst and
     /// constant expression support.
+    ///
     void emitShiftOperation(MachineBasicBlock *MBB,
                             MachineBasicBlock::iterator IP,
                             Value *Op, Value *ShiftAmount, bool isLeftShift,
@@ -709,9 +720,13 @@
       ++NumFPKill;
     }
   }
+  // If we got this far, there is no need to insert the kill instruction.
+  return false;
+#else
+  return true;
+#endif
 }
 
-
 // canFoldSetCCIntoBranch - Return the setcc instruction if we can fold it into
 // the conditional branch instruction which is the only user of the cc
 // instruction.  This is the case if the conditional branch is the only user of
@@ -887,6 +902,7 @@
 
 /// emitSetCCOperation - Common code shared between visitSetCondInst and
 /// constant expression support.
+///
 void ISel::emitSetCCOperation(MachineBasicBlock *MBB,
                               MachineBasicBlock::iterator IP,
                               Value *Op0, Value *Op1, unsigned Opcode,
@@ -913,6 +929,7 @@
 
 /// promote32 - Emit instructions to turn a narrow operand into a 32-bit-wide
 /// operand, in the specified target register.
+///
 void ISel::promote32(unsigned targetReg, const ValueRecord &VR) {
   bool isUnsigned = VR.Ty->isUnsigned();
 
@@ -1221,6 +1238,7 @@
 /// LowerUnknownIntrinsicFunctionCalls - This performs a prepass over the
 /// function, lowering any calls to unknown intrinsic functions into the
 /// equivalent LLVM code.
+///
 void ISel::LowerUnknownIntrinsicFunctionCalls(Function &F) {
   for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
     for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; )
@@ -1400,15 +1418,67 @@
   }
 }
 
+static bool isSafeToFoldLoadIntoInstruction(LoadInst &LI, Instruction &User) {
+  if (LI.getParent() != User.getParent())
+    return false;
+  BasicBlock::iterator It = &LI;
+  // Check all of the instructions between the load and the user.  We should
+  // really use alias analysis here, but for now we just do something simple.
+  for (++It; It != BasicBlock::iterator(&User); ++It) {
+    switch (It->getOpcode()) {
+    case Instruction::Store:
+    case Instruction::Call:
+    case Instruction::Invoke:
+      return false;
+    }
+  }
+  return true;
+}
+
 
 /// visitSimpleBinary - Implement simple binary operators for integral types...
 /// OperatorClass is one of: 0 for Add, 1 for Sub, 2 for And, 3 for Or, 4 for
 /// Xor.
+///
 void ISel::visitSimpleBinary(BinaryOperator &B, unsigned OperatorClass) {
   unsigned DestReg = getReg(B);
   MachineBasicBlock::iterator MI = BB->end();
-  emitSimpleBinaryOperation(BB, MI, B.getOperand(0), B.getOperand(1),
-                            OperatorClass, DestReg);
+  Value *Op0 = B.getOperand(0), *Op1 = B.getOperand(1);
+
+  // Special case: op Reg, load [mem]
+  if (isa<LoadInst>(Op0) && !isa<LoadInst>(Op1))
+    if (!B.swapOperands())
+      std::swap(Op0, Op1);  // Make sure any loads are in the RHS.
+
+  unsigned Class = getClassB(B.getType());
+  if (isa<LoadInst>(Op1) && Class < cFP &&
+      isSafeToFoldLoadIntoInstruction(*cast<LoadInst>(Op1), B)) {
+
+    static const unsigned OpcodeTab[][3] = {
+      // Arithmetic operators
+      { X86::ADD8rm, X86::ADD16rm, X86::ADD32rm },  // ADD
+      { X86::SUB8rm, X86::SUB16rm, X86::SUB32rm },  // SUB
+      
+      // Bitwise operators
+      { X86::AND8rm, X86::AND16rm, X86::AND32rm },  // AND
+      { X86:: OR8rm, X86:: OR16rm, X86:: OR32rm },  // OR
+      { X86::XOR8rm, X86::XOR16rm, X86::XOR32rm },  // XOR
+    };
+  
+    assert(Class < cFP && "General code handles 64-bit integer types!");
+    unsigned Opcode = OpcodeTab[OperatorClass][Class];
+
+    unsigned BaseReg, Scale, IndexReg, Disp;
+    getAddressingMode(cast<LoadInst>(Op1)->getOperand(0), BaseReg,
+                      Scale, IndexReg, Disp);
+
+    unsigned Op0r = getReg(Op0);
+    addFullAddress(BuildMI(BB, Opcode, 2, DestReg).addReg(Op0r),
+                   BaseReg, Scale, IndexReg, Disp);
+    return;
+  }
+
+  emitSimpleBinaryOperation(BB, MI, Op0, Op1, OperatorClass, DestReg);
 }
 
 /// emitSimpleBinaryOperation - Implement simple binary operators for integral
@@ -1450,83 +1520,82 @@
         return;
       }
 
-  if (!isa<ConstantInt>(Op1) || Class == cLong) {
-    static const unsigned OpcodeTab[][4] = {
-      // Arithmetic operators
-      { X86::ADD8rr, X86::ADD16rr, X86::ADD32rr, X86::FpADD },  // ADD
-      { X86::SUB8rr, X86::SUB16rr, X86::SUB32rr, X86::FpSUB },  // SUB
-      
-      // Bitwise operators
-      { X86::AND8rr, X86::AND16rr, X86::AND32rr, 0 },  // AND
-      { X86:: OR8rr, X86:: OR16rr, X86:: OR32rr, 0 },  // OR
-      { X86::XOR8rr, X86::XOR16rr, X86::XOR32rr, 0 },  // XOR
-    };
-    
-    bool isLong = false;
-    if (Class == cLong) {
-      isLong = true;
-      Class = cInt;          // Bottom 32 bits are handled just like ints
-    }
-    
-    unsigned Opcode = OpcodeTab[OperatorClass][Class];
-    assert(Opcode && "Floating point arguments to logical inst?");
+  // Special case: op Reg, <const>
+  if (Class != cLong && isa<ConstantInt>(Op1)) {
+    ConstantInt *Op1C = cast<ConstantInt>(Op1);
     unsigned Op0r = getReg(Op0, MBB, IP);
-    unsigned Op1r = getReg(Op1, MBB, IP);
-    BuildMI(*MBB, IP, Opcode, 2, DestReg).addReg(Op0r).addReg(Op1r);
-    
-    if (isLong) {        // Handle the upper 32 bits of long values...
-      static const unsigned TopTab[] = {
-        X86::ADC32rr, X86::SBB32rr, X86::AND32rr, X86::OR32rr, X86::XOR32rr
-      };
-      BuildMI(*MBB, IP, TopTab[OperatorClass], 2,
-          DestReg+1).addReg(Op0r+1).addReg(Op1r+1);
+
+    // xor X, -1 -> not X
+    if (OperatorClass == 4 && Op1C->isAllOnesValue()) {
+      static unsigned const NOTTab[] = { X86::NOT8r, X86::NOT16r, X86::NOT32r };
+      BuildMI(*MBB, IP, NOTTab[Class], 1, DestReg).addReg(Op0r);
+      return;
     }
-    return;
-  }
 
-  // Special case: op Reg, <const>
-  ConstantInt *Op1C = cast<ConstantInt>(Op1);
-  unsigned Op0r = getReg(Op0, MBB, IP);
+    // add X, -1 -> dec X
+    if (OperatorClass == 0 && Op1C->isAllOnesValue()) {
+      static unsigned const DECTab[] = { X86::DEC8r, X86::DEC16r, X86::DEC32r };
+      BuildMI(*MBB, IP, DECTab[Class], 1, DestReg).addReg(Op0r);
+      return;
+    }
 
-  // xor X, -1 -> not X
-  if (OperatorClass == 4 && Op1C->isAllOnesValue()) {
-    static unsigned const NOTTab[] = { X86::NOT8r, X86::NOT16r, X86::NOT32r };
-    BuildMI(*MBB, IP, NOTTab[Class], 1, DestReg).addReg(Op0r);
-    return;
-  }
+    // add X, 1 -> inc X
+    if (OperatorClass == 0 && Op1C->equalsInt(1)) {
+      static unsigned const DECTab[] = { X86::INC8r, X86::INC16r, X86::INC32r };
+      BuildMI(*MBB, IP, DECTab[Class], 1, DestReg).addReg(Op0r);
+      return;
+    }
+  
+    static const unsigned OpcodeTab[][3] = {
+      // Arithmetic operators
+      { X86::ADD8ri, X86::ADD16ri, X86::ADD32ri },  // ADD
+      { X86::SUB8ri, X86::SUB16ri, X86::SUB32ri },  // SUB
+    
+      // Bitwise operators
+      { X86::AND8ri, X86::AND16ri, X86::AND32ri },  // AND
+      { X86:: OR8ri, X86:: OR16ri, X86:: OR32ri },  // OR
+      { X86::XOR8ri, X86::XOR16ri, X86::XOR32ri },  // XOR
+    };
+  
+    assert(Class < cFP && "General code handles 64-bit integer types!");
+    unsigned Opcode = OpcodeTab[OperatorClass][Class];
 
-  // add X, -1 -> dec X
-  if (OperatorClass == 0 && Op1C->isAllOnesValue()) {
-    static unsigned const DECTab[] = { X86::DEC8r, X86::DEC16r, X86::DEC32r };
-    BuildMI(*MBB, IP, DECTab[Class], 1, DestReg).addReg(Op0r);
+    uint64_t Op1v = cast<ConstantInt>(Op1C)->getRawValue();
+    BuildMI(*MBB, IP, Opcode, 5, DestReg).addReg(Op0r).addImm(Op1v);
     return;
   }
 
-  // add X, 1 -> inc X
-  if (OperatorClass == 0 && Op1C->equalsInt(1)) {
-    static unsigned const DECTab[] = { X86::INC8r, X86::INC16r, X86::INC32r };
-    BuildMI(*MBB, IP, DECTab[Class], 1, DestReg).addReg(Op0r);
-    return;
-  }
-  
-  static const unsigned OpcodeTab[][3] = {
+  // Finally, handle the general case now.
+  static const unsigned OpcodeTab[][4] = {
     // Arithmetic operators
-    { X86::ADD8ri, X86::ADD16ri, X86::ADD32ri },  // ADD
-    { X86::SUB8ri, X86::SUB16ri, X86::SUB32ri },  // SUB
-    
+    { X86::ADD8rr, X86::ADD16rr, X86::ADD32rr, X86::FpADD },  // ADD
+    { X86::SUB8rr, X86::SUB16rr, X86::SUB32rr, X86::FpSUB },  // SUB
+      
     // Bitwise operators
-    { X86::AND8ri, X86::AND16ri, X86::AND32ri },  // AND
-    { X86:: OR8ri, X86:: OR16ri, X86:: OR32ri },  // OR
-    { X86::XOR8ri, X86::XOR16ri, X86::XOR32ri },  // XOR
+    { X86::AND8rr, X86::AND16rr, X86::AND32rr, 0 },  // AND
+    { X86:: OR8rr, X86:: OR16rr, X86:: OR32rr, 0 },  // OR
+    { X86::XOR8rr, X86::XOR16rr, X86::XOR32rr, 0 },  // XOR
   };
-  
-  assert(Class < 3 && "General code handles 64-bit integer types!");
+    
+  bool isLong = false;
+  if (Class == cLong) {
+    isLong = true;
+    Class = cInt;          // Bottom 32 bits are handled just like ints
+  }
+    
   unsigned Opcode = OpcodeTab[OperatorClass][Class];
-  uint64_t Op1v = cast<ConstantInt>(Op1C)->getRawValue();
-  
-  // Mask off any upper bits of the constant, if there are any...
-  Op1v &= (1ULL << (8 << Class)) - 1;
-  BuildMI(*MBB, IP, Opcode, 2, DestReg).addReg(Op0r).addImm(Op1v);
+  assert(Opcode && "Floating point arguments to logical inst?");
+  unsigned Op0r = getReg(Op0, MBB, IP);
+  unsigned Op1r = getReg(Op1, MBB, IP);
+  BuildMI(*MBB, IP, Opcode, 2, DestReg).addReg(Op0r).addReg(Op1r);
+    
+  if (isLong) {        // Handle the upper 32 bits of long values...
+    static const unsigned TopTab[] = {
+      X86::ADC32rr, X86::SBB32rr, X86::AND32rr, X86::OR32rr, X86::XOR32rr
+    };
+    BuildMI(*MBB, IP, TopTab[OperatorClass], 2,
+            DestReg+1).addReg(Op0r+1).addReg(Op1r+1);
+  }
 }
 
 /// doMultiply - Emit appropriate instructions to multiply together the
@@ -1895,31 +1964,66 @@
 }
 
 
-/// visitLoadInst - Implement LLVM load instructions in terms of the x86 'mov'
-/// instruction.  The load and store instructions are the only place where we
-/// need to worry about the memory layout of the target machine.
-///
-void ISel::visitLoadInst(LoadInst &I) {
-  unsigned DestReg = getReg(I);
-  unsigned BaseReg = 0, Scale = 1, IndexReg = 0, Disp = 0;
-  Value *Addr = I.getOperand(0);
+void ISel::getAddressingMode(Value *Addr, unsigned &BaseReg, unsigned &Scale,
+                             unsigned &IndexReg, unsigned &Disp) {
+  BaseReg = 0; Scale = 1; IndexReg = 0; Disp = 0;
   if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Addr)) {
     if (isGEPFoldable(BB, GEP->getOperand(0), GEP->op_begin()+1, GEP->op_end(),
                        BaseReg, Scale, IndexReg, Disp))
-      Addr = 0;  // Address is consumed!
+      return;
   } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Addr)) {
     if (CE->getOpcode() == Instruction::GetElementPtr)
       if (isGEPFoldable(BB, CE->getOperand(0), CE->op_begin()+1, CE->op_end(),
                         BaseReg, Scale, IndexReg, Disp))
-        Addr = 0;
+        return;
   }
 
-  if (Addr) {
-    // If it's not foldable, reset addr mode.
-    BaseReg = getReg(Addr);
-    Scale = 1; IndexReg = 0; Disp = 0;
+  // If it's not foldable, reset addr mode.
+  BaseReg = getReg(Addr);
+  Scale = 1; IndexReg = 0; Disp = 0;
+}
+
+
+/// visitLoadInst - Implement LLVM load instructions in terms of the x86 'mov'
+/// instruction.  The load and store instructions are the only place where we
+/// need to worry about the memory layout of the target machine.
+///
+void ISel::visitLoadInst(LoadInst &I) {
+  // Check to see if this load instruction is going to be folded into a binary
+  // instruction, like add.  If so, we don't want to emit it.  Wouldn't a real
+  // pattern matching instruction selector be nice?
+  if (I.hasOneUse() && getClassB(I.getType()) < cFP) {
+    Instruction *User = cast<Instruction>(I.use_back());
+    switch (User->getOpcode()) {
+    default: User = 0; break;
+    case Instruction::Add:
+    case Instruction::Sub:
+    case Instruction::And:
+    case Instruction::Or:
+    case Instruction::Xor:
+      break;
+    }
+
+    if (User) {
+      // Okay, we found a user.  If the load is the first operand and there is
+      // no second operand load, reverse the operand ordering.  Note that this
+      // can fail for a subtract (i.e., no change will be made).
+      if (!isa<LoadInst>(User->getOperand(1)))
+        cast<BinaryOperator>(User)->swapOperands();
+      
+      // Okay, now that everything is set up, if this load is used by the second
+      // operand, and if there are no instructions that invalidate the load
+      // before the binary operator, eliminate the load.
+      if (User->getOperand(1) == &I &&
+          isSafeToFoldLoadIntoInstruction(I, *User))
+        return;   // Eliminate the load!
+    }
   }
 
+  unsigned DestReg = getReg(I);
+  unsigned BaseReg = 0, Scale = 1, IndexReg = 0, Disp = 0;
+  getAddressingMode(I.getOperand(0), BaseReg, Scale, IndexReg, Disp);
+
   unsigned Class = getClassB(I.getType());
   if (Class == cLong) {
     addFullAddress(BuildMI(BB, X86::MOV32rm, 4, DestReg),
@@ -1942,24 +2046,8 @@
 /// instruction.
 ///
 void ISel::visitStoreInst(StoreInst &I) {
-  unsigned BaseReg = 0, Scale = 1, IndexReg = 0, Disp = 0;
-  Value *Addr = I.getOperand(1);
-  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Addr)) {
-    if (isGEPFoldable(BB, GEP->getOperand(0), GEP->op_begin()+1, GEP->op_end(),
-                       BaseReg, Scale, IndexReg, Disp))
-      Addr = 0;  // Address is consumed!
-  } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Addr)) {
-    if (CE->getOpcode() == Instruction::GetElementPtr)
-      if (isGEPFoldable(BB, CE->getOperand(0), CE->op_begin()+1, CE->op_end(),
-                        BaseReg, Scale, IndexReg, Disp))
-        Addr = 0;
-  }
-
-  if (Addr) {
-    // If it's not foldable, reset addr mode.
-    BaseReg = getReg(Addr);
-    Scale = 1; IndexReg = 0; Disp = 0;
-  }
+  unsigned BaseReg, Scale, IndexReg, Disp;
+  getAddressingMode(I.getOperand(1), BaseReg, Scale, IndexReg, Disp);
 
   const Type *ValTy = I.getOperand(0)->getType();
   unsigned Class = getClassB(ValTy);
@@ -2003,8 +2091,9 @@
 }
 
 
-/// visitCastInst - Here we have various kinds of copying with or without
-/// sign extension going on.
+/// visitCastInst - Here we have various kinds of copying with or without sign
+/// extension going on.
+///
 void ISel::visitCastInst(CastInst &CI) {
   Value *Op = CI.getOperand(0);
   // If this is a cast from a 32-bit integer to a Long type, and the only uses
@@ -2028,8 +2117,9 @@
   emitCastOperation(BB, MI, Op, CI.getType(), DestReg);
 }
 
-/// emitCastOperation - Common code shared between visitCastInst and
-/// constant expression cast support.
+/// emitCastOperation - Common code shared between visitCastInst and constant
+/// expression cast support.
+///
 void ISel::emitCastOperation(MachineBasicBlock *BB,
                              MachineBasicBlock::iterator IP,
                              Value *Src, const Type *DestTy,
@@ -2371,7 +2461,8 @@
   }
 }
 
-
+/// visitGetElementPtrInst - instruction-select GEP instructions
+///
 void ISel::visitGetElementPtrInst(GetElementPtrInst &I) {
   // If this GEP instruction will be folded into all of its users, we don't need
   // to explicitly calculate it!


Index: llvm/lib/Target/X86/Printer.cpp
diff -u llvm/lib/Target/X86/Printer.cpp:1.76.2.1 llvm/lib/Target/X86/Printer.cpp:1.76.2.2
--- llvm/lib/Target/X86/Printer.cpp:1.76.2.1	Mon Mar  1 17:58:15 2004
+++ llvm/lib/Target/X86/Printer.cpp	Wed Mar 10 19:01:46 2004
@@ -7,21 +7,23 @@
 // 
 //===----------------------------------------------------------------------===//
 //
-// This file contains a printer that converts from our internal
-// representation of machine-dependent LLVM code to Intel-format
-// assembly language. This printer is the output mechanism used
-// by `llc' and `lli -print-machineinstrs' on X86.
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to Intel-format assembly language. This
+// printer is the output mechanism used by `llc' and `lli -print-machineinstrs'
+// on X86.
 //
 //===----------------------------------------------------------------------===//
 
 #include "X86.h"
 #include "X86InstrInfo.h"
+#include "X86TargetMachine.h"
 #include "llvm/Constants.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/Module.h"
 #include "llvm/Assembly/Writer.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineCodeEmitter.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Support/Mangler.h"
@@ -38,6 +40,37 @@
   cl::opt<bool> EmitCygwin("enable-cygwin-compatible-output", cl::Hidden,
          cl::desc("Emit X86 assembly code suitable for consumption by cygwin"));
 
+  struct GasBugWorkaroundEmitter : public MachineCodeEmitter {
+      GasBugWorkaroundEmitter(std::ostream& o) 
+          : O(o), OldFlags(O.flags()), firstByte(true) {
+          O << std::hex;
+      }
+
+      ~GasBugWorkaroundEmitter() {
+          O.flags(OldFlags);
+          O << "\t# ";
+      }
+
+      virtual void emitByte(unsigned char B) {
+          if (!firstByte) O << "\n\t";
+          firstByte = false;
+          O << ".byte 0x" << (unsigned) B;
+      }
+
+      // These should never be called
+      virtual void emitWord(unsigned W) { assert(0); }
+      virtual uint64_t getGlobalValueAddress(GlobalValue *V) { assert(0); }
+      virtual uint64_t getGlobalValueAddress(const std::string &Name) { assert(0); }
+      virtual uint64_t getConstantPoolEntryAddress(unsigned Index) { assert(0); }
+      virtual uint64_t getCurrentPCValue() { assert(0); }
+      virtual uint64_t forceCompilationOf(Function *F) { assert(0); }
+
+  private:
+      std::ostream& O;
+      std::ios::fmtflags OldFlags;
+      bool firstByte;
+  };
+
   struct Printer : public MachineFunctionPass {
     /// Output stream on which we're printing assembly code.
     ///
@@ -635,6 +668,7 @@
       O << ", ";
       printOp(MI->getOperand(2));
     }
+    checkImplUses(Desc);
     O << "\n";
     return;
   }
@@ -656,6 +690,7 @@
       O << ", ";
       printOp(MI->getOperand(5));
     }
+    checkImplUses(Desc);
     O << "\n";
     return;
   }
@@ -768,82 +803,37 @@
 
     const MachineOperand &Op3 = MI->getOperand(3);
 
-    // Bug: The 80-bit FP store-pop instruction "fstp XWORD PTR [...]"
+    // gas bugs:
+    //
+    // The 80-bit FP store-pop instruction "fstp XWORD PTR [...]"
     // is misassembled by gas in intel_syntax mode as its 32-bit
     // equivalent "fstp DWORD PTR [...]". Workaround: Output the raw
     // opcode bytes instead of the instruction.
-    if (MI->getOpcode() == X86::FSTP80m) {
-      if ((MI->getOperand(0).getReg() == X86::ESP)
-	  && (MI->getOperand(1).getImmedValue() == 1)) {
-        if (Op3.isImmediate() && 
-            Op3.getImmedValue() >= -128 && Op3.getImmedValue() <= 127) {
-          // 1 byte disp.
-          O << ".byte 0xdb, 0x7c, 0x24, 0x" << std::hex
-            << ((unsigned)Op3.getImmedValue() & 255) << std::dec << "\t# ";
-        } else {
-          O << ".byte 0xdb, 0xbc, 0x24\n\t";
-          O << ".long ";
-          printOp(Op3);
-          O << "\t# ";
-	}
-      }
-    }
-
-    // Bug: The 80-bit FP load instruction "fld XWORD PTR [...]" is
+    //
+    // The 80-bit FP load instruction "fld XWORD PTR [...]" is
     // misassembled by gas in intel_syntax mode as its 32-bit
     // equivalent "fld DWORD PTR [...]". Workaround: Output the raw
     // opcode bytes instead of the instruction.
-    if (MI->getOpcode() == X86::FLD80m &&
-        MI->getOperand(0).getReg() == X86::ESP &&
-        MI->getOperand(1).getImmedValue() == 1) {
-      if (Op3.isImmediate() && Op3.getImmedValue() >= -128 &&
-          Op3.getImmedValue() <= 127) {   // 1 byte displacement
-        O << ".byte 0xdb, 0x6c, 0x24, 0x" << std::hex
-          << ((unsigned)Op3.getImmedValue() & 255) << std::dec << "\t# ";
-      } else {
-        O << ".byte 0xdb, 0xac, 0x24\n\t";
-        O << ".long ";
-        printOp(Op3);
-        O << "\t# ";
-      }
-    }
-
-    // Bug: gas intel_syntax mode treats "fild QWORD PTR [...]" as an
+    //
+    // gas intel_syntax mode treats "fild QWORD PTR [...]" as an
     // invalid opcode, saying "64 bit operations are only supported in
     // 64 bit modes." libopcodes disassembles it as "fild DWORD PTR
     // [...]", which is wrong. Workaround: Output the raw opcode bytes
     // instead of the instruction.
-    if (MI->getOpcode() == X86::FILD64m &&
-        MI->getOperand(0).getReg() == X86::ESP &&
-        MI->getOperand(1).getImmedValue() == 1) {
-      if (Op3.isImmediate() && Op3.getImmedValue() >= -128 &&
-          Op3.getImmedValue() <= 127) {   // 1 byte displacement
-        O << ".byte 0xdf, 0x6c, 0x24, 0x" << std::hex
-          << ((unsigned)Op3.getImmedValue() & 255) << std::dec << "\t# ";
-      } else {
-        O << ".byte 0xdf, 0xac, 0x24\n\t";
-        O << ".long ";
-        printOp(Op3);
-        O << std::dec << "\t# ";
-      }
+    //
+    // gas intel_syntax mode treats "fistp QWORD PTR [...]" as an
+    // invalid opcode, saying "64 bit operations are only supported in
+    // 64 bit modes." libopcodes disassembles it as "fistpll DWORD PTR
+    // [...]", which is wrong. Workaround: Output the raw opcode bytes
+    // instead of the instruction.
+    if (MI->getOpcode() == X86::FSTP80m ||
+        MI->getOpcode() == X86::FLD80m ||
+        MI->getOpcode() == X86::FILD64m ||
+        MI->getOpcode() == X86::FISTP64m) {
+        GasBugWorkaroundEmitter gwe(O);
+        X86::emitInstruction(gwe, (X86InstrInfo&)TM.getInstrInfo(), *MI);
     }
 
-    // Bug: gas intel_syntax mode treats "fistp QWORD PTR [...]" as
-    // an invalid opcode, saying "64 bit operations are only
-    // supported in 64 bit modes." libopcodes disassembles it as
-    // "fistpll DWORD PTR [...]", which is wrong. Workaround: Output
-    // "fistpll DWORD PTR " instead, which is what libopcodes is
-    // expecting to see.
-    if (MI->getOpcode() == X86::FISTP64m) {
-      O << "fistpll DWORD PTR ";
-      printMemReference(MI, 0);
-      if (MI->getNumOperands() == 5) {
-	O << ", ";
-	printOp(MI->getOperand(4));
-      }
-      O << "\t# ";
-    }
-    
     O << TII.getName(MI->getOpcode()) << " ";
     O << sizePtr(Desc) << " ";
     printMemReference(MI, 0);
@@ -851,10 +841,10 @@
       O << ", ";
       printOp(MI->getOperand(4));
     }
+    checkImplUses(Desc);
     O << "\n";
     return;
   }
-
   default:
     O << "\tUNKNOWN FORM:\t\t-"; MI->print(O, TM); break;
   }


Index: llvm/lib/Target/X86/X86CodeEmitter.cpp
diff -u llvm/lib/Target/X86/X86CodeEmitter.cpp:1.46.2.1 llvm/lib/Target/X86/X86CodeEmitter.cpp:1.46.2.2
--- llvm/lib/Target/X86/X86CodeEmitter.cpp:1.46.2.1	Mon Mar  1 17:58:15 2004
+++ llvm/lib/Target/X86/X86CodeEmitter.cpp	Wed Mar 10 19:01:46 2004
@@ -168,7 +168,6 @@
 }
 
 
-
 namespace {
   class Emitter : public MachineFunctionPass {
     const X86InstrInfo  *II;
@@ -176,7 +175,9 @@
     std::map<const BasicBlock*, unsigned> BasicBlockAddrs;
     std::vector<std::pair<const BasicBlock*, unsigned> > BBRefs;
   public:
-    Emitter(MachineCodeEmitter &mce) : II(0), MCE(mce) {}
+    explicit Emitter(MachineCodeEmitter &mce) : II(0), MCE(mce) {}
+    Emitter(MachineCodeEmitter &mce, const X86InstrInfo& ii)
+        : II(&ii), MCE(mce) {}
 
     bool runOnMachineFunction(MachineFunction &MF);
 
@@ -184,11 +185,12 @@
       return "X86 Machine Code Emitter";
     }
 
+    void emitInstruction(const MachineInstr &MI);
+
   private:
-    void emitBasicBlock(MachineBasicBlock &MBB);
-    void emitInstruction(MachineInstr &MI);
+    void emitBasicBlock(const MachineBasicBlock &MBB);
 
-    void emitPCRelativeBlockAddress(BasicBlock *BB);
+    void emitPCRelativeBlockAddress(const BasicBlock *BB);
     void emitMaybePCRelativeValue(unsigned Address, bool isPCRelative);
     void emitGlobalAddressForCall(GlobalValue *GV);
     void emitGlobalAddressForPtr(GlobalValue *GV);
@@ -203,6 +205,14 @@
   };
 }
 
+// This function is required by Printer.cpp to work around gas bugs.
+void llvm::X86::emitInstruction(MachineCodeEmitter& mce,
+                                const X86InstrInfo& ii,
+                                const MachineInstr& mi)
+{
+    Emitter(mce, ii).emitInstruction(mi);
+}
+
 /// addPassesToEmitMachineCode - Add passes to the specified pass manager to get
 /// machine code emitted.  This uses a MachineCodeEmitter object to handle
 /// actually outputting the machine code and resolving things like the address
@@ -237,11 +247,11 @@
   return false;
 }
 
-void Emitter::emitBasicBlock(MachineBasicBlock &MBB) {
+void Emitter::emitBasicBlock(const MachineBasicBlock &MBB) {
   if (uint64_t Addr = MCE.getCurrentPCValue())
     BasicBlockAddrs[MBB.getBasicBlock()] = Addr;
 
-  for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ++I)
+  for (MachineBasicBlock::const_iterator I = MBB.begin(), E = MBB.end(); I != E; ++I)
     emitInstruction(*I);
 }
 
@@ -251,7 +261,7 @@
 /// (because this is a forward branch), it keeps track of the information
 /// necessary to resolve this address later (and emits a dummy value).
 ///
-void Emitter::emitPCRelativeBlockAddress(BasicBlock *BB) {
+void Emitter::emitPCRelativeBlockAddress(const BasicBlock *BB) {
   // FIXME: Emit backward branches directly
   BBRefs.push_back(std::make_pair(BB, MCE.getCurrentPCValue()));
   MCE.emitWord(0);   // Emit a dummy value
@@ -476,7 +486,7 @@
   }
 }
 
-void Emitter::emitInstruction(MachineInstr &MI) {
+void Emitter::emitInstruction(const MachineInstr &MI) {
   NumEmitted++;  // Keep track of the # of mi's emitted
 
   unsigned Opcode = MI.getOpcode();
@@ -516,7 +526,7 @@
   case X86II::RawFrm:
     MCE.emitByte(BaseOpcode);
     if (MI.getNumOperands() == 1) {
-      MachineOperand &MO = MI.getOperand(0);
+      const MachineOperand &MO = MI.getOperand(0);
       if (MO.isPCRelativeDisp()) {
         // Conditional branch... FIXME: this should use an MBB destination!
         emitPCRelativeBlockAddress(cast<BasicBlock>(MO.getVRegValue()));
@@ -536,7 +546,7 @@
   case X86II::AddRegFrm:
     MCE.emitByte(BaseOpcode + getX86RegNum(MI.getOperand(0).getReg()));
     if (MI.getNumOperands() == 2) {
-      MachineOperand &MO1 = MI.getOperand(1);
+      const MachineOperand &MO1 = MI.getOperand(1);
       if (Value *V = MO1.getVRegValueOrNull()) {
 	assert(sizeOfImm(Desc) == 4 && "Don't know how to emit non-pointer values!");
         emitGlobalAddressForPtr(cast<GlobalValue>(V));


Index: llvm/lib/Target/X86/X86InstrBuilder.h
diff -u llvm/lib/Target/X86/X86InstrBuilder.h:1.9.4.1 llvm/lib/Target/X86/X86InstrBuilder.h:1.9.4.2
--- llvm/lib/Target/X86/X86InstrBuilder.h:1.9.4.1	Mon Mar  1 17:58:15 2004
+++ llvm/lib/Target/X86/X86InstrBuilder.h	Wed Mar 10 19:01:46 2004
@@ -54,6 +54,7 @@
                                                  unsigned Scale,
                                                  unsigned IndexReg,
                                                  unsigned Disp) {
+  assert (Scale == 1 || Scale == 2 || Scale == 4 || Scale == 8);
   return MIB.addReg(BaseReg).addZImm(Scale).addReg(IndexReg).addSImm(Disp);
 }
 


Index: llvm/lib/Target/X86/X86InstrInfo.td
diff -u llvm/lib/Target/X86/X86InstrInfo.td:1.15.2.1 llvm/lib/Target/X86/X86InstrInfo.td:1.15.2.2
--- llvm/lib/Target/X86/X86InstrInfo.td:1.15.2.1	Mon Mar  1 17:58:15 2004
+++ llvm/lib/Target/X86/X86InstrInfo.td	Wed Mar 10 19:01:46 2004
@@ -53,7 +53,7 @@
 def Mem16  : MemType<2>;
 def Mem32  : MemType<3>;
 def Mem64  : MemType<4>;
-def Mem80  : MemType<4>;
+def Mem80  : MemType<5>;
 def Mem128 : MemType<6>;
 
 // FPFormat - This specifies what form this FP instruction has.  This is used by
@@ -294,9 +294,12 @@
 
 // Conditional moves.  These are modelled as X = cmovXX Y, Z.  Eventually
 // register allocated to cmovXX XY, Z
-def CMOVE16rr : I<"cmove", 0x44, MRMSrcReg>, TB, OpSize;        // if ==, R16 = R16
-def CMOVNE32rr: I<"cmovne",0x45, MRMSrcReg>, TB;                // if !=, R32 = R32
-def CMOVS32rr : I<"cmovs", 0x48, MRMSrcReg>, TB;                // if signed, R32 = R32
+def CMOVE16rr : I   <"cmove", 0x44, MRMSrcReg>, TB, OpSize;        // if ==, R16 = R16
+def CMOVE16rm : Im16<"cmove", 0x44, MRMSrcMem>, TB, OpSize;        // if ==, R16 = [mem16]
+def CMOVNE32rr: I   <"cmovne",0x45, MRMSrcReg>, TB;                // if !=, R32 = R32
+def CMOVNE32rm: Im32<"cmovne",0x45, MRMSrcMem>, TB;                // if !=, R32 = [mem32]
+def CMOVS32rr : I   <"cmovs", 0x48, MRMSrcReg>, TB;                // if signed, R32 = R32
+def CMOVS32rm : Im32<"cmovs", 0x48, MRMSrcMem>, TB;                // if signed, R32 = [mem32]
 
 // unary instructions
 def NEG8r  : I   <"neg", 0xF6, MRM3r>;         // R8  = -R8  = 0-R8
@@ -397,6 +400,7 @@
 def XOR32mi8 : Im32i8<"xor", 0x83, MRM6m     >;            // [mem32] ^= imm8
 
 // Shift instructions
+// FIXME: provide shorter instructions when imm8 == 1
 def SHL8rCL  : I     <"shl", 0xD2, MRM4r     >        , UsesCL; // R8  <<= cl
 def SHL16rCL : I     <"shl", 0xD3, MRM4r     >, OpSize, UsesCL; // R16 <<= cl
 def SHL32rCL : I     <"shl", 0xD3, MRM4r     >        , UsesCL; // R32 <<= cl


Index: llvm/lib/Target/X86/X86RegisterInfo.cpp
diff -u llvm/lib/Target/X86/X86RegisterInfo.cpp:1.40.4.1 llvm/lib/Target/X86/X86RegisterInfo.cpp:1.40.4.2
--- llvm/lib/Target/X86/X86RegisterInfo.cpp:1.40.4.1	Mon Mar  1 17:58:15 2004
+++ llvm/lib/Target/X86/X86RegisterInfo.cpp	Wed Mar 10 19:01:46 2004
@@ -260,6 +260,9 @@
     case X86::MOV8rr:  NI = MakeRMInst(X86::MOV8rm , FrameIndex, MI); break;
     case X86::MOV16rr: NI = MakeRMInst(X86::MOV16rm, FrameIndex, MI); break;
     case X86::MOV32rr: NI = MakeRMInst(X86::MOV32rm, FrameIndex, MI); break;
+    case X86::CMOVE16rr:  NI = MakeRMInst(X86::CMOVE16rm , FrameIndex, MI); break;
+    case X86::CMOVNE32rr: NI = MakeRMInst(X86::CMOVNE32rm, FrameIndex, MI); break;
+    case X86::CMOVS32rr:  NI = MakeRMInst(X86::CMOVS32rm , FrameIndex, MI); break;
     case X86::ADD8rr:  NI = MakeRMInst(X86::ADD8rm , FrameIndex, MI); break;
     case X86::ADD16rr: NI = MakeRMInst(X86::ADD16rm, FrameIndex, MI); break;
     case X86::ADD32rr: NI = MakeRMInst(X86::ADD32rm, FrameIndex, MI); break;


Index: llvm/lib/Target/X86/X86TargetMachine.cpp
diff -u llvm/lib/Target/X86/X86TargetMachine.cpp:1.44.2.1 llvm/lib/Target/X86/X86TargetMachine.cpp:1.44.2.2
--- llvm/lib/Target/X86/X86TargetMachine.cpp:1.44.2.1	Mon Mar  1 17:58:15 2004
+++ llvm/lib/Target/X86/X86TargetMachine.cpp	Wed Mar 10 19:01:46 2004
@@ -25,8 +25,6 @@
 using namespace llvm;
 
 namespace {
-  cl::opt<bool> PrintCode("print-machineinstrs",
-			  cl::desc("Print generated machine code"));
   cl::opt<bool> NoPatternISel("disable-pattern-isel", cl::init(true),
                         cl::desc("Use the 'simple' X86 instruction selector"));
   cl::opt<bool> NoSSAPeephole("disable-ssa-peephole", cl::init(true),
@@ -79,18 +77,18 @@
     PM.add(createX86SSAPeepholeOptimizerPass());
 
   // Print the instruction selected machine code...
-  if (PrintCode)
+  if (PrintMachineCode)
     PM.add(createMachineFunctionPrinterPass(&std::cerr));
 
   // Perform register allocation to convert to a concrete x86 representation
   PM.add(createRegisterAllocator());
 
-  if (PrintCode)
+  if (PrintMachineCode)
     PM.add(createMachineFunctionPrinterPass(&std::cerr));
 
   PM.add(createX86FloatingPointStackifierPass());
 
-  if (PrintCode)
+  if (PrintMachineCode)
     PM.add(createMachineFunctionPrinterPass(&std::cerr));
 
   // Insert prolog/epilog code.  Eliminate abstract frame index references...
@@ -98,7 +96,7 @@
 
   PM.add(createX86PeepholeOptimizerPass());
 
-  if (PrintCode)  // Print the register-allocated code
+  if (PrintMachineCode)  // Print the register-allocated code
     PM.add(createX86CodePrinterPass(std::cerr, *this));
 
   if (!DisableOutput)
@@ -138,18 +136,18 @@
   // FIXME: Add SSA based peephole optimizer here.
 
   // Print the instruction selected machine code...
-  if (PrintCode)
+  if (PrintMachineCode)
     PM.add(createMachineFunctionPrinterPass(&std::cerr));
 
   // Perform register allocation to convert to a concrete x86 representation
   PM.add(createRegisterAllocator());
 
-  if (PrintCode)
+  if (PrintMachineCode)
     PM.add(createMachineFunctionPrinterPass(&std::cerr));
 
   PM.add(createX86FloatingPointStackifierPass());
 
-  if (PrintCode)
+  if (PrintMachineCode)
     PM.add(createMachineFunctionPrinterPass(&std::cerr));
 
   // Insert prolog/epilog code.  Eliminate abstract frame index references...
@@ -157,7 +155,7 @@
 
   PM.add(createX86PeepholeOptimizerPass());
 
-  if (PrintCode)  // Print the register-allocated code
+  if (PrintMachineCode)  // Print the register-allocated code
     PM.add(createX86CodePrinterPass(std::cerr, TM));
 }
 


Index: llvm/lib/Target/X86/X86TargetMachine.h
diff -u llvm/lib/Target/X86/X86TargetMachine.h:1.21.2.1 llvm/lib/Target/X86/X86TargetMachine.h:1.21.2.2
--- llvm/lib/Target/X86/X86TargetMachine.h:1.21.2.1	Mon Mar  1 17:58:15 2004
+++ llvm/lib/Target/X86/X86TargetMachine.h	Wed Mar 10 19:01:46 2004
@@ -53,6 +53,13 @@
   virtual bool addPassesToEmitAssembly(PassManager &PM, std::ostream &Out);
 };
 
+  // X86::emitInstruction is implemented in X86CodeEmitter.cpp.
+  namespace X86 {
+    void emitInstruction(MachineCodeEmitter& mce,
+                         const X86InstrInfo& ii,
+                         const MachineInstr& MI);
+  }
+
 } // End llvm namespace
 
 #endif