[llvm-commits] [parallel] CVS: llvm/lib/Target/X86/FloatingPoint.cpp InstSelectSimple.cpp PeepholeOptimizer.cpp Printer.cpp README.txt X86.td X86CodeEmitter.cpp X86InstrBuilder.h X86InstrInfo.cpp X86InstrInfo.h X86InstrInfo.td X86RegisterInfo.cpp X86RegisterInfo.h X86RegisterInfo.td X86TargetMachine.cpp X86TargetMachine.h
Misha Brukman
brukman at cs.uiuc.edu
Mon Mar 1 18:05:56 PST 2004
Changes in directory llvm/lib/Target/X86:
FloatingPoint.cpp updated: 1.16 -> 1.16.2.1
InstSelectSimple.cpp updated: 1.149 -> 1.149.2.1
PeepholeOptimizer.cpp updated: 1.9 -> 1.9.2.1
Printer.cpp updated: 1.76 -> 1.76.2.1
README.txt updated: 1.10 -> 1.10.6.1
X86.td updated: 1.7 -> 1.7.6.1
X86CodeEmitter.cpp updated: 1.46 -> 1.46.2.1
X86InstrBuilder.h updated: 1.9 -> 1.9.4.1
X86InstrInfo.cpp updated: 1.18 -> 1.18.2.1
X86InstrInfo.h updated: 1.30 -> 1.30.2.1
X86InstrInfo.td updated: 1.15 -> 1.15.2.1
X86RegisterInfo.cpp updated: 1.40 -> 1.40.4.1
X86RegisterInfo.h updated: 1.18 -> 1.18.4.1
X86RegisterInfo.td updated: 1.8 -> 1.8.6.1
X86TargetMachine.cpp updated: 1.44 -> 1.44.2.1
X86TargetMachine.h updated: 1.21 -> 1.21.2.1
---
Log message:
Merge from trunk
---
Diffs of the changes: (+2300 -1128)
Index: llvm/lib/Target/X86/FloatingPoint.cpp
diff -u llvm/lib/Target/X86/FloatingPoint.cpp:1.16 llvm/lib/Target/X86/FloatingPoint.cpp:1.16.2.1
--- llvm/lib/Target/X86/FloatingPoint.cpp:1.16 Sat Dec 20 10:22:59 2003
+++ llvm/lib/Target/X86/FloatingPoint.cpp Mon Mar 1 17:58:15 2004
@@ -8,7 +8,23 @@
//===----------------------------------------------------------------------===//
//
// This file defines the pass which converts floating point instructions from
-// virtual registers into register stack instructions.
+// virtual registers into register stack instructions. This pass uses live
+// variable information to indicate where the FPn registers are used and their
+// lifetimes.
+//
+// This pass is hampered by the lack of decent CFG manipulation routines for
+// machine code. In particular, this wants to be able to split critical edges
+// as necessary, traverse the machine basic block CFG in depth-first order, and
+// allow there to be multiple machine basic blocks for each LLVM basic block
+// (needed for critical edge splitting).
+//
+// In particular, this pass currently barfs on critical edges. Because of this,
+// it requires the instruction selector to insert FP_REG_KILL instructions on
+// the exits of any basic block that has critical edges going from it, or which
+// branches to a critical basic block.
+//
+// FIXME: this is not implemented yet. The stackifier pass only works on local
+// basic blocks.
//
//===----------------------------------------------------------------------===//
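For reference, the "critical edge" mentioned above is an edge leaving a block with more than one successor and entering a block with more than one predecessor. A minimal sketch of that test, written against the same CFG helpers (pred_begin/pred_end, getTerminator) that appear later in this patch; the helper itself is illustrative only and not part of the change:

  static bool isCriticalEdge(const BasicBlock *From, const BasicBlock *To) {
    // The source block must have more than one successor...
    if (From->getTerminator()->getNumSuccessors() < 2)
      return false;
    // ...and the destination block more than one predecessor.
    pred_const_iterator PI = pred_begin(To), PE = pred_end(To);
    ++PI;                      // every reachable block has at least one pred
    return PI != PE;           // a second one makes the edge critical
  }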
@@ -21,10 +37,14 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Function.h" // FIXME: remove when using MBB CFG!
+#include "llvm/Support/CFG.h" // FIXME: remove when using MBB CFG!
#include "Support/Debug.h"
+#include "Support/DepthFirstIterator.h"
#include "Support/Statistic.h"
+#include "Support/STLExtras.h"
#include <algorithm>
-#include <iostream>
+#include <set>
using namespace llvm;
namespace {
@@ -75,7 +95,7 @@
return StackTop - 1 - getSlot(RegNo) + llvm::X86::ST0;
}
- // pushReg - Push the specifiex FP<n> register onto the stack
+ // pushReg - Push the specified FP<n> register onto the stack
void pushReg(unsigned Reg) {
assert(Reg < 8 && "Register number out of range!");
assert(StackTop < 8 && "Stack overflow!");
@@ -99,7 +119,7 @@
// Emit an fxch to update the runtime processor's version of the state
MachineInstr *MI = BuildMI(X86::FXCH, 1).addReg(STReg);
- I = 1+MBB->insert(I, MI);
+ MBB->insert(I, MI);
NumFXCH++;
}
}
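The getSTReg/pushReg bookkeeping above is easiest to see with a worked example. A standalone sketch of the same state (the real Stack, RegMap and StackTop are members of the FPS struct in this file, and getSTReg additionally adds llvm::X86::ST0 to turn the index into a register number):

  unsigned Stack[8];      // Stack[i] = virtual FP register held in slot i
  unsigned RegMap[8];     // RegMap[r] = slot currently holding FP<r>
  unsigned StackTop = 0;  // number of slots in use

  void pushReg(unsigned Reg) {         // push FP<Reg> onto the simulated stack
    Stack[StackTop] = Reg;
    RegMap[Reg] = StackTop++;
  }
  unsigned getSTReg(unsigned Reg) {    // which ST(i) holds FP<Reg> right now?
    return StackTop - 1 - RegMap[Reg]; // 0 means ST(0), the top of the stack
  }

  // pushReg(1); pushReg(3);  => stack, top first: FP3, FP1
  // getSTReg(3) == 0 (ST(0)),  getSTReg(1) == 1 (ST(1))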
@@ -110,7 +130,7 @@
pushReg(AsReg); // New register on top of stack
MachineInstr *MI = BuildMI(X86::FLDrr, 1).addReg(STReg);
- I = 1+MBB->insert(I, MI);
+ MBB->insert(I, MI);
}
// popStackAfter - Pop the current value off of the top of the FP stack
@@ -121,6 +141,7 @@
void handleZeroArgFP(MachineBasicBlock::iterator &I);
void handleOneArgFP(MachineBasicBlock::iterator &I);
+ void handleOneArgFPRW(MachineBasicBlock::iterator &I);
void handleTwoArgFP(MachineBasicBlock::iterator &I);
void handleSpecialFP(MachineBasicBlock::iterator &I);
};
@@ -135,9 +156,32 @@
LV = &getAnalysis<LiveVariables>();
StackTop = 0;
- bool Changed = false;
+ // Figure out the mapping of MBB's to BB's.
+ //
+ // FIXME: Eventually we should be able to traverse the MBB CFG directly, and
+ // we will need to extend this when one llvm basic block can codegen to
+ // multiple MBBs.
+ //
+ // FIXME again: Just use the mapping established by LiveVariables!
+ //
+ std::map<const BasicBlock*, MachineBasicBlock *> MBBMap;
for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
- Changed |= processBasicBlock(MF, *I);
+ MBBMap[I->getBasicBlock()] = I;
+
+ // Process the function in depth first order so that we process at least one
+ // of the predecessors for every reachable block in the function.
+ std::set<const BasicBlock*> Processed;
+ const BasicBlock *Entry = MF.getFunction()->begin();
+
+ bool Changed = false;
+ for (df_ext_iterator<const BasicBlock*, std::set<const BasicBlock*> >
+ I = df_ext_begin(Entry, Processed), E = df_ext_end(Entry, Processed);
+ I != E; ++I)
+ Changed |= processBasicBlock(MF, *MBBMap[*I]);
+
+ assert(MBBMap.size() == Processed.size() &&
+ "Doesn't handle unreachable code yet!");
+
return Changed;
}
@@ -150,11 +194,14 @@
MBB = &BB;
for (MachineBasicBlock::iterator I = BB.begin(); I != BB.end(); ++I) {
- MachineInstr *MI = *I;
- MachineInstr *PrevMI = I == BB.begin() ? 0 : *(I-1);
+ MachineInstr *MI = I;
unsigned Flags = TII.get(MI->getOpcode()).TSFlags;
+ if ((Flags & X86II::FPTypeMask) == X86II::NotFP)
+ continue; // Efficiently ignore non-fp insts!
- if ((Flags & X86II::FPTypeMask) == 0) continue; // Ignore non-fp insts!
+ MachineInstr *PrevMI = 0;
+ if (I != BB.begin())
+ PrevMI = prior(I);
++NumFP; // Keep track of # of pseudo instrs
DEBUG(std::cerr << "\nFPInst:\t";
@@ -176,14 +223,11 @@
});
switch (Flags & X86II::FPTypeMask) {
- case X86II::ZeroArgFP: handleZeroArgFP(I); break;
- case X86II::OneArgFP: handleOneArgFP(I); break;
-
- case X86II::OneArgFPRW: // ST(0) = fsqrt(ST(0))
- assert(0 && "FP instr type not handled yet!");
-
- case X86II::TwoArgFP: handleTwoArgFP(I); break;
- case X86II::SpecialFP: handleSpecialFP(I); break;
+ case X86II::ZeroArgFP: handleZeroArgFP(I); break;
+ case X86II::OneArgFP: handleOneArgFP(I); break; // fstp ST(0)
+ case X86II::OneArgFPRW: handleOneArgFPRW(I); break; // ST(0) = fsqrt(ST(0))
+ case X86II::TwoArgFP: handleTwoArgFP(I); break;
+ case X86II::SpecialFP: handleSpecialFP(I); break;
default: assert(0 && "Unknown FP Type!");
}
@@ -201,18 +245,20 @@
}
// Print out all of the instructions expanded to if -debug
- DEBUG(if (*I == PrevMI) {
- std::cerr<< "Just deleted pseudo instruction\n";
- } else {
- MachineBasicBlock::iterator Start = I;
- // Rewind to first instruction newly inserted.
- while (Start != BB.begin() && *(Start-1) != PrevMI) --Start;
- std::cerr << "Inserted instructions:\n\t";
- (*Start)->print(std::cerr, MF.getTarget());
- while (++Start != I+1);
- }
- dumpStack();
- );
+ DEBUG(
+ MachineBasicBlock::iterator PrevI(PrevMI);
+ if (I == PrevI) {
+ std::cerr<< "Just deleted pseudo instruction\n";
+ } else {
+ MachineBasicBlock::iterator Start = I;
+ // Rewind to first instruction newly inserted.
+ while (Start != BB.begin() && prior(Start) != PrevI) --Start;
+ std::cerr << "Inserted instructions:\n\t";
+ Start->print(std::cerr, MF.getTarget());
+ while (++Start != next(I));
+ }
+ dumpStack();
+ );
Changed = true;
}
@@ -275,13 +321,13 @@
{ X86::FDIVRrST0, X86::FDIVRPrST0 },
{ X86::FDIVrST0 , X86::FDIVPrST0 },
- { X86::FISTr16 , X86::FISTPr16 },
- { X86::FISTr32 , X86::FISTPr32 },
+ { X86::FIST16m , X86::FISTP16m },
+ { X86::FIST32m , X86::FISTP32m },
{ X86::FMULrST0 , X86::FMULPrST0 },
- { X86::FSTr32 , X86::FSTPr32 },
- { X86::FSTr64 , X86::FSTPr64 },
+ { X86::FST32m , X86::FSTP32m },
+ { X86::FST64m , X86::FSTP64m },
{ X86::FSTrr , X86::FSTPrr },
{ X86::FSUBRrST0, X86::FSUBRPrST0 },
@@ -303,20 +349,20 @@
RegMap[Stack[--StackTop]] = ~0; // Update state
// Check to see if there is a popping version of this instruction...
- int Opcode = Lookup(PopTable, ARRAY_SIZE(PopTable), (*I)->getOpcode());
+ int Opcode = Lookup(PopTable, ARRAY_SIZE(PopTable), I->getOpcode());
if (Opcode != -1) {
- (*I)->setOpcode(Opcode);
+ I->setOpcode(Opcode);
if (Opcode == X86::FUCOMPPr)
- (*I)->RemoveOperand(0);
+ I->RemoveOperand(0);
} else { // Insert an explicit pop
MachineInstr *MI = BuildMI(X86::FSTPrr, 1).addReg(X86::ST0);
- I = MBB->insert(I+1, MI);
+ I = MBB->insert(++I, MI);
}
}
static unsigned getFPReg(const MachineOperand &MO) {
- assert(MO.isPhysicalRegister() && "Expected an FP register!");
+ assert(MO.isRegister() && "Expected an FP register!");
unsigned Reg = MO.getReg();
assert(Reg >= X86::FP0 && Reg <= X86::FP6 && "Expected FP register!");
return Reg - X86::FP0;
@@ -328,9 +374,9 @@
//===----------------------------------------------------------------------===//
/// handleZeroArgFP - ST(0) = fld0 ST(0) = flds <mem>
-//
+///
void FPS::handleZeroArgFP(MachineBasicBlock::iterator &I) {
- MachineInstr *MI = *I;
+ MachineInstr *MI = I;
unsigned DestReg = getFPReg(MI->getOperand(0));
MI->RemoveOperand(0); // Remove the explicit ST(0) operand
@@ -338,32 +384,34 @@
pushReg(DestReg);
}
-/// handleOneArgFP - fst ST(0), <mem>
-//
+/// handleOneArgFP - fst <mem>, ST(0)
+///
void FPS::handleOneArgFP(MachineBasicBlock::iterator &I) {
- MachineInstr *MI = *I;
- assert(MI->getNumOperands() == 5 && "Can only handle fst* instructions!");
+ MachineInstr *MI = I;
+ assert((MI->getNumOperands() == 5 || MI->getNumOperands() == 1) &&
+ "Can only handle fst* & ftst instructions!");
- unsigned Reg = getFPReg(MI->getOperand(4));
+ // Is this the last use of the source register?
+ unsigned Reg = getFPReg(MI->getOperand(MI->getNumOperands()-1));
bool KillsSrc = false;
for (LiveVariables::killed_iterator KI = LV->killed_begin(MI),
E = LV->killed_end(MI); KI != E; ++KI)
KillsSrc |= KI->second == X86::FP0+Reg;
- // FSTPr80 and FISTPr64 are strange because there are no non-popping versions.
+ // FSTP80m and FISTP64m are strange because there are no non-popping versions.
// If we have one _and_ we don't want to pop the operand, duplicate the value
// on the stack instead of moving it. This ensures that popping the value is
// always ok.
//
- if ((MI->getOpcode() == X86::FSTPr80 ||
- MI->getOpcode() == X86::FISTPr64) && !KillsSrc) {
+ if ((MI->getOpcode() == X86::FSTP80m ||
+ MI->getOpcode() == X86::FISTP64m) && !KillsSrc) {
duplicateToTop(Reg, 7 /*temp register*/, I);
} else {
moveToTop(Reg, I); // Move to the top of the stack...
}
- MI->RemoveOperand(4); // Remove explicit ST(0) operand
+ MI->RemoveOperand(MI->getNumOperands()-1); // Remove explicit ST(0) operand
- if (MI->getOpcode() == X86::FSTPr80 || MI->getOpcode() == X86::FISTPr64) {
+ if (MI->getOpcode() == X86::FSTP80m || MI->getOpcode() == X86::FISTP64m) {
assert(StackTop > 0 && "Stack empty??");
--StackTop;
} else if (KillsSrc) { // Last use of operand?
@@ -371,6 +419,38 @@
}
}
+
+/// handleOneArgFPRW - fchs - ST(0) = -ST(0)
+///
+void FPS::handleOneArgFPRW(MachineBasicBlock::iterator &I) {
+ MachineInstr *MI = I;
+ assert(MI->getNumOperands() == 2 && "Can only handle fst* instructions!");
+
+ // Is this the last use of the source register?
+ unsigned Reg = getFPReg(MI->getOperand(1));
+ bool KillsSrc = false;
+ for (LiveVariables::killed_iterator KI = LV->killed_begin(MI),
+ E = LV->killed_end(MI); KI != E; ++KI)
+ KillsSrc |= KI->second == X86::FP0+Reg;
+
+ if (KillsSrc) {
+ // If this is the last use of the source register, just make sure it's on
+ // the top of the stack.
+ moveToTop(Reg, I);
+ assert(StackTop > 0 && "Stack cannot be empty!");
+ --StackTop;
+ pushReg(getFPReg(MI->getOperand(0)));
+ } else {
+ // If this is not the last use of the source register, _copy_ it to the top
+ // of the stack.
+ duplicateToTop(Reg, getFPReg(MI->getOperand(0)), I);
+ }
+
+ MI->RemoveOperand(1); // Drop the source operand.
+ MI->RemoveOperand(0); // Drop the destination operand.
+}
+
+
//===----------------------------------------------------------------------===//
// Define tables of various ways to map pseudo instructions
//
@@ -428,7 +508,7 @@
void FPS::handleTwoArgFP(MachineBasicBlock::iterator &I) {
ASSERT_SORTED(ForwardST0Table); ASSERT_SORTED(ReverseST0Table);
ASSERT_SORTED(ForwardSTiTable); ASSERT_SORTED(ReverseSTiTable);
- MachineInstr *MI = *I;
+ MachineInstr *MI = I;
unsigned NumOperands = MI->getNumOperands();
assert(NumOperands == 3 ||
@@ -513,7 +593,8 @@
unsigned NotTOS = (TOS == Op0) ? Op1 : Op0;
// Replace the old instruction with a new instruction
- *I = BuildMI(Opcode, 1).addReg(getSTReg(NotTOS));
+ MBB->remove(I);
+ I = MBB->insert(I, BuildMI(Opcode, 1).addReg(getSTReg(NotTOS)));
// If both operands are killed, pop one off of the stack in addition to
// overwriting the other one.
@@ -542,7 +623,7 @@
Stack[--StackTop] = ~0;
MachineInstr *MI = BuildMI(X86::FSTPrr, 1).addReg(STReg);
- I = MBB->insert(I+1, MI);
+ I = MBB->insert(++I, MI);
}
}
}
@@ -564,7 +645,7 @@
/// instructions.
///
void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) {
- MachineInstr *MI = *I;
+ MachineInstr *MI = I;
switch (MI->getOpcode()) {
default: assert(0 && "Unknown SpecialFP instruction!");
case X86::FpGETRESULT: // Appears immediately after a call returning FP type!
@@ -600,6 +681,6 @@
}
}
- I = MBB->erase(I)-1; // Remove the pseudo instruction
- delete MI;
+ I = MBB->erase(I); // Remove the pseudo instruction
+ --I;
}
Index: llvm/lib/Target/X86/InstSelectSimple.cpp
diff -u llvm/lib/Target/X86/InstSelectSimple.cpp:1.149 llvm/lib/Target/X86/InstSelectSimple.cpp:1.149.2.1
--- llvm/lib/Target/X86/InstSelectSimple.cpp:1.149 Mon Jan 12 01:22:45 2004
+++ llvm/lib/Target/X86/InstSelectSimple.cpp Mon Mar 1 17:58:15 2004
@@ -23,44 +23,27 @@
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/SSARegMap.h"
#include "llvm/Target/MRegisterInfo.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/InstVisitor.h"
+#include "llvm/Support/CFG.h"
+#include "Support/Statistic.h"
using namespace llvm;
-/// BMI - A special BuildMI variant that takes an iterator to insert the
-/// instruction at as well as a basic block. This is the version for when you
-/// have a destination register in mind.
-inline static MachineInstrBuilder BMI(MachineBasicBlock *MBB,
- MachineBasicBlock::iterator &I,
- int Opcode, unsigned NumOperands,
- unsigned DestReg) {
- assert(I >= MBB->begin() && I <= MBB->end() && "Bad iterator!");
- MachineInstr *MI = new MachineInstr(Opcode, NumOperands+1, true, true);
- I = MBB->insert(I, MI)+1;
- return MachineInstrBuilder(MI).addReg(DestReg, MOTy::Def);
-}
-
-/// BMI - A special BuildMI variant that takes an iterator to insert the
-/// instruction at as well as a basic block.
-inline static MachineInstrBuilder BMI(MachineBasicBlock *MBB,
- MachineBasicBlock::iterator &I,
- int Opcode, unsigned NumOperands) {
- assert(I >= MBB->begin() && I <= MBB->end() && "Bad iterator!");
- MachineInstr *MI = new MachineInstr(Opcode, NumOperands, true, true);
- I = MBB->insert(I, MI)+1;
- return MachineInstrBuilder(MI);
+namespace {
+ Statistic<>
+ NumFPKill("x86-codegen", "Number of FP_REG_KILL instructions added");
}
-
namespace {
struct ISel : public FunctionPass, InstVisitor<ISel> {
TargetMachine &TM;
MachineFunction *F; // The function we are compiling into
MachineBasicBlock *BB; // The current MBB we are compiling
int VarArgsFrameIndex; // FrameIndex for start of varargs area
+ int ReturnAddressIndex; // FrameIndex for the return address
std::map<Value*, unsigned> RegMap; // Mapping between Val's and SSA Regs
@@ -85,6 +68,10 @@
BB = &F->front();
+ // Set up a frame object for the return address. This is used by the
+ // llvm.returnaddress & llvm.frameaddress intrinsics.
+ ReturnAddressIndex = F->getFrameInfo()->CreateFixedObject(4, -4);
+
// Copy incoming arguments off of the stack...
LoadArgumentsToVirtualRegs(Fn);
@@ -94,6 +81,9 @@
// Select the PHI nodes
SelectPHINodes();
+ // Insert the FP_REG_KILL instructions into blocks that need them.
+ InsertFPRegKills();
+
RegMap.clear();
MBBMap.clear();
F = 0;
@@ -130,6 +120,12 @@
///
void SelectPHINodes();
+ /// InsertFPRegKills - Insert FP_REG_KILL instructions into basic blocks
+ /// that need them. This only occurs due to the floating point stackifier
+ /// not being aggressive enough to handle arbitrary global stackification.
+ ///
+ void InsertFPRegKills();
+
// Visitation methods for various instructions. These methods simply emit
// fixed X86 code for each instruction.
//
@@ -154,11 +150,11 @@
void visitSimpleBinary(BinaryOperator &B, unsigned OpcodeClass);
void visitAdd(BinaryOperator &B) { visitSimpleBinary(B, 0); }
void visitSub(BinaryOperator &B) { visitSimpleBinary(B, 1); }
- void doMultiply(MachineBasicBlock *MBB, MachineBasicBlock::iterator &MBBI,
+ void doMultiply(MachineBasicBlock *MBB, MachineBasicBlock::iterator MBBI,
unsigned DestReg, const Type *DestTy,
unsigned Op0Reg, unsigned Op1Reg);
void doMultiplyConst(MachineBasicBlock *MBB,
- MachineBasicBlock::iterator &MBBI,
+ MachineBasicBlock::iterator MBBI,
unsigned DestReg, const Type *DestTy,
unsigned Op0Reg, unsigned Op1Val);
void visitMul(BinaryOperator &B);
@@ -176,7 +172,7 @@
void visitSetCondInst(SetCondInst &I);
unsigned EmitComparison(unsigned OpNum, Value *Op0, Value *Op1,
MachineBasicBlock *MBB,
- MachineBasicBlock::iterator &MBBI);
+ MachineBasicBlock::iterator MBBI);
// Memory Instructions
void visitLoadInst(LoadInst &I);
@@ -202,41 +198,55 @@
///
void promote32(unsigned targetReg, const ValueRecord &VR);
+ // getGEPIndex - This is used to fold GEP instructions into X86 addressing
+ // expressions.
+ void getGEPIndex(MachineBasicBlock *MBB, MachineBasicBlock::iterator IP,
+ std::vector<Value*> &GEPOps,
+ std::vector<const Type*> &GEPTypes, unsigned &BaseReg,
+ unsigned &Scale, unsigned &IndexReg, unsigned &Disp);
+
+ /// isGEPFoldable - Return true if the specified GEP can be completely
+ /// folded into the addressing mode of a load/store or lea instruction.
+ bool isGEPFoldable(MachineBasicBlock *MBB,
+ Value *Src, User::op_iterator IdxBegin,
+ User::op_iterator IdxEnd, unsigned &BaseReg,
+ unsigned &Scale, unsigned &IndexReg, unsigned &Disp);
+
/// emitGEPOperation - Common code shared between visitGetElementPtrInst and
/// constant expression GEP support.
///
- void emitGEPOperation(MachineBasicBlock *BB, MachineBasicBlock::iterator&IP,
+ void emitGEPOperation(MachineBasicBlock *BB, MachineBasicBlock::iterator IP,
Value *Src, User::op_iterator IdxBegin,
User::op_iterator IdxEnd, unsigned TargetReg);
/// emitCastOperation - Common code shared between visitCastInst and
/// constant expression cast support.
- void emitCastOperation(MachineBasicBlock *BB,MachineBasicBlock::iterator&IP,
+ void emitCastOperation(MachineBasicBlock *BB,MachineBasicBlock::iterator IP,
Value *Src, const Type *DestTy, unsigned TargetReg);
/// emitSimpleBinaryOperation - Common code shared between visitSimpleBinary
/// and constant expression support.
void emitSimpleBinaryOperation(MachineBasicBlock *BB,
- MachineBasicBlock::iterator &IP,
+ MachineBasicBlock::iterator IP,
Value *Op0, Value *Op1,
unsigned OperatorClass, unsigned TargetReg);
void emitDivRemOperation(MachineBasicBlock *BB,
- MachineBasicBlock::iterator &IP,
+ MachineBasicBlock::iterator IP,
unsigned Op0Reg, unsigned Op1Reg, bool isDiv,
const Type *Ty, unsigned TargetReg);
/// emitSetCCOperation - Common code shared between visitSetCondInst and
/// constant expression support.
void emitSetCCOperation(MachineBasicBlock *BB,
- MachineBasicBlock::iterator &IP,
+ MachineBasicBlock::iterator IP,
Value *Op0, Value *Op1, unsigned Opcode,
unsigned TargetReg);
/// emitShiftOperation - Common code shared between visitShiftInst and
/// constant expression support.
void emitShiftOperation(MachineBasicBlock *MBB,
- MachineBasicBlock::iterator &IP,
+ MachineBasicBlock::iterator IP,
Value *Op, Value *ShiftAmount, bool isLeftShift,
const Type *ResultTy, unsigned DestReg);
@@ -245,7 +255,7 @@
/// specified constant into the specified register.
///
void copyConstantToRegister(MachineBasicBlock *MBB,
- MachineBasicBlock::iterator &MBBI,
+ MachineBasicBlock::iterator MBBI,
Constant *C, unsigned Reg);
/// makeAnotherReg - This method returns the next register number we haven't
@@ -285,7 +295,7 @@
return getReg(V, BB, It);
}
unsigned getReg(Value *V, MachineBasicBlock *MBB,
- MachineBasicBlock::iterator &IPt) {
+ MachineBasicBlock::iterator IPt) {
unsigned &Reg = RegMap[V];
if (Reg == 0) {
Reg = makeAnotherReg(V->getType());
@@ -300,7 +310,7 @@
RegMap.erase(V); // Assign a new name to this constant if ref'd again
} else if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
// Move the address of the global into the register
- BMI(MBB, IPt, X86::MOVir32, 1, Reg).addGlobalAddress(GV);
+ BuildMI(*MBB, IPt, X86::MOV32ri, 1, Reg).addGlobalAddress(GV);
RegMap.erase(V); // Assign a new name to this address if ref'd again
}
@@ -351,7 +361,7 @@
/// specified constant into the specified register.
///
void ISel::copyConstantToRegister(MachineBasicBlock *MBB,
- MachineBasicBlock::iterator &IP,
+ MachineBasicBlock::iterator IP,
Constant *C, unsigned R) {
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
unsigned Class = 0;
@@ -417,29 +427,28 @@
if (Class == cLong) {
// Copy the value into the register pair.
uint64_t Val = cast<ConstantInt>(C)->getRawValue();
- BMI(MBB, IP, X86::MOVir32, 1, R).addZImm(Val & 0xFFFFFFFF);
- BMI(MBB, IP, X86::MOVir32, 1, R+1).addZImm(Val >> 32);
+ BuildMI(*MBB, IP, X86::MOV32ri, 1, R).addImm(Val & 0xFFFFFFFF);
+ BuildMI(*MBB, IP, X86::MOV32ri, 1, R+1).addImm(Val >> 32);
return;
}
assert(Class <= cInt && "Type not handled yet!");
static const unsigned IntegralOpcodeTab[] = {
- X86::MOVir8, X86::MOVir16, X86::MOVir32
+ X86::MOV8ri, X86::MOV16ri, X86::MOV32ri
};
if (C->getType() == Type::BoolTy) {
- BMI(MBB, IP, X86::MOVir8, 1, R).addZImm(C == ConstantBool::True);
+ BuildMI(*MBB, IP, X86::MOV8ri, 1, R).addImm(C == ConstantBool::True);
} else {
ConstantInt *CI = cast<ConstantInt>(C);
- BMI(MBB, IP, IntegralOpcodeTab[Class], 1, R).addZImm(CI->getRawValue());
+ BuildMI(*MBB, IP, IntegralOpcodeTab[Class],1,R).addImm(CI->getRawValue());
}
} else if (ConstantFP *CFP = dyn_cast<ConstantFP>(C)) {
- double Value = CFP->getValue();
- if (Value == +0.0)
- BMI(MBB, IP, X86::FLD0, 0, R);
- else if (Value == +1.0)
- BMI(MBB, IP, X86::FLD1, 0, R);
+ if (CFP->isExactlyValue(+0.0))
+ BuildMI(*MBB, IP, X86::FLD0, 0, R);
+ else if (CFP->isExactlyValue(+1.0))
+ BuildMI(*MBB, IP, X86::FLD1, 0, R);
else {
// Otherwise we need to spill the constant to memory...
MachineConstantPool *CP = F->getConstantPool();
@@ -447,16 +456,15 @@
const Type *Ty = CFP->getType();
assert(Ty == Type::FloatTy || Ty == Type::DoubleTy && "Unknown FP type!");
- unsigned LoadOpcode = Ty == Type::FloatTy ? X86::FLDr32 : X86::FLDr64;
- addConstantPoolReference(BMI(MBB, IP, LoadOpcode, 4, R), CPI);
+ unsigned LoadOpcode = Ty == Type::FloatTy ? X86::FLD32m : X86::FLD64m;
+ addConstantPoolReference(BuildMI(*MBB, IP, LoadOpcode, 4, R), CPI);
}
} else if (isa<ConstantPointerNull>(C)) {
// Copy zero (null pointer) to the register.
- BMI(MBB, IP, X86::MOVir32, 1, R).addZImm(0);
+ BuildMI(*MBB, IP, X86::MOV32ri, 1, R).addImm(0);
} else if (ConstantPointerRef *CPR = dyn_cast<ConstantPointerRef>(C)) {
- unsigned SrcReg = getReg(CPR->getValue(), MBB, IP);
- BMI(MBB, IP, X86::MOVrr32, 1, R).addReg(SrcReg);
+ BuildMI(*MBB, IP, X86::MOV32ri, 1, R).addGlobalAddress(CPR->getValue());
} else {
std::cerr << "Offending constant: " << C << "\n";
assert(0 && "Type not handled yet!");
@@ -485,29 +493,29 @@
switch (getClassB(I->getType())) {
case cByte:
FI = MFI->CreateFixedObject(1, ArgOffset);
- addFrameReference(BuildMI(BB, X86::MOVmr8, 4, Reg), FI);
+ addFrameReference(BuildMI(BB, X86::MOV8rm, 4, Reg), FI);
break;
case cShort:
FI = MFI->CreateFixedObject(2, ArgOffset);
- addFrameReference(BuildMI(BB, X86::MOVmr16, 4, Reg), FI);
+ addFrameReference(BuildMI(BB, X86::MOV16rm, 4, Reg), FI);
break;
case cInt:
FI = MFI->CreateFixedObject(4, ArgOffset);
- addFrameReference(BuildMI(BB, X86::MOVmr32, 4, Reg), FI);
+ addFrameReference(BuildMI(BB, X86::MOV32rm, 4, Reg), FI);
break;
case cLong:
FI = MFI->CreateFixedObject(8, ArgOffset);
- addFrameReference(BuildMI(BB, X86::MOVmr32, 4, Reg), FI);
- addFrameReference(BuildMI(BB, X86::MOVmr32, 4, Reg+1), FI, 4);
+ addFrameReference(BuildMI(BB, X86::MOV32rm, 4, Reg), FI);
+ addFrameReference(BuildMI(BB, X86::MOV32rm, 4, Reg+1), FI, 4);
ArgOffset += 4; // longs require 4 additional bytes
break;
case cFP:
unsigned Opcode;
if (I->getType() == Type::FloatTy) {
- Opcode = X86::FLDr32;
+ Opcode = X86::FLD32m;
FI = MFI->CreateFixedObject(4, ArgOffset);
} else {
- Opcode = X86::FLDr64;
+ Opcode = X86::FLD64m;
FI = MFI->CreateFixedObject(8, ArgOffset);
ArgOffset += 4; // doubles require 4 additional bytes
}
@@ -536,23 +544,22 @@
const Function &LF = *F->getFunction(); // The LLVM function...
for (Function::const_iterator I = LF.begin(), E = LF.end(); I != E; ++I) {
const BasicBlock *BB = I;
- MachineBasicBlock *MBB = MBBMap[I];
+ MachineBasicBlock &MBB = *MBBMap[I];
// Loop over all of the PHI nodes in the LLVM basic block...
- unsigned NumPHIs = 0;
+ MachineBasicBlock::iterator PHIInsertPoint = MBB.begin();
for (BasicBlock::const_iterator I = BB->begin();
PHINode *PN = const_cast<PHINode*>(dyn_cast<PHINode>(I)); ++I) {
// Create a new machine instr PHI node, and insert it.
unsigned PHIReg = getReg(*PN);
- MachineInstr *PhiMI = BuildMI(X86::PHI, PN->getNumOperands(), PHIReg);
- MBB->insert(MBB->begin()+NumPHIs++, PhiMI);
+ MachineInstr *PhiMI = BuildMI(MBB, PHIInsertPoint,
+ X86::PHI, PN->getNumOperands(), PHIReg);
MachineInstr *LongPhiMI = 0;
- if (PN->getType() == Type::LongTy || PN->getType() == Type::ULongTy) {
- LongPhiMI = BuildMI(X86::PHI, PN->getNumOperands(), PHIReg+1);
- MBB->insert(MBB->begin()+NumPHIs++, LongPhiMI);
- }
+ if (PN->getType() == Type::LongTy || PN->getType() == Type::ULongTy)
+ LongPhiMI = BuildMI(MBB, PHIInsertPoint,
+ X86::PHI, PN->getNumOperands(), PHIReg+1);
// PHIValues - Map of blocks to incoming virtual registers. We use this
// so that we only initialize one incoming value for a particular block,
@@ -586,7 +593,7 @@
MachineBasicBlock::iterator PI = PredMBB->begin();
// Skip over any PHI nodes though!
- while (PI != PredMBB->end() && (*PI)->getOpcode() == X86::PHI)
+ while (PI != PredMBB->end() && PI->getOpcode() == X86::PHI)
++PI;
ValReg = getReg(Val, PredMBB, PI);
@@ -605,10 +612,106 @@
LongPhiMI->addMachineBasicBlockOperand(PredMBB);
}
}
+
+ // Now that we emitted all of the incoming values for the PHI node, make
+ // sure to reposition the InsertPoint after the PHI that we just added.
+ // This is needed because we might have inserted a constant into this
+ // block, right after the PHIs, which is before the old insert point!
+ PHIInsertPoint = LongPhiMI ? LongPhiMI : PhiMI;
+ ++PHIInsertPoint;
}
}
}
+/// RequiresFPRegKill - The floating point stackifier pass cannot insert
+/// compensation code on critical edges. As such, it requires that we kill all
+/// FP registers on the exit from any blocks that either ARE critical edges, or
+/// branch to a block that has incoming critical edges.
+///
+/// Note that this kill instruction will eventually be eliminated when
+/// restrictions in the stackifier are relaxed.
+///
+static bool RequiresFPRegKill(const BasicBlock *BB) {
+#if 0
+ for (succ_const_iterator SI = succ_begin(BB), E = succ_end(BB); SI!=E; ++SI) {
+ const BasicBlock *Succ = *SI;
+ pred_const_iterator PI = pred_begin(Succ), PE = pred_end(Succ);
+ ++PI; // Every block has at least one predecessor
+ if (PI != PE) { // If it has exactly one, this isn't crit edge
+ // If this block has more than one predecessor, check all of the
+ // predecessors to see if they have multiple successors. If so, then the
+ // block we are analyzing needs an FPRegKill.
+ for (PI = pred_begin(Succ); PI != PE; ++PI) {
+ const BasicBlock *Pred = *PI;
+ succ_const_iterator SI2 = succ_begin(Pred);
+ ++SI2; // There must be at least one successor of this block.
+ if (SI2 != succ_end(Pred))
+ return true; // Yes, we must insert the kill on this edge.
+ }
+ }
+ }
+ // If we got this far, there is no need to insert the kill instruction.
+ return false;
+#else
+ return true;
+#endif
+}
+
+// InsertFPRegKills - Insert FP_REG_KILL instructions into basic blocks that
+// need them. This only occurs due to the floating point stackifier not being
+// aggressive enough to handle arbitrary global stackification.
+//
+// Currently we insert an FP_REG_KILL instruction into each block that uses or
+// defines a floating point virtual register.
+//
+// When the global register allocators (like linear scan) finally update live
+// variable analysis, we can keep floating point values in registers across
+// portions of the CFG that do not involve critical edges. This will be a big
+// win, but we are waiting on the global allocators before we can do this.
+//
+// With a bit of work, the floating point stackifier pass can be enhanced to
+// break critical edges as needed (to make a place to put compensation code),
+// but this will require some infrastructure improvements as well.
+//
+void ISel::InsertFPRegKills() {
+ SSARegMap &RegMap = *F->getSSARegMap();
+
+ for (MachineFunction::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+ for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); I!=E; ++I)
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+ MachineOperand& MO = I->getOperand(i);
+ if (MO.isRegister() && MO.getReg()) {
+ unsigned Reg = MO.getReg();
+ if (MRegisterInfo::isVirtualRegister(Reg))
+ if (RegMap.getRegClass(Reg)->getSize() == 10)
+ goto UsesFPReg;
+ }
+ }
+ // If we haven't found an FP register use or def in this basic block, check
+ // to see if any of our successors has an FP PHI node, which will cause a
+ // copy to be inserted into this block.
+ for (succ_const_iterator SI = succ_begin(BB->getBasicBlock()),
+ E = succ_end(BB->getBasicBlock()); SI != E; ++SI) {
+ MachineBasicBlock *SBB = MBBMap[*SI];
+ for (MachineBasicBlock::iterator I = SBB->begin();
+ I != SBB->end() && I->getOpcode() == X86::PHI; ++I) {
+ if (RegMap.getRegClass(I->getOperand(0).getReg())->getSize() == 10)
+ goto UsesFPReg;
+ }
+ }
+ continue;
+ UsesFPReg:
+ // Okay, this block uses an FP register. If the block has successors (ie,
+ // it's not an unwind/return), insert the FP_REG_KILL instruction.
+ if (BB->getBasicBlock()->getTerminator()->getNumSuccessors() &&
+ RequiresFPRegKill(BB->getBasicBlock())) {
+ BuildMI(*BB, BB->getFirstTerminator(), X86::FP_REG_KILL, 0);
+ ++NumFPKill;
+ }
+ }
+}
+
+
// canFoldSetCCIntoBranch - Return the setcc instruction if we can fold it into
// the conditional branch instruction which is the only user of the cc
// instruction. This is the case if the conditional branch is the only user of
@@ -664,7 +767,7 @@
// returning the extended setcc code to use.
unsigned ISel::EmitComparison(unsigned OpNum, Value *Op0, Value *Op1,
MachineBasicBlock *MBB,
- MachineBasicBlock::iterator &IP) {
+ MachineBasicBlock::iterator IP) {
// The arguments are already supposed to be of the same type.
const Type *CompTy = Op0->getType();
unsigned Class = getClassB(CompTy);
@@ -683,9 +786,9 @@
// !=. These should have been strength reduced already anyway.
if (Op1v == 0 && (CompTy->isSigned() || OpNum < 2)) {
static const unsigned TESTTab[] = {
- X86::TESTrr8, X86::TESTrr16, X86::TESTrr32
+ X86::TEST8rr, X86::TEST16rr, X86::TEST32rr
};
- BMI(MBB, IP, TESTTab[Class], 2).addReg(Op0r).addReg(Op0r);
+ BuildMI(*MBB, IP, TESTTab[Class], 2).addReg(Op0r).addReg(Op0r);
if (OpNum == 2) return 6; // Map jl -> js
if (OpNum == 3) return 7; // Map jg -> jns
@@ -693,10 +796,19 @@
}
static const unsigned CMPTab[] = {
- X86::CMPri8, X86::CMPri16, X86::CMPri32
+ X86::CMP8ri, X86::CMP16ri, X86::CMP32ri
};
- BMI(MBB, IP, CMPTab[Class], 2).addReg(Op0r).addZImm(Op1v);
+ BuildMI(*MBB, IP, CMPTab[Class], 2).addReg(Op0r).addImm(Op1v);
+ return OpNum;
+ }
+
+ // Special case handling of comparison against +/- 0.0
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(Op1))
+ if (CFP->isExactlyValue(+0.0) || CFP->isExactlyValue(-0.0)) {
+ BuildMI(*MBB, IP, X86::FTST, 1).addReg(Op0r);
+ BuildMI(*MBB, IP, X86::FNSTSW8r, 0);
+ BuildMI(*MBB, IP, X86::SAHF, 1);
return OpNum;
}
@@ -707,18 +819,18 @@
// compare 8-bit with 8-bit, 16-bit with 16-bit, 32-bit with
// 32-bit.
case cByte:
- BMI(MBB, IP, X86::CMPrr8, 2).addReg(Op0r).addReg(Op1r);
+ BuildMI(*MBB, IP, X86::CMP8rr, 2).addReg(Op0r).addReg(Op1r);
break;
case cShort:
- BMI(MBB, IP, X86::CMPrr16, 2).addReg(Op0r).addReg(Op1r);
+ BuildMI(*MBB, IP, X86::CMP16rr, 2).addReg(Op0r).addReg(Op1r);
break;
case cInt:
- BMI(MBB, IP, X86::CMPrr32, 2).addReg(Op0r).addReg(Op1r);
+ BuildMI(*MBB, IP, X86::CMP32rr, 2).addReg(Op0r).addReg(Op1r);
break;
case cFP:
- BMI(MBB, IP, X86::FpUCOM, 2).addReg(Op0r).addReg(Op1r);
- BMI(MBB, IP, X86::FNSTSWr8, 0);
- BMI(MBB, IP, X86::SAHF, 1);
+ BuildMI(*MBB, IP, X86::FpUCOM, 2).addReg(Op0r).addReg(Op1r);
+ BuildMI(*MBB, IP, X86::FNSTSW8r, 0);
+ BuildMI(*MBB, IP, X86::SAHF, 1);
break;
case cLong:
@@ -726,9 +838,9 @@
unsigned LoTmp = makeAnotherReg(Type::IntTy);
unsigned HiTmp = makeAnotherReg(Type::IntTy);
unsigned FinalTmp = makeAnotherReg(Type::IntTy);
- BMI(MBB, IP, X86::XORrr32, 2, LoTmp).addReg(Op0r).addReg(Op1r);
- BMI(MBB, IP, X86::XORrr32, 2, HiTmp).addReg(Op0r+1).addReg(Op1r+1);
- BMI(MBB, IP, X86::ORrr32, 2, FinalTmp).addReg(LoTmp).addReg(HiTmp);
+ BuildMI(*MBB, IP, X86::XOR32rr, 2, LoTmp).addReg(Op0r).addReg(Op1r);
+ BuildMI(*MBB, IP, X86::XOR32rr, 2, HiTmp).addReg(Op0r+1).addReg(Op1r+1);
+ BuildMI(*MBB, IP, X86::OR32rr, 2, FinalTmp).addReg(LoTmp).addReg(HiTmp);
break; // Allow the sete or setne to be generated from flags set by OR
} else {
// Emit a sequence of code which compares the high and low parts once
@@ -744,13 +856,14 @@
// classes! Until then, hardcode registers so that we can deal with their
// aliases (because we don't have conditional byte moves).
//
- BMI(MBB, IP, X86::CMPrr32, 2).addReg(Op0r).addReg(Op1r);
- BMI(MBB, IP, SetCCOpcodeTab[0][OpNum], 0, X86::AL);
- BMI(MBB, IP, X86::CMPrr32, 2).addReg(Op0r+1).addReg(Op1r+1);
- BMI(MBB, IP, SetCCOpcodeTab[CompTy->isSigned()][OpNum], 0, X86::BL);
- BMI(MBB, IP, X86::IMPLICIT_DEF, 0, X86::BH);
- BMI(MBB, IP, X86::IMPLICIT_DEF, 0, X86::AH);
- BMI(MBB, IP, X86::CMOVErr16, 2, X86::BX).addReg(X86::BX).addReg(X86::AX);
+ BuildMI(*MBB, IP, X86::CMP32rr, 2).addReg(Op0r).addReg(Op1r);
+ BuildMI(*MBB, IP, SetCCOpcodeTab[0][OpNum], 0, X86::AL);
+ BuildMI(*MBB, IP, X86::CMP32rr, 2).addReg(Op0r+1).addReg(Op1r+1);
+ BuildMI(*MBB, IP, SetCCOpcodeTab[CompTy->isSigned()][OpNum], 0, X86::BL);
+ BuildMI(*MBB, IP, X86::IMPLICIT_DEF, 0, X86::BH);
+ BuildMI(*MBB, IP, X86::IMPLICIT_DEF, 0, X86::AH);
+ BuildMI(*MBB, IP, X86::CMOVE16rr, 2, X86::BX).addReg(X86::BX)
+ .addReg(X86::AX);
// NOTE: visitSetCondInst knows that the value is dumped into the BL
// register at this point for long values...
return OpNum;
@@ -775,7 +888,7 @@
/// emitSetCCOperation - Common code shared between visitSetCondInst and
/// constant expression support.
void ISel::emitSetCCOperation(MachineBasicBlock *MBB,
- MachineBasicBlock::iterator &IP,
+ MachineBasicBlock::iterator IP,
Value *Op0, Value *Op1, unsigned Opcode,
unsigned TargetReg) {
unsigned OpNum = getSetCCNumber(Opcode);
@@ -787,11 +900,11 @@
if (CompClass != cLong || OpNum < 2) {
// Handle normal comparisons with a setcc instruction...
- BMI(MBB, IP, SetCCOpcodeTab[isSigned][OpNum], 0, TargetReg);
+ BuildMI(*MBB, IP, SetCCOpcodeTab[isSigned][OpNum], 0, TargetReg);
} else {
// Handle long comparisons by copying the value which is already in BL into
// the register we want...
- BMI(MBB, IP, X86::MOVrr8, 1, TargetReg).addReg(X86::BL);
+ BuildMI(*MBB, IP, X86::MOV8rr, 1, TargetReg).addReg(X86::BL);
}
}
@@ -810,20 +923,20 @@
case cByte:
// Extend value into target register (8->32)
if (isUnsigned)
- BuildMI(BB, X86::MOVZXr32r8, 1, targetReg).addReg(Reg);
+ BuildMI(BB, X86::MOVZX32rr8, 1, targetReg).addReg(Reg);
else
- BuildMI(BB, X86::MOVSXr32r8, 1, targetReg).addReg(Reg);
+ BuildMI(BB, X86::MOVSX32rr8, 1, targetReg).addReg(Reg);
break;
case cShort:
// Extend value into target register (16->32)
if (isUnsigned)
- BuildMI(BB, X86::MOVZXr32r16, 1, targetReg).addReg(Reg);
+ BuildMI(BB, X86::MOVZX32rr16, 1, targetReg).addReg(Reg);
else
- BuildMI(BB, X86::MOVSXr32r16, 1, targetReg).addReg(Reg);
+ BuildMI(BB, X86::MOVSX32rr16, 1, targetReg).addReg(Reg);
break;
case cInt:
// Move value into target register (32->32)
- BuildMI(BB, X86::MOVrr32, 1, targetReg).addReg(Reg);
+ BuildMI(BB, X86::MOV32rr, 1, targetReg).addReg(Reg);
break;
default:
assert(0 && "Unpromotable operand class in promote32");
@@ -843,7 +956,6 @@
///
void ISel::visitReturnInst(ReturnInst &I) {
if (I.getNumOperands() == 0) {
- BuildMI(BB, X86::FP_REG_KILL, 0);
BuildMI(BB, X86::RET, 0); // Just emit a 'ret' instruction
return;
}
@@ -864,8 +976,8 @@
BuildMI(BB, X86::IMPLICIT_USE, 2).addReg(X86::ST0).addReg(X86::ESP);
break;
case cLong:
- BuildMI(BB, X86::MOVrr32, 1, X86::EAX).addReg(RetReg);
- BuildMI(BB, X86::MOVrr32, 1, X86::EDX).addReg(RetReg+1);
+ BuildMI(BB, X86::MOV32rr, 1, X86::EAX).addReg(RetReg);
+ BuildMI(BB, X86::MOV32rr, 1, X86::EDX).addReg(RetReg+1);
// Declare that EAX & EDX are live on exit
BuildMI(BB, X86::IMPLICIT_USE, 3).addReg(X86::EAX).addReg(X86::EDX)
.addReg(X86::ESP);
@@ -874,7 +986,6 @@
visitInstruction(I);
}
// Emit a 'ret' instruction
- BuildMI(BB, X86::FP_REG_KILL, 0);
BuildMI(BB, X86::RET, 0);
}
@@ -894,10 +1005,8 @@
BasicBlock *NextBB = getBlockAfter(BI.getParent()); // BB after current one
if (!BI.isConditional()) { // Unconditional branch?
- if (BI.getSuccessor(0) != NextBB) {
- BuildMI(BB, X86::FP_REG_KILL, 0);
+ if (BI.getSuccessor(0) != NextBB)
BuildMI(BB, X86::JMP, 1).addPCDisp(BI.getSuccessor(0));
- }
return;
}
@@ -907,8 +1016,7 @@
// Nope, cannot fold setcc into this branch. Emit a branch on a condition
// computed some other way...
unsigned condReg = getReg(BI.getCondition());
- BuildMI(BB, X86::CMPri8, 2).addReg(condReg).addZImm(0);
- BuildMI(BB, X86::FP_REG_KILL, 0);
+ BuildMI(BB, X86::CMP8ri, 2).addReg(condReg).addImm(0);
if (BI.getSuccessor(1) == NextBB) {
if (BI.getSuccessor(0) != NextBB)
BuildMI(BB, X86::JNE, 1).addPCDisp(BI.getSuccessor(0));
@@ -947,7 +1055,6 @@
X86::JS, X86::JNS },
};
- BuildMI(BB, X86::FP_REG_KILL, 0);
if (BI.getSuccessor(0) != NextBB) {
BuildMI(BB, OpcodeTab[isSigned][OpNum], 1).addPCDisp(BI.getSuccessor(0));
if (BI.getSuccessor(1) != NextBB)
@@ -986,41 +1093,57 @@
}
// Adjust the stack pointer for the new arguments...
- BuildMI(BB, X86::ADJCALLSTACKDOWN, 1).addZImm(NumBytes);
+ BuildMI(BB, X86::ADJCALLSTACKDOWN, 1).addImm(NumBytes);
// Arguments go on the stack in reverse order, as specified by the ABI.
unsigned ArgOffset = 0;
for (unsigned i = 0, e = Args.size(); i != e; ++i) {
- unsigned ArgReg = Args[i].Val ? getReg(Args[i].Val) : Args[i].Reg;
+ unsigned ArgReg;
switch (getClassB(Args[i].Ty)) {
case cByte:
- case cShort: {
- // Promote arg to 32 bits wide into a temporary register...
- unsigned R = makeAnotherReg(Type::UIntTy);
- promote32(R, Args[i]);
- addRegOffset(BuildMI(BB, X86::MOVrm32, 5),
- X86::ESP, ArgOffset).addReg(R);
+ case cShort:
+ if (Args[i].Val && isa<ConstantInt>(Args[i].Val)) {
+ // Zero/Sign extend constant, then stuff into memory.
+ ConstantInt *Val = cast<ConstantInt>(Args[i].Val);
+ Val = cast<ConstantInt>(ConstantExpr::getCast(Val, Type::IntTy));
+ addRegOffset(BuildMI(BB, X86::MOV32mi, 5), X86::ESP, ArgOffset)
+ .addImm(Val->getRawValue() & 0xFFFFFFFF);
+ } else {
+ // Promote arg to 32 bits wide into a temporary register...
+ ArgReg = makeAnotherReg(Type::UIntTy);
+ promote32(ArgReg, Args[i]);
+ addRegOffset(BuildMI(BB, X86::MOV32mr, 5),
+ X86::ESP, ArgOffset).addReg(ArgReg);
+ }
break;
- }
case cInt:
- addRegOffset(BuildMI(BB, X86::MOVrm32, 5),
- X86::ESP, ArgOffset).addReg(ArgReg);
+ if (Args[i].Val && isa<ConstantInt>(Args[i].Val)) {
+ unsigned Val = cast<ConstantInt>(Args[i].Val)->getRawValue();
+ addRegOffset(BuildMI(BB, X86::MOV32mi, 5),
+ X86::ESP, ArgOffset).addImm(Val);
+ } else {
+ ArgReg = Args[i].Val ? getReg(Args[i].Val) : Args[i].Reg;
+ addRegOffset(BuildMI(BB, X86::MOV32mr, 5),
+ X86::ESP, ArgOffset).addReg(ArgReg);
+ }
break;
case cLong:
- addRegOffset(BuildMI(BB, X86::MOVrm32, 5),
+ ArgReg = Args[i].Val ? getReg(Args[i].Val) : Args[i].Reg;
+ addRegOffset(BuildMI(BB, X86::MOV32mr, 5),
X86::ESP, ArgOffset).addReg(ArgReg);
- addRegOffset(BuildMI(BB, X86::MOVrm32, 5),
+ addRegOffset(BuildMI(BB, X86::MOV32mr, 5),
X86::ESP, ArgOffset+4).addReg(ArgReg+1);
ArgOffset += 4; // 8 byte entry, not 4.
break;
case cFP:
+ ArgReg = Args[i].Val ? getReg(Args[i].Val) : Args[i].Reg;
if (Args[i].Ty == Type::FloatTy) {
- addRegOffset(BuildMI(BB, X86::FSTr32, 5),
+ addRegOffset(BuildMI(BB, X86::FST32m, 5),
X86::ESP, ArgOffset).addReg(ArgReg);
} else {
assert(Args[i].Ty == Type::DoubleTy && "Unknown FP type!");
- addRegOffset(BuildMI(BB, X86::FSTr64, 5),
+ addRegOffset(BuildMI(BB, X86::FST64m, 5),
X86::ESP, ArgOffset).addReg(ArgReg);
ArgOffset += 4; // 8 byte entry, not 4.
}
@@ -1031,12 +1154,12 @@
ArgOffset += 4;
}
} else {
- BuildMI(BB, X86::ADJCALLSTACKDOWN, 1).addZImm(0);
+ BuildMI(BB, X86::ADJCALLSTACKDOWN, 1).addImm(0);
}
BB->push_back(CallMI);
- BuildMI(BB, X86::ADJCALLSTACKUP, 1).addZImm(NumBytes);
+ BuildMI(BB, X86::ADJCALLSTACKUP, 1).addImm(NumBytes);
// If there is a return value, scavenge the result from the location the call
// leaves it in...
@@ -1050,7 +1173,7 @@
// Integral results are in %eax, or the appropriate portion
// thereof.
static const unsigned regRegMove[] = {
- X86::MOVrr8, X86::MOVrr16, X86::MOVrr32
+ X86::MOV8rr, X86::MOV16rr, X86::MOV32rr
};
static const unsigned AReg[] = { X86::AL, X86::AX, X86::EAX };
BuildMI(BB, regRegMove[DestClass], 1, Ret.Reg).addReg(AReg[DestClass]);
@@ -1060,8 +1183,8 @@
BuildMI(BB, X86::FpGETRESULT, 1, Ret.Reg);
break;
case cLong: // Long values are left in EDX:EAX
- BuildMI(BB, X86::MOVrr32, 1, Ret.Reg).addReg(X86::EAX);
- BuildMI(BB, X86::MOVrr32, 1, Ret.Reg+1).addReg(X86::EDX);
+ BuildMI(BB, X86::MOV32rr, 1, Ret.Reg).addReg(X86::EAX);
+ BuildMI(BB, X86::MOV32rr, 1, Ret.Reg+1).addReg(X86::EDX);
break;
default: assert(0 && "Unknown class!");
}
@@ -1083,7 +1206,7 @@
TheCall = BuildMI(X86::CALLpcrel32, 1).addGlobalAddress(F, true);
} else { // Emit an indirect call...
unsigned Reg = getReg(CI.getCalledValue());
- TheCall = BuildMI(X86::CALLr32, 1).addReg(Reg);
+ TheCall = BuildMI(X86::CALL32r, 1).addReg(Reg);
}
std::vector<ValueRecord> Args;
@@ -1108,6 +1231,10 @@
case Intrinsic::va_start:
case Intrinsic::va_copy:
case Intrinsic::va_end:
+ case Intrinsic::returnaddress:
+ case Intrinsic::frameaddress:
+ case Intrinsic::memcpy:
+ case Intrinsic::memset:
// We directly implement these intrinsics
break;
default:
@@ -1129,16 +1256,146 @@
case Intrinsic::va_start:
// Get the address of the first vararg value...
TmpReg1 = getReg(CI);
- addFrameReference(BuildMI(BB, X86::LEAr32, 5, TmpReg1), VarArgsFrameIndex);
+ addFrameReference(BuildMI(BB, X86::LEA32r, 5, TmpReg1), VarArgsFrameIndex);
return;
case Intrinsic::va_copy:
TmpReg1 = getReg(CI);
TmpReg2 = getReg(CI.getOperand(1));
- BuildMI(BB, X86::MOVrr32, 1, TmpReg1).addReg(TmpReg2);
+ BuildMI(BB, X86::MOV32rr, 1, TmpReg1).addReg(TmpReg2);
return;
case Intrinsic::va_end: return; // Noop on X86
+ case Intrinsic::returnaddress:
+ case Intrinsic::frameaddress:
+ TmpReg1 = getReg(CI);
+ if (cast<Constant>(CI.getOperand(1))->isNullValue()) {
+ if (ID == Intrinsic::returnaddress) {
+ // Just load the return address
+ addFrameReference(BuildMI(BB, X86::MOV32rm, 4, TmpReg1),
+ ReturnAddressIndex);
+ } else {
+ addFrameReference(BuildMI(BB, X86::LEA32r, 4, TmpReg1),
+ ReturnAddressIndex, -4);
+ }
+ } else {
+ // Values other than zero are not implemented yet.
+ BuildMI(BB, X86::MOV32ri, 1, TmpReg1).addImm(0);
+ }
+ return;
+
+ case Intrinsic::memcpy: {
+ assert(CI.getNumOperands() == 5 && "Illegal llvm.memcpy call!");
+ unsigned Align = 1;
+ if (ConstantInt *AlignC = dyn_cast<ConstantInt>(CI.getOperand(4))) {
+ Align = AlignC->getRawValue();
+ if (Align == 0) Align = 1;
+ }
+
+ // Turn the byte code into # iterations
+ unsigned CountReg;
+ unsigned Opcode;
+ switch (Align & 3) {
+ case 2: // WORD aligned
+ if (ConstantInt *I = dyn_cast<ConstantInt>(CI.getOperand(3))) {
+ CountReg = getReg(ConstantUInt::get(Type::UIntTy, I->getRawValue()/2));
+ } else {
+ CountReg = makeAnotherReg(Type::IntTy);
+ unsigned ByteReg = getReg(CI.getOperand(3));
+ BuildMI(BB, X86::SHR32ri, 2, CountReg).addReg(ByteReg).addImm(1);
+ }
+ Opcode = X86::REP_MOVSW;
+ break;
+ case 0: // DWORD aligned
+ if (ConstantInt *I = dyn_cast<ConstantInt>(CI.getOperand(3))) {
+ CountReg = getReg(ConstantUInt::get(Type::UIntTy, I->getRawValue()/4));
+ } else {
+ CountReg = makeAnotherReg(Type::IntTy);
+ unsigned ByteReg = getReg(CI.getOperand(3));
+ BuildMI(BB, X86::SHR32ri, 2, CountReg).addReg(ByteReg).addImm(2);
+ }
+ Opcode = X86::REP_MOVSD;
+ break;
+ default: // BYTE aligned
+ CountReg = getReg(CI.getOperand(3));
+ Opcode = X86::REP_MOVSB;
+ break;
+ }
+
+ // No matter what the alignment is, we put the source in ESI, the
+ // destination in EDI, and the count in ECX.
+ TmpReg1 = getReg(CI.getOperand(1));
+ TmpReg2 = getReg(CI.getOperand(2));
+ BuildMI(BB, X86::MOV32rr, 1, X86::ECX).addReg(CountReg);
+ BuildMI(BB, X86::MOV32rr, 1, X86::EDI).addReg(TmpReg1);
+ BuildMI(BB, X86::MOV32rr, 1, X86::ESI).addReg(TmpReg2);
+ BuildMI(BB, Opcode, 0);
+ return;
+ }
+ case Intrinsic::memset: {
+ assert(CI.getNumOperands() == 5 && "Illegal llvm.memset call!");
+ unsigned Align = 1;
+ if (ConstantInt *AlignC = dyn_cast<ConstantInt>(CI.getOperand(4))) {
+ Align = AlignC->getRawValue();
+ if (Align == 0) Align = 1;
+ }
+
+ // Turn the byte code into # iterations
+ unsigned CountReg;
+ unsigned Opcode;
+ if (ConstantInt *ValC = dyn_cast<ConstantInt>(CI.getOperand(2))) {
+ unsigned Val = ValC->getRawValue() & 255;
+
+ // If the value is a constant, then we can potentially use larger copies.
+ switch (Align & 3) {
+ case 2: // WORD aligned
+ if (ConstantInt *I = dyn_cast<ConstantInt>(CI.getOperand(3))) {
+ CountReg =getReg(ConstantUInt::get(Type::UIntTy, I->getRawValue()/2));
+ } else {
+ CountReg = makeAnotherReg(Type::IntTy);
+ unsigned ByteReg = getReg(CI.getOperand(3));
+ BuildMI(BB, X86::SHR32ri, 2, CountReg).addReg(ByteReg).addImm(1);
+ }
+ BuildMI(BB, X86::MOV16ri, 1, X86::AX).addImm((Val << 8) | Val);
+ Opcode = X86::REP_STOSW;
+ break;
+ case 0: // DWORD aligned
+ if (ConstantInt *I = dyn_cast<ConstantInt>(CI.getOperand(3))) {
+ CountReg =getReg(ConstantUInt::get(Type::UIntTy, I->getRawValue()/4));
+ } else {
+ CountReg = makeAnotherReg(Type::IntTy);
+ unsigned ByteReg = getReg(CI.getOperand(3));
+ BuildMI(BB, X86::SHR32ri, 2, CountReg).addReg(ByteReg).addImm(2);
+ }
+ Val = (Val << 8) | Val;
+ BuildMI(BB, X86::MOV32ri, 1, X86::EAX).addImm((Val << 16) | Val);
+ Opcode = X86::REP_STOSD;
+ break;
+ default: // BYTE aligned
+ CountReg = getReg(CI.getOperand(3));
+ BuildMI(BB, X86::MOV8ri, 1, X86::AL).addImm(Val);
+ Opcode = X86::REP_STOSB;
+ break;
+ }
+ } else {
+ // If it's not a constant value we are storing, just fall back. We could
+ // try to be clever to form 16 bit and 32 bit values, but we don't yet.
+ unsigned ValReg = getReg(CI.getOperand(2));
+ BuildMI(BB, X86::MOV8rr, 1, X86::AL).addReg(ValReg);
+ CountReg = getReg(CI.getOperand(3));
+ Opcode = X86::REP_STOSB;
+ }
+
+ // No matter what the alignment is, we put the fill value in AL/AX/EAX, the
+ // destination in EDI, and the count in ECX.
+ TmpReg1 = getReg(CI.getOperand(1));
+ //TmpReg2 = getReg(CI.getOperand(2));
+ BuildMI(BB, X86::MOV32rr, 1, X86::ECX).addReg(CountReg);
+ BuildMI(BB, X86::MOV32rr, 1, X86::EDI).addReg(TmpReg1);
+ BuildMI(BB, Opcode, 0);
+ return;
+ }
+
default: assert(0 && "Error: unknown intrinsics should have been lowered!");
}
}
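Both expansions above follow the same alignment dispatch: shift the byte count down to an element count and pick the matching rep-prefixed string opcode. A condensed restatement of the llvm.memcpy case (a hypothetical helper, not code from this patch):

  static void pickMemcpyOp(unsigned Align, unsigned &Shift, unsigned &Opcode) {
    switch (Align & 3) {
    case 0:  Shift = 2; Opcode = X86::REP_MOVSD; break; // dword aligned: bytes >> 2
    case 2:  Shift = 1; Opcode = X86::REP_MOVSW; break; // word aligned:  bytes >> 1
    default: Shift = 0; Opcode = X86::REP_MOVSB; break; // byte aligned:  bytes as-is
    }
  }

  // The resulting count goes in ECX and the destination in EDI; memcpy also
  // puts the source in ESI, while memset puts the fill value in AL/AX/EAX.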
@@ -1162,40 +1419,47 @@
/// and constant expression support.
///
void ISel::emitSimpleBinaryOperation(MachineBasicBlock *MBB,
- MachineBasicBlock::iterator &IP,
+ MachineBasicBlock::iterator IP,
Value *Op0, Value *Op1,
unsigned OperatorClass, unsigned DestReg) {
unsigned Class = getClassB(Op0->getType());
// sub 0, X -> neg X
if (OperatorClass == 1 && Class != cLong)
- if (ConstantInt *CI = dyn_cast<ConstantInt>(Op0))
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Op0)) {
if (CI->isNullValue()) {
unsigned op1Reg = getReg(Op1, MBB, IP);
switch (Class) {
default: assert(0 && "Unknown class for this function!");
case cByte:
- BMI(MBB, IP, X86::NEGr8, 1, DestReg).addReg(op1Reg);
+ BuildMI(*MBB, IP, X86::NEG8r, 1, DestReg).addReg(op1Reg);
return;
case cShort:
- BMI(MBB, IP, X86::NEGr16, 1, DestReg).addReg(op1Reg);
+ BuildMI(*MBB, IP, X86::NEG16r, 1, DestReg).addReg(op1Reg);
return;
case cInt:
- BMI(MBB, IP, X86::NEGr32, 1, DestReg).addReg(op1Reg);
+ BuildMI(*MBB, IP, X86::NEG32r, 1, DestReg).addReg(op1Reg);
return;
}
}
+ } else if (ConstantFP *CFP = dyn_cast<ConstantFP>(Op0))
+ if (CFP->isExactlyValue(-0.0)) {
+ // -0.0 - X === -X
+ unsigned op1Reg = getReg(Op1, MBB, IP);
+ BuildMI(*MBB, IP, X86::FCHS, 1, DestReg).addReg(op1Reg);
+ return;
+ }
if (!isa<ConstantInt>(Op1) || Class == cLong) {
static const unsigned OpcodeTab[][4] = {
// Arithmetic operators
- { X86::ADDrr8, X86::ADDrr16, X86::ADDrr32, X86::FpADD }, // ADD
- { X86::SUBrr8, X86::SUBrr16, X86::SUBrr32, X86::FpSUB }, // SUB
+ { X86::ADD8rr, X86::ADD16rr, X86::ADD32rr, X86::FpADD }, // ADD
+ { X86::SUB8rr, X86::SUB16rr, X86::SUB32rr, X86::FpSUB }, // SUB
// Bitwise operators
- { X86::ANDrr8, X86::ANDrr16, X86::ANDrr32, 0 }, // AND
- { X86:: ORrr8, X86:: ORrr16, X86:: ORrr32, 0 }, // OR
- { X86::XORrr8, X86::XORrr16, X86::XORrr32, 0 }, // XOR
+ { X86::AND8rr, X86::AND16rr, X86::AND32rr, 0 }, // AND
+ { X86:: OR8rr, X86:: OR16rr, X86:: OR32rr, 0 }, // OR
+ { X86::XOR8rr, X86::XOR16rr, X86::XOR32rr, 0 }, // XOR
};
bool isLong = false;
@@ -1208,13 +1472,13 @@
assert(Opcode && "Floating point arguments to logical inst?");
unsigned Op0r = getReg(Op0, MBB, IP);
unsigned Op1r = getReg(Op1, MBB, IP);
- BMI(MBB, IP, Opcode, 2, DestReg).addReg(Op0r).addReg(Op1r);
+ BuildMI(*MBB, IP, Opcode, 2, DestReg).addReg(Op0r).addReg(Op1r);
if (isLong) { // Handle the upper 32 bits of long values...
static const unsigned TopTab[] = {
- X86::ADCrr32, X86::SBBrr32, X86::ANDrr32, X86::ORrr32, X86::XORrr32
+ X86::ADC32rr, X86::SBB32rr, X86::AND32rr, X86::OR32rr, X86::XOR32rr
};
- BMI(MBB, IP, TopTab[OperatorClass], 2,
+ BuildMI(*MBB, IP, TopTab[OperatorClass], 2,
DestReg+1).addReg(Op0r+1).addReg(Op1r+1);
}
return;
@@ -1226,34 +1490,34 @@
// xor X, -1 -> not X
if (OperatorClass == 4 && Op1C->isAllOnesValue()) {
- static unsigned const NOTTab[] = { X86::NOTr8, X86::NOTr16, X86::NOTr32 };
- BMI(MBB, IP, NOTTab[Class], 1, DestReg).addReg(Op0r);
+ static unsigned const NOTTab[] = { X86::NOT8r, X86::NOT16r, X86::NOT32r };
+ BuildMI(*MBB, IP, NOTTab[Class], 1, DestReg).addReg(Op0r);
return;
}
// add X, -1 -> dec X
if (OperatorClass == 0 && Op1C->isAllOnesValue()) {
- static unsigned const DECTab[] = { X86::DECr8, X86::DECr16, X86::DECr32 };
- BMI(MBB, IP, DECTab[Class], 1, DestReg).addReg(Op0r);
+ static unsigned const DECTab[] = { X86::DEC8r, X86::DEC16r, X86::DEC32r };
+ BuildMI(*MBB, IP, DECTab[Class], 1, DestReg).addReg(Op0r);
return;
}
// add X, 1 -> inc X
if (OperatorClass == 0 && Op1C->equalsInt(1)) {
- static unsigned const DECTab[] = { X86::INCr8, X86::INCr16, X86::INCr32 };
- BMI(MBB, IP, DECTab[Class], 1, DestReg).addReg(Op0r);
+ static unsigned const DECTab[] = { X86::INC8r, X86::INC16r, X86::INC32r };
+ BuildMI(*MBB, IP, DECTab[Class], 1, DestReg).addReg(Op0r);
return;
}
static const unsigned OpcodeTab[][3] = {
// Arithmetic operators
- { X86::ADDri8, X86::ADDri16, X86::ADDri32 }, // ADD
- { X86::SUBri8, X86::SUBri16, X86::SUBri32 }, // SUB
+ { X86::ADD8ri, X86::ADD16ri, X86::ADD32ri }, // ADD
+ { X86::SUB8ri, X86::SUB16ri, X86::SUB32ri }, // SUB
// Bitwise operators
- { X86::ANDri8, X86::ANDri16, X86::ANDri32 }, // AND
- { X86:: ORri8, X86:: ORri16, X86:: ORri32 }, // OR
- { X86::XORri8, X86::XORri16, X86::XORri32 }, // XOR
+ { X86::AND8ri, X86::AND16ri, X86::AND32ri }, // AND
+ { X86:: OR8ri, X86:: OR16ri, X86:: OR32ri }, // OR
+ { X86::XOR8ri, X86::XOR16ri, X86::XOR32ri }, // XOR
};
assert(Class < 3 && "General code handles 64-bit integer types!");
@@ -1262,31 +1526,31 @@
// Mask off any upper bits of the constant, if there are any...
Op1v &= (1ULL << (8 << Class)) - 1;
- BMI(MBB, IP, Opcode, 2, DestReg).addReg(Op0r).addZImm(Op1v);
+ BuildMI(*MBB, IP, Opcode, 2, DestReg).addReg(Op0r).addImm(Op1v);
}
/// doMultiply - Emit appropriate instructions to multiply together the
/// registers op0Reg and op1Reg, and put the result in DestReg. The type of the
/// result should be given as DestTy.
///
-void ISel::doMultiply(MachineBasicBlock *MBB, MachineBasicBlock::iterator &MBBI,
+void ISel::doMultiply(MachineBasicBlock *MBB, MachineBasicBlock::iterator MBBI,
unsigned DestReg, const Type *DestTy,
unsigned op0Reg, unsigned op1Reg) {
unsigned Class = getClass(DestTy);
switch (Class) {
case cFP: // Floating point multiply
- BMI(BB, MBBI, X86::FpMUL, 2, DestReg).addReg(op0Reg).addReg(op1Reg);
+ BuildMI(*MBB, MBBI, X86::FpMUL, 2, DestReg).addReg(op0Reg).addReg(op1Reg);
return;
case cInt:
case cShort:
- BMI(BB, MBBI, Class == cInt ? X86::IMULrr32 : X86::IMULrr16, 2, DestReg)
+ BuildMI(*MBB, MBBI, Class == cInt ? X86::IMUL32rr:X86::IMUL16rr, 2, DestReg)
.addReg(op0Reg).addReg(op1Reg);
return;
case cByte:
// Must use the MUL instruction, which forces use of AL...
- BMI(MBB, MBBI, X86::MOVrr8, 1, X86::AL).addReg(op0Reg);
- BMI(MBB, MBBI, X86::MULr8, 1).addReg(op1Reg);
- BMI(MBB, MBBI, X86::MOVrr8, 1, DestReg).addReg(X86::AL);
+ BuildMI(*MBB, MBBI, X86::MOV8rr, 1, X86::AL).addReg(op0Reg);
+ BuildMI(*MBB, MBBI, X86::MUL8r, 1).addReg(op1Reg);
+ BuildMI(*MBB, MBBI, X86::MOV8rr, 1, DestReg).addReg(X86::AL);
return;
default:
case cLong: assert(0 && "doMultiply cannot operate on LONG values!");
@@ -1307,7 +1571,7 @@
}
void ISel::doMultiplyConst(MachineBasicBlock *MBB,
- MachineBasicBlock::iterator &IP,
+ MachineBasicBlock::iterator IP,
unsigned DestReg, const Type *DestTy,
unsigned op0Reg, unsigned ConstRHS) {
unsigned Class = getClass(DestTy);
@@ -1317,32 +1581,32 @@
switch (Class) {
default: assert(0 && "Unknown class for this function!");
case cByte:
- BMI(MBB, IP, X86::SHLir32, 2, DestReg).addReg(op0Reg).addZImm(Shift-1);
+ BuildMI(*MBB, IP, X86::SHL32ri,2, DestReg).addReg(op0Reg).addImm(Shift-1);
return;
case cShort:
- BMI(MBB, IP, X86::SHLir32, 2, DestReg).addReg(op0Reg).addZImm(Shift-1);
+ BuildMI(*MBB, IP, X86::SHL32ri,2, DestReg).addReg(op0Reg).addImm(Shift-1);
return;
case cInt:
- BMI(MBB, IP, X86::SHLir32, 2, DestReg).addReg(op0Reg).addZImm(Shift-1);
+ BuildMI(*MBB, IP, X86::SHL32ri,2, DestReg).addReg(op0Reg).addImm(Shift-1);
return;
}
}
if (Class == cShort) {
- BMI(MBB, IP, X86::IMULri16, 2, DestReg).addReg(op0Reg).addZImm(ConstRHS);
+ BuildMI(*MBB, IP, X86::IMUL16rri,2,DestReg).addReg(op0Reg).addImm(ConstRHS);
return;
} else if (Class == cInt) {
- BMI(MBB, IP, X86::IMULri32, 2, DestReg).addReg(op0Reg).addZImm(ConstRHS);
+ BuildMI(*MBB, IP, X86::IMUL32rri,2,DestReg).addReg(op0Reg).addImm(ConstRHS);
return;
}
// Most general case, emit a normal multiply...
- static const unsigned MOVirTab[] = {
- X86::MOVir8, X86::MOVir16, X86::MOVir32
+ static const unsigned MOVriTab[] = {
+ X86::MOV8ri, X86::MOV16ri, X86::MOV32ri
};
unsigned TmpReg = makeAnotherReg(DestTy);
- BMI(MBB, IP, MOVirTab[Class], 1, TmpReg).addZImm(ConstRHS);
+ BuildMI(*MBB, IP, MOVriTab[Class], 1, TmpReg).addImm(ConstRHS);
// Emit a MUL to multiply the register holding the index by
// elementSize, putting the result in OffsetReg.
@@ -1372,26 +1636,26 @@
// Long value. We have to do things the hard way...
// Multiply the two low parts... capturing carry into EDX
- BuildMI(BB, X86::MOVrr32, 1, X86::EAX).addReg(Op0Reg);
- BuildMI(BB, X86::MULr32, 1).addReg(Op1Reg); // AL*BL
+ BuildMI(BB, X86::MOV32rr, 1, X86::EAX).addReg(Op0Reg);
+ BuildMI(BB, X86::MUL32r, 1).addReg(Op1Reg); // AL*BL
unsigned OverflowReg = makeAnotherReg(Type::UIntTy);
- BuildMI(BB, X86::MOVrr32, 1, DestReg).addReg(X86::EAX); // AL*BL
- BuildMI(BB, X86::MOVrr32, 1, OverflowReg).addReg(X86::EDX); // AL*BL >> 32
+ BuildMI(BB, X86::MOV32rr, 1, DestReg).addReg(X86::EAX); // AL*BL
+ BuildMI(BB, X86::MOV32rr, 1, OverflowReg).addReg(X86::EDX); // AL*BL >> 32
MachineBasicBlock::iterator MBBI = BB->end();
unsigned AHBLReg = makeAnotherReg(Type::UIntTy); // AH*BL
- BMI(BB, MBBI, X86::IMULrr32, 2, AHBLReg).addReg(Op0Reg+1).addReg(Op1Reg);
+ BuildMI(*BB, MBBI, X86::IMUL32rr,2,AHBLReg).addReg(Op0Reg+1).addReg(Op1Reg);
unsigned AHBLplusOverflowReg = makeAnotherReg(Type::UIntTy);
- BuildMI(BB, X86::ADDrr32, 2, // AH*BL+(AL*BL >> 32)
+ BuildMI(*BB, MBBI, X86::ADD32rr, 2, // AH*BL+(AL*BL >> 32)
AHBLplusOverflowReg).addReg(AHBLReg).addReg(OverflowReg);
MBBI = BB->end();
unsigned ALBHReg = makeAnotherReg(Type::UIntTy); // AL*BH
- BMI(BB, MBBI, X86::IMULrr32, 2, ALBHReg).addReg(Op0Reg).addReg(Op1Reg+1);
+ BuildMI(*BB, MBBI, X86::IMUL32rr,2,ALBHReg).addReg(Op0Reg).addReg(Op1Reg+1);
- BuildMI(BB, X86::ADDrr32, 2, // AL*BH + AH*BL + (AL*BL >> 32)
+ BuildMI(*BB, MBBI, X86::ADD32rr, 2, // AL*BH + AH*BL + (AL*BL >> 32)
DestReg+1).addReg(AHBLplusOverflowReg).addReg(ALBHReg);
}
}
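For reference, a minimal self-contained C++ sketch (not part of the patch, purely illustrative) of what the MUL/IMUL/ADD sequence emitted above computes for a 64x64->64 multiply built out of 32-bit halves; AHi*BHi can never reach the low 64 bits, so it is never formed:

    #include <cstdint>

    // MUL32r produces EDX:EAX = ALo*BLo; the two cross products are then
    // added into the high word, matching the instructions emitted above.
    uint64_t mul64_via_32bit_parts(uint32_t ALo, uint32_t AHi,
                                   uint32_t BLo, uint32_t BHi) {
      uint64_t ALoBLo = (uint64_t)ALo * BLo;       // MUL32r:  EDX:EAX = ALo*BLo
      uint32_t Lo     = (uint32_t)ALoBLo;          // DestReg   <- EAX
      uint32_t Carry  = (uint32_t)(ALoBLo >> 32);  // Overflow  <- EDX
      uint32_t Hi     = AHi * BLo + Carry;         // IMUL32rr + ADD32rr
      Hi += ALo * BHi;                             // IMUL32rr + ADD32rr
      return ((uint64_t)Hi << 32) | Lo;
    }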
@@ -1413,14 +1677,14 @@
}
void ISel::emitDivRemOperation(MachineBasicBlock *BB,
- MachineBasicBlock::iterator &IP,
+ MachineBasicBlock::iterator IP,
unsigned Op0Reg, unsigned Op1Reg, bool isDiv,
const Type *Ty, unsigned ResultReg) {
unsigned Class = getClass(Ty);
switch (Class) {
case cFP: // Floating point divide
if (isDiv) {
- BMI(BB, IP, X86::FpDIV, 2, ResultReg).addReg(Op0Reg).addReg(Op1Reg);
+ BuildMI(*BB, IP, X86::FpDIV, 2, ResultReg).addReg(Op0Reg).addReg(Op1Reg);
} else { // Floating point remainder...
MachineInstr *TheCall =
BuildMI(X86::CALLpcrel32, 1).addExternalSymbol("fmod", true);
@@ -1450,14 +1714,14 @@
}
static const unsigned Regs[] ={ X86::AL , X86::AX , X86::EAX };
- static const unsigned MovOpcode[]={ X86::MOVrr8, X86::MOVrr16, X86::MOVrr32 };
- static const unsigned SarOpcode[]={ X86::SARir8, X86::SARir16, X86::SARir32 };
- static const unsigned ClrOpcode[]={ X86::MOVir8, X86::MOVir16, X86::MOVir32 };
+ static const unsigned MovOpcode[]={ X86::MOV8rr, X86::MOV16rr, X86::MOV32rr };
+ static const unsigned SarOpcode[]={ X86::SAR8ri, X86::SAR16ri, X86::SAR32ri };
+ static const unsigned ClrOpcode[]={ X86::MOV8ri, X86::MOV16ri, X86::MOV32ri };
static const unsigned ExtRegs[] ={ X86::AH , X86::DX , X86::EDX };
static const unsigned DivOpcode[][4] = {
- { X86::DIVr8 , X86::DIVr16 , X86::DIVr32 , 0 }, // Unsigned division
- { X86::IDIVr8, X86::IDIVr16, X86::IDIVr32, 0 }, // Signed division
+ { X86::DIV8r , X86::DIV16r , X86::DIV32r , 0 }, // Unsigned division
+ { X86::IDIV8r, X86::IDIV16r, X86::IDIV32r, 0 }, // Signed division
};
bool isSigned = Ty->isSigned();
@@ -1465,26 +1729,26 @@
unsigned ExtReg = ExtRegs[Class];
// Put the first operand into one of the A registers...
- BMI(BB, IP, MovOpcode[Class], 1, Reg).addReg(Op0Reg);
+ BuildMI(*BB, IP, MovOpcode[Class], 1, Reg).addReg(Op0Reg);
if (isSigned) {
// Emit a sign extension instruction...
unsigned ShiftResult = makeAnotherReg(Ty);
- BMI(BB, IP, SarOpcode[Class], 2, ShiftResult).addReg(Op0Reg).addZImm(31);
- BMI(BB, IP, MovOpcode[Class], 1, ExtReg).addReg(ShiftResult);
+ BuildMI(*BB, IP, SarOpcode[Class], 2,ShiftResult).addReg(Op0Reg).addImm(31);
+ BuildMI(*BB, IP, MovOpcode[Class], 1, ExtReg).addReg(ShiftResult);
} else {
// If unsigned, emit a zeroing instruction... (reg = 0)
- BMI(BB, IP, ClrOpcode[Class], 2, ExtReg).addZImm(0);
+ BuildMI(*BB, IP, ClrOpcode[Class], 2, ExtReg).addImm(0);
}
// Emit the appropriate divide or remainder instruction...
- BMI(BB, IP, DivOpcode[isSigned][Class], 1).addReg(Op1Reg);
+ BuildMI(*BB, IP, DivOpcode[isSigned][Class], 1).addReg(Op1Reg);
// Figure out which register we want to pick the result out of...
unsigned DestReg = isDiv ? Reg : ExtReg;
// Put the result into the destination register...
- BMI(BB, IP, MovOpcode[Class], 1, ResultReg).addReg(DestReg);
+ BuildMI(*BB, IP, MovOpcode[Class], 1, ResultReg).addReg(DestReg);
}
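A small sketch, using the 32-bit signed case as representative, of the semantics the MOV/SAR/IDIV/MOV sequence above implements (illustrative only, not the pass itself):

    #include <cstdint>

    // EDX receives the sign-extension of the dividend, IDIV divides the
    // combined EDX:EAX, and the quotient/remainder come back in EAX/EDX.
    // The unsigned path zeroes EDX instead of sign-extending.
    void sdiv32(int32_t Num, int32_t Den, int32_t &Quot, int32_t &Rem) {
      uint32_t Ext = (uint32_t)(Num >> 31);                      // SAR 31 -> EDX
      int64_t Dividend = (int64_t)(((uint64_t)Ext << 32) | (uint32_t)Num);
      Quot = (int32_t)(Dividend / Den);                          // IDIV32r -> EAX
      Rem  = (int32_t)(Dividend % Den);                          // IDIV32r -> EDX
    }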
@@ -1503,7 +1767,7 @@
/// emitShiftOperation - Common code shared between visitShiftInst and
/// constant expression support.
void ISel::emitShiftOperation(MachineBasicBlock *MBB,
- MachineBasicBlock::iterator &IP,
+ MachineBasicBlock::iterator IP,
Value *Op, Value *ShiftAmount, bool isLeftShift,
const Type *ResultTy, unsigned DestReg) {
unsigned SrcReg = getReg (Op, MBB, IP);
@@ -1511,17 +1775,17 @@
unsigned Class = getClass (ResultTy);
static const unsigned ConstantOperand[][4] = {
- { X86::SHRir8, X86::SHRir16, X86::SHRir32, X86::SHRDir32 }, // SHR
- { X86::SARir8, X86::SARir16, X86::SARir32, X86::SHRDir32 }, // SAR
- { X86::SHLir8, X86::SHLir16, X86::SHLir32, X86::SHLDir32 }, // SHL
- { X86::SHLir8, X86::SHLir16, X86::SHLir32, X86::SHLDir32 }, // SAL = SHL
+ { X86::SHR8ri, X86::SHR16ri, X86::SHR32ri, X86::SHRD32rri8 }, // SHR
+ { X86::SAR8ri, X86::SAR16ri, X86::SAR32ri, X86::SHRD32rri8 }, // SAR
+ { X86::SHL8ri, X86::SHL16ri, X86::SHL32ri, X86::SHLD32rri8 }, // SHL
+ { X86::SHL8ri, X86::SHL16ri, X86::SHL32ri, X86::SHLD32rri8 }, // SAL = SHL
};
static const unsigned NonConstantOperand[][4] = {
- { X86::SHRrr8, X86::SHRrr16, X86::SHRrr32 }, // SHR
- { X86::SARrr8, X86::SARrr16, X86::SARrr32 }, // SAR
- { X86::SHLrr8, X86::SHLrr16, X86::SHLrr32 }, // SHL
- { X86::SHLrr8, X86::SHLrr16, X86::SHLrr32 }, // SAL = SHL
+ { X86::SHR8rCL, X86::SHR16rCL, X86::SHR32rCL }, // SHR
+ { X86::SAR8rCL, X86::SAR16rCL, X86::SAR32rCL }, // SAR
+ { X86::SHL8rCL, X86::SHL16rCL, X86::SHL32rCL }, // SHL
+ { X86::SHL8rCL, X86::SHL16rCL, X86::SHL32rCL }, // SAL = SHL
};
// Longs, as usual, are handled specially...
@@ -1534,25 +1798,25 @@
if (Amount < 32) {
const unsigned *Opc = ConstantOperand[isLeftShift*2+isSigned];
if (isLeftShift) {
- BMI(MBB, IP, Opc[3], 3,
- DestReg+1).addReg(SrcReg+1).addReg(SrcReg).addZImm(Amount);
- BMI(MBB, IP, Opc[2], 2, DestReg).addReg(SrcReg).addZImm(Amount);
+ BuildMI(*MBB, IP, Opc[3], 3,
+ DestReg+1).addReg(SrcReg+1).addReg(SrcReg).addImm(Amount);
+ BuildMI(*MBB, IP, Opc[2], 2, DestReg).addReg(SrcReg).addImm(Amount);
} else {
- BMI(MBB, IP, Opc[3], 3,
- DestReg).addReg(SrcReg ).addReg(SrcReg+1).addZImm(Amount);
- BMI(MBB, IP, Opc[2], 2, DestReg+1).addReg(SrcReg+1).addZImm(Amount);
+ BuildMI(*MBB, IP, Opc[3], 3,
+ DestReg).addReg(SrcReg ).addReg(SrcReg+1).addImm(Amount);
+ BuildMI(*MBB, IP, Opc[2],2,DestReg+1).addReg(SrcReg+1).addImm(Amount);
}
} else { // Shifting more than 32 bits
Amount -= 32;
if (isLeftShift) {
- BMI(MBB, IP, X86::SHLir32, 2,
- DestReg + 1).addReg(SrcReg).addZImm(Amount);
- BMI(MBB, IP, X86::MOVir32, 1,
- DestReg).addZImm(0);
+ BuildMI(*MBB, IP, X86::SHL32ri, 2,
+ DestReg + 1).addReg(SrcReg).addImm(Amount);
+ BuildMI(*MBB, IP, X86::MOV32ri, 1,
+ DestReg).addImm(0);
} else {
- unsigned Opcode = isSigned ? X86::SARir32 : X86::SHRir32;
- BMI(MBB, IP, Opcode, 2, DestReg).addReg(SrcReg+1).addZImm(Amount);
- BMI(MBB, IP, X86::MOVir32, 1, DestReg+1).addZImm(0);
+ unsigned Opcode = isSigned ? X86::SAR32ri : X86::SHR32ri;
+ BuildMI(*MBB, IP, Opcode, 2, DestReg).addReg(SrcReg+1).addImm(Amount);
+ BuildMI(*MBB, IP, X86::MOV32ri, 1, DestReg+1).addImm(0);
}
}
} else {
@@ -1562,50 +1826,52 @@
// If this is a SHR of a Long, then we need to do funny sign extension
// stuff. TmpReg gets the value to use as the high-part if we are
// shifting more than 32 bits.
- BMI(MBB, IP, X86::SARir32, 2, TmpReg).addReg(SrcReg).addZImm(31);
+ BuildMI(*MBB, IP, X86::SAR32ri, 2, TmpReg).addReg(SrcReg).addImm(31);
} else {
// Other shifts use a fixed zero value if the shift is more than 32
// bits.
- BMI(MBB, IP, X86::MOVir32, 1, TmpReg).addZImm(0);
+ BuildMI(*MBB, IP, X86::MOV32ri, 1, TmpReg).addImm(0);
}
// Initialize CL with the shift amount...
unsigned ShiftAmountReg = getReg(ShiftAmount, MBB, IP);
- BMI(MBB, IP, X86::MOVrr8, 1, X86::CL).addReg(ShiftAmountReg);
+ BuildMI(*MBB, IP, X86::MOV8rr, 1, X86::CL).addReg(ShiftAmountReg);
unsigned TmpReg2 = makeAnotherReg(Type::IntTy);
unsigned TmpReg3 = makeAnotherReg(Type::IntTy);
if (isLeftShift) {
// TmpReg2 = shld inHi, inLo
- BMI(MBB, IP, X86::SHLDrr32, 2, TmpReg2).addReg(SrcReg+1).addReg(SrcReg);
+ BuildMI(*MBB, IP, X86::SHLD32rrCL,2,TmpReg2).addReg(SrcReg+1)
+ .addReg(SrcReg);
// TmpReg3 = shl inLo, CL
- BMI(MBB, IP, X86::SHLrr32, 1, TmpReg3).addReg(SrcReg);
+ BuildMI(*MBB, IP, X86::SHL32rCL, 1, TmpReg3).addReg(SrcReg);
// Set the flags to indicate whether the shift was by more than 32 bits.
- BMI(MBB, IP, X86::TESTri8, 2).addReg(X86::CL).addZImm(32);
+ BuildMI(*MBB, IP, X86::TEST8ri, 2).addReg(X86::CL).addImm(32);
// DestHi = (>32) ? TmpReg3 : TmpReg2;
- BMI(MBB, IP, X86::CMOVNErr32, 2,
+ BuildMI(*MBB, IP, X86::CMOVNE32rr, 2,
DestReg+1).addReg(TmpReg2).addReg(TmpReg3);
// DestLo = (>32) ? TmpReg : TmpReg3;
- BMI(MBB, IP, X86::CMOVNErr32, 2,
+ BuildMI(*MBB, IP, X86::CMOVNE32rr, 2,
DestReg).addReg(TmpReg3).addReg(TmpReg);
} else {
// TmpReg2 = shrd inLo, inHi
- BMI(MBB, IP, X86::SHRDrr32, 2, TmpReg2).addReg(SrcReg).addReg(SrcReg+1);
+ BuildMI(*MBB, IP, X86::SHRD32rrCL,2,TmpReg2).addReg(SrcReg)
+ .addReg(SrcReg+1);
// TmpReg3 = s[ah]r inHi, CL
- BMI(MBB, IP, isSigned ? X86::SARrr32 : X86::SHRrr32, 1, TmpReg3)
+ BuildMI(*MBB, IP, isSigned ? X86::SAR32rCL : X86::SHR32rCL, 1, TmpReg3)
.addReg(SrcReg+1);
// Set the flags to indicate whether the shift was by more than 32 bits.
- BMI(MBB, IP, X86::TESTri8, 2).addReg(X86::CL).addZImm(32);
+ BuildMI(*MBB, IP, X86::TEST8ri, 2).addReg(X86::CL).addImm(32);
// DestLo = (>32) ? TmpReg3 : TmpReg2;
- BMI(MBB, IP, X86::CMOVNErr32, 2,
+ BuildMI(*MBB, IP, X86::CMOVNE32rr, 2,
DestReg).addReg(TmpReg2).addReg(TmpReg3);
// DestHi = (>32) ? TmpReg : TmpReg3;
- BMI(MBB, IP, X86::CMOVNErr32, 2,
+ BuildMI(*MBB, IP, X86::CMOVNE32rr, 2,
DestReg+1).addReg(TmpReg3).addReg(TmpReg);
}
}
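A hypothetical C++ model (names invented) of the value the SHLD/SHL/TEST/CMOVNE sequence produces for a variable 64-bit left shift; the key point is that x86 masks the count in CL to 5 bits, so the same two shifts serve both the less-than-32 and the 32-or-more cases, with the conditional moves selecting the right halves:

    #include <cstdint>

    uint64_t shl64(uint32_t Lo, uint32_t Hi, uint8_t CL) {
      unsigned Amt = CL & 31;                                    // hardware masking
      uint32_t T2 = (Hi << Amt) | (Amt ? Lo >> (32 - Amt) : 0);  // SHLD hi, lo, CL
      uint32_t T3 = Lo << Amt;                                   // SHL  lo, CL
      bool Over32 = (CL & 32) != 0;                              // TEST CL, 32
      uint32_t DestHi = Over32 ? T3 : T2;                        // CMOVNE
      uint32_t DestLo = Over32 ? 0u : T3;                        // CMOVNE (Tmp == 0)
      return ((uint64_t)DestHi << 32) | DestLo;
    }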
@@ -1617,14 +1883,14 @@
assert(CUI->getType() == Type::UByteTy && "Shift amount not a ubyte?");
const unsigned *Opc = ConstantOperand[isLeftShift*2+isSigned];
- BMI(MBB, IP, Opc[Class], 2,
- DestReg).addReg(SrcReg).addZImm(CUI->getValue());
+ BuildMI(*MBB, IP, Opc[Class], 2,
+ DestReg).addReg(SrcReg).addImm(CUI->getValue());
} else { // The shift amount is non-constant.
unsigned ShiftAmountReg = getReg (ShiftAmount, MBB, IP);
- BMI(MBB, IP, X86::MOVrr8, 1, X86::CL).addReg(ShiftAmountReg);
+ BuildMI(*MBB, IP, X86::MOV8rr, 1, X86::CL).addReg(ShiftAmountReg);
const unsigned *Opc = NonConstantOperand[isLeftShift*2+isSigned];
- BMI(MBB, IP, Opc[Class], 1, DestReg).addReg(SrcReg);
+ BuildMI(*MBB, IP, Opc[Class], 1, DestReg).addReg(SrcReg);
}
}
@@ -1634,47 +1900,106 @@
/// need to worry about the memory layout of the target machine.
///
void ISel::visitLoadInst(LoadInst &I) {
- unsigned SrcAddrReg = getReg(I.getOperand(0));
unsigned DestReg = getReg(I);
+ unsigned BaseReg = 0, Scale = 1, IndexReg = 0, Disp = 0;
+ Value *Addr = I.getOperand(0);
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Addr)) {
+ if (isGEPFoldable(BB, GEP->getOperand(0), GEP->op_begin()+1, GEP->op_end(),
+ BaseReg, Scale, IndexReg, Disp))
+ Addr = 0; // Address is consumed!
+ } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Addr)) {
+ if (CE->getOpcode() == Instruction::GetElementPtr)
+ if (isGEPFoldable(BB, CE->getOperand(0), CE->op_begin()+1, CE->op_end(),
+ BaseReg, Scale, IndexReg, Disp))
+ Addr = 0;
+ }
+
+ if (Addr) {
+ // If it's not foldable, reset addr mode.
+ BaseReg = getReg(Addr);
+ Scale = 1; IndexReg = 0; Disp = 0;
+ }
unsigned Class = getClassB(I.getType());
-
if (Class == cLong) {
- addDirectMem(BuildMI(BB, X86::MOVmr32, 4, DestReg), SrcAddrReg);
- addRegOffset(BuildMI(BB, X86::MOVmr32, 4, DestReg+1), SrcAddrReg, 4);
+ addFullAddress(BuildMI(BB, X86::MOV32rm, 4, DestReg),
+ BaseReg, Scale, IndexReg, Disp);
+ addFullAddress(BuildMI(BB, X86::MOV32rm, 4, DestReg+1),
+ BaseReg, Scale, IndexReg, Disp+4);
return;
}
static const unsigned Opcodes[] = {
- X86::MOVmr8, X86::MOVmr16, X86::MOVmr32, X86::FLDr32
+ X86::MOV8rm, X86::MOV16rm, X86::MOV32rm, X86::FLD32m
};
unsigned Opcode = Opcodes[Class];
- if (I.getType() == Type::DoubleTy) Opcode = X86::FLDr64;
- addDirectMem(BuildMI(BB, Opcode, 4, DestReg), SrcAddrReg);
+ if (I.getType() == Type::DoubleTy) Opcode = X86::FLD64m;
+ addFullAddress(BuildMI(BB, Opcode, 4, DestReg),
+ BaseReg, Scale, IndexReg, Disp);
}
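A hedged example of the access pattern this folding targets: when the load's pointer operand is a GEP whose indices fit the x86 addressing mode, the address computation and the load collapse into a single instruction.

    #include <cstdint>

    // The getelementptr + load below can now become one scaled-index MOV,
    // e.g. a load from [BaseReg + 4*IndexReg], instead of an explicit
    // address computation followed by a plain load.
    int32_t loadElement(const int32_t *Base, int64_t Index) {
      return Base[Index];
    }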
/// visitStoreInst - Implement LLVM store instructions in terms of the x86 'mov'
/// instruction.
///
void ISel::visitStoreInst(StoreInst &I) {
- unsigned ValReg = getReg(I.getOperand(0));
- unsigned AddressReg = getReg(I.getOperand(1));
-
+ unsigned BaseReg = 0, Scale = 1, IndexReg = 0, Disp = 0;
+ Value *Addr = I.getOperand(1);
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Addr)) {
+ if (isGEPFoldable(BB, GEP->getOperand(0), GEP->op_begin()+1, GEP->op_end(),
+ BaseReg, Scale, IndexReg, Disp))
+ Addr = 0; // Address is consumed!
+ } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Addr)) {
+ if (CE->getOpcode() == Instruction::GetElementPtr)
+ if (isGEPFoldable(BB, CE->getOperand(0), CE->op_begin()+1, CE->op_end(),
+ BaseReg, Scale, IndexReg, Disp))
+ Addr = 0;
+ }
+
+ if (Addr) {
+ // If it's not foldable, reset addr mode.
+ BaseReg = getReg(Addr);
+ Scale = 1; IndexReg = 0; Disp = 0;
+ }
+
const Type *ValTy = I.getOperand(0)->getType();
unsigned Class = getClassB(ValTy);
- if (Class == cLong) {
- addDirectMem(BuildMI(BB, X86::MOVrm32, 1+4), AddressReg).addReg(ValReg);
- addRegOffset(BuildMI(BB, X86::MOVrm32, 1+4), AddressReg,4).addReg(ValReg+1);
- return;
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand(0))) {
+ uint64_t Val = CI->getRawValue();
+ if (Class == cLong) {
+ addFullAddress(BuildMI(BB, X86::MOV32mi, 5),
+ BaseReg, Scale, IndexReg, Disp).addImm(Val & ~0U);
+ addFullAddress(BuildMI(BB, X86::MOV32mi, 5),
+ BaseReg, Scale, IndexReg, Disp+4).addImm(Val>>32);
+ } else {
+ static const unsigned Opcodes[] = {
+ X86::MOV8mi, X86::MOV16mi, X86::MOV32mi
+ };
+ unsigned Opcode = Opcodes[Class];
+ addFullAddress(BuildMI(BB, Opcode, 5),
+ BaseReg, Scale, IndexReg, Disp).addImm(Val);
+ }
+ } else if (ConstantBool *CB = dyn_cast<ConstantBool>(I.getOperand(0))) {
+ addFullAddress(BuildMI(BB, X86::MOV8mi, 5),
+ BaseReg, Scale, IndexReg, Disp).addImm(CB->getValue());
+ } else {
+ if (Class == cLong) {
+ unsigned ValReg = getReg(I.getOperand(0));
+ addFullAddress(BuildMI(BB, X86::MOV32mr, 5),
+ BaseReg, Scale, IndexReg, Disp).addReg(ValReg);
+ addFullAddress(BuildMI(BB, X86::MOV32mr, 5),
+ BaseReg, Scale, IndexReg, Disp+4).addReg(ValReg+1);
+ } else {
+ unsigned ValReg = getReg(I.getOperand(0));
+ static const unsigned Opcodes[] = {
+ X86::MOV8mr, X86::MOV16mr, X86::MOV32mr, X86::FST32m
+ };
+ unsigned Opcode = Opcodes[Class];
+ if (ValTy == Type::DoubleTy) Opcode = X86::FST64m;
+ addFullAddress(BuildMI(BB, Opcode, 1+4),
+ BaseReg, Scale, IndexReg, Disp).addReg(ValReg);
+ }
}
-
- static const unsigned Opcodes[] = {
- X86::MOVrm8, X86::MOVrm16, X86::MOVrm32, X86::FSTr32
- };
- unsigned Opcode = Opcodes[Class];
- if (ValTy == Type::DoubleTy) Opcode = X86::FSTr64;
- addDirectMem(BuildMI(BB, Opcode, 1+4), AddressReg).addReg(ValReg);
}
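Illustrative sketch of the cLong constant-store path above: the 64-bit immediate is split into two MOV32mi stores at Disp and Disp+4 (little-endian), so no register is needed to materialize the constant first.

    #include <cstdint>

    void storeLongImm(uint32_t *Slot, uint64_t Val) {
      Slot[0] = (uint32_t)(Val & ~0U);   // MOV32mi [addr],   low 32 bits
      Slot[1] = (uint32_t)(Val >> 32);   // MOV32mi [addr+4], high 32 bits
    }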
@@ -1706,7 +2031,7 @@
/// emitCastOperation - Common code shared between visitCastInst and
/// constant expression cast support.
void ISel::emitCastOperation(MachineBasicBlock *BB,
- MachineBasicBlock::iterator &IP,
+ MachineBasicBlock::iterator IP,
Value *Src, const Type *DestTy,
unsigned DestReg) {
unsigned SrcReg = getReg(Src, BB, IP);
@@ -1719,43 +2044,45 @@
if (DestTy == Type::BoolTy) {
switch (SrcClass) {
case cByte:
- BMI(BB, IP, X86::TESTrr8, 2).addReg(SrcReg).addReg(SrcReg);
+ BuildMI(*BB, IP, X86::TEST8rr, 2).addReg(SrcReg).addReg(SrcReg);
break;
case cShort:
- BMI(BB, IP, X86::TESTrr16, 2).addReg(SrcReg).addReg(SrcReg);
+ BuildMI(*BB, IP, X86::TEST16rr, 2).addReg(SrcReg).addReg(SrcReg);
break;
case cInt:
- BMI(BB, IP, X86::TESTrr32, 2).addReg(SrcReg).addReg(SrcReg);
+ BuildMI(*BB, IP, X86::TEST32rr, 2).addReg(SrcReg).addReg(SrcReg);
break;
case cLong: {
unsigned TmpReg = makeAnotherReg(Type::IntTy);
- BMI(BB, IP, X86::ORrr32, 2, TmpReg).addReg(SrcReg).addReg(SrcReg+1);
+ BuildMI(*BB, IP, X86::OR32rr, 2, TmpReg).addReg(SrcReg).addReg(SrcReg+1);
break;
}
case cFP:
- assert(0 && "FIXME: implement cast FP to bool");
- abort();
+ BuildMI(*BB, IP, X86::FTST, 1).addReg(SrcReg);
+ BuildMI(*BB, IP, X86::FNSTSW8r, 0);
+ BuildMI(*BB, IP, X86::SAHF, 1);
+ break;
}
// If the zero flag is not set, then the value is true, set the byte to
// true.
- BMI(BB, IP, X86::SETNEr, 1, DestReg);
+ BuildMI(*BB, IP, X86::SETNEr, 1, DestReg);
return;
}
static const unsigned RegRegMove[] = {
- X86::MOVrr8, X86::MOVrr16, X86::MOVrr32, X86::FpMOV, X86::MOVrr32
+ X86::MOV8rr, X86::MOV16rr, X86::MOV32rr, X86::FpMOV, X86::MOV32rr
};
// Implement casts between values of the same type class (as determined by
// getClass) by using a register-to-register move.
if (SrcClass == DestClass) {
if (SrcClass <= cInt || (SrcClass == cFP && SrcTy == DestTy)) {
- BMI(BB, IP, RegRegMove[SrcClass], 1, DestReg).addReg(SrcReg);
+ BuildMI(*BB, IP, RegRegMove[SrcClass], 1, DestReg).addReg(SrcReg);
} else if (SrcClass == cFP) {
if (SrcTy == Type::FloatTy) { // double -> float
assert(DestTy == Type::DoubleTy && "Unknown cFP member!");
- BMI(BB, IP, X86::FpMOV, 1, DestReg).addReg(SrcReg);
+ BuildMI(*BB, IP, X86::FpMOV, 1, DestReg).addReg(SrcReg);
} else { // float -> double
assert(SrcTy == Type::DoubleTy && DestTy == Type::FloatTy &&
"Unknown cFP member!");
@@ -1763,12 +2090,12 @@
// reading it back.
unsigned FltAlign = TM.getTargetData().getFloatAlignment();
int FrameIdx = F->getFrameInfo()->CreateStackObject(4, FltAlign);
- addFrameReference(BMI(BB, IP, X86::FSTr32, 5), FrameIdx).addReg(SrcReg);
- addFrameReference(BMI(BB, IP, X86::FLDr32, 5, DestReg), FrameIdx);
+ addFrameReference(BuildMI(*BB, IP, X86::FST32m, 5), FrameIdx).addReg(SrcReg);
+ addFrameReference(BuildMI(*BB, IP, X86::FLD32m, 5, DestReg), FrameIdx);
}
} else if (SrcClass == cLong) {
- BMI(BB, IP, X86::MOVrr32, 1, DestReg).addReg(SrcReg);
- BMI(BB, IP, X86::MOVrr32, 1, DestReg+1).addReg(SrcReg+1);
+ BuildMI(*BB, IP, X86::MOV32rr, 1, DestReg).addReg(SrcReg);
+ BuildMI(*BB, IP, X86::MOV32rr, 1, DestReg+1).addReg(SrcReg+1);
} else {
assert(0 && "Cannot handle this type of cast instruction!");
abort();
@@ -1784,26 +2111,26 @@
if (isLong) DestClass = cInt;
static const unsigned Opc[][4] = {
- { X86::MOVSXr16r8, X86::MOVSXr32r8, X86::MOVSXr32r16, X86::MOVrr32 }, // s
- { X86::MOVZXr16r8, X86::MOVZXr32r8, X86::MOVZXr32r16, X86::MOVrr32 } // u
+ { X86::MOVSX16rr8, X86::MOVSX32rr8, X86::MOVSX32rr16, X86::MOV32rr }, // s
+ { X86::MOVZX16rr8, X86::MOVZX32rr8, X86::MOVZX32rr16, X86::MOV32rr } // u
};
bool isUnsigned = SrcTy->isUnsigned();
- BMI(BB, IP, Opc[isUnsigned][SrcClass + DestClass - 1], 1,
+ BuildMI(*BB, IP, Opc[isUnsigned][SrcClass + DestClass - 1], 1,
DestReg).addReg(SrcReg);
if (isLong) { // Handle upper 32 bits as appropriate...
if (isUnsigned) // Zero out top bits...
- BMI(BB, IP, X86::MOVir32, 1, DestReg+1).addZImm(0);
+ BuildMI(*BB, IP, X86::MOV32ri, 1, DestReg+1).addImm(0);
else // Sign extend bottom half...
- BMI(BB, IP, X86::SARir32, 2, DestReg+1).addReg(DestReg).addZImm(31);
+ BuildMI(*BB, IP, X86::SAR32ri, 2, DestReg+1).addReg(DestReg).addImm(31);
}
return;
}
// Special case long -> int ...
if (SrcClass == cLong && DestClass == cInt) {
- BMI(BB, IP, X86::MOVrr32, 1, DestReg).addReg(SrcReg);
+ BuildMI(*BB, IP, X86::MOV32rr, 1, DestReg).addReg(SrcReg);
return;
}
@@ -1812,8 +2139,8 @@
if ((SrcClass <= cInt || SrcClass == cLong) && DestClass <= cInt
&& SrcClass > DestClass) {
static const unsigned AReg[] = { X86::AL, X86::AX, X86::EAX, 0, X86::EAX };
- BMI(BB, IP, RegRegMove[SrcClass], 1, AReg[SrcClass]).addReg(SrcReg);
- BMI(BB, IP, RegRegMove[DestClass], 1, DestReg).addReg(AReg[DestClass]);
+ BuildMI(*BB, IP, RegRegMove[SrcClass], 1, AReg[SrcClass]).addReg(SrcReg);
+ BuildMI(*BB, IP, RegRegMove[DestClass], 1, DestReg).addReg(AReg[DestClass]);
return;
}
@@ -1825,42 +2152,45 @@
//
const Type *PromoteType = 0;
unsigned PromoteOpcode;
+ unsigned RealDestReg = DestReg;
switch (SrcTy->getPrimitiveID()) {
case Type::BoolTyID:
case Type::SByteTyID:
// We don't have the facilities for directly loading byte sized data from
// memory (even signed). Promote it to 16 bits.
PromoteType = Type::ShortTy;
- PromoteOpcode = X86::MOVSXr16r8;
+ PromoteOpcode = X86::MOVSX16rr8;
break;
case Type::UByteTyID:
PromoteType = Type::ShortTy;
- PromoteOpcode = X86::MOVZXr16r8;
+ PromoteOpcode = X86::MOVZX16rr8;
break;
case Type::UShortTyID:
PromoteType = Type::IntTy;
- PromoteOpcode = X86::MOVZXr32r16;
+ PromoteOpcode = X86::MOVZX32rr16;
break;
case Type::UIntTyID: {
// Make a 64 bit temporary... and zero out the top of it...
unsigned TmpReg = makeAnotherReg(Type::LongTy);
- BMI(BB, IP, X86::MOVrr32, 1, TmpReg).addReg(SrcReg);
- BMI(BB, IP, X86::MOVir32, 1, TmpReg+1).addZImm(0);
+ BuildMI(*BB, IP, X86::MOV32rr, 1, TmpReg).addReg(SrcReg);
+ BuildMI(*BB, IP, X86::MOV32ri, 1, TmpReg+1).addImm(0);
SrcTy = Type::LongTy;
SrcClass = cLong;
SrcReg = TmpReg;
break;
}
case Type::ULongTyID:
- assert("FIXME: not implemented: cast ulong X to fp type!");
+ // Don't fild into the real destination.
+ DestReg = makeAnotherReg(Type::DoubleTy);
+ break;
default: // No promotion needed...
break;
}
if (PromoteType) {
unsigned TmpReg = makeAnotherReg(PromoteType);
- BMI(BB, IP, SrcTy->isSigned() ? X86::MOVSXr16r8 : X86::MOVZXr16r8,
- 1, TmpReg).addReg(SrcReg);
+ unsigned Opc = SrcTy->isSigned() ? X86::MOVSX16rr8 : X86::MOVZX16rr8;
+ BuildMI(*BB, IP, Opc, 1, TmpReg).addReg(SrcReg);
SrcTy = PromoteType;
SrcClass = getClass(PromoteType);
SrcReg = TmpReg;
@@ -1871,17 +2201,51 @@
F->getFrameInfo()->CreateStackObject(SrcTy, TM.getTargetData());
if (SrcClass == cLong) {
- addFrameReference(BMI(BB, IP, X86::MOVrm32, 5), FrameIdx).addReg(SrcReg);
- addFrameReference(BMI(BB, IP, X86::MOVrm32, 5),
+ addFrameReference(BuildMI(*BB, IP, X86::MOV32mr, 5),
+ FrameIdx).addReg(SrcReg);
+ addFrameReference(BuildMI(*BB, IP, X86::MOV32mr, 5),
FrameIdx, 4).addReg(SrcReg+1);
} else {
- static const unsigned Op1[] = { X86::MOVrm8, X86::MOVrm16, X86::MOVrm32 };
- addFrameReference(BMI(BB, IP, Op1[SrcClass], 5), FrameIdx).addReg(SrcReg);
+ static const unsigned Op1[] = { X86::MOV8mr, X86::MOV16mr, X86::MOV32mr };
+ addFrameReference(BuildMI(*BB, IP, Op1[SrcClass], 5),
+ FrameIdx).addReg(SrcReg);
}
static const unsigned Op2[] =
- { 0/*byte*/, X86::FILDr16, X86::FILDr32, 0/*FP*/, X86::FILDr64 };
- addFrameReference(BMI(BB, IP, Op2[SrcClass], 5, DestReg), FrameIdx);
+ { 0/*byte*/, X86::FILD16m, X86::FILD32m, 0/*FP*/, X86::FILD64m };
+ addFrameReference(BuildMI(*BB, IP, Op2[SrcClass], 5, DestReg), FrameIdx);
+
+ // We need special handling for unsigned 64-bit integer sources. If the
+ // input number has the "sign bit" set, then we loaded it incorrectly as a
+ // negative 64-bit number. In this case, add an offset value.
+ if (SrcTy == Type::ULongTy) {
+ // Emit a test instruction to see if the dynamic input value was signed.
+ BuildMI(*BB, IP, X86::TEST32rr, 2).addReg(SrcReg+1).addReg(SrcReg+1);
+
+ // If the sign bit is set, get a pointer to an offset, otherwise get a
+ // pointer to a zero.
+ MachineConstantPool *CP = F->getConstantPool();
+ unsigned Zero = makeAnotherReg(Type::IntTy);
+ Constant *Null = Constant::getNullValue(Type::UIntTy);
+ addConstantPoolReference(BuildMI(*BB, IP, X86::LEA32r, 5, Zero),
+ CP->getConstantPoolIndex(Null));
+ unsigned Offset = makeAnotherReg(Type::IntTy);
+ Constant *OffsetCst = ConstantUInt::get(Type::UIntTy, 0x5f800000);
+
+ addConstantPoolReference(BuildMI(*BB, IP, X86::LEA32r, 5, Offset),
+ CP->getConstantPoolIndex(OffsetCst));
+ unsigned Addr = makeAnotherReg(Type::IntTy);
+ BuildMI(*BB, IP, X86::CMOVS32rr, 2, Addr).addReg(Zero).addReg(Offset);
+
+ // Load the constant for an add. FIXME: this could make an 'fadd' that
+ // reads directly from memory, but we don't support these yet.
+ unsigned ConstReg = makeAnotherReg(Type::DoubleTy);
+ addDirectMem(BuildMI(*BB, IP, X86::FLD32m, 4, ConstReg), Addr);
+
+ BuildMI(*BB, IP, X86::FpADD, 2, RealDestReg)
+ .addReg(ConstReg).addReg(DestReg);
+ }
+
return;
}
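A standalone check (illustrative, not the pass itself) of the reasoning behind the 0x5f800000 constant used above: it is the IEEE single-precision encoding of 2^64. FILD interprets the 64-bit source as signed, so when bit 63 is set the loaded value is exactly 2^64 too small, and the conditional add repairs it.

    #include <cstdint>
    #include <cstring>

    double uint64ToDouble(uint64_t Val) {
      double D = (double)(int64_t)Val;            // what FILD64m produces
      if (Val >> 63) {                            // TEST32rr of the high word
        uint32_t Bits = 0x5f800000;               // the constant-pool word
        float Correction;                         // FLD32m of that word
        std::memcpy(&Correction, &Bits, sizeof(Correction));
        D += Correction;                          // FpADD: add 2^64 back
      }
      return D;
    }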
@@ -1891,20 +2255,22 @@
// mode when truncating to an integer value.
//
int CWFrameIdx = F->getFrameInfo()->CreateStackObject(2, 2);
- addFrameReference(BMI(BB, IP, X86::FNSTCWm16, 4), CWFrameIdx);
+ addFrameReference(BuildMI(*BB, IP, X86::FNSTCW16m, 4), CWFrameIdx);
// Load the old value of the high byte of the control word...
unsigned HighPartOfCW = makeAnotherReg(Type::UByteTy);
- addFrameReference(BMI(BB, IP, X86::MOVmr8, 4, HighPartOfCW), CWFrameIdx, 1);
+ addFrameReference(BuildMI(*BB, IP, X86::MOV8rm, 4, HighPartOfCW),
+ CWFrameIdx, 1);
// Set the high part to be round to zero...
- addFrameReference(BMI(BB, IP, X86::MOVim8, 5), CWFrameIdx, 1).addZImm(12);
+ addFrameReference(BuildMI(*BB, IP, X86::MOV8mi, 5),
+ CWFrameIdx, 1).addImm(12);
// Reload the modified control word now...
- addFrameReference(BMI(BB, IP, X86::FLDCWm16, 4), CWFrameIdx);
+ addFrameReference(BuildMI(*BB, IP, X86::FLDCW16m, 4), CWFrameIdx);
// Restore the memory image of control word to original value
- addFrameReference(BMI(BB, IP, X86::MOVrm8, 5),
+ addFrameReference(BuildMI(*BB, IP, X86::MOV8mr, 5),
CWFrameIdx, 1).addReg(HighPartOfCW);
// We don't have the facilities for directly storing byte sized data to
@@ -1929,19 +2295,21 @@
F->getFrameInfo()->CreateStackObject(StoreTy, TM.getTargetData());
static const unsigned Op1[] =
- { 0, X86::FISTr16, X86::FISTr32, 0, X86::FISTPr64 };
- addFrameReference(BMI(BB, IP, Op1[StoreClass], 5), FrameIdx).addReg(SrcReg);
+ { 0, X86::FIST16m, X86::FIST32m, 0, X86::FISTP64m };
+ addFrameReference(BuildMI(*BB, IP, Op1[StoreClass], 5),
+ FrameIdx).addReg(SrcReg);
if (DestClass == cLong) {
- addFrameReference(BMI(BB, IP, X86::MOVmr32, 4, DestReg), FrameIdx);
- addFrameReference(BMI(BB, IP, X86::MOVmr32, 4, DestReg+1), FrameIdx, 4);
+ addFrameReference(BuildMI(*BB, IP, X86::MOV32rm, 4, DestReg), FrameIdx);
+ addFrameReference(BuildMI(*BB, IP, X86::MOV32rm, 4, DestReg+1),
+ FrameIdx, 4);
} else {
- static const unsigned Op2[] = { X86::MOVmr8, X86::MOVmr16, X86::MOVmr32 };
- addFrameReference(BMI(BB, IP, Op2[DestClass], 4, DestReg), FrameIdx);
+ static const unsigned Op2[] = { X86::MOV8rm, X86::MOV16rm, X86::MOV32rm };
+ addFrameReference(BuildMI(*BB, IP, Op2[DestClass], 4, DestReg), FrameIdx);
}
// Reload the original control word now...
- addFrameReference(BMI(BB, IP, X86::FLDCWm16, 4), CWFrameIdx);
+ addFrameReference(BuildMI(*BB, IP, X86::FLDCW16m, 4), CWFrameIdx);
return;
}
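A brief sketch of the control-word manipulation above: FIST obeys the FPU's rounding-control field, but FP-to-integer casts must truncate, so the high byte of the saved control word is overwritten with 12 (0x0C), which sets RC to round-toward-zero, and the original byte is restored afterwards.

    #include <cstdint>

    // Illustrative helper, not the emitted code: produce the truncating
    // control word that the MOV8mi above writes into the spill slot.
    uint16_t makeTruncatingControlWord(uint16_t OldCW) {
      return (uint16_t)((OldCW & 0x00FFu) | (12u << 8));   // RC = 11b (truncate)
    }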
@@ -1975,7 +2343,7 @@
}
// Increment the VAList pointer...
- BuildMI(BB, X86::ADDri32, 2, DestReg).addReg(VAList).addZImm(Size);
+ BuildMI(BB, X86::ADD32ri, 2, DestReg).addReg(VAList).addImm(Size);
}
void ISel::visitVAArgInst(VAArgInst &I) {
@@ -1990,64 +2358,220 @@
case Type::PointerTyID:
case Type::UIntTyID:
case Type::IntTyID:
- addDirectMem(BuildMI(BB, X86::MOVmr32, 4, DestReg), VAList);
+ addDirectMem(BuildMI(BB, X86::MOV32rm, 4, DestReg), VAList);
break;
case Type::ULongTyID:
case Type::LongTyID:
- addDirectMem(BuildMI(BB, X86::MOVmr32, 4, DestReg), VAList);
- addRegOffset(BuildMI(BB, X86::MOVmr32, 4, DestReg+1), VAList, 4);
+ addDirectMem(BuildMI(BB, X86::MOV32rm, 4, DestReg), VAList);
+ addRegOffset(BuildMI(BB, X86::MOV32rm, 4, DestReg+1), VAList, 4);
break;
case Type::DoubleTyID:
- addDirectMem(BuildMI(BB, X86::FLDr64, 4, DestReg), VAList);
+ addDirectMem(BuildMI(BB, X86::FLD64m, 4, DestReg), VAList);
break;
}
}
void ISel::visitGetElementPtrInst(GetElementPtrInst &I) {
+ // If this GEP instruction will be folded into all of its users, we don't need
+ // to explicitly calculate it!
+ unsigned A, B, C, D;
+ if (isGEPFoldable(0, I.getOperand(0), I.op_begin()+1, I.op_end(), A,B,C,D)) {
+ // Check all of the users of the instruction to see if they are loads and
+ // stores.
+ bool AllWillFold = true;
+ for (Value::use_iterator UI = I.use_begin(), E = I.use_end(); UI != E; ++UI)
+ if (cast<Instruction>(*UI)->getOpcode() != Instruction::Load)
+ if (cast<Instruction>(*UI)->getOpcode() != Instruction::Store ||
+ cast<Instruction>(*UI)->getOperand(0) == &I) {
+ AllWillFold = false;
+ break;
+ }
+
+ // If the instruction is foldable, and will be folded into all users, don't
+ // emit it!
+ if (AllWillFold) return;
+ }
+
unsigned outputReg = getReg(I);
- MachineBasicBlock::iterator MI = BB->end();
- emitGEPOperation(BB, MI, I.getOperand(0),
+ emitGEPOperation(BB, BB->end(), I.getOperand(0),
I.op_begin()+1, I.op_end(), outputReg);
}
+/// getGEPIndex - Inspect the getelementptr operands specified with GEPOps and
+/// GEPTypes (the derived types being stepped through at each level). On return
+/// from this function, if some indexes of the instruction are representable as
+/// an X86 lea instruction, the machine operands are put into the Ops
+/// instruction and the consumed indexes are popped from the GEPOps/GEPTypes
+/// lists. Otherwise, GEPOps.size() is returned. If this returns an
+/// addressing mode that only partially consumes the input, the BaseReg input of
+/// the addressing mode must be left free.
+///
+/// Note that there is one fewer entry in GEPTypes than there is in GEPOps.
+///
+void ISel::getGEPIndex(MachineBasicBlock *MBB, MachineBasicBlock::iterator IP,
+ std::vector<Value*> &GEPOps,
+ std::vector<const Type*> &GEPTypes, unsigned &BaseReg,
+ unsigned &Scale, unsigned &IndexReg, unsigned &Disp) {
+ const TargetData &TD = TM.getTargetData();
+
+ // Clear out the state we are working with...
+ BaseReg = 0; // No base register
+ Scale = 1; // Unit scale
+ IndexReg = 0; // No index register
+ Disp = 0; // No displacement
+
+ // While there are GEP indexes that can be folded into the current address,
+ // keep processing them.
+ while (!GEPTypes.empty()) {
+ if (const StructType *StTy = dyn_cast<StructType>(GEPTypes.back())) {
+ // It's a struct access. CUI is the index into the structure,
+ // which names the field. This index must have unsigned type.
+ const ConstantUInt *CUI = cast<ConstantUInt>(GEPOps.back());
+
+ // Use the TargetData structure to pick out what the layout of the
+ // structure is in memory. Since the structure index must be constant, we
+ // can get its value and use it to find the right byte offset from the
+ // StructLayout class's list of structure member offsets.
+ Disp += TD.getStructLayout(StTy)->MemberOffsets[CUI->getValue()];
+ GEPOps.pop_back(); // Consume a GEP operand
+ GEPTypes.pop_back();
+ } else {
+ // It's an array or pointer access: [ArraySize x ElementType].
+ const SequentialType *SqTy = cast<SequentialType>(GEPTypes.back());
+ Value *idx = GEPOps.back();
+
+ // idx is the index into the array. Unlike with structure
+ // indices, we may not know its actual value at code-generation
+ // time.
+ assert(idx->getType() == Type::LongTy && "Bad GEP array index!");
+
+ // If idx is a constant, fold it into the offset.
+ unsigned TypeSize = TD.getTypeSize(SqTy->getElementType());
+ if (ConstantSInt *CSI = dyn_cast<ConstantSInt>(idx)) {
+ Disp += TypeSize*CSI->getValue();
+ } else {
+ // If the index reg is already taken, we can't handle this index.
+ if (IndexReg) return;
+
+ // If this is a size that we can handle, then add the index as
+ switch (TypeSize) {
+ case 1: case 2: case 4: case 8:
+ // These are all acceptable scales on X86.
+ Scale = TypeSize;
+ break;
+ default:
+ // Otherwise, we can't handle this scale
+ return;
+ }
+
+ if (CastInst *CI = dyn_cast<CastInst>(idx))
+ if (CI->getOperand(0)->getType() == Type::IntTy ||
+ CI->getOperand(0)->getType() == Type::UIntTy)
+ idx = CI->getOperand(0);
+
+ IndexReg = MBB ? getReg(idx, MBB, IP) : 1;
+ }
+
+ GEPOps.pop_back(); // Consume a GEP operand
+ GEPTypes.pop_back();
+ }
+ }
+
+ // GEPTypes is empty, which means we have a single operand left. See if we
+ // can set it as the base register.
+ //
+ // FIXME: When addressing modes are more powerful/correct, we could load
+ // global addresses directly as 32-bit immediates.
+ assert(BaseReg == 0);
+ BaseReg = MBB ? getReg(GEPOps[0], MBB, IP) : 1;
+ GEPOps.pop_back(); // Consume the last GEP operand
+}
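A hypothetical example (types and names invented for illustration) of what getGEPIndex can fold: struct field offsets and constant indices accumulate into Disp, while one variable index whose element size is 1, 2, 4 or 8 becomes the scaled IndexReg.

    #include <cstdint>

    struct Pair { int32_t x, y; };

    // The address of A[i].y is [BaseReg + 8*IndexReg + 4]: Base = A,
    // Scale = sizeof(Pair) = 8, Index = i, Disp = offset of 'y' = 4,
    // i.e. one LEA or one folded load/store on x86.
    int32_t *fieldAddress(Pair *A, int64_t i) {
      return &A[i].y;
    }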
+
+
+/// isGEPFoldable - Return true if the specified GEP can be completely
+/// folded into the addressing mode of a load/store or lea instruction.
+bool ISel::isGEPFoldable(MachineBasicBlock *MBB,
+ Value *Src, User::op_iterator IdxBegin,
+ User::op_iterator IdxEnd, unsigned &BaseReg,
+ unsigned &Scale, unsigned &IndexReg, unsigned &Disp) {
+ if (ConstantPointerRef *CPR = dyn_cast<ConstantPointerRef>(Src))
+ Src = CPR->getValue();
+
+ std::vector<Value*> GEPOps;
+ GEPOps.resize(IdxEnd-IdxBegin+1);
+ GEPOps[0] = Src;
+ std::copy(IdxBegin, IdxEnd, GEPOps.begin()+1);
+
+ std::vector<const Type*> GEPTypes;
+ GEPTypes.assign(gep_type_begin(Src->getType(), IdxBegin, IdxEnd),
+ gep_type_end(Src->getType(), IdxBegin, IdxEnd));
+
+ MachineBasicBlock::iterator IP;
+ if (MBB) IP = MBB->end();
+ getGEPIndex(MBB, IP, GEPOps, GEPTypes, BaseReg, Scale, IndexReg, Disp);
+
+ // We can fold it away iff the getGEPIndex call eliminated all operands.
+ return GEPOps.empty();
+}
+
void ISel::emitGEPOperation(MachineBasicBlock *MBB,
- MachineBasicBlock::iterator &IP,
+ MachineBasicBlock::iterator IP,
Value *Src, User::op_iterator IdxBegin,
User::op_iterator IdxEnd, unsigned TargetReg) {
const TargetData &TD = TM.getTargetData();
- const Type *Ty = Src->getType();
- unsigned BaseReg = getReg(Src, MBB, IP);
+ if (ConstantPointerRef *CPR = dyn_cast<ConstantPointerRef>(Src))
+ Src = CPR->getValue();
+
+ std::vector<Value*> GEPOps;
+ GEPOps.resize(IdxEnd-IdxBegin+1);
+ GEPOps[0] = Src;
+ std::copy(IdxBegin, IdxEnd, GEPOps.begin()+1);
+
+ std::vector<const Type*> GEPTypes;
+ GEPTypes.assign(gep_type_begin(Src->getType(), IdxBegin, IdxEnd),
+ gep_type_end(Src->getType(), IdxBegin, IdxEnd));
+
+ // Keep emitting instructions until we consume the entire GEP instruction.
+ while (!GEPOps.empty()) {
+ unsigned OldSize = GEPOps.size();
+ unsigned BaseReg, Scale, IndexReg, Disp;
+ getGEPIndex(MBB, IP, GEPOps, GEPTypes, BaseReg, Scale, IndexReg, Disp);
+
+ if (GEPOps.size() != OldSize) {
+ // getGEPIndex consumed some of the input. Build an LEA instruction here.
+ unsigned NextTarget = 0;
+ if (!GEPOps.empty()) {
+ assert(BaseReg == 0 &&
+ "getGEPIndex should have left the base register open for chaining!");
+ NextTarget = BaseReg = makeAnotherReg(Type::UIntTy);
+ }
- // GEPs have zero or more indices; we must perform a struct access
- // or array access for each one.
- for (GetElementPtrInst::op_iterator oi = IdxBegin,
- oe = IdxEnd; oi != oe; ++oi) {
- Value *idx = *oi;
- unsigned NextReg = BaseReg;
- if (const StructType *StTy = dyn_cast<StructType>(Ty)) {
- // It's a struct access. idx is the index into the structure,
- // which names the field. This index must have ubyte type.
- const ConstantUInt *CUI = cast<ConstantUInt>(idx);
- assert(CUI->getType() == Type::UByteTy
- && "Funny-looking structure index in GEP");
- // Use the TargetData structure to pick out what the layout of
- // the structure is in memory. Since the structure index must
- // be constant, we can get its value and use it to find the
- // right byte offset from the StructLayout class's list of
- // structure member offsets.
- unsigned idxValue = CUI->getValue();
- unsigned FieldOff = TD.getStructLayout(StTy)->MemberOffsets[idxValue];
- if (FieldOff) {
- NextReg = makeAnotherReg(Type::UIntTy);
- // Emit an ADD to add FieldOff to the basePtr.
- BMI(MBB, IP, X86::ADDri32, 2,NextReg).addReg(BaseReg).addZImm(FieldOff);
- }
- // The next type is the member of the structure selected by the
- // index.
- Ty = StTy->getElementTypes()[idxValue];
- } else if (const SequentialType *SqTy = cast<SequentialType>(Ty)) {
+ if (IndexReg == 0 && Disp == 0)
+ BuildMI(*MBB, IP, X86::MOV32rr, 1, TargetReg).addReg(BaseReg);
+ else
+ addFullAddress(BuildMI(*MBB, IP, X86::LEA32r, 5, TargetReg),
+ BaseReg, Scale, IndexReg, Disp);
+ --IP;
+ TargetReg = NextTarget;
+ } else if (GEPTypes.empty()) {
+ // The getGEPIndex operation didn't want to build an LEA. Check to see if
+ // all operands are consumed but the base pointer. If so, just load it
+ // into the register.
+ if (GlobalValue *GV = dyn_cast<GlobalValue>(GEPOps[0])) {
+ BuildMI(*MBB, IP, X86::MOV32ri, 1, TargetReg).addGlobalAddress(GV);
+ } else {
+ unsigned BaseReg = getReg(GEPOps[0], MBB, IP);
+ BuildMI(*MBB, IP, X86::MOV32rr, 1, TargetReg).addReg(BaseReg);
+ }
+ break; // we are now done
+
+ } else {
// It's an array or pointer access: [ArraySize x ElementType].
+ const SequentialType *SqTy = cast<SequentialType>(GEPTypes.back());
+ Value *idx = GEPOps.back();
+ GEPOps.pop_back(); // Consume a GEP operand
+ GEPTypes.pop_back();
// idx is the index into the array. Unlike with structure
// indices, we may not know its actual value at code-generation
@@ -2064,41 +2588,54 @@
// We want to add BaseReg to (idxReg * sizeof ElementType). First, we
// must find the size of the pointed-to type (Not coincidentally, the next
// type is the type of the elements in the array).
- Ty = SqTy->getElementType();
- unsigned elementSize = TD.getTypeSize(Ty);
+ const Type *ElTy = SqTy->getElementType();
+ unsigned elementSize = TD.getTypeSize(ElTy);
// If idxReg is a constant, we don't need to perform the multiply!
if (ConstantSInt *CSI = dyn_cast<ConstantSInt>(idx)) {
if (!CSI->isNullValue()) {
unsigned Offset = elementSize*CSI->getValue();
- NextReg = makeAnotherReg(Type::UIntTy);
- BMI(MBB, IP, X86::ADDri32, 2,NextReg).addReg(BaseReg).addZImm(Offset);
+ unsigned Reg = makeAnotherReg(Type::UIntTy);
+ BuildMI(*MBB, IP, X86::ADD32ri, 2, TargetReg)
+ .addReg(Reg).addImm(Offset);
+ --IP; // Insert the next instruction before this one.
+ TargetReg = Reg; // Codegen the rest of the GEP into this
}
} else if (elementSize == 1) {
// If the element size is 1, we don't have to multiply, just add
unsigned idxReg = getReg(idx, MBB, IP);
- NextReg = makeAnotherReg(Type::UIntTy);
- BMI(MBB, IP, X86::ADDrr32, 2, NextReg).addReg(BaseReg).addReg(idxReg);
+ unsigned Reg = makeAnotherReg(Type::UIntTy);
+ BuildMI(*MBB, IP, X86::ADD32rr, 2,TargetReg).addReg(Reg).addReg(idxReg);
+ --IP; // Insert the next instruction before this one.
+ TargetReg = Reg; // Codegen the rest of the GEP into this
} else {
unsigned idxReg = getReg(idx, MBB, IP);
unsigned OffsetReg = makeAnotherReg(Type::UIntTy);
+ // Make sure we can back the iterator up to point to the first
+ // instruction emitted.
+ MachineBasicBlock::iterator BeforeIt = IP;
+ if (IP == MBB->begin())
+ BeforeIt = MBB->end();
+ else
+ --BeforeIt;
doMultiplyConst(MBB, IP, OffsetReg, Type::IntTy, idxReg, elementSize);
// Emit an ADD to add OffsetReg to the basePtr.
- NextReg = makeAnotherReg(Type::UIntTy);
- BMI(MBB, IP, X86::ADDrr32, 2,NextReg).addReg(BaseReg).addReg(OffsetReg);
+ unsigned Reg = makeAnotherReg(Type::UIntTy);
+ BuildMI(*MBB, IP, X86::ADD32rr, 2, TargetReg)
+ .addReg(Reg).addReg(OffsetReg);
+
+ // Step to the first instruction of the multiply.
+ if (BeforeIt == MBB->end())
+ IP = MBB->begin();
+ else
+ IP = ++BeforeIt;
+
+ TargetReg = Reg; // Codegen the rest of the GEP into this
}
}
- // Now that we are here, further indices refer to subtypes of this
- // one, so we don't need to worry about BaseReg itself, anymore.
- BaseReg = NextReg;
- }
- // After we have processed all the indices, the result is left in
- // BaseReg. Move it to the register where we were expected to
- // put the answer. A 32-bit move should do it, because we are in
- // ILP32 land.
- BMI(MBB, IP, X86::MOVrr32, 1, TargetReg).addReg(BaseReg);
+ }
}
@@ -2120,7 +2657,7 @@
// Create a new stack object using the frame manager...
int FrameIdx = F->getFrameInfo()->CreateStackObject(TySize, Alignment);
- addFrameReference(BuildMI(BB, X86::LEAr32, 5, getReg(I)), FrameIdx);
+ addFrameReference(BuildMI(BB, X86::LEA32r, 5, getReg(I)), FrameIdx);
return;
}
}
@@ -2136,18 +2673,18 @@
// AddedSize = add <TotalSizeReg>, 15
unsigned AddedSizeReg = makeAnotherReg(Type::UIntTy);
- BuildMI(BB, X86::ADDri32, 2, AddedSizeReg).addReg(TotalSizeReg).addZImm(15);
+ BuildMI(BB, X86::ADD32ri, 2, AddedSizeReg).addReg(TotalSizeReg).addImm(15);
// AlignedSize = and <AddedSize>, ~15
unsigned AlignedSize = makeAnotherReg(Type::UIntTy);
- BuildMI(BB, X86::ANDri32, 2, AlignedSize).addReg(AddedSizeReg).addZImm(~15);
+ BuildMI(BB, X86::AND32ri, 2, AlignedSize).addReg(AddedSizeReg).addImm(~15);
// Subtract size from stack pointer, thereby allocating some space.
- BuildMI(BB, X86::SUBrr32, 2, X86::ESP).addReg(X86::ESP).addReg(AlignedSize);
+ BuildMI(BB, X86::SUB32rr, 2, X86::ESP).addReg(X86::ESP).addReg(AlignedSize);
// Put a pointer to the space into the result register, by copying
// the stack pointer.
- BuildMI(BB, X86::MOVrr32, 1, getReg(I)).addReg(X86::ESP);
+ BuildMI(BB, X86::MOV32rr, 1, getReg(I)).addReg(X86::ESP);
// Inform the Frame Information that we have just allocated a variable-sized
// object.
Index: llvm/lib/Target/X86/PeepholeOptimizer.cpp
diff -u llvm/lib/Target/X86/PeepholeOptimizer.cpp:1.9 llvm/lib/Target/X86/PeepholeOptimizer.cpp:1.9.2.1
--- llvm/lib/Target/X86/PeepholeOptimizer.cpp:1.9 Sun Dec 14 07:24:15 2003
+++ llvm/lib/Target/X86/PeepholeOptimizer.cpp Mon Mar 1 17:58:15 2004
@@ -14,12 +14,18 @@
#include "X86.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Target/MRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
#include "Support/Statistic.h"
+#include "Support/STLExtras.h"
+
using namespace llvm;
namespace {
Statistic<> NumPHOpts("x86-peephole",
"Number of peephole optimization performed");
+ Statistic<> NumPHMoves("x86-peephole", "Number of peephole moves folded");
struct PH : public MachineFunctionPass {
virtual bool runOnMachineFunction(MachineFunction &MF);
@@ -49,16 +55,18 @@
bool PH::PeepholeOptimize(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &I) {
- MachineInstr *MI = *I;
- MachineInstr *Next = (I+1 != MBB.end()) ? *(I+1) : 0;
+ assert(I != MBB.end());
+ MachineBasicBlock::iterator NextI = next(I);
+
+ MachineInstr *MI = I;
+ MachineInstr *Next = (NextI != MBB.end()) ? &*NextI : (MachineInstr*)0;
unsigned Size = 0;
switch (MI->getOpcode()) {
- case X86::MOVrr8:
- case X86::MOVrr16:
- case X86::MOVrr32: // Destroy X = X copies...
+ case X86::MOV8rr:
+ case X86::MOV16rr:
+ case X86::MOV32rr: // Destroy X = X copies...
if (MI->getOperand(0).getReg() == MI->getOperand(1).getReg()) {
I = MBB.erase(I);
- delete MI;
return true;
}
return false;
@@ -67,12 +75,7 @@
// immediate despite the fact that the operands are 16 or 32 bits. Because
// this can save three bytes of code size (and icache space), we want to
// shrink them if possible.
- case X86::ADDri16: case X86::ADDri32:
- case X86::SUBri16: case X86::SUBri32:
- case X86::IMULri16: case X86::IMULri32:
- case X86::ANDri16: case X86::ANDri32:
- case X86::ORri16: case X86::ORri32:
- case X86::XORri16: case X86::XORri32:
+ case X86::IMUL16rri: case X86::IMUL32rri:
assert(MI->getNumOperands() == 3 && "These should all have 3 operands!");
if (MI->getOperand(2).isImmediate()) {
int Val = MI->getOperand(2).getImmedValue();
@@ -81,41 +84,127 @@
unsigned Opcode;
switch (MI->getOpcode()) {
default: assert(0 && "Unknown opcode value!");
- case X86::ADDri16: Opcode = X86::ADDri16b; break;
- case X86::ADDri32: Opcode = X86::ADDri32b; break;
- case X86::SUBri16: Opcode = X86::SUBri16b; break;
- case X86::SUBri32: Opcode = X86::SUBri32b; break;
- case X86::IMULri16: Opcode = X86::IMULri16b; break;
- case X86::IMULri32: Opcode = X86::IMULri32b; break;
- case X86::ANDri16: Opcode = X86::ANDri16b; break;
- case X86::ANDri32: Opcode = X86::ANDri32b; break;
- case X86::ORri16: Opcode = X86::ORri16b; break;
- case X86::ORri32: Opcode = X86::ORri32b; break;
- case X86::XORri16: Opcode = X86::XORri16b; break;
- case X86::XORri32: Opcode = X86::XORri32b; break;
+ case X86::IMUL16rri: Opcode = X86::IMUL16rri8; break;
+ case X86::IMUL32rri: Opcode = X86::IMUL32rri8; break;
}
unsigned R0 = MI->getOperand(0).getReg();
unsigned R1 = MI->getOperand(1).getReg();
- *I = BuildMI(Opcode, 2, R0).addReg(R1).addZImm((char)Val);
- delete MI;
+ I = MBB.insert(MBB.erase(I),
+ BuildMI(Opcode, 2, R0).addReg(R1).addZImm((char)Val));
+ return true;
+ }
+ }
+ return false;
+
+#if 0
+ case X86::IMUL16rmi: case X86::IMUL32rmi:
+ assert(MI->getNumOperands() == 6 && "These should all have 6 operands!");
+ if (MI->getOperand(5).isImmediate()) {
+ int Val = MI->getOperand(5).getImmedValue();
+ // If the value is the same when signed extended from 8 bits...
+ if (Val == (signed int)(signed char)Val) {
+ unsigned Opcode;
+ switch (MI->getOpcode()) {
+ default: assert(0 && "Unknown opcode value!");
+ case X86::IMUL16rmi: Opcode = X86::IMUL16rmi8; break;
+ case X86::IMUL32rmi: Opcode = X86::IMUL32rmi8; break;
+ }
+ unsigned R0 = MI->getOperand(0).getReg();
+ unsigned R1 = MI->getOperand(1).getReg();
+ unsigned Scale = MI->getOperand(2).getImmedValue();
+ unsigned R2 = MI->getOperand(3).getReg();
+ unsigned Offset = MI->getOperand(4).getImmedValue();
+ I = MBB.insert(MBB.erase(I),
+ BuildMI(Opcode, 5, R0).addReg(R1).addZImm(Scale).
+ addReg(R2).addSImm(Offset).addZImm((char)Val));
+ return true;
+ }
+ }
+ return false;
+#endif
+
+ case X86::ADD16ri: case X86::ADD32ri:
+ case X86::SUB16ri: case X86::SUB32ri:
+ case X86::AND16ri: case X86::AND32ri:
+ case X86::OR16ri: case X86::OR32ri:
+ case X86::XOR16ri: case X86::XOR32ri:
+ assert(MI->getNumOperands() == 2 && "These should all have 2 operands!");
+ if (MI->getOperand(1).isImmediate()) {
+ int Val = MI->getOperand(1).getImmedValue();
+ // If the value is the same when signed extended from 8 bits...
+ if (Val == (signed int)(signed char)Val) {
+ unsigned Opcode;
+ switch (MI->getOpcode()) {
+ default: assert(0 && "Unknown opcode value!");
+ case X86::ADD16ri: Opcode = X86::ADD16ri8; break;
+ case X86::ADD32ri: Opcode = X86::ADD32ri8; break;
+ case X86::SUB16ri: Opcode = X86::SUB16ri8; break;
+ case X86::SUB32ri: Opcode = X86::SUB32ri8; break;
+ case X86::AND16ri: Opcode = X86::AND16ri8; break;
+ case X86::AND32ri: Opcode = X86::AND32ri8; break;
+ case X86::OR16ri: Opcode = X86::OR16ri8; break;
+ case X86::OR32ri: Opcode = X86::OR32ri8; break;
+ case X86::XOR16ri: Opcode = X86::XOR16ri8; break;
+ case X86::XOR32ri: Opcode = X86::XOR32ri8; break;
+ }
+ unsigned R0 = MI->getOperand(0).getReg();
+ I = MBB.insert(MBB.erase(I),
+ BuildMI(Opcode, 1, R0, MachineOperand::UseAndDef)
+ .addZImm((char)Val));
+ return true;
+ }
+ }
+ return false;
+
+ case X86::ADD16mi: case X86::ADD32mi:
+ case X86::SUB16mi: case X86::SUB32mi:
+ case X86::AND16mi: case X86::AND32mi:
+ case X86::OR16mi: case X86::OR32mi:
+ case X86::XOR16mi: case X86::XOR32mi:
+ assert(MI->getNumOperands() == 5 && "These should all have 5 operands!");
+ if (MI->getOperand(4).isImmediate()) {
+ int Val = MI->getOperand(4).getImmedValue();
+ // If the value is the same when signed extended from 8 bits...
+ if (Val == (signed int)(signed char)Val) {
+ unsigned Opcode;
+ switch (MI->getOpcode()) {
+ default: assert(0 && "Unknown opcode value!");
+ case X86::ADD16mi: Opcode = X86::ADD16mi8; break;
+ case X86::ADD32mi: Opcode = X86::ADD32mi8; break;
+ case X86::SUB16mi: Opcode = X86::SUB16mi8; break;
+ case X86::SUB32mi: Opcode = X86::SUB32mi8; break;
+ case X86::AND16mi: Opcode = X86::AND16mi8; break;
+ case X86::AND32mi: Opcode = X86::AND32mi8; break;
+ case X86::OR16mi: Opcode = X86::OR16mi8; break;
+ case X86::OR32mi: Opcode = X86::OR32mi8; break;
+ case X86::XOR16mi: Opcode = X86::XOR16mi8; break;
+ case X86::XOR32mi: Opcode = X86::XOR32mi8; break;
+ }
+ unsigned R0 = MI->getOperand(0).getReg();
+ unsigned Scale = MI->getOperand(1).getImmedValue();
+ unsigned R1 = MI->getOperand(2).getReg();
+ unsigned Offset = MI->getOperand(3).getImmedValue();
+ I = MBB.insert(MBB.erase(I),
+ BuildMI(Opcode, 5).addReg(R0).addZImm(Scale).
+ addReg(R1).addSImm(Offset).addZImm((char)Val));
return true;
}
}
return false;
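The shrink test used by these cases is simply whether the immediate survives a round trip through a signed 8-bit value; a standalone sketch of that predicate:

    #include <cstdint>

    // The 16/32-bit immediate form can be replaced by the sign-extended
    // 8-bit form (ADD32ri -> ADD32ri8, etc.) exactly when sign-extending
    // the low byte reproduces the original value, saving three bytes of
    // encoding per instruction.
    bool fitsInSignExtendedImm8(int32_t Val) {
      return Val == (int32_t)(int8_t)Val;
    }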
#if 0
- case X86::MOVir32: Size++;
- case X86::MOVir16: Size++;
- case X86::MOVir8:
+ case X86::MOV32ri: Size++;
+ case X86::MOV16ri: Size++;
+ case X86::MOV8ri:
// FIXME: We can only do this transformation if we know that flags are not
// used here, because XOR clobbers the flags!
if (MI->getOperand(1).isImmediate()) { // avoid mov EAX, <value>
int Val = MI->getOperand(1).getImmedValue();
if (Val == 0) { // mov EAX, 0 -> xor EAX, EAX
- static const unsigned Opcode[] ={X86::XORrr8,X86::XORrr16,X86::XORrr32};
+ static const unsigned Opcode[] ={X86::XOR8rr,X86::XOR16rr,X86::XOR32rr};
unsigned Reg = MI->getOperand(0).getReg();
- *I = BuildMI(Opcode[Size], 2, Reg).addReg(Reg).addReg(Reg);
- delete MI;
+ I = MBB.insert(MBB.erase(I),
+ BuildMI(Opcode[Size], 2, Reg).addReg(Reg).addReg(Reg));
return true;
} else if (Val == -1) { // mov EAX, -1 -> or EAX, -1
// TODO: 'or Reg, -1' has a smaller encoding than 'mov Reg, -1'
@@ -123,12 +212,10 @@
}
return false;
#endif
- case X86::BSWAPr32: // Change bswap EAX, bswap EAX into nothing
- if (Next->getOpcode() == X86::BSWAPr32 &&
+ case X86::BSWAP32r: // Change bswap EAX, bswap EAX into nothing
+ if (Next->getOpcode() == X86::BSWAP32r &&
MI->getOperand(0).getReg() == Next->getOperand(0).getReg()) {
I = MBB.erase(MBB.erase(I));
- delete MI;
- delete Next;
return true;
}
return false;
@@ -144,7 +231,7 @@
// getDefinition - Return the machine instruction that defines the specified
// SSA virtual register.
MachineInstr *getDefinition(unsigned Reg) {
- assert(Reg >= MRegisterInfo::FirstVirtualRegister &&
+ assert(MRegisterInfo::isVirtualRegister(Reg) &&
"use-def chains only exist for SSA registers!");
assert(Reg - MRegisterInfo::FirstVirtualRegister < DefiningInst.size() &&
"Unknown register number!");
@@ -171,11 +258,11 @@
virtual bool runOnMachineFunction(MachineFunction &MF) {
for (MachineFunction::iterator BI = MF.begin(), E = MF.end(); BI!=E; ++BI)
for (MachineBasicBlock::iterator I = BI->begin(); I != BI->end(); ++I) {
- MachineInstr *MI = *I;
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
- if (MO.isVirtualRegister() && MO.isDef() && !MO.isUse())
- setDefinition(MO.getReg(), MI);
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = I->getOperand(i);
+ if (MO.isRegister() && MO.isDef() && !MO.isUse() &&
+ MRegisterInfo::isVirtualRegister(MO.getReg()))
+ setDefinition(MO.getReg(), I);
}
}
return false;
@@ -233,7 +320,8 @@
/// register, return the machine instruction defining it, otherwise, return
/// null.
MachineInstr *getDefiningInst(MachineOperand &MO) {
- if (MO.isDef() || !MO.isVirtualRegister()) return 0;
+ if (MO.isDef() || !MO.isRegister() ||
+ !MRegisterInfo::isVirtualRegister(MO.getReg())) return 0;
return UDC->getDefinition(MO.getReg());
}
@@ -299,7 +387,7 @@
// Attempt to fold instructions used by the base register into the instruction
if (MachineInstr *DefInst = getDefiningInst(BaseRegOp)) {
switch (DefInst->getOpcode()) {
- case X86::MOVir32:
+ case X86::MOV32ri:
// If there is no displacement set for this instruction set one now.
// FIXME: If we can fold two immediates together, we should do so!
if (DisplacementOp.isImmediate() && !DisplacementOp.getImmedValue()) {
@@ -310,7 +398,7 @@
}
break;
- case X86::ADDrr32:
+ case X86::ADD32rr:
// If the source is a register-register add, and we do not yet have an
// index register, fold the add into the memory address.
if (IndexReg == 0) {
@@ -321,7 +409,7 @@
}
break;
- case X86::SHLir32:
+ case X86::SHL32ri:
// If this shift could be folded into the index portion of the address if
// it were the index register, move it to the index register operand now,
// so it will be folded in below.
@@ -339,7 +427,7 @@
// Attempt to fold instructions used by the index into the instruction
if (MachineInstr *DefInst = getDefiningInst(IndexRegOp)) {
switch (DefInst->getOpcode()) {
- case X86::SHLir32: {
+ case X86::SHL32ri: {
// Figure out what the resulting scale would be if we folded this shift.
unsigned ResScale = Scale * (1 << DefInst->getOperand(2).getImmedValue());
if (isValidScaleAmount(ResScale)) {
@@ -357,41 +445,48 @@
bool SSAPH::PeepholeOptimize(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &I) {
- MachineInstr *MI = *I;
- MachineInstr *Next = (I+1 != MBB.end()) ? *(I+1) : 0;
+ MachineBasicBlock::iterator NextI = next(I);
+
+ MachineInstr *MI = I;
+ MachineInstr *Next = (NextI != MBB.end()) ? &*NextI : (MachineInstr*)0;
bool Changed = false;
+ const TargetInstrInfo &TII = MBB.getParent()->getTarget().getInstrInfo();
+
// Scan the operands of this instruction. If any operands are
// register-register copies, replace the operand with the source.
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i)
// Is this an SSA register use?
- if (MachineInstr *DefInst = getDefiningInst(MI->getOperand(i)))
+ if (MachineInstr *DefInst = getDefiningInst(MI->getOperand(i))) {
// If the operand is a vreg-vreg copy, it is always safe to replace the
// source value with the input operand.
- if (DefInst->getOpcode() == X86::MOVrr8 ||
- DefInst->getOpcode() == X86::MOVrr16 ||
- DefInst->getOpcode() == X86::MOVrr32) {
- // Don't propagate physical registers into PHI nodes...
- if (MI->getOpcode() != X86::PHI ||
- DefInst->getOperand(1).isVirtualRegister())
- Changed = Propagate(MI, i, DefInst, 1);
+ unsigned Source, Dest;
+ if (TII.isMoveInstr(*DefInst, Source, Dest)) {
+ // Don't propagate physical registers into any instructions.
+ if (DefInst->getOperand(1).isRegister() &&
+ MRegisterInfo::isVirtualRegister(Source)) {
+ MI->getOperand(i).setReg(Source);
+ Changed = true;
+ ++NumPHMoves;
+ }
}
+ }
// Perform instruction specific optimizations.
switch (MI->getOpcode()) {
// Register to memory stores. Format: <base,scale,indexreg,immdisp>, srcreg
- case X86::MOVrm32: case X86::MOVrm16: case X86::MOVrm8:
- case X86::MOVim32: case X86::MOVim16: case X86::MOVim8:
+ case X86::MOV32mr: case X86::MOV16mr: case X86::MOV8mr:
+ case X86::MOV32mi: case X86::MOV16mi: case X86::MOV8mi:
// Check to see if we can fold the source instruction into this one...
if (MachineInstr *SrcInst = getDefiningInst(MI->getOperand(4))) {
switch (SrcInst->getOpcode()) {
// Fold the immediate value into the store, if possible.
- case X86::MOVir8: return Propagate(MI, 4, SrcInst, 1, X86::MOVim8);
- case X86::MOVir16: return Propagate(MI, 4, SrcInst, 1, X86::MOVim16);
- case X86::MOVir32: return Propagate(MI, 4, SrcInst, 1, X86::MOVim32);
+ case X86::MOV8ri: return Propagate(MI, 4, SrcInst, 1, X86::MOV8mi);
+ case X86::MOV16ri: return Propagate(MI, 4, SrcInst, 1, X86::MOV16mi);
+ case X86::MOV32ri: return Propagate(MI, 4, SrcInst, 1, X86::MOV32mi);
default: break;
}
}
@@ -401,9 +496,9 @@
return true;
break;
- case X86::MOVmr32:
- case X86::MOVmr16:
- case X86::MOVmr8:
+ case X86::MOV32rm:
+ case X86::MOV16rm:
+ case X86::MOV8rm:
// If we can optimize the addressing expression, do so now.
if (OptimizeAddress(MI, 1))
return true;
Index: llvm/lib/Target/X86/Printer.cpp
diff -u llvm/lib/Target/X86/Printer.cpp:1.76 llvm/lib/Target/X86/Printer.cpp:1.76.2.1
--- llvm/lib/Target/X86/Printer.cpp:1.76 Wed Jan 14 11:14:42 2004
+++ llvm/lib/Target/X86/Printer.cpp Mon Mar 1 17:58:15 2004
@@ -28,8 +28,7 @@
#include "Support/Statistic.h"
#include "Support/StringExtras.h"
#include "Support/CommandLine.h"
-
-namespace llvm {
+using namespace llvm;
namespace {
Statistic<> EmittedInsts("asm-printer", "Number of machine instrs printed");
@@ -91,7 +90,7 @@
/// using the given target machine description. This should work
/// regardless of whether the function is in SSA form.
///
-FunctionPass *createX86CodePrinterPass(std::ostream &o,TargetMachine &tm){
+FunctionPass *llvm::createX86CodePrinterPass(std::ostream &o,TargetMachine &tm){
return new Printer(o, tm);
}
@@ -145,7 +144,10 @@
assert(CB == ConstantBool::True);
O << "1";
} else if (const ConstantSInt *CI = dyn_cast<ConstantSInt>(CV))
- O << CI->getValue();
+ if (((CI->getValue() << 32) >> 32) == CI->getValue())
+ O << CI->getValue();
+ else
+ O << (unsigned long long)CI->getValue();
else if (const ConstantUInt *CI = dyn_cast<ConstantUInt>(CV))
O << CI->getValue();
else if (const ConstantPointerRef *CPR = dyn_cast<ConstantPointerRef>(CV))
@@ -365,7 +367,7 @@
II != E; ++II) {
// Print the assembly for the instruction.
O << "\t";
- printMachineInstruction(*II);
+ printMachineInstruction(II);
}
}
@@ -400,7 +402,7 @@
}
// FALLTHROUGH
case MachineOperand::MO_MachineRegister:
- if (MO.getReg() < MRegisterInfo::FirstVirtualRegister)
+ if (MRegisterInfo::isPhysicalRegister(MO.getReg()))
// Bug Workaround: See note in Printer::doInitialization about %.
O << "%" << RI.get(MO.getReg()).Name;
else
@@ -431,16 +433,14 @@
}
}
-static const std::string sizePtr(const TargetInstrDescriptor &Desc) {
- switch (Desc.TSFlags & X86II::ArgMask) {
+static const char* const sizePtr(const TargetInstrDescriptor &Desc) {
+ switch (Desc.TSFlags & X86II::MemMask) {
default: assert(0 && "Unknown arg size!");
- case X86II::Arg8: return "BYTE PTR";
- case X86II::Arg16: return "WORD PTR";
- case X86II::Arg32: return "DWORD PTR";
- case X86II::Arg64: return "QWORD PTR";
- case X86II::ArgF32: return "DWORD PTR";
- case X86II::ArgF64: return "QWORD PTR";
- case X86II::ArgF80: return "XWORD PTR";
+ case X86II::Mem8: return "BYTE PTR";
+ case X86II::Mem16: return "WORD PTR";
+ case X86II::Mem32: return "DWORD PTR";
+ case X86II::Mem64: return "QWORD PTR";
+ case X86II::Mem80: return "XWORD PTR";
}
}
@@ -594,7 +594,7 @@
unsigned Reg = MI->getOperand(0).getReg();
- O << TII.getName(MI->getOpCode()) << " ";
+ O << TII.getName(MI->getOpcode()) << " ";
printOp(MI->getOperand(0));
if (MI->getNumOperands() == 2 &&
(!MI->getOperand(1).isRegister() ||
@@ -609,35 +609,31 @@
return;
}
case X86II::MRMDestReg: {
- // There are two acceptable forms of MRMDestReg instructions, those with 2,
- // 3 and 4 operands:
+ // There are three forms of MRMDestReg instructions, those with 2
+ // or 3 operands:
//
- // 2 Operands: this is for things like mov that do not read a second input
+ // 2 Operands: this is for things like mov that do not read a
+ // second input.
//
- // 3 Operands: in this form, the first two registers (the destination, and
- // the first operand) should be the same, post register allocation. The 3rd
- // operand is an additional input. This should be for things like add
- // instructions.
+ // 2 Operands: two address instructions which def&use the first
+ // argument and use the second as input.
//
- // 4 Operands: This form is for instructions which are 3 operands forms, but
- // have a constant argument as well.
+ // 3 Operands: in this form, two address instructions are the same
+ // as in 2 but have a constant argument as well.
//
bool isTwoAddr = TII.isTwoAddrInstr(Opcode);
assert(MI->getOperand(0).isRegister() &&
(MI->getNumOperands() == 2 ||
- (isTwoAddr && MI->getOperand(1).isRegister() &&
- MI->getOperand(0).getReg() == MI->getOperand(1).getReg() &&
- (MI->getNumOperands() == 3 ||
- (MI->getNumOperands() == 4 && MI->getOperand(3).isImmediate()))))
+ (MI->getNumOperands() == 3 && MI->getOperand(2).isImmediate()))
&& "Bad format for MRMDestReg!");
- O << TII.getName(MI->getOpCode()) << " ";
+ O << TII.getName(MI->getOpcode()) << " ";
printOp(MI->getOperand(0));
O << ", ";
- printOp(MI->getOperand(1+isTwoAddr));
- if (MI->getNumOperands() == 4) {
+ printOp(MI->getOperand(1));
+ if (MI->getNumOperands() == 3) {
O << ", ";
- printOp(MI->getOperand(3));
+ printOp(MI->getOperand(2));
}
O << "\n";
return;
@@ -647,52 +643,53 @@
// These instructions are the same as MRMDestReg, but instead of having a
// register reference for the mod/rm field, it's a memory reference.
//
- assert(isMem(MI, 0) && MI->getNumOperands() == 4+1 &&
- MI->getOperand(4).isRegister() && "Bad format for MRMDestMem!");
+ assert(isMem(MI, 0) &&
+ (MI->getNumOperands() == 4+1 ||
+ (MI->getNumOperands() == 4+2 && MI->getOperand(5).isImmediate()))
+ && "Bad format for MRMDestMem!");
- O << TII.getName(MI->getOpCode()) << " " << sizePtr(Desc) << " ";
+ O << TII.getName(MI->getOpcode()) << " " << sizePtr(Desc) << " ";
printMemReference(MI, 0);
O << ", ";
printOp(MI->getOperand(4));
+ if (MI->getNumOperands() == 4+2) {
+ O << ", ";
+ printOp(MI->getOperand(5));
+ }
O << "\n";
return;
}
case X86II::MRMSrcReg: {
- // There are three forms that are acceptable for MRMSrcReg instructions,
- // those with 3 and 2 operands:
+ // There are three forms that are acceptable for MRMSrcReg
+ // instructions, those with 2 or 3 operands:
//
- // 3 Operands: in this form, the last register (the second input) is the
- // ModR/M input. The first two operands should be the same, post register
- // allocation. This is for things like: add r32, r/m32
+ // 2 Operands: this is for things like mov that do not read a
+ // second input.
//
- // 3 Operands: in this form, we can have 'INST R, R, imm', which is used for
- // instructions like the IMULri instructions.
+ // 2 Operands: in this form, the last register is the ModR/M
+ // input. The first operand is a def&use. This is for things
+ // like: add r32, r/m32
+ //
+ // 3 Operands: in this form, we can have 'INST R1, R2, imm', which is used
+ // for instructions like the IMULrri instructions.
//
- // 2 Operands: this is for things like mov that do not read a second input
//
assert(MI->getOperand(0).isRegister() &&
MI->getOperand(1).isRegister() &&
- (MI->getNumOperands() == 2 ||
- (MI->getNumOperands() == 3 &&
- (MI->getOperand(2).isRegister() ||
- MI->getOperand(2).isImmediate())))
+ (MI->getNumOperands() == 2 ||
+ (MI->getNumOperands() == 3 &&
+ (MI->getOperand(2).isImmediate())))
&& "Bad format for MRMSrcReg!");
- if (MI->getNumOperands() == 3 &&
- MI->getOperand(0).getReg() != MI->getOperand(1).getReg())
- O << "**";
- O << TII.getName(MI->getOpCode()) << " ";
+ O << TII.getName(MI->getOpcode()) << " ";
printOp(MI->getOperand(0));
-
- // If this is IMULri* instructions, print the non-two-address operand.
- if (MI->getNumOperands() == 3 && MI->getOperand(2).isImmediate()) {
- O << ", ";
- printOp(MI->getOperand(1));
- }
-
O << ", ";
- printOp(MI->getOperand(MI->getNumOperands()-1));
+ printOp(MI->getOperand(1));
+ if (MI->getNumOperands() == 3) {
+ O << ", ";
+ printOp(MI->getOperand(2));
+ }
O << "\n";
return;
}
@@ -703,25 +700,24 @@
//
assert(MI->getOperand(0).isRegister() &&
(MI->getNumOperands() == 1+4 && isMem(MI, 1)) ||
- (MI->getNumOperands() == 2+4 && MI->getOperand(1).isRegister() &&
- isMem(MI, 2))
- && "Bad format for MRMDestReg!");
- if (MI->getNumOperands() == 2+4 &&
- MI->getOperand(0).getReg() != MI->getOperand(1).getReg())
- O << "**";
-
- O << TII.getName(MI->getOpCode()) << " ";
+(MI->getNumOperands() == 2+4 && MI->getOperand(5).isImmediate() && isMem(MI, 1))
+ && "Bad format for MRMSrcMem!");
+ O << TII.getName(MI->getOpcode()) << " ";
printOp(MI->getOperand(0));
O << ", " << sizePtr(Desc) << " ";
- printMemReference(MI, MI->getNumOperands()-4);
+ printMemReference(MI, 1);
+ if (MI->getNumOperands() == 2+4) {
+ O << ", ";
+ printOp(MI->getOperand(5));
+ }
O << "\n";
return;
}
- case X86II::MRMS0r: case X86II::MRMS1r:
- case X86II::MRMS2r: case X86II::MRMS3r:
- case X86II::MRMS4r: case X86II::MRMS5r:
- case X86II::MRMS6r: case X86II::MRMS7r: {
+ case X86II::MRM0r: case X86II::MRM1r:
+ case X86II::MRM2r: case X86II::MRM3r:
+ case X86II::MRM4r: case X86II::MRM5r:
+ case X86II::MRM6r: case X86II::MRM7r: {
// In this form, the following are valid formats:
// 1. sete r
// 2. cmp reg, immediate
@@ -741,7 +737,7 @@
MI->getOperand(0).getReg() != MI->getOperand(1).getReg())
O << "**";
- O << TII.getName(MI->getOpCode()) << " ";
+ O << TII.getName(MI->getOpcode()) << " ";
printOp(MI->getOperand(0));
if (MI->getOperand(MI->getNumOperands()-1).isImmediate()) {
O << ", ";
@@ -753,10 +749,10 @@
return;
}
- case X86II::MRMS0m: case X86II::MRMS1m:
- case X86II::MRMS2m: case X86II::MRMS3m:
- case X86II::MRMS4m: case X86II::MRMS5m:
- case X86II::MRMS6m: case X86II::MRMS7m: {
+ case X86II::MRM0m: case X86II::MRM1m:
+ case X86II::MRM2m: case X86II::MRM3m:
+ case X86II::MRM4m: case X86II::MRM5m:
+ case X86II::MRM6m: case X86II::MRM7m: {
// In this form, the following are valid formats:
// 1. sete [m]
// 2. cmp [m], immediate
@@ -776,7 +772,7 @@
// is misassembled by gas in intel_syntax mode as its 32-bit
// equivalent "fstp DWORD PTR [...]". Workaround: Output the raw
// opcode bytes instead of the instruction.
- if (MI->getOpCode() == X86::FSTPr80) {
+ if (MI->getOpcode() == X86::FSTP80m) {
if ((MI->getOperand(0).getReg() == X86::ESP)
&& (MI->getOperand(1).getImmedValue() == 1)) {
if (Op3.isImmediate() &&
@@ -797,7 +793,7 @@
// misassembled by gas in intel_syntax mode as its 32-bit
// equivalent "fld DWORD PTR [...]". Workaround: Output the raw
// opcode bytes instead of the instruction.
- if (MI->getOpCode() == X86::FLDr80 &&
+ if (MI->getOpcode() == X86::FLD80m &&
MI->getOperand(0).getReg() == X86::ESP &&
MI->getOperand(1).getImmedValue() == 1) {
if (Op3.isImmediate() && Op3.getImmedValue() >= -128 &&
@@ -817,7 +813,7 @@
// 64 bit modes." libopcodes disassembles it as "fild DWORD PTR
// [...]", which is wrong. Workaround: Output the raw opcode bytes
// instead of the instruction.
- if (MI->getOpCode() == X86::FILDr64 &&
+ if (MI->getOpcode() == X86::FILD64m &&
MI->getOperand(0).getReg() == X86::ESP &&
MI->getOperand(1).getImmedValue() == 1) {
if (Op3.isImmediate() && Op3.getImmedValue() >= -128 &&
@@ -838,7 +834,7 @@
// "fistpll DWORD PTR [...]", which is wrong. Workaround: Output
// "fistpll DWORD PTR " instead, which is what libopcodes is
// expecting to see.
- if (MI->getOpCode() == X86::FISTPr64) {
+ if (MI->getOpcode() == X86::FISTP64m) {
O << "fistpll DWORD PTR ";
printMemReference(MI, 0);
if (MI->getNumOperands() == 5) {
@@ -848,7 +844,7 @@
O << "\t# ";
}
- O << TII.getName(MI->getOpCode()) << " ";
+ O << TII.getName(MI->getOpcode()) << " ";
O << sizePtr(Desc) << " ";
printMemReference(MI, 0);
if (MI->getNumOperands() == 5) {
@@ -955,5 +951,3 @@
delete Mang;
return false; // success
}
-
-} // End llvm namespace
Index: llvm/lib/Target/X86/README.txt
diff -u llvm/lib/Target/X86/README.txt:1.10 llvm/lib/Target/X86/README.txt:1.10.6.1
--- llvm/lib/Target/X86/README.txt:1.10 Wed Aug 13 14:02:09 2003
+++ llvm/lib/Target/X86/README.txt Mon Mar 1 17:58:15 2004
@@ -8,14 +8,14 @@
I. Overview
===========
-This directory contains a machine description for the X86 processor. Currently
-this machine description is used for a high performance code generator used by a
-LLVM JIT. One of the main objectives that we would like to support with this
-project is to build a nice clean code generator that may be extended in the
-future in a variety of ways: new targets, new optimizations, new
-transformations, etc.
+This directory contains a machine description for the X86 processor family.
+Currently this machine description is used for a high performance code generator
+used by the LLVM JIT and static code generators. One of the main objectives
+that we would like to support with this project is to build a nice clean code
+generator that may be extended in the future in a variety of ways: new targets,
+new optimizations, new transformations, etc.
-This document describes the current state of the LLVM JIT, along with
+This document describes the current state of the X86 code generator, along with
implementation notes, design decisions, and other stuff.
@@ -33,10 +33,9 @@
At the high-level, LLVM code is translated to a machine specific representation
formed out of MachineFunction, MachineBasicBlock, and MachineInstr instances
(defined in include/llvm/CodeGen). This representation is completely target
-agnostic, representing instructions in their most abstract form: an opcode, a
-destination, and a series of operands. This representation is designed to
-support both SSA representation for machine code, as well as a register
-allocated, non-SSA form.
+agnostic, representing instructions in their most abstract form: an opcode and a
+series of operands. This representation is designed to support both SSA
+representation for machine code, as well as a register allocated, non-SSA form.
Because the Machine* representation must work regardless of the target machine,
it contains very little semantic information about the program. To get semantic
@@ -52,22 +51,22 @@
------------------------------
Target machine instructions are represented as instances of MachineInstr, and
all specific machine instruction types should have an entry in the
-InstructionInfo table defined through X86InstrInfo.def. In the X86 backend,
-there are two particularly interesting forms of machine instruction: those that
-produce a value (such as add), and those that do not (such as a store).
+X86InstrInfo.td file. In the X86 backend, there are two particularly
+interesting forms of machine instruction: those that produce a value (such as
+add), and those that do not (such as a store).
Instructions that produce a value use Operand #0 as the "destination" register.
When printing the assembly code with the built-in machine instruction printer,
these destination registers will be printed to the left side of an '=' sign, as
-in: %reg1027 = addl %reg1026, %reg1025
+in: %reg1027 = add %reg1026, %reg1025
-This 'addl' MachineInstruction contains three "operands": the first is the
+This `add' MachineInstruction contains three "operands": the first is the
destination register (#1027), the second is the first source register (#1026)
and the third is the second source register (#1025). Never forget the
destination register will show up in the MachineInstr operands vector. The code
to generate this instruction looks like this:
- BuildMI(BB, X86::ADDrr32, 2, 1027).addReg(1026).addReg(1025);
+ BuildMI(BB, X86::ADD32rr, 2, 1027).addReg(1026).addReg(1025);
The first argument to BuildMI is the basic block to append the machine
instruction to, the second is the opcode, the third is the number of operands,
@@ -83,7 +82,8 @@
IV. Source Code Layout
======================
-The LLVM-JIT is composed of source files primarily in the following locations:
+The LLVM code generator is composed of source files primarily in the following
+locations:
include/llvm/CodeGen
--------------------
@@ -113,16 +113,15 @@
rest of the compiler working. It contains any code that is truly specific to
the X86 backend, for example the instruction selector and machine code emitter.
-tools/lli/JIT
--------------
+lib/ExecutionEngine/JIT
+-----------------------
This directory contains the top-level code for the JIT compiler. This code
-basically boils down to a call to TargetMachine::addPassesToJITCompile. As we
-progress with the project, this will also contain the compile-dispatch-recompile
-loop.
-
-test/Regression/Jello
----------------------
-This directory contains regression tests for the JIT.
+basically boils down to a call to TargetMachine::addPassesToJITCompile, and
+handles the compile-dispatch-recompile cycle.
+
+test/Regression/CodeGen/X86
+---------------------------
+This directory contains regression tests for the X86 code generator.
==================================================
@@ -150,30 +149,37 @@
way, in the same order.
-==========================
-VI. TODO / Future Projects
-==========================
+======================
+VI. Instruction naming
+======================
-There are a large number of things remaining to do. Here is a partial list:
+An instruction name consists of the base name, a default operand size, and
+then one character per operand, each with an optional explicit size when it
+differs from the default. For example:
-Next Phase:
------------
-1. Implement linear time optimal instruction selector
-2. Implement smarter (linear scan?) register allocator
+ADD8rr -> add, 8-bit register, 8-bit register
-After this project:
--------------------
-1. Implement lots of nifty runtime optimizations
-2. Implement new targets: IA64? X86-64? M68k? MMIX? Who knows...
+IMUL16rmi -> imul, 16-bit register, 16-bit memory, 16-bit immediate
+
+IMUL16rmi8 -> imul, 16-bit register, 16-bit memory, 8-bit immediate
+
+MOVSX32rm16 -> movsx, 32-bit register, 16-bit memory
+
+
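As a rough, editorial sketch (not part of this patch) of how such names are used
with BuildMI, mirroring the ADD32rr example earlier in this document; the virtual
register numbers are made up, and addZImm is assumed from the MachineInstrBuilder
calls visible in X86InstrBuilder.h:

  BuildMI(BB, X86::MOV32ri, 1, 1030).addZImm(42);               // mov: 32-bit reg = 32-bit imm
  BuildMI(BB, X86::ADD32rr, 2, 1031).addReg(1030).addReg(1025); // add: 32-bit reg, 32-bit reg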
+==========================
+VII. TODO / Future Projects
+==========================
+
+Ideas for Improvements:
+-----------------------
+1. Implement an *optimal* linear time instruction selector
+2. Implement lots of nifty runtime optimizations
+3. Implement new targets: IA64? X86-64? M68k? MMIX? Who knows...
Infrastructure Improvements:
----------------------------
-1. Bytecode is designed to be able to read particular functions from the
- bytecode without having to read the whole program. Bytecode reader should be
- extended to allow on-demand loading of functions.
-
-2. X86/Printer.cpp and Sparc/EmitAssembly.cpp both have copies of what is
+1. X86/Printer.cpp and Sparc/EmitAssembly.cpp both have copies of what is
roughly the same code, used to output constants in a form the assembler
can understand. These functions should be shared at some point. They
should be rewritten to pass around iostreams instead of strings. The
Index: llvm/lib/Target/X86/X86.td
diff -u llvm/lib/Target/X86/X86.td:1.7 llvm/lib/Target/X86/X86.td:1.7.6.1
--- llvm/lib/Target/X86/X86.td:1.7 Tue Oct 21 10:17:13 2003
+++ llvm/lib/Target/X86/X86.td Mon Mar 1 17:58:15 2004
@@ -33,10 +33,10 @@
// Define how we want to layout our TargetSpecific information field... This
// should be kept up-to-date with the fields in the X86InstrInfo.h file.
- let TSFlagsFields = ["FormBits" , "hasOpSizePrefix" , "Prefix", "TypeBits",
- "FPFormBits", "printImplicitUses", "Opcode"];
- let TSFlagsShifts = [ 0, 5, 6, 10,
- 13, 16, 17];
+ let TSFlagsFields = ["FormBits" , "hasOpSizePrefix" , "Prefix", "MemTypeBits",
+ "ImmTypeBits", "FPFormBits", "printImplicitUses", "Opcode"];
+ let TSFlagsShifts = [0, 5, 6, 10, 13,
+ 15, 18, 19];
}
def X86 : Target {
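For reference, an editorial sketch of the TSFlags bit layout produced by the
shifts above (derived from TSFlagsShifts here and the X86II enums in
X86InstrInfo.h below; not part of the patch):

  // bits  0-4   FormBits            bits 13-14  ImmTypeBits
  // bit   5     hasOpSizePrefix     bits 15-17  FPFormBits
  // bits  6-9   Prefix              bit  18     printImplicitUses
  // bits 10-12  MemTypeBits         bits 19-26  Opcode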
Index: llvm/lib/Target/X86/X86CodeEmitter.cpp
diff -u llvm/lib/Target/X86/X86CodeEmitter.cpp:1.46 llvm/lib/Target/X86/X86CodeEmitter.cpp:1.46.2.1
--- llvm/lib/Target/X86/X86CodeEmitter.cpp:1.46 Sat Dec 20 10:22:59 2003
+++ llvm/lib/Target/X86/X86CodeEmitter.cpp Mon Mar 1 17:58:15 2004
@@ -242,7 +242,7 @@
BasicBlockAddrs[MBB.getBasicBlock()] = Addr;
for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ++I)
- emitInstruction(**I);
+ emitInstruction(*I);
}
@@ -329,7 +329,7 @@
case X86::ST4: case X86::ST5: case X86::ST6: case X86::ST7:
return RegNo-X86::ST0;
default:
- assert(RegNo >= MRegisterInfo::FirstVirtualRegister &&
+ assert(MRegisterInfo::isVirtualRegister(RegNo) &&
"Unknown physical register!");
assert(0 && "Register allocator hasn't allocated reg correctly yet!");
return 0;
@@ -453,14 +453,24 @@
}
}
+static unsigned sizeOfImm(const TargetInstrDescriptor &Desc) {
+ switch (Desc.TSFlags & X86II::ImmMask) {
+ case X86II::Imm8: return 1;
+ case X86II::Imm16: return 2;
+ case X86II::Imm32: return 4;
+ default: assert(0 && "Immediate size not set!");
+ return 0;
+ }
+}
+
static unsigned sizeOfPtr(const TargetInstrDescriptor &Desc) {
- switch (Desc.TSFlags & X86II::ArgMask) {
- case X86II::Arg8: return 1;
- case X86II::Arg16: return 2;
- case X86II::Arg32: return 4;
- case X86II::ArgF32: return 4;
- case X86II::ArgF64: return 8;
- case X86II::ArgF80: return 10;
+ switch (Desc.TSFlags & X86II::MemMask) {
+ case X86II::Mem8: return 1;
+ case X86II::Mem16: return 2;
+ case X86II::Mem32: return 4;
+ case X86II::Mem64: return 8;
+ case X86II::Mem80: return 10;
+ case X86II::Mem128: return 16;
default: assert(0 && "Memory size not set!");
return 0;
}
@@ -472,6 +482,9 @@
unsigned Opcode = MI.getOpcode();
const TargetInstrDescriptor &Desc = II->get(Opcode);
+ // Emit the repeat opcode prefix as needed.
+ if ((Desc.TSFlags & X86II::Op0Mask) == X86II::REP) MCE.emitByte(0xF3);
+
// Emit instruction prefixes if necessary
if (Desc.TSFlags & X86II::OpSize) MCE.emitByte(0x66);// Operand size...
@@ -479,6 +492,7 @@
case X86II::TB:
MCE.emitByte(0x0F); // Two-byte opcode prefix
break;
+ case X86II::REP: break; // already handled.
case X86II::D8: case X86II::D9: case X86II::DA: case X86II::DB:
case X86II::DC: case X86II::DD: case X86II::DE: case X86II::DF:
MCE.emitByte(0xD8+
@@ -523,35 +537,31 @@
MCE.emitByte(BaseOpcode + getX86RegNum(MI.getOperand(0).getReg()));
if (MI.getNumOperands() == 2) {
MachineOperand &MO1 = MI.getOperand(1);
- if (MO1.isImmediate() || MO1.getVRegValueOrNull() ||
- MO1.isGlobalAddress() || MO1.isExternalSymbol()) {
- unsigned Size = sizeOfPtr(Desc);
- if (Value *V = MO1.getVRegValueOrNull()) {
- assert(Size == 4 && "Don't know how to emit non-pointer values!");
- emitGlobalAddressForPtr(cast<GlobalValue>(V));
- } else if (MO1.isGlobalAddress()) {
- assert(Size == 4 && "Don't know how to emit non-pointer values!");
- assert(!MO1.isPCRelative() && "Function pointer ref is PC relative?");
- emitGlobalAddressForPtr(MO1.getGlobal());
- } else if (MO1.isExternalSymbol()) {
- assert(Size == 4 && "Don't know how to emit non-pointer values!");
-
- unsigned Address = MCE.getGlobalValueAddress(MO1.getSymbolName());
- assert(Address && "Unknown external symbol!");
- emitMaybePCRelativeValue(Address, MO1.isPCRelative());
- } else {
- emitConstant(MO1.getImmedValue(), Size);
- }
+ if (Value *V = MO1.getVRegValueOrNull()) {
+ assert(sizeOfImm(Desc) == 4 && "Don't know how to emit non-pointer values!");
+ emitGlobalAddressForPtr(cast<GlobalValue>(V));
+ } else if (MO1.isGlobalAddress()) {
+ assert(sizeOfImm(Desc) == 4 && "Don't know how to emit non-pointer values!");
+ assert(!MO1.isPCRelative() && "Function pointer ref is PC relative?");
+ emitGlobalAddressForPtr(MO1.getGlobal());
+ } else if (MO1.isExternalSymbol()) {
+ assert(sizeOfImm(Desc) == 4 && "Don't know how to emit non-pointer values!");
+
+ unsigned Address = MCE.getGlobalValueAddress(MO1.getSymbolName());
+ assert(Address && "Unknown external symbol!");
+ emitMaybePCRelativeValue(Address, MO1.isPCRelative());
+ } else {
+ emitConstant(MO1.getImmedValue(), sizeOfImm(Desc));
}
}
break;
case X86II::MRMDestReg: {
MCE.emitByte(BaseOpcode);
- MachineOperand &SrcOp = MI.getOperand(1+II->isTwoAddrInstr(Opcode));
- emitRegModRMByte(MI.getOperand(0).getReg(), getX86RegNum(SrcOp.getReg()));
- if (MI.getNumOperands() == 4)
- emitConstant(MI.getOperand(3).getImmedValue(), sizeOfPtr(Desc));
+ emitRegModRMByte(MI.getOperand(0).getReg(),
+ getX86RegNum(MI.getOperand(1).getReg()));
+ if (MI.getNumOperands() == 3)
+ emitConstant(MI.getOperand(2).getImmedValue(), sizeOfImm(Desc));
break;
}
case X86II::MRMDestMem:
@@ -562,50 +572,46 @@
case X86II::MRMSrcReg:
MCE.emitByte(BaseOpcode);
- if (MI.getNumOperands() == 2) {
- emitRegModRMByte(MI.getOperand(MI.getNumOperands()-1).getReg(),
- getX86RegNum(MI.getOperand(0).getReg()));
- } else if (MI.getOperand(2).isImmediate()) {
- emitRegModRMByte(MI.getOperand(1).getReg(),
- getX86RegNum(MI.getOperand(0).getReg()));
-
- emitConstant(MI.getOperand(2).getImmedValue(), sizeOfPtr(Desc));
- } else {
- emitRegModRMByte(MI.getOperand(2).getReg(),
- getX86RegNum(MI.getOperand(0).getReg()));
- }
+ emitRegModRMByte(MI.getOperand(1).getReg(),
+ getX86RegNum(MI.getOperand(0).getReg()));
+ if (MI.getNumOperands() == 3)
+ emitConstant(MI.getOperand(2).getImmedValue(), sizeOfImm(Desc));
break;
case X86II::MRMSrcMem:
MCE.emitByte(BaseOpcode);
- emitMemModRMByte(MI, MI.getNumOperands()-4,
- getX86RegNum(MI.getOperand(0).getReg()));
+ emitMemModRMByte(MI, 1, getX86RegNum(MI.getOperand(0).getReg()));
+ if (MI.getNumOperands() == 2+4)
+ emitConstant(MI.getOperand(5).getImmedValue(), sizeOfImm(Desc));
break;
- case X86II::MRMS0r: case X86II::MRMS1r:
- case X86II::MRMS2r: case X86II::MRMS3r:
- case X86II::MRMS4r: case X86II::MRMS5r:
- case X86II::MRMS6r: case X86II::MRMS7r:
+ case X86II::MRM0r: case X86II::MRM1r:
+ case X86II::MRM2r: case X86II::MRM3r:
+ case X86II::MRM4r: case X86II::MRM5r:
+ case X86II::MRM6r: case X86II::MRM7r:
MCE.emitByte(BaseOpcode);
emitRegModRMByte(MI.getOperand(0).getReg(),
- (Desc.TSFlags & X86II::FormMask)-X86II::MRMS0r);
+ (Desc.TSFlags & X86II::FormMask)-X86II::MRM0r);
if (MI.getOperand(MI.getNumOperands()-1).isImmediate()) {
- unsigned Size = sizeOfPtr(Desc);
- emitConstant(MI.getOperand(MI.getNumOperands()-1).getImmedValue(), Size);
+ emitConstant(MI.getOperand(MI.getNumOperands()-1).getImmedValue(), sizeOfImm(Desc));
}
break;
- case X86II::MRMS0m: case X86II::MRMS1m:
- case X86II::MRMS2m: case X86II::MRMS3m:
- case X86II::MRMS4m: case X86II::MRMS5m:
- case X86II::MRMS6m: case X86II::MRMS7m:
+ case X86II::MRM0m: case X86II::MRM1m:
+ case X86II::MRM2m: case X86II::MRM3m:
+ case X86II::MRM4m: case X86II::MRM5m:
+ case X86II::MRM6m: case X86II::MRM7m:
MCE.emitByte(BaseOpcode);
- emitMemModRMByte(MI, 0, (Desc.TSFlags & X86II::FormMask)-X86II::MRMS0m);
+ emitMemModRMByte(MI, 0, (Desc.TSFlags & X86II::FormMask)-X86II::MRM0m);
if (MI.getNumOperands() == 5) {
- unsigned Size = sizeOfPtr(Desc);
- emitConstant(MI.getOperand(4).getImmedValue(), Size);
+ if (MI.getOperand(4).isImmediate())
+ emitConstant(MI.getOperand(4).getImmedValue(), sizeOfImm(Desc));
+ else if (MI.getOperand(4).isGlobalAddress())
+ emitGlobalAddressForPtr(MI.getOperand(4).getGlobal());
+ else
+ assert(0 && "Unknown operand!");
}
break;
}
Index: llvm/lib/Target/X86/X86InstrBuilder.h
diff -u llvm/lib/Target/X86/X86InstrBuilder.h:1.9 llvm/lib/Target/X86/X86InstrBuilder.h:1.9.4.1
--- llvm/lib/Target/X86/X86InstrBuilder.h:1.9 Tue Nov 11 16:41:33 2003
+++ llvm/lib/Target/X86/X86InstrBuilder.h Mon Mar 1 17:58:15 2004
@@ -49,6 +49,14 @@
return MIB.addReg(Reg).addZImm(1).addReg(0).addSImm(Offset);
}
+inline const MachineInstrBuilder &addFullAddress(const MachineInstrBuilder &MIB,
+ unsigned BaseReg,
+ unsigned Scale,
+ unsigned IndexReg,
+ unsigned Disp) {
+ return MIB.addReg(BaseReg).addZImm(Scale).addReg(IndexReg).addSImm(Disp);
+}
+
/// addFrameReference - This function is used to add a reference to the base of
/// an abstract object on the stack frame of the current function. This
/// reference has base register as the FrameIndex offset until it is resolved.
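An editorial usage sketch for the new addFullAddress helper (the opcode, register
values, and surrounding call are illustrative assumptions, not part of the patch):

  // Load a 32-bit value from [BaseReg + IndexReg*4 + 8] into DestReg,
  // chaining the four address operands in base/scale/index/disp order.
  addFullAddress(BuildMI(BB, X86::MOV32rm, 4, DestReg),
                 BaseReg, /*Scale*/4, IndexReg, /*Disp*/8);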
Index: llvm/lib/Target/X86/X86InstrInfo.cpp
diff -u llvm/lib/Target/X86/X86InstrInfo.cpp:1.18 llvm/lib/Target/X86/X86InstrInfo.cpp:1.18.2.1
--- llvm/lib/Target/X86/X86InstrInfo.cpp:1.18 Sun Dec 28 11:35:07 2003
+++ llvm/lib/Target/X86/X86InstrInfo.cpp Mon Mar 1 17:58:15 2004
@@ -14,54 +14,26 @@
#include "X86InstrInfo.h"
#include "X86.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-
#include "X86GenInstrInfo.inc"
-
using namespace llvm;
X86InstrInfo::X86InstrInfo()
- : TargetInstrInfo(X86Insts, sizeof(X86Insts)/sizeof(X86Insts[0]), 0) {
+ : TargetInstrInfo(X86Insts, sizeof(X86Insts)/sizeof(X86Insts[0])) {
}
-// createNOPinstr - returns the target's implementation of NOP, which is
-// usually a pseudo-instruction, implemented by a degenerate version of
-// another instruction, e.g. X86: `xchg ax, ax'; SparcV9: `sethi r0, r0, r0'
-//
-MachineInstr* X86InstrInfo::createNOPinstr() const {
- return BuildMI(X86::XCHGrr16, 2).addReg(X86::AX, MOTy::UseAndDef)
- .addReg(X86::AX, MOTy::UseAndDef);
-}
-
-
-/// isNOPinstr - not having a special NOP opcode, we need to know if a given
-/// instruction is interpreted as an `official' NOP instr, i.e., there may be
-/// more than one way to `do nothing' but only one canonical way to slack off.
-//
-bool X86InstrInfo::isNOPinstr(const MachineInstr &MI) const {
- // Make sure the instruction is EXACTLY `xchg ax, ax'
- if (MI.getOpcode() == X86::XCHGrr16) {
- const MachineOperand &op0 = MI.getOperand(0), &op1 = MI.getOperand(1);
- if (op0.isMachineRegister() && op0.getMachineRegNum() == X86::AX &&
- op1.isMachineRegister() && op1.getMachineRegNum() == X86::AX) {
- return true;
- }
- }
- // FIXME: there are several NOOP instructions, we should check for them here.
- return false;
-}
-
bool X86InstrInfo::isMoveInstr(const MachineInstr& MI,
unsigned& sourceReg,
unsigned& destReg) const {
MachineOpCode oc = MI.getOpcode();
- if (oc == X86::MOVrr8 || oc == X86::MOVrr16 || oc == X86::MOVrr32) {
+ if (oc == X86::MOV8rr || oc == X86::MOV16rr || oc == X86::MOV32rr ||
+ oc == X86::FpMOV) {
assert(MI.getNumOperands() == 2 &&
MI.getOperand(0).isRegister() &&
MI.getOperand(1).isRegister() &&
"invalid register-register move instruction");
- sourceReg = MI.getOperand(1).getAllocatedRegNum();
- destReg = MI.getOperand(0).getAllocatedRegNum();
+ sourceReg = MI.getOperand(1).getReg();
+ destReg = MI.getOperand(0).getReg();
return true;
}
return false;
Index: llvm/lib/Target/X86/X86InstrInfo.h
diff -u llvm/lib/Target/X86/X86InstrInfo.h:1.30 llvm/lib/Target/X86/X86InstrInfo.h:1.30.2.1
--- llvm/lib/Target/X86/X86InstrInfo.h:1.30 Sun Dec 28 11:35:07 2003
+++ llvm/lib/Target/X86/X86InstrInfo.h Mon Mar 1 17:58:15 2004
@@ -62,18 +62,18 @@
///
MRMSrcMem = 6,
- /// MRMS[0-7][rm] - These forms are used to represent instructions that use
+ /// MRM[0-7][rm] - These forms are used to represent instructions that use
/// a Mod/RM byte, and use the middle field to hold extended opcode
/// information. In the intel manual these are represented as /0, /1, ...
///
// First, instructions that operate on a register r/m operand...
- MRMS0r = 16, MRMS1r = 17, MRMS2r = 18, MRMS3r = 19, // Format /0 /1 /2 /3
- MRMS4r = 20, MRMS5r = 21, MRMS6r = 22, MRMS7r = 23, // Format /4 /5 /6 /7
+ MRM0r = 16, MRM1r = 17, MRM2r = 18, MRM3r = 19, // Format /0 /1 /2 /3
+ MRM4r = 20, MRM5r = 21, MRM6r = 22, MRM7r = 23, // Format /4 /5 /6 /7
// Next, instructions that operate on a memory r/m operand...
- MRMS0m = 24, MRMS1m = 25, MRMS2m = 26, MRMS3m = 27, // Format /0 /1 /2 /3
- MRMS4m = 28, MRMS5m = 29, MRMS6m = 30, MRMS7m = 31, // Format /4 /5 /6 /7
+ MRM0m = 24, MRM1m = 25, MRM2m = 26, MRM3m = 27, // Format /0 /1 /2 /3
+ MRM4m = 28, MRM5m = 29, MRM6m = 30, MRM7m = 31, // Format /4 /5 /6 /7
FormMask = 31,
@@ -86,9 +86,9 @@
OpSize = 1 << 5,
// Op0Mask - There are several prefix bytes that are used to form two byte
- // opcodes. These are currently 0x0F, and 0xD8-0xDF. This mask is used to
- // obtain the setting of this field. If no bits in this field is set, there
- // is no prefix byte for obtaining a multibyte opcode.
+ // opcodes. These are currently 0x0F, 0xF3, and 0xD8-0xDF. This mask is
+ // used to obtain the setting of this field. If no bits in this field are
+ // set, there is no prefix byte for obtaining a multibyte opcode.
//
Op0Shift = 6,
Op0Mask = 0xF << Op0Shift,
@@ -97,33 +97,48 @@
// starts with a 0x0F byte before the real opcode.
TB = 1 << Op0Shift,
+ // REP - The 0xF3 prefix byte indicating repetition of the following
+ // instruction.
+ REP = 2 << Op0Shift,
+
// D8-DF - These escape opcodes are used by the floating point unit. These
// values must remain sequential.
- D8 = 2 << Op0Shift, D9 = 3 << Op0Shift,
- DA = 4 << Op0Shift, DB = 5 << Op0Shift,
- DC = 6 << Op0Shift, DD = 7 << Op0Shift,
- DE = 8 << Op0Shift, DF = 9 << Op0Shift,
+ D8 = 3 << Op0Shift, D9 = 4 << Op0Shift,
+ DA = 5 << Op0Shift, DB = 6 << Op0Shift,
+ DC = 7 << Op0Shift, DD = 8 << Op0Shift,
+ DE = 9 << Op0Shift, DF = 10 << Op0Shift,
//===------------------------------------------------------------------===//
// This three-bit field describes the size of a memory operand. Zero is
// unused so that we can tell if we forgot to set a value.
- ArgShift = 10,
- ArgMask = 7 << ArgShift,
- Arg8 = 1 << ArgShift,
- Arg16 = 2 << ArgShift,
- Arg32 = 3 << ArgShift,
- Arg64 = 4 << ArgShift, // 64 bit int argument for FILD64
- ArgF32 = 5 << ArgShift,
- ArgF64 = 6 << ArgShift,
- ArgF80 = 7 << ArgShift,
+ MemShift = 10,
+ MemMask = 7 << MemShift,
+ Mem8 = 1 << MemShift,
+ Mem16 = 2 << MemShift,
+ Mem32 = 3 << MemShift,
+ Mem64 = 4 << MemShift,
+ Mem80 = 5 << MemShift,
+ Mem128 = 6 << MemShift,
+
+ //===------------------------------------------------------------------===//
+ // This two-bit field describes the size of an immediate operand. Zero is
+ // unused so that we can tell if we forgot to set a value.
+ ImmShift = 13,
+ ImmMask = 7 << ImmShift,
+ Imm8 = 1 << ImmShift,
+ Imm16 = 2 << ImmShift,
+ Imm32 = 3 << ImmShift,
//===------------------------------------------------------------------===//
// FP Instruction Classification... Zero is non-fp instruction.
// FPTypeMask - Mask for all of the FP types...
- FPTypeShift = 13,
+ FPTypeShift = 15,
FPTypeMask = 7 << FPTypeShift,
+ // NotFP - The default, set for instructions that do not use FP registers.
+ NotFP = 0 << FPTypeShift,
+
// ZeroArgFP - 0 arg FP instruction which implicitly pushes ST(0), f.e. fld0
ZeroArgFP = 1 << FPTypeShift,
@@ -144,9 +159,9 @@
SpecialFP = 5 << FPTypeShift,
// PrintImplUses - Print out implicit uses in the assembly output.
- PrintImplUses = 1 << 16,
+ PrintImplUses = 1 << 18,
- OpcodeShift = 17,
+ OpcodeShift = 19,
OpcodeMask = 0xFF << OpcodeShift,
// Bits 25 -> 31 are unused
};
@@ -163,12 +178,6 @@
///
virtual const MRegisterInfo &getRegisterInfo() const { return RI; }
- /// createNOPinstr - returns the target's implementation of NOP, which is
- /// usually a pseudo-instruction, implemented by a degenerate version of
- /// another instruction, e.g. X86: `xchg ax, ax'; SparcV9: `sethi r0, r0, r0'
- ///
- MachineInstr* createNOPinstr() const;
-
//
// Return true if the instruction is a register to register move and
// leave the source and dest operands in the passed parameters.
@@ -176,12 +185,6 @@
virtual bool isMoveInstr(const MachineInstr& MI,
unsigned& sourceReg,
unsigned& destReg) const;
-
- /// isNOPinstr - not having a special NOP opcode, we need to know if a given
- /// instruction is interpreted as an `official' NOP instr, i.e., there may be
- /// more than one way to `do nothing' but only one canonical way to slack off.
- ///
- bool isNOPinstr(const MachineInstr &MI) const;
// getBaseOpcodeFor - This function returns the "base" X86 opcode for the
// specified opcode number.
Index: llvm/lib/Target/X86/X86InstrInfo.td
diff -u llvm/lib/Target/X86/X86InstrInfo.td:1.15 llvm/lib/Target/X86/X86InstrInfo.td:1.15.2.1
--- llvm/lib/Target/X86/X86InstrInfo.td:1.15 Sat Dec 20 10:22:59 2003
+++ llvm/lib/Target/X86/X86InstrInfo.td Mon Mar 1 17:58:15 2004
@@ -24,27 +24,37 @@
def AddRegFrm : Format<2>; def MRMDestReg : Format<3>;
def MRMDestMem : Format<4>; def MRMSrcReg : Format<5>;
def MRMSrcMem : Format<6>;
-def MRMS0r : Format<16>; def MRMS1r : Format<17>; def MRMS2r : Format<18>;
-def MRMS3r : Format<19>; def MRMS4r : Format<20>; def MRMS5r : Format<21>;
-def MRMS6r : Format<22>; def MRMS7r : Format<23>;
-def MRMS0m : Format<24>; def MRMS1m : Format<25>; def MRMS2m : Format<26>;
-def MRMS3m : Format<27>; def MRMS4m : Format<28>; def MRMS5m : Format<29>;
-def MRMS6m : Format<30>; def MRMS7m : Format<31>;
+def MRM0r : Format<16>; def MRM1r : Format<17>; def MRM2r : Format<18>;
+def MRM3r : Format<19>; def MRM4r : Format<20>; def MRM5r : Format<21>;
+def MRM6r : Format<22>; def MRM7r : Format<23>;
+def MRM0m : Format<24>; def MRM1m : Format<25>; def MRM2m : Format<26>;
+def MRM3m : Format<27>; def MRM4m : Format<28>; def MRM5m : Format<29>;
+def MRM6m : Format<30>; def MRM7m : Format<31>;
-// ArgType - This specifies the argument type used by an instruction. This is
+// ImmType - This specifies the immediate type used by an instruction. This is
// part of the ad-hoc solution used to emit machine instruction encodings by our
// machine code emitter.
-class ArgType<bits<3> val> {
+class ImmType<bits<2> val> {
+ bits<2> Value = val;
+}
+def NoImm : ImmType<0>;
+def Imm8 : ImmType<1>;
+def Imm16 : ImmType<2>;
+def Imm32 : ImmType<3>;
+
+// MemType - This specifies the memory type used by an instruction. This is
+// part of the ad-hoc solution used to emit machine instruction encodings by our
+// machine code emitter.
+class MemType<bits<3> val> {
bits<3> Value = val;
}
-def NoArg : ArgType<0>;
-def Arg8 : ArgType<1>;
-def Arg16 : ArgType<2>;
-def Arg32 : ArgType<3>;
-def Arg64 : ArgType<4>; // 64 bit int argument for FILD64
-def ArgF32 : ArgType<5>;
-def ArgF64 : ArgType<6>;
-def ArgF80 : ArgType<6>;
+def NoMem : MemType<0>;
+def Mem8 : MemType<1>;
+def Mem16 : MemType<2>;
+def Mem32 : MemType<3>;
+def Mem64 : MemType<4>;
+def Mem80 : MemType<5>;
+def Mem128 : MemType<6>;
// FPFormat - This specifies what form this FP instruction has. This is used by
// the Floating-Point stackifier pass.
@@ -59,15 +69,17 @@
def SpecialFP : FPFormat<5>;
-class X86Inst<string nam, bits<8> opcod, Format f, ArgType a> : Instruction {
+class X86Inst<string nam, bits<8> opcod, Format f, MemType m, ImmType i> : Instruction {
let Namespace = "X86";
let Name = nam;
bits<8> Opcode = opcod;
Format Form = f;
bits<5> FormBits = Form.Value;
- ArgType Type = a;
- bits<3> TypeBits = Type.Value;
+ MemType MemT = m;
+ bits<3> MemTypeBits = MemT.Value;
+ ImmType ImmT = i;
+ bits<2> ImmTypeBits = ImmT.Value;
// Attributes specific to X86 instructions...
bit hasOpSizePrefix = 0; // Does this inst have a 0x66 prefix?
@@ -92,43 +104,68 @@
// emitter that various prefix bytes are required.
class OpSize { bit hasOpSizePrefix = 1; }
class TB { bits<4> Prefix = 1; }
-class D8 { bits<4> Prefix = 2; }
-class D9 { bits<4> Prefix = 3; }
-class DA { bits<4> Prefix = 4; }
-class DB { bits<4> Prefix = 5; }
-class DC { bits<4> Prefix = 6; }
-class DD { bits<4> Prefix = 7; }
-class DE { bits<4> Prefix = 8; }
-class DF { bits<4> Prefix = 9; }
+class REP { bits<4> Prefix = 2; }
+class D8 { bits<4> Prefix = 3; }
+class D9 { bits<4> Prefix = 4; }
+class DA { bits<4> Prefix = 5; }
+class DB { bits<4> Prefix = 6; }
+class DC { bits<4> Prefix = 7; }
+class DD { bits<4> Prefix = 8; }
+class DE { bits<4> Prefix = 9; }
+class DF { bits<4> Prefix = 10; }
+
+
+//===----------------------------------------------------------------------===//
+// Instruction templates...
+
+class I<string n, bits<8> o, Format f> : X86Inst<n, o, f, NoMem, NoImm>;
+
+class Im<string n, bits<8> o, Format f, MemType m> : X86Inst<n, o, f, m, NoImm>;
+class Im8 <string n, bits<8> o, Format f> : Im<n, o, f, Mem8 >;
+class Im16<string n, bits<8> o, Format f> : Im<n, o, f, Mem16>;
+class Im32<string n, bits<8> o, Format f> : Im<n, o, f, Mem32>;
+class Ii<string n, bits<8> o, Format f, ImmType i> : X86Inst<n, o, f, NoMem, i>;
+class Ii8 <string n, bits<8> o, Format f> : Ii<n, o, f, Imm8 >;
+class Ii16<string n, bits<8> o, Format f> : Ii<n, o, f, Imm16>;
+class Ii32<string n, bits<8> o, Format f> : Ii<n, o, f, Imm32>;
+class Im8i8 <string n, bits<8> o, Format f> : X86Inst<n, o, f, Mem8 , Imm8 >;
+class Im16i16<string n, bits<8> o, Format f> : X86Inst<n, o, f, Mem16, Imm16>;
+class Im32i32<string n, bits<8> o, Format f> : X86Inst<n, o, f, Mem32, Imm32>;
+
+class Im16i8<string n, bits<8> o, Format f> : X86Inst<n, o, f, Mem16, Imm8>;
+class Im32i8<string n, bits<8> o, Format f> : X86Inst<n, o, f, Mem32, Imm8>;
+
+// Helper for shift instructions
+class UsesCL { list<Register> Uses = [CL]; bit printImplicitUses = 1; }
//===----------------------------------------------------------------------===//
// Instruction list...
//
-def PHI : X86Inst<"PHI", 0, Pseudo, NoArg>; // PHI node...
+def PHI : I<"PHI", 0, Pseudo>; // PHI node...
-def NOOP : X86Inst<"nop", 0x90, RawFrm, NoArg>; // nop
+def NOOP : I<"nop", 0x90, RawFrm>; // nop
-def ADJCALLSTACKDOWN : X86Inst<"ADJCALLSTACKDOWN", 0, Pseudo, NoArg>;
-def ADJCALLSTACKUP : X86Inst<"ADJCALLSTACKUP", 0, Pseudo, NoArg>;
-def IMPLICIT_USE : X86Inst<"IMPLICIT_USE", 0, Pseudo, NoArg>;
-def IMPLICIT_DEF : X86Inst<"IMPLICIT_DEF", 0, Pseudo, NoArg>;
+def ADJCALLSTACKDOWN : I<"ADJCALLSTACKDOWN", 0, Pseudo>;
+def ADJCALLSTACKUP : I<"ADJCALLSTACKUP", 0, Pseudo>;
+def IMPLICIT_USE : I<"IMPLICIT_USE", 0, Pseudo>;
+def IMPLICIT_DEF : I<"IMPLICIT_DEF", 0, Pseudo>;
let isTerminator = 1 in
let Defs = [FP0, FP1, FP2, FP3, FP4, FP5, FP6] in
- def FP_REG_KILL : X86Inst<"FP_REG_KILL", 0, Pseudo, NoArg>;
+ def FP_REG_KILL : I<"FP_REG_KILL", 0, Pseudo>;
//===----------------------------------------------------------------------===//
// Control Flow Instructions...
//
// Return instruction...
let isTerminator = 1, isReturn = 1 in
- def RET : X86Inst<"ret", 0xC3, RawFrm, NoArg>, Pattern<(retvoid)>;
+ def RET : I<"ret", 0xC3, RawFrm>, Pattern<(retvoid)>;
// All branches are RawFrm, Void, Branch, and Terminators
let isBranch = 1, isTerminator = 1 in
- class IBr<string name, bits<8> opcode> : X86Inst<name, opcode, RawFrm, NoArg>;
+ class IBr<string name, bits<8> opcode> : I<name, opcode, RawFrm>;
def JMP : IBr<"jmp", 0xE9>, Pattern<(br basicblock)>;
def JB : IBr<"jb" , 0x82>, TB;
@@ -151,231 +188,420 @@
let isCall = 1 in
// All calls clobber the non-callee saved registers...
let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6] in {
- def CALLpcrel32 : X86Inst<"call", 0xE8, RawFrm, NoArg>;
- def CALLr32 : X86Inst<"call", 0xFF, MRMS2r, Arg32>;
- def CALLm32 : X86Inst<"call", 0xFF, MRMS2m, Arg32>;
+ def CALLpcrel32 : I <"call", 0xE8, RawFrm>;
+ def CALL32r : I <"call", 0xFF, MRM2r>;
+ def CALL32m : Im32<"call", 0xFF, MRM2m>;
}
//===----------------------------------------------------------------------===//
// Miscellaneous Instructions...
//
-def LEAVE : X86Inst<"leave", 0xC9, RawFrm, NoArg>, Imp<[EBP], [EBP]>;
-
-let isTwoAddress = 1 in // R32 = bswap R32
- def BSWAPr32 : X86Inst<"bswap", 0xC8, AddRegFrm, Arg32>, TB;
+def LEAVE : I<"leave", 0xC9, RawFrm>, Imp<[EBP,ESP],[EBP,ESP]>;
+def POP32r : I<"pop", 0x58, AddRegFrm>, Imp<[ESP],[ESP]>;
-def XCHGrr8 : X86Inst<"xchg", 0x86, MRMDestReg, Arg8>; // xchg R8, R8
-def XCHGrr16 : X86Inst<"xchg", 0x87, MRMDestReg, Arg16>, OpSize;// xchg R16, R16
-def XCHGrr32 : X86Inst<"xchg", 0x87, MRMDestReg, Arg32>; // xchg R32, R32
+let isTwoAddress = 1 in // R32 = bswap R32
+ def BSWAP32r : I<"bswap", 0xC8, AddRegFrm>, TB;
-def LEAr16 : X86Inst<"lea", 0x8D, MRMSrcMem, Arg16>, OpSize; // R16 = lea [mem]
-def LEAr32 : X86Inst<"lea", 0x8D, MRMSrcMem, Arg32>; // R32 = lea [mem]
+def XCHG8rr : I <"xchg", 0x86, MRMDestReg>; // xchg R8, R8
+def XCHG16rr : I <"xchg", 0x87, MRMDestReg>, OpSize; // xchg R16, R16
+def XCHG32rr : I <"xchg", 0x87, MRMDestReg>; // xchg R32, R32
+def XCHG8mr : Im8 <"xchg", 0x86, MRMDestMem>; // xchg [mem8], R8
+def XCHG16mr : Im16<"xchg", 0x87, MRMDestMem>, OpSize; // xchg [mem16], R16
+def XCHG32mr : Im32<"xchg", 0x87, MRMDestMem>; // xchg [mem32], R32
+def XCHG8rm : Im8 <"xchg", 0x86, MRMSrcMem >; // xchg R8, [mem8]
+def XCHG16rm : Im16<"xchg", 0x87, MRMSrcMem >, OpSize; // xchg R16, [mem16]
+def XCHG32rm : Im32<"xchg", 0x87, MRMSrcMem >; // xchg R32, [mem32]
+
+def LEA16r : Im32<"lea", 0x8D, MRMSrcMem>, OpSize; // R16 = lea [mem]
+def LEA32r : Im32<"lea", 0x8D, MRMSrcMem>; // R32 = lea [mem]
+
+
+def REP_MOVSB : I<"rep movsb", 0xA4, RawFrm>, REP,
+ Imp<[ECX,EDI,ESI], [ECX,EDI,ESI]>;
+def REP_MOVSW : I<"rep movsw", 0xA5, RawFrm>, REP, OpSize,
+ Imp<[ECX,EDI,ESI], [ECX,EDI,ESI]>;
+def REP_MOVSD : I<"rep movsd", 0xA5, RawFrm>, REP,
+ Imp<[ECX,EDI,ESI], [ECX,EDI,ESI]>;
+
+def REP_STOSB : I<"rep stosb", 0xAA, RawFrm>, REP,
+ Imp<[AL,ECX,EDI], [ECX,EDI]>;
+def REP_STOSW : I<"rep stosw", 0xAB, RawFrm>, REP, OpSize,
+ Imp<[AX,ECX,EDI], [ECX,EDI]>;
+def REP_STOSD : I<"rep stosd", 0xAB, RawFrm>, REP,
+ Imp<[EAX,ECX,EDI], [ECX,EDI]>;
//===----------------------------------------------------------------------===//
// Move Instructions...
//
-def MOVrr8 : X86Inst<"mov", 0x88, MRMDestReg, Arg8>, Pattern<(set R8 , R8 )>;
-def MOVrr16 : X86Inst<"mov", 0x89, MRMDestReg, Arg16>, OpSize, Pattern<(set R16, R16)>;
-def MOVrr32 : X86Inst<"mov", 0x89, MRMDestReg, Arg32>, Pattern<(set R32, R32)>;
-def MOVir8 : X86Inst<"mov", 0xB0, AddRegFrm , Arg8>, Pattern<(set R8 , imm )>;
-def MOVir16 : X86Inst<"mov", 0xB8, AddRegFrm , Arg16>, OpSize, Pattern<(set R16, imm)>;
-def MOVir32 : X86Inst<"mov", 0xB8, AddRegFrm , Arg32>, Pattern<(set R32, imm)>;
-def MOVim8 : X86Inst<"mov", 0xC6, MRMS0m , Arg8>; // [mem] = imm8
-def MOVim16 : X86Inst<"mov", 0xC7, MRMS0m , Arg16>, OpSize; // [mem] = imm16
-def MOVim32 : X86Inst<"mov", 0xC7, MRMS0m , Arg32>; // [mem] = imm32
+def MOV8rr : I <"mov", 0x88, MRMDestReg>, Pattern<(set R8 , R8 )>;
+def MOV16rr : I <"mov", 0x89, MRMDestReg>, OpSize, Pattern<(set R16, R16)>;
+def MOV32rr : I <"mov", 0x89, MRMDestReg>, Pattern<(set R32, R32)>;
+def MOV8ri : Ii8 <"mov", 0xB0, AddRegFrm >, Pattern<(set R8 , imm )>;
+def MOV16ri : Ii16 <"mov", 0xB8, AddRegFrm >, OpSize, Pattern<(set R16, imm)>;
+def MOV32ri : Ii32 <"mov", 0xB8, AddRegFrm >, Pattern<(set R32, imm)>;
+def MOV8mi : Im8i8 <"mov", 0xC6, MRM0m >; // [mem8] = imm8
+def MOV16mi : Im16i16<"mov", 0xC7, MRM0m >, OpSize; // [mem16] = imm16
+def MOV32mi : Im32i32<"mov", 0xC7, MRM0m >; // [mem32] = imm32
-def MOVmr8 : X86Inst<"mov", 0x8A, MRMSrcMem , Arg8>; // R8 = [mem]
-def MOVmr16 : X86Inst<"mov", 0x8B, MRMSrcMem , Arg16>, OpSize, // R16 = [mem]
+def MOV8rm : Im8 <"mov", 0x8A, MRMSrcMem>; // R8 = [mem8]
+def MOV16rm : Im16 <"mov", 0x8B, MRMSrcMem>, OpSize, // R16 = [mem16]
Pattern<(set R16, (load (plus R32, (plus (times imm, R32), imm))))>;
-def MOVmr32 : X86Inst<"mov", 0x8B, MRMSrcMem , Arg32>, // R32 = [mem]
+def MOV32rm : Im32 <"mov", 0x8B, MRMSrcMem>, // R32 = [mem32]
Pattern<(set R32, (load (plus R32, (plus (times imm, R32), imm))))>;
-def MOVrm8 : X86Inst<"mov", 0x88, MRMDestMem, Arg8>; // [mem] = R8
-def MOVrm16 : X86Inst<"mov", 0x89, MRMDestMem, Arg16>, OpSize; // [mem] = R16
-def MOVrm32 : X86Inst<"mov", 0x89, MRMDestMem, Arg32>; // [mem] = R32
+def MOV8mr : Im8 <"mov", 0x88, MRMDestMem>; // [mem8] = R8
+def MOV16mr : Im16 <"mov", 0x89, MRMDestMem>, OpSize; // [mem16] = R16
+def MOV32mr : Im32 <"mov", 0x89, MRMDestMem>; // [mem32] = R32
//===----------------------------------------------------------------------===//
// Fixed-Register Multiplication and Division Instructions...
//
// Extra precision multiplication
-def MULr8 : X86Inst<"mul", 0xF6, MRMS4r, Arg8 >, Imp<[AL],[AX]>; // AL,AH = AL*R8
-def MULr16 : X86Inst<"mul", 0xF7, MRMS4r, Arg16>, Imp<[AX],[AX,DX]>, OpSize; // AX,DX = AX*R16
-def MULr32 : X86Inst<"mul", 0xF7, MRMS4r, Arg32>, Imp<[EAX],[EAX,EDX]>; // EAX,EDX = EAX*R32
+def MUL8r : I <"mul", 0xF6, MRM4r>, Imp<[AL],[AX]>; // AL,AH = AL*R8
+def MUL16r : I <"mul", 0xF7, MRM4r>, Imp<[AX],[AX,DX]>, OpSize; // AX,DX = AX*R16
+def MUL32r : I <"mul", 0xF7, MRM4r>, Imp<[EAX],[EAX,EDX]>; // EAX,EDX = EAX*R32
+def MUL8m : Im8 <"mul", 0xF6, MRM4m>, Imp<[AL],[AX]>; // AL,AH = AL*[mem8]
+def MUL16m : Im16<"mul", 0xF7, MRM4m>, Imp<[AX],[AX,DX]>, OpSize; // AX,DX = AX*[mem16]
+def MUL32m : Im32<"mul", 0xF7, MRM4m>, Imp<[EAX],[EAX,EDX]>; // EAX,EDX = EAX*[mem32]
// unsigned division/remainder
-def DIVr8 : X86Inst<"div", 0xF6, MRMS6r, Arg8 >, Imp<[AX],[AX]>; // AX/r8 = AL,AH
-def DIVr16 : X86Inst<"div", 0xF7, MRMS6r, Arg16>, Imp<[AX,DX],[AX,DX]>, OpSize; // DX:AX/r16 = AX,DX
-def DIVr32 : X86Inst<"div", 0xF7, MRMS6r, Arg32>, Imp<[EAX,EDX],[EAX,EDX]>; // EDX:EAX/r32 = EAX,EDX
+def DIV8r : I <"div", 0xF6, MRM6r>, Imp<[AX],[AX]>; // AX/r8 = AL,AH
+def DIV16r : I <"div", 0xF7, MRM6r>, Imp<[AX,DX],[AX,DX]>, OpSize; // DX:AX/r16 = AX,DX
+def DIV32r : I <"div", 0xF7, MRM6r>, Imp<[EAX,EDX],[EAX,EDX]>; // EDX:EAX/r32 = EAX,EDX
+def DIV8m : Im8 <"div", 0xF6, MRM6m>, Imp<[AX],[AX]>; // AX/[mem8] = AL,AH
+def DIV16m : Im16<"div", 0xF7, MRM6m>, Imp<[AX,DX],[AX,DX]>, OpSize; // DX:AX/[mem16] = AX,DX
+def DIV32m : Im32<"div", 0xF7, MRM6m>, Imp<[EAX,EDX],[EAX,EDX]>; // EDX:EAX/[mem32] = EAX,EDX
// signed division/remainder
-def IDIVr8 : X86Inst<"idiv",0xF6, MRMS7r, Arg8 >, Imp<[AX],[AX]>; // AX/r8 = AL,AH
-def IDIVr16: X86Inst<"idiv",0xF7, MRMS7r, Arg16>, Imp<[AX,DX],[AX,DX]>, OpSize; // DX:AX/r16 = AX,DX
-def IDIVr32: X86Inst<"idiv",0xF7, MRMS7r, Arg32>, Imp<[EAX,EDX],[EAX,EDX]>; // EDX:EAX/r32 = EAX,EDX
+def IDIV8r : I <"idiv",0xF6, MRM7r>, Imp<[AX],[AX]>; // AX/r8 = AL,AH
+def IDIV16r: I <"idiv",0xF7, MRM7r>, Imp<[AX,DX],[AX,DX]>, OpSize; // DX:AX/r16 = AX,DX
+def IDIV32r: I <"idiv",0xF7, MRM7r>, Imp<[EAX,EDX],[EAX,EDX]>; // EDX:EAX/r32 = EAX,EDX
+def IDIV8m : Im8 <"idiv",0xF6, MRM7m>, Imp<[AX],[AX]>; // AX/[mem8] = AL,AH
+def IDIV16m: Im16<"idiv",0xF7, MRM7m>, Imp<[AX,DX],[AX,DX]>, OpSize; // DX:AX/[mem16] = AX,DX
+def IDIV32m: Im32<"idiv",0xF7, MRM7m>, Imp<[EAX,EDX],[EAX,EDX]>; // EDX:EAX/[mem32] = EAX,EDX
// Sign-extenders for division
-def CBW : X86Inst<"cbw", 0x98, RawFrm, Arg8 >, Imp<[AL],[AH]>; // AX = signext(AL)
-def CWD : X86Inst<"cwd", 0x99, RawFrm, Arg8 >, Imp<[AX],[DX]>; // DX:AX = signext(AX)
-def CDQ : X86Inst<"cdq", 0x99, RawFrm, Arg8 >, Imp<[EAX],[EDX]>; // EDX:EAX = signext(EAX)
+def CBW : I<"cbw", 0x98, RawFrm >, Imp<[AL],[AH]>; // AX = signext(AL)
+def CWD : I<"cwd", 0x99, RawFrm >, Imp<[AX],[DX]>; // DX:AX = signext(AX)
+def CDQ : I<"cdq", 0x99, RawFrm >, Imp<[EAX],[EDX]>; // EDX:EAX = signext(EAX)
//===----------------------------------------------------------------------===//
// Two address Instructions...
//
-let isTwoAddress = 1 in { // Define some helper classes to make defs shorter.
- class I2A8 <string n, bits<8> o, Format F> : X86Inst<n, o, F, Arg8>;
- class I2A16<string n, bits<8> o, Format F> : X86Inst<n, o, F, Arg16>;
- class I2A32<string n, bits<8> o, Format F> : X86Inst<n, o, F, Arg32>;
-}
+let isTwoAddress = 1 in {
-// unary instructions
-def NEGr8 : I2A8 <"neg", 0xF6, MRMS3r>; // R8 = -R8 = 0-R8
-def NEGr16 : I2A16<"neg", 0xF7, MRMS3r>, OpSize; // R16 = -R16 = 0-R16
-def NEGr32 : I2A32<"neg", 0xF7, MRMS3r>; // R32 = -R32 = 0-R32
-def NOTr8 : I2A8 <"not", 0xF6, MRMS2r>; // R8 = ~R8 = R8^-1
-def NOTr16 : I2A16<"not", 0xF7, MRMS2r>, OpSize; // R16 = ~R16 = R16^-1
-def NOTr32 : I2A32<"not", 0xF7, MRMS2r>; // R32 = ~R32 = R32^-1
-
-def INCr8 : I2A8 <"inc", 0xFE, MRMS0r>; // R8 = R8 +1
-def INCr16 : I2A16<"inc", 0xFF, MRMS0r>, OpSize; // R16 = R16+1
-def INCr32 : I2A32<"inc", 0xFF, MRMS0r>; // R32 = R32+1
-def DECr8 : I2A8 <"dec", 0xFE, MRMS1r>; // R8 = R8 -1
-def DECr16 : I2A16<"dec", 0xFF, MRMS1r>, OpSize; // R16 = R16-1
-def DECr32 : I2A32<"dec", 0xFF, MRMS1r>; // R32 = R32-1
+// Conditional moves. These are modelled as X = cmovXX Y, Z. Eventually the
+// register allocator coalesces X and Y, giving cmovXX X, Z.
+def CMOVE16rr : I<"cmove", 0x44, MRMSrcReg>, TB, OpSize; // if ==, R16 = R16
+def CMOVNE32rr: I<"cmovne",0x45, MRMSrcReg>, TB; // if !=, R32 = R32
+def CMOVS32rr : I<"cmovs", 0x48, MRMSrcReg>, TB; // if signed, R32 = R32
+// unary instructions
+def NEG8r : I <"neg", 0xF6, MRM3r>; // R8 = -R8 = 0-R8
+def NEG16r : I <"neg", 0xF7, MRM3r>, OpSize; // R16 = -R16 = 0-R16
+def NEG32r : I <"neg", 0xF7, MRM3r>; // R32 = -R32 = 0-R32
+def NEG8m : Im8 <"neg", 0xF6, MRM3m>; // [mem8] = -[mem8] = 0-[mem8]
+def NEG16m : Im16<"neg", 0xF7, MRM3m>, OpSize; // [mem16] = -[mem16] = 0-[mem16]
+def NEG32m : Im32<"neg", 0xF7, MRM3m>; // [mem32] = -[mem32] = 0-[mem32]
+
+def NOT8r : I <"not", 0xF6, MRM2r>; // R8 = ~R8 = R8^-1
+def NOT16r : I <"not", 0xF7, MRM2r>, OpSize; // R16 = ~R16 = R16^-1
+def NOT32r : I <"not", 0xF7, MRM2r>; // R32 = ~R32 = R32^-1
+def NOT8m : Im8 <"not", 0xF6, MRM2m>; // [mem8] = ~[mem8] = [mem8^-1]
+def NOT16m : Im16<"not", 0xF7, MRM2m>, OpSize; // [mem16] = ~[mem16] = [mem16^-1]
+def NOT32m : Im32<"not", 0xF7, MRM2m>; // [mem32] = ~[mem32] = [mem32^-1]
+
+def INC8r : I <"inc", 0xFE, MRM0r>; // ++R8
+def INC16r : I <"inc", 0xFF, MRM0r>, OpSize; // ++R16
+def INC32r : I <"inc", 0xFF, MRM0r>; // ++R32
+def INC8m : Im8 <"inc", 0xFE, MRM0m>; // ++R8
+def INC16m : Im16<"inc", 0xFF, MRM0m>, OpSize; // ++R16
+def INC32m : Im32<"inc", 0xFF, MRM0m>; // ++R32
+
+def DEC8r : I <"dec", 0xFE, MRM1r>; // --R8
+def DEC16r : I <"dec", 0xFF, MRM1r>, OpSize; // --R16
+def DEC32r : I <"dec", 0xFF, MRM1r>; // --R32
+def DEC8m : Im8 <"dec", 0xFE, MRM1m>; // --[mem8]
+def DEC16m : Im16<"dec", 0xFF, MRM1m>, OpSize; // --[mem16]
+def DEC32m : Im32<"dec", 0xFF, MRM1m>; // --[mem32]
+// Logical operators...
+def AND8rr : I <"and", 0x20, MRMDestReg>, Pattern<(set R8 , (and R8 , R8 ))>;
+def AND16rr : I <"and", 0x21, MRMDestReg>, OpSize, Pattern<(set R16, (and R16, R16))>;
+def AND32rr : I <"and", 0x21, MRMDestReg>, Pattern<(set R32, (and R32, R32))>;
+def AND8mr : Im8 <"and", 0x20, MRMDestMem>; // [mem8] &= R8
+def AND16mr : Im16 <"and", 0x21, MRMDestMem>, OpSize; // [mem16] &= R16
+def AND32mr : Im32 <"and", 0x21, MRMDestMem>; // [mem32] &= R32
+def AND8rm : Im8 <"and", 0x22, MRMSrcMem >; // R8 &= [mem8]
+def AND16rm : Im16 <"and", 0x23, MRMSrcMem >, OpSize; // R16 &= [mem16]
+def AND32rm : Im32 <"and", 0x23, MRMSrcMem >; // R32 &= [mem32]
+
+def AND8ri : Ii8 <"and", 0x80, MRM4r >, Pattern<(set R8 , (and R8 , imm))>;
+def AND16ri : Ii16 <"and", 0x81, MRM4r >, OpSize, Pattern<(set R16, (and R16, imm))>;
+def AND32ri : Ii32 <"and", 0x81, MRM4r >, Pattern<(set R32, (and R32, imm))>;
+def AND8mi : Im8i8 <"and", 0x80, MRM4m >; // [mem8] &= imm8
+def AND16mi : Im16i16 <"and", 0x81, MRM4m >, OpSize; // [mem16] &= imm16
+def AND32mi : Im32i32 <"and", 0x81, MRM4m >; // [mem32] &= imm32
+
+def AND16ri8 : Ii8 <"and", 0x83, MRM4r >, OpSize; // R16 &= imm8
+def AND32ri8 : Ii8 <"and", 0x83, MRM4r >; // R32 &= imm8
+def AND16mi8 : Im16i8<"and", 0x83, MRM4m >, OpSize; // [mem16] &= imm8
+def AND32mi8 : Im32i8<"and", 0x83, MRM4m >; // [mem32] &= imm8
+
+
+def OR8rr : I <"or" , 0x08, MRMDestReg>, Pattern<(set R8 , (or R8 , R8 ))>;
+def OR16rr : I <"or" , 0x09, MRMDestReg>, OpSize, Pattern<(set R16, (or R16, R16))>;
+def OR32rr : I <"or" , 0x09, MRMDestReg>, Pattern<(set R32, (or R32, R32))>;
+def OR8mr : Im8 <"or" , 0x08, MRMDestMem>; // [mem8] |= R8
+def OR16mr : Im16 <"or" , 0x09, MRMDestMem>, OpSize; // [mem16] |= R16
+def OR32mr : Im32 <"or" , 0x09, MRMDestMem>; // [mem32] |= R32
+def OR8rm : Im8 <"or" , 0x0A, MRMSrcMem >; // R8 |= [mem8]
+def OR16rm : Im16 <"or" , 0x0B, MRMSrcMem >, OpSize; // R16 |= [mem16]
+def OR32rm : Im32 <"or" , 0x0B, MRMSrcMem >; // R32 |= [mem32]
+
+def OR8ri : Ii8 <"or" , 0x80, MRM1r >, Pattern<(set R8 , (or R8 , imm))>;
+def OR16ri : Ii16 <"or" , 0x81, MRM1r >, OpSize, Pattern<(set R16, (or R16, imm))>;
+def OR32ri : Ii32 <"or" , 0x81, MRM1r >, Pattern<(set R32, (or R32, imm))>;
+def OR8mi : Im8i8 <"or" , 0x80, MRM1m >; // [mem8] |= imm8
+def OR16mi : Im16i16 <"or" , 0x81, MRM1m >, OpSize; // [mem16] |= imm16
+def OR32mi : Im32i32 <"or" , 0x81, MRM1m >; // [mem32] |= imm32
+
+def OR16ri8 : Ii8 <"or" , 0x83, MRM1r >, OpSize; // R16 |= imm8
+def OR32ri8 : Ii8 <"or" , 0x83, MRM1r >; // R32 |= imm8
+def OR16mi8 : Im16i8<"or" , 0x83, MRM1m >, OpSize; // [mem16] |= imm8
+def OR32mi8 : Im32i8<"or" , 0x83, MRM1m >; // [mem32] |= imm8
+
+
+def XOR8rr : I <"xor", 0x30, MRMDestReg>, Pattern<(set R8 , (xor R8 , R8 ))>;
+def XOR16rr : I <"xor", 0x31, MRMDestReg>, OpSize, Pattern<(set R16, (xor R16, R16))>;
+def XOR32rr : I <"xor", 0x31, MRMDestReg>, Pattern<(set R32, (xor R32, R32))>;
+def XOR8mr : Im8 <"xor", 0x30, MRMDestMem>; // [mem8] ^= R8
+def XOR16mr : Im16 <"xor", 0x31, MRMDestMem>, OpSize; // [mem16] ^= R16
+def XOR32mr : Im32 <"xor", 0x31, MRMDestMem>; // [mem32] ^= R32
+def XOR8rm : Im8 <"xor", 0x32, MRMSrcMem >; // R8 ^= [mem8]
+def XOR16rm : Im16 <"xor", 0x33, MRMSrcMem >, OpSize; // R16 ^= [mem16]
+def XOR32rm : Im32 <"xor", 0x33, MRMSrcMem >; // R32 ^= [mem32]
+
+def XOR8ri : Ii8 <"xor", 0x80, MRM6r >, Pattern<(set R8 , (xor R8 , imm))>;
+def XOR16ri : Ii16 <"xor", 0x81, MRM6r >, OpSize, Pattern<(set R16, (xor R16, imm))>;
+def XOR32ri : Ii32 <"xor", 0x81, MRM6r >, Pattern<(set R32, (xor R32, imm))>;
+def XOR8mi : Im8i8 <"xor", 0x80, MRM6m >; // [mem8] ^= R8
+def XOR16mi : Im16i16 <"xor", 0x81, MRM6m >, OpSize; // [mem16] ^= R16
+def XOR32mi : Im32i32 <"xor", 0x81, MRM6m >; // [mem32] ^= R32
+
+def XOR16ri8 : Ii8 <"xor", 0x83, MRM6r >, OpSize; // R16 ^= imm8
+def XOR32ri8 : Ii8 <"xor", 0x83, MRM6r >; // R32 ^= imm8
+def XOR16mi8 : Im16i8<"xor", 0x83, MRM6m >, OpSize; // [mem16] ^= imm8
+def XOR32mi8 : Im32i8<"xor", 0x83, MRM6m >; // [mem32] ^= imm8
-// Arithmetic...
-def ADDrr8 : I2A8 <"add", 0x00, MRMDestReg>, Pattern<(set R8 , (plus R8 , R8 ))>;
-def ADDrr16 : I2A16<"add", 0x01, MRMDestReg>, OpSize, Pattern<(set R16, (plus R16, R16))>;
-def ADDrr32 : I2A32<"add", 0x01, MRMDestReg>, Pattern<(set R32, (plus R32, R32))>;
-def ADDri8 : I2A8 <"add", 0x80, MRMS0r >, Pattern<(set R8 , (plus R8 , imm))>;
-def ADDri16 : I2A16<"add", 0x81, MRMS0r >, OpSize, Pattern<(set R16, (plus R16, imm))>;
-def ADDri32 : I2A32<"add", 0x81, MRMS0r >, Pattern<(set R32, (plus R32, imm))>;
-def ADDri16b : I2A8 <"add", 0x83, MRMS0r >, OpSize; // ADDri with sign extended 8 bit imm
-def ADDri32b : I2A8 <"add", 0x83, MRMS0r >;
-
-def ADCrr32 : I2A32<"adc", 0x11, MRMDestReg>; // R32 += imm32+Carry
-
-def SUBrr8 : I2A8 <"sub", 0x28, MRMDestReg>, Pattern<(set R8 , (minus R8 , R8 ))>;
-def SUBrr16 : I2A16<"sub", 0x29, MRMDestReg>, OpSize, Pattern<(set R16, (minus R16, R16))>;
-def SUBrr32 : I2A32<"sub", 0x29, MRMDestReg>, Pattern<(set R32, (minus R32, R32))>;
-def SUBri8 : I2A8 <"sub", 0x80, MRMS5r >, Pattern<(set R8 , (minus R8 , imm))>;
-def SUBri16 : I2A16<"sub", 0x81, MRMS5r >, OpSize, Pattern<(set R16, (minus R16, imm))>;
-def SUBri32 : I2A32<"sub", 0x81, MRMS5r >, Pattern<(set R32, (minus R32, imm))>;
-def SUBri16b : I2A8 <"sub", 0x83, MRMS5r >, OpSize;
-def SUBri32b : I2A8 <"sub", 0x83, MRMS5r >;
-
-def SBBrr32 : I2A32<"sbb", 0x19, MRMDestReg>; // R32 -= R32+Carry
-
-def IMULrr16 : I2A16<"imul", 0xAF, MRMSrcReg>, TB, OpSize, Pattern<(set R16, (times R16, R16))>;
-def IMULrr32 : I2A32<"imul", 0xAF, MRMSrcReg>, TB , Pattern<(set R32, (times R32, R32))>;
-def IMULri16 : I2A16<"imul", 0x69, MRMSrcReg>, OpSize;
-def IMULri32 : I2A32<"imul", 0x69, MRMSrcReg>;
-def IMULri16b : I2A8<"imul", 0x6B, MRMSrcReg>, OpSize;
-def IMULri32b : I2A8<"imul", 0x6B, MRMSrcReg>;
+// Shift instructions
+def SHL8rCL : I <"shl", 0xD2, MRM4r > , UsesCL; // R8 <<= cl
+def SHL16rCL : I <"shl", 0xD3, MRM4r >, OpSize, UsesCL; // R16 <<= cl
+def SHL32rCL : I <"shl", 0xD3, MRM4r > , UsesCL; // R32 <<= cl
+def SHL8mCL : Im8 <"shl", 0xD2, MRM4m > , UsesCL; // [mem8] <<= cl
+def SHL16mCL : Im16 <"shl", 0xD3, MRM4m >, OpSize, UsesCL; // [mem16] <<= cl
+def SHL32mCL : Im32 <"shl", 0xD3, MRM4m > , UsesCL; // [mem32] <<= cl
+
+def SHL8ri : Ii8 <"shl", 0xC0, MRM4r >; // R8 <<= imm8
+def SHL16ri : Ii8 <"shl", 0xC1, MRM4r >, OpSize; // R16 <<= imm8
+def SHL32ri : Ii8 <"shl", 0xC1, MRM4r >; // R32 <<= imm8
+def SHL8mi : Im8i8 <"shl", 0xC0, MRM4m >; // [mem8] <<= imm8
+def SHL16mi : Im16i8<"shl", 0xC1, MRM4m >, OpSize; // [mem16] <<= imm8
+def SHL32mi : Im32i8<"shl", 0xC1, MRM4m >; // [mem32] <<= imm8
+
+def SHR8rCL : I <"shr", 0xD2, MRM5r > , UsesCL; // R8 >>= cl
+def SHR16rCL : I <"shr", 0xD3, MRM5r >, OpSize, UsesCL; // R16 >>= cl
+def SHR32rCL : I <"shr", 0xD3, MRM5r > , UsesCL; // R32 >>= cl
+def SHR8mCL : Im8 <"shr", 0xD2, MRM5m > , UsesCL; // [mem8] >>= cl
+def SHR16mCL : Im16 <"shr", 0xD3, MRM5m >, OpSize, UsesCL; // [mem16] >>= cl
+def SHR32mCL : Im32 <"shr", 0xD3, MRM5m > , UsesCL; // [mem32] >>= cl
+
+def SHR8ri : Ii8 <"shr", 0xC0, MRM5r >; // R8 >>= imm8
+def SHR16ri : Ii8 <"shr", 0xC1, MRM5r >, OpSize; // R16 >>= imm8
+def SHR32ri : Ii8 <"shr", 0xC1, MRM5r >; // R32 >>= imm8
+def SHR8mi : Im8i8 <"shr", 0xC0, MRM5m >; // [mem8] >>= imm8
+def SHR16mi : Im16i8<"shr", 0xC1, MRM5m >, OpSize; // [mem16] >>= imm8
+def SHR32mi : Im32i8<"shr", 0xC1, MRM5m >; // [mem32] >>= imm8
+
+def SAR8rCL : I <"sar", 0xD2, MRM7r > , UsesCL; // R8 >>>= cl
+def SAR16rCL : I <"sar", 0xD3, MRM7r >, OpSize, UsesCL; // R16 >>>= cl
+def SAR32rCL : I <"sar", 0xD3, MRM7r > , UsesCL; // R32 >>>= cl
+def SAR8mCL : Im8 <"sar", 0xD2, MRM7m > , UsesCL; // [mem8] >>>= cl
+def SAR16mCL : Im16 <"sar", 0xD3, MRM7m >, OpSize, UsesCL; // [mem16] >>>= cl
+def SAR32mCL : Im32 <"sar", 0xD3, MRM7m > , UsesCL; // [mem32] >>>= cl
+
+def SAR8ri : Ii8 <"sar", 0xC0, MRM7r >; // R8 >>>= imm8
+def SAR16ri : Ii8 <"sar", 0xC1, MRM7r >, OpSize; // R16 >>>= imm8
+def SAR32ri : Ii8 <"sar", 0xC1, MRM7r >; // R32 >>>= imm8
+def SAR8mi : Im8i8 <"sar", 0xC0, MRM7m >; // [mem8] >>>= imm8
+def SAR16mi : Im16i8<"sar", 0xC1, MRM7m >, OpSize; // [mem16] >>>= imm8
+def SAR32mi : Im32i8<"sar", 0xC1, MRM7m >; // [mem32] >>>= imm8
+
+def SHLD32rrCL : I <"shld", 0xA5, MRMDestReg>, TB, UsesCL; // R32 <<= R32,R32 cl
+def SHLD32mrCL : Im32 <"shld", 0xA5, MRMDestMem>, TB, UsesCL; // [mem32] <<= [mem32],R32 cl
+def SHLD32rri8 : Ii8 <"shld", 0xA4, MRMDestReg>, TB; // R32 <<= R32,R32 imm8
+def SHLD32mri8 : Im32i8<"shld", 0xA4, MRMDestMem>, TB; // [mem32] <<= [mem32],R32 imm8
+
+def SHRD32rrCL : I <"shrd", 0xAD, MRMDestReg>, TB, UsesCL; // R32 >>= R32,R32 cl
+def SHRD32mrCL : Im32 <"shrd", 0xAD, MRMDestMem>, TB, UsesCL; // [mem32] >>= [mem32],R32 cl
+def SHRD32rri8 : Ii8 <"shrd", 0xAC, MRMDestReg>, TB; // R32 >>= R32,R32 imm8
+def SHRD32mri8 : Im32i8<"shrd", 0xAC, MRMDestMem>, TB; // [mem32] >>= [mem32],R32 imm8
-// Logical operators...
-def ANDrr8 : I2A8 <"and", 0x20, MRMDestReg>, Pattern<(set R8 , (and R8 , R8 ))>;
-def ANDrr16 : I2A16<"and", 0x21, MRMDestReg>, OpSize, Pattern<(set R16, (and R16, R16))>;
-def ANDrr32 : I2A32<"and", 0x21, MRMDestReg>, Pattern<(set R32, (and R32, R32))>;
-def ANDri8 : I2A8 <"and", 0x80, MRMS4r >, Pattern<(set R8 , (and R8 , imm))>;
-def ANDri16 : I2A16<"and", 0x81, MRMS4r >, OpSize, Pattern<(set R16, (and R16, imm))>;
-def ANDri32 : I2A32<"and", 0x81, MRMS4r >, Pattern<(set R32, (and R32, imm))>;
-def ANDri16b : I2A8 <"and", 0x83, MRMS4r >, OpSize;
-def ANDri32b : I2A8 <"and", 0x83, MRMS4r >;
-
-def ORrr8 : I2A8 <"or" , 0x08, MRMDestReg>, Pattern<(set R8 , (or R8 , R8 ))>;
-def ORrr16 : I2A16<"or" , 0x09, MRMDestReg>, OpSize, Pattern<(set R16, (or R16, R16))>;
-def ORrr32 : I2A32<"or" , 0x09, MRMDestReg>, Pattern<(set R32, (or R32, R32))>;
-def ORri8 : I2A8 <"or" , 0x80, MRMS1r >, Pattern<(set R8 , (or R8 , imm))>;
-def ORri16 : I2A16<"or" , 0x81, MRMS1r >, OpSize, Pattern<(set R16, (or R16, imm))>;
-def ORri32 : I2A32<"or" , 0x81, MRMS1r >, Pattern<(set R32, (or R32, imm))>;
-def ORri16b : I2A8 <"or" , 0x83, MRMS1r >, OpSize;
-def ORri32b : I2A8 <"or" , 0x83, MRMS1r >;
-
-
-def XORrr8 : I2A8 <"xor", 0x30, MRMDestReg>, Pattern<(set R8 , (xor R8 , R8 ))>;
-def XORrr16 : I2A16<"xor", 0x31, MRMDestReg>, OpSize, Pattern<(set R16, (xor R16, R16))>;
-def XORrr32 : I2A32<"xor", 0x31, MRMDestReg>, Pattern<(set R32, (xor R32, R32))>;
-def XORri8 : I2A8 <"xor", 0x80, MRMS6r >, Pattern<(set R8 , (xor R8 , imm))>;
-def XORri16 : I2A16<"xor", 0x81, MRMS6r >, OpSize, Pattern<(set R16, (xor R16, imm))>;
-def XORri32 : I2A32<"xor", 0x81, MRMS6r >, Pattern<(set R32, (xor R32, imm))>;
-def XORri16b : I2A8 <"xor", 0x83, MRMS6r >, OpSize;
-def XORri32b : I2A8 <"xor", 0x83, MRMS6r >;
+// Arithmetic...
+def ADD8rr : I <"add", 0x00, MRMDestReg>, Pattern<(set R8 , (plus R8 , R8 ))>;
+def ADD16rr : I <"add", 0x01, MRMDestReg>, OpSize, Pattern<(set R16, (plus R16, R16))>;
+def ADD32rr : I <"add", 0x01, MRMDestReg>, Pattern<(set R32, (plus R32, R32))>;
+def ADD8mr : Im8 <"add", 0x00, MRMDestMem>; // [mem8] += R8
+def ADD16mr : Im16 <"add", 0x01, MRMDestMem>, OpSize; // [mem16] += R16
+def ADD32mr : Im32 <"add", 0x01, MRMDestMem>; // [mem32] += R32
+def ADD8rm : Im8 <"add", 0x02, MRMSrcMem >; // R8 += [mem8]
+def ADD16rm : Im16 <"add", 0x03, MRMSrcMem >, OpSize; // R16 += [mem16]
+def ADD32rm : Im32 <"add", 0x03, MRMSrcMem >; // R32 += [mem32]
+
+def ADD8ri : Ii8 <"add", 0x80, MRM0r >, Pattern<(set R8 , (plus R8 , imm))>;
+def ADD16ri : Ii16 <"add", 0x81, MRM0r >, OpSize, Pattern<(set R16, (plus R16, imm))>;
+def ADD32ri : Ii32 <"add", 0x81, MRM0r >, Pattern<(set R32, (plus R32, imm))>;
+def ADD8mi : Im8i8 <"add", 0x80, MRM0m >; // [mem8] += I8
+def ADD16mi : Im16i16 <"add", 0x81, MRM0m >, OpSize; // [mem16] += I16
+def ADD32mi : Im32i32 <"add", 0x81, MRM0m >; // [mem32] += I32
+
+def ADD16ri8 : Ii8 <"add", 0x83, MRM0r >, OpSize; // ADDri with sign extended 8 bit imm
+def ADD32ri8 : Ii8 <"add", 0x83, MRM0r >;
+def ADD16mi8 : Im16i8<"add", 0x83, MRM0m >, OpSize; // [mem16] += I8
+def ADD32mi8 : Im32i8<"add", 0x83, MRM0m >; // [mem32] += I8
+
+def ADC32rr : I <"adc", 0x11, MRMDestReg>; // R32 += R32+Carry
+def ADC32rm : Im32 <"adc", 0x11, MRMSrcMem >; // R32 += [mem32]+Carry
+def ADC32mr : Im32 <"adc", 0x13, MRMDestMem>; // [mem32] += R32+Carry
+
+
+def SUB8rr : I <"sub", 0x28, MRMDestReg>, Pattern<(set R8 , (minus R8 , R8 ))>;
+def SUB16rr : I <"sub", 0x29, MRMDestReg>, OpSize, Pattern<(set R16, (minus R16, R16))>;
+def SUB32rr : I <"sub", 0x29, MRMDestReg>, Pattern<(set R32, (minus R32, R32))>;
+def SUB8mr : Im8 <"sub", 0x28, MRMDestMem>; // [mem8] -= R8
+def SUB16mr : Im16 <"sub", 0x29, MRMDestMem>, OpSize; // [mem16] -= R16
+def SUB32mr : Im32 <"sub", 0x29, MRMDestMem>; // [mem32] -= R32
+def SUB8rm : Im8 <"sub", 0x2A, MRMSrcMem >; // R8 -= [mem8]
+def SUB16rm : Im16 <"sub", 0x2B, MRMSrcMem >, OpSize; // R16 -= [mem16]
+def SUB32rm : Im32 <"sub", 0x2B, MRMSrcMem >; // R32 -= [mem32]
+
+def SUB8ri : Ii8 <"sub", 0x80, MRM5r >, Pattern<(set R8 , (minus R8 , imm))>;
+def SUB16ri : Ii16 <"sub", 0x81, MRM5r >, OpSize, Pattern<(set R16, (minus R16, imm))>;
+def SUB32ri : Ii32 <"sub", 0x81, MRM5r >, Pattern<(set R32, (minus R32, imm))>;
+def SUB8mi : Im8i8 <"sub", 0x80, MRM5m >; // [mem8] -= I8
+def SUB16mi : Im16i16 <"sub", 0x81, MRM5m >, OpSize; // [mem16] -= I16
+def SUB32mi : Im32i32 <"sub", 0x81, MRM5m >; // [mem32] -= I32
+
+def SUB16ri8 : Ii8 <"sub", 0x83, MRM5r >, OpSize;
+def SUB32ri8 : Ii8 <"sub", 0x83, MRM5r >;
+def SUB16mi8 : Im16i8<"sub", 0x83, MRM5m >, OpSize; // [mem16] -= I8
+def SUB32mi8 : Im32i8<"sub", 0x83, MRM5m >; // [mem32] -= I8
+
+def SBB32rr : I <"sbb", 0x19, MRMDestReg>; // R32 -= R32+Borrow
+def SBB32rm : Im32 <"sbb", 0x19, MRMSrcMem >; // R32 -= [mem32]+Borrow
+def SBB32mr : Im32 <"sbb", 0x1B, MRMDestMem>; // [mem32] -= R32+Borrow
+
+def IMUL16rr : I <"imul", 0xAF, MRMSrcReg>, TB, OpSize, Pattern<(set R16, (times R16, R16))>;
+def IMUL32rr : I <"imul", 0xAF, MRMSrcReg>, TB , Pattern<(set R32, (times R32, R32))>;
+def IMUL16rm : Im16 <"imul", 0xAF, MRMSrcMem>, TB, OpSize;
+def IMUL32rm : Im32 <"imul", 0xAF, MRMSrcMem>, TB ;
+
+} // end Two Address instructions
+
+// These are surprisingly enough not two address instructions!
+def IMUL16rri : Ii16 <"imul", 0x69, MRMSrcReg>, OpSize; // R16 = R16*I16
+def IMUL32rri : Ii32 <"imul", 0x69, MRMSrcReg>; // R32 = R32*I32
+def IMUL16rri8 : Ii8 <"imul", 0x6B, MRMSrcReg>, OpSize; // R16 = R16*I8
+def IMUL32rri8 : Ii8 <"imul", 0x6B, MRMSrcReg>; // R32 = R32*I8
+def IMUL16rmi : Im16i16 <"imul", 0x69, MRMSrcMem>, OpSize; // R16 = [mem16]*I16
+def IMUL32rmi : Im32i32 <"imul", 0x69, MRMSrcMem>; // R32 = [mem32]*I32
+def IMUL16rmi8 : Im16i8<"imul", 0x6B, MRMSrcMem>, OpSize; // R16 = [mem16]*I8
+def IMUL32rmi8 : Im32i8<"imul", 0x6B, MRMSrcMem>; // R32 = [mem32]*I8
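
The three-operand immediate multiplies differ from the two-address group above
in that they may write a register other than their source. As a rough sketch of
what that permits (following the BuildMI style used later in this diff; the
operand count and register choices are illustrative only, not taken from this
patch):

  // Hypothetical: EAX = ECX * 42 without clobbering ECX.  A two-address
  // instruction such as ADD32rr requires its destination to also be the
  // first source operand, so it could not do this in one instruction.
  MachineInstr *MulMI = BuildMI(X86::IMUL32rri, 2, X86::EAX)
                          .addReg(X86::ECX)
                          .addZImm(42);
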
+//===----------------------------------------------------------------------===//
// Test instructions are just like AND, except they don't generate a result.
-def TESTrr8 : X86Inst<"test", 0x84, MRMDestReg, Arg8 >; // flags = R8 & R8
-def TESTrr16 : X86Inst<"test", 0x85, MRMDestReg, Arg16>, OpSize; // flags = R16 & R16
-def TESTrr32 : X86Inst<"test", 0x85, MRMDestReg, Arg32>; // flags = R32 & R32
-def TESTri8 : X86Inst<"test", 0xF6, MRMS0r , Arg8 >; // flags = R8 & imm8
-def TESTri16 : X86Inst<"test", 0xF7, MRMS0r , Arg16>, OpSize; // flags = R16 & imm16
-def TESTri32 : X86Inst<"test", 0xF7, MRMS0r , Arg32>; // flags = R32 & imm32
+def TEST8rr : I <"test", 0x84, MRMDestReg>; // flags = R8 & R8
+def TEST16rr : I <"test", 0x85, MRMDestReg>, OpSize; // flags = R16 & R16
+def TEST32rr : I <"test", 0x85, MRMDestReg>; // flags = R32 & R32
+def TEST8mr : Im8 <"test", 0x84, MRMDestMem>; // flags = [mem8] & R8
+def TEST16mr : Im16 <"test", 0x85, MRMDestMem>, OpSize; // flags = [mem16] & R16
+def TEST32mr : Im32 <"test", 0x85, MRMDestMem>; // flags = [mem32] & R32
+def TEST8rm : Im8 <"test", 0x84, MRMSrcMem >; // flags = R8 & [mem8]
+def TEST16rm : Im16 <"test", 0x85, MRMSrcMem >, OpSize; // flags = R16 & [mem16]
+def TEST32rm : Im32 <"test", 0x85, MRMSrcMem >; // flags = R32 & [mem32]
+
+def TEST8ri : Ii8 <"test", 0xF6, MRM0r >; // flags = R8 & imm8
+def TEST16ri : Ii16 <"test", 0xF7, MRM0r >, OpSize; // flags = R16 & imm16
+def TEST32ri : Ii32 <"test", 0xF7, MRM0r >; // flags = R32 & imm32
+def TEST8mi : Im8i8 <"test", 0xF6, MRM0m >; // flags = [mem8] & imm8
+def TEST16mi : Im16i16<"test", 0xF7, MRM0m >, OpSize; // flags = [mem16] & imm16
+def TEST32mi : Im32i32<"test", 0xF7, MRM0m >; // flags = [mem32] & imm32
-// Shift instructions
-class UsesCL { list<Register> Uses = [CL]; bit printImplicitUses = 1; }
-def SHLrr8 : I2A8 <"shl", 0xD2, MRMS4r > , UsesCL; // R8 <<= cl
-def SHLrr16 : I2A8 <"shl", 0xD3, MRMS4r >, OpSize, UsesCL; // R16 <<= cl
-def SHLrr32 : I2A8 <"shl", 0xD3, MRMS4r > , UsesCL; // R32 <<= cl
-def SHLir8 : I2A8 <"shl", 0xC0, MRMS4r >; // R8 <<= imm8
-def SHLir16 : I2A8 <"shl", 0xC1, MRMS4r >, OpSize; // R16 <<= imm16
-def SHLir32 : I2A8 <"shl", 0xC1, MRMS4r >; // R32 <<= imm32
-def SHRrr8 : I2A8 <"shr", 0xD2, MRMS5r > , UsesCL; // R8 >>= cl
-def SHRrr16 : I2A8 <"shr", 0xD3, MRMS5r >, OpSize, UsesCL; // R16 >>= cl
-def SHRrr32 : I2A8 <"shr", 0xD3, MRMS5r > , UsesCL; // R32 >>= cl
-def SHRir8 : I2A8 <"shr", 0xC0, MRMS5r >; // R8 >>= imm8
-def SHRir16 : I2A8 <"shr", 0xC1, MRMS5r >, OpSize; // R16 >>= imm16
-def SHRir32 : I2A8 <"shr", 0xC1, MRMS5r >; // R32 >>= imm32
-def SARrr8 : I2A8 <"sar", 0xD2, MRMS7r > , UsesCL; // R8 >>>= cl
-def SARrr16 : I2A8 <"sar", 0xD3, MRMS7r >, OpSize, UsesCL; // R16 >>>= cl
-def SARrr32 : I2A8 <"sar", 0xD3, MRMS7r > , UsesCL; // R32 >>>= cl
-def SARir8 : I2A8 <"sar", 0xC0, MRMS7r >; // R8 >>>= imm8
-def SARir16 : I2A8 <"sar", 0xC1, MRMS7r >, OpSize; // R16 >>>= imm16
-def SARir32 : I2A8 <"sar", 0xC1, MRMS7r >; // R32 >>>= imm32
-
-def SHLDrr32 : I2A8 <"shld", 0xA5, MRMDestReg>, TB, UsesCL; // R32 <<= R32,R32 cl
-def SHLDir32 : I2A8 <"shld", 0xA4, MRMDestReg>, TB; // R32 <<= R32,R32 imm8
-def SHRDrr32 : I2A8 <"shrd", 0xAD, MRMDestReg>, TB, UsesCL; // R32 >>= R32,R32 cl
-def SHRDir32 : I2A8 <"shrd", 0xAC, MRMDestReg>, TB; // R32 >>= R32,R32 imm8
// Condition code ops, incl. set if equal/not equal/...
-def SAHF : X86Inst<"sahf" , 0x9E, RawFrm, Arg8>, Imp<[AH],[]>; // flags = AH
-def SETBr : X86Inst<"setb" , 0x92, MRMS0r, Arg8>, TB; // R8 = < unsign
-def SETAEr : X86Inst<"setae", 0x93, MRMS0r, Arg8>, TB; // R8 = >= unsign
-def SETEr : X86Inst<"sete" , 0x94, MRMS0r, Arg8>, TB; // R8 = ==
-def SETNEr : X86Inst<"setne", 0x95, MRMS0r, Arg8>, TB; // R8 = !=
-def SETBEr : X86Inst<"setbe", 0x96, MRMS0r, Arg8>, TB; // R8 = <= unsign
-def SETAr : X86Inst<"seta" , 0x97, MRMS0r, Arg8>, TB; // R8 = > signed
-def SETSr : X86Inst<"sets" , 0x98, MRMS0r, Arg8>, TB; // R8 = <sign bit>
-def SETNSr : X86Inst<"setns", 0x99, MRMS0r, Arg8>, TB; // R8 = !<sign bit>
-def SETLr : X86Inst<"setl" , 0x9C, MRMS0r, Arg8>, TB; // R8 = < signed
-def SETGEr : X86Inst<"setge", 0x9D, MRMS0r, Arg8>, TB; // R8 = >= signed
-def SETLEr : X86Inst<"setle", 0x9E, MRMS0r, Arg8>, TB; // R8 = <= signed
-def SETGr : X86Inst<"setg" , 0x9F, MRMS0r, Arg8>, TB; // R8 = < signed
+def SAHF : I <"sahf" , 0x9E, RawFrm>, Imp<[AH],[]>; // flags = AH
-// Conditional moves. These are modelled as X = cmovXX Y, Z. Eventually
-// register allocated to cmovXX XY, Z
-def CMOVErr16 : I2A16<"cmove", 0x44, MRMSrcReg>, TB, OpSize; // if ==, R16 = R16
-def CMOVNErr32: I2A32<"cmovne",0x45, MRMSrcReg>, TB; // if !=, R32 = R32
+def SETBr : I <"setb" , 0x92, MRM0r>, TB; // R8 = < unsign
+def SETBm : Im8<"setb" , 0x92, MRM0m>, TB; // [mem8] = < unsign
+def SETAEr : I <"setae", 0x93, MRM0r>, TB; // R8 = >= unsign
+def SETAEm : Im8<"setae", 0x93, MRM0m>, TB; // [mem8] = >= unsign
+def SETEr : I <"sete" , 0x94, MRM0r>, TB; // R8 = ==
+def SETEm : Im8<"sete" , 0x94, MRM0m>, TB; // [mem8] = ==
+def SETNEr : I <"setne", 0x95, MRM0r>, TB; // R8 = !=
+def SETNEm : Im8<"setne", 0x95, MRM0m>, TB; // [mem8] = !=
+def SETBEr : I <"setbe", 0x96, MRM0r>, TB; // R8 = <= unsign
+def SETBEm : Im8<"setbe", 0x96, MRM0m>, TB; // [mem8] = <= unsign
+def SETAr   : I  <"seta" , 0x97, MRM0r>, TB;            // R8 = >  unsign
+def SETAm   : Im8<"seta" , 0x97, MRM0m>, TB;            // [mem8] = >  unsign
+def SETSr : I <"sets" , 0x98, MRM0r>, TB; // R8 = <sign bit>
+def SETSm : Im8<"sets" , 0x98, MRM0m>, TB; // [mem8] = <sign bit>
+def SETNSr : I <"setns", 0x99, MRM0r>, TB; // R8 = !<sign bit>
+def SETNSm : Im8<"setns", 0x99, MRM0m>, TB; // [mem8] = !<sign bit>
+def SETLr : I <"setl" , 0x9C, MRM0r>, TB; // R8 = < signed
+def SETLm : Im8<"setl" , 0x9C, MRM0m>, TB; // [mem8] = < signed
+def SETGEr : I <"setge", 0x9D, MRM0r>, TB; // R8 = >= signed
+def SETGEm : Im8<"setge", 0x9D, MRM0m>, TB; // [mem8] = >= signed
+def SETLEr : I <"setle", 0x9E, MRM0r>, TB; // R8 = <= signed
+def SETLEm : Im8<"setle", 0x9E, MRM0m>, TB; // [mem8] = <= signed
+def SETGr   : I  <"setg" , 0x9F, MRM0r>, TB;            // R8 = >  signed
+def SETGm   : Im8<"setg" , 0x9F, MRM0m>, TB;            // [mem8] = >  signed
// Integer comparisons
-def CMPrr8 : X86Inst<"cmp", 0x38, MRMDestReg, Arg8 >; // compare R8, R8
-def CMPrr16 : X86Inst<"cmp", 0x39, MRMDestReg, Arg16>, OpSize; // compare R16, R16
-def CMPrr32 : X86Inst<"cmp", 0x39, MRMDestReg, Arg32>, // compare R32, R32
+def CMP8rr : I <"cmp", 0x38, MRMDestReg>; // compare R8, R8
+def CMP16rr : I <"cmp", 0x39, MRMDestReg>, OpSize; // compare R16, R16
+def CMP32rr : I <"cmp", 0x39, MRMDestReg>, // compare R32, R32
Pattern<(isVoid (unspec2 R32, R32))>;
-def CMPri8 : X86Inst<"cmp", 0x80, MRMS7r , Arg8 >; // compare R8, imm8
-def CMPri16 : X86Inst<"cmp", 0x81, MRMS7r , Arg16>, OpSize; // compare R16, imm16
-def CMPri32 : X86Inst<"cmp", 0x81, MRMS7r , Arg32>; // compare R32, imm32
+def CMP8mr : Im8 <"cmp", 0x38, MRMDestMem>; // compare [mem8], R8
+def CMP16mr : Im16 <"cmp", 0x39, MRMDestMem>, OpSize; // compare [mem16], R16
+def CMP32mr : Im32 <"cmp", 0x39, MRMDestMem>; // compare [mem32], R32
+def CMP8rm : Im8 <"cmp", 0x3A, MRMSrcMem >; // compare R8, [mem8]
+def CMP16rm : Im16 <"cmp", 0x3B, MRMSrcMem >, OpSize; // compare R16, [mem16]
+def CMP32rm : Im32 <"cmp", 0x3B, MRMSrcMem >; // compare R32, [mem32]
+def CMP8ri : Ii8 <"cmp", 0x80, MRM7r >; // compare R8, imm8
+def CMP16ri : Ii16 <"cmp", 0x81, MRM7r >, OpSize; // compare R16, imm16
+def CMP32ri : Ii32 <"cmp", 0x81, MRM7r >; // compare R32, imm32
+def CMP8mi : Im8i8 <"cmp", 0x80, MRM7m >; // compare [mem8], imm8
+def CMP16mi : Im16i16<"cmp", 0x81, MRM7m >, OpSize; // compare [mem16], imm16
+def CMP32mi : Im32i32<"cmp", 0x81, MRM7m >; // compare [mem32], imm32
// Sign/Zero extenders
-def MOVSXr16r8 : X86Inst<"movsx", 0xBE, MRMSrcReg, Arg8>, TB, OpSize; // R16 = signext(R8)
-def MOVSXr32r8 : X86Inst<"movsx", 0xBE, MRMSrcReg, Arg8>, TB; // R32 = signext(R8)
-def MOVSXr32r16: X86Inst<"movsx", 0xBF, MRMSrcReg, Arg8>, TB; // R32 = signext(R16)
-def MOVZXr16r8 : X86Inst<"movzx", 0xB6, MRMSrcReg, Arg8>, TB, OpSize; // R16 = zeroext(R8)
-def MOVZXr32r8 : X86Inst<"movzx", 0xB6, MRMSrcReg, Arg8>, TB; // R32 = zeroext(R8)
-def MOVZXr32r16: X86Inst<"movzx", 0xB7, MRMSrcReg, Arg8>, TB; // R32 = zeroext(R16)
+def MOVSX16rr8 : I <"movsx", 0xBE, MRMSrcReg>, TB, OpSize; // R16 = signext(R8)
+def MOVSX32rr8 : I <"movsx", 0xBE, MRMSrcReg>, TB; // R32 = signext(R8)
+def MOVSX32rr16: I <"movsx", 0xBF, MRMSrcReg>, TB; // R32 = signext(R16)
+def MOVSX16rm8 : Im8 <"movsx", 0xBE, MRMSrcMem>, TB, OpSize; // R16 = signext([mem8])
+def MOVSX32rm8 : Im8 <"movsx", 0xBE, MRMSrcMem>, TB; // R32 = signext([mem8])
+def MOVSX32rm16: Im16<"movsx", 0xBF, MRMSrcMem>, TB; // R32 = signext([mem16])
+
+def MOVZX16rr8 : I <"movzx", 0xB6, MRMSrcReg>, TB, OpSize; // R16 = zeroext(R8)
+def MOVZX32rr8 : I <"movzx", 0xB6, MRMSrcReg>, TB; // R32 = zeroext(R8)
+def MOVZX32rr16: I <"movzx", 0xB7, MRMSrcReg>, TB; // R32 = zeroext(R16)
+def MOVZX16rm8 : Im8 <"movzx", 0xB6, MRMSrcMem>, TB, OpSize; // R16 = zeroext([mem8])
+def MOVZX32rm8 : Im8 <"movzx", 0xB6, MRMSrcMem>, TB; // R32 = zeroext([mem8])
+def MOVZX32rm16: Im16<"movzx", 0xB7, MRMSrcMem>, TB; // R32 = zeroext([mem16])
//===----------------------------------------------------------------------===//
@@ -384,64 +610,78 @@
// FIXME: These need to indicate mod/ref sets for FP regs... & FP 'TOP'
-// Floating point pseudo instructions...
-class FPInst<string n, bits<8> o, Format F, ArgType t, FPFormat fp>
- : X86Inst<n, o, F, t> { let FPForm = fp; let FPFormBits = FPForm.Value; }
-
-def FpMOV : FPInst<"FMOV", 0, Pseudo, ArgF80, SpecialFP>; // f1 = fmov f2
-def FpADD : FPInst<"FADD", 0, Pseudo, ArgF80, TwoArgFP>; // f1 = fadd f2, f3
-def FpSUB : FPInst<"FSUB", 0, Pseudo, ArgF80, TwoArgFP>; // f1 = fsub f2, f3
-def FpMUL : FPInst<"FMUL", 0, Pseudo, ArgF80, TwoArgFP>; // f1 = fmul f2, f3
-def FpDIV : FPInst<"FDIV", 0, Pseudo, ArgF80, TwoArgFP>; // f1 = fdiv f2, f3
+// Floating point instruction templates
+class FPInst<string n, bits<8> o, Format F, FPFormat fp, MemType m, ImmType i>
+ : X86Inst<n, o, F, m, i> { let FPForm = fp; let FPFormBits = FPForm.Value; }
+
+class FPI<string n, bits<8> o, Format F, FPFormat fp> : FPInst<n, o, F, fp, NoMem, NoImm>;
+
+class FPIM<string n, bits<8> o, Format F, FPFormat fp, MemType m> : FPInst<n, o, F, fp, m, NoImm>;
+
+class FPI16m<string n, bits<8> o, Format F, FPFormat fp> : FPIM<n, o, F, fp, Mem16>;
+class FPI32m<string n, bits<8> o, Format F, FPFormat fp> : FPIM<n, o, F, fp, Mem32>;
+class FPI64m<string n, bits<8> o, Format F, FPFormat fp> : FPIM<n, o, F, fp, Mem64>;
+class FPI80m<string n, bits<8> o, Format F, FPFormat fp> : FPIM<n, o, F, fp, Mem80>;
+
+// Pseudo instructions for floating point. We use these pseudo instructions
+// because they can be expanded by the fp stackifier into one of many different
+// forms of instructions for doing these operations. Until the stackifier runs,
+// we prefer to be abstract.
+def FpMOV : FPI<"FMOV", 0, Pseudo, SpecialFP>; // f1 = fmov f2
+def FpADD : FPI<"FADD", 0, Pseudo, TwoArgFP>; // f1 = fadd f2, f3
+def FpSUB : FPI<"FSUB", 0, Pseudo, TwoArgFP>; // f1 = fsub f2, f3
+def FpMUL : FPI<"FMUL", 0, Pseudo, TwoArgFP>; // f1 = fmul f2, f3
+def FpDIV : FPI<"FDIV", 0, Pseudo, TwoArgFP>; // f1 = fdiv f2, f3
+
+def FpUCOM : FPI<"FUCOM", 0, Pseudo, TwoArgFP>; // FPSW = fucom f1, f2
+def FpGETRESULT : FPI<"FGETRESULT",0, Pseudo, SpecialFP>; // FPR = ST(0)
+def FpSETRESULT : FPI<"FSETRESULT",0, Pseudo, SpecialFP>; // ST(0) = FPR
-def FpUCOM : FPInst<"FUCOM", 0, Pseudo, ArgF80, TwoArgFP>; // FPSW = fucom f1, f2
+// Floating point loads & stores...
+def FLDrr : FPI <"fld" , 0xC0, AddRegFrm, NotFP>, D9; // push(ST(i))
+def FLD32m : FPI32m <"fld" , 0xD9, MRM0m , ZeroArgFP>; // load float
+def FLD64m : FPI64m <"fld" , 0xDD, MRM0m , ZeroArgFP>; // load double
+def FLD80m : FPI80m <"fld" , 0xDB, MRM5m , ZeroArgFP>; // load extended
+def FILD16m : FPI16m <"fild" , 0xDF, MRM0m , ZeroArgFP>; // load signed short
+def FILD32m : FPI32m <"fild" , 0xDB, MRM0m , ZeroArgFP>; // load signed int
+def FILD64m : FPI64m <"fild" , 0xDF, MRM5m , ZeroArgFP>; // load signed long
+
+def FSTrr : FPI <"fst" , 0xD0, AddRegFrm, NotFP >, DD; // ST(i) = ST(0)
+def FSTPrr : FPI <"fstp", 0xD8, AddRegFrm, NotFP >, DD; // ST(i) = ST(0), pop
+def FST32m : FPI32m <"fst" , 0xD9, MRM2m , OneArgFP>; // store float
+def FST64m : FPI64m <"fst" , 0xDD, MRM2m , OneArgFP>; // store double
+def FSTP32m : FPI32m <"fstp", 0xD9, MRM3m , OneArgFP>; // store float, pop
+def FSTP64m : FPI64m <"fstp", 0xDD, MRM3m , OneArgFP>; // store double, pop
+def FSTP80m : FPI80m <"fstp", 0xDB, MRM7m , OneArgFP>; // store extended, pop
+
+def FIST16m : FPI16m <"fist", 0xDF, MRM2m , OneArgFP>; // store signed short
+def FIST32m : FPI32m <"fist", 0xDB, MRM2m , OneArgFP>; // store signed int
+def FISTP16m : FPI16m <"fistp", 0xDF, MRM3m , NotFP >; // store signed short, pop
+def FISTP32m : FPI32m <"fistp", 0xDB, MRM3m , NotFP >; // store signed int, pop
+def FISTP64m : FPI64m <"fistpll", 0xDF, MRM7m , OneArgFP>; // store signed long, pop
-def FpGETRESULT : FPInst<"FGETRESULT",0, Pseudo, ArgF80, SpecialFP>; // FPR = ST(0)
+def FXCH : FPI <"fxch", 0xC8, AddRegFrm, NotFP>, D9; // fxch ST(i), ST(0)
-def FpSETRESULT : FPInst<"FSETRESULT",0, Pseudo, ArgF80, SpecialFP>; // ST(0) = FPR
+// Floating point constant loads...
+def FLD0 : FPI<"fldz", 0xEE, RawFrm, ZeroArgFP>, D9;
+def FLD1 : FPI<"fld1", 0xE8, RawFrm, ZeroArgFP>, D9;
-// Floating point loads & stores...
-def FLDrr : FPInst<"fld" , 0xC0, AddRegFrm, ArgF80, NotFP>, D9; // push(ST(i))
-def FLDr32 : FPInst<"fld" , 0xD9, MRMS0m , ArgF32, ZeroArgFP>; // load float
-def FLDr64 : FPInst<"fld" , 0xDD, MRMS0m , ArgF64, ZeroArgFP>; // load double
-def FLDr80 : FPInst<"fld" , 0xDB, MRMS5m , ArgF80, ZeroArgFP>; // load extended
-def FILDr16 : FPInst<"fild" , 0xDF, MRMS0m , Arg16 , ZeroArgFP>; // load signed short
-def FILDr32 : FPInst<"fild" , 0xDB, MRMS0m , Arg32 , ZeroArgFP>; // load signed int
-def FILDr64 : FPInst<"fild" , 0xDF, MRMS5m , Arg64 , ZeroArgFP>; // load signed long
-
-def FSTr32 : FPInst<"fst" , 0xD9, MRMS2m , ArgF32, OneArgFP>; // store float
-def FSTr64 : FPInst<"fst" , 0xDD, MRMS2m , ArgF64, OneArgFP>; // store double
-def FSTPr32 : FPInst<"fstp", 0xD9, MRMS3m , ArgF32, OneArgFP>; // store float, pop
-def FSTPr64 : FPInst<"fstp", 0xDD, MRMS3m , ArgF64, OneArgFP>; // store double, pop
-def FSTPr80 : FPInst<"fstp", 0xDB, MRMS7m , ArgF80, OneArgFP>; // store extended, pop
-def FSTrr : FPInst<"fst" , 0xD0, AddRegFrm, ArgF80, NotFP >, DD; // ST(i) = ST(0)
-def FSTPrr : FPInst<"fstp", 0xD8, AddRegFrm, ArgF80, NotFP >, DD; // ST(i) = ST(0), pop
-
-def FISTr16 : FPInst<"fist", 0xDF, MRMS2m, Arg16 , OneArgFP>; // store signed short
-def FISTr32 : FPInst<"fist", 0xDB, MRMS2m, Arg32 , OneArgFP>; // store signed int
-def FISTPr16 : FPInst<"fistp", 0xDF, MRMS3m, Arg16 , NotFP >; // store signed short, pop
-def FISTPr32 : FPInst<"fistp", 0xDB, MRMS3m, Arg32 , NotFP >; // store signed int, pop
-def FISTPr64 : FPInst<"fistpll", 0xDF, MRMS7m, Arg64 , OneArgFP>; // store signed long, pop
-def FXCH : FPInst<"fxch", 0xC8, AddRegFrm, ArgF80, NotFP>, D9; // fxch ST(i), ST(0)
+// Unary operations...
+def FCHS : FPI<"fchs", 0xE0, RawFrm, OneArgFPRW>, D9; // f1 = fchs f2
-// Floating point constant loads...
-def FLD0 : FPInst<"fldz", 0xEE, RawFrm, ArgF80, ZeroArgFP>, D9;
-def FLD1 : FPInst<"fld1", 0xE8, RawFrm, ArgF80, ZeroArgFP>, D9;
+def FTST : FPI<"ftst", 0xE4, RawFrm, OneArgFP>, D9; // ftst ST(0)
// Binary arithmetic operations...
-class FPST0rInst<string n, bits<8> o>
- : X86Inst<n, o, AddRegFrm, ArgF80>, D8 {
+class FPST0rInst<string n, bits<8> o> : I<n, o, AddRegFrm>, D8 {
list<Register> Uses = [ST0];
list<Register> Defs = [ST0];
}
-class FPrST0Inst<string n, bits<8> o>
- : X86Inst<n, o, AddRegFrm, ArgF80>, DC {
+class FPrST0Inst<string n, bits<8> o> : I<n, o, AddRegFrm>, DC {
bit printImplicitUses = 1;
list<Register> Uses = [ST0];
}
-class FPrST0PInst<string n, bits<8> o>
- : X86Inst<n, o, AddRegFrm, ArgF80>, DE {
+class FPrST0PInst<string n, bits<8> o> : I<n, o, AddRegFrm>, DE {
list<Register> Uses = [ST0];
}
@@ -470,14 +710,14 @@
def FDIVRPrST0 : FPrST0PInst<"fdivrp", 0xF0>; // ST(i) = ST(0) / ST(i), pop
// Floating point compares
-def FUCOMr : X86Inst<"fucom" , 0xE0, AddRegFrm, ArgF80>, DD, Imp<[ST0],[]>; // FPSW = compare ST(0) with ST(i)
-def FUCOMPr : X86Inst<"fucomp" , 0xE8, AddRegFrm, ArgF80>, DD, Imp<[ST0],[]>; // FPSW = compare ST(0) with ST(i), pop
-def FUCOMPPr : X86Inst<"fucompp", 0xE9, RawFrm , ArgF80>, DA, Imp<[ST0],[]>; // compare ST(0) with ST(1), pop, pop
+def FUCOMr : I<"fucom" , 0xE0, AddRegFrm>, DD, Imp<[ST0],[]>; // FPSW = compare ST(0) with ST(i)
+def FUCOMPr : I<"fucomp" , 0xE8, AddRegFrm>, DD, Imp<[ST0],[]>; // FPSW = compare ST(0) with ST(i), pop
+def FUCOMPPr : I<"fucompp", 0xE9, RawFrm >, DA, Imp<[ST0],[]>; // compare ST(0) with ST(1), pop, pop
// Floating point flag ops
-def FNSTSWr8 : X86Inst<"fnstsw" , 0xE0, RawFrm , ArgF80>, DF, Imp<[],[AX]>; // AX = fp flags
-def FNSTCWm16 : X86Inst<"fnstcw" , 0xD9, MRMS7m , Arg16 >; // [mem16] = X87 control world
-def FLDCWm16 : X86Inst<"fldcw" , 0xD9, MRMS5m , Arg16 >; // X87 control world = [mem16]
+def FNSTSW8r : I <"fnstsw" , 0xE0, RawFrm>, DF, Imp<[],[AX]>; // AX = fp flags
+def FNSTCW16m : Im16<"fnstcw" , 0xD9, MRM7m >;             // [mem16] = X87 control word
+def FLDCW16m  : Im16<"fldcw" , 0xD9, MRM5m >;              // X87 control word = [mem16]
//===----------------------------------------------------------------------===//
@@ -485,26 +725,26 @@
//
def RET_R32 : Expander<(ret R32:$reg),
- [(MOVrr32 EAX, R32:$reg),
+ [(MOV32rr EAX, R32:$reg),
(RET)]>;
// FIXME: This should eventually just be implemented by defining a frameidx as a
// value address for a load.
def LOAD_FI16 : Expander<(set R16:$dest, (load frameidx:$fi)),
- [(MOVmr16 R16:$dest, frameidx:$fi, 1, 0/*NoReg*/, 0)]>;
+ [(MOV16rm R16:$dest, frameidx:$fi, 1, 0/*NoReg*/, 0)]>;
def LOAD_FI32 : Expander<(set R32:$dest, (load frameidx:$fi)),
- [(MOVmr32 R32:$dest, frameidx:$fi, 1, 0/*NoReg*/, 0)]>;
+ [(MOV32rm R32:$dest, frameidx:$fi, 1, 0/*NoReg*/, 0)]>;
def LOAD_R16 : Expander<(set R16:$dest, (load R32:$src)),
- [(MOVmr16 R16:$dest, R32:$src, 1, 0/*NoReg*/, 0)]>;
+ [(MOV16rm R16:$dest, R32:$src, 1, 0/*NoReg*/, 0)]>;
def LOAD_R32 : Expander<(set R32:$dest, (load R32:$src)),
- [(MOVmr32 R32:$dest, R32:$src, 1, 0/*NoReg*/, 0)]>;
+ [(MOV32rm R32:$dest, R32:$src, 1, 0/*NoReg*/, 0)]>;
def BR_EQ : Expander<(brcond (seteq R32:$a1, R32:$a2),
basicblock:$d1, basicblock:$d2),
- [(CMPrr32 R32:$a1, R32:$a2),
+ [(CMP32rr R32:$a1, R32:$a2),
(JE basicblock:$d1),
(JMP basicblock:$d2)]>;
Index: llvm/lib/Target/X86/X86RegisterInfo.cpp
diff -u llvm/lib/Target/X86/X86RegisterInfo.cpp:1.40 llvm/lib/Target/X86/X86RegisterInfo.cpp:1.40.4.1
--- llvm/lib/Target/X86/X86RegisterInfo.cpp:1.40 Tue Nov 11 16:41:33 2003
+++ llvm/lib/Target/X86/X86RegisterInfo.cpp Mon Mar 1 17:58:15 2004
@@ -24,13 +24,21 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetFrameInfo.h"
#include "Support/CommandLine.h"
-
-namespace llvm {
+#include "Support/STLExtras.h"
+using namespace llvm;
namespace {
cl::opt<bool>
NoFPElim("disable-fp-elim",
cl::desc("Disable frame pointer elimination optimization"));
+ cl::opt<bool>
+ NoFusing("disable-spill-fusing",
+ cl::desc("Disable fusing of spill code into instructions"));
+ cl::opt<bool>
+ PrintFailedFusing("print-failed-fuse-candidates",
+ cl::desc("Print instructions that the allocator wants to"
+ " fuse, but the X86 backend currently can't"),
+ cl::Hidden);
}
X86RegisterInfo::X86RegisterInfo()
@@ -47,40 +55,257 @@
}
int X86RegisterInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator &MBBI,
+ MachineBasicBlock::iterator MI,
unsigned SrcReg, int FrameIdx,
const TargetRegisterClass *RC) const {
static const unsigned Opcode[] =
- { X86::MOVrm8, X86::MOVrm16, X86::MOVrm32, X86::FSTPr80 };
- MachineInstr *MI = addFrameReference(BuildMI(Opcode[getIdx(RC)], 5),
+ { X86::MOV8mr, X86::MOV16mr, X86::MOV32mr, X86::FSTP80m };
+ MachineInstr *I = addFrameReference(BuildMI(Opcode[getIdx(RC)], 5),
FrameIdx).addReg(SrcReg);
- MBBI = MBB.insert(MBBI, MI)+1;
+ MBB.insert(MI, I);
return 1;
}
int X86RegisterInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator &MBBI,
+ MachineBasicBlock::iterator MI,
unsigned DestReg, int FrameIdx,
const TargetRegisterClass *RC) const{
static const unsigned Opcode[] =
- { X86::MOVmr8, X86::MOVmr16, X86::MOVmr32, X86::FLDr80 };
- MachineInstr *MI = addFrameReference(BuildMI(Opcode[getIdx(RC)], 4, DestReg),
- FrameIdx);
- MBBI = MBB.insert(MBBI, MI)+1;
+ { X86::MOV8rm, X86::MOV16rm, X86::MOV32rm, X86::FLD80m };
+ unsigned OC = Opcode[getIdx(RC)];
+ MBB.insert(MI, addFrameReference(BuildMI(OC, 4, DestReg), FrameIdx));
return 1;
}
int X86RegisterInfo::copyRegToReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator &MBBI,
+ MachineBasicBlock::iterator MI,
unsigned DestReg, unsigned SrcReg,
const TargetRegisterClass *RC) const {
static const unsigned Opcode[] =
- { X86::MOVrr8, X86::MOVrr16, X86::MOVrr32, X86::FpMOV };
- MachineInstr *MI = BuildMI(Opcode[getIdx(RC)],1,DestReg).addReg(SrcReg);
- MBBI = MBB.insert(MBBI, MI)+1;
+ { X86::MOV8rr, X86::MOV16rr, X86::MOV32rr, X86::FpMOV };
+ MBB.insert(MI, BuildMI(Opcode[getIdx(RC)],1,DestReg).addReg(SrcReg));
return 1;
}
+static MachineInstr *MakeMInst(unsigned Opcode, unsigned FrameIndex,
+ MachineInstr *MI) {
+ return addFrameReference(BuildMI(Opcode, 4), FrameIndex);
+}
+
+static MachineInstr *MakeMRInst(unsigned Opcode, unsigned FrameIndex,
+ MachineInstr *MI) {
+ return addFrameReference(BuildMI(Opcode, 5), FrameIndex)
+ .addReg(MI->getOperand(1).getReg());
+}
+
+static MachineInstr *MakeMRIInst(unsigned Opcode, unsigned FrameIndex,
+ MachineInstr *MI) {
+ return addFrameReference(BuildMI(Opcode, 5), FrameIndex)
+ .addReg(MI->getOperand(1).getReg())
+ .addZImm(MI->getOperand(2).getImmedValue());
+}
+
+static MachineInstr *MakeMIInst(unsigned Opcode, unsigned FrameIndex,
+ MachineInstr *MI) {
+ if (MI->getOperand(1).isImmediate())
+ return addFrameReference(BuildMI(Opcode, 5), FrameIndex)
+ .addZImm(MI->getOperand(1).getImmedValue());
+ else if (MI->getOperand(1).isGlobalAddress())
+ return addFrameReference(BuildMI(Opcode, 5), FrameIndex)
+ .addGlobalAddress(MI->getOperand(1).getGlobal());
+ assert(0 && "Unknown operand for MakeMI!");
+ return 0;
+}
+
+static MachineInstr *MakeRMInst(unsigned Opcode, unsigned FrameIndex,
+ MachineInstr *MI) {
+ const MachineOperand& op = MI->getOperand(0);
+ return addFrameReference(BuildMI(Opcode, 5, op.getReg(), op.getUseType()),
+ FrameIndex);
+}
+
+static MachineInstr *MakeRMIInst(unsigned Opcode, unsigned FrameIndex,
+ MachineInstr *MI) {
+ const MachineOperand& op = MI->getOperand(0);
+ return addFrameReference(BuildMI(Opcode, 5, op.getReg(), op.getUseType()),
+ FrameIndex).addZImm(MI->getOperand(2).getImmedValue());
+}
+
+
+bool X86RegisterInfo::foldMemoryOperand(MachineBasicBlock::iterator &MI,
+ unsigned i, int FrameIndex) const {
+ if (NoFusing) return false;
+
+ /// FIXME: This should obviously be autogenerated by tablegen when patterns
+ /// are available!
+ MachineBasicBlock& MBB = *MI->getParent();
+ MachineInstr* NI = 0;
+ if (i == 0) {
+ switch(MI->getOpcode()) {
+ case X86::XCHG8rr: NI = MakeMRInst(X86::XCHG8mr ,FrameIndex, MI); break;
+ case X86::XCHG16rr:NI = MakeMRInst(X86::XCHG16mr,FrameIndex, MI); break;
+ case X86::XCHG32rr:NI = MakeMRInst(X86::XCHG32mr,FrameIndex, MI); break;
+ case X86::MOV8rr: NI = MakeMRInst(X86::MOV8mr , FrameIndex, MI); break;
+ case X86::MOV16rr: NI = MakeMRInst(X86::MOV16mr, FrameIndex, MI); break;
+ case X86::MOV32rr: NI = MakeMRInst(X86::MOV32mr, FrameIndex, MI); break;
+ case X86::MOV8ri: NI = MakeMIInst(X86::MOV8mi , FrameIndex, MI); break;
+ case X86::MOV16ri: NI = MakeMIInst(X86::MOV16mi, FrameIndex, MI); break;
+ case X86::MOV32ri: NI = MakeMIInst(X86::MOV32mi, FrameIndex, MI); break;
+ case X86::MUL8r: NI = MakeMInst( X86::MUL8m , FrameIndex, MI); break;
+ case X86::MUL16r: NI = MakeMInst( X86::MUL16m, FrameIndex, MI); break;
+ case X86::MUL32r: NI = MakeMInst( X86::MUL32m, FrameIndex, MI); break;
+ case X86::DIV8r: NI = MakeMInst( X86::DIV8m , FrameIndex, MI); break;
+ case X86::DIV16r: NI = MakeMInst( X86::DIV16m, FrameIndex, MI); break;
+ case X86::DIV32r: NI = MakeMInst( X86::DIV32m, FrameIndex, MI); break;
+ case X86::IDIV8r: NI = MakeMInst( X86::IDIV8m , FrameIndex, MI); break;
+ case X86::IDIV16r: NI = MakeMInst( X86::IDIV16m, FrameIndex, MI); break;
+ case X86::IDIV32r: NI = MakeMInst( X86::IDIV32m, FrameIndex, MI); break;
+ case X86::NEG8r: NI = MakeMInst( X86::NEG8m , FrameIndex, MI); break;
+ case X86::NEG16r: NI = MakeMInst( X86::NEG16m, FrameIndex, MI); break;
+ case X86::NEG32r: NI = MakeMInst( X86::NEG32m, FrameIndex, MI); break;
+ case X86::NOT8r: NI = MakeMInst( X86::NOT8m , FrameIndex, MI); break;
+ case X86::NOT16r: NI = MakeMInst( X86::NOT16m, FrameIndex, MI); break;
+ case X86::NOT32r: NI = MakeMInst( X86::NOT32m, FrameIndex, MI); break;
+ case X86::INC8r: NI = MakeMInst( X86::INC8m , FrameIndex, MI); break;
+ case X86::INC16r: NI = MakeMInst( X86::INC16m, FrameIndex, MI); break;
+ case X86::INC32r: NI = MakeMInst( X86::INC32m, FrameIndex, MI); break;
+ case X86::DEC8r: NI = MakeMInst( X86::DEC8m , FrameIndex, MI); break;
+ case X86::DEC16r: NI = MakeMInst( X86::DEC16m, FrameIndex, MI); break;
+ case X86::DEC32r: NI = MakeMInst( X86::DEC32m, FrameIndex, MI); break;
+ case X86::ADD8rr: NI = MakeMRInst(X86::ADD8mr , FrameIndex, MI); break;
+ case X86::ADD16rr: NI = MakeMRInst(X86::ADD16mr, FrameIndex, MI); break;
+ case X86::ADD32rr: NI = MakeMRInst(X86::ADD32mr, FrameIndex, MI); break;
+ case X86::ADC32rr: NI = MakeMRInst(X86::ADC32mr, FrameIndex, MI); break;
+ case X86::ADD8ri: NI = MakeMIInst(X86::ADD8mi , FrameIndex, MI); break;
+ case X86::ADD16ri: NI = MakeMIInst(X86::ADD16mi, FrameIndex, MI); break;
+ case X86::ADD32ri: NI = MakeMIInst(X86::ADD32mi, FrameIndex, MI); break;
+ case X86::SUB8rr: NI = MakeMRInst(X86::SUB8mr , FrameIndex, MI); break;
+ case X86::SUB16rr: NI = MakeMRInst(X86::SUB16mr, FrameIndex, MI); break;
+ case X86::SUB32rr: NI = MakeMRInst(X86::SUB32mr, FrameIndex, MI); break;
+ case X86::SBB32rr: NI = MakeMRInst(X86::SBB32mr, FrameIndex, MI); break;
+ case X86::SUB8ri: NI = MakeMIInst(X86::SUB8mi , FrameIndex, MI); break;
+ case X86::SUB16ri: NI = MakeMIInst(X86::SUB16mi, FrameIndex, MI); break;
+ case X86::SUB32ri: NI = MakeMIInst(X86::SUB32mi, FrameIndex, MI); break;
+ case X86::AND8rr: NI = MakeMRInst(X86::AND8mr , FrameIndex, MI); break;
+ case X86::AND16rr: NI = MakeMRInst(X86::AND16mr, FrameIndex, MI); break;
+ case X86::AND32rr: NI = MakeMRInst(X86::AND32mr, FrameIndex, MI); break;
+ case X86::AND8ri: NI = MakeMIInst(X86::AND8mi , FrameIndex, MI); break;
+ case X86::AND16ri: NI = MakeMIInst(X86::AND16mi, FrameIndex, MI); break;
+ case X86::AND32ri: NI = MakeMIInst(X86::AND32mi, FrameIndex, MI); break;
+ case X86::OR8rr: NI = MakeMRInst(X86::OR8mr , FrameIndex, MI); break;
+ case X86::OR16rr: NI = MakeMRInst(X86::OR16mr, FrameIndex, MI); break;
+ case X86::OR32rr: NI = MakeMRInst(X86::OR32mr, FrameIndex, MI); break;
+ case X86::OR8ri: NI = MakeMIInst(X86::OR8mi , FrameIndex, MI); break;
+ case X86::OR16ri: NI = MakeMIInst(X86::OR16mi, FrameIndex, MI); break;
+ case X86::OR32ri: NI = MakeMIInst(X86::OR32mi, FrameIndex, MI); break;
+ case X86::XOR8rr: NI = MakeMRInst(X86::XOR8mr , FrameIndex, MI); break;
+ case X86::XOR16rr: NI = MakeMRInst(X86::XOR16mr, FrameIndex, MI); break;
+ case X86::XOR32rr: NI = MakeMRInst(X86::XOR32mr, FrameIndex, MI); break;
+ case X86::XOR8ri: NI = MakeMIInst(X86::XOR8mi , FrameIndex, MI); break;
+ case X86::XOR16ri: NI = MakeMIInst(X86::XOR16mi, FrameIndex, MI); break;
+ case X86::XOR32ri: NI = MakeMIInst(X86::XOR32mi, FrameIndex, MI); break;
+ case X86::SHL8rCL: NI = MakeMInst( X86::SHL8mCL ,FrameIndex, MI); break;
+ case X86::SHL16rCL:NI = MakeMInst( X86::SHL16mCL,FrameIndex, MI); break;
+ case X86::SHL32rCL:NI = MakeMInst( X86::SHL32mCL,FrameIndex, MI); break;
+ case X86::SHL8ri: NI = MakeMIInst(X86::SHL8mi , FrameIndex, MI); break;
+ case X86::SHL16ri: NI = MakeMIInst(X86::SHL16mi, FrameIndex, MI); break;
+ case X86::SHL32ri: NI = MakeMIInst(X86::SHL32mi, FrameIndex, MI); break;
+ case X86::SHR8rCL: NI = MakeMInst( X86::SHR8mCL ,FrameIndex, MI); break;
+ case X86::SHR16rCL:NI = MakeMInst( X86::SHR16mCL,FrameIndex, MI); break;
+ case X86::SHR32rCL:NI = MakeMInst( X86::SHR32mCL,FrameIndex, MI); break;
+ case X86::SHR8ri: NI = MakeMIInst(X86::SHR8mi , FrameIndex, MI); break;
+ case X86::SHR16ri: NI = MakeMIInst(X86::SHR16mi, FrameIndex, MI); break;
+ case X86::SHR32ri: NI = MakeMIInst(X86::SHR32mi, FrameIndex, MI); break;
+ case X86::SAR8rCL: NI = MakeMInst( X86::SAR8mCL ,FrameIndex, MI); break;
+ case X86::SAR16rCL:NI = MakeMInst( X86::SAR16mCL,FrameIndex, MI); break;
+ case X86::SAR32rCL:NI = MakeMInst( X86::SAR32mCL,FrameIndex, MI); break;
+ case X86::SAR8ri: NI = MakeMIInst(X86::SAR8mi , FrameIndex, MI); break;
+ case X86::SAR16ri: NI = MakeMIInst(X86::SAR16mi, FrameIndex, MI); break;
+ case X86::SAR32ri: NI = MakeMIInst(X86::SAR32mi, FrameIndex, MI); break;
+ case X86::SHLD32rrCL:NI = MakeMRInst( X86::SHLD32mrCL,FrameIndex, MI);break;
+ case X86::SHLD32rri8:NI = MakeMRIInst(X86::SHLD32mri8,FrameIndex, MI);break;
+ case X86::SHRD32rrCL:NI = MakeMRInst( X86::SHRD32mrCL,FrameIndex, MI);break;
+ case X86::SHRD32rri8:NI = MakeMRIInst(X86::SHRD32mri8,FrameIndex, MI);break;
+ case X86::SETBr: NI = MakeMInst( X86::SETBm, FrameIndex, MI); break;
+ case X86::SETAEr: NI = MakeMInst( X86::SETAEm, FrameIndex, MI); break;
+ case X86::SETEr: NI = MakeMInst( X86::SETEm, FrameIndex, MI); break;
+ case X86::SETNEr: NI = MakeMInst( X86::SETNEm, FrameIndex, MI); break;
+ case X86::SETBEr: NI = MakeMInst( X86::SETBEm, FrameIndex, MI); break;
+ case X86::SETAr: NI = MakeMInst( X86::SETAm, FrameIndex, MI); break;
+ case X86::SETSr: NI = MakeMInst( X86::SETSm, FrameIndex, MI); break;
+ case X86::SETNSr: NI = MakeMInst( X86::SETNSm, FrameIndex, MI); break;
+ case X86::SETLr: NI = MakeMInst( X86::SETLm, FrameIndex, MI); break;
+ case X86::SETGEr: NI = MakeMInst( X86::SETGEm, FrameIndex, MI); break;
+ case X86::SETLEr: NI = MakeMInst( X86::SETLEm, FrameIndex, MI); break;
+ case X86::SETGr: NI = MakeMInst( X86::SETGm, FrameIndex, MI); break;
+ case X86::TEST8rr: NI = MakeMRInst(X86::TEST8mr ,FrameIndex, MI); break;
+ case X86::TEST16rr:NI = MakeMRInst(X86::TEST16mr,FrameIndex, MI); break;
+ case X86::TEST32rr:NI = MakeMRInst(X86::TEST32mr,FrameIndex, MI); break;
+ case X86::TEST8ri: NI = MakeMIInst(X86::TEST8mi ,FrameIndex, MI); break;
+ case X86::TEST16ri:NI = MakeMIInst(X86::TEST16mi,FrameIndex, MI); break;
+ case X86::TEST32ri:NI = MakeMIInst(X86::TEST32mi,FrameIndex, MI); break;
+ case X86::CMP8rr: NI = MakeMRInst(X86::CMP8mr , FrameIndex, MI); break;
+ case X86::CMP16rr: NI = MakeMRInst(X86::CMP16mr, FrameIndex, MI); break;
+ case X86::CMP32rr: NI = MakeMRInst(X86::CMP32mr, FrameIndex, MI); break;
+ case X86::CMP8ri: NI = MakeMIInst(X86::CMP8mi , FrameIndex, MI); break;
+ case X86::CMP16ri: NI = MakeMIInst(X86::CMP16mi, FrameIndex, MI); break;
+ case X86::CMP32ri: NI = MakeMIInst(X86::CMP32mi, FrameIndex, MI); break;
+ default: break; // Cannot fold
+ }
+ } else if (i == 1) {
+ switch(MI->getOpcode()) {
+ case X86::XCHG8rr: NI = MakeRMInst(X86::XCHG8rm ,FrameIndex, MI); break;
+ case X86::XCHG16rr:NI = MakeRMInst(X86::XCHG16rm,FrameIndex, MI); break;
+ case X86::XCHG32rr:NI = MakeRMInst(X86::XCHG32rm,FrameIndex, MI); break;
+ case X86::MOV8rr: NI = MakeRMInst(X86::MOV8rm , FrameIndex, MI); break;
+ case X86::MOV16rr: NI = MakeRMInst(X86::MOV16rm, FrameIndex, MI); break;
+ case X86::MOV32rr: NI = MakeRMInst(X86::MOV32rm, FrameIndex, MI); break;
+ case X86::ADD8rr: NI = MakeRMInst(X86::ADD8rm , FrameIndex, MI); break;
+ case X86::ADD16rr: NI = MakeRMInst(X86::ADD16rm, FrameIndex, MI); break;
+ case X86::ADD32rr: NI = MakeRMInst(X86::ADD32rm, FrameIndex, MI); break;
+ case X86::ADC32rr: NI = MakeRMInst(X86::ADC32rm, FrameIndex, MI); break;
+ case X86::SUB8rr: NI = MakeRMInst(X86::SUB8rm , FrameIndex, MI); break;
+ case X86::SUB16rr: NI = MakeRMInst(X86::SUB16rm, FrameIndex, MI); break;
+ case X86::SUB32rr: NI = MakeRMInst(X86::SUB32rm, FrameIndex, MI); break;
+ case X86::SBB32rr: NI = MakeRMInst(X86::SBB32rm, FrameIndex, MI); break;
+ case X86::AND8rr: NI = MakeRMInst(X86::AND8rm , FrameIndex, MI); break;
+ case X86::AND16rr: NI = MakeRMInst(X86::AND16rm, FrameIndex, MI); break;
+ case X86::AND32rr: NI = MakeRMInst(X86::AND32rm, FrameIndex, MI); break;
+ case X86::OR8rr: NI = MakeRMInst(X86::OR8rm , FrameIndex, MI); break;
+ case X86::OR16rr: NI = MakeRMInst(X86::OR16rm, FrameIndex, MI); break;
+ case X86::OR32rr: NI = MakeRMInst(X86::OR32rm, FrameIndex, MI); break;
+ case X86::XOR8rr: NI = MakeRMInst(X86::XOR8rm , FrameIndex, MI); break;
+ case X86::XOR16rr: NI = MakeRMInst(X86::XOR16rm, FrameIndex, MI); break;
+ case X86::XOR32rr: NI = MakeRMInst(X86::XOR32rm, FrameIndex, MI); break;
+ case X86::TEST8rr: NI = MakeRMInst(X86::TEST8rm ,FrameIndex, MI); break;
+ case X86::TEST16rr:NI = MakeRMInst(X86::TEST16rm,FrameIndex, MI); break;
+ case X86::TEST32rr:NI = MakeRMInst(X86::TEST32rm,FrameIndex, MI); break;
+ case X86::IMUL16rr:NI = MakeRMInst(X86::IMUL16rm,FrameIndex, MI); break;
+ case X86::IMUL32rr:NI = MakeRMInst(X86::IMUL32rm,FrameIndex, MI); break;
+ case X86::IMUL16rri: NI = MakeRMIInst(X86::IMUL16rmi, FrameIndex, MI);break;
+ case X86::IMUL32rri: NI = MakeRMIInst(X86::IMUL32rmi, FrameIndex, MI);break;
+ case X86::CMP8rr: NI = MakeRMInst(X86::CMP8rm , FrameIndex, MI); break;
+ case X86::CMP16rr: NI = MakeRMInst(X86::CMP16rm, FrameIndex, MI); break;
+ case X86::CMP32rr: NI = MakeRMInst(X86::CMP32rm, FrameIndex, MI); break;
+ case X86::MOVSX16rr8: NI = MakeRMInst(X86::MOVSX16rm8 , FrameIndex, MI); break;
+ case X86::MOVSX32rr8: NI = MakeRMInst(X86::MOVSX32rm8, FrameIndex, MI); break;
+ case X86::MOVSX32rr16:NI = MakeRMInst(X86::MOVSX32rm16, FrameIndex, MI); break;
+ case X86::MOVZX16rr8: NI = MakeRMInst(X86::MOVZX16rm8 , FrameIndex, MI); break;
+ case X86::MOVZX32rr8: NI = MakeRMInst(X86::MOVZX32rm8, FrameIndex, MI); break;
+ case X86::MOVZX32rr16:NI = MakeRMInst(X86::MOVZX32rm16, FrameIndex, MI); break;
+ default: break;
+ }
+ }
+ if (NI) {
+ MI = MBB.insert(MBB.erase(MI), NI);
+ return true;
+ } else {
+ if (PrintFailedFusing)
+ std::cerr << "We failed to fuse: " << *MI;
+ return false;
+ }
+}
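
The FIXME above notes that this opcode-to-opcode mapping ought to come out of
tablegen once patterns can express it. Purely as a hypothetical sketch of what
such generated data could look like (none of these names come from this patch),
the switch could collapse into a lookup table per operand index:

  // Hypothetical generated table for operand-0 folds; only a few rows shown.
  struct FoldEntry { unsigned RegOpc, MemOpc; };
  static const FoldEntry Op0Folds[] = {
    { X86::MOV8rr,  X86::MOV8mr  },
    { X86::MOV16rr, X86::MOV16mr },
    { X86::MOV32rr, X86::MOV32mr },
  };
  // Folding then becomes a table search instead of a hand-written switch:
  for (unsigned j = 0, e = sizeof(Op0Folds)/sizeof(Op0Folds[0]); j != e; ++j)
    if (Op0Folds[j].RegOpc == MI->getOpcode())
      NI = MakeMRInst(Op0Folds[j].MemOpc, FrameIndex, MI);
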
+
//===----------------------------------------------------------------------===//
// Stack Frame Processing methods
//===----------------------------------------------------------------------===//
@@ -93,14 +318,14 @@
return NoFPElim || MF.getFrameInfo()->hasVarSizedObjects();
}
-int X86RegisterInfo::eliminateCallFramePseudoInstr(MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator &I) const {
- MachineInstr *New = 0, *Old = *I;;
+void X86RegisterInfo::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
if (hasFP(MF)) {
// If we have a frame pointer, turn the adjcallstackup instruction into a
// 'sub ESP, <amt>' and the adjcallstackdown instruction into 'add ESP,
// <amt>'
+ MachineInstr *Old = I;
unsigned Amount = Old->getOperand(0).getImmedValue();
if (Amount != 0) {
// We need to keep the stack aligned properly. To do this, we round the
@@ -109,30 +334,28 @@
unsigned Align = MF.getTarget().getFrameInfo().getStackAlignment();
Amount = (Amount+Align-1)/Align*Align;
+ MachineInstr *New;
if (Old->getOpcode() == X86::ADJCALLSTACKDOWN) {
- New=BuildMI(X86::SUBri32, 2, X86::ESP).addReg(X86::ESP).addZImm(Amount);
+ New=BuildMI(X86::SUB32ri, 1, X86::ESP, MachineOperand::UseAndDef)
+ .addZImm(Amount);
} else {
assert(Old->getOpcode() == X86::ADJCALLSTACKUP);
- New=BuildMI(X86::ADDri32, 2, X86::ESP).addReg(X86::ESP).addZImm(Amount);
+ New=BuildMI(X86::ADD32ri, 1, X86::ESP, MachineOperand::UseAndDef)
+ .addZImm(Amount);
}
+
+ // Replace the pseudo instruction with a new instruction...
+ MBB.insert(I, New);
}
}
- if (New) {
- *I = New; // Replace the pseudo instruction with a new instruction...
- delete Old;
- return 0;
- } else {
- I = MBB.erase(I);// Just delete the pseudo instruction...
- delete Old;
- return -1;
- }
+ MBB.erase(I);
}
-int X86RegisterInfo::eliminateFrameIndex(MachineFunction &MF,
- MachineBasicBlock::iterator &II) const {
+void X86RegisterInfo::eliminateFrameIndex(MachineFunction &MF,
+ MachineBasicBlock::iterator II) const {
unsigned i = 0;
- MachineInstr &MI = **II;
+ MachineInstr &MI = *II;
while (!MI.getOperand(i).isFrameIndex()) {
++i;
assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
@@ -150,85 +373,86 @@
if (!hasFP(MF))
Offset += MF.getFrameInfo()->getStackSize();
+ else
+ Offset += 4; // Skip the saved EBP
MI.SetMachineOperandConst(i+3, MachineOperand::MO_SignExtendedImmed, Offset);
- return 0;
}
-int X86RegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF)
- const {
+void
+X86RegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF) const{
if (hasFP(MF)) {
// Create a frame entry for the EBP register that must be saved.
- int FrameIdx = MF.getFrameInfo()->CreateStackObject(4, 4);
- assert(FrameIdx == MF.getFrameInfo()->getObjectIndexEnd()-1 &&
- "Slot for EBP register must be last in order to be found!");
+ int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, -8);
+ assert(FrameIdx == MF.getFrameInfo()->getObjectIndexBegin() &&
+ "Slot for EBP register must be last in order to be found!");
}
- return 0;
}
-int X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
+void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB
MachineBasicBlock::iterator MBBI = MBB.begin();
MachineFrameInfo *MFI = MF.getFrameInfo();
MachineInstr *MI;
- unsigned oldSize = MBB.size();
// Get the number of bytes to allocate from the FrameInfo
unsigned NumBytes = MFI->getStackSize();
if (hasFP(MF)) {
// Get the offset of the stack slot for the EBP register... which is
// guaranteed to be the last slot by processFunctionBeforeFrameFinalized.
- int EBPOffset = MFI->getObjectOffset(MFI->getObjectIndexEnd()-1)+4;
+ int EBPOffset = MFI->getObjectOffset(MFI->getObjectIndexBegin())+4;
if (NumBytes) { // adjust stack pointer: ESP -= numbytes
- MI= BuildMI(X86::SUBri32, 2, X86::ESP).addReg(X86::ESP).addZImm(NumBytes);
- MBBI = MBB.insert(MBBI, MI)+1;
+ MI= BuildMI(X86::SUB32ri, 1, X86::ESP, MachineOperand::UseAndDef)
+ .addZImm(NumBytes);
+ MBB.insert(MBBI, MI);
}
// Save EBP into the appropriate stack slot...
- MI = addRegOffset(BuildMI(X86::MOVrm32, 5), // mov [ESP-<offset>], EBP
+ MI = addRegOffset(BuildMI(X86::MOV32mr, 5), // mov [ESP-<offset>], EBP
X86::ESP, EBPOffset+NumBytes).addReg(X86::EBP);
- MBBI = MBB.insert(MBBI, MI)+1;
+ MBB.insert(MBBI, MI);
// Update EBP with the new base value...
- if (NumBytes == 0) // mov EBP, ESP
- MI = BuildMI(X86::MOVrr32, 2, X86::EBP).addReg(X86::ESP);
+ if (NumBytes == 4) // mov EBP, ESP
+ MI = BuildMI(X86::MOV32rr, 2, X86::EBP).addReg(X86::ESP);
else // lea EBP, [ESP+StackSize]
- MI = addRegOffset(BuildMI(X86::LEAr32, 5, X86::EBP), X86::ESP, NumBytes);
+ MI = addRegOffset(BuildMI(X86::LEA32r, 5, X86::EBP), X86::ESP,NumBytes-4);
- MBBI = MBB.insert(MBBI, MI)+1;
+ MBB.insert(MBBI, MI);
} else {
- // When we have no frame pointer, we reserve argument space for call sites
- // in the function immediately on entry to the current function. This
- // eliminates the need for add/sub ESP brackets around call sites.
- //
- NumBytes += MFI->getMaxCallFrameSize();
-
- // Round the size to a multiple of the alignment (don't forget the 4 byte
- // offset though).
- unsigned Align = MF.getTarget().getFrameInfo().getStackAlignment();
- NumBytes = ((NumBytes+4)+Align-1)/Align*Align - 4;
+ if (MFI->hasCalls()) {
+ // When we have no frame pointer, we reserve argument space for call sites
+ // in the function immediately on entry to the current function. This
+ // eliminates the need for add/sub ESP brackets around call sites.
+ //
+ NumBytes += MFI->getMaxCallFrameSize();
+
+ // Round the size to a multiple of the alignment (don't forget the 4 byte
+ // offset though).
+ unsigned Align = MF.getTarget().getFrameInfo().getStackAlignment();
+ NumBytes = ((NumBytes+4)+Align-1)/Align*Align - 4;
+ }
// Update frame info to pretend that this is part of the stack...
MFI->setStackSize(NumBytes);
if (NumBytes) {
// adjust stack pointer: ESP -= numbytes
- MI= BuildMI(X86::SUBri32, 2, X86::ESP).addReg(X86::ESP).addZImm(NumBytes);
+ MI= BuildMI(X86::SUB32ri, 1, X86::ESP, MachineOperand::UseAndDef)
+ .addZImm(NumBytes);
MBB.insert(MBBI, MI);
}
}
- return MBB.size() - oldSize;
}
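
The rounding in the no-frame-pointer path above keeps ESP at the target stack
alignment while folding in the 4-byte offset the comment mentions (the return
address the caller has already pushed). A small standalone illustration of the
arithmetic, with made-up input values:

  #include <cassert>

  int main() {
    unsigned NumBytes = 20, Align = 16;   // example inputs only
    // Same formula as emitPrologue: round (NumBytes + 4) up to a multiple
    // of Align, then take the 4 bytes back out of the allocation.
    NumBytes = ((NumBytes + 4) + Align - 1) / Align * Align - 4;
    assert(NumBytes == 28 && (NumBytes + 4) % Align == 0);
    return 0;
  }
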
-int X86RegisterInfo::emitEpilogue(MachineFunction &MF,
- MachineBasicBlock &MBB) const {
- unsigned oldSize = MBB.size();
+void X86RegisterInfo::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
- MachineBasicBlock::iterator MBBI = MBB.end()-1;
+ MachineBasicBlock::iterator MBBI = prior(MBB.end());
MachineInstr *MI;
- assert((*MBBI)->getOpcode() == X86::RET &&
+ assert(MBBI->getOpcode() == X86::RET &&
"Can only insert epilog into returning blocks");
if (hasFP(MF)) {
@@ -237,30 +461,26 @@
int EBPOffset = MFI->getObjectOffset(MFI->getObjectIndexEnd()-1)+4;
// mov ESP, EBP
- MI = BuildMI(X86::MOVrr32, 1,X86::ESP).addReg(X86::EBP);
- MBBI = 1+MBB.insert(MBBI, MI);
+ MI = BuildMI(X86::MOV32rr, 1,X86::ESP).addReg(X86::EBP);
+ MBB.insert(MBBI, MI);
- // mov EBP, [ESP-<offset>]
- MI = addRegOffset(BuildMI(X86::MOVmr32, 5, X86::EBP), X86::ESP, EBPOffset);
- MBBI = 1+MBB.insert(MBBI, MI);
+ // pop EBP
+ MI = BuildMI(X86::POP32r, 0, X86::EBP);
+ MBB.insert(MBBI, MI);
} else {
// Get the number of bytes allocated from the FrameInfo...
unsigned NumBytes = MFI->getStackSize();
if (NumBytes) { // adjust stack pointer back: ESP += numbytes
- MI =BuildMI(X86::ADDri32, 2, X86::ESP).addReg(X86::ESP).addZImm(NumBytes);
- MBBI = 1+MBB.insert(MBBI, MI);
+ MI =BuildMI(X86::ADD32ri, 1, X86::ESP, MachineOperand::UseAndDef)
+ .addZImm(NumBytes);
+ MBB.insert(MBBI, MI);
}
}
- return MBB.size() - oldSize;
}
-} // End llvm namespace
-
#include "X86GenRegisterInfo.inc"
-namespace llvm {
-
const TargetRegisterClass*
X86RegisterInfo::getRegClassForType(const Type* Ty) const {
switch (Ty->getPrimitiveID()) {
@@ -280,5 +500,3 @@
case Type::DoubleTyID: return &RFPInstance;
}
}
-
-} // End llvm namespace
Index: llvm/lib/Target/X86/X86RegisterInfo.h
diff -u llvm/lib/Target/X86/X86RegisterInfo.h:1.18 llvm/lib/Target/X86/X86RegisterInfo.h:1.18.4.1
--- llvm/lib/Target/X86/X86RegisterInfo.h:1.18 Tue Nov 11 16:41:33 2003
+++ llvm/lib/Target/X86/X86RegisterInfo.h Mon Mar 1 17:58:15 2004
@@ -28,30 +28,41 @@
/// Code Generation virtual methods...
int storeRegToStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator &MBBI,
+ MachineBasicBlock::iterator MI,
unsigned SrcReg, int FrameIndex,
const TargetRegisterClass *RC) const;
int loadRegFromStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator &MBBI,
+ MachineBasicBlock::iterator MI,
unsigned DestReg, int FrameIndex,
const TargetRegisterClass *RC) const;
- int copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
+ int copyRegToReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
unsigned DestReg, unsigned SrcReg,
const TargetRegisterClass *RC) const;
- int eliminateCallFramePseudoInstr(MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator &I) const;
+ /// foldMemoryOperand - If this target supports it, fold a load or store of
+ /// the specified stack slot into the specified machine instruction for the
+ /// specified operand. If this is possible, the target should perform the
+ /// folding and return true, otherwise it should return false. If it folds
+ /// the instruction, it is likely that the MachineInstruction the iterator
+ /// references has been changed.
+ virtual bool foldMemoryOperand(MachineBasicBlock::iterator &MI,unsigned OpNum,
+ int FrameIndex) const;
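
A hypothetical caller of the hook above, sketched only against the declarations
in this header (the helper name and fallback policy are invented for
illustration; the real spiller logic is more involved):

  // Rewrite a use of a spilled value: prefer folding the stack slot into the
  // instruction itself; otherwise reload into a scratch register before MI.
  void rewriteSpilledUse(const X86RegisterInfo &RI, MachineBasicBlock &MBB,
                         MachineBasicBlock::iterator &MI, unsigned OpNum,
                         int FrameIndex, unsigned ScratchReg,
                         const TargetRegisterClass *RC) {
    if (RI.foldMemoryOperand(MI, OpNum, FrameIndex))
      return;                // MI now references the new memory-form instr
    RI.loadRegFromStackSlot(MBB, MI, ScratchReg, FrameIndex, RC);
    // (the register operand of MI would then be rewritten to use ScratchReg)
  }
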
- int eliminateFrameIndex(MachineFunction &MF,
- MachineBasicBlock::iterator &II) const;
- int processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const;
- int emitPrologue(MachineFunction &MF) const;
- int emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+ void eliminateFrameIndex(MachineFunction &MF,
+ MachineBasicBlock::iterator MI) const;
+
+ void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
+
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
};
} // End llvm namespace
Index: llvm/lib/Target/X86/X86RegisterInfo.td
diff -u llvm/lib/Target/X86/X86RegisterInfo.td:1.8 llvm/lib/Target/X86/X86RegisterInfo.td:1.8.6.1
--- llvm/lib/Target/X86/X86RegisterInfo.td:1.8 Tue Oct 21 10:17:13 2003
+++ llvm/lib/Target/X86/X86RegisterInfo.td Mon Mar 1 17:58:15 2004
@@ -76,8 +76,8 @@
// top-level register classes. The order specified in the register list is
// implicitly defined to be the register allocation order.
//
-def R8 : RegisterClass<i8, 1, [AL, CL, DL, BL, AH, CH, DH, BH]>;
-def R16 : RegisterClass<i16, 2, [AX, CX, DX, BX, SI, DI, BP, SP]> {
+def R8 : RegisterClass<i8, 1, [AL, AH, CL, CH, DL, DH, BL, BH]>;
+def R16 : RegisterClass<i16, 2, [AX, CX, DX, SI, DI, BX, BP, SP]> {
let Methods = [{
iterator allocation_order_end(MachineFunction &MF) const {
if (hasFP(MF)) // Does the function dedicate EBP to being a frame ptr?
@@ -88,7 +88,7 @@
}];
}
-def R32 : RegisterClass<i32, 4, [EAX, ECX, EDX, EBX, ESI, EDI, EBP, ESP]> {
+def R32 : RegisterClass<i32, 4, [EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP]> {
let Methods = [{
iterator allocation_order_end(MachineFunction &MF) const {
if (hasFP(MF)) // Does the function dedicate EBP to being a frame ptr?
Index: llvm/lib/Target/X86/X86TargetMachine.cpp
diff -u llvm/lib/Target/X86/X86TargetMachine.cpp:1.44 llvm/lib/Target/X86/X86TargetMachine.cpp:1.44.2.1
--- llvm/lib/Target/X86/X86TargetMachine.cpp:1.44 Sun Dec 28 15:23:38 2003
+++ llvm/lib/Target/X86/X86TargetMachine.cpp Mon Mar 1 17:58:15 2004
@@ -32,6 +32,9 @@
cl::opt<bool> NoSSAPeephole("disable-ssa-peephole", cl::init(true),
cl::desc("Disable the ssa-based peephole optimizer "
"(defaults to disabled)"));
+ cl::opt<bool> DisableOutput("disable-x86-llc-output", cl::Hidden,
+ cl::desc("Disable the X86 asm printer, for use "
+ "when profiling the code generator."));
}
// allocateX86TargetMachine - Allocate and return a subclass of TargetMachine
@@ -56,9 +59,6 @@
// does to emit statically compiled machine code.
bool X86TargetMachine::addPassesToEmitAssembly(PassManager &PM,
std::ostream &Out) {
- // FIXME: Implement the switch instruction in the instruction selector!
- PM.add(createLowerSwitchPass());
-
// FIXME: Implement the invoke/unwind instructions!
PM.add(createLowerInvokePass());
@@ -66,6 +66,9 @@
// unreachable basic blocks.
PM.add(createCFGSimplificationPass());
+ // FIXME: Implement the switch instruction in the instruction selector!
+ PM.add(createLowerSwitchPass());
+
if (NoPatternISel)
PM.add(createX86SimpleInstructionSelector(*this));
else
@@ -77,18 +80,18 @@
// Print the instruction selected machine code...
if (PrintCode)
- PM.add(createMachineFunctionPrinterPass());
+ PM.add(createMachineFunctionPrinterPass(&std::cerr));
// Perform register allocation to convert to a concrete x86 representation
PM.add(createRegisterAllocator());
if (PrintCode)
- PM.add(createMachineFunctionPrinterPass());
+ PM.add(createMachineFunctionPrinterPass(&std::cerr));
PM.add(createX86FloatingPointStackifierPass());
if (PrintCode)
- PM.add(createMachineFunctionPrinterPass());
+ PM.add(createMachineFunctionPrinterPass(&std::cerr));
// Insert prolog/epilog code. Eliminate abstract frame index references...
PM.add(createPrologEpilogCodeInserter());
@@ -98,7 +101,8 @@
if (PrintCode) // Print the register-allocated code
PM.add(createX86CodePrinterPass(std::cerr, *this));
- PM.add(createX86CodePrinterPass(Out, *this));
+ if (!DisableOutput)
+ PM.add(createX86CodePrinterPass(Out, *this));
// Delete machine code for this function
PM.add(createMachineCodeDeleter());
@@ -111,8 +115,6 @@
/// not supported for this target.
///
void X86JITInfo::addPassesToJITCompile(FunctionPassManager &PM) {
- // FIXME: Implement the switch instruction in the instruction selector!
- PM.add(createLowerSwitchPass());
// FIXME: Implement the invoke/unwind instructions!
PM.add(createLowerInvokePass());
@@ -121,6 +123,9 @@
// unreachable basic blocks.
PM.add(createCFGSimplificationPass());
+ // FIXME: Implement the switch instruction in the instruction selector!
+ PM.add(createLowerSwitchPass());
+
if (NoPatternISel)
PM.add(createX86SimpleInstructionSelector(TM));
else
@@ -134,18 +139,18 @@
// Print the instruction selected machine code...
if (PrintCode)
- PM.add(createMachineFunctionPrinterPass());
+ PM.add(createMachineFunctionPrinterPass(&std::cerr));
// Perform register allocation to convert to a concrete x86 representation
PM.add(createRegisterAllocator());
if (PrintCode)
- PM.add(createMachineFunctionPrinterPass());
+ PM.add(createMachineFunctionPrinterPass(&std::cerr));
PM.add(createX86FloatingPointStackifierPass());
if (PrintCode)
- PM.add(createMachineFunctionPrinterPass());
+ PM.add(createMachineFunctionPrinterPass(&std::cerr));
// Insert prolog/epilog code. Eliminate abstract frame index references...
PM.add(createPrologEpilogCodeInserter());
Index: llvm/lib/Target/X86/X86TargetMachine.h
diff -u llvm/lib/Target/X86/X86TargetMachine.h:1.21 llvm/lib/Target/X86/X86TargetMachine.h:1.21.2.1
--- llvm/lib/Target/X86/X86TargetMachine.h:1.21 Sun Dec 28 15:23:38 2003
+++ llvm/lib/Target/X86/X86TargetMachine.h Mon Mar 1 17:58:15 2004
@@ -32,18 +32,14 @@
virtual const X86InstrInfo &getInstrInfo() const { return InstrInfo; }
virtual const TargetFrameInfo &getFrameInfo() const { return FrameInfo; }
- virtual const MRegisterInfo *getRegisterInfo() const {
+ virtual TargetJITInfo *getJITInfo() { return &JITInfo; }
+ virtual const MRegisterInfo *getRegisterInfo() const {
return &InstrInfo.getRegisterInfo();
}
- virtual TargetJITInfo *getJITInfo() {
- return &JITInfo;
- }
-
-
- virtual const TargetSchedInfo &getSchedInfo() const { abort(); }
- virtual const TargetRegInfo &getRegInfo() const { abort(); }
- virtual const TargetCacheInfo &getCacheInfo() const { abort(); }
+ // deprecated interfaces
+ virtual const TargetSchedInfo &getSchedInfo() const { abort(); }
+ virtual const TargetRegInfo &getRegInfo() const { abort(); }
/// addPassesToEmitMachineCode - Add passes to the specified pass manager to
/// get machine code emitted. This uses a MachineCodeEmitter object to handle