[llvm-commits] CVS: llvm/lib/Target/X86/InstSelectSimple.cpp
Chris Lattner
lattner at cs.uiuc.edu
Sun Jan 12 18:36:09 PST 2003
Changes in directory llvm/lib/Target/X86:
InstSelectSimple.cpp updated: 1.80 -> 1.81
---
Log message:
* Adjust to use new interfaces, eliminating CurReg stuff
* Support arbitrary FP constants
* Fix bugs in frame layout for function calls and incoming arguments
* Insert copies for constant arguments to PHI nodes into the BOTTOM of
predecessor blocks, not the top.
* Implement _floating point_ support: setcc, return, load, store, cast
* Fix several bugs in the cast instruction
* Abstract out call emission and load/store for FP
* Implement malloc/free without previous lowering pass.
* Make use of new forms of MachineOperand
* Implement _long_ support!
* Fix many bugs in FP support
* Change branch from je/jne to je/jmp
* Simplify code generated for GEP instructions
---
Diffs of the changes:
Index: llvm/lib/Target/X86/InstSelectSimple.cpp
diff -u llvm/lib/Target/X86/InstSelectSimple.cpp:1.80 llvm/lib/Target/X86/InstSelectSimple.cpp:1.81
--- llvm/lib/Target/X86/InstSelectSimple.cpp:1.80 Sat Dec 28 15:08:27 2002
+++ llvm/lib/Target/X86/InstSelectSimple.cpp Sun Jan 12 18:32:26 2003
@@ -1,6 +1,6 @@
//===-- InstSelectSimple.cpp - A simple instruction selector for x86 ------===//
//
-// This file defines a simple peephole instruction selector for the x86 platform
+// This file defines a simple peephole instruction selector for the x86 target
//
//===----------------------------------------------------------------------===//
@@ -21,6 +21,7 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/SSARegMap.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Support/InstVisitor.h"
#include "llvm/Target/MRegisterInfo.h"
@@ -59,14 +60,12 @@
MachineFunction *F; // The function we are compiling into
MachineBasicBlock *BB; // The current MBB we are compiling
- unsigned CurReg;
std::map<Value*, unsigned> RegMap; // Mapping between Val's and SSA Regs
// MBBMap - Mapping between LLVM BB -> Machine BB
std::map<const BasicBlock*, MachineBasicBlock*> MBBMap;
- ISel(TargetMachine &tm)
- : TM(tm), F(0), BB(0), CurReg(MRegisterInfo::FirstVirtualRegister) {}
+ ISel(TargetMachine &tm) : TM(tm), F(0), BB(0) {}
/// runOnFunction - Top level implementation of instruction selection for
/// the entire function.
@@ -89,7 +88,6 @@
RegMap.clear();
MBBMap.clear();
- CurReg = MRegisterInfo::FirstVirtualRegister;
F = 0;
return false; // We never modify the LLVM itself.
}
@@ -125,6 +123,14 @@
// Control flow operators
void visitReturnInst(ReturnInst &RI);
void visitBranchInst(BranchInst &BI);
+
+ struct ValueRecord {
+ unsigned Reg;
+ const Type *Ty;
+ ValueRecord(unsigned R, const Type *T) : Reg(R), Ty(T) {}
+ };
+ void doCall(const ValueRecord &Ret, MachineInstr *CallMI,
+ const std::vector<ValueRecord> &Args);
void visitCallInst(CallInst &I);
// Arithmetic operators
@@ -132,8 +138,8 @@
void visitAdd(BinaryOperator &B) { visitSimpleBinary(B, 0); }
void visitSub(BinaryOperator &B) { visitSimpleBinary(B, 1); }
void doMultiply(MachineBasicBlock *MBB, MachineBasicBlock::iterator &MBBI,
- unsigned destReg, const Type *resultType,
- unsigned op0Reg, unsigned op1Reg);
+ unsigned DestReg, const Type *DestTy,
+ unsigned Op0Reg, unsigned Op1Reg);
void visitMul(BinaryOperator &B);
void visitDiv(BinaryOperator &B) { visitDivRem(B); }
@@ -155,15 +161,16 @@
void visitSetGE(SetCondInst &I) { visitSetCCInst(I, 5); }
// Memory Instructions
+ MachineInstr *doFPLoad(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator &MBBI,
+ const Type *Ty, unsigned DestReg);
void visitLoadInst(LoadInst &I);
+ void doFPStore(const Type *Ty, unsigned DestAddrReg, unsigned SrcReg);
void visitStoreInst(StoreInst &I);
void visitGetElementPtrInst(GetElementPtrInst &I);
void visitAllocaInst(AllocaInst &I);
-
- // We assume that by this point, malloc instructions have been
- // lowered to calls, and dlsym will magically find malloc for us.
- void visitMallocInst(MallocInst &I) { visitInstruction (I); }
- void visitFreeInst(FreeInst &I) { visitInstruction(I); }
+ void visitMallocInst(MallocInst &I);
+ void visitFreeInst(FreeInst &I);
// Other operators
void visitShiftInst(ShiftInst &I);
@@ -176,11 +183,16 @@
}
/// promote32 - Make a value 32-bits wide, and put it somewhere.
- void promote32 (const unsigned targetReg, Value *v);
+ ///
+ void promote32(unsigned targetReg, const ValueRecord &VR);
+
+ /// EmitByteSwap - Byteswap SrcReg into DestReg.
+ ///
+ void EmitByteSwap(unsigned DestReg, unsigned SrcReg, unsigned Class);
- // emitGEPOperation - Common code shared between visitGetElementPtrInst and
- // constant expression GEP support.
- //
+ /// emitGEPOperation - Common code shared between visitGetElementPtrInst and
+ /// constant expression GEP support.
+ ///
void emitGEPOperation(MachineBasicBlock *BB, MachineBasicBlock::iterator&IP,
Value *Src, User::op_iterator IdxBegin,
User::op_iterator IdxEnd, unsigned TargetReg);
@@ -192,14 +204,28 @@
MachineBasicBlock::iterator &MBBI,
Constant *C, unsigned Reg);
- /// makeAnotherReg - This method returns the next register number
- /// we haven't yet used.
+ /// makeAnotherReg - This method returns the next register number we haven't
+ /// yet used.
+ ///
+ /// Long values are handled somewhat specially. They are always allocated
+ /// as pairs of 32 bit integer values. The register number returned is the
+ /// lower 32 bits of the long value, and the regNum+1 is the upper 32 bits
+ /// of the long value.
+ ///
unsigned makeAnotherReg(const Type *Ty) {
+ if (Ty == Type::LongTy || Ty == Type::ULongTy) {
+ const TargetRegisterClass *RC =
+ TM.getRegisterInfo()->getRegClassForType(Type::IntTy);
+ // Create the lower part
+ F->getSSARegMap()->createVirtualRegister(RC);
+ // Create the upper part.
+ return F->getSSARegMap()->createVirtualRegister(RC)-1;
+ }
+
// Add the mapping of regnumber => reg class to MachineFunction
const TargetRegisterClass *RC =
TM.getRegisterInfo()->getRegClassForType(Ty);
- F->getSSARegMap()->addRegMap(CurReg, RC);
- return CurReg++;
+ return F->getSSARegMap()->createVirtualRegister(RC);
}
/// getReg - This method turns an LLVM value into a register number. This
@@ -228,7 +254,7 @@
RegMap.erase(V); // Assign a new name to this constant if ref'd again
} else if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
// Move the address of the global into the register
- BMI(MBB, IPt, X86::MOVir32, 1, Reg).addReg(GV);
+ BMI(MBB, IPt, X86::MOVir32, 1, Reg).addGlobalAddress(GV);
RegMap.erase(V); // Assign a new name to this address if ref'd again
}
@@ -259,9 +285,9 @@
case Type::FloatTyID:
case Type::DoubleTyID: return cFP; // Floating Point is #3
+
case Type::LongTyID:
- case Type::ULongTyID: //return cLong; // Longs are class #3
- return cInt; // FIXME: LONGS ARE TREATED AS INTS!
+ case Type::ULongTyID: return cLong; // Longs are class #4
default:
assert(0 && "Invalid type to getClass!");
return cByte; // not reached
@@ -294,6 +320,20 @@
if (C->getType()->isIntegral()) {
unsigned Class = getClassB(C->getType());
+
+ if (Class == cLong) {
+ // Copy the value into the register pair.
+ uint64_t Val;
+ if (C->getType()->isSigned())
+ Val = cast<ConstantSInt>(C)->getValue();
+ else
+ Val = cast<ConstantUInt>(C)->getValue();
+
+ BMI(MBB, IP, X86::MOVir32, 1, R).addZImm(Val & 0xFFFFFFFF);
+ BMI(MBB, IP, X86::MOVir32, 1, R+1).addZImm(Val >> 32);
+ return;
+ }
+
assert(Class <= cInt && "Type not handled yet!");
static const unsigned IntegralOpcodeTab[] = {
@@ -304,7 +344,7 @@
BMI(MBB, IP, X86::MOVir8, 1, R).addZImm(C == ConstantBool::True);
} else if (C->getType()->isSigned()) {
ConstantSInt *CSI = cast<ConstantSInt>(C);
- BMI(MBB, IP, IntegralOpcodeTab[Class], 1, R).addSImm(CSI->getValue());
+ BMI(MBB, IP, IntegralOpcodeTab[Class], 1, R).addZImm(CSI->getValue());
} else {
ConstantUInt *CUI = cast<ConstantUInt>(C);
BMI(MBB, IP, IntegralOpcodeTab[Class], 1, R).addZImm(CUI->getValue());
@@ -316,8 +356,10 @@
else if (Value == +1.0)
BMI(MBB, IP, X86::FLD1, 0, R);
else {
- std::cerr << "Cannot load constant '" << Value << "'!\n";
- assert(0);
+ // Otherwise we need to spill the constant to memory...
+ MachineConstantPool *CP = F->getConstantPool();
+ unsigned CPI = CP->getConstantPoolIndex(CFP);
+ addConstantPoolReference(doFPLoad(MBB, IP, CFP->getType(), R), CPI);
}
} else if (isa<ConstantPointerNull>(C)) {
@@ -340,19 +382,17 @@
// X86, the stack frame looks like this:
//
// [ESP] -- return address
- // [ESP + 4] -- first argument (leftmost lexically) if four bytes in size
- // [ESP + 8] -- second argument, if four bytes in size
+ // [ESP + 4] -- first argument (leftmost lexically)
+ // [ESP + 8] -- second argument, if first argument is four bytes in size
// ...
//
- unsigned ArgOffset = 0;
+ unsigned ArgOffset = 4;
MachineFrameInfo *MFI = F->getFrameInfo();
for (Function::aiterator I = Fn.abegin(), E = Fn.aend(); I != E; ++I) {
unsigned Reg = getReg(*I);
- ArgOffset += 4; // Each argument takes at least 4 bytes on the stack...
int FI; // Frame object index
-
switch (getClassB(I->getType())) {
case cByte:
FI = MFI->CreateFixedObject(1, ArgOffset);
@@ -366,6 +406,12 @@
FI = MFI->CreateFixedObject(4, ArgOffset);
addFrameReference(BuildMI(BB, X86::MOVmr32, 4, Reg), FI);
break;
+ case cLong:
+ FI = MFI->CreateFixedObject(8, ArgOffset);
+ addFrameReference(BuildMI(BB, X86::MOVmr32, 4, Reg), FI);
+ addFrameReference(BuildMI(BB, X86::MOVmr32, 4, Reg+1), FI, 4);
+ ArgOffset += 4; // longs require 4 additional bytes
+ break;
case cFP:
unsigned Opcode;
if (I->getType() == Type::FloatTy) {
@@ -373,14 +419,15 @@
FI = MFI->CreateFixedObject(4, ArgOffset);
} else {
Opcode = X86::FLDr64;
- ArgOffset += 4; // doubles require 4 additional bytes
FI = MFI->CreateFixedObject(8, ArgOffset);
+ ArgOffset += 4; // doubles require 4 additional bytes
}
addFrameReference(BuildMI(BB, Opcode, 4, Reg), FI);
break;
default:
assert(0 && "Unhandled argument type!");
}
+ ArgOffset += 4; // Each argument takes at least 4 bytes on the stack...
}
}
@@ -390,6 +437,7 @@
/// the current one.
///
void ISel::SelectPHINodes() {
+ const MachineInstrInfo &MII = TM.getInstrInfo();
const Function &LF = *F->getFunction(); // The LLVM function...
for (Function::const_iterator I = LF.begin(), E = LF.end(); I != E; ++I) {
const BasicBlock *BB = I;
@@ -399,9 +447,17 @@
unsigned NumPHIs = 0;
for (BasicBlock::const_iterator I = BB->begin();
PHINode *PN = (PHINode*)dyn_cast<PHINode>(&*I); ++I) {
+
// Create a new machine instr PHI node, and insert it.
- MachineInstr *MI = BuildMI(X86::PHI, PN->getNumOperands(), getReg(*PN));
- MBB->insert(MBB->begin()+NumPHIs++, MI); // Insert it at the top of the BB
+ unsigned PHIReg = getReg(*PN);
+ MachineInstr *PhiMI = BuildMI(X86::PHI, PN->getNumOperands(), PHIReg);
+ MBB->insert(MBB->begin()+NumPHIs++, PhiMI);
+
+ MachineInstr *LongPhiMI = 0;
+ if (PN->getType() == Type::LongTy || PN->getType() == Type::ULongTy) {
+ LongPhiMI = BuildMI(X86::PHI, PN->getNumOperands(), PHIReg+1);
+ MBB->insert(MBB->begin()+NumPHIs++, LongPhiMI);
+ }
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
MachineBasicBlock *PredMBB = MBBMap[PN->getIncomingBlock(i)];
@@ -410,13 +466,17 @@
// available in a virtual register, insert the computation code into
// PredMBB
//
- // FIXME: This should insert the code into the BOTTOM of the block, not
- // the top of the block. This just makes for huge live ranges...
- MachineBasicBlock::iterator PI = PredMBB->begin();
- while ((*PI)->getOpcode() == X86::PHI) ++PI;
-
- MI->addRegOperand(getReg(PN->getIncomingValue(i), PredMBB, PI));
- MI->addMachineBasicBlockOperand(PredMBB);
+ MachineBasicBlock::iterator PI = PredMBB->end();
+ while (PI != PredMBB->begin() &&
+ MII.isTerminatorInstr((*(PI-1))->getOpcode()))
+ --PI;
+ unsigned ValReg = getReg(PN->getIncomingValue(i), PredMBB, PI);
+ PhiMI->addRegOperand(ValReg);
+ PhiMI->addMachineBasicBlockOperand(PredMBB);
+ if (LongPhiMI) {
+ LongPhiMI->addRegOperand(ValReg+1);
+ LongPhiMI->addMachineBasicBlockOperand(PredMBB);
+ }
}
}
}
@@ -426,102 +486,108 @@
/// SetCC instructions - Here we just emit boilerplate code to set a byte-sized
/// register, then move it to wherever the result should be.
-/// We handle FP setcc instructions by pushing them, doing a
-/// compare-and-pop-twice, and then copying the concodes to the main
-/// processor's concodes (I didn't make this up, it's in the Intel manual)
///
void ISel::visitSetCCInst(SetCondInst &I, unsigned OpNum) {
// The arguments are already supposed to be of the same type.
const Type *CompTy = I.getOperand(0)->getType();
+ bool isSigned = CompTy->isSigned();
unsigned reg1 = getReg(I.getOperand(0));
unsigned reg2 = getReg(I.getOperand(1));
+ unsigned DestReg = getReg(I);
- unsigned Class = getClass(CompTy);
+ // LLVM -> X86 signed X86 unsigned
+ // ----- ---------- ------------
+ // seteq -> sete sete
+ // setne -> setne setne
+ // setlt -> setl setb
+ // setgt -> setg seta
+ // setle -> setle setbe
+ // setge -> setge setae
+ static const unsigned OpcodeTab[2][6] = {
+ {X86::SETEr, X86::SETNEr, X86::SETBr, X86::SETAr, X86::SETBEr, X86::SETAEr},
+ {X86::SETEr, X86::SETNEr, X86::SETLr, X86::SETGr, X86::SETLEr, X86::SETGEr},
+ };
+
+ unsigned Class = getClassB(CompTy);
switch (Class) {
+ default: assert(0 && "Unknown type class!");
// Emit: cmp <var1>, <var2> (do the comparison). We can
// compare 8-bit with 8-bit, 16-bit with 16-bit, 32-bit with
// 32-bit.
case cByte:
- BuildMI (BB, X86::CMPrr8, 2).addReg (reg1).addReg (reg2);
+ BuildMI(BB, X86::CMPrr8, 2).addReg(reg1).addReg(reg2);
break;
case cShort:
- BuildMI (BB, X86::CMPrr16, 2).addReg (reg1).addReg (reg2);
+ BuildMI(BB, X86::CMPrr16, 2).addReg(reg1).addReg(reg2);
break;
case cInt:
- BuildMI (BB, X86::CMPrr32, 2).addReg (reg1).addReg (reg2);
- break;
-
-#if 0
- // Push the variables on the stack with fldl opcodes.
- // FIXME: assuming var1, var2 are in memory, if not, spill to
- // stack first
- case cFP: // Floats
- BuildMI (BB, X86::FLDr32, 1).addReg (reg1);
- BuildMI (BB, X86::FLDr32, 1).addReg (reg2);
+ BuildMI(BB, X86::CMPrr32, 2).addReg(reg1).addReg(reg2);
break;
- case cFP (doubles): // Doubles
- BuildMI (BB, X86::FLDr64, 1).addReg (reg1);
- BuildMI (BB, X86::FLDr64, 1).addReg (reg2);
+ case cFP:
+ BuildMI(BB, X86::FpUCOM, 2).addReg(reg1).addReg(reg2);
+ BuildMI(BB, X86::FNSTSWr8, 0);
+ BuildMI(BB, X86::SAHF, 1);
+ isSigned = false; // Compare with unsigned operators
break;
-#endif
+
case cLong:
- default:
- visitInstruction(I);
- }
+ if (OpNum < 2) { // seteq, setne
+ unsigned LoTmp = makeAnotherReg(Type::IntTy);
+ unsigned HiTmp = makeAnotherReg(Type::IntTy);
+ unsigned FinalTmp = makeAnotherReg(Type::IntTy);
+ BuildMI(BB, X86::XORrr32, 2, LoTmp).addReg(reg1).addReg(reg2);
+ BuildMI(BB, X86::XORrr32, 2, HiTmp).addReg(reg1+1).addReg(reg2+1);
+ BuildMI(BB, X86::ORrr32, 2, FinalTmp).addReg(LoTmp).addReg(HiTmp);
+ break; // Allow the sete or setne to be generated from flags set by OR
+ } else {
+ // Emit a sequence of code which compares the high and low parts once
+ // each, then uses a conditional move to handle the overflow case. For
+ // example, a setlt for long would generate code like this:
+ //
+ // AL = lo(op1) < lo(op2) // Signedness depends on operands
+ // BL = hi(op1) < hi(op2) // Always unsigned comparison
+ // dest = hi(op1) == hi(op2) ? AL : BL;
+ //
-#if 0
- if (CompTy->isFloatingPoint()) {
- // (Non-trapping) compare and pop twice.
- BuildMI (BB, X86::FUCOMPP, 0);
- // Move fp status word (concodes) to ax.
- BuildMI (BB, X86::FNSTSWr8, 1, X86::AX);
- // Load real concodes from ax.
- BuildMI (BB, X86::SAHF, 1).addReg(X86::AH);
+ // FIXME: This would be much better if we had hierarchical register
+ // classes! Until then, hardcode registers so that we can deal with their
+ // aliases (because we don't have conditional byte moves).
+ //
+ BuildMI(BB, X86::CMPrr32, 2).addReg(reg1).addReg(reg2);
+ BuildMI(BB, OpcodeTab[0][OpNum], 0, X86::AL);
+ BuildMI(BB, X86::CMPrr32, 2).addReg(reg1+1).addReg(reg2+1);
+ BuildMI(BB, OpcodeTab[isSigned][OpNum], 0, X86::BL);
+ BuildMI(BB, X86::CMOVErr16, 2, X86::BX).addReg(X86::BX).addReg(X86::AX);
+ BuildMI(BB, X86::MOVrr8, 1, DestReg).addReg(X86::BL);
+ return;
+ }
}
-#endif
- // Emit setOp instruction (extract concode; clobbers ax),
- // using the following mapping:
- // LLVM -> X86 signed X86 unsigned
- // ----- ----- -----
- // seteq -> sete sete
- // setne -> setne setne
- // setlt -> setl setb
- // setgt -> setg seta
- // setle -> setle setbe
- // setge -> setge setae
-
- static const unsigned OpcodeTab[2][6] = {
- {X86::SETEr, X86::SETNEr, X86::SETBr, X86::SETAr, X86::SETBEr, X86::SETAEr},
- {X86::SETEr, X86::SETNEr, X86::SETLr, X86::SETGr, X86::SETLEr, X86::SETGEr},
- };
-
- BuildMI(BB, OpcodeTab[CompTy->isSigned()][OpNum], 0, getReg(I));
+ BuildMI(BB, OpcodeTab[isSigned][OpNum], 0, DestReg);
}
/// promote32 - Emit instructions to turn a narrow operand into a 32-bit-wide
/// operand, in the specified target register.
-void ISel::promote32 (unsigned targetReg, Value *v) {
- unsigned vReg = getReg(v);
- bool isUnsigned = v->getType()->isUnsigned();
- switch (getClass(v->getType())) {
+void ISel::promote32(unsigned targetReg, const ValueRecord &VR) {
+ bool isUnsigned = VR.Ty->isUnsigned();
+ switch (getClassB(VR.Ty)) {
case cByte:
// Extend value into target register (8->32)
if (isUnsigned)
- BuildMI(BB, X86::MOVZXr32r8, 1, targetReg).addReg(vReg);
+ BuildMI(BB, X86::MOVZXr32r8, 1, targetReg).addReg(VR.Reg);
else
- BuildMI(BB, X86::MOVSXr32r8, 1, targetReg).addReg(vReg);
+ BuildMI(BB, X86::MOVSXr32r8, 1, targetReg).addReg(VR.Reg);
break;
case cShort:
// Extend value into target register (16->32)
if (isUnsigned)
- BuildMI(BB, X86::MOVZXr32r16, 1, targetReg).addReg(vReg);
+ BuildMI(BB, X86::MOVZXr32r16, 1, targetReg).addReg(VR.Reg);
else
- BuildMI(BB, X86::MOVSXr32r16, 1, targetReg).addReg(vReg);
+ BuildMI(BB, X86::MOVSXr32r16, 1, targetReg).addReg(VR.Reg);
break;
case cInt:
// Move value into target register (32->32)
- BuildMI(BB, X86::MOVrr32, 1, targetReg).addReg(vReg);
+ BuildMI(BB, X86::MOVrr32, 1, targetReg).addReg(VR.Reg);
break;
default:
assert(0 && "Unpromotable operand class in promote32");
@@ -539,27 +605,29 @@
/// ret long, ulong : Move value into EAX/EDX and return
/// ret float/double : Top of FP stack
///
-void ISel::visitReturnInst (ReturnInst &I) {
+void ISel::visitReturnInst(ReturnInst &I) {
if (I.getNumOperands() == 0) {
BuildMI(BB, X86::RET, 0); // Just emit a 'ret' instruction
return;
}
Value *RetVal = I.getOperand(0);
- switch (getClass(RetVal->getType())) {
+ unsigned RetReg = getReg(RetVal);
+ switch (getClassB(RetVal->getType())) {
case cByte: // integral return values: extend or move into EAX and return
case cShort:
case cInt:
- promote32(X86::EAX, RetVal);
+ promote32(X86::EAX, ValueRecord(RetReg, RetVal->getType()));
break;
case cFP: // Floats & Doubles: Return in ST(0)
- BuildMI(BB, X86::FpMOV, 1, X86::ST0).addReg(getReg(RetVal));
+ BuildMI(BB, X86::FpSETRESULT, 1).addReg(RetReg);
break;
case cLong:
- // ret long: use EAX(least significant 32 bits)/EDX (most
- // significant 32)...
+ BuildMI(BB, X86::MOVrr32, 1, X86::EAX).addReg(RetReg);
+ BuildMI(BB, X86::MOVrr32, 1, X86::EDX).addReg(RetReg+1);
+ break;
default:
- visitInstruction (I);
+ visitInstruction(I);
}
// Emit a 'ret' instruction
BuildMI(BB, X86::RET, 0);
@@ -572,36 +640,33 @@
///
void ISel::visitBranchInst(BranchInst &BI) {
if (BI.isConditional()) {
- BasicBlock *ifTrue = BI.getSuccessor(0);
- BasicBlock *ifFalse = BI.getSuccessor(1);
-
- // Compare condition with zero, followed by jump-if-equal to ifFalse, and
- // jump-if-nonequal to ifTrue
unsigned condReg = getReg(BI.getCondition());
BuildMI(BB, X86::CMPri8, 2).addReg(condReg).addZImm(0);
- BuildMI(BB, X86::JNE, 1).addPCDisp(BI.getSuccessor(0));
BuildMI(BB, X86::JE, 1).addPCDisp(BI.getSuccessor(1));
- } else { // unconditional branch
- BuildMI(BB, X86::JMP, 1).addPCDisp(BI.getSuccessor(0));
}
+ BuildMI(BB, X86::JMP, 1).addPCDisp(BI.getSuccessor(0));
}
-/// visitCallInst - Push args on stack and do a procedure call instruction.
-void ISel::visitCallInst(CallInst &CI) {
+
+/// doCall - This emits an abstract call instruction, setting up the arguments
+/// and the return value as appropriate. For the actual function call itself,
+/// it inserts the specified CallMI instruction into the stream.
+///
+void ISel::doCall(const ValueRecord &Ret, MachineInstr *CallMI,
+ const std::vector<ValueRecord> &Args) {
+
// Count how many bytes are to be pushed on the stack...
unsigned NumBytes = 0;
- if (CI.getNumOperands() > 1) {
- for (unsigned i = 1, e = CI.getNumOperands(); i != e; ++i)
- switch (getClass(CI.getOperand(i)->getType())) {
+ if (!Args.empty()) {
+ for (unsigned i = 0, e = Args.size(); i != e; ++i)
+ switch (getClassB(Args[i].Ty)) {
case cByte: case cShort: case cInt:
- NumBytes += 4;
- break;
+ NumBytes += 4; break;
case cLong:
- NumBytes += 8;
- break;
+ NumBytes += 8; break;
case cFP:
- NumBytes += CI.getOperand(i)->getType() == Type::FloatTy ? 4 : 8;
+ NumBytes += Args[i].Ty == Type::FloatTy ? 4 : 8;
break;
default: assert(0 && "Unknown class!");
}
@@ -611,60 +676,60 @@
// Arguments go on the stack in reverse order, as specified by the ABI.
unsigned ArgOffset = 0;
- for (unsigned i = 1, e = CI.getNumOperands(); i != e; ++i) {
- Value *Arg = CI.getOperand(i);
- switch (getClass(Arg->getType())) {
+ for (unsigned i = 0, e = Args.size(); i != e; ++i) {
+ unsigned ArgReg = Args[i].Reg;
+ switch (getClassB(Args[i].Ty)) {
case cByte:
case cShort: {
// Promote arg to 32 bits wide into a temporary register...
unsigned R = makeAnotherReg(Type::UIntTy);
- promote32(R, Arg);
+ promote32(R, Args[i]);
addRegOffset(BuildMI(BB, X86::MOVrm32, 5),
X86::ESP, ArgOffset).addReg(R);
break;
}
case cInt:
addRegOffset(BuildMI(BB, X86::MOVrm32, 5),
- X86::ESP, ArgOffset).addReg(getReg(Arg));
+ X86::ESP, ArgOffset).addReg(ArgReg);
break;
-
+ case cLong:
+ addRegOffset(BuildMI(BB, X86::MOVrm32, 5),
+ X86::ESP, ArgOffset).addReg(ArgReg);
+ addRegOffset(BuildMI(BB, X86::MOVrm32, 5),
+ X86::ESP, ArgOffset+4).addReg(ArgReg+1);
+ ArgOffset += 4; // 8 byte entry, not 4.
+ break;
+
case cFP:
- if (Arg->getType() == Type::FloatTy) {
+ if (Args[i].Ty == Type::FloatTy) {
addRegOffset(BuildMI(BB, X86::FSTr32, 5),
- X86::ESP, ArgOffset).addReg(getReg(Arg));
+ X86::ESP, ArgOffset).addReg(ArgReg);
} else {
- assert(Arg->getType() == Type::DoubleTy && "Unknown FP type!");
- ArgOffset += 4;
- addRegOffset(BuildMI(BB, X86::FSTr32, 5),
- X86::ESP, ArgOffset).addReg(getReg(Arg));
+ assert(Args[i].Ty == Type::DoubleTy && "Unknown FP type!");
+ addRegOffset(BuildMI(BB, X86::FSTr64, 5),
+ X86::ESP, ArgOffset).addReg(ArgReg);
+ ArgOffset += 4; // 8 byte entry, not 4.
}
break;
- default:
- // FIXME: long/ulong/float/double args not handled.
- visitInstruction(CI);
- break;
+ default: assert(0 && "Unknown class!");
}
ArgOffset += 4;
}
- }
-
- if (Function *F = CI.getCalledFunction()) {
- // Emit a CALL instruction with PC-relative displacement.
- BuildMI(BB, X86::CALLpcrel32, 1).addPCDisp(F);
} else {
- unsigned Reg = getReg(CI.getCalledValue());
- BuildMI(BB, X86::CALLr32, 1).addReg(Reg);
+ BuildMI(BB, X86::ADJCALLSTACKDOWN, 1).addZImm(0);
}
+ BB->push_back(CallMI);
+
BuildMI(BB, X86::ADJCALLSTACKUP, 1).addZImm(NumBytes);
// If there is a return value, scavenge the result from the location the call
// leaves it in...
//
- if (CI.getType() != Type::VoidTy) {
- unsigned resultTypeClass = getClass(CI.getType());
- switch (resultTypeClass) {
+ if (Ret.Ty != Type::VoidTy) {
+ unsigned DestClass = getClassB(Ret.Ty);
+ switch (DestClass) {
case cByte:
case cShort:
case cInt: {
@@ -674,32 +739,49 @@
X86::MOVrr8, X86::MOVrr16, X86::MOVrr32
};
static const unsigned AReg[] = { X86::AL, X86::AX, X86::EAX };
- BuildMI(BB, regRegMove[resultTypeClass], 1, getReg(CI))
- .addReg(AReg[resultTypeClass]);
+ BuildMI(BB, regRegMove[DestClass], 1, Ret.Reg).addReg(AReg[DestClass]);
break;
}
case cFP: // Floating-point return values live in %ST(0)
- BuildMI(BB, X86::FpMOV, 1, getReg(CI)).addReg(X86::ST0);
+ BuildMI(BB, X86::FpGETRESULT, 1, Ret.Reg);
break;
- default:
- std::cerr << "Cannot get return value for call of type '"
- << *CI.getType() << "'\n";
- visitInstruction(CI);
+ case cLong: // Long values are left in EDX:EAX
+ BuildMI(BB, X86::MOVrr32, 1, Ret.Reg).addReg(X86::EAX);
+ BuildMI(BB, X86::MOVrr32, 1, Ret.Reg+1).addReg(X86::EDX);
+ break;
+ default: assert(0 && "Unknown class!");
}
}
}
+
+/// visitCallInst - Push args on stack and do a procedure call instruction.
+void ISel::visitCallInst(CallInst &CI) {
+ MachineInstr *TheCall;
+ if (Function *F = CI.getCalledFunction()) {
+ // Emit a CALL instruction with PC-relative displacement.
+ TheCall = BuildMI(X86::CALLpcrel32, 1).addGlobalAddress(F, true);
+ } else { // Emit an indirect call...
+ unsigned Reg = getReg(CI.getCalledValue());
+ TheCall = BuildMI(X86::CALLr32, 1).addReg(Reg);
+ }
+
+ std::vector<ValueRecord> Args;
+ for (unsigned i = 1, e = CI.getNumOperands(); i != e; ++i)
+ Args.push_back(ValueRecord(getReg(CI.getOperand(i)),
+ CI.getOperand(i)->getType()));
+
+ unsigned DestReg = CI.getType() != Type::VoidTy ? getReg(CI) : 0;
+ doCall(ValueRecord(DestReg, CI.getType()), TheCall, Args);
+}
+
+
/// visitSimpleBinary - Implement simple binary operators for integral types...
/// OperatorClass is one of: 0 for Add, 1 for Sub, 2 for And, 3 for Or,
/// 4 for Xor.
///
void ISel::visitSimpleBinary(BinaryOperator &B, unsigned OperatorClass) {
- if (B.getType() == Type::BoolTy) // FIXME: Handle bools for logicals
- visitInstruction(B);
-
- unsigned Class = getClass(B.getType());
- if (Class > cFP) // FIXME: Handle longs
- visitInstruction(B);
+ unsigned Class = getClassB(B.getType());
static const unsigned OpcodeTab[][4] = {
// Arithmetic operators
@@ -711,28 +793,45 @@
{ X86:: ORrr8, X86:: ORrr16, X86:: ORrr32, 0 }, // OR
{ X86::XORrr8, X86::XORrr16, X86::XORrr32, 0 }, // XOR
};
+
+ bool isLong = false;
+ if (Class == cLong) {
+ isLong = true;
+ Class = cInt; // Bottom 32 bits are handled just like ints
+ }
unsigned Opcode = OpcodeTab[OperatorClass][Class];
assert(Opcode && "Floating point arguments to logical inst?");
unsigned Op0r = getReg(B.getOperand(0));
unsigned Op1r = getReg(B.getOperand(1));
- BuildMI(BB, Opcode, 2, getReg(B)).addReg(Op0r).addReg(Op1r);
+ unsigned DestReg = getReg(B);
+ BuildMI(BB, Opcode, 2, DestReg).addReg(Op0r).addReg(Op1r);
+
+ if (isLong) { // Handle the upper 32 bits of long values...
+ static const unsigned TopTab[] = {
+ X86::ADCrr32, X86::SBBrr32, X86::ANDrr32, X86::ORrr32, X86::XORrr32
+ };
+ BuildMI(BB, TopTab[OperatorClass], 2,
+ DestReg+1).addReg(Op0r+1).addReg(Op1r+1);
+ }
}
-/// doMultiply - Emit appropriate instructions to multiply together
-/// the registers op0Reg and op1Reg, and put the result in destReg.
-/// The type of the result should be given as resultType.
+/// doMultiply - Emit appropriate instructions to multiply together the
+/// registers op0Reg and op1Reg, and put the result in DestReg. The type of the
+/// result should be given as DestTy.
+///
+/// FIXME: doMultiply should use one of the two address IMUL instructions!
+///
void ISel::doMultiply(MachineBasicBlock *MBB, MachineBasicBlock::iterator &MBBI,
- unsigned destReg, const Type *resultType,
+ unsigned DestReg, const Type *DestTy,
unsigned op0Reg, unsigned op1Reg) {
- unsigned Class = getClass(resultType);
+ unsigned Class = getClass(DestTy);
switch (Class) {
case cFP: // Floating point multiply
- BuildMI(BB, X86::FpMUL, 2, destReg).addReg(op0Reg).addReg(op1Reg);
+ BMI(BB, MBBI, X86::FpMUL, 2, DestReg).addReg(op0Reg).addReg(op1Reg);
return;
default:
- case cLong:
- assert(0 && "doMultiply not implemented for this class yet!");
+ case cLong: assert(0 && "doMultiply cannot operate on LONG values!");
case cByte:
case cShort:
case cInt: // Small integrals, handled below...
@@ -740,30 +839,58 @@
}
static const unsigned Regs[] ={ X86::AL , X86::AX , X86::EAX };
- static const unsigned MulOpcode[]={ X86::MULrr8, X86::MULrr16, X86::MULrr32 };
+ static const unsigned MulOpcode[]={ X86::MULr8 , X86::MULr16 , X86::MULr32 };
static const unsigned MovOpcode[]={ X86::MOVrr8, X86::MOVrr16, X86::MOVrr32 };
unsigned Reg = Regs[Class];
// Emit a MOV to put the first operand into the appropriately-sized
// subreg of EAX.
- BMI(MBB, MBBI, MovOpcode[Class], 1, Reg).addReg (op0Reg);
+ BMI(MBB, MBBI, MovOpcode[Class], 1, Reg).addReg(op0Reg);
// Emit the appropriate multiply instruction.
- BMI(MBB, MBBI, MulOpcode[Class], 1).addReg (op1Reg);
+ BMI(MBB, MBBI, MulOpcode[Class], 1).addReg(op1Reg);
// Emit another MOV to put the result into the destination register.
- BMI(MBB, MBBI, MovOpcode[Class], 1, destReg).addReg (Reg);
+ BMI(MBB, MBBI, MovOpcode[Class], 1, DestReg).addReg(Reg);
}
/// visitMul - Multiplies are not simple binary operators because they must deal
/// with the EAX register explicitly.
///
void ISel::visitMul(BinaryOperator &I) {
- unsigned DestReg = getReg(I);
unsigned Op0Reg = getReg(I.getOperand(0));
unsigned Op1Reg = getReg(I.getOperand(1));
- MachineBasicBlock::iterator MBBI = BB->end();
- doMultiply(BB, MBBI, DestReg, I.getType(), Op0Reg, Op1Reg);
+ unsigned DestReg = getReg(I);
+
+ // Simple scalar multiply?
+ if (I.getType() != Type::LongTy && I.getType() != Type::ULongTy) {
+ MachineBasicBlock::iterator MBBI = BB->end();
+ doMultiply(BB, MBBI, DestReg, I.getType(), Op0Reg, Op1Reg);
+ } else {
+ // Long value. We have to do things the hard way...
+ // Multiply the two low parts... capturing carry into EDX
+ BuildMI(BB, X86::MOVrr32, 1, X86::EAX).addReg(Op0Reg);
+ BuildMI(BB, X86::MULr32, 1).addReg(Op1Reg); // AL*BL
+
+ unsigned OverflowReg = makeAnotherReg(Type::UIntTy);
+ BuildMI(BB, X86::MOVrr32, 1, DestReg).addReg(X86::EAX); // AL*BL
+ BuildMI(BB, X86::MOVrr32, 1, OverflowReg).addReg(X86::EDX); // AL*BL >> 32
+
+ MachineBasicBlock::iterator MBBI = BB->end();
+ unsigned AHBLReg = makeAnotherReg(Type::UIntTy);
+ doMultiply(BB, MBBI, AHBLReg, Type::UIntTy, Op0Reg+1, Op1Reg); // AH*BL
+
+ unsigned AHBLplusOverflowReg = makeAnotherReg(Type::UIntTy);
+ BuildMI(BB, X86::ADDrr32, 2, // AH*BL+(AL*BL >> 32)
+ AHBLplusOverflowReg).addReg(AHBLReg).addReg(OverflowReg);
+
+ MBBI = BB->end();
+ unsigned ALBHReg = makeAnotherReg(Type::UIntTy);
+ doMultiply(BB, MBBI, ALBHReg, Type::UIntTy, Op0Reg, Op1Reg+1); // AL*BH
+
+ BuildMI(BB, X86::ADDrr32, 2, // AL*BH + AH*BL + (AL*BL >> 32)
+ DestReg+1).addReg(AHBLplusOverflowReg).addReg(ALBHReg);
+ }
}
@@ -779,19 +906,36 @@
unsigned ResultReg = getReg(I);
switch (Class) {
- case cFP: // Floating point multiply
+ case cFP: // Floating point divide
if (I.getOpcode() == Instruction::Div)
BuildMI(BB, X86::FpDIV, 2, ResultReg).addReg(Op0Reg).addReg(Op1Reg);
- else
- BuildMI(BB, X86::FpREM, 2, ResultReg).addReg(Op0Reg).addReg(Op1Reg);
+ else { // Floating point remainder...
+ MachineInstr *TheCall =
+ BuildMI(X86::CALLpcrel32, 1).addExternalSymbol("fmod", true);
+ std::vector<ValueRecord> Args;
+ Args.push_back(ValueRecord(Op0Reg, Type::DoubleTy));
+ Args.push_back(ValueRecord(Op1Reg, Type::DoubleTy));
+ doCall(ValueRecord(ResultReg, Type::DoubleTy), TheCall, Args);
+ }
return;
- default:
- case cLong:
- assert(0 && "div/rem not implemented for this class yet!");
- case cByte:
- case cShort:
- case cInt: // Small integerals, handled below...
- break;
+ case cLong: {
+ static const char *FnName[] =
+ { "__moddi3", "__divdi3", "__umoddi3", "__udivdi3" };
+
+ unsigned NameIdx = I.getType()->isUnsigned()*2;
+ NameIdx += I.getOpcode() == Instruction::Div;
+ MachineInstr *TheCall =
+ BuildMI(X86::CALLpcrel32, 1).addExternalSymbol(FnName[NameIdx], true);
+
+ std::vector<ValueRecord> Args;
+ Args.push_back(ValueRecord(Op0Reg, Type::LongTy));
+ Args.push_back(ValueRecord(Op1Reg, Type::LongTy));
+ doCall(ValueRecord(ResultReg, Type::LongTy), TheCall, Args);
+ return;
+ }
+ case cByte: case cShort: case cInt:
+ break; // Small integrals, handled below...
+ default: assert(0 && "Unknown class!");
}
static const unsigned Regs[] ={ X86::AL , X86::AX , X86::EAX };
@@ -801,8 +945,8 @@
static const unsigned ExtRegs[] ={ X86::AH , X86::DX , X86::EDX };
static const unsigned DivOpcode[][4] = {
- { X86::DIVrr8 , X86::DIVrr16 , X86::DIVrr32 , 0 }, // Unsigned division
- { X86::IDIVrr8, X86::IDIVrr16, X86::IDIVrr32, 0 }, // Signed division
+ { X86::DIVr8 , X86::DIVr16 , X86::DIVr32 , 0 }, // Unsigned division
+ { X86::IDIVr8, X86::IDIVr16, X86::IDIVr32, 0 }, // Signed division
};
bool isSigned = I.getType()->isSigned();
@@ -836,60 +980,149 @@
/// shift values equal to 1. Even the general case is sort of special,
/// because the shift amount has to be in CL, not just any old register.
///
-void ISel::visitShiftInst (ShiftInst &I) {
- unsigned Op0r = getReg (I.getOperand(0));
+void ISel::visitShiftInst(ShiftInst &I) {
+ unsigned SrcReg = getReg(I.getOperand(0));
unsigned DestReg = getReg(I);
bool isLeftShift = I.getOpcode() == Instruction::Shl;
- bool isOperandSigned = I.getType()->isUnsigned();
- unsigned OperandClass = getClass(I.getType());
+ bool isSigned = I.getType()->isSigned();
+ unsigned Class = getClass(I.getType());
+
+ static const unsigned ConstantOperand[][4] = {
+ { X86::SHRir8, X86::SHRir16, X86::SHRir32, X86::SHRDir32 }, // SHR
+ { X86::SARir8, X86::SARir16, X86::SARir32, X86::SHRDir32 }, // SAR
+ { X86::SHLir8, X86::SHLir16, X86::SHLir32, X86::SHLDir32 }, // SHL
+ { X86::SHLir8, X86::SHLir16, X86::SHLir32, X86::SHLDir32 }, // SAL = SHL
+ };
- if (OperandClass > cInt)
- visitInstruction(I); // Can't handle longs yet!
+ static const unsigned NonConstantOperand[][4] = {
+ { X86::SHRrr8, X86::SHRrr16, X86::SHRrr32 }, // SHR
+ { X86::SARrr8, X86::SARrr16, X86::SARrr32 }, // SAR
+ { X86::SHLrr8, X86::SHLrr16, X86::SHLrr32 }, // SHL
+ { X86::SHLrr8, X86::SHLrr16, X86::SHLrr32 }, // SAL = SHL
+ };
- if (ConstantUInt *CUI = dyn_cast<ConstantUInt> (I.getOperand (1)))
- {
- // The shift amount is constant, guaranteed to be a ubyte. Get its value.
- assert(CUI->getType() == Type::UByteTy && "Shift amount not a ubyte?");
- unsigned char shAmt = CUI->getValue();
-
- static const unsigned ConstantOperand[][4] = {
- { X86::SHRir8, X86::SHRir16, X86::SHRir32, 0 }, // SHR
- { X86::SARir8, X86::SARir16, X86::SARir32, 0 }, // SAR
- { X86::SHLir8, X86::SHLir16, X86::SHLir32, 0 }, // SHL
- { X86::SHLir8, X86::SHLir16, X86::SHLir32, 0 }, // SAL = SHL
- };
+ // Longs, as usual, are handled specially...
+ if (Class == cLong) {
+ // If we have a constant shift, we can generate much more efficient code
+ // than otherwise...
+ //
+ if (ConstantUInt *CUI = dyn_cast<ConstantUInt>(I.getOperand(1))) {
+ unsigned Amount = CUI->getValue();
+ if (Amount < 32) {
+ const unsigned *Opc = ConstantOperand[isLeftShift*2+isSigned];
+ if (isLeftShift) {
+ BuildMI(BB, Opc[3], 3,
+ DestReg+1).addReg(SrcReg+1).addReg(SrcReg).addZImm(Amount);
+ BuildMI(BB, Opc[2], 2, DestReg).addReg(SrcReg).addZImm(Amount);
+ } else {
+ BuildMI(BB, Opc[3], 3,
+ DestReg).addReg(SrcReg ).addReg(SrcReg+1).addZImm(Amount);
+ BuildMI(BB, Opc[2], 2, DestReg+1).addReg(SrcReg+1).addZImm(Amount);
+ }
+ } else { // Shifting more than 32 bits
+ Amount -= 32;
+ if (isLeftShift) {
+ BuildMI(BB, X86::SHLir32, 2,DestReg+1).addReg(SrcReg).addZImm(Amount);
+ BuildMI(BB, X86::MOVir32, 1,DestReg ).addZImm(0);
+ } else {
+ unsigned Opcode = isSigned ? X86::SARir32 : X86::SHRir32;
+ BuildMI(BB, Opcode, 2, DestReg).addReg(SrcReg+1).addZImm(Amount);
+ BuildMI(BB, X86::MOVir32, 1, DestReg+1).addZImm(0);
+ }
+ }
+ } else {
+ visitInstruction(I); // FIXME: Implement long shift by non-constant
+ }
+ return;
+ }
- const unsigned *OpTab = // Figure out the operand table to use
- ConstantOperand[isLeftShift*2+isOperandSigned];
+ if (ConstantUInt *CUI = dyn_cast<ConstantUInt>(I.getOperand(1))) {
+ // The shift amount is constant, guaranteed to be a ubyte. Get its value.
+ assert(CUI->getType() == Type::UByteTy && "Shift amount not a ubyte?");
- // Emit: <insn> reg, shamt (shift-by-immediate opcode "ir" form.)
- BuildMI(BB, OpTab[OperandClass], 2, DestReg).addReg(Op0r).addZImm(shAmt);
- }
- else
- {
- // The shift amount is non-constant.
- //
- // In fact, you can only shift with a variable shift amount if
- // that amount is already in the CL register, so we have to put it
- // there first.
- //
+ const unsigned *Opc = ConstantOperand[isLeftShift*2+isSigned];
+ BuildMI(BB, Opc[Class], 2, DestReg).addReg(SrcReg).addZImm(CUI->getValue());
+ } else { // The shift amount is non-constant.
+ BuildMI(BB, X86::MOVrr8, 1, X86::CL).addReg(getReg(I.getOperand(1)));
- // Emit: move cl, shiftAmount (put the shift amount in CL.)
- BuildMI(BB, X86::MOVrr8, 1, X86::CL).addReg(getReg(I.getOperand(1)));
+ const unsigned *Opc = NonConstantOperand[isLeftShift*2+isSigned];
+ BuildMI(BB, Opc[Class], 1, DestReg).addReg(SrcReg);
+ }
+}
- // This is a shift right (SHR).
- static const unsigned NonConstantOperand[][4] = {
- { X86::SHRrr8, X86::SHRrr16, X86::SHRrr32, 0 }, // SHR
- { X86::SARrr8, X86::SARrr16, X86::SARrr32, 0 }, // SAR
- { X86::SHLrr8, X86::SHLrr16, X86::SHLrr32, 0 }, // SHL
- { X86::SHLrr8, X86::SHLrr16, X86::SHLrr32, 0 }, // SAL = SHL
- };
- const unsigned *OpTab = // Figure out the operand table to use
- NonConstantOperand[isLeftShift*2+isOperandSigned];
+/// doFPLoad - This method is used to load an FP value from memory using the
+/// current endianness. NOTE: This method returns a partially constructed load
+/// instruction which needs to have the memory source filled in still.
+///
+MachineInstr *ISel::doFPLoad(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator &MBBI,
+ const Type *Ty, unsigned DestReg) {
+ assert(Ty == Type::FloatTy || Ty == Type::DoubleTy && "Unknown FP type!");
+ unsigned LoadOpcode = Ty == Type::FloatTy ? X86::FLDr32 : X86::FLDr64;
- BuildMI(BB, OpTab[OperandClass], 1, DestReg).addReg(Op0r);
- }
+ if (TM.getTargetData().isLittleEndian()) // fast path...
+ return BMI(MBB, MBBI, LoadOpcode, 4, DestReg);
+
+ // If we are big-endian, start by creating an LEA instruction to represent the
+ // address of the memory location to load from...
+ //
+ unsigned SrcAddrReg = makeAnotherReg(Type::UIntTy);
+ MachineInstr *Result = BMI(MBB, MBBI, X86::LEAr32, 5, SrcAddrReg);
+
+ // Allocate a temporary stack slot to transform the value into...
+ int FrameIdx = F->getFrameInfo()->CreateStackObject(Ty, TM.getTargetData());
+
+ // Perform the bswaps 32 bits at a time...
+ unsigned TmpReg1 = makeAnotherReg(Type::UIntTy);
+ unsigned TmpReg2 = makeAnotherReg(Type::UIntTy);
+ addDirectMem(BMI(MBB, MBBI, X86::MOVmr32, 4, TmpReg1), SrcAddrReg);
+ BMI(MBB, MBBI, X86::BSWAPr32, 1, TmpReg2).addReg(TmpReg1);
+ unsigned Offset = (Ty == Type::DoubleTy) << 2;
+ addFrameReference(BMI(MBB, MBBI, X86::MOVrm32, 5),
+ FrameIdx, Offset).addReg(TmpReg2);
+
+ if (Ty == Type::DoubleTy) { // Swap the other 32 bits of a double value...
+ TmpReg1 = makeAnotherReg(Type::UIntTy);
+ TmpReg2 = makeAnotherReg(Type::UIntTy);
+
+ addRegOffset(BMI(MBB, MBBI, X86::MOVmr32, 4, TmpReg1), SrcAddrReg, 4);
+ BMI(MBB, MBBI, X86::BSWAPr32, 1, TmpReg2).addReg(TmpReg1);
+ unsigned Offset = (Ty == Type::DoubleTy) << 2;
+ addFrameReference(BMI(MBB, MBBI, X86::MOVrm32,5), FrameIdx).addReg(TmpReg2);
+ }
+
+ // Now we can reload the final byteswapped result into the final destination.
+ addFrameReference(BMI(MBB, MBBI, LoadOpcode, 4, DestReg), FrameIdx);
+ return Result;
+}
+
+/// EmitByteSwap - Byteswap SrcReg into DestReg.
+///
+void ISel::EmitByteSwap(unsigned DestReg, unsigned SrcReg, unsigned Class) {
+ // Emit the byte swap instruction...
+ switch (Class) {
+ case cByte:
+    // No byteswap necessary for 8 bit value...
+ BuildMI(BB, X86::MOVrr8, 1, DestReg).addReg(SrcReg);
+ break;
+ case cInt:
+ // Use the 32 bit bswap instruction to do a 32 bit swap...
+ BuildMI(BB, X86::BSWAPr32, 1, DestReg).addReg(SrcReg);
+ break;
+
+ case cShort:
+ // For 16 bit we have to use an xchg instruction, because there is no
+    // 16-bit bswap. XCHG is necessarily not in SSA form, so we force things
+ // into AX to do the xchg.
+ //
+ BuildMI(BB, X86::MOVrr16, 1, X86::AX).addReg(SrcReg);
+ BuildMI(BB, X86::XCHGrr8, 2).addReg(X86::AL, MOTy::UseAndDef)
+ .addReg(X86::AH, MOTy::UseAndDef);
+ BuildMI(BB, X86::MOVrr16, 1, DestReg).addReg(X86::AX);
+ break;
+ default: assert(0 && "Cannot byteswap this class!");
+ }
}
@@ -905,16 +1138,14 @@
unsigned Class = getClass(I.getType());
switch (Class) {
- default: visitInstruction(I); // FIXME: Handle longs...
case cFP: {
- // FIXME: Handle endian swapping for FP values.
- unsigned Opcode = I.getType() == Type::FloatTy ? X86::FLDr32 : X86::FLDr64;
- addDirectMem(BuildMI(BB, Opcode, 4, DestReg), SrcAddrReg);
+ MachineBasicBlock::iterator MBBI = BB->end();
+ addDirectMem(doFPLoad(BB, MBBI, I.getType(), DestReg), SrcAddrReg);
return;
}
- case cInt: // Integers of various sizes handled below
- case cShort:
- case cByte: break;
+ case cLong: case cInt: case cShort: case cByte:
+ break; // Integers of various sizes handled below
+ default: assert(0 && "Unknown memory class!");
}
// We need to adjust the input pointer if we are emulating a big-endian
@@ -929,38 +1160,69 @@
}
unsigned IReg = DestReg;
- if (!isLittleEndian) { // If big endian we need an intermediate stage
- IReg = makeAnotherReg(I.getType());
- std::swap(IReg, DestReg);
- }
+ if (!isLittleEndian) // If big endian we need an intermediate stage
+ DestReg = makeAnotherReg(Class != cLong ? I.getType() : Type::UIntTy);
- static const unsigned Opcode[] = { X86::MOVmr8, X86::MOVmr16, X86::MOVmr32 };
+ static const unsigned Opcode[] = {
+ X86::MOVmr8, X86::MOVmr16, X86::MOVmr32, 0, X86::MOVmr32
+ };
addDirectMem(BuildMI(BB, Opcode[Class], 4, DestReg), SrcAddrReg);
- if (!isLittleEndian) {
- // Emit the byte swap instruction...
- switch (Class) {
- case cByte:
- // No byteswap neccesary for 8 bit value...
- BuildMI(BB, X86::MOVrr8, 1, IReg).addReg(DestReg);
- break;
- case cInt:
- // Use the 32 bit bswap instruction to do a 32 bit swap...
- BuildMI(BB, X86::BSWAPr32, 1, IReg).addReg(DestReg);
- break;
-
- case cShort:
- // For 16 bit we have to use an xchg instruction, because there is no
- // 16-bit bswap. XCHG is neccesarily not in SSA form, so we force things
- // into AX to do the xchg.
- //
- BuildMI(BB, X86::MOVrr16, 1, X86::AX).addReg(DestReg);
- BuildMI(BB, X86::XCHGrr8, 2).addReg(X86::AL, MOTy::UseAndDef)
- .addReg(X86::AH, MOTy::UseAndDef);
- BuildMI(BB, X86::MOVrr16, 1, DestReg).addReg(X86::AX);
- break;
- default: assert(0 && "Class not handled yet!");
+ // Handle long values now...
+ if (Class == cLong) {
+ if (isLittleEndian) {
+ addRegOffset(BuildMI(BB, X86::MOVmr32, 4, DestReg+1), SrcAddrReg, 4);
+ } else {
+ EmitByteSwap(IReg+1, DestReg, cInt);
+ unsigned TempReg = makeAnotherReg(Type::IntTy);
+ addRegOffset(BuildMI(BB, X86::MOVmr32, 4, TempReg), SrcAddrReg, 4);
+ EmitByteSwap(IReg, TempReg, cInt);
}
+ return;
+ }
+
+ if (!isLittleEndian)
+ EmitByteSwap(IReg, DestReg, Class);
+}
+
+
+/// doFPStore - This method is used to store an FP value to memory using the
+/// current endianness.
+///
+void ISel::doFPStore(const Type *Ty, unsigned DestAddrReg, unsigned SrcReg) {
+ assert(Ty == Type::FloatTy || Ty == Type::DoubleTy && "Unknown FP type!");
+ unsigned StoreOpcode = Ty == Type::FloatTy ? X86::FSTr32 : X86::FSTr64;
+
+ if (TM.getTargetData().isLittleEndian()) { // fast path...
+ addDirectMem(BuildMI(BB, StoreOpcode,5), DestAddrReg).addReg(SrcReg);
+ return;
+ }
+
+ // Allocate a temporary stack slot to transform the value into...
+ int FrameIdx = F->getFrameInfo()->CreateStackObject(Ty, TM.getTargetData());
+ unsigned SrcAddrReg = makeAnotherReg(Type::UIntTy);
+ addFrameReference(BuildMI(BB, X86::LEAr32, 5, SrcAddrReg), FrameIdx);
+
+ // Store the value into a temporary stack slot...
+ addDirectMem(BuildMI(BB, StoreOpcode, 5), SrcAddrReg).addReg(SrcReg);
+
+ // Perform the bswaps 32 bits at a time...
+ unsigned TmpReg1 = makeAnotherReg(Type::UIntTy);
+ unsigned TmpReg2 = makeAnotherReg(Type::UIntTy);
+ addDirectMem(BuildMI(BB, X86::MOVmr32, 4, TmpReg1), SrcAddrReg);
+ BuildMI(BB, X86::BSWAPr32, 1, TmpReg2).addReg(TmpReg1);
+ unsigned Offset = (Ty == Type::DoubleTy) << 2;
+ addRegOffset(BuildMI(BB, X86::MOVrm32, 5),
+ DestAddrReg, Offset).addReg(TmpReg2);
+
+ if (Ty == Type::DoubleTy) { // Swap the other 32 bits of a double value...
+ TmpReg1 = makeAnotherReg(Type::UIntTy);
+ TmpReg2 = makeAnotherReg(Type::UIntTy);
+
+ addRegOffset(BuildMI(BB, X86::MOVmr32, 4, TmpReg1), SrcAddrReg, 4);
+ BuildMI(BB, X86::BSWAPr32, 1, TmpReg2).addReg(TmpReg1);
+ unsigned Offset = (Ty == Type::DoubleTy) << 2;
+ addDirectMem(BuildMI(BB, X86::MOVrm32, 5), DestAddrReg).addReg(TmpReg2);
}
}
@@ -971,22 +1233,31 @@
void ISel::visitStoreInst(StoreInst &I) {
bool isLittleEndian = TM.getTargetData().isLittleEndian();
bool hasLongPointers = TM.getTargetData().getPointerSize() == 8;
- unsigned ValReg = getReg(I.getOperand(0));
- unsigned AddressReg = getReg(I.getOperand(1));
+ unsigned ValReg = getReg(I.getOperand(0));
+ unsigned AddressReg = getReg(I.getOperand(1));
unsigned Class = getClass(I.getOperand(0)->getType());
switch (Class) {
- default: visitInstruction(I); // FIXME: Handle longs...
- case cFP: {
- // FIXME: Handle endian swapping for FP values.
- unsigned Opcode = I.getOperand(0)->getType() == Type::FloatTy ?
- X86::FSTr32 : X86::FSTr64;
- addDirectMem(BuildMI(BB, Opcode, 1+4), AddressReg).addReg(ValReg);
+ case cLong:
+ if (isLittleEndian) {
+ addDirectMem(BuildMI(BB, X86::MOVrm32, 1+4), AddressReg).addReg(ValReg);
+ addRegOffset(BuildMI(BB, X86::MOVrm32, 1+4),
+ AddressReg, 4).addReg(ValReg+1);
+ } else {
+ unsigned T1 = makeAnotherReg(Type::IntTy);
+ unsigned T2 = makeAnotherReg(Type::IntTy);
+ EmitByteSwap(T1, ValReg , cInt);
+ EmitByteSwap(T2, ValReg+1, cInt);
+ addDirectMem(BuildMI(BB, X86::MOVrm32, 1+4), AddressReg).addReg(T2);
+ addRegOffset(BuildMI(BB, X86::MOVrm32, 1+4), AddressReg, 4).addReg(T1);
+ }
return;
- }
- case cInt: // Integers of various sizes handled below
- case cShort:
- case cByte: break;
+ case cFP:
+ doFPStore(I.getOperand(0)->getType(), AddressReg, ValReg);
+ return;
+ case cInt: case cShort: case cByte:
+ break; // Integers of various sizes handled below
+ default: assert(0 && "Unknown memory class!");
}
if (!isLittleEndian && hasLongPointers &&
@@ -997,26 +1268,9 @@
}
if (!isLittleEndian && Class != cByte) {
- // Emit a byte swap instruction...
- switch (Class) {
- case cInt: {
- unsigned R = makeAnotherReg(I.getOperand(0)->getType());
- BuildMI(BB, X86::BSWAPr32, 1, R).addReg(ValReg);
- ValReg = R;
- break;
- }
- case cShort:
- // For 16 bit we have to use an xchg instruction, because there is no
- // 16-bit bswap. XCHG is neccesarily not in SSA form, so we force things
- // into AX to do the xchg.
- //
- BuildMI(BB, X86::MOVrr16, 1, X86::AX).addReg(ValReg);
- BuildMI(BB, X86::XCHGrr8, 2).addReg(X86::AL, MOTy::UseAndDef)
- .addReg(X86::AH, MOTy::UseAndDef);
- ValReg = X86::AX;
- break;
- default: assert(0 && "Unknown class!");
- }
+ unsigned R = makeAnotherReg(I.getOperand(0)->getType());
+ EmitByteSwap(R, ValReg, Class);
+ ValReg = R;
}
static const unsigned Opcode[] = { X86::MOVrm8, X86::MOVrm16, X86::MOVrm32 };
@@ -1026,85 +1280,195 @@
/// visitCastInst - Here we have various kinds of copying with or without
/// sign extension going on.
-void
-ISel::visitCastInst (CastInst &CI)
-{
- const Type *targetType = CI.getType ();
- Value *operand = CI.getOperand (0);
- unsigned operandReg = getReg (operand);
- const Type *sourceType = operand->getType ();
- unsigned destReg = getReg (CI);
- //
- // Currently we handle:
- //
- // 1) cast * to bool
- //
- // 2) cast {sbyte, ubyte} to {sbyte, ubyte}
- // cast {short, ushort} to {ushort, short}
- // cast {int, uint, ptr} to {int, uint, ptr}
- //
- // 3) cast {sbyte, ubyte} to {ushort, short}
- // cast {sbyte, ubyte} to {int, uint, ptr}
- // cast {short, ushort} to {int, uint, ptr}
- //
- // 4) cast {int, uint, ptr} to {short, ushort}
- // cast {int, uint, ptr} to {sbyte, ubyte}
- // cast {short, ushort} to {sbyte, ubyte}
-
- // 1) Implement casts to bool by using compare on the operand followed
- // by set if not zero on the result.
- if (targetType == Type::BoolTy)
- {
- BuildMI (BB, X86::CMPri8, 2).addReg (operandReg).addZImm (0);
- BuildMI (BB, X86::SETNEr, 1, destReg);
- return;
- }
+void ISel::visitCastInst(CastInst &CI) {
+ const Type *DestTy = CI.getType();
+ Value *Src = CI.getOperand(0);
+ unsigned SrcReg = getReg(Src);
+ const Type *SrcTy = Src->getType();
+ unsigned SrcClass = getClassB(SrcTy);
+ unsigned DestReg = getReg(CI);
+ unsigned DestClass = getClassB(DestTy);
+
+ // Implement casts to bool by using compare on the operand followed by set if
+ // not zero on the result.
+ if (DestTy == Type::BoolTy) {
+ if (SrcClass == cFP || SrcClass == cLong)
+ visitInstruction(CI);
+
+ BuildMI(BB, X86::CMPri8, 2).addReg(SrcReg).addZImm(0);
+ BuildMI(BB, X86::SETNEr, 1, DestReg);
+ return;
+ }
- // 2) Implement casts between values of the same type class (as determined
- // by getClass) by using a register-to-register move.
- unsigned srcClass = getClassB(sourceType);
- unsigned targClass = getClass(targetType);
- static const unsigned regRegMove[] = {
- X86::MOVrr8, X86::MOVrr16, X86::MOVrr32
+ static const unsigned RegRegMove[] = {
+ X86::MOVrr8, X86::MOVrr16, X86::MOVrr32, X86::FpMOV, X86::MOVrr32
};
- if (srcClass <= cInt && targClass <= cInt && srcClass == targClass) {
- BuildMI(BB, regRegMove[srcClass], 1, destReg).addReg(operandReg);
+ // Implement casts between values of the same type class (as determined by
+ // getClass) by using a register-to-register move.
+ if (SrcClass == DestClass) {
+ if (SrcClass <= cInt || (SrcClass == cFP && SrcTy == DestTy)) {
+ BuildMI(BB, RegRegMove[SrcClass], 1, DestReg).addReg(SrcReg);
+ } else if (SrcClass == cFP) {
+      if (SrcTy == Type::FloatTy) { // float -> double
+ assert(DestTy == Type::DoubleTy && "Unknown cFP member!");
+ BuildMI(BB, X86::FpMOV, 1, DestReg).addReg(SrcReg);
+      } else { // double -> float
+ assert(SrcTy == Type::DoubleTy && DestTy == Type::FloatTy &&
+ "Unknown cFP member!");
+        // Truncate from double to float by storing to memory as a 32-bit float,
+ // reading it back.
+ unsigned FltAlign = TM.getTargetData().getFloatAlignment();
+ int FrameIdx = F->getFrameInfo()->CreateStackObject(4, FltAlign);
+ addFrameReference(BuildMI(BB, X86::FSTr32, 5), FrameIdx).addReg(SrcReg);
+ addFrameReference(BuildMI(BB, X86::FLDr32, 5, DestReg), FrameIdx);
+ }
+ } else if (SrcClass == cLong) {
+ BuildMI(BB, X86::MOVrr32, 1, DestReg).addReg(SrcReg);
+ BuildMI(BB, X86::MOVrr32, 1, DestReg+1).addReg(SrcReg+1);
+ } else {
+ visitInstruction(CI);
+ }
return;
}
- // 3) Handle cast of SMALLER int to LARGER int using a move with sign
- // extension or zero extension, depending on whether the source type
- // was signed.
- if ((srcClass <= cInt) && (targClass <= cInt) && (srcClass < targClass))
- {
- static const unsigned ops[] = {
- X86::MOVSXr16r8, X86::MOVSXr32r8, X86::MOVSXr32r16,
- X86::MOVZXr16r8, X86::MOVZXr32r8, X86::MOVZXr32r16
- };
- unsigned srcSigned = sourceType->isSigned ();
- BuildMI (BB, ops[3 * srcSigned + srcClass + targClass - 1], 1,
- destReg).addReg (operandReg);
- return;
+
+ // Handle cast of SMALLER int to LARGER int using a move with sign extension
+ // or zero extension, depending on whether the source type was signed.
+ if (SrcClass <= cInt && (DestClass <= cInt || DestClass == cLong) &&
+ SrcClass < DestClass) {
+ bool isLong = DestClass == cLong;
+ if (isLong) DestClass = cInt;
+
+ static const unsigned Opc[][4] = {
+ { X86::MOVSXr16r8, X86::MOVSXr32r8, X86::MOVSXr32r16, X86::MOVrr32 }, // s
+ { X86::MOVZXr16r8, X86::MOVZXr32r8, X86::MOVZXr32r16, X86::MOVrr32 } // u
+ };
+
+ bool isUnsigned = SrcTy->isUnsigned();
+ BuildMI(BB, Opc[isUnsigned][SrcClass + DestClass - 1], 1,
+ DestReg).addReg(SrcReg);
+
+ if (isLong) { // Handle upper 32 bits as appropriate...
+ if (isUnsigned) // Zero out top bits...
+ BuildMI(BB, X86::MOVir32, 1, DestReg+1).addZImm(0);
+ else // Sign extend bottom half...
+ BuildMI(BB, X86::SARir32, 2, DestReg+1).addReg(DestReg).addZImm(31);
}
- // 4) Handle cast of LARGER int to SMALLER int using a move to EAX
- // followed by a move out of AX or AL.
- if ((srcClass <= cInt) && (targClass <= cInt) && (srcClass > targClass))
- {
- static const unsigned AReg[] = { X86::AL, X86::AX, X86::EAX };
- BuildMI (BB, regRegMove[srcClass], 1,
- AReg[srcClass]).addReg (operandReg);
- BuildMI (BB, regRegMove[targClass], 1, destReg).addReg (AReg[srcClass]);
- return;
+ return;
+ }
+
+ // Special case long -> int ...
+ if (SrcClass == cLong && DestClass == cInt) {
+ BuildMI(BB, X86::MOVrr32, 1, DestReg).addReg(SrcReg);
+ return;
+ }
+
+ // Handle cast of LARGER int to SMALLER int using a move to EAX followed by a
+ // move out of AX or AL.
+ if ((SrcClass <= cInt || SrcClass == cLong) && DestClass <= cInt
+ && SrcClass > DestClass) {
+ static const unsigned AReg[] = { X86::AL, X86::AX, X86::EAX, 0, X86::EAX };
+ BuildMI(BB, RegRegMove[SrcClass], 1, AReg[SrcClass]).addReg(SrcReg);
+ BuildMI(BB, RegRegMove[DestClass], 1, DestReg).addReg(AReg[DestClass]);
+ return;
+ }
+
+ // Handle casts from integer to floating point now...
+ if (DestClass == cFP) {
+ // unsigned int -> load as 64 bit int.
+ // unsigned long long -> more complex
+ if (SrcTy->isUnsigned() && SrcTy != Type::UByteTy)
+ visitInstruction(CI); // don't handle unsigned src yet!
+
+ // We don't have the facilities for directly loading byte sized data from
+ // memory. Promote it to 16 bits.
+ if (SrcClass == cByte) {
+ unsigned TmpReg = makeAnotherReg(Type::ShortTy);
+ BuildMI(BB, SrcTy->isSigned() ? X86::MOVSXr16r8 : X86::MOVZXr16r8,
+ 1, TmpReg).addReg(SrcReg);
+ SrcTy = Type::ShortTy; // Pretend the short is our input now!
+ SrcClass = cShort;
+ SrcReg = TmpReg;
+ }
+
+ // Spill the integer to memory and reload it from there...
+ int FrameIdx =
+ F->getFrameInfo()->CreateStackObject(SrcTy, TM.getTargetData());
+
+ if (SrcClass == cLong) {
+ if (SrcTy == Type::ULongTy) visitInstruction(CI);
+ addFrameReference(BuildMI(BB, X86::MOVrm32, 5), FrameIdx).addReg(SrcReg);
+ addFrameReference(BuildMI(BB, X86::MOVrm32, 5),
+ FrameIdx, 4).addReg(SrcReg+1);
+ } else {
+ static const unsigned Op1[] = { X86::MOVrm8, X86::MOVrm16, X86::MOVrm32 };
+ addFrameReference(BuildMI(BB, Op1[SrcClass], 5), FrameIdx).addReg(SrcReg);
+ }
+
+ static const unsigned Op2[] =
+ { 0, X86::FILDr16, X86::FILDr32, 0, X86::FILDr64 };
+ addFrameReference(BuildMI(BB, Op2[SrcClass], 5, DestReg), FrameIdx);
+ return;
+ }
+
+ // Handle casts from floating point to integer now...
+ if (SrcClass == cFP) {
+ // Change the floating point control register to use "round towards zero"
+ // mode when truncating to an integer value.
+ //
+ int CWFrameIdx = F->getFrameInfo()->CreateStackObject(2, 2);
+ addFrameReference(BuildMI(BB, X86::FNSTCWm16, 4), CWFrameIdx);
+
+ // Load the old value of the high byte of the control word...
+ unsigned HighPartOfCW = makeAnotherReg(Type::UByteTy);
+ addFrameReference(BuildMI(BB, X86::MOVmr8, 4, HighPartOfCW), CWFrameIdx, 1);
+
+ // Set the high part to be round to zero...
+ addFrameReference(BuildMI(BB, X86::MOVim8, 5), CWFrameIdx, 1).addZImm(12);
+
+ // Reload the modified control word now...
+ addFrameReference(BuildMI(BB, X86::FLDCWm16, 4), CWFrameIdx);
+
+ // Restore the memory image of control word to original value
+ addFrameReference(BuildMI(BB, X86::MOVrm8, 5),
+ CWFrameIdx, 1).addReg(HighPartOfCW);
+
+ // We don't have the facilities for directly storing byte sized data to
+ // memory. Promote it to 16 bits. We also must promote unsigned values to
+ // larger classes because we only have signed FP stores.
+ unsigned StoreClass = DestClass;
+ const Type *StoreTy = DestTy;
+ if (StoreClass == cByte || DestTy->isUnsigned())
+ switch (StoreClass) {
+ case cByte: StoreTy = Type::ShortTy; StoreClass = cShort; break;
+ case cShort: StoreTy = Type::IntTy; StoreClass = cInt; break;
+ case cInt: StoreTy = Type::LongTy; StoreClass = cLong; break;
+ case cLong: visitInstruction(CI); // unsigned long long -> more complex
+ default: assert(0 && "Unknown store class!");
+ }
+
+ // Spill the integer to memory and reload it from there...
+ int FrameIdx =
+ F->getFrameInfo()->CreateStackObject(StoreTy, TM.getTargetData());
+
+ static const unsigned Op1[] =
+ { 0, X86::FISTr16, X86::FISTr32, 0, X86::FISTPr64 };
+ addFrameReference(BuildMI(BB, Op1[StoreClass], 5), FrameIdx).addReg(SrcReg);
+
+ if (DestClass == cLong) {
+ addFrameReference(BuildMI(BB, X86::MOVmr32, 4, DestReg), FrameIdx);
+ addFrameReference(BuildMI(BB, X86::MOVmr32, 4, DestReg+1), FrameIdx, 4);
+ } else {
+ static const unsigned Op2[] = { X86::MOVmr8, X86::MOVmr16, X86::MOVmr32 };
+ addFrameReference(BuildMI(BB, Op2[DestClass], 4, DestReg), FrameIdx);
}
+
+ // Reload the original control word now...
+ addFrameReference(BuildMI(BB, X86::FLDCWm16, 4), CWFrameIdx);
+ return;
+ }
+
// Anything we haven't handled already, we can't (yet) handle at all.
- //
- // FP to integral casts can be handled with FISTP to store onto the
- // stack while converting to integer, followed by a MOV to load from
- // the stack into the result register. Integral to FP casts can be
- // handled with MOV to store onto the stack, followed by a FILD to
- // load from the stack while converting to FP. For the moment, I
- // can't quite get straight in my head how to borrow myself some
- // stack space and write on it. Otherwise, this would be trivial.
visitInstruction (CI);
}
@@ -1121,16 +1485,8 @@
return Count+1;
}
-/// visitGetElementPtrInst - I don't know, most programs don't have
-/// getelementptr instructions, right? That means we can put off
-/// implementing this, right? Right. This method emits machine
-/// instructions to perform type-safe pointer arithmetic. I am
-/// guessing this could be cleaned up somewhat to use fewer temporary
-/// registers.
-void
-ISel::visitGetElementPtrInst (GetElementPtrInst &I)
-{
- unsigned outputReg = getReg (I);
+void ISel::visitGetElementPtrInst(GetElementPtrInst &I) {
+ unsigned outputReg = getReg(I);
MachineBasicBlock::iterator MI = BB->end();
emitGEPOperation(BB, MI, I.getOperand(0),
I.op_begin()+1, I.op_end(), outputReg);
@@ -1142,14 +1498,14 @@
User::op_iterator IdxEnd, unsigned TargetReg) {
const TargetData &TD = TM.getTargetData();
const Type *Ty = Src->getType();
- unsigned basePtrReg = getReg(Src, MBB, IP);
+ unsigned BaseReg = getReg(Src, MBB, IP);
// GEPs have zero or more indices; we must perform a struct access
// or array access for each one.
for (GetElementPtrInst::op_iterator oi = IdxBegin,
oe = IdxEnd; oi != oe; ++oi) {
Value *idx = *oi;
- unsigned nextBasePtrReg = makeAnotherReg(Type::UIntTy);
+ unsigned NextReg = BaseReg;
if (const StructType *StTy = dyn_cast<StructType>(Ty)) {
// It's a struct access. idx is the index into the structure,
// which names the field. This index must have ubyte type.
@@ -1162,11 +1518,12 @@
// right byte offset from the StructLayout class's list of
// structure member offsets.
unsigned idxValue = CUI->getValue();
- unsigned memberOffset =
- TD.getStructLayout(StTy)->MemberOffsets[idxValue];
- // Emit an ADD to add memberOffset to the basePtr.
- BMI(MBB, IP, X86::ADDri32, 2,
- nextBasePtrReg).addReg(basePtrReg).addZImm(memberOffset);
+ unsigned FieldOff = TD.getStructLayout(StTy)->MemberOffsets[idxValue];
+ if (FieldOff) {
+ NextReg = makeAnotherReg(Type::UIntTy);
+ // Emit an ADD to add FieldOff to the basePtr.
+ BMI(MBB, IP, X86::ADDri32, 2,NextReg).addReg(BaseReg).addZImm(FieldOff);
+ }
// The next type is the member of the structure selected by the
// index.
Ty = StTy->getElementTypes()[idxValue];
@@ -1178,7 +1535,7 @@
// time.
assert(idx->getType() == Type::LongTy && "Bad GEP array index!");
- // We want to add basePtrReg to(idxReg * sizeof ElementType). First, we
+ // We want to add BaseReg to(idxReg * sizeof ElementType). First, we
// must find the size of the pointed-to type (Not coincidentally, the next
// type is the type of the elements in the array).
Ty = SqTy->getElementType();
@@ -1186,25 +1543,21 @@
// If idxReg is a constant, we don't need to perform the multiply!
if (ConstantSInt *CSI = dyn_cast<ConstantSInt>(idx)) {
- if (CSI->isNullValue()) {
- BMI(MBB, IP, X86::MOVrr32, 1, nextBasePtrReg).addReg(basePtrReg);
- } else {
+ if (!CSI->isNullValue()) {
unsigned Offset = elementSize*CSI->getValue();
-
- BMI(MBB, IP, X86::ADDri32, 2,
- nextBasePtrReg).addReg(basePtrReg).addZImm(Offset);
+ NextReg = makeAnotherReg(Type::UIntTy);
+ BMI(MBB, IP, X86::ADDri32, 2,NextReg).addReg(BaseReg).addZImm(Offset);
}
} else if (elementSize == 1) {
// If the element size is 1, we don't have to multiply, just add
unsigned idxReg = getReg(idx, MBB, IP);
- BMI(MBB, IP, X86::ADDrr32, 2,
- nextBasePtrReg).addReg(basePtrReg).addReg(idxReg);
+ NextReg = makeAnotherReg(Type::UIntTy);
+ BMI(MBB, IP, X86::ADDrr32, 2, NextReg).addReg(BaseReg).addReg(idxReg);
} else {
unsigned idxReg = getReg(idx, MBB, IP);
unsigned OffsetReg = makeAnotherReg(Type::UIntTy);
if (unsigned Shift = ExactLog2(elementSize)) {
// If the element size is exactly a power of 2, use a shift to get it.
-
BMI(MBB, IP, X86::SHLir32, 2,
OffsetReg).addReg(idxReg).addZImm(Shift-1);
} else {
@@ -1214,22 +1567,22 @@
// Emit a MUL to multiply the register holding the index by
// elementSize, putting the result in OffsetReg.
- doMultiply(MBB, IP, OffsetReg, Type::LongTy, idxReg, elementSizeReg);
+ doMultiply(MBB, IP, OffsetReg, Type::IntTy, idxReg, elementSizeReg);
}
// Emit an ADD to add OffsetReg to the basePtr.
- BMI(MBB, IP, X86::ADDrr32, 2,
- nextBasePtrReg).addReg(basePtrReg).addReg(OffsetReg);
+ NextReg = makeAnotherReg(Type::UIntTy);
+ BMI(MBB, IP, X86::ADDrr32, 2,NextReg).addReg(BaseReg).addReg(OffsetReg);
}
}
// Now that we are here, further indices refer to subtypes of this
- // one, so we don't need to worry about basePtrReg itself, anymore.
- basePtrReg = nextBasePtrReg;
+ // one, so we don't need to worry about BaseReg itself, anymore.
+ BaseReg = NextReg;
}
// After we have processed all the indices, the result is left in
- // basePtrReg. Move it to the register where we were expected to
+ // BaseReg. Move it to the register where we were expected to
// put the answer. A 32-bit move should do it, because we are in
// ILP32 land.
- BMI(MBB, IP, X86::MOVrr32, 1, TargetReg).addReg(basePtrReg);
+ BMI(MBB, IP, X86::MOVrr32, 1, TargetReg).addReg(BaseReg);
}
@@ -1276,7 +1629,7 @@
BuildMI(BB, X86::ANDri32, 2, AlignedSize).addReg(AddedSizeReg).addZImm(~15);
// Subtract size from stack pointer, thereby allocating some space.
- BuildMI(BB, X86::SUBri32, 2, X86::ESP).addReg(X86::ESP).addZImm(AlignedSize);
+ BuildMI(BB, X86::SUBrr32, 2, X86::ESP).addReg(X86::ESP).addReg(AlignedSize);
// Put a pointer to the space into the result register, by copying
// the stack pointer.
@@ -1286,7 +1639,46 @@
// object.
F->getFrameInfo()->CreateVariableSizedObject();
}
-
+
+/// visitMallocInst - Malloc instructions are code generated into direct calls
+/// to the library malloc.
+///
+void ISel::visitMallocInst(MallocInst &I) {
+ unsigned AllocSize = TM.getTargetData().getTypeSize(I.getAllocatedType());
+ unsigned Arg;
+
+ if (ConstantUInt *C = dyn_cast<ConstantUInt>(I.getOperand(0))) {
+ Arg = getReg(ConstantUInt::get(Type::UIntTy, C->getValue() * AllocSize));
+ } else {
+ Arg = makeAnotherReg(Type::UIntTy);
+ unsigned Op0Reg = getReg(ConstantUInt::get(Type::UIntTy, AllocSize));
+ unsigned Op1Reg = getReg(I.getOperand(0));
+ MachineBasicBlock::iterator MBBI = BB->end();
+ doMultiply(BB, MBBI, Arg, Type::UIntTy, Op0Reg, Op1Reg);
+
+
+ }
+
+ std::vector<ValueRecord> Args;
+ Args.push_back(ValueRecord(Arg, Type::UIntTy));
+ MachineInstr *TheCall = BuildMI(X86::CALLpcrel32,
+ 1).addExternalSymbol("malloc", true);
+ doCall(ValueRecord(getReg(I), I.getType()), TheCall, Args);
+}
+
+
+/// visitFreeInst - Free instructions are code gen'd to call the free libc
+/// function.
+///
+void ISel::visitFreeInst(FreeInst &I) {
+ std::vector<ValueRecord> Args;
+ Args.push_back(ValueRecord(getReg(I.getOperand(0)),
+ I.getOperand(0)->getType()));
+ MachineInstr *TheCall = BuildMI(X86::CALLpcrel32,
+ 1).addExternalSymbol("free", true);
+ doCall(ValueRecord(0, Type::VoidTy), TheCall, Args);
+}
+
/// createSimpleX86InstructionSelector - This pass converts an LLVM function
/// into a machine code representation is a very simple peep-hole fashion. The
More information about the llvm-commits
mailing list