[llvm-commits] CVS: llvm/lib/Target/X86/InstSelectSimple.cpp
Chris Lattner
lattner at cs.uiuc.edu
Mon Jan 6 12:49:01 PST 2003
Changes in directory llvm/lib/Target/X86:
InstSelectSimple.cpp updated: 1.80 -> 1.81
---
Log message:
* Adjust to use new interfaces, eliminating CurReg stuff
* Support arbitrary FP constants
* Fix bugs in frame layout for function calls and incoming arguments
* Insert copies for constant arguments to PHI nodes into the BOTTOM of
  predecessor blocks, not the top.
* Implement _floating point_ support: setcc, return, load, store, cast
* Fix several bugs in the cast instruction
---
Diffs of the changes:
Index: llvm/lib/Target/X86/InstSelectSimple.cpp
diff -u llvm/lib/Target/X86/InstSelectSimple.cpp:1.80 llvm/lib/Target/X86/InstSelectSimple.cpp:1.81
--- llvm/lib/Target/X86/InstSelectSimple.cpp:1.80 Sat Dec 28 15:08:27 2002
+++ llvm/lib/Target/X86/InstSelectSimple.cpp Mon Jan 6 12:47:54 2003
@@ -21,6 +21,7 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/SSARegMap.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Support/InstVisitor.h"
#include "llvm/Target/MRegisterInfo.h"
@@ -59,14 +60,12 @@
MachineFunction *F; // The function we are compiling into
MachineBasicBlock *BB; // The current MBB we are compiling
- unsigned CurReg;
std::map<Value*, unsigned> RegMap; // Mapping between Val's and SSA Regs
// MBBMap - Mapping between LLVM BB -> Machine BB
std::map<const BasicBlock*, MachineBasicBlock*> MBBMap;
- ISel(TargetMachine &tm)
- : TM(tm), F(0), BB(0), CurReg(MRegisterInfo::FirstVirtualRegister) {}
+ ISel(TargetMachine &tm) : TM(tm), F(0), BB(0) {}
/// runOnFunction - Top level implementation of instruction selection for
/// the entire function.
@@ -89,7 +88,6 @@
RegMap.clear();
MBBMap.clear();
- CurReg = MRegisterInfo::FirstVirtualRegister;
F = 0;
return false; // We never modify the LLVM itself.
}
@@ -155,7 +153,9 @@
void visitSetGE(SetCondInst &I) { visitSetCCInst(I, 5); }
// Memory Instructions
+ MachineInstr *doFPLoad(const Type *Ty, unsigned DestReg);
void visitLoadInst(LoadInst &I);
+ void doFPStore(const Type *Ty, unsigned DestAddrReg, unsigned SrcReg);
void visitStoreInst(StoreInst &I);
void visitGetElementPtrInst(GetElementPtrInst &I);
void visitAllocaInst(AllocaInst &I);
@@ -198,8 +198,7 @@
// Add the mapping of regnumber => reg class to MachineFunction
const TargetRegisterClass *RC =
TM.getRegisterInfo()->getRegClassForType(Ty);
- F->getSSARegMap()->addRegMap(CurReg, RC);
- return CurReg++;
+ return F->getSSARegMap()->createVirtualRegister(RC);
}
/// getReg - This method turns an LLVM value into a register number. This
@@ -316,8 +315,10 @@
else if (Value == +1.0)
BMI(MBB, IP, X86::FLD1, 0, R);
else {
- std::cerr << "Cannot load constant '" << Value << "'!\n";
- assert(0);
+ // Otherwise we need to spill the constant to memory...
+ MachineConstantPool *CP = F->getConstantPool();
+ unsigned CPI = CP->getConstantPoolIndex(CFP);
+ addConstantPoolReference(doFPLoad(CFP->getType(), R), CPI);
}
} else if (isa<ConstantPointerNull>(C)) {
@@ -344,15 +345,13 @@
// [ESP + 8] -- second argument, if four bytes in size
// ...
//
- unsigned ArgOffset = 0;
+ unsigned ArgOffset = 4;
MachineFrameInfo *MFI = F->getFrameInfo();
for (Function::aiterator I = Fn.abegin(), E = Fn.aend(); I != E; ++I) {
unsigned Reg = getReg(*I);
- ArgOffset += 4; // Each argument takes at least 4 bytes on the stack...
int FI; // Frame object index
-
switch (getClassB(I->getType())) {
case cByte:
FI = MFI->CreateFixedObject(1, ArgOffset);
@@ -373,14 +372,15 @@
FI = MFI->CreateFixedObject(4, ArgOffset);
} else {
Opcode = X86::FLDr64;
- ArgOffset += 4; // doubles require 4 additional bytes
FI = MFI->CreateFixedObject(8, ArgOffset);
+ ArgOffset += 4; // doubles require 4 additional bytes
}
addFrameReference(BuildMI(BB, Opcode, 4, Reg), FI);
break;
default:
assert(0 && "Unhandled argument type!");
}
+ ArgOffset += 4; // Each argument takes at least 4 bytes on the stack...
}
}
@@ -390,6 +390,7 @@
/// the current one.
///
void ISel::SelectPHINodes() {
+ const MachineInstrInfo &MII = TM.getInstrInfo();
const Function &LF = *F->getFunction(); // The LLVM function...
for (Function::const_iterator I = LF.begin(), E = LF.end(); I != E; ++I) {
const BasicBlock *BB = I;
@@ -410,11 +411,10 @@
// available in a virtual register, insert the computation code into
// PredMBB
//
- // FIXME: This should insert the code into the BOTTOM of the block, not
- // the top of the block. This just makes for huge live ranges...
- MachineBasicBlock::iterator PI = PredMBB->begin();
- while ((*PI)->getOpcode() == X86::PHI) ++PI;
-
+ MachineBasicBlock::iterator PI = PredMBB->end();
+ while (PI != PredMBB->begin() &&
+ MII.isTerminatorInstr((*(PI-1))->getOpcode()))
+ --PI;
MI->addRegOperand(getReg(PN->getIncomingValue(i), PredMBB, PI));
MI->addMachineBasicBlockOperand(PredMBB);
}
@@ -433,6 +433,7 @@
void ISel::visitSetCCInst(SetCondInst &I, unsigned OpNum) {
// The arguments are already supposed to be of the same type.
const Type *CompTy = I.getOperand(0)->getType();
+ bool isSigned = CompTy->isSigned();
unsigned reg1 = getReg(I.getOperand(0));
unsigned reg2 = getReg(I.getOperand(1));
@@ -442,44 +443,26 @@
// compare 8-bit with 8-bit, 16-bit with 16-bit, 32-bit with
// 32-bit.
case cByte:
- BuildMI (BB, X86::CMPrr8, 2).addReg (reg1).addReg (reg2);
+ BuildMI(BB, X86::CMPrr8, 2).addReg(reg1).addReg(reg2);
break;
case cShort:
- BuildMI (BB, X86::CMPrr16, 2).addReg (reg1).addReg (reg2);
+ BuildMI(BB, X86::CMPrr16, 2).addReg(reg1).addReg(reg2);
break;
case cInt:
- BuildMI (BB, X86::CMPrr32, 2).addReg (reg1).addReg (reg2);
+ BuildMI(BB, X86::CMPrr32, 2).addReg(reg1).addReg(reg2);
break;
-
-#if 0
- // Push the variables on the stack with fldl opcodes.
- // FIXME: assuming var1, var2 are in memory, if not, spill to
- // stack first
- case cFP: // Floats
- BuildMI (BB, X86::FLDr32, 1).addReg (reg1);
- BuildMI (BB, X86::FLDr32, 1).addReg (reg2);
+ case cFP:
+ BuildMI(BB, X86::FpUCOM, 2).addReg(reg1).addReg(reg2);
+ BuildMI(BB, X86::FNSTSWr8, 0);
+ BuildMI(BB, X86::SAHF, 1);
+ isSigned = false; // Compare with unsigned operators
break;
- case cFP (doubles): // Doubles
- BuildMI (BB, X86::FLDr64, 1).addReg (reg1);
- BuildMI (BB, X86::FLDr64, 1).addReg (reg2);
- break;
-#endif
+
case cLong:
default:
visitInstruction(I);
}
-#if 0
- if (CompTy->isFloatingPoint()) {
- // (Non-trapping) compare and pop twice.
- BuildMI (BB, X86::FUCOMPP, 0);
- // Move fp status word (concodes) to ax.
- BuildMI (BB, X86::FNSTSWr8, 1, X86::AX);
- // Load real concodes from ax.
- BuildMI (BB, X86::SAHF, 1).addReg(X86::AH);
- }
-#endif
-
// Emit setOp instruction (extract concode; clobbers ax),
// using the following mapping:
// LLVM -> X86 signed X86 unsigned
@@ -496,12 +479,12 @@
{X86::SETEr, X86::SETNEr, X86::SETLr, X86::SETGr, X86::SETLEr, X86::SETGEr},
};
- BuildMI(BB, OpcodeTab[CompTy->isSigned()][OpNum], 0, getReg(I));
+ BuildMI(BB, OpcodeTab[isSigned][OpNum], 0, getReg(I));
}
/// promote32 - Emit instructions to turn a narrow operand into a 32-bit-wide
/// operand, in the specified target register.
-void ISel::promote32 (unsigned targetReg, Value *v) {
+void ISel::promote32(unsigned targetReg, Value *v) {
unsigned vReg = getReg(v);
bool isUnsigned = v->getType()->isUnsigned();
switch (getClass(v->getType())) {
@@ -539,7 +522,7 @@
/// ret long, ulong : Move value into EAX/EDX and return
/// ret float/double : Top of FP stack
///
-void ISel::visitReturnInst (ReturnInst &I) {
+void ISel::visitReturnInst(ReturnInst &I) {
if (I.getNumOperands() == 0) {
BuildMI(BB, X86::RET, 0); // Just emit a 'ret' instruction
return;
@@ -553,13 +536,13 @@
promote32(X86::EAX, RetVal);
break;
case cFP: // Floats & Doubles: Return in ST(0)
- BuildMI(BB, X86::FpMOV, 1, X86::ST0).addReg(getReg(RetVal));
+ BuildMI(BB, X86::FpSETRESULT, 1).addReg(getReg(RetVal));
break;
case cLong:
// ret long: use EAX(least significant 32 bits)/EDX (most
// significant 32)...
default:
- visitInstruction (I);
+ visitInstruction(I);
}
// Emit a 'ret' instruction
BuildMI(BB, X86::RET, 0);
@@ -595,11 +578,9 @@
for (unsigned i = 1, e = CI.getNumOperands(); i != e; ++i)
switch (getClass(CI.getOperand(i)->getType())) {
case cByte: case cShort: case cInt:
- NumBytes += 4;
- break;
+ NumBytes += 4; break;
case cLong:
- NumBytes += 8;
- break;
+ NumBytes += 8; break;
case cFP:
NumBytes += CI.getOperand(i)->getType() == Type::FloatTy ? 4 : 8;
break;
@@ -623,29 +604,32 @@
X86::ESP, ArgOffset).addReg(R);
break;
}
- case cInt:
+ case cInt: {
+ unsigned ArgReg = getReg(Arg);
addRegOffset(BuildMI(BB, X86::MOVrm32, 5),
- X86::ESP, ArgOffset).addReg(getReg(Arg));
+ X86::ESP, ArgOffset).addReg(ArgReg);
break;
+ }
- case cFP:
+ case cFP: {
+ unsigned ArgReg = getReg(Arg);
if (Arg->getType() == Type::FloatTy) {
addRegOffset(BuildMI(BB, X86::FSTr32, 5),
- X86::ESP, ArgOffset).addReg(getReg(Arg));
+ X86::ESP, ArgOffset).addReg(ArgReg);
} else {
assert(Arg->getType() == Type::DoubleTy && "Unknown FP type!");
- ArgOffset += 4;
- addRegOffset(BuildMI(BB, X86::FSTr32, 5),
- X86::ESP, ArgOffset).addReg(getReg(Arg));
+ addRegOffset(BuildMI(BB, X86::FSTr64, 5),
+ X86::ESP, ArgOffset).addReg(ArgReg);
+ ArgOffset += 4; // 8 byte entry, not 4.
}
break;
-
+ }
default:
- // FIXME: long/ulong/float/double args not handled.
+ // FIXME: long/ulong args not handled.
visitInstruction(CI);
break;
}
- ArgOffset += 4;
+ ArgOffset += 4; // All arguments are at least 4 bytes
}
}
@@ -657,7 +641,8 @@
BuildMI(BB, X86::CALLr32, 1).addReg(Reg);
}
- BuildMI(BB, X86::ADJCALLSTACKUP, 1).addZImm(NumBytes);
+ if (NumBytes)
+ BuildMI(BB, X86::ADJCALLSTACKUP, 1).addZImm(NumBytes);
// If there is a return value, scavenge the result from the location the call
// leaves it in...
@@ -679,7 +664,7 @@
break;
}
case cFP: // Floating-point return values live in %ST(0)
- BuildMI(BB, X86::FpMOV, 1, getReg(CI)).addReg(X86::ST0);
+ BuildMI(BB, X86::FpGETRESULT, 1, getReg(CI));
break;
default:
std::cerr << "Cannot get return value for call of type '"
@@ -783,7 +768,7 @@
if (I.getOpcode() == Instruction::Div)
BuildMI(BB, X86::FpDIV, 2, ResultReg).addReg(Op0Reg).addReg(Op1Reg);
else
- BuildMI(BB, X86::FpREM, 2, ResultReg).addReg(Op0Reg).addReg(Op1Reg);
+ visitInstruction(I);
return;
default:
case cLong:
@@ -846,50 +831,94 @@
if (OperandClass > cInt)
visitInstruction(I); // Can't handle longs yet!
- if (ConstantUInt *CUI = dyn_cast<ConstantUInt> (I.getOperand (1)))
- {
- // The shift amount is constant, guaranteed to be a ubyte. Get its value.
- assert(CUI->getType() == Type::UByteTy && "Shift amount not a ubyte?");
- unsigned char shAmt = CUI->getValue();
-
- static const unsigned ConstantOperand[][4] = {
- { X86::SHRir8, X86::SHRir16, X86::SHRir32, 0 }, // SHR
- { X86::SARir8, X86::SARir16, X86::SARir32, 0 }, // SAR
- { X86::SHLir8, X86::SHLir16, X86::SHLir32, 0 }, // SHL
- { X86::SHLir8, X86::SHLir16, X86::SHLir32, 0 }, // SAL = SHL
- };
+ if (ConstantUInt *CUI = dyn_cast<ConstantUInt>(I.getOperand(1))) {
+ // The shift amount is constant, guaranteed to be a ubyte. Get its value.
+ assert(CUI->getType() == Type::UByteTy && "Shift amount not a ubyte?");
+ unsigned char shAmt = CUI->getValue();
+
+ static const unsigned ConstantOperand[][4] = {
+ { X86::SHRir8, X86::SHRir16, X86::SHRir32, 0 }, // SHR
+ { X86::SARir8, X86::SARir16, X86::SARir32, 0 }, // SAR
+ { X86::SHLir8, X86::SHLir16, X86::SHLir32, 0 }, // SHL
+ { X86::SHLir8, X86::SHLir16, X86::SHLir32, 0 }, // SAL = SHL
+ };
- const unsigned *OpTab = // Figure out the operand table to use
- ConstantOperand[isLeftShift*2+isOperandSigned];
+ const unsigned *OpTab = // Figure out the operand table to use
+ ConstantOperand[isLeftShift*2+isOperandSigned];
- // Emit: <insn> reg, shamt (shift-by-immediate opcode "ir" form.)
- BuildMI(BB, OpTab[OperandClass], 2, DestReg).addReg(Op0r).addZImm(shAmt);
- }
- else
- {
- // The shift amount is non-constant.
- //
- // In fact, you can only shift with a variable shift amount if
- // that amount is already in the CL register, so we have to put it
- // there first.
- //
+ // Emit: <insn> reg, shamt (shift-by-immediate opcode "ir" form.)
+ BuildMI(BB, OpTab[OperandClass], 2, DestReg).addReg(Op0r).addZImm(shAmt);
+ } else {
+ // The shift amount is non-constant.
+ //
+ // In fact, you can only shift with a variable shift amount if
+ // that amount is already in the CL register, so we have to put it
+ // there first.
+ //
- // Emit: move cl, shiftAmount (put the shift amount in CL.)
- BuildMI(BB, X86::MOVrr8, 1, X86::CL).addReg(getReg(I.getOperand(1)));
+ // Emit: move cl, shiftAmount (put the shift amount in CL.)
+ BuildMI(BB, X86::MOVrr8, 1, X86::CL).addReg(getReg(I.getOperand(1)));
- // This is a shift right (SHR).
- static const unsigned NonConstantOperand[][4] = {
- { X86::SHRrr8, X86::SHRrr16, X86::SHRrr32, 0 }, // SHR
- { X86::SARrr8, X86::SARrr16, X86::SARrr32, 0 }, // SAR
- { X86::SHLrr8, X86::SHLrr16, X86::SHLrr32, 0 }, // SHL
- { X86::SHLrr8, X86::SHLrr16, X86::SHLrr32, 0 }, // SAL = SHL
- };
+ // This is a shift right (SHR).
+ static const unsigned NonConstantOperand[][4] = {
+ { X86::SHRrr8, X86::SHRrr16, X86::SHRrr32, 0 }, // SHR
+ { X86::SARrr8, X86::SARrr16, X86::SARrr32, 0 }, // SAR
+ { X86::SHLrr8, X86::SHLrr16, X86::SHLrr32, 0 }, // SHL
+ { X86::SHLrr8, X86::SHLrr16, X86::SHLrr32, 0 }, // SAL = SHL
+ };
- const unsigned *OpTab = // Figure out the operand table to use
- NonConstantOperand[isLeftShift*2+isOperandSigned];
+ const unsigned *OpTab = // Figure out the operand table to use
+ NonConstantOperand[isLeftShift*2+isOperandSigned];
- BuildMI(BB, OpTab[OperandClass], 1, DestReg).addReg(Op0r);
- }
+ BuildMI(BB, OpTab[OperandClass], 1, DestReg).addReg(Op0r);
+ }
+}
+
+
+/// doFPLoad - This method is used to load an FP value from memory using the
+/// current endianness. NOTE: This method returns a partially constructed load
+/// instruction which needs to have the memory source filled in still.
+///
+MachineInstr *ISel::doFPLoad(const Type *Ty, unsigned DestReg) {
+ assert(Ty == Type::FloatTy || Ty == Type::DoubleTy && "Unknown FP type!");
+ unsigned LoadOpcode = Ty == Type::FloatTy ? X86::FLDr32 : X86::FLDr64;
+
+ if (TM.getTargetData().isLittleEndian()) // fast path...
+ return BuildMI(BB, LoadOpcode, 4, DestReg);
+
+ // If we are big-endian, start by creating an LEA instruction to represent the
+ // address of the memory location to load from...
+ //
+ unsigned SrcAddrReg = makeAnotherReg(Type::UIntTy);
+ MachineInstr *Result = BuildMI(BB, X86::LEAr32, 5, SrcAddrReg);
+
+ // Allocate a temporary stack slot to transform the value into...
+ int FrameIdx = F->getFrameInfo()->CreateStackObject(Ty, TM.getTargetData());
+ unsigned DestAddrReg = makeAnotherReg(Type::UIntTy);
+ addFrameReference(BuildMI(BB, X86::LEAr32, 5, DestAddrReg), FrameIdx);
+
+ // Perform the bswaps 32 bits at a time...
+ unsigned TmpReg1 = makeAnotherReg(Type::UIntTy);
+ unsigned TmpReg2 = makeAnotherReg(Type::UIntTy);
+ addDirectMem(BuildMI(BB, X86::MOVmr32, 4, TmpReg1), SrcAddrReg);
+ BuildMI(BB, X86::BSWAPr32, 1, TmpReg2).addReg(TmpReg1);
+ unsigned Offset = (Ty == Type::DoubleTy) << 2;
+ addRegOffset(BuildMI(BB, X86::MOVrm32, 5),
+ DestAddrReg, Offset).addReg(TmpReg2);
+
+ if (Ty == Type::DoubleTy) { // Swap the other 32 bits of a double value...
+ TmpReg1 = makeAnotherReg(Type::UIntTy);
+ TmpReg2 = makeAnotherReg(Type::UIntTy);
+
+ addRegOffset(BuildMI(BB, X86::MOVmr32, 4, TmpReg1), SrcAddrReg, 4);
+ BuildMI(BB, X86::BSWAPr32, 1, TmpReg2).addReg(TmpReg1);
+ unsigned Offset = (Ty == Type::DoubleTy) << 2;
+ addDirectMem(BuildMI(BB, X86::MOVrm32, 5), DestAddrReg).addReg(TmpReg2);
+ }
+
+ // Now we can reload the final byteswapped result into the final destination.
+ addFrameReference(BuildMI(BB, LoadOpcode, 4, DestReg), FrameIdx);
+ return Result;
}
@@ -907,9 +936,7 @@
switch (Class) {
default: visitInstruction(I); // FIXME: Handle longs...
case cFP: {
- // FIXME: Handle endian swapping for FP values.
- unsigned Opcode = I.getType() == Type::FloatTy ? X86::FLDr32 : X86::FLDr64;
- addDirectMem(BuildMI(BB, Opcode, 4, DestReg), SrcAddrReg);
+ addDirectMem(doFPLoad(I.getType(), DestReg), SrcAddrReg);
return;
}
case cInt: // Integers of various sizes handled below
@@ -922,17 +949,17 @@
// in is in the upper part of the eight byte memory image of the pointer. It
// also happens to be byte-swapped, but this will be handled later.
//
- if (!isLittleEndian && hasLongPointers && isa<PointerType>(I.getType())) {
+ if (!isLittleEndian && hasLongPointers &&
+ (isa<PointerType>(I.getType()) ||
+ I.getType() == Type::LongTy || I.getType() == Type::ULongTy)) {
unsigned R = makeAnotherReg(Type::UIntTy);
BuildMI(BB, X86::ADDri32, 2, R).addReg(SrcAddrReg).addZImm(4);
SrcAddrReg = R;
}
unsigned IReg = DestReg;
- if (!isLittleEndian) { // If big endian we need an intermediate stage
- IReg = makeAnotherReg(I.getType());
- std::swap(IReg, DestReg);
- }
+ if (!isLittleEndian) // If big endian we need an intermediate stage
+ DestReg = makeAnotherReg(I.getType());
static const unsigned Opcode[] = { X86::MOVmr8, X86::MOVmr16, X86::MOVmr32 };
addDirectMem(BuildMI(BB, Opcode[Class], 4, DestReg), SrcAddrReg);
@@ -957,7 +984,7 @@
BuildMI(BB, X86::MOVrr16, 1, X86::AX).addReg(DestReg);
BuildMI(BB, X86::XCHGrr8, 2).addReg(X86::AL, MOTy::UseAndDef)
.addReg(X86::AH, MOTy::UseAndDef);
- BuildMI(BB, X86::MOVrr16, 1, DestReg).addReg(X86::AX);
+ BuildMI(BB, X86::MOVrr16, 1, IReg).addReg(X86::AX);
break;
default: assert(0 && "Class not handled yet!");
}
@@ -965,32 +992,70 @@
}
+/// doFPStore - This method is used to store an FP value to memory using the
+/// current endianness.
+///
+void ISel::doFPStore(const Type *Ty, unsigned DestAddrReg, unsigned SrcReg) {
+ assert(Ty == Type::FloatTy || Ty == Type::DoubleTy && "Unknown FP type!");
+ unsigned StoreOpcode = Ty == Type::FloatTy ? X86::FSTr32 : X86::FSTr64;
+
+ if (TM.getTargetData().isLittleEndian()) { // fast path...
+ addDirectMem(BuildMI(BB, StoreOpcode,5), DestAddrReg).addReg(SrcReg);
+ return;
+ }
+
+ // Allocate a temporary stack slot to transform the value into...
+ int FrameIdx = F->getFrameInfo()->CreateStackObject(Ty, TM.getTargetData());
+ unsigned SrcAddrReg = makeAnotherReg(Type::UIntTy);
+ addFrameReference(BuildMI(BB, X86::LEAr32, 5, SrcAddrReg), FrameIdx);
+
+ // Store the value into a temporary stack slot...
+ addDirectMem(BuildMI(BB, StoreOpcode, 5), SrcAddrReg).addReg(SrcReg);
+
+ // Perform the bswaps 32 bits at a time...
+ unsigned TmpReg1 = makeAnotherReg(Type::UIntTy);
+ unsigned TmpReg2 = makeAnotherReg(Type::UIntTy);
+ addDirectMem(BuildMI(BB, X86::MOVmr32, 4, TmpReg1), SrcAddrReg);
+ BuildMI(BB, X86::BSWAPr32, 1, TmpReg2).addReg(TmpReg1);
+ unsigned Offset = (Ty == Type::DoubleTy) << 2;
+ addRegOffset(BuildMI(BB, X86::MOVrm32, 5),
+ DestAddrReg, Offset).addReg(TmpReg2);
+
+ if (Ty == Type::DoubleTy) { // Swap the other 32 bits of a double value...
+ TmpReg1 = makeAnotherReg(Type::UIntTy);
+ TmpReg2 = makeAnotherReg(Type::UIntTy);
+
+ addRegOffset(BuildMI(BB, X86::MOVmr32, 4, TmpReg1), SrcAddrReg, 4);
+ BuildMI(BB, X86::BSWAPr32, 1, TmpReg2).addReg(TmpReg1);
+ unsigned Offset = (Ty == Type::DoubleTy) << 2;
+ addDirectMem(BuildMI(BB, X86::MOVrm32, 5), DestAddrReg).addReg(TmpReg2);
+ }
+}
+
+
/// visitStoreInst - Implement LLVM store instructions in terms of the x86 'mov'
/// instruction.
///
void ISel::visitStoreInst(StoreInst &I) {
bool isLittleEndian = TM.getTargetData().isLittleEndian();
bool hasLongPointers = TM.getTargetData().getPointerSize() == 8;
- unsigned ValReg = getReg(I.getOperand(0));
- unsigned AddressReg = getReg(I.getOperand(1));
+ unsigned ValReg = getReg(I.getOperand(0));
+ unsigned AddressReg = getReg(I.getOperand(1));
unsigned Class = getClass(I.getOperand(0)->getType());
switch (Class) {
default: visitInstruction(I); // FIXME: Handle longs...
- case cFP: {
- // FIXME: Handle endian swapping for FP values.
- unsigned Opcode = I.getOperand(0)->getType() == Type::FloatTy ?
- X86::FSTr32 : X86::FSTr64;
- addDirectMem(BuildMI(BB, Opcode, 1+4), AddressReg).addReg(ValReg);
+ case cFP:
+ doFPStore(I.getOperand(0)->getType(), AddressReg, ValReg);
return;
- }
case cInt: // Integers of various sizes handled below
case cShort:
case cByte: break;
}
if (!isLittleEndian && hasLongPointers &&
- isa<PointerType>(I.getOperand(0)->getType())) {
+ (isa<PointerType>(I.getOperand(0)->getType()) ||
+ I.getType() == Type::LongTy || I.getType() == Type::ULongTy)) {
unsigned R = makeAnotherReg(Type::UIntTy);
BuildMI(BB, X86::ADDri32, 2, R).addReg(AddressReg).addZImm(4);
AddressReg = R;
@@ -1026,85 +1091,143 @@
/// visitCastInst - Here we have various kinds of copying with or without
/// sign extension going on.
-void
-ISel::visitCastInst (CastInst &CI)
-{
- const Type *targetType = CI.getType ();
- Value *operand = CI.getOperand (0);
- unsigned operandReg = getReg (operand);
- const Type *sourceType = operand->getType ();
- unsigned destReg = getReg (CI);
- //
- // Currently we handle:
- //
- // 1) cast * to bool
- //
- // 2) cast {sbyte, ubyte} to {sbyte, ubyte}
- // cast {short, ushort} to {ushort, short}
- // cast {int, uint, ptr} to {int, uint, ptr}
- //
- // 3) cast {sbyte, ubyte} to {ushort, short}
- // cast {sbyte, ubyte} to {int, uint, ptr}
- // cast {short, ushort} to {int, uint, ptr}
- //
- // 4) cast {int, uint, ptr} to {short, ushort}
- // cast {int, uint, ptr} to {sbyte, ubyte}
- // cast {short, ushort} to {sbyte, ubyte}
+void ISel::visitCastInst(CastInst &CI) {
+ const Type *DestTy = CI.getType();
+ Value *Src = CI.getOperand(0);
+ unsigned SrcReg = getReg(Src);
+ const Type *SrcTy = Src->getType();
+ unsigned SrcClass = getClassB(SrcTy);
+ unsigned DestReg = getReg(CI);
+ unsigned DestClass = getClassB(DestTy);
// 1) Implement casts to bool by using compare on the operand followed
// by set if not zero on the result.
- if (targetType == Type::BoolTy)
- {
- BuildMI (BB, X86::CMPri8, 2).addReg (operandReg).addZImm (0);
- BuildMI (BB, X86::SETNEr, 1, destReg);
- return;
- }
+ if (DestTy == Type::BoolTy) {
+ if (SrcClass == cFP || SrcClass == cLong)
+ visitInstruction(CI);
+
+ BuildMI(BB, X86::CMPri8, 2).addReg(SrcReg).addZImm(0);
+ BuildMI(BB, X86::SETNEr, 1, DestReg);
+ return;
+ }
- // 2) Implement casts between values of the same type class (as determined
- // by getClass) by using a register-to-register move.
- unsigned srcClass = getClassB(sourceType);
- unsigned targClass = getClass(targetType);
- static const unsigned regRegMove[] = {
- X86::MOVrr8, X86::MOVrr16, X86::MOVrr32
+ static const unsigned RegRegMove[] = {
+ X86::MOVrr8, X86::MOVrr16, X86::MOVrr32, X86::FpMOV
};
- if (srcClass <= cInt && targClass <= cInt && srcClass == targClass) {
- BuildMI(BB, regRegMove[srcClass], 1, destReg).addReg(operandReg);
+ // Implement casts between values of the same type class (as determined by
+ // getClass) by using a register-to-register move.
+ if (SrcClass == DestClass) {
+ if (SrcClass <= cInt || (SrcClass == cFP && SrcTy == DestTy)) {
+ BuildMI(BB, RegRegMove[SrcClass], 1, DestReg).addReg(SrcReg);
+ } else if (SrcClass == cFP) {
+ if (SrcTy == Type::FloatTy) { // double -> float
+ assert(DestTy == Type::DoubleTy && "Unknown cFP member!");
+ BuildMI(BB, X86::FpMOV, 1, DestReg).addReg(SrcReg);
+ } else { // float -> double
+ assert(SrcTy == Type::DoubleTy && DestTy == Type::FloatTy &&
+ "Unknown cFP member!");
+ // Truncate from double to float by storing to memory as short, then
+ // reading it back.
+ unsigned FltAlign = TM.getTargetData().getFloatAlignment();
+ int FrameIdx = F->getFrameInfo()->CreateStackObject(4, FltAlign);
+ addFrameReference(BuildMI(BB, X86::FSTr32, 5), FrameIdx).addReg(SrcReg);
+ addFrameReference(BuildMI(BB, X86::FLDr32, 5, DestReg), FrameIdx);
+ }
+ } else {
+ visitInstruction(CI);
+ }
return;
}
- // 3) Handle cast of SMALLER int to LARGER int using a move with sign
- // extension or zero extension, depending on whether the source type
- // was signed.
- if ((srcClass <= cInt) && (targClass <= cInt) && (srcClass < targClass))
- {
- static const unsigned ops[] = {
- X86::MOVSXr16r8, X86::MOVSXr32r8, X86::MOVSXr32r16,
- X86::MOVZXr16r8, X86::MOVZXr32r8, X86::MOVZXr32r16
- };
- unsigned srcSigned = sourceType->isSigned ();
- BuildMI (BB, ops[3 * srcSigned + srcClass + targClass - 1], 1,
- destReg).addReg (operandReg);
- return;
- }
- // 4) Handle cast of LARGER int to SMALLER int using a move to EAX
- // followed by a move out of AX or AL.
- if ((srcClass <= cInt) && (targClass <= cInt) && (srcClass > targClass))
- {
- static const unsigned AReg[] = { X86::AL, X86::AX, X86::EAX };
- BuildMI (BB, regRegMove[srcClass], 1,
- AReg[srcClass]).addReg (operandReg);
- BuildMI (BB, regRegMove[targClass], 1, destReg).addReg (AReg[srcClass]);
- return;
- }
+
+ // Handle cast of SMALLER int to LARGER int using a move with sign extension
+ // or zero extension, depending on whether the source type was signed.
+ if (SrcClass <= cInt && DestClass <= cInt && SrcClass < DestClass) {
+ static const unsigned Opc[][3] = {
+ { X86::MOVSXr16r8, X86::MOVSXr32r8, X86::MOVSXr32r16 }, // signed
+ { X86::MOVZXr16r8, X86::MOVZXr32r8, X86::MOVZXr32r16 } // unsigned
+ };
+
+ BuildMI(BB, Opc[SrcTy->isUnsigned()][SrcClass + DestClass - 1], 1,
+ DestReg).addReg(SrcReg);
+ return;
+ }
+
+ // Handle cast of LARGER int to SMALLER int using a move to EAX followed by a
+ // move out of AX or AL.
+ if (SrcClass <= cInt && DestClass <= cInt && SrcClass > DestClass) {
+ static const unsigned AReg[] = { X86::AL, X86::AX, X86::EAX };
+ BuildMI(BB, RegRegMove[SrcClass], 1, AReg[SrcClass]).addReg(SrcReg);
+ BuildMI(BB, RegRegMove[DestClass], 1, DestReg).addReg(AReg[DestClass]);
+ return;
+ }
+
+ // Handle casts from integer to floating point now...
+ if (DestClass == cFP) {
+ // unsigned int -> load as 64 bit int.
+ // unsigned long long -> more complex
+ if (SrcTy->isUnsigned() && SrcTy != Type::UByteTy)
+ visitInstruction(CI); // don't handle unsigned src yet!
+
+ // We don't have the facilities for directly loading byte sized data from
+ // memory. Promote it to 16 bits.
+ if (SrcClass == cByte) {
+ unsigned TmpReg = makeAnotherReg(Type::ShortTy);
+ BuildMI(BB, SrcTy->isSigned() ? X86::MOVSXr16r8 : X86::MOVZXr16r8,
+ 1, TmpReg).addReg(SrcReg);
+ SrcTy = Type::ShortTy; // Pretend the short is our input now!
+ SrcClass = cShort;
+ SrcReg = TmpReg;
+ }
+
+ // Spill the integer to memory and reload it from there...
+ int FrameIdx =
+ F->getFrameInfo()->CreateStackObject(SrcTy, TM.getTargetData());
+
+ if (SrcClass > cInt) visitInstruction(CI);
+ static const unsigned Op1[] = { X86::MOVrm8, X86::MOVrm16, X86::MOVrm32 };
+ addFrameReference(BuildMI(BB, Op1[SrcClass], 5), FrameIdx).addReg(SrcReg);
+
+ static const unsigned Op2[] =
+ { 0, X86::FILDr16, X86::FILDr32, 0, X86::FILDr64 };
+ addFrameReference(BuildMI(BB, Op2[SrcClass], 5, DestReg), FrameIdx);
+ return;
+ }
+
+ // Handle casts from floating point to integer now...
+ if (SrcClass == cFP) {
+ // unsigned long long -> more complex
+ if (SrcClass == cLong)
+ visitInstruction(CI); // don't handle unsigned src yet!
+
+ // We don't have the facilities for directly storing byte sized data to
+ // memory. Promote it to 16 bits. We also must promote unsigned values to
+ // larger classes because we only have signed FP stores.
+ unsigned StoreClass = DestClass;
+ const Type *StoreTy = DestTy;
+ if (StoreClass == cByte || DestTy->isUnsigned())
+ switch (StoreClass) {
+ case cByte: StoreTy = Type::ShortTy; StoreClass = cShort; break;
+ case cShort: StoreTy = Type::IntTy; StoreClass = cInt; break;
+ case cInt: StoreTy = Type::LongTy; StoreClass = cLong; break;
+ default: assert(0 && "Unknown store class!");
+ }
+
+ // Spill the integer to memory and reload it from there...
+ int FrameIdx =
+ F->getFrameInfo()->CreateStackObject(StoreTy, TM.getTargetData());
+
+ static const unsigned Op1[] =
+ { 0, X86::FISTr16, X86::FISTr32, 0, X86::FISTPr64 };
+ addFrameReference(BuildMI(BB, Op1[StoreClass], 5), FrameIdx).addReg(SrcReg);
+
+ if (DestClass > cInt) visitInstruction(CI);
+ static const unsigned Op2[] = { X86::MOVmr8, X86::MOVmr16, X86::MOVmr32 };
+ addFrameReference(BuildMI(BB, Op2[DestClass], 5, DestReg), FrameIdx);
+ return;
+ }
+
// Anything we haven't handled already, we can't (yet) handle at all.
- //
- // FP to integral casts can be handled with FISTP to store onto the
- // stack while converting to integer, followed by a MOV to load from
- // the stack into the result register. Integral to FP casts can be
- // handled with MOV to store onto the stack, followed by a FILD to
- // load from the stack while converting to FP. For the moment, I
- // can't quite get straight in my head how to borrow myself some
- // stack space and write on it. Otherwise, this would be trivial.
visitInstruction (CI);
}
@@ -1164,9 +1287,13 @@
unsigned idxValue = CUI->getValue();
unsigned memberOffset =
TD.getStructLayout(StTy)->MemberOffsets[idxValue];
- // Emit an ADD to add memberOffset to the basePtr.
- BMI(MBB, IP, X86::ADDri32, 2,
- nextBasePtrReg).addReg(basePtrReg).addZImm(memberOffset);
+ if (memberOffset) {
+ // Emit an ADD to add memberOffset to the basePtr.
+ BMI(MBB, IP, X86::ADDri32, 2,
+ nextBasePtrReg).addReg(basePtrReg).addZImm(memberOffset);
+ } else {
+ BMI(MBB, IP, X86::MOVrr32, 1, nextBasePtrReg).addReg(basePtrReg);
+ }
// The next type is the member of the structure selected by the
// index.
Ty = StTy->getElementTypes()[idxValue];
@@ -1276,7 +1403,7 @@
BuildMI(BB, X86::ANDri32, 2, AlignedSize).addReg(AddedSizeReg).addZImm(~15);
// Subtract size from stack pointer, thereby allocating some space.
- BuildMI(BB, X86::SUBri32, 2, X86::ESP).addReg(X86::ESP).addZImm(AlignedSize);
+ BuildMI(BB, X86::SUBrr32, 2, X86::ESP).addReg(X86::ESP).addReg(AlignedSize);
// Put a pointer to the space into the result register, by copying
// the stack pointer.
More information about the llvm-commits mailing list