[llvm-branch-commits] [llvm-branch] r107465 [2/2] - in /llvm/branches/wendling/eh: ./ autoconf/ bindings/ada/llvm/ cmake/ docs/ docs/tutorial/ examples/Kaleidoscope/Chapter3/ examples/Kaleidoscope/Chapter5/ examples/Kaleidoscope/Chapter6/ examples/Kaleidoscope/Chapter7/ include/llvm-c/ include/llvm/ include/llvm/ADT/ include/llvm/Analysis/ include/llvm/CodeGen/ include/llvm/Config/ include/llvm/MC/ include/llvm/MC/MCParser/ include/llvm/Support/ include/llvm/System/ include/llvm/Target/ include/llvm/Transforms/ include/llvm/T...
Bill Wendling
isanbard at gmail.com
Fri Jul 2 02:57:15 PDT 2010
Modified: llvm/branches/wendling/eh/lib/Target/X86/X86CodeEmitter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/X86/X86CodeEmitter.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/X86/X86CodeEmitter.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/X86/X86CodeEmitter.cpp Fri Jul 2 04:57:13 2010
@@ -138,7 +138,7 @@
// MOVPC32r is basically a call plus a pop instruction.
if (Desc.getOpcode() == X86::MOVPC32r)
emitInstruction(*I, &II->get(X86::POP32r));
- NumEmitted++; // Keep track of the # of mi's emitted
+ ++NumEmitted; // Keep track of the # of mi's emitted
}
}
} while (MCE.finishFunction(MF));
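
The change from NumEmitted++ to ++NumEmitted follows the LLVM coding standard's "prefer preincrement" rule: NumEmitted is an llvm::Statistic, a class type, so the postfix form may have to produce a copy of the value while the prefix form never does. A minimal sketch of the idiom (STATISTIC is the real macro; the helper function and debug type are illustrative only):

    #include "llvm/ADT/Statistic.h"
    #define DEBUG_TYPE "x86-emitter"   // illustrative debug type

    STATISTIC(NumEmitted, "Number of machine instructions emitted");

    static void noteEmitted() {
      ++NumEmitted;  // prefix ++ on a class type avoids any temporary result
    }

The same NumFXCH++ -> ++NumFXCH cleanup appears in the X86FloatingPoint.cpp hunks below.
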
Modified: llvm/branches/wendling/eh/lib/Target/X86/X86FastISel.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/X86/X86FastISel.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/X86/X86FastISel.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/X86/X86FastISel.cpp Fri Jul 2 04:57:13 2010
@@ -342,6 +342,12 @@
const User *U = NULL;
unsigned Opcode = Instruction::UserOp1;
if (const Instruction *I = dyn_cast<Instruction>(V)) {
+ // Don't walk into other basic blocks; it's possible we haven't
+ // visited them yet, so the instructions may not yet be assigned
+ // virtual registers.
+ if (MBBMap[I->getParent()] != MBB)
+ return false;
+
Opcode = I->getOpcode();
U = I;
} else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
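
The early exit added above enforces a basic fast-isel invariant: selection runs one basic block at a time, so an instruction in a block that has not been visited yet has no virtual register assigned, and folding it into this block's address computation would read a bogus register. A comment-level sketch of the failure mode (the IR is illustrative):

    // bb1:  %p = getelementptr i32* %base, i32 %i
    //       br label %bb2
    // bb2:  %v = load i32* %p      <- fast-isel is here
    //
    // Walking into %p's definition is only safe when %p lives in the same
    // machine basic block; otherwise bail out and let the common path
    // materialize %p through its (possibly not-yet-created) vreg.
    if (MBBMap[I->getParent()] != MBB)
      return false;
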
@@ -351,7 +357,8 @@
if (const PointerType *Ty = dyn_cast<PointerType>(V->getType()))
if (Ty->getAddressSpace() > 255)
- // Fast instruction selection doesn't support pointers through %fs or %gs
+ // Fast instruction selection doesn't support the special
+ // address spaces.
return false;
switch (Opcode) {
@@ -416,20 +423,33 @@
Disp += SL->getElementOffset(Idx);
} else {
uint64_t S = TD.getTypeAllocSize(GTI.getIndexedType());
- if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
- // Constant-offset addressing.
- Disp += CI->getSExtValue() * S;
- } else if (IndexReg == 0 &&
- (!AM.GV || !Subtarget->isPICStyleRIPRel()) &&
- (S == 1 || S == 2 || S == 4 || S == 8)) {
- // Scaled-index addressing.
- Scale = S;
- IndexReg = getRegForGEPIndex(Op).first;
- if (IndexReg == 0)
- return false;
- } else
- // Unsupported.
- goto unsupported_gep;
+ SmallVector<const Value *, 4> Worklist;
+ Worklist.push_back(Op);
+ do {
+ Op = Worklist.pop_back_val();
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
+ // Constant-offset addressing.
+ Disp += CI->getSExtValue() * S;
+ } else if (isa<AddOperator>(Op) &&
+ isa<ConstantInt>(cast<AddOperator>(Op)->getOperand(1))) {
+ // An add with a constant operand. Fold the constant.
+ ConstantInt *CI =
+ cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
+ Disp += CI->getSExtValue() * S;
+ // Add the other operand back to the work list.
+ Worklist.push_back(cast<AddOperator>(Op)->getOperand(0));
+ } else if (IndexReg == 0 &&
+ (!AM.GV || !Subtarget->isPICStyleRIPRel()) &&
+ (S == 1 || S == 2 || S == 4 || S == 8)) {
+ // Scaled-index addressing.
+ Scale = S;
+ IndexReg = getRegForGEPIndex(Op).first;
+ if (IndexReg == 0)
+ return false;
+ } else
+ // Unsupported.
+ goto unsupported_gep;
+ } while (!Worklist.empty());
}
}
// Check for displacement overflow.
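
The rewritten else-branch generalizes the old single dyn_cast: a small worklist now folds whole chains of adds with constant right operands into the displacement, so an index such as (add (add %x, 4), 8) with element size S contributes (4 + 8) * S to Disp and leaves only %x for the index register. A sketch of the loop's shape (simplified: the bail-outs stand in for the scaled-index and unsupported_gep paths above):

    SmallVector<const Value *, 4> Worklist;
    Worklist.push_back(Op);
    do {
      Op = Worklist.pop_back_val();
      if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
        Disp += CI->getSExtValue() * S;           // constant index
      } else if (const AddOperator *Add = dyn_cast<AddOperator>(Op)) {
        ConstantInt *CI = dyn_cast<ConstantInt>(Add->getOperand(1));
        if (!CI)
          return false;                           // simplified bail-out
        Disp += CI->getSExtValue() * S;           // fold the constant half
        Worklist.push_back(Add->getOperand(0));   // keep walking the other half
      } else {
        return false;                             // simplified bail-out
      }
    } while (!Worklist.empty());
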
@@ -915,7 +935,7 @@
if (CI->getIntrinsicID() == Intrinsic::sadd_with_overflow ||
CI->getIntrinsicID() == Intrinsic::uadd_with_overflow) {
const MachineInstr *SetMI = 0;
- unsigned Reg = lookUpRegForValue(EI);
+ unsigned Reg = getRegForValue(EI);
for (MachineBasicBlock::const_reverse_iterator
RI = MBB->rbegin(), RE = MBB->rend(); RI != RE; ++RI) {
@@ -1179,8 +1199,8 @@
// Emit inline code to store the stack guard onto the stack.
EVT PtrTy = TLI.getPointerTy();
- const Value *Op1 = I.getOperand(1); // The guard's value.
- const AllocaInst *Slot = cast<AllocaInst>(I.getOperand(2));
+ const Value *Op1 = I.getArgOperand(0); // The guard's value.
+ const AllocaInst *Slot = cast<AllocaInst>(I.getArgOperand(1));
// Grab the frame index.
X86AddressMode AM;
@@ -1191,7 +1211,7 @@
return true;
}
case Intrinsic::objectsize: {
- ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand(2));
+ ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(1));
const Type *Ty = I.getCalledFunction()->getReturnType();
assert(CI && "Non-constant type in Intrinsic::objectsize?");
@@ -1246,8 +1266,8 @@
if (!isTypeLegal(RetTy, VT))
return false;
- const Value *Op1 = I.getOperand(1);
- const Value *Op2 = I.getOperand(2);
+ const Value *Op1 = I.getArgOperand(0);
+ const Value *Op2 = I.getArgOperand(1);
unsigned Reg1 = getRegForValue(Op1);
unsigned Reg2 = getRegForValue(Op2);
@@ -1290,7 +1310,7 @@
bool X86FastISel::X86SelectCall(const Instruction *I) {
const CallInst *CI = cast<CallInst>(I);
- const Value *Callee = I->getOperand(0);
+ const Value *Callee = CI->getCalledValue();
// Can't handle inline asm yet.
if (isa<InlineAsm>(Callee))
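
The operand-index rewrites above (getOperand(N+1) -> getArgOperand(N), getOperand(0) -> getCalledValue()) belong to the CallInst accessor migration of this period: a call stores its callee as an ordinary operand (at this revision, operand 0, with arguments at 1..n), and the new accessors hide that layout so it can change without auditing every backend. The equivalence being relied on, as a sketch (operand layout as of this revision):

    #include "llvm/Instructions.h"
    using namespace llvm;

    void inspect(const CallInst *CI) {
      // Layout-dependent (old): callee at 0, arguments shifted by one.
      const Value *OldCallee = CI->getOperand(0);
      const Value *OldArg0   = CI->getOperand(1);
      // Layout-independent (new), as used by this commit.
      assert(CI->getCalledValue() == OldCallee && "same callee");
      assert(CI->getArgOperand(0) == OldArg0   && "same first argument");
      (void)OldCallee; (void)OldArg0;
    }
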
@@ -1548,6 +1568,7 @@
BuildMI(MBB, DL, TII.get(AdjStackUp)).addImm(NumBytes).addImm(0);
// Now handle call return value (if any).
+ SmallVector<unsigned, 4> UsedRegs;
if (RetVT.getSimpleVT().SimpleTy != MVT::isVoid) {
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CC, false, TM, RVLocs, I->getParent()->getContext());
@@ -1575,6 +1596,8 @@
RVLocs[0].getLocReg(), DstRC, SrcRC, DL);
assert(Emitted && "Failed to emit a copy instruction!"); Emitted=Emitted;
Emitted = true;
+ UsedRegs.push_back(RVLocs[0].getLocReg());
+
if (CopyVT != RVLocs[0].getValVT()) {
// Round the F80 the right size, which also moves to the appropriate xmm
// register. This is accomplished by storing the F80 value in memory and
@@ -1602,6 +1625,9 @@
UpdateValueMap(I, ResultReg);
}
+ // Set all unused physreg defs as dead.
+ static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);
+
return true;
}
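
UsedRegs records the physical return registers actually copied out of the call; the new setPhysRegsDeadExcept call then marks every other register the call defines -- its full clobber list -- as dead. The effect, roughly (the MI dump below is illustrative):

    // Call to i32 @f(): defines EAX, ECX, EDX, ... but only EAX is read.
    //   before:  CALLpcrel32 @f, <imp-def EAX>, <imp-def ECX>, <imp-def EDX>
    //   after:   CALLpcrel32 @f, <imp-def EAX>, <imp-def dead ECX>,
    //                            <imp-def dead EDX>
    // The dead markers let the (fast) register allocator reuse ECX/EDX
    // immediately after the call instead of treating them as live.
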
Modified: llvm/branches/wendling/eh/lib/Target/X86/X86FloatingPoint.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/X86/X86FloatingPoint.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/X86/X86FloatingPoint.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/X86/X86FloatingPoint.cpp Fri Jul 2 04:57:13 2010
@@ -133,7 +133,7 @@
// Emit an fxch to update the runtime processor's version of the state.
BuildMI(*MBB, I, dl, TII->get(X86::XCH_F)).addReg(STReg);
- NumFXCH++;
+ ++NumFXCH;
}
void duplicateToTop(unsigned RegNo, unsigned AsReg, MachineInstr *I) {
@@ -1021,7 +1021,7 @@
// StackTop can be 1 if a FpSET_ST0_* was before this. Exchange them.
if (StackTop == 1) {
BuildMI(*MBB, I, dl, TII->get(X86::XCH_F)).addReg(X86::ST1);
- NumFXCH++;
+ ++NumFXCH;
StackTop = 0;
break;
}
@@ -1058,7 +1058,7 @@
// StackTop can be 1 if a FpSET_ST0_* was before this. Exchange them.
if (StackTop == 1) {
BuildMI(*MBB, I, dl, TII->get(X86::XCH_F)).addReg(X86::ST1);
- NumFXCH++;
+ ++NumFXCH;
StackTop = 0;
break;
}
Modified: llvm/branches/wendling/eh/lib/Target/X86/X86ISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/X86/X86ISelDAGToDAG.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/X86/X86ISelDAGToDAG.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/X86/X86ISelDAGToDAG.cpp Fri Jul 2 04:57:13 2010
@@ -1646,6 +1646,26 @@
SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Flag, N1, InFlag), 0);
}
+ // Prevent use of AH in a REX instruction by referencing AX instead.
+ if (HiReg == X86::AH && Subtarget->is64Bit() &&
+ !SDValue(Node, 1).use_empty()) {
+ SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+ X86::AX, MVT::i16, InFlag);
+ InFlag = Result.getValue(2);
+ // Get the low part if needed. Don't use getCopyFromReg for aliasing
+ // registers.
+ if (!SDValue(Node, 0).use_empty())
+ ReplaceUses(SDValue(Node, 0),
+ CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result));
+
+ // Shift AX down 8 bits.
+ Result = SDValue(CurDAG->getMachineNode(X86::SHR16ri, dl, MVT::i16,
+ Result,
+ CurDAG->getTargetConstant(8, MVT::i8)), 0);
+ // Then truncate it down to i8.
+ ReplaceUses(SDValue(Node, 1),
+ CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result));
+ }
// Copy the low half of the result, if it is needed.
if (!SDValue(Node, 0).use_empty()) {
SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
@@ -1656,24 +1676,9 @@
}
// Copy the high half of the result, if it is needed.
if (!SDValue(Node, 1).use_empty()) {
- SDValue Result;
- if (HiReg == X86::AH && Subtarget->is64Bit()) {
- // Prevent use of AH in a REX instruction by referencing AX instead.
- // Shift it down 8 bits.
- Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
- X86::AX, MVT::i16, InFlag);
- InFlag = Result.getValue(2);
- Result = SDValue(CurDAG->getMachineNode(X86::SHR16ri, dl, MVT::i16,
- Result,
- CurDAG->getTargetConstant(8, MVT::i8)), 0);
- // Then truncate it down to i8.
- Result = CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl,
- MVT::i8, Result);
- } else {
- Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
- HiReg, NVT, InFlag);
- InFlag = Result.getValue(2);
- }
+ SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+ HiReg, NVT, InFlag);
+ InFlag = Result.getValue(2);
ReplaceUses(SDValue(Node, 1), Result);
DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
}
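
This hunk (and the matching DIV/IDIV hunk below) hoists the AH workaround ahead of the generic result copies: in 64-bit mode, an instruction carrying a REX prefix cannot encode AH, so the high byte of the widened result is recovered by copying all of AX, shifting right by 8, and taking the low byte subregister. Numerically (illustrative C):

    uint8_t high_byte(uint16_t ax) {   /* ax models AX after an 8-bit MUL */
      return (uint8_t)(ax >> 8);       /* SHR16ri by 8, then sub_8bit extract */
    }

Extracting the low subregister for AL (rather than a second CopyFromReg) follows from the comment in the hunk: the fast register allocator dislikes multiple CopyFromReg nodes on aliasing registers.
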
@@ -1786,6 +1791,29 @@
SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Flag, N1, InFlag), 0);
}
+ // Prevent use of AH in a REX instruction by referencing AX instead.
+ // Shift it down 8 bits.
+ if (HiReg == X86::AH && Subtarget->is64Bit() &&
+ !SDValue(Node, 1).use_empty()) {
+ SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+ X86::AX, MVT::i16, InFlag);
+ InFlag = Result.getValue(2);
+
+ // If we also need AL (the quotient), get it by extracting a subreg from
+ // Result. The fast register allocator does not like multiple CopyFromReg
+ // nodes using aliasing registers.
+ if (!SDValue(Node, 0).use_empty())
+ ReplaceUses(SDValue(Node, 0),
+ CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result));
+
+ // Shift AX right by 8 bits instead of using AH.
+ Result = SDValue(CurDAG->getMachineNode(X86::SHR16ri, dl, MVT::i16,
+ Result,
+ CurDAG->getTargetConstant(8, MVT::i8)),
+ 0);
+ ReplaceUses(SDValue(Node, 1),
+ CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result));
+ }
// Copy the division (low) result, if it is needed.
if (!SDValue(Node, 0).use_empty()) {
SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
@@ -1796,25 +1824,9 @@
}
// Copy the remainder (high) result, if it is needed.
if (!SDValue(Node, 1).use_empty()) {
- SDValue Result;
- if (HiReg == X86::AH && Subtarget->is64Bit()) {
- // Prevent use of AH in a REX instruction by referencing AX instead.
- // Shift it down 8 bits.
- Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
- X86::AX, MVT::i16, InFlag);
- InFlag = Result.getValue(2);
- Result = SDValue(CurDAG->getMachineNode(X86::SHR16ri, dl, MVT::i16,
- Result,
- CurDAG->getTargetConstant(8, MVT::i8)),
- 0);
- // Then truncate it down to i8.
- Result = CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl,
- MVT::i8, Result);
- } else {
- Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
- HiReg, NVT, InFlag);
- InFlag = Result.getValue(2);
- }
+ SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+ HiReg, NVT, InFlag);
+ InFlag = Result.getValue(2);
ReplaceUses(SDValue(Node, 1), Result);
DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
}
Modified: llvm/branches/wendling/eh/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/X86/X86ISelLowering.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/X86/X86ISelLowering.cpp Fri Jul 2 04:57:13 2010
@@ -347,6 +347,12 @@
if (!Subtarget->hasSSE2())
setOperationAction(ISD::MEMBARRIER , MVT::Other, Expand);
+ // On X86 and X86-64, atomic operations are lowered to locked instructions.
+ // Locked instructions, in turn, have implicit fence semantics (all memory
+ // operations are flushed before issuing the locked instruction, and they
+ // are not buffered), so we can fold away the common pattern of
+ // fence-atomic-fence.
+ setShouldFoldAtomicFences(true);
// Expand certain atomics
setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i8, Custom);
@@ -1012,7 +1018,6 @@
setTargetDAGCombine(ISD::SRL);
setTargetDAGCombine(ISD::OR);
setTargetDAGCombine(ISD::STORE);
- setTargetDAGCombine(ISD::MEMBARRIER);
setTargetDAGCombine(ISD::ZERO_EXTEND);
if (Subtarget->is64Bit())
setTargetDAGCombine(ISD::MUL);
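
setShouldFoldAtomicFences(true) is the replacement for the hand-rolled MEMBARRIER DAG combine deleted later in this file: every x86 atomic RMW lowers to a lock-prefixed instruction, which is already a full barrier, so explicit fences adjacent to it are redundant. A reproducer in period-appropriate GCC builtins (the generated assembly shown is illustrative):

    int counter;

    void bump(void) {
      __sync_synchronize();               /* fence   */
      __sync_fetch_and_add(&counter, 1);  /* lock op */
      __sync_synchronize();               /* fence   */
    }
    /* With the fold, the whole body becomes a single
         lock addl $1, counter(%rip)
       because the lock prefix already orders surrounding memory ops. */
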
@@ -2056,7 +2061,6 @@
FPDiff, dl);
}
- bool WasGlobalOrExternal = false;
if (getTargetMachine().getCodeModel() == CodeModel::Large) {
assert(Is64Bit && "Large code model is only legal in 64-bit mode.");
// In the 64-bit large code model, we have to make all calls
@@ -2064,7 +2068,6 @@
// pc-relative offset may not be large enough to hold the whole
// address.
} else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
- WasGlobalOrExternal = true;
// If the callee is a GlobalAddress node (quite common, every direct call
// is) turn it into a TargetGlobalAddress node so that legalize doesn't hack
// it.
@@ -2096,7 +2099,6 @@
G->getOffset(), OpFlags);
}
} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
- WasGlobalOrExternal = true;
unsigned char OpFlags = 0;
// On ELF targets, in either X86-64 or X86-32 mode, direct calls to external
@@ -2419,7 +2421,6 @@
((X86TargetMachine&)getTargetMachine()).getInstrInfo();
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
- EVT RegVT = VA.getLocVT();
SDValue Arg = Outs[i].Val;
ISD::ArgFlagsTy Flags = Outs[i].Flags;
if (VA.getLocInfo() == CCValAssign::Indirect)
@@ -4457,7 +4458,6 @@
unsigned NumElems = VT.getVectorNumElements();
unsigned NewWidth = (NumElems == 4) ? 2 : 4;
EVT MaskVT = MVT::getIntVectorWithNumElements(NewWidth);
- EVT MaskEltVT = MaskVT.getVectorElementType();
EVT NewVT = MaskVT;
switch (VT.getSimpleVT().SimpleTy) {
default: assert(false && "Unexpected!");
@@ -6021,6 +6021,7 @@
bool NeedCF = false;
bool NeedOF = false;
switch (X86CC) {
+ default: break;
case X86::COND_A: case X86::COND_AE:
case X86::COND_B: case X86::COND_BE:
NeedCF = true;
@@ -6030,120 +6031,129 @@
case X86::COND_O: case X86::COND_NO:
NeedOF = true;
break;
- default: break;
}
// See if we can use the EFLAGS value from the operand instead of
// doing a separate TEST. TEST always sets OF and CF to 0, so unless
// we prove that the arithmetic won't overflow, we can't use OF or CF.
- if (Op.getResNo() == 0 && !NeedOF && !NeedCF) {
- unsigned Opcode = 0;
- unsigned NumOperands = 0;
- switch (Op.getNode()->getOpcode()) {
- case ISD::ADD:
- // Due to an isel shortcoming, be conservative if this add is
- // likely to be selected as part of a load-modify-store
- // instruction. When the root node in a match is a store, isel
- // doesn't know how to remap non-chain non-flag uses of other
- // nodes in the match, such as the ADD in this case. This leads
- // to the ADD being left around and reselected, with the result
- // being two adds in the output. Alas, even if none our users
- // are stores, that doesn't prove we're O.K. Ergo, if we have
- // any parents that aren't CopyToReg or SETCC, eschew INC/DEC.
- // A better fix seems to require climbing the DAG back to the
- // root, and it doesn't seem to be worth the effort.
- for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
- UE = Op.getNode()->use_end(); UI != UE; ++UI)
- if (UI->getOpcode() != ISD::CopyToReg && UI->getOpcode() != ISD::SETCC)
- goto default_case;
- if (ConstantSDNode *C =
- dyn_cast<ConstantSDNode>(Op.getNode()->getOperand(1))) {
- // An add of one will be selected as an INC.
- if (C->getAPIntValue() == 1) {
- Opcode = X86ISD::INC;
- NumOperands = 1;
- break;
- }
- // An add of negative one (subtract of one) will be selected as a DEC.
- if (C->getAPIntValue().isAllOnesValue()) {
- Opcode = X86ISD::DEC;
- NumOperands = 1;
- break;
- }
+ if (Op.getResNo() != 0 || NeedOF || NeedCF)
+ // Emit a CMP with 0, which is the TEST pattern.
+ return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op,
+ DAG.getConstant(0, Op.getValueType()));
+
+ unsigned Opcode = 0;
+ unsigned NumOperands = 0;
+ switch (Op.getNode()->getOpcode()) {
+ case ISD::ADD:
+ // Due to an isel shortcoming, be conservative if this add is likely to be
+ // selected as part of a load-modify-store instruction. When the root node
+ // in a match is a store, isel doesn't know how to remap non-chain non-flag
+ // uses of other nodes in the match, such as the ADD in this case. This
+ // leads to the ADD being left around and reselected, with the result being
+ // two adds in the output. Alas, even if none of our users are stores, that
+ // doesn't prove we're O.K. Ergo, if we have any parents that aren't
+ // CopyToReg or SETCC, eschew INC/DEC. A better fix seems to require
+ // climbing the DAG back to the root, and it doesn't seem to be worth the
+ // effort.
+ for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
+ UE = Op.getNode()->use_end(); UI != UE; ++UI)
+ if (UI->getOpcode() != ISD::CopyToReg && UI->getOpcode() != ISD::SETCC)
+ goto default_case;
+
+ if (ConstantSDNode *C =
+ dyn_cast<ConstantSDNode>(Op.getNode()->getOperand(1))) {
+ // An add of one will be selected as an INC.
+ if (C->getAPIntValue() == 1) {
+ Opcode = X86ISD::INC;
+ NumOperands = 1;
+ break;
}
- // Otherwise use a regular EFLAGS-setting add.
- Opcode = X86ISD::ADD;
- NumOperands = 2;
- break;
- case ISD::AND: {
- // If the primary and result isn't used, don't bother using X86ISD::AND,
- // because a TEST instruction will be better.
- bool NonFlagUse = false;
- for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
- UE = Op.getNode()->use_end(); UI != UE; ++UI) {
- SDNode *User = *UI;
- unsigned UOpNo = UI.getOperandNo();
- if (User->getOpcode() == ISD::TRUNCATE && User->hasOneUse()) {
- // Look pass truncate.
- UOpNo = User->use_begin().getOperandNo();
- User = *User->use_begin();
- }
- if (User->getOpcode() != ISD::BRCOND &&
- User->getOpcode() != ISD::SETCC &&
- (User->getOpcode() != ISD::SELECT || UOpNo != 0)) {
- NonFlagUse = true;
- break;
- }
+
+ // An add of negative one (subtract of one) will be selected as a DEC.
+ if (C->getAPIntValue().isAllOnesValue()) {
+ Opcode = X86ISD::DEC;
+ NumOperands = 1;
+ break;
}
- if (!NonFlagUse)
+ }
+
+ // Otherwise use a regular EFLAGS-setting add.
+ Opcode = X86ISD::ADD;
+ NumOperands = 2;
+ break;
+ case ISD::AND: {
+ // If the primary 'and' result isn't used, don't bother using X86ISD::AND,
+ // because a TEST instruction will be better.
+ bool NonFlagUse = false;
+ for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
+ UE = Op.getNode()->use_end(); UI != UE; ++UI) {
+ SDNode *User = *UI;
+ unsigned UOpNo = UI.getOperandNo();
+ if (User->getOpcode() == ISD::TRUNCATE && User->hasOneUse()) {
+ // Look past the truncate.
+ UOpNo = User->use_begin().getOperandNo();
+ User = *User->use_begin();
+ }
+
+ if (User->getOpcode() != ISD::BRCOND &&
+ User->getOpcode() != ISD::SETCC &&
+ (User->getOpcode() != ISD::SELECT || UOpNo != 0)) {
+ NonFlagUse = true;
break;
+ }
}
+
+ if (!NonFlagUse)
+ break;
+ }
// FALL THROUGH
- case ISD::SUB:
- case ISD::OR:
- case ISD::XOR:
- // Due to the ISEL shortcoming noted above, be conservative if this op is
- // likely to be selected as part of a load-modify-store instruction.
- for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
+ case ISD::SUB:
+ case ISD::OR:
+ case ISD::XOR:
+ // Due to the ISEL shortcoming noted above, be conservative if this op is
+ // likely to be selected as part of a load-modify-store instruction.
+ for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
UE = Op.getNode()->use_end(); UI != UE; ++UI)
- if (UI->getOpcode() == ISD::STORE)
- goto default_case;
- // Otherwise use a regular EFLAGS-setting instruction.
- switch (Op.getNode()->getOpcode()) {
- case ISD::SUB: Opcode = X86ISD::SUB; break;
- case ISD::OR: Opcode = X86ISD::OR; break;
- case ISD::XOR: Opcode = X86ISD::XOR; break;
- case ISD::AND: Opcode = X86ISD::AND; break;
- default: llvm_unreachable("unexpected operator!");
- }
- NumOperands = 2;
- break;
- case X86ISD::ADD:
- case X86ISD::SUB:
- case X86ISD::INC:
- case X86ISD::DEC:
- case X86ISD::OR:
- case X86ISD::XOR:
- case X86ISD::AND:
- return SDValue(Op.getNode(), 1);
- default:
- default_case:
- break;
- }
- if (Opcode != 0) {
- SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
- SmallVector<SDValue, 4> Ops;
- for (unsigned i = 0; i != NumOperands; ++i)
- Ops.push_back(Op.getOperand(i));
- SDValue New = DAG.getNode(Opcode, dl, VTs, &Ops[0], NumOperands);
- DAG.ReplaceAllUsesWith(Op, New);
- return SDValue(New.getNode(), 1);
+ if (UI->getOpcode() == ISD::STORE)
+ goto default_case;
+
+ // Otherwise use a regular EFLAGS-setting instruction.
+ switch (Op.getNode()->getOpcode()) {
+ default: llvm_unreachable("unexpected operator!");
+ case ISD::SUB: Opcode = X86ISD::SUB; break;
+ case ISD::OR: Opcode = X86ISD::OR; break;
+ case ISD::XOR: Opcode = X86ISD::XOR; break;
+ case ISD::AND: Opcode = X86ISD::AND; break;
}
+
+ NumOperands = 2;
+ break;
+ case X86ISD::ADD:
+ case X86ISD::SUB:
+ case X86ISD::INC:
+ case X86ISD::DEC:
+ case X86ISD::OR:
+ case X86ISD::XOR:
+ case X86ISD::AND:
+ return SDValue(Op.getNode(), 1);
+ default:
+ default_case:
+ break;
}
- // Otherwise just emit a CMP with 0, which is the TEST pattern.
- return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op,
- DAG.getConstant(0, Op.getValueType()));
+ if (Opcode == 0)
+ // Emit a CMP with 0, which is the TEST pattern.
+ return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op,
+ DAG.getConstant(0, Op.getValueType()));
+
+ SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
+ SmallVector<SDValue, 4> Ops;
+ for (unsigned i = 0; i != NumOperands; ++i)
+ Ops.push_back(Op.getOperand(i));
+
+ SDValue New = DAG.getNode(Opcode, dl, VTs, &Ops[0], NumOperands);
+ DAG.ReplaceAllUsesWith(Op, New);
+ return SDValue(New.getNode(), 1);
}
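
Behaviorally, the EmitTest rewrite is a control-flow flattening (an early return for the operands that must go through TEST, then a single switch), preserving the optimization the comments describe: when the value feeding the compare is itself an EFLAGS-setting ADD/SUB/INC/DEC/logic op, its flags result (value number 1) is reused and no TEST/CMP is emitted, with add +/-1 selected as INC/DEC. At the source level the payoff is, e.g.:

    bool dec_and_test(int &x) {
      return --x == 0;  // intended selection: "decl ...; sete" -- the DEC's
                        // EFLAGS feed the setcc directly, no extra testl/cmpl
    }
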
/// Emit nodes that will be selected as "cmp Op0,Op1", or something
@@ -6170,15 +6180,21 @@
Op1 = Op1.getOperand(0);
SDValue LHS, RHS;
- if (Op1.getOpcode() == ISD::SHL) {
- if (ConstantSDNode *And10C = dyn_cast<ConstantSDNode>(Op1.getOperand(0)))
- if (And10C->getZExtValue() == 1) {
- LHS = Op0;
- RHS = Op1.getOperand(1);
- }
- } else if (Op0.getOpcode() == ISD::SHL) {
+ if (Op1.getOpcode() == ISD::SHL)
+ std::swap(Op0, Op1);
+ if (Op0.getOpcode() == ISD::SHL) {
if (ConstantSDNode *And00C = dyn_cast<ConstantSDNode>(Op0.getOperand(0)))
if (And00C->getZExtValue() == 1) {
+ // If we looked past a truncate, check that it's only truncating away
+ // known zeros.
+ unsigned BitWidth = Op0.getValueSizeInBits();
+ unsigned AndBitWidth = And.getValueSizeInBits();
+ if (BitWidth > AndBitWidth) {
+ APInt Mask = APInt::getAllOnesValue(BitWidth), Zeros, Ones;
+ DAG.ComputeMaskedBits(Op0, Mask, Zeros, Ones);
+ if (Zeros.countLeadingOnes() < BitWidth - AndBitWidth)
+ return SDValue();
+ }
LHS = Op1;
RHS = Op0.getOperand(1);
}
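
The ComputeMaskedBits check closes a soundness hole in the (and x, (shl 1, n)) -> BT transform when the AND was found by looking through a TRUNCATE: the bit index is only meaningful in the narrow type if the truncation dropped nothing but known-zero bits. Worked through for an i32 Op0 feeding an i8 AND:

    // BitWidth = 32, AndBitWidth = 8  =>  bits 8..31 of Op0 must be known zero:
    //   Zeros.countLeadingOnes() >= 32 - 8 = 24  -> transform is safe
    //   otherwise                                -> return SDValue() (bail out)
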
@@ -6618,6 +6634,7 @@
SDNode *NewBR =
DAG.UpdateNodeOperands(User, User->getOperand(0), Dest);
assert(NewBR == User);
+ (void)NewBR;
Dest = FalseBB;
Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(),
@@ -6689,7 +6706,6 @@
SDValue Flag;
- EVT IntPtr = getPointerTy();
EVT SPTy = Subtarget->is64Bit() ? MVT::i64 : MVT::i32;
Chain = DAG.getCopyToReg(Chain, dl, X86::EAX, Size, Flag);
@@ -6769,9 +6785,6 @@
SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
// X86-64 va_list is a struct { i32, i32, i8*, i8* }.
assert(Subtarget->is64Bit() && "This code only handles 64-bit va_arg!");
- SDValue Chain = Op.getOperand(0);
- SDValue SrcPtr = Op.getOperand(1);
- SDValue SrcSV = Op.getOperand(2);
report_fatal_error("VAArgInst is not yet implemented for x86-64!");
return SDValue();
@@ -9840,61 +9853,6 @@
return SDValue();
}
-// On X86 and X86-64, atomic operations are lowered to locked instructions.
-// Locked instructions, in turn, have implicit fence semantics (all memory
-// operations are flushed before issuing the locked instruction, and the
-// are not buffered), so we can fold away the common pattern of
-// fence-atomic-fence.
-static SDValue PerformMEMBARRIERCombine(SDNode* N, SelectionDAG &DAG) {
- SDValue atomic = N->getOperand(0);
- switch (atomic.getOpcode()) {
- case ISD::ATOMIC_CMP_SWAP:
- case ISD::ATOMIC_SWAP:
- case ISD::ATOMIC_LOAD_ADD:
- case ISD::ATOMIC_LOAD_SUB:
- case ISD::ATOMIC_LOAD_AND:
- case ISD::ATOMIC_LOAD_OR:
- case ISD::ATOMIC_LOAD_XOR:
- case ISD::ATOMIC_LOAD_NAND:
- case ISD::ATOMIC_LOAD_MIN:
- case ISD::ATOMIC_LOAD_MAX:
- case ISD::ATOMIC_LOAD_UMIN:
- case ISD::ATOMIC_LOAD_UMAX:
- break;
- default:
- return SDValue();
- }
-
- SDValue fence = atomic.getOperand(0);
- if (fence.getOpcode() != ISD::MEMBARRIER)
- return SDValue();
-
- switch (atomic.getOpcode()) {
- case ISD::ATOMIC_CMP_SWAP:
- return SDValue(DAG.UpdateNodeOperands(atomic.getNode(),
- fence.getOperand(0),
- atomic.getOperand(1), atomic.getOperand(2),
- atomic.getOperand(3)), atomic.getResNo());
- case ISD::ATOMIC_SWAP:
- case ISD::ATOMIC_LOAD_ADD:
- case ISD::ATOMIC_LOAD_SUB:
- case ISD::ATOMIC_LOAD_AND:
- case ISD::ATOMIC_LOAD_OR:
- case ISD::ATOMIC_LOAD_XOR:
- case ISD::ATOMIC_LOAD_NAND:
- case ISD::ATOMIC_LOAD_MIN:
- case ISD::ATOMIC_LOAD_MAX:
- case ISD::ATOMIC_LOAD_UMIN:
- case ISD::ATOMIC_LOAD_UMAX:
- return SDValue(DAG.UpdateNodeOperands(atomic.getNode(),
- fence.getOperand(0),
- atomic.getOperand(1), atomic.getOperand(2)),
- atomic.getResNo());
- default:
- return SDValue();
- }
-}
-
static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG) {
// (i32 zext (and (i8 x86isd::setcc_carry), 1)) ->
// (and (i32 x86isd::setcc_carry), 1)
@@ -9942,7 +9900,6 @@
case X86ISD::FAND: return PerformFANDCombine(N, DAG);
case X86ISD::BT: return PerformBTCombine(N, DAG, DCI);
case X86ISD::VZEXT_MOVL: return PerformVZEXT_MOVLCombine(N, DAG);
- case ISD::MEMBARRIER: return PerformMEMBARRIERCombine(N, DAG);
case ISD::ZERO_EXTEND: return PerformZExtCombine(N, DAG);
}
@@ -10065,8 +10022,8 @@
// so don't worry about this.
// Verify this is a simple bswap.
- if (CI->getNumOperands() != 2 ||
- CI->getType() != CI->getOperand(1)->getType() ||
+ if (CI->getNumArgOperands() != 1 ||
+ CI->getType() != CI->getArgOperand(0)->getType() ||
!CI->getType()->isIntegerTy())
return false;
@@ -10079,7 +10036,7 @@
Module *M = CI->getParent()->getParent()->getParent();
Constant *Int = Intrinsic::getDeclaration(M, Intrinsic::bswap, Tys, 1);
- Value *Op = CI->getOperand(1);
+ Value *Op = CI->getArgOperand(0);
Op = CallInst::Create(Int, Op, CI->getName(), CI);
CI->replaceAllUsesWith(Op);
@@ -10212,7 +10169,6 @@
/// vector. If it is invalid, don't add anything to Ops.
void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
char Constraint,
- bool hasMemory,
std::vector<SDValue>&Ops,
SelectionDAG &DAG) const {
SDValue Result(0, 0);
@@ -10286,6 +10242,13 @@
break;
}
+ // In any sort of PIC mode, addresses need to be computed at runtime by
+ // adding in a register or some sort of table lookup. These can't
+ // be used as immediates.
+ if (Subtarget->isPICStyleGOT() || Subtarget->isPICStyleStubPIC() ||
+ Subtarget->isPICStyleRIPRel())
+ return;
+
// If we are in non-pic codegen mode, we allow the address of a global (with
// an optional displacement) to be used with 'i'.
GlobalAddressSDNode *GA = 0;
@@ -10321,11 +10284,7 @@
getTargetMachine())))
return;
- if (hasMemory)
- Op = LowerGlobalAddress(GV, Op.getDebugLoc(), Offset, DAG);
- else
- Op = DAG.getTargetGlobalAddress(GV, GA->getValueType(0), Offset);
- Result = Op;
+ Result = DAG.getTargetGlobalAddress(GV, GA->getValueType(0), Offset);
break;
}
}
@@ -10334,8 +10293,7 @@
Ops.push_back(Result);
return;
}
- return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, hasMemory,
- Ops, DAG);
+ return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}
std::vector<unsigned> X86TargetLowering::
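
The early return added in this function means the 'i' (immediate) constraint never matches a global's address under any PIC flavor, where that address is computed at load or run time (GOT entry, Darwin stub, or RIP-relative) and so cannot be an absolute immediate. A small reproducer of the case now rejected (illustrative; assumes building with -fPIC):

    int g;

    void f(void) {
      /* "i" demands an absolute compile-time immediate.  Valid in static
         codegen; under -fPIC the address of g is unknown until load time,
         so the operand is now refused rather than lowered incorrectly. */
      __asm__ volatile ("" : : "i"(&g));
    }
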
Modified: llvm/branches/wendling/eh/lib/Target/X86/X86ISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/X86/X86ISelLowering.h?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/X86/X86ISelLowering.h (original)
+++ llvm/branches/wendling/eh/lib/Target/X86/X86ISelLowering.h Fri Jul 2 04:57:13 2010
@@ -500,7 +500,6 @@
/// being processed is 'm'.
virtual void LowerAsmOperandForConstraint(SDValue Op,
char ConstraintLetter,
- bool hasMemory,
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const;
Modified: llvm/branches/wendling/eh/lib/Target/X86/X86Instr64bit.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/X86/X86Instr64bit.td?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/X86/X86Instr64bit.td (original)
+++ llvm/branches/wendling/eh/lib/Target/X86/X86Instr64bit.td Fri Jul 2 04:57:13 2010
@@ -1093,7 +1093,7 @@
// Logical Instructions...
//
-let isTwoAddress = 1 , AddedComplexity = 15 in
+let Constraints = "$src = $dst" , AddedComplexity = 15 in
def NOT64r : RI<0xF7, MRM2r, (outs GR64:$dst), (ins GR64:$src), "not{q}\t$dst",
[(set GR64:$dst, (not GR64:$src))]>;
def NOT64m : RI<0xF7, MRM2m, (outs), (ins i64mem:$dst), "not{q}\t$dst",
@@ -1103,7 +1103,7 @@
def AND64i32 : RIi32<0x25, RawFrm, (outs), (ins i64i32imm:$src),
"and{q}\t{$src, %rax|%rax, $src}", []>;
-let isTwoAddress = 1 in {
+let Constraints = "$src1 = $dst" in {
let isCommutable = 1 in
def AND64rr : RI<0x21, MRMDestReg,
(outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
@@ -1130,7 +1130,7 @@
"and{q}\t{$src2, $dst|$dst, $src2}",
[(set GR64:$dst, EFLAGS,
(X86and_flag GR64:$src1, i64immSExt32:$src2))]>;
-} // isTwoAddress
+} // Constraints = "$src1 = $dst"
def AND64mr : RI<0x21, MRMDestMem,
(outs), (ins i64mem:$dst, GR64:$src),
@@ -1148,7 +1148,7 @@
[(store (and (loadi64 addr:$dst), i64immSExt32:$src), addr:$dst),
(implicit EFLAGS)]>;
-let isTwoAddress = 1 in {
+let Constraints = "$src1 = $dst" in {
let isCommutable = 1 in
def OR64rr : RI<0x09, MRMDestReg, (outs GR64:$dst),
(ins GR64:$src1, GR64:$src2),
@@ -1175,7 +1175,7 @@
"or{q}\t{$src2, $dst|$dst, $src2}",
[(set GR64:$dst, EFLAGS,
(X86or_flag GR64:$src1, i64immSExt32:$src2))]>;
-} // isTwoAddress
+} // Constraints = "$src1 = $dst"
def OR64mr : RI<0x09, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
"or{q}\t{$src, $dst|$dst, $src}",
@@ -1193,7 +1193,7 @@
def OR64i32 : RIi32<0x0D, RawFrm, (outs), (ins i64i32imm:$src),
"or{q}\t{$src, %rax|%rax, $src}", []>;
-let isTwoAddress = 1 in {
+let Constraints = "$src1 = $dst" in {
let isCommutable = 1 in
def XOR64rr : RI<0x31, MRMDestReg, (outs GR64:$dst),
(ins GR64:$src1, GR64:$src2),
@@ -1220,7 +1220,7 @@
"xor{q}\t{$src2, $dst|$dst, $src2}",
[(set GR64:$dst, EFLAGS,
(X86xor_flag GR64:$src1, i64immSExt32:$src2))]>;
-} // isTwoAddress
+} // Constraints = "$src1 = $dst"
def XOR64mr : RI<0x31, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
"xor{q}\t{$src, $dst|$dst, $src}",
@@ -1362,7 +1362,7 @@
} // Defs = [EFLAGS]
// Conditional moves
-let Uses = [EFLAGS], isTwoAddress = 1 in {
+let Uses = [EFLAGS], Constraints = "$src1 = $dst" in {
let isCommutable = 1 in {
def CMOVB64rr : RI<0x42, MRMSrcReg, // if <u, GR64 = GR64
(outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
@@ -1526,7 +1526,7 @@
"cmovno{q}\t{$src2, $dst|$dst, $src2}",
[(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
X86_COND_NO, EFLAGS))]>, TB;
-} // isTwoAddress
+} // Constraints = "$src1 = $dst"
// Use sbb to materialize carry flag into a GPR.
// FIXME: This are pseudo ops that should be replaced with Pat<> patterns.
@@ -1584,7 +1584,7 @@
"cvtsi2sd{q}\t{$src, $dst|$dst, $src}",
[(set FR64:$dst, (sint_to_fp (loadi64 addr:$src)))]>;
-let isTwoAddress = 1 in {
+let Constraints = "$src1 = $dst" in {
def Int_CVTSI2SD64rr: RSDI<0x2A, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, GR64:$src2),
"cvtsi2sd{q}\t{$src2, $dst|$dst, $src2}",
@@ -1597,7 +1597,7 @@
[(set VR128:$dst,
(int_x86_sse2_cvtsi642sd VR128:$src1,
(loadi64 addr:$src2)))]>;
-} // isTwoAddress
+} // Constraints = "$src1 = $dst"
// Signed i64 -> f32
def CVTSI2SS64rr: RSSI<0x2A, MRMSrcReg, (outs FR32:$dst), (ins GR64:$src),
@@ -1607,7 +1607,7 @@
"cvtsi2ss{q}\t{$src, $dst|$dst, $src}",
[(set FR32:$dst, (sint_to_fp (loadi64 addr:$src)))]>;
-let isTwoAddress = 1 in {
+let Constraints = "$src1 = $dst" in {
def Int_CVTSI2SS64rr : RSSI<0x2A, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, GR64:$src2),
"cvtsi2ss{q}\t{$src2, $dst|$dst, $src2}",
@@ -1621,7 +1621,7 @@
[(set VR128:$dst,
(int_x86_sse_cvtsi642ss VR128:$src1,
(loadi64 addr:$src2)))]>;
-}
+} // Constraints = "$src1 = $dst"
// f32 -> signed i64
def CVTSS2SI64rr: RSSI<0x2D, MRMSrcReg, (outs GR64:$dst), (ins FR32:$src),
@@ -1687,6 +1687,7 @@
// Thread Local Storage Instructions
//===----------------------------------------------------------------------===//
+// ELF TLS Support
// All calls clobber the non-callee saved registers. RSP is marked as
// a use to prevent stack-pointer assignments that appear immediately
// before calls from potentially appearing dead.
@@ -1705,16 +1706,14 @@
[(X86tlsaddr tls64addr:$sym)]>,
Requires<[In64BitMode]>;
-// FIXME: Not true for darwin
-let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
- FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1,
- MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
- XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
- XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
- Uses = [RSP],
+// Darwin TLS Support
+// For x86_64, the address of the thunk is passed in %rdi; on return,
+// the address of the variable is in %rax. All other registers are preserved.
+let Defs = [RAX],
+ Uses = [RDI],
usesCustomInserter = 1 in
def TLSCall_64 : I<0, Pseudo, (outs), (ins i64mem:$sym),
- "# Fixme into a call",
+ "# TLSCall_64",
[(X86TLSCall addr:$sym)]>,
Requires<[In64BitMode]>;
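
The slimmed-down register lists encode the Darwin TLS ABI the new comment describes: accessing a __thread variable calls a per-variable thunk with the variable's descriptor address in %rdi, and the thunk returns the variable's address in %rax while preserving every other register -- unlike ELF's __tls_get_addr, an ordinary call that clobbers the usual caller-saved set. The modeled access pattern, roughly:

    __thread int tls_var;

    int read_tls(void) { return tls_var; }
    /* x86_64 Darwin codegen, approximately:
         movq  _tls_var@TLVP(%rip), %rdi   ; descriptor address -> %rdi
         callq *(%rdi)                     ; thunk; &tls_var returned in %rax
         movl  (%rax), %eax
       Only %rdi and %rax participate, so TLSCall_64 defines just RAX and
       uses just RDI. */
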
@@ -1891,6 +1890,8 @@
(MOV64ri tjumptable :$dst)>, Requires<[FarData]>;
def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)),
(MOV64ri tglobaladdr :$dst)>, Requires<[FarData]>;
+def : Pat<(i64 (X86Wrapper tglobaltlsaddr :$dst)),
+ (MOV64ri tglobaltlsaddr :$dst)>, Requires<[FarData]>;
def : Pat<(i64 (X86Wrapper texternalsym:$dst)),
(MOV64ri texternalsym:$dst)>, Requires<[FarData]>;
def : Pat<(i64 (X86Wrapper tblockaddress:$dst)),
@@ -1905,6 +1906,8 @@
(MOV64ri64i32 tjumptable :$dst)>, Requires<[SmallCode]>;
def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)),
(MOV64ri64i32 tglobaladdr :$dst)>, Requires<[SmallCode]>;
+def : Pat<(i64 (X86Wrapper tglobaltlsaddr :$dst)),
+ (MOV64ri64i32 tglobaltlsaddr :$dst)>, Requires<[SmallCode]>;
def : Pat<(i64 (X86Wrapper texternalsym:$dst)),
(MOV64ri64i32 texternalsym:$dst)>, Requires<[SmallCode]>;
def : Pat<(i64 (X86Wrapper tblockaddress:$dst)),
@@ -1919,6 +1922,8 @@
(MOV64ri32 tjumptable :$dst)>, Requires<[KernelCode]>;
def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)),
(MOV64ri32 tglobaladdr :$dst)>, Requires<[KernelCode]>;
+def : Pat<(i64 (X86Wrapper tglobaltlsaddr :$dst)),
+ (MOV64ri32 tglobaltlsaddr :$dst)>, Requires<[KernelCode]>;
def : Pat<(i64 (X86Wrapper texternalsym:$dst)),
(MOV64ri32 texternalsym:$dst)>, Requires<[KernelCode]>;
def : Pat<(i64 (X86Wrapper tblockaddress:$dst)),
@@ -1936,6 +1941,9 @@
def : Pat<(store (i64 (X86Wrapper tglobaladdr:$src)), addr:$dst),
(MOV64mi32 addr:$dst, tglobaladdr:$src)>,
Requires<[NearData, IsStatic]>;
+def : Pat<(store (i64 (X86Wrapper tglobaltlsaddr:$src)), addr:$dst),
+ (MOV64mi32 addr:$dst, tglobaltlsaddr:$src)>,
+ Requires<[NearData, IsStatic]>;
def : Pat<(store (i64 (X86Wrapper texternalsym:$src)), addr:$dst),
(MOV64mi32 addr:$dst, texternalsym:$src)>,
Requires<[NearData, IsStatic]>;
@@ -2363,7 +2371,7 @@
defm PEXTRQ : SS41I_extract64<0x16, "pextrq">;
-let isTwoAddress = 1 in {
+let Constraints = "$src1 = $dst" in {
multiclass SS41I_insert64<bits<8> opc, string OpcodeStr> {
def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, GR64:$src2, i32i8imm:$src3),
@@ -2380,6 +2388,6 @@
(v2i64 (insertelt VR128:$src1, (loadi64 addr:$src2),
imm:$src3)))]>, OpSize, REX_W;
}
-}
+} // Constraints = "$src1 = $dst"
defm PINSRQ : SS41I_insert64<0x22, "pinsrq">;
Modified: llvm/branches/wendling/eh/lib/Target/X86/X86InstrFPStack.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/X86/X86InstrFPStack.td?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/X86/X86InstrFPStack.td (original)
+++ llvm/branches/wendling/eh/lib/Target/X86/X86InstrFPStack.td Fri Jul 2 04:57:13 2010
@@ -371,7 +371,7 @@
Requires<[HasCMov]>;
}
-let Uses = [EFLAGS], isTwoAddress = 1 in {
+let Uses = [EFLAGS], Constraints = "$src1 = $dst" in {
defm CMOVB : FPCMov<X86_COND_B>;
defm CMOVBE : FPCMov<X86_COND_BE>;
defm CMOVE : FPCMov<X86_COND_E>;
@@ -380,7 +380,7 @@
defm CMOVNBE: FPCMov<X86_COND_A>;
defm CMOVNE : FPCMov<X86_COND_NE>;
defm CMOVNP : FPCMov<X86_COND_NP>;
-}
+} // Uses = [EFLAGS], Constraints = "$src1 = $dst"
let Predicates = [HasCMov] in {
// These are not factored because there's no clean way to pass DA/DB.
Modified: llvm/branches/wendling/eh/lib/Target/X86/X86InstrFormats.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/X86/X86InstrFormats.td?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/X86/X86InstrFormats.td (original)
+++ llvm/branches/wendling/eh/lib/Target/X86/X86InstrFormats.td Fri Jul 2 04:57:13 2010
@@ -83,7 +83,6 @@
class OpSize { bit hasOpSizePrefix = 1; }
class AdSize { bit hasAdSizePrefix = 1; }
class REX_W { bit hasREX_WPrefix = 1; }
-class VEX_4V { bit hasVEX_4VPrefix = 1; }
class LOCK { bit hasLockPrefix = 1; }
class SegFS { bits<2> SegOvrBits = 1; }
class SegGS { bits<2> SegOvrBits = 2; }
@@ -102,6 +101,9 @@
class T8 { bits<4> Prefix = 13; }
class TA { bits<4> Prefix = 14; }
class TF { bits<4> Prefix = 15; }
+class VEX { bit hasVEXPrefix = 1; }
+class VEX_W { bit hasVEX_WPrefix = 1; }
+class VEX_4V : VEX { bit hasVEX_4VPrefix = 1; }
class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
string AsmStr, Domain d = GenericDomain>
@@ -125,11 +127,13 @@
bits<4> Prefix = 0; // Which prefix byte does this inst have?
bit hasREX_WPrefix = 0; // Does this inst requires the REX.W prefix?
- bit hasVEX_4VPrefix = 0; // Does this inst requires the VEX.VVVV prefix?
FPFormat FPForm = NotFP; // What flavor of FP instruction is this?
bit hasLockPrefix = 0; // Does this inst have a 0xF0 prefix?
bits<2> SegOvrBits = 0; // Segment override prefix.
Domain ExeDomain = d;
+ bit hasVEXPrefix = 0; // Does this inst require a VEX prefix?
+ bit hasVEX_WPrefix = 0; // Does this inst set the VEX_W field?
+ bit hasVEX_4VPrefix = 0; // Does this inst require the VEX.VVVV field?
// TSFlags layout should be kept in sync with X86InstrInfo.h.
let TSFlags{5-0} = FormBits;
@@ -143,7 +147,9 @@
let TSFlags{21-20} = SegOvrBits;
let TSFlags{23-22} = ExeDomain.Value;
let TSFlags{31-24} = Opcode;
- let TSFlags{32} = hasVEX_4VPrefix;
+ let TSFlags{32} = hasVEXPrefix;
+ let TSFlags{33} = hasVEX_WPrefix;
+ let TSFlags{34} = hasVEX_4VPrefix;
}
class I<bits<8> o, Format f, dag outs, dag ins, string asm,
@@ -217,9 +223,45 @@
// SI - SSE 1 & 2 scalar instructions
class SI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
: I<o, F, outs, ins, asm, pattern> {
- let Predicates = !if(hasVEX_4VPrefix /* VEX_4V */,
+ let Predicates = !if(hasVEXPrefix /* VEX_4V */,
!if(!eq(Prefix, 11 /* XD */), [HasAVX, HasSSE2], [HasAVX, HasSSE1]),
- !if(!eq(Prefix, 12 /* XS */), [HasSSE2], [HasSSE1]));
+ !if(!eq(Prefix, 12 /* XS */), [HasSSE1], [HasSSE2]));
+
+ // AVX instructions have a 'v' prefix in the mnemonic
+ let AsmString = !if(hasVEXPrefix, !strconcat("v", asm), asm);
+}
+
+// SIi8 - SSE 1 & 2 scalar instructions
+class SIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : Ii8<o, F, outs, ins, asm, pattern> {
+ let Predicates = !if(hasVEXPrefix /* VEX_4V */,
+ !if(!eq(Prefix, 11 /* XD */), [HasAVX, HasSSE2], [HasAVX, HasSSE1]),
+ !if(!eq(Prefix, 12 /* XS */), [HasSSE1], [HasSSE2]));
+
+ // AVX instructions have a 'v' prefix in the mnemonic
+ let AsmString = !if(hasVEXPrefix, !strconcat("v", asm), asm);
+}
+
+// PI - SSE 1 & 2 packed instructions
+class PI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern,
+ Domain d>
+ : I<o, F, outs, ins, asm, pattern, d> {
+ let Predicates = !if(hasVEXPrefix /* VEX_4V */,
+ !if(hasOpSizePrefix /* OpSize */, [HasAVX, HasSSE2], [HasAVX, HasSSE1]),
+ !if(hasOpSizePrefix /* OpSize */, [HasSSE2], [HasSSE1]));
+
+ // AVX instructions have a 'v' prefix in the mnemonic
+ let AsmString = !if(hasVEXPrefix, !strconcat("v", asm), asm);
+}
+
+// PIi8 - SSE 1 & 2 packed instructions with immediate
+class PIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern, Domain d>
+ : Ii8<o, F, outs, ins, asm, pattern, d> {
+ let Predicates = !if(hasVEX_4VPrefix /* VEX_4V */,
+ !if(hasOpSizePrefix /* OpSize */, [HasAVX, HasSSE2], [HasAVX, HasSSE1]),
+ !if(hasOpSizePrefix /* OpSize */, [HasSSE2], [HasSSE1]));
// AVX instructions have a 'v' prefix in the mnemonic
let AsmString = !if(hasVEX_4VPrefix, !strconcat("v", asm), asm);
@@ -247,12 +289,12 @@
Requires<[HasSSE1]>;
class VSSI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
- : I<o, F, outs, ins, !strconcat("v", asm), pattern>, XS, VEX_4V,
+ : I<o, F, outs, ins, !strconcat("v", asm), pattern>, XS,
Requires<[HasAVX, HasSSE1]>;
class VPSI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, !strconcat("v", asm), pattern, SSEPackedSingle>,
- VEX_4V, Requires<[HasAVX, HasSSE1]>;
+ Requires<[HasAVX, HasSSE1]>;
// SSE2 Instruction Templates:
//
@@ -281,12 +323,12 @@
Requires<[HasSSE2]>;
class VSDI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
- : I<o, F, outs, ins, !strconcat("v", asm), pattern>, XD, VEX_4V,
+ : I<o, F, outs, ins, !strconcat("v", asm), pattern>, XD,
Requires<[HasAVX, HasSSE2]>;
class VPDI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, !strconcat("v", asm), pattern, SSEPackedDouble>,
- VEX_4V, OpSize, Requires<[HasAVX, HasSSE2]>;
+ OpSize, Requires<[HasAVX, HasSSE2]>;
// SSE3 Instruction Templates:
//
Modified: llvm/branches/wendling/eh/lib/Target/X86/X86InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/X86/X86InstrInfo.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/X86/X86InstrInfo.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/X86/X86InstrInfo.cpp Fri Jul 2 04:57:13 2010
@@ -3007,16 +3007,16 @@
EVT VT = Load1->getValueType(0);
switch (VT.getSimpleVT().SimpleTy) {
- default: {
+ default:
// XMM registers. In 64-bit mode we can be a bit more aggressive since we
// have 16 of them to play with.
if (TM.getSubtargetImpl()->is64Bit()) {
if (NumLoads >= 3)
return false;
- } else if (NumLoads)
+ } else if (NumLoads) {
return false;
+ }
break;
- }
case MVT::i8:
case MVT::i16:
case MVT::i32:
@@ -3025,6 +3025,7 @@
case MVT::f64:
if (NumLoads)
return false;
+ break;
}
return true;
Modified: llvm/branches/wendling/eh/lib/Target/X86/X86InstrInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/X86/X86InstrInfo.h?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/X86/X86InstrInfo.h (original)
+++ llvm/branches/wendling/eh/lib/Target/X86/X86InstrInfo.h Fri Jul 2 04:57:13 2010
@@ -424,13 +424,14 @@
// those enums below are used, TSFlags must be shifted right by 32 first.
enum {
//===------------------------------------------------------------------===//
- // VEX_4V - VEX prefixes are instruction prefixes used in AVX.
+ // VEXPrefix - VEX prefixes are instruction prefixes used in AVX.
// VEX_4V is used to specify an additional AVX/SSE register. Several 2
// address instructions in SSE are represented as 3 address ones in AVX
// and the additional register is encoded in VEX_VVVV prefix.
//
- VEXShift = 0,
- VEX_4V = 1 << VEXShift
+ VEX = 1,
+ VEX_W = 1 << 1,
+ VEX_4V = 1 << 2
};
// getBaseOpcodeFor - This function returns the "base" X86 opcode for the
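
These enum values pair with the TSFlags{32}-TSFlags{34} assignments in the X86InstrFormats.td hunk above: the VEX attribute bits live in the upper half of TSFlags, so, per the comment, TSFlags must be shifted right by 32 before testing them. A sketch of the intended query (the helper name is hypothetical; X86II is the enclosing namespace):

    static bool hasVEX_4VPrefix(uint64_t TSFlags) {
      // VEX_4V == 1 << 2 after the shift, i.e. overall bit 34 of TSFlags.
      return (TSFlags >> 32) & X86II::VEX_4V;
    }
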
Modified: llvm/branches/wendling/eh/lib/Target/X86/X86InstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/X86/X86InstrInfo.td?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/X86/X86InstrInfo.td (original)
+++ llvm/branches/wendling/eh/lib/Target/X86/X86InstrInfo.td Fri Jul 2 04:57:13 2010
@@ -820,7 +820,18 @@
Requires<[In32BitMode]>;
}
-let isTwoAddress = 1 in // GR32 = bswap GR32
+let Defs = [EDI, ESI, EBP, EBX, EDX, ECX, EAX, ESP], Uses = [ESP],
+ mayLoad=1, neverHasSideEffects=1 in {
+def POPA32 : I<0x61, RawFrm, (outs), (ins), "popa{l}", []>,
+ Requires<[In32BitMode]>;
+}
+let Defs = [ESP], Uses = [EDI, ESI, EBP, EBX, EDX, ECX, EAX, ESP],
+ mayStore=1, neverHasSideEffects=1 in {
+def PUSHA32 : I<0x60, RawFrm, (outs), (ins), "pusha{l}", []>,
+ Requires<[In32BitMode]>;
+}
+
+let Uses = [EFLAGS], Constraints = "$src = $dst" in // GR32 = bswap GR32
def BSWAP32r : I<0xC8, AddRegFrm,
(outs GR32:$dst), (ins GR32:$src),
"bswap{l}\t$dst",
@@ -1244,7 +1255,7 @@
//===----------------------------------------------------------------------===//
// Two address Instructions.
//
-let isTwoAddress = 1 in {
+let Constraints = "$src1 = $dst" in {
// Conditional moves
let Uses = [EFLAGS] in {
@@ -1645,7 +1656,7 @@
// i8 register pressure. Note that CMOV_GR8 is conservatively considered to
// clobber EFLAGS, because if one of the operands is zero, the expansion
// could involve an xor.
-let usesCustomInserter = 1, isTwoAddress = 0, Defs = [EFLAGS] in {
+let usesCustomInserter = 1, Constraints = "", Defs = [EFLAGS] in {
def CMOV_GR8 : I<0, Pseudo,
(outs GR8:$dst), (ins GR8:$src1, GR8:$src2, i8imm:$cond),
"#CMOV_GR8 PSEUDO!",
@@ -1664,86 +1675,106 @@
[(set GR16:$dst,
(X86cmov GR16:$src1, GR16:$src2, imm:$cond, EFLAGS))]>;
def CMOV_RFP32 : I<0, Pseudo,
- (outs RFP32:$dst), (ins RFP32:$src1, RFP32:$src2, i8imm:$cond),
+ (outs RFP32:$dst),
+ (ins RFP32:$src1, RFP32:$src2, i8imm:$cond),
"#CMOV_RFP32 PSEUDO!",
- [(set RFP32:$dst, (X86cmov RFP32:$src1, RFP32:$src2, imm:$cond,
+ [(set RFP32:$dst,
+ (X86cmov RFP32:$src1, RFP32:$src2, imm:$cond,
EFLAGS))]>;
def CMOV_RFP64 : I<0, Pseudo,
- (outs RFP64:$dst), (ins RFP64:$src1, RFP64:$src2, i8imm:$cond),
+ (outs RFP64:$dst),
+ (ins RFP64:$src1, RFP64:$src2, i8imm:$cond),
"#CMOV_RFP64 PSEUDO!",
- [(set RFP64:$dst, (X86cmov RFP64:$src1, RFP64:$src2, imm:$cond,
+ [(set RFP64:$dst,
+ (X86cmov RFP64:$src1, RFP64:$src2, imm:$cond,
EFLAGS))]>;
def CMOV_RFP80 : I<0, Pseudo,
- (outs RFP80:$dst), (ins RFP80:$src1, RFP80:$src2, i8imm:$cond),
+ (outs RFP80:$dst),
+ (ins RFP80:$src1, RFP80:$src2, i8imm:$cond),
"#CMOV_RFP80 PSEUDO!",
- [(set RFP80:$dst, (X86cmov RFP80:$src1, RFP80:$src2, imm:$cond,
+ [(set RFP80:$dst,
+ (X86cmov RFP80:$src1, RFP80:$src2, imm:$cond,
EFLAGS))]>;
} // Predicates = [NoCMov]
-} // UsesCustomInserter = 1, isTwoAddress = 0, Defs = [EFLAGS]
+} // UsesCustomInserter = 1, Constraints = "", Defs = [EFLAGS]
} // Uses = [EFLAGS]
// unary instructions
let CodeSize = 2 in {
let Defs = [EFLAGS] in {
-def NEG8r : I<0xF6, MRM3r, (outs GR8 :$dst), (ins GR8 :$src), "neg{b}\t$dst",
- [(set GR8:$dst, (ineg GR8:$src)),
+def NEG8r : I<0xF6, MRM3r, (outs GR8 :$dst), (ins GR8 :$src1),
+ "neg{b}\t$dst",
+ [(set GR8:$dst, (ineg GR8:$src1)),
(implicit EFLAGS)]>;
-def NEG16r : I<0xF7, MRM3r, (outs GR16:$dst), (ins GR16:$src), "neg{w}\t$dst",
- [(set GR16:$dst, (ineg GR16:$src)),
+def NEG16r : I<0xF7, MRM3r, (outs GR16:$dst), (ins GR16:$src1),
+ "neg{w}\t$dst",
+ [(set GR16:$dst, (ineg GR16:$src1)),
(implicit EFLAGS)]>, OpSize;
-def NEG32r : I<0xF7, MRM3r, (outs GR32:$dst), (ins GR32:$src), "neg{l}\t$dst",
- [(set GR32:$dst, (ineg GR32:$src)),
+def NEG32r : I<0xF7, MRM3r, (outs GR32:$dst), (ins GR32:$src1),
+ "neg{l}\t$dst",
+ [(set GR32:$dst, (ineg GR32:$src1)),
(implicit EFLAGS)]>;
-let isTwoAddress = 0 in {
- def NEG8m : I<0xF6, MRM3m, (outs), (ins i8mem :$dst), "neg{b}\t$dst",
+
+let Constraints = "" in {
+ def NEG8m : I<0xF6, MRM3m, (outs), (ins i8mem :$dst),
+ "neg{b}\t$dst",
[(store (ineg (loadi8 addr:$dst)), addr:$dst),
(implicit EFLAGS)]>;
- def NEG16m : I<0xF7, MRM3m, (outs), (ins i16mem:$dst), "neg{w}\t$dst",
+ def NEG16m : I<0xF7, MRM3m, (outs), (ins i16mem:$dst),
+ "neg{w}\t$dst",
[(store (ineg (loadi16 addr:$dst)), addr:$dst),
(implicit EFLAGS)]>, OpSize;
- def NEG32m : I<0xF7, MRM3m, (outs), (ins i32mem:$dst), "neg{l}\t$dst",
+ def NEG32m : I<0xF7, MRM3m, (outs), (ins i32mem:$dst),
+ "neg{l}\t$dst",
[(store (ineg (loadi32 addr:$dst)), addr:$dst),
(implicit EFLAGS)]>;
-}
+} // Constraints = ""
} // Defs = [EFLAGS]
// Match xor -1 to not. Favors these over a move imm + xor to save code size.
let AddedComplexity = 15 in {
-def NOT8r : I<0xF6, MRM2r, (outs GR8 :$dst), (ins GR8 :$src), "not{b}\t$dst",
- [(set GR8:$dst, (not GR8:$src))]>;
-def NOT16r : I<0xF7, MRM2r, (outs GR16:$dst), (ins GR16:$src), "not{w}\t$dst",
- [(set GR16:$dst, (not GR16:$src))]>, OpSize;
-def NOT32r : I<0xF7, MRM2r, (outs GR32:$dst), (ins GR32:$src), "not{l}\t$dst",
- [(set GR32:$dst, (not GR32:$src))]>;
-}
-let isTwoAddress = 0 in {
- def NOT8m : I<0xF6, MRM2m, (outs), (ins i8mem :$dst), "not{b}\t$dst",
+def NOT8r : I<0xF6, MRM2r, (outs GR8 :$dst), (ins GR8 :$src1),
+ "not{b}\t$dst",
+ [(set GR8:$dst, (not GR8:$src1))]>;
+def NOT16r : I<0xF7, MRM2r, (outs GR16:$dst), (ins GR16:$src1),
+ "not{w}\t$dst",
+ [(set GR16:$dst, (not GR16:$src1))]>, OpSize;
+def NOT32r : I<0xF7, MRM2r, (outs GR32:$dst), (ins GR32:$src1),
+ "not{l}\t$dst",
+ [(set GR32:$dst, (not GR32:$src1))]>;
+}
+let Constraints = "" in {
+ def NOT8m : I<0xF6, MRM2m, (outs), (ins i8mem :$dst),
+ "not{b}\t$dst",
[(store (not (loadi8 addr:$dst)), addr:$dst)]>;
- def NOT16m : I<0xF7, MRM2m, (outs), (ins i16mem:$dst), "not{w}\t$dst",
+ def NOT16m : I<0xF7, MRM2m, (outs), (ins i16mem:$dst),
+ "not{w}\t$dst",
[(store (not (loadi16 addr:$dst)), addr:$dst)]>, OpSize;
- def NOT32m : I<0xF7, MRM2m, (outs), (ins i32mem:$dst), "not{l}\t$dst",
+ def NOT32m : I<0xF7, MRM2m, (outs), (ins i32mem:$dst),
+ "not{l}\t$dst",
[(store (not (loadi32 addr:$dst)), addr:$dst)]>;
-}
+} // Constraints = ""
} // CodeSize
// TODO: inc/dec is slow for P4, but fast for Pentium-M.
let Defs = [EFLAGS] in {
let CodeSize = 2 in
-def INC8r : I<0xFE, MRM0r, (outs GR8 :$dst), (ins GR8 :$src), "inc{b}\t$dst",
- [(set GR8:$dst, EFLAGS, (X86inc_flag GR8:$src))]>;
+def INC8r : I<0xFE, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1),
+ "inc{b}\t$dst",
+ [(set GR8:$dst, EFLAGS, (X86inc_flag GR8:$src1))]>;
let isConvertibleToThreeAddress = 1, CodeSize = 1 in { // Can xform into LEA.
-def INC16r : I<0x40, AddRegFrm, (outs GR16:$dst), (ins GR16:$src),
+def INC16r : I<0x40, AddRegFrm, (outs GR16:$dst), (ins GR16:$src1),
"inc{w}\t$dst",
- [(set GR16:$dst, EFLAGS, (X86inc_flag GR16:$src))]>,
+ [(set GR16:$dst, EFLAGS, (X86inc_flag GR16:$src1))]>,
OpSize, Requires<[In32BitMode]>;
-def INC32r : I<0x40, AddRegFrm, (outs GR32:$dst), (ins GR32:$src),
+def INC32r : I<0x40, AddRegFrm, (outs GR32:$dst), (ins GR32:$src1),
"inc{l}\t$dst",
- [(set GR32:$dst, EFLAGS, (X86inc_flag GR32:$src))]>,
+ [(set GR32:$dst, EFLAGS, (X86inc_flag GR32:$src1))]>,
Requires<[In32BitMode]>;
}
-let isTwoAddress = 0, CodeSize = 2 in {
+let Constraints = "", CodeSize = 2 in {
def INC8m : I<0xFE, MRM0m, (outs), (ins i8mem :$dst), "inc{b}\t$dst",
[(store (add (loadi8 addr:$dst), 1), addr:$dst),
(implicit EFLAGS)]>;
@@ -1755,23 +1786,24 @@
[(store (add (loadi32 addr:$dst), 1), addr:$dst),
(implicit EFLAGS)]>,
Requires<[In32BitMode]>;
-}
+} // Constraints = "", CodeSize = 2
let CodeSize = 2 in
-def DEC8r : I<0xFE, MRM1r, (outs GR8 :$dst), (ins GR8 :$src), "dec{b}\t$dst",
- [(set GR8:$dst, EFLAGS, (X86dec_flag GR8:$src))]>;
+def DEC8r : I<0xFE, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1),
+ "dec{b}\t$dst",
+ [(set GR8:$dst, EFLAGS, (X86dec_flag GR8:$src1))]>;
let isConvertibleToThreeAddress = 1, CodeSize = 1 in { // Can xform into LEA.
-def DEC16r : I<0x48, AddRegFrm, (outs GR16:$dst), (ins GR16:$src),
+def DEC16r : I<0x48, AddRegFrm, (outs GR16:$dst), (ins GR16:$src1),
"dec{w}\t$dst",
- [(set GR16:$dst, EFLAGS, (X86dec_flag GR16:$src))]>,
+ [(set GR16:$dst, EFLAGS, (X86dec_flag GR16:$src1))]>,
OpSize, Requires<[In32BitMode]>;
-def DEC32r : I<0x48, AddRegFrm, (outs GR32:$dst), (ins GR32:$src),
+def DEC32r : I<0x48, AddRegFrm, (outs GR32:$dst), (ins GR32:$src1),
"dec{l}\t$dst",
- [(set GR32:$dst, EFLAGS, (X86dec_flag GR32:$src))]>,
+ [(set GR32:$dst, EFLAGS, (X86dec_flag GR32:$src1))]>,
Requires<[In32BitMode]>;
-}
+} // CodeSize = 2
-let isTwoAddress = 0, CodeSize = 2 in {
+let Constraints = "", CodeSize = 2 in {
def DEC8m : I<0xFE, MRM1m, (outs), (ins i8mem :$dst), "dec{b}\t$dst",
[(store (add (loadi8 addr:$dst), -1), addr:$dst),
(implicit EFLAGS)]>;
@@ -1783,7 +1815,7 @@
[(store (add (loadi32 addr:$dst), -1), addr:$dst),
(implicit EFLAGS)]>,
Requires<[In32BitMode]>;
-}
+} // Constraints = "", CodeSize = 2
} // Defs = [EFLAGS]
// Logical operators...
@@ -1862,7 +1894,7 @@
[(set GR32:$dst, EFLAGS, (X86and_flag GR32:$src1,
i32immSExt8:$src2))]>;
-let isTwoAddress = 0 in {
+let Constraints = "" in {
def AND8mr : I<0x20, MRMDestMem,
(outs), (ins i8mem :$dst, GR8 :$src),
"and{b}\t{$src, $dst|$dst, $src}",
@@ -1914,7 +1946,7 @@
def AND32i32 : Ii32<0x25, RawFrm, (outs), (ins i32imm:$src),
"and{l}\t{$src, %eax|%eax, $src}", []>;
-}
+} // Constraints = ""
let isCommutable = 1 in { // X = OR Y, Z --> X = OR Z, Y
@@ -1988,7 +2020,7 @@
"or{l}\t{$src2, $dst|$dst, $src2}",
[(set GR32:$dst, EFLAGS, (X86or_flag GR32:$src1,
i32immSExt8:$src2))]>;
-let isTwoAddress = 0 in {
+let Constraints = "" in {
def OR8mr : I<0x08, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src),
"or{b}\t{$src, $dst|$dst, $src}",
[(store (or (load addr:$dst), GR8:$src), addr:$dst),
@@ -2030,7 +2062,7 @@
"or{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
def OR32i32 : Ii32 <0x0D, RawFrm, (outs), (ins i32imm:$src),
"or{l}\t{$src, %eax|%eax, $src}", []>;
-} // isTwoAddress = 0
+} // Constraints = ""
let isCommutable = 1 in { // X = XOR Y, Z --> X = XOR Z, Y
@@ -2107,7 +2139,7 @@
[(set GR32:$dst, EFLAGS, (X86xor_flag GR32:$src1,
i32immSExt8:$src2))]>;
-let isTwoAddress = 0 in {
+let Constraints = "" in {
def XOR8mr : I<0x30, MRMDestMem,
(outs), (ins i8mem :$dst, GR8 :$src),
"xor{b}\t{$src, $dst|$dst, $src}",
@@ -2158,26 +2190,27 @@
"xor{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
def XOR32i32 : Ii32<0x35, RawFrm, (outs), (ins i32imm:$src),
"xor{l}\t{$src, %eax|%eax, $src}", []>;
-} // isTwoAddress = 0
+} // Constraints = ""
} // Defs = [EFLAGS]
// Shift instructions
let Defs = [EFLAGS] in {
let Uses = [CL] in {
-def SHL8rCL : I<0xD2, MRM4r, (outs GR8 :$dst), (ins GR8 :$src),
+def SHL8rCL : I<0xD2, MRM4r, (outs GR8 :$dst), (ins GR8 :$src1),
"shl{b}\t{%cl, $dst|$dst, CL}",
- [(set GR8:$dst, (shl GR8:$src, CL))]>;
-def SHL16rCL : I<0xD3, MRM4r, (outs GR16:$dst), (ins GR16:$src),
+ [(set GR8:$dst, (shl GR8:$src1, CL))]>;
+def SHL16rCL : I<0xD3, MRM4r, (outs GR16:$dst), (ins GR16:$src1),
"shl{w}\t{%cl, $dst|$dst, CL}",
- [(set GR16:$dst, (shl GR16:$src, CL))]>, OpSize;
-def SHL32rCL : I<0xD3, MRM4r, (outs GR32:$dst), (ins GR32:$src),
+ [(set GR16:$dst, (shl GR16:$src1, CL))]>, OpSize;
+def SHL32rCL : I<0xD3, MRM4r, (outs GR32:$dst), (ins GR32:$src1),
"shl{l}\t{%cl, $dst|$dst, CL}",
- [(set GR32:$dst, (shl GR32:$src, CL))]>;
+ [(set GR32:$dst, (shl GR32:$src1, CL))]>;
} // Uses = [CL]
def SHL8ri : Ii8<0xC0, MRM4r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2),
"shl{b}\t{$src2, $dst|$dst, $src2}",
[(set GR8:$dst, (shl GR8:$src1, (i8 imm:$src2)))]>;
+
let isConvertibleToThreeAddress = 1 in { // Can transform into LEA.
def SHL16ri : Ii8<0xC1, MRM4r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2),
"shl{w}\t{$src2, $dst|$dst, $src2}",
@@ -2198,7 +2231,7 @@
} // isConvertibleToThreeAddress = 1
-let isTwoAddress = 0 in {
+let Constraints = "" in {
let Uses = [CL] in {
def SHL8mCL : I<0xD2, MRM4m, (outs), (ins i8mem :$dst),
"shl{b}\t{%cl, $dst|$dst, CL}",
@@ -2232,18 +2265,18 @@
def SHL32m1 : I<0xD1, MRM4m, (outs), (ins i32mem:$dst),
"shl{l}\t$dst",
[(store (shl (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
-}
+} // Constraints = ""
let Uses = [CL] in {
-def SHR8rCL : I<0xD2, MRM5r, (outs GR8 :$dst), (ins GR8 :$src),
+def SHR8rCL : I<0xD2, MRM5r, (outs GR8 :$dst), (ins GR8 :$src1),
"shr{b}\t{%cl, $dst|$dst, CL}",
- [(set GR8:$dst, (srl GR8:$src, CL))]>;
-def SHR16rCL : I<0xD3, MRM5r, (outs GR16:$dst), (ins GR16:$src),
+ [(set GR8:$dst, (srl GR8:$src1, CL))]>;
+def SHR16rCL : I<0xD3, MRM5r, (outs GR16:$dst), (ins GR16:$src1),
"shr{w}\t{%cl, $dst|$dst, CL}",
- [(set GR16:$dst, (srl GR16:$src, CL))]>, OpSize;
-def SHR32rCL : I<0xD3, MRM5r, (outs GR32:$dst), (ins GR32:$src),
+ [(set GR16:$dst, (srl GR16:$src1, CL))]>, OpSize;
+def SHR32rCL : I<0xD3, MRM5r, (outs GR32:$dst), (ins GR32:$src1),
"shr{l}\t{%cl, $dst|$dst, CL}",
- [(set GR32:$dst, (srl GR32:$src, CL))]>;
+ [(set GR32:$dst, (srl GR32:$src1, CL))]>;
}
def SHR8ri : Ii8<0xC0, MRM5r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
@@ -2267,7 +2300,7 @@
"shr{l}\t$dst",
[(set GR32:$dst, (srl GR32:$src1, (i8 1)))]>;
-let isTwoAddress = 0 in {
+let Constraints = "" in {
let Uses = [CL] in {
def SHR8mCL : I<0xD2, MRM5m, (outs), (ins i8mem :$dst),
"shr{b}\t{%cl, $dst|$dst, CL}",
@@ -2301,18 +2334,18 @@
def SHR32m1 : I<0xD1, MRM5m, (outs), (ins i32mem:$dst),
"shr{l}\t$dst",
[(store (srl (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
-}
+} // Constraints = ""
let Uses = [CL] in {
-def SAR8rCL : I<0xD2, MRM7r, (outs GR8 :$dst), (ins GR8 :$src),
+def SAR8rCL : I<0xD2, MRM7r, (outs GR8 :$dst), (ins GR8 :$src1),
"sar{b}\t{%cl, $dst|$dst, CL}",
- [(set GR8:$dst, (sra GR8:$src, CL))]>;
-def SAR16rCL : I<0xD3, MRM7r, (outs GR16:$dst), (ins GR16:$src),
+ [(set GR8:$dst, (sra GR8:$src1, CL))]>;
+def SAR16rCL : I<0xD3, MRM7r, (outs GR16:$dst), (ins GR16:$src1),
"sar{w}\t{%cl, $dst|$dst, CL}",
- [(set GR16:$dst, (sra GR16:$src, CL))]>, OpSize;
-def SAR32rCL : I<0xD3, MRM7r, (outs GR32:$dst), (ins GR32:$src),
+ [(set GR16:$dst, (sra GR16:$src1, CL))]>, OpSize;
+def SAR32rCL : I<0xD3, MRM7r, (outs GR32:$dst), (ins GR32:$src1),
"sar{l}\t{%cl, $dst|$dst, CL}",
- [(set GR32:$dst, (sra GR32:$src, CL))]>;
+ [(set GR32:$dst, (sra GR32:$src1, CL))]>;
}
def SAR8ri : Ii8<0xC0, MRM7r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2),
@@ -2337,7 +2370,7 @@
"sar{l}\t$dst",
[(set GR32:$dst, (sra GR32:$src1, (i8 1)))]>;
-let isTwoAddress = 0 in {
+let Constraints = "" in {
let Uses = [CL] in {
def SAR8mCL : I<0xD2, MRM7m, (outs), (ins i8mem :$dst),
"sar{b}\t{%cl, $dst|$dst, CL}",
@@ -2371,65 +2404,65 @@
def SAR32m1 : I<0xD1, MRM7m, (outs), (ins i32mem:$dst),
"sar{l}\t$dst",
[(store (sra (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
-}
+} // Constraints = ""
// Rotate instructions
-def RCL8r1 : I<0xD0, MRM2r, (outs GR8:$dst), (ins GR8:$src),
+def RCL8r1 : I<0xD0, MRM2r, (outs GR8:$dst), (ins GR8:$src1),
"rcl{b}\t{1, $dst|$dst, 1}", []>;
let Uses = [CL] in {
-def RCL8rCL : I<0xD2, MRM2r, (outs GR8:$dst), (ins GR8:$src),
+def RCL8rCL : I<0xD2, MRM2r, (outs GR8:$dst), (ins GR8:$src1),
"rcl{b}\t{%cl, $dst|$dst, CL}", []>;
}
-def RCL8ri : Ii8<0xC0, MRM2r, (outs GR8:$dst), (ins GR8:$src, i8imm:$cnt),
+def RCL8ri : Ii8<0xC0, MRM2r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$cnt),
"rcl{b}\t{$cnt, $dst|$dst, $cnt}", []>;
-def RCL16r1 : I<0xD1, MRM2r, (outs GR16:$dst), (ins GR16:$src),
+def RCL16r1 : I<0xD1, MRM2r, (outs GR16:$dst), (ins GR16:$src1),
"rcl{w}\t{1, $dst|$dst, 1}", []>, OpSize;
let Uses = [CL] in {
-def RCL16rCL : I<0xD3, MRM2r, (outs GR16:$dst), (ins GR16:$src),
+def RCL16rCL : I<0xD3, MRM2r, (outs GR16:$dst), (ins GR16:$src1),
"rcl{w}\t{%cl, $dst|$dst, CL}", []>, OpSize;
}
-def RCL16ri : Ii8<0xC1, MRM2r, (outs GR16:$dst), (ins GR16:$src, i8imm:$cnt),
+def RCL16ri : Ii8<0xC1, MRM2r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$cnt),
"rcl{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize;
-def RCL32r1 : I<0xD1, MRM2r, (outs GR32:$dst), (ins GR32:$src),
+def RCL32r1 : I<0xD1, MRM2r, (outs GR32:$dst), (ins GR32:$src1),
"rcl{l}\t{1, $dst|$dst, 1}", []>;
let Uses = [CL] in {
-def RCL32rCL : I<0xD3, MRM2r, (outs GR32:$dst), (ins GR32:$src),
+def RCL32rCL : I<0xD3, MRM2r, (outs GR32:$dst), (ins GR32:$src1),
"rcl{l}\t{%cl, $dst|$dst, CL}", []>;
}
-def RCL32ri : Ii8<0xC1, MRM2r, (outs GR32:$dst), (ins GR32:$src, i8imm:$cnt),
+def RCL32ri : Ii8<0xC1, MRM2r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$cnt),
"rcl{l}\t{$cnt, $dst|$dst, $cnt}", []>;
-def RCR8r1 : I<0xD0, MRM3r, (outs GR8:$dst), (ins GR8:$src),
+def RCR8r1 : I<0xD0, MRM3r, (outs GR8:$dst), (ins GR8:$src1),
"rcr{b}\t{1, $dst|$dst, 1}", []>;
let Uses = [CL] in {
-def RCR8rCL : I<0xD2, MRM3r, (outs GR8:$dst), (ins GR8:$src),
+def RCR8rCL : I<0xD2, MRM3r, (outs GR8:$dst), (ins GR8:$src1),
"rcr{b}\t{%cl, $dst|$dst, CL}", []>;
}
-def RCR8ri : Ii8<0xC0, MRM3r, (outs GR8:$dst), (ins GR8:$src, i8imm:$cnt),
+def RCR8ri : Ii8<0xC0, MRM3r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$cnt),
"rcr{b}\t{$cnt, $dst|$dst, $cnt}", []>;
-def RCR16r1 : I<0xD1, MRM3r, (outs GR16:$dst), (ins GR16:$src),
+def RCR16r1 : I<0xD1, MRM3r, (outs GR16:$dst), (ins GR16:$src1),
"rcr{w}\t{1, $dst|$dst, 1}", []>, OpSize;
let Uses = [CL] in {
-def RCR16rCL : I<0xD3, MRM3r, (outs GR16:$dst), (ins GR16:$src),
+def RCR16rCL : I<0xD3, MRM3r, (outs GR16:$dst), (ins GR16:$src1),
"rcr{w}\t{%cl, $dst|$dst, CL}", []>, OpSize;
}
-def RCR16ri : Ii8<0xC1, MRM3r, (outs GR16:$dst), (ins GR16:$src, i8imm:$cnt),
+def RCR16ri : Ii8<0xC1, MRM3r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$cnt),
"rcr{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize;
-def RCR32r1 : I<0xD1, MRM3r, (outs GR32:$dst), (ins GR32:$src),
+def RCR32r1 : I<0xD1, MRM3r, (outs GR32:$dst), (ins GR32:$src1),
"rcr{l}\t{1, $dst|$dst, 1}", []>;
let Uses = [CL] in {
-def RCR32rCL : I<0xD3, MRM3r, (outs GR32:$dst), (ins GR32:$src),
+def RCR32rCL : I<0xD3, MRM3r, (outs GR32:$dst), (ins GR32:$src1),
"rcr{l}\t{%cl, $dst|$dst, CL}", []>;
}
-def RCR32ri : Ii8<0xC1, MRM3r, (outs GR32:$dst), (ins GR32:$src, i8imm:$cnt),
+def RCR32ri : Ii8<0xC1, MRM3r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$cnt),
"rcr{l}\t{$cnt, $dst|$dst, $cnt}", []>;
-let isTwoAddress = 0 in {
+let Constraints = "" in {
def RCL8m1 : I<0xD0, MRM2m, (outs), (ins i8mem:$dst),
"rcl{b}\t{1, $dst|$dst, 1}", []>;
def RCL8mi : Ii8<0xC0, MRM2m, (outs), (ins i8mem:$dst, i8imm:$cnt),
@@ -2469,19 +2502,19 @@
def RCR32mCL : I<0xD3, MRM3m, (outs), (ins i32mem:$dst),
"rcr{l}\t{%cl, $dst|$dst, CL}", []>;
}
-}
+} // Constraints = ""
// FIXME: provide shorter instructions when imm8 == 1
let Uses = [CL] in {
-def ROL8rCL : I<0xD2, MRM0r, (outs GR8 :$dst), (ins GR8 :$src),
+def ROL8rCL : I<0xD2, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1),
"rol{b}\t{%cl, $dst|$dst, CL}",
- [(set GR8:$dst, (rotl GR8:$src, CL))]>;
-def ROL16rCL : I<0xD3, MRM0r, (outs GR16:$dst), (ins GR16:$src),
+ [(set GR8:$dst, (rotl GR8:$src1, CL))]>;
+def ROL16rCL : I<0xD3, MRM0r, (outs GR16:$dst), (ins GR16:$src1),
"rol{w}\t{%cl, $dst|$dst, CL}",
- [(set GR16:$dst, (rotl GR16:$src, CL))]>, OpSize;
-def ROL32rCL : I<0xD3, MRM0r, (outs GR32:$dst), (ins GR32:$src),
+ [(set GR16:$dst, (rotl GR16:$src1, CL))]>, OpSize;
+def ROL32rCL : I<0xD3, MRM0r, (outs GR32:$dst), (ins GR32:$src1),
"rol{l}\t{%cl, $dst|$dst, CL}",
- [(set GR32:$dst, (rotl GR32:$src, CL))]>;
+ [(set GR32:$dst, (rotl GR32:$src1, CL))]>;
}
def ROL8ri : Ii8<0xC0, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2),
@@ -2506,7 +2539,7 @@
"rol{l}\t$dst",
[(set GR32:$dst, (rotl GR32:$src1, (i8 1)))]>;
-let isTwoAddress = 0 in {
+let Constraints = "" in {
let Uses = [CL] in {
def ROL8mCL : I<0xD2, MRM0m, (outs), (ins i8mem :$dst),
"rol{b}\t{%cl, $dst|$dst, CL}",
@@ -2540,18 +2573,18 @@
def ROL32m1 : I<0xD1, MRM0m, (outs), (ins i32mem:$dst),
"rol{l}\t$dst",
[(store (rotl (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
-}
+} // Constraints = ""
let Uses = [CL] in {
-def ROR8rCL : I<0xD2, MRM1r, (outs GR8 :$dst), (ins GR8 :$src),
+def ROR8rCL : I<0xD2, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1),
"ror{b}\t{%cl, $dst|$dst, CL}",
- [(set GR8:$dst, (rotr GR8:$src, CL))]>;
-def ROR16rCL : I<0xD3, MRM1r, (outs GR16:$dst), (ins GR16:$src),
+ [(set GR8:$dst, (rotr GR8:$src1, CL))]>;
+def ROR16rCL : I<0xD3, MRM1r, (outs GR16:$dst), (ins GR16:$src1),
"ror{w}\t{%cl, $dst|$dst, CL}",
- [(set GR16:$dst, (rotr GR16:$src, CL))]>, OpSize;
-def ROR32rCL : I<0xD3, MRM1r, (outs GR32:$dst), (ins GR32:$src),
+ [(set GR16:$dst, (rotr GR16:$src1, CL))]>, OpSize;
+def ROR32rCL : I<0xD3, MRM1r, (outs GR32:$dst), (ins GR32:$src1),
"ror{l}\t{%cl, $dst|$dst, CL}",
- [(set GR32:$dst, (rotr GR32:$src, CL))]>;
+ [(set GR32:$dst, (rotr GR32:$src1, CL))]>;
}
def ROR8ri : Ii8<0xC0, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2),
@@ -2576,7 +2609,7 @@
"ror{l}\t$dst",
[(set GR32:$dst, (rotr GR32:$src1, (i8 1)))]>;
-let isTwoAddress = 0 in {
+let Constraints = "" in {
let Uses = [CL] in {
def ROR8mCL : I<0xD2, MRM1m, (outs), (ins i8mem :$dst),
"ror{b}\t{%cl, $dst|$dst, CL}",
@@ -2610,8 +2643,7 @@
def ROR32m1 : I<0xD1, MRM1m, (outs), (ins i32mem:$dst),
"ror{l}\t$dst",
[(store (rotr (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
-}
-
+} // Constraints = ""
// Double shift instructions (generalizations of rotate)
@@ -2667,7 +2699,7 @@
TB, OpSize;
}
-let isTwoAddress = 0 in {
+let Constraints = "" in {
let Uses = [CL] in {
def SHLD32mrCL : I<0xA5, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
"shld{l}\t{%cl, $src2, $dst|$dst, $src2, CL}",
@@ -2713,7 +2745,7 @@
[(store (X86shrd (loadi16 addr:$dst), GR16:$src2,
(i8 imm:$src3)), addr:$dst)]>,
TB, OpSize;
-}
+} // Constraints = ""
} // Defs = [EFLAGS]
@@ -2799,7 +2831,7 @@
(X86add_flag GR32:$src1, i32immSExt8:$src2))]>;
}
-let isTwoAddress = 0 in {
+let Constraints = "" in {
// Memory-Register Addition
def ADD8mr : I<0x00, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2),
"add{b}\t{$src2, $dst|$dst, $src2}",
@@ -2843,7 +2875,7 @@
"add{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
def ADD32i32 : Ii32<0x05, RawFrm, (outs), (ins i32imm:$src),
"add{l}\t{$src, %eax|%eax, $src}", []>;
-}
+} // Constraints = ""
let Uses = [EFLAGS] in {
let isCommutable = 1 in { // X = ADC Y, Z --> X = ADC Z, Y
@@ -2905,7 +2937,7 @@
"adc{l}\t{$src2, $dst|$dst, $src2}",
[(set GR32:$dst, (adde GR32:$src1, i32immSExt8:$src2))]>;
-let isTwoAddress = 0 in {
+let Constraints = "" in {
def ADC8mr : I<0x10, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2),
"adc{b}\t{$src2, $dst|$dst, $src2}",
[(store (adde (load addr:$dst), GR8:$src2), addr:$dst)]>;
@@ -2940,7 +2972,7 @@
"adc{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
def ADC32i32 : Ii32<0x15, RawFrm, (outs), (ins i32imm:$src),
"adc{l}\t{$src, %eax|%eax, $src}", []>;
-}
+} // Constraints = ""
} // Uses = [EFLAGS]
// Register-Register Subtraction
@@ -3012,7 +3044,7 @@
[(set GR32:$dst, EFLAGS,
(X86sub_flag GR32:$src1, i32immSExt8:$src2))]>;
-let isTwoAddress = 0 in {
+let Constraints = "" in {
// Memory-Register Subtraction
def SUB8mr : I<0x28, MRMDestMem, (outs), (ins i8mem :$dst, GR8 :$src2),
"sub{b}\t{$src2, $dst|$dst, $src2}",
@@ -3057,7 +3089,7 @@
"sub{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
def SUB32i32 : Ii32<0x2D, RawFrm, (outs), (ins i32imm:$src),
"sub{l}\t{$src, %eax|%eax, $src}", []>;
-}
+} // Constraints = ""
let Uses = [EFLAGS] in {
def SBB8rr : I<0x18, MRMDestReg, (outs GR8:$dst),
@@ -3073,7 +3105,7 @@
"sbb{l}\t{$src2, $dst|$dst, $src2}",
[(set GR32:$dst, (sube GR32:$src1, GR32:$src2))]>;
-let isTwoAddress = 0 in {
+let Constraints = "" in {
def SBB8mr : I<0x18, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2),
"sbb{b}\t{$src2, $dst|$dst, $src2}",
[(store (sube (load addr:$dst), GR8:$src2), addr:$dst)]>;
@@ -3108,7 +3140,7 @@
"sbb{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
def SBB32i32 : Ii32<0x1D, RawFrm, (outs), (ins i32imm:$src),
"sbb{l}\t{$src, %eax|%eax, $src}", []>;
-}
+} // Constraints = ""
let isCodeGenOnly = 1 in {
def SBB8rr_REV : I<0x1A, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
@@ -3816,6 +3848,7 @@
// Thread Local Storage Instructions
//
+// ELF TLS Support
// All calls clobber the non-callee saved registers. ESP is marked as
// a use to prevent stack-pointer assignments that appear immediately
// before calls from potentially appearing dead.
@@ -3830,15 +3863,15 @@
[(X86tlsaddr tls32addr:$sym)]>,
Requires<[In32BitMode]>;
-// FIXME: Not true for darwin
-let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0,
- MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
- XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
- XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
+// Darwin TLS Support
+// For i386, the address of the thunk is passed on the stack, on return the
+// address of the variable is in %eax. %ecx is trashed during the function
+// call. All other registers are preserved.
+let Defs = [EAX, ECX],
Uses = [ESP],
usesCustomInserter = 1 in
def TLSCall_32 : I<0, Pseudo, (outs), (ins i32mem:$sym),
- "# Fixme into a call",
+ "# TLSCall_32",
[(X86TLSCall addr:$sym)]>,
Requires<[In32BitMode]>;
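(Illustrative aside, not part of the patch: a minimal C sketch, under stated
assumptions, of the kind of access TLSCall_32 handles on i386 Darwin. The
variable and function names are hypothetical; the point is that each access
calls the thunk, which returns the variable's address in %eax and trashes
only %ecx, matching the Defs/Uses above.)

/* Hypothetical example -- not from this patch. A __thread access that
 * the Darwin i386 backend lowers through X86TLSCall: the thunk call
 * yields the address in %eax, clobbers %ecx, and preserves the rest. */
static __thread int tls_counter;

int bump_tls(void) {
  return ++tls_counter;   /* address of tls_counter comes via the thunk */
}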
@@ -4800,14 +4833,14 @@
// Patterns for nodes that do not produce flags, for instructions that do.
// Increment reg.
-def : Pat<(add GR8:$src , 1), (INC8r GR8:$src)>;
-def : Pat<(add GR16:$src, 1), (INC16r GR16:$src)>, Requires<[In32BitMode]>;
-def : Pat<(add GR32:$src, 1), (INC32r GR32:$src)>, Requires<[In32BitMode]>;
+def : Pat<(add GR8:$src1 , 1), (INC8r GR8:$src1)>;
+def : Pat<(add GR16:$src1, 1), (INC16r GR16:$src1)>, Requires<[In32BitMode]>;
+def : Pat<(add GR32:$src1, 1), (INC32r GR32:$src1)>, Requires<[In32BitMode]>;
// Decrement reg.
-def : Pat<(add GR8:$src , -1), (DEC8r GR8:$src)>;
-def : Pat<(add GR16:$src, -1), (DEC16r GR16:$src)>, Requires<[In32BitMode]>;
-def : Pat<(add GR32:$src, -1), (DEC32r GR32:$src)>, Requires<[In32BitMode]>;
+def : Pat<(add GR8:$src1 , -1), (DEC8r GR8:$src1)>;
+def : Pat<(add GR16:$src1, -1), (DEC16r GR16:$src1)>, Requires<[In32BitMode]>;
+def : Pat<(add GR32:$src1, -1), (DEC32r GR32:$src1)>, Requires<[In32BitMode]>;
// or reg/reg.
def : Pat<(or GR8 :$src1, GR8 :$src2), (OR8rr GR8 :$src1, GR8 :$src2)>;
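(Illustrative aside, not part of the patch: a hedged C sketch of why the
flag-less patterns above are worth having. When the EFLAGS that INC/DEC
define are dead, these patterns still let the selector pick the short
encodings; the function names here are hypothetical.)

/* Hypothetical example -- not from this patch. The flags INC32r/DEC32r
 * define are unused here, so the flag-less (add x, 1) and (add x, -1)
 * patterns above can still select inc/dec in 32-bit mode. */
int bump(int x) { return x + 1; }   /* (add GR32:$src1,  1) -> INC32r */
int drop(int x) { return x - 1; }   /* (add GR32:$src1, -1) -> DEC32r */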
Modified: llvm/branches/wendling/eh/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/X86/X86InstrSSE.td?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/branches/wendling/eh/lib/Target/X86/X86InstrSSE.td Fri Jul 2 04:57:13 2010
@@ -368,1073 +368,593 @@
}
//===----------------------------------------------------------------------===//
-// SSE1 Instructions
+// SSE 1 & 2 Instruction Classes
//===----------------------------------------------------------------------===//
-// Move Instructions. Register-to-register movss is not used for FR32
-// register copies because it's a partial register update; FsMOVAPSrr is
-// used instead. Register-to-register movss is not modeled as an INSERT_SUBREG
-// because INSERT_SUBREG requires that the insert be implementable in terms of
-// a copy, and just mentioned, we don't use movss for copies.
-let Constraints = "$src1 = $dst" in
-def MOVSSrr : SSI<0x10, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, FR32:$src2),
- "movss\t{$src2, $dst|$dst, $src2}",
- [(set (v4f32 VR128:$dst),
- (movl VR128:$src1, (scalar_to_vector FR32:$src2)))]>;
+/// sse12_fp_scalar - SSE 1 & 2 scalar instruction class
+multiclass sse12_fp_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ RegisterClass RC, X86MemOperand x86memop> {
+ let isCommutable = 1 in {
+ def rr : SI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
+ OpcodeStr, [(set RC:$dst, (OpNode RC:$src1, RC:$src2))]>;
+ }
+ def rm : SI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
+ OpcodeStr, [(set RC:$dst, (OpNode RC:$src1, (load addr:$src2)))]>;
+}
+
+/// sse12_fp_scalar_int - SSE 1 & 2 scalar intrinsic instruction class
+multiclass sse12_fp_scalar_int<bits<8> opc, string OpcodeStr, RegisterClass RC,
+ string asm, string SSEVer, string FPSizeStr,
+ Operand memopr, ComplexPattern mem_cpat> {
+ def rr_Int : SI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
+ asm, [(set RC:$dst, (
+ !nameconcat<Intrinsic>("int_x86_sse",
+ !strconcat(SSEVer, !strconcat("_",
+ !strconcat(OpcodeStr, FPSizeStr))))
+ RC:$src1, RC:$src2))]>;
+ def rm_Int : SI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, memopr:$src2),
+ asm, [(set RC:$dst, (
+ !nameconcat<Intrinsic>("int_x86_sse",
+ !strconcat(SSEVer, !strconcat("_",
+ !strconcat(OpcodeStr, FPSizeStr))))
+ RC:$src1, mem_cpat:$src2))]>;
+}
+
+/// sse12_fp_packed - SSE 1 & 2 packed instruction class
+multiclass sse12_fp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ RegisterClass RC, ValueType vt,
+ X86MemOperand x86memop, PatFrag mem_frag,
+ Domain d, bit MayLoad = 0> {
+ let isCommutable = 1 in
+ def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
+ OpcodeStr, [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))],d>;
+ let mayLoad = MayLoad in
+ def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
+ OpcodeStr, [(set RC:$dst, (OpNode RC:$src1,
+ (mem_frag addr:$src2)))],d>;
+}
+
+/// sse12_fp_packed_logical_rm - SSE 1 & 2 packed logical instruction class
+multiclass sse12_fp_packed_logical_rm<bits<8> opc, RegisterClass RC, Domain d,
+ string OpcodeStr, X86MemOperand x86memop,
+ list<dag> pat_rr, list<dag> pat_rm> {
+ let isCommutable = 1 in
+ def rr : PI<opc, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2), OpcodeStr, pat_rr, d>;
+ def rm : PI<opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, x86memop:$src2), OpcodeStr, pat_rm, d>;
+}
+
+/// sse12_fp_packed_int - SSE 1 & 2 packed intrinsic instruction class
+multiclass sse12_fp_packed_int<bits<8> opc, string OpcodeStr, RegisterClass RC,
+ string asm, string SSEVer, string FPSizeStr,
+ X86MemOperand x86memop, PatFrag mem_frag,
+ Domain d> {
+ def rr_Int : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
+ asm, [(set RC:$dst, (
+ !nameconcat<Intrinsic>("int_x86_sse",
+ !strconcat(SSEVer, !strconcat("_",
+ !strconcat(OpcodeStr, FPSizeStr))))
+ RC:$src1, RC:$src2))], d>;
+ def rm_Int : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
+ asm, [(set RC:$dst, (
+ !nameconcat<Intrinsic>("int_x86_sse",
+ !strconcat(SSEVer, !strconcat("_",
+ !strconcat(OpcodeStr, FPSizeStr))))
+ RC:$src1, (mem_frag addr:$src2)))], d>;
+}
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Move Instructions
+//===----------------------------------------------------------------------===//
+
+class sse12_move_rr<RegisterClass RC, ValueType vt, string asm> :
+ SI<0x10, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, RC:$src2), asm,
+ [(set (vt VR128:$dst), (movl VR128:$src1, (scalar_to_vector RC:$src2)))]>;
+
+// Loads from memory, automatically zeroing the upper bits.
+class sse12_move_rm<RegisterClass RC, X86MemOperand x86memop,
+ PatFrag mem_pat, string OpcodeStr> :
+ SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set RC:$dst, (mem_pat addr:$src))]>;
+
+// Move Instructions. Register-to-register movss/movsd is not used for FR32/64
+// register copies because it's a partial register update; FsMOVAPSrr/FsMOVAPDrr
+// is used instead. Register-to-register movss/movsd is not modeled as an
+// INSERT_SUBREG because INSERT_SUBREG requires that the insert be implementable
+// in terms of a copy, and, as just mentioned, we don't use movss/movsd for
+// copies.
+let isAsmParserOnly = 1 in {
+ def VMOVSSrr : sse12_move_rr<FR32, v4f32,
+ "movss\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XS, VEX_4V;
+ def VMOVSDrr : sse12_move_rr<FR64, v2f64,
+ "movsd\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XD, VEX_4V;
+
+ let canFoldAsLoad = 1, isReMaterializable = 1 in {
+ def VMOVSSrm : sse12_move_rm<FR32, f32mem, loadf32, "movss">, XS, VEX;
+
+ let AddedComplexity = 20 in
+ def VMOVSDrm : sse12_move_rm<FR64, f64mem, loadf64, "movsd">, XD, VEX;
+ }
+}
+
+let Constraints = "$src1 = $dst" in {
+ def MOVSSrr : sse12_move_rr<FR32, v4f32,
+ "movss\t{$src2, $dst|$dst, $src2}">, XS;
+ def MOVSDrr : sse12_move_rr<FR64, v2f64,
+ "movsd\t{$src2, $dst|$dst, $src2}">, XD;
+}
+
+let canFoldAsLoad = 1, isReMaterializable = 1 in {
+ def MOVSSrm : sse12_move_rm<FR32, f32mem, loadf32, "movss">, XS;
+
+ let AddedComplexity = 20 in
+ def MOVSDrm : sse12_move_rm<FR64, f64mem, loadf64, "movsd">, XD;
+}
+let AddedComplexity = 15 in {
// Extract the low 32-bit value from one vector and insert it into another.
-let AddedComplexity = 15 in
def : Pat<(v4f32 (movl VR128:$src1, VR128:$src2)),
(MOVSSrr (v4f32 VR128:$src1),
(EXTRACT_SUBREG (v4f32 VR128:$src2), sub_ss))>;
+// Extract the low 64-bit value from one vector and insert it into another.
+def : Pat<(v2f64 (movl VR128:$src1, VR128:$src2)),
+ (MOVSDrr (v2f64 VR128:$src1),
+ (EXTRACT_SUBREG (v2f64 VR128:$src2), sub_sd))>;
+}
// Implicitly promote a 32-bit scalar to a vector.
def : Pat<(v4f32 (scalar_to_vector FR32:$src)),
(INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, sub_ss)>;
+// Implicitly promote a 64-bit scalar to a vector.
+def : Pat<(v2f64 (scalar_to_vector FR64:$src)),
+ (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, sub_sd)>;
-// Loading from memory automatically zeroing upper bits.
-let canFoldAsLoad = 1, isReMaterializable = 1 in
-def MOVSSrm : SSI<0x10, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src),
- "movss\t{$src, $dst|$dst, $src}",
- [(set FR32:$dst, (loadf32 addr:$src))]>;
-
+let AddedComplexity = 20 in {
// MOVSSrm zeros the high parts of the register; represent this
// with SUBREG_TO_REG.
-let AddedComplexity = 20 in {
def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
(SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>;
def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
(SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>;
def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
(SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>;
+// MOVSDrm zeros the high parts of the register; represent this
+// with SUBREG_TO_REG.
+def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
+ (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
+def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
+ (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
+def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
+ (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
+def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
+ (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
+def : Pat<(v2f64 (X86vzload addr:$src)),
+ (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
}
// Store scalar value to memory.
def MOVSSmr : SSI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, FR32:$src),
"movss\t{$src, $dst|$dst, $src}",
[(store FR32:$src, addr:$dst)]>;
+def MOVSDmr : SDI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src),
+ "movsd\t{$src, $dst|$dst, $src}",
+ [(store FR64:$src, addr:$dst)]>;
+
+let isAsmParserOnly = 1 in {
+def VMOVSSmr : SI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, FR32:$src),
+ "movss\t{$src, $dst|$dst, $src}",
+ [(store FR32:$src, addr:$dst)]>, XS, VEX_4V;
+def VMOVSDmr : SI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src),
+ "movsd\t{$src, $dst|$dst, $src}",
+ [(store FR64:$src, addr:$dst)]>, XD, VEX_4V;
+}
// Extract and store.
def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
addr:$dst),
(MOVSSmr addr:$dst,
(EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
+def : Pat<(store (f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
+ addr:$dst),
+ (MOVSDmr addr:$dst,
+ (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>;
-// Conversion instructions
-def CVTTSS2SIrr : SSI<0x2C, MRMSrcReg, (outs GR32:$dst), (ins FR32:$src),
- "cvttss2si\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (fp_to_sint FR32:$src))]>;
-def CVTTSS2SIrm : SSI<0x2C, MRMSrcMem, (outs GR32:$dst), (ins f32mem:$src),
- "cvttss2si\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (fp_to_sint (loadf32 addr:$src)))]>;
-def CVTSI2SSrr : SSI<0x2A, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src),
- "cvtsi2ss\t{$src, $dst|$dst, $src}",
- [(set FR32:$dst, (sint_to_fp GR32:$src))]>;
-def CVTSI2SSrm : SSI<0x2A, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src),
- "cvtsi2ss\t{$src, $dst|$dst, $src}",
- [(set FR32:$dst, (sint_to_fp (loadi32 addr:$src)))]>;
-
-// Match intrinsics which expect XMM operand(s).
-def CVTSS2SIrr: SSI<0x2D, MRMSrcReg, (outs GR32:$dst), (ins FR32:$src),
- "cvtss2si{l}\t{$src, $dst|$dst, $src}", []>;
-def CVTSS2SIrm: SSI<0x2D, MRMSrcMem, (outs GR32:$dst), (ins f32mem:$src),
- "cvtss2si{l}\t{$src, $dst|$dst, $src}", []>;
-
-def Int_CVTSS2SIrr : SSI<0x2D, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
- "cvtss2si\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (int_x86_sse_cvtss2si VR128:$src))]>;
-def Int_CVTSS2SIrm : SSI<0x2D, MRMSrcMem, (outs GR32:$dst), (ins f32mem:$src),
- "cvtss2si\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (int_x86_sse_cvtss2si
- (load addr:$src)))]>;
-
-// Match intrinsics which expect MM and XMM operand(s).
-def Int_CVTPS2PIrr : PSI<0x2D, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
- "cvtps2pi\t{$src, $dst|$dst, $src}",
- [(set VR64:$dst, (int_x86_sse_cvtps2pi VR128:$src))]>;
-def Int_CVTPS2PIrm : PSI<0x2D, MRMSrcMem, (outs VR64:$dst), (ins f64mem:$src),
- "cvtps2pi\t{$src, $dst|$dst, $src}",
- [(set VR64:$dst, (int_x86_sse_cvtps2pi
- (load addr:$src)))]>;
-def Int_CVTTPS2PIrr: PSI<0x2C, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
- "cvttps2pi\t{$src, $dst|$dst, $src}",
- [(set VR64:$dst, (int_x86_sse_cvttps2pi VR128:$src))]>;
-def Int_CVTTPS2PIrm: PSI<0x2C, MRMSrcMem, (outs VR64:$dst), (ins f64mem:$src),
- "cvttps2pi\t{$src, $dst|$dst, $src}",
- [(set VR64:$dst, (int_x86_sse_cvttps2pi
- (load addr:$src)))]>;
-let Constraints = "$src1 = $dst" in {
- def Int_CVTPI2PSrr : PSI<0x2A, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR64:$src2),
- "cvtpi2ps\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst, (int_x86_sse_cvtpi2ps VR128:$src1,
- VR64:$src2))]>;
- def Int_CVTPI2PSrm : PSI<0x2A, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, i64mem:$src2),
- "cvtpi2ps\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst, (int_x86_sse_cvtpi2ps VR128:$src1,
- (load addr:$src2)))]>;
-}
-
-// Aliases for intrinsics
-def Int_CVTTSS2SIrr : SSI<0x2C, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
- "cvttss2si\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst,
- (int_x86_sse_cvttss2si VR128:$src))]>;
-def Int_CVTTSS2SIrm : SSI<0x2C, MRMSrcMem, (outs GR32:$dst), (ins f32mem:$src),
- "cvttss2si\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst,
- (int_x86_sse_cvttss2si(load addr:$src)))]>;
-
-let Constraints = "$src1 = $dst" in {
- def Int_CVTSI2SSrr : SSI<0x2A, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, GR32:$src2),
- "cvtsi2ss\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst, (int_x86_sse_cvtsi2ss VR128:$src1,
- GR32:$src2))]>;
- def Int_CVTSI2SSrm : SSI<0x2A, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, i32mem:$src2),
- "cvtsi2ss\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst, (int_x86_sse_cvtsi2ss VR128:$src1,
- (loadi32 addr:$src2)))]>;
+// Move Aligned/Unaligned floating point values
+multiclass sse12_mov_packed<bits<8> opc, RegisterClass RC,
+ X86MemOperand x86memop, PatFrag ld_frag,
+ string asm, Domain d,
+ bit IsReMaterializable = 1> {
+let neverHasSideEffects = 1 in
+ def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
+ !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [], d>;
+let canFoldAsLoad = 1, isReMaterializable = IsReMaterializable in
+ def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
+ !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
+ [(set RC:$dst, (ld_frag addr:$src))], d>;
}
-// Comparison instructions
-let Constraints = "$src1 = $dst", neverHasSideEffects = 1 in {
- def CMPSSrr : SSIi8<0xC2, MRMSrcReg,
- (outs FR32:$dst), (ins FR32:$src1, FR32:$src, SSECC:$cc),
- "cmp${cc}ss\t{$src, $dst|$dst, $src}", []>;
-let mayLoad = 1 in
- def CMPSSrm : SSIi8<0xC2, MRMSrcMem,
- (outs FR32:$dst), (ins FR32:$src1, f32mem:$src, SSECC:$cc),
- "cmp${cc}ss\t{$src, $dst|$dst, $src}", []>;
+let isAsmParserOnly = 1 in {
+defm VMOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32,
+ "movaps", SSEPackedSingle>, VEX;
+defm VMOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64,
+ "movapd", SSEPackedDouble>, OpSize, VEX;
+defm VMOVUPS : sse12_mov_packed<0x10, VR128, f128mem, loadv4f32,
+ "movups", SSEPackedSingle>, VEX;
+defm VMOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64,
+ "movupd", SSEPackedDouble, 0>, OpSize, VEX;
+}
+defm MOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32,
+ "movaps", SSEPackedSingle>, TB;
+defm MOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64,
+ "movapd", SSEPackedDouble>, TB, OpSize;
+defm MOVUPS : sse12_mov_packed<0x10, VR128, f128mem, loadv4f32,
+ "movups", SSEPackedSingle>, TB;
+defm MOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64,
+ "movupd", SSEPackedDouble, 0>, TB, OpSize;
- // Accept explicit immediate argument form instead of comparison code.
let isAsmParserOnly = 1 in {
- def CMPSSrr_alt : SSIi8<0xC2, MRMSrcReg,
- (outs FR32:$dst), (ins FR32:$src1, FR32:$src, i8imm:$src2),
- "cmpss\t{$src2, $src, $dst|$dst, $src, $src2}", []>;
-let mayLoad = 1 in
- def CMPSSrm_alt : SSIi8<0xC2, MRMSrcMem,
- (outs FR32:$dst), (ins FR32:$src1, f32mem:$src, i8imm:$src2),
- "cmpss\t{$src2, $src, $dst|$dst, $src, $src2}", []>;
-}
+def VMOVAPSmr : VPSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movaps\t{$src, $dst|$dst, $src}",
+ [(alignedstore (v4f32 VR128:$src), addr:$dst)]>, VEX;
+def VMOVAPDmr : VPDI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movapd\t{$src, $dst|$dst, $src}",
+ [(alignedstore (v2f64 VR128:$src), addr:$dst)]>, VEX;
+def VMOVUPSmr : VPSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movups\t{$src, $dst|$dst, $src}",
+ [(store (v4f32 VR128:$src), addr:$dst)]>, VEX;
+def VMOVUPDmr : VPDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movupd\t{$src, $dst|$dst, $src}",
+ [(store (v2f64 VR128:$src), addr:$dst)]>, VEX;
}
+def MOVAPSmr : PSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movaps\t{$src, $dst|$dst, $src}",
+ [(alignedstore (v4f32 VR128:$src), addr:$dst)]>;
+def MOVAPDmr : PDI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movapd\t{$src, $dst|$dst, $src}",
+ [(alignedstore (v2f64 VR128:$src), addr:$dst)]>;
+def MOVUPSmr : PSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movups\t{$src, $dst|$dst, $src}",
+ [(store (v4f32 VR128:$src), addr:$dst)]>;
+def MOVUPDmr : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movupd\t{$src, $dst|$dst, $src}",
+ [(store (v2f64 VR128:$src), addr:$dst)]>;
-let Defs = [EFLAGS] in {
-def UCOMISSrr: PSI<0x2E, MRMSrcReg, (outs), (ins FR32:$src1, FR32:$src2),
- "ucomiss\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp FR32:$src1, FR32:$src2))]>;
-def UCOMISSrm: PSI<0x2E, MRMSrcMem, (outs), (ins FR32:$src1, f32mem:$src2),
- "ucomiss\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp FR32:$src1, (loadf32 addr:$src2)))]>;
-
-def COMISSrr: PSI<0x2F, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
- "comiss\t{$src2, $src1|$src1, $src2}", []>;
-def COMISSrm: PSI<0x2F, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
- "comiss\t{$src2, $src1|$src1, $src2}", []>;
-
-} // Defs = [EFLAGS]
-
-// Aliases to match intrinsics which expect XMM operand(s).
-let Constraints = "$src1 = $dst" in {
- def Int_CMPSSrr : SSIi8<0xC2, MRMSrcReg,
- (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src, SSECC:$cc),
- "cmp${cc}ss\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse_cmp_ss
- VR128:$src1,
- VR128:$src, imm:$cc))]>;
- def Int_CMPSSrm : SSIi8<0xC2, MRMSrcMem,
- (outs VR128:$dst),
- (ins VR128:$src1, f32mem:$src, SSECC:$cc),
- "cmp${cc}ss\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse_cmp_ss VR128:$src1,
- (load addr:$src), imm:$cc))]>;
+// Intrinsic forms of MOVUPS/D load and store
+let isAsmParserOnly = 1 in {
+ let canFoldAsLoad = 1, isReMaterializable = 1 in
+ def VMOVUPSrm_Int : VPSI<0x10, MRMSrcMem, (outs VR128:$dst),
+ (ins f128mem:$src),
+ "movups\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse_loadu_ps addr:$src))]>, VEX;
+ def VMOVUPDrm_Int : VPDI<0x10, MRMSrcMem, (outs VR128:$dst),
+ (ins f128mem:$src),
+ "movupd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_loadu_pd addr:$src))]>, VEX;
+ def VMOVUPSmr_Int : VPSI<0x11, MRMDestMem, (outs),
+ (ins f128mem:$dst, VR128:$src),
+ "movups\t{$src, $dst|$dst, $src}",
+ [(int_x86_sse_storeu_ps addr:$dst, VR128:$src)]>, VEX;
+ def VMOVUPDmr_Int : VPDI<0x11, MRMDestMem, (outs),
+ (ins f128mem:$dst, VR128:$src),
+ "movupd\t{$src, $dst|$dst, $src}",
+ [(int_x86_sse2_storeu_pd addr:$dst, VR128:$src)]>, VEX;
}
-
-let Defs = [EFLAGS] in {
-def Int_UCOMISSrr: PSI<0x2E, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
- "ucomiss\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86ucomi (v4f32 VR128:$src1),
- VR128:$src2))]>;
-def Int_UCOMISSrm: PSI<0x2E, MRMSrcMem, (outs),(ins VR128:$src1, f128mem:$src2),
- "ucomiss\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86ucomi (v4f32 VR128:$src1),
- (load addr:$src2)))]>;
-
-def Int_COMISSrr: PSI<0x2F, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
- "comiss\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86comi (v4f32 VR128:$src1),
- VR128:$src2))]>;
-def Int_COMISSrm: PSI<0x2F, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
- "comiss\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86comi (v4f32 VR128:$src1),
- (load addr:$src2)))]>;
-} // Defs = [EFLAGS]
-
-// Aliases of packed SSE1 instructions for scalar use. These all have names
-// that start with 'Fs'.
-
-// Alias instructions that map fld0 to pxor for sse.
-let isReMaterializable = 1, isAsCheapAsAMove = 1, isCodeGenOnly = 1,
- canFoldAsLoad = 1 in
- // FIXME: Set encoding to pseudo!
-def FsFLD0SS : I<0xEF, MRMInitReg, (outs FR32:$dst), (ins), "",
- [(set FR32:$dst, fp32imm0)]>,
- Requires<[HasSSE1]>, TB, OpSize;
-
-// Alias instruction to do FR32 reg-to-reg copy using movaps. Upper bits are
-// disregarded.
-let neverHasSideEffects = 1 in
-def FsMOVAPSrr : PSI<0x28, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
- "movaps\t{$src, $dst|$dst, $src}", []>;
-
-// Alias instruction to load FR32 from f128mem using movaps. Upper bits are
-// disregarded.
let canFoldAsLoad = 1, isReMaterializable = 1 in
-def FsMOVAPSrm : PSI<0x28, MRMSrcMem, (outs FR32:$dst), (ins f128mem:$src),
- "movaps\t{$src, $dst|$dst, $src}",
- [(set FR32:$dst, (alignedloadfsf32 addr:$src))]>;
-
-/// sse12_fp_alias_pack_logical - SSE 1 & 2 aliased packed FP logical ops
-///
-multiclass sse12_fp_alias_pack_logical<bits<8> opc, string OpcodeStr,
- SDNode OpNode, int NoPat = 0,
- bit MayLoad = 0, bit Commutable = 1> {
- def PSrr : PSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src1, FR32:$src2),
- !strconcat(OpcodeStr, "ps\t{$src2, $dst|$dst, $src2}"),
- !if(NoPat, []<dag>,
- [(set FR32:$dst, (OpNode FR32:$src1, FR32:$src2))])> {
- let isCommutable = Commutable;
- }
+def MOVUPSrm_Int : PSI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "movups\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse_loadu_ps addr:$src))]>;
+def MOVUPDrm_Int : PDI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "movupd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_loadu_pd addr:$src))]>;
- def PDrr : PDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src1, FR64:$src2),
- !strconcat(OpcodeStr, "pd\t{$src2, $dst|$dst, $src2}"),
- !if(NoPat, []<dag>,
- [(set FR64:$dst, (OpNode FR64:$src1, FR64:$src2))])> {
- let isCommutable = Commutable;
- }
+def MOVUPSmr_Int : PSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movups\t{$src, $dst|$dst, $src}",
+ [(int_x86_sse_storeu_ps addr:$dst, VR128:$src)]>;
+def MOVUPDmr_Int : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movupd\t{$src, $dst|$dst, $src}",
+ [(int_x86_sse2_storeu_pd addr:$dst, VR128:$src)]>;
- def PSrm : PSI<opc, MRMSrcMem, (outs FR32:$dst),
- (ins FR32:$src1, f128mem:$src2),
- !strconcat(OpcodeStr, "ps\t{$src2, $dst|$dst, $src2}"),
- !if(NoPat, []<dag>,
- [(set FR32:$dst, (OpNode FR32:$src1,
- (memopfsf32 addr:$src2)))])> {
- let mayLoad = MayLoad;
- }
-
- def PDrm : PDI<opc, MRMSrcMem, (outs FR64:$dst),
- (ins FR64:$src1, f128mem:$src2),
- !strconcat(OpcodeStr, "pd\t{$src2, $dst|$dst, $src2}"),
- !if(NoPat, []<dag>,
- [(set FR64:$dst, (OpNode FR64:$src1,
- (memopfsf64 addr:$src2)))])> {
- let mayLoad = MayLoad;
- }
+// Move Low/High packed floating point values
+multiclass sse12_mov_hilo_packed<bits<8>opc, RegisterClass RC,
+ PatFrag mov_frag, string base_opc,
+ string asm_opr> {
+ def PSrm : PI<opc, MRMSrcMem,
+ (outs RC:$dst), (ins RC:$src1, f64mem:$src2),
+ !strconcat(!strconcat(base_opc,"s"), asm_opr),
+ [(set RC:$dst,
+ (mov_frag RC:$src1,
+ (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))))],
+ SSEPackedSingle>, TB;
+
+ def PDrm : PI<opc, MRMSrcMem,
+ (outs RC:$dst), (ins RC:$src1, f64mem:$src2),
+ !strconcat(!strconcat(base_opc,"d"), asm_opr),
+ [(set RC:$dst, (v2f64 (mov_frag RC:$src1,
+ (scalar_to_vector (loadf64 addr:$src2)))))],
+ SSEPackedDouble>, TB, OpSize;
+}
+
+let isAsmParserOnly = 1, AddedComplexity = 20 in {
+ defm VMOVL : sse12_mov_hilo_packed<0x12, VR128, movlp, "movlp",
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}">, VEX_4V;
+ defm VMOVH : sse12_mov_hilo_packed<0x16, VR128, movlhps, "movhp",
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}">, VEX_4V;
+}
+let Constraints = "$src1 = $dst", AddedComplexity = 20 in {
+ defm MOVL : sse12_mov_hilo_packed<0x12, VR128, movlp, "movlp",
+ "\t{$src2, $dst|$dst, $src2}">;
+ defm MOVH : sse12_mov_hilo_packed<0x16, VR128, movlhps, "movhp",
+ "\t{$src2, $dst|$dst, $src2}">;
}
-// Alias bitwise logical operations using SSE logical ops on packed FP values.
-let Constraints = "$src1 = $dst" in {
- defm FsAND : sse12_fp_alias_pack_logical<0x54, "and", X86fand>;
- defm FsOR : sse12_fp_alias_pack_logical<0x56, "or", X86for>;
- defm FsXOR : sse12_fp_alias_pack_logical<0x57, "xor", X86fxor>;
-
- let neverHasSideEffects = 1 in
- defm FsANDN : sse12_fp_alias_pack_logical<0x55, "andn", undef, 1, 1, 0>;
+let isAsmParserOnly = 1 in {
+def VMOVLPSmr : VPSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
+ "movlps\t{$src, $dst|$dst, $src}",
+ [(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128:$src)),
+ (iPTR 0))), addr:$dst)]>, VEX;
+def VMOVLPDmr : VPDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
+ "movlpd\t{$src, $dst|$dst, $src}",
+ [(store (f64 (vector_extract (v2f64 VR128:$src),
+ (iPTR 0))), addr:$dst)]>, VEX;
}
+def MOVLPSmr : PSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
+ "movlps\t{$src, $dst|$dst, $src}",
+ [(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128:$src)),
+ (iPTR 0))), addr:$dst)]>;
+def MOVLPDmr : PDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
+ "movlpd\t{$src, $dst|$dst, $src}",
+ [(store (f64 (vector_extract (v2f64 VR128:$src),
+ (iPTR 0))), addr:$dst)]>;
-/// sse12_fp_scalar - SSE 1 & 2 scalar instructions class
-multiclass sse12_fp_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
- RegisterClass RC, X86MemOperand memop> {
- let isCommutable = 1 in {
- def rr : SI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
- OpcodeStr, [(set RC:$dst, (OpNode RC:$src1, RC:$src2))]>;
- }
- def rm : SI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, memop:$src2),
- OpcodeStr, [(set RC:$dst, (OpNode RC:$src1, (load addr:$src2)))]>;
+// v2f64 extract element 1 is always custom lowered to unpack high to low
+// and extract element 0 so the non-store version isn't too horrible.
+let isAsmParserOnly = 1 in {
+def VMOVHPSmr : VPSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
+ "movhps\t{$src, $dst|$dst, $src}",
+ [(store (f64 (vector_extract
+ (unpckh (bc_v2f64 (v4f32 VR128:$src)),
+ (undef)), (iPTR 0))), addr:$dst)]>,
+ VEX;
+def VMOVHPDmr : VPDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
+ "movhpd\t{$src, $dst|$dst, $src}",
+ [(store (f64 (vector_extract
+ (v2f64 (unpckh VR128:$src, (undef))),
+ (iPTR 0))), addr:$dst)]>,
+ VEX;
}
+def MOVHPSmr : PSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
+ "movhps\t{$src, $dst|$dst, $src}",
+ [(store (f64 (vector_extract
+ (unpckh (bc_v2f64 (v4f32 VR128:$src)),
+ (undef)), (iPTR 0))), addr:$dst)]>;
+def MOVHPDmr : PDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
+ "movhpd\t{$src, $dst|$dst, $src}",
+ [(store (f64 (vector_extract
+ (v2f64 (unpckh VR128:$src, (undef))),
+ (iPTR 0))), addr:$dst)]>;
-/// basic_sse12_fp_binop_rm - SSE 1 & 2 binops come in both scalar and
-/// vector forms.
-///
-/// In addition, we also have a special variant of the scalar form here to
-/// represent the associated intrinsic operation. This form is unlike the
-/// plain scalar form, in that it takes an entire vector (instead of a scalar)
-/// and leaves the top elements unmodified (therefore these cannot be commuted).
-///
-/// These three forms can each be reg+reg or reg+mem, so there are a total of
-/// six "instructions".
-///
-let Constraints = "$src1 = $dst" in {
-multiclass basic_sse12_fp_binop_rm<bits<8> opc, string OpcodeStr,
- SDNode OpNode, bit Commutable = 0> {
-
- let Constraints = "", isAsmParserOnly = 1, hasVEX_4VPrefix = 1 in {
- // Scalar operation, reg+reg.
- let Prefix = 12 /* XS */ in
- defm V#NAME#SS : sse12_fp_scalar<opc,
- !strconcat(OpcodeStr, "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- OpNode, FR32, f32mem>;
-
- let Prefix = 11 /* XD */ in
- defm V#NAME#SD : sse12_fp_scalar<opc,
- !strconcat(OpcodeStr, "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- OpNode, FR64, f64mem>;
- }
-
- let Constraints = "$src1 = $dst" in {
- // Scalar operation, reg+reg.
- let Prefix = 12 /* XS */ in
- defm SS : sse12_fp_scalar<opc,
- !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
- OpNode, FR32, f32mem>;
- let Prefix = 11 /* XD */ in
- defm SD : sse12_fp_scalar<opc,
- !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
- OpNode, FR64, f64mem>;
- }
-
- // Vector operation, reg+reg.
- def PSrr : PSI<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
- !strconcat(OpcodeStr, "ps\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (v4f32 (OpNode VR128:$src1, VR128:$src2)))]> {
- let isCommutable = Commutable;
- }
-
- def PDrr : PDI<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
- !strconcat(OpcodeStr, "pd\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (v2f64 (OpNode VR128:$src1, VR128:$src2)))]> {
- let isCommutable = Commutable;
- }
+let isAsmParserOnly = 1, AddedComplexity = 20 in {
+ def VMOVLHPSrr : VPSI<0x16, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ "movlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst,
+ (v4f32 (movlhps VR128:$src1, VR128:$src2)))]>,
+ VEX_4V;
+ def VMOVHLPSrr : VPSI<0x12, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ "movhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst,
+ (v4f32 (movhlps VR128:$src1, VR128:$src2)))]>,
+ VEX_4V;
+}
+let Constraints = "$src1 = $dst", AddedComplexity = 20 in {
+ def MOVLHPSrr : PSI<0x16, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ "movlhps\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (v4f32 (movlhps VR128:$src1, VR128:$src2)))]>;
+ def MOVHLPSrr : PSI<0x12, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ "movhlps\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (v4f32 (movhlps VR128:$src1, VR128:$src2)))]>;
+}
- def V#NAME#PSrr : VPSI<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
- !strconcat(OpcodeStr,
- "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []> {
- let isCommutable = Commutable;
- let Constraints = "";
- let isAsmParserOnly = 1;
- }
+def : Pat<(movlhps VR128:$src1, (bc_v4i32 (v2i64 (X86vzload addr:$src2)))),
+ (MOVHPSrm (v4i32 VR128:$src1), addr:$src2)>;
+let AddedComplexity = 20 in {
+ def : Pat<(v4f32 (movddup VR128:$src, (undef))),
+ (MOVLHPSrr (v4f32 VR128:$src), (v4f32 VR128:$src))>;
+ def : Pat<(v2i64 (movddup VR128:$src, (undef))),
+ (MOVLHPSrr (v2i64 VR128:$src), (v2i64 VR128:$src))>;
+}
- def V#NAME#PDrr : VPDI<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
- !strconcat(OpcodeStr,
- "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []> {
- let isCommutable = Commutable;
- let Constraints = "";
- let isAsmParserOnly = 1;
- }
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Conversion Instructions
+//===----------------------------------------------------------------------===//
- // Vector operation, reg+mem.
- def PSrm : PSI<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, f128mem:$src2),
- !strconcat(OpcodeStr, "ps\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (OpNode VR128:$src1, (memopv4f32 addr:$src2)))]>;
-
- def PDrm : PDI<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, f128mem:$src2),
- !strconcat(OpcodeStr, "pd\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (OpNode VR128:$src1, (memopv2f64 addr:$src2)))]>;
-
- def V#NAME#PSrm : VPSI<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, f128mem:$src2),
- !strconcat(OpcodeStr,
- "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []> {
- let Constraints = "";
- let isAsmParserOnly = 1;
- }
-
- def V#NAME#PDrm : VPDI<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, f128mem:$src2),
- !strconcat(OpcodeStr,
- "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []> {
- let Constraints = "";
- let isAsmParserOnly = 1;
- }
-
- // Intrinsic operation, reg+reg.
- def V#NAME#SSrr_Int : VSSI<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
- !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (!nameconcat<Intrinsic>("int_x86_sse_",
- !strconcat(OpcodeStr, "_ss")) VR128:$src1,
- VR128:$src2))]> {
- // int_x86_sse_xxx_ss
- let Constraints = "";
- }
+multiclass sse12_cvt_s<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
+ SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag,
+ string asm> {
+ def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
+ [(set DstRC:$dst, (OpNode SrcRC:$src))]>;
+ def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
+ [(set DstRC:$dst, (OpNode (ld_frag addr:$src)))]>;
+}
+
+multiclass sse12_cvt_p<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
+ SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag,
+ string asm, Domain d> {
+ def rr : PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
+ [(set DstRC:$dst, (OpNode SrcRC:$src))], d>;
+ def rm : PI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
+ [(set DstRC:$dst, (OpNode (ld_frag addr:$src)))], d>;
+}
+
+multiclass sse12_vcvt_avx<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
+ SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag,
+ string asm> {
+ def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src),
+ asm, []>;
+ def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst),
+ (ins DstRC:$src1, x86memop:$src), asm, []>;
+}
- def V#NAME#SDrr_Int : VSDI<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
- !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (!nameconcat<Intrinsic>("int_x86_sse2_",
- !strconcat(OpcodeStr, "_sd")) VR128:$src1,
- VR128:$src2))]> {
- // int_x86_sse2_xxx_sd
- let Constraints = "";
- }
+let isAsmParserOnly = 1 in {
+defm VCVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32,
+ "cvttss2si\t{$src, $dst|$dst, $src}">, XS, VEX;
+defm VCVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64,
+ "cvttsd2si\t{$src, $dst|$dst, $src}">, XD, VEX;
+defm VCVTSI2SS : sse12_vcvt_avx<0x2A, GR32, FR32, sint_to_fp, i32mem, loadi32,
+ "cvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}">, XS,
+ VEX_4V;
+defm VCVTSI2SD : sse12_vcvt_avx<0x2A, GR32, FR64, sint_to_fp, i32mem, loadi32,
+ "cvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}">, XD,
+ VEX_4V;
+}
+
+defm CVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32,
+ "cvttss2si\t{$src, $dst|$dst, $src}">, XS;
+defm CVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64,
+ "cvttsd2si\t{$src, $dst|$dst, $src}">, XD;
+defm CVTSI2SS : sse12_cvt_s<0x2A, GR32, FR32, sint_to_fp, i32mem, loadi32,
+ "cvtsi2ss\t{$src, $dst|$dst, $src}">, XS;
+defm CVTSI2SD : sse12_cvt_s<0x2A, GR32, FR64, sint_to_fp, i32mem, loadi32,
+ "cvtsi2sd\t{$src, $dst|$dst, $src}">, XD;
+
+// Conversion Instruction Intrinsics - Match intrinsics which expect MM
+// and/or XMM operand(s).
+multiclass sse12_cvt_pint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
+ Intrinsic Int, X86MemOperand x86memop, PatFrag ld_frag,
+ string asm, Domain d> {
+ def rr : PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
+ [(set DstRC:$dst, (Int SrcRC:$src))], d>;
+ def rm : PI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
+ [(set DstRC:$dst, (Int (ld_frag addr:$src)))], d>;
+}
+
+multiclass sse12_cvt_sint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
+ Intrinsic Int, X86MemOperand x86memop, PatFrag ld_frag,
+ string asm> {
+ def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
+ [(set DstRC:$dst, (Int SrcRC:$src))]>;
+ def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
+ [(set DstRC:$dst, (Int (ld_frag addr:$src)))]>;
+}
+
+multiclass sse12_cvt_pint_3addr<bits<8> opc, RegisterClass SrcRC,
+ RegisterClass DstRC, Intrinsic Int, X86MemOperand x86memop,
+ PatFrag ld_frag, string asm, Domain d> {
+ def rr : PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src2),
+ asm, [(set DstRC:$dst, (Int DstRC:$src1, SrcRC:$src2))], d>;
+ def rm : PI<opc, MRMSrcMem, (outs DstRC:$dst),
+ (ins DstRC:$src1, x86memop:$src2), asm,
+ [(set DstRC:$dst, (Int DstRC:$src1, (ld_frag addr:$src2)))], d>;
+}
+
+multiclass sse12_cvt_sint_3addr<bits<8> opc, RegisterClass SrcRC,
+ RegisterClass DstRC, Intrinsic Int, X86MemOperand x86memop,
+ PatFrag ld_frag, string asm> {
+ def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src2),
+ asm, [(set DstRC:$dst, (Int DstRC:$src1, SrcRC:$src2))]>;
+ def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst),
+ (ins DstRC:$src1, x86memop:$src2), asm,
+ [(set DstRC:$dst, (Int DstRC:$src1, (ld_frag addr:$src2)))]>;
+}
- def SSrr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
- !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (!nameconcat<Intrinsic>("int_x86_sse_",
- !strconcat(OpcodeStr, "_ss")) VR128:$src1,
- VR128:$src2))]>;
- // int_x86_sse_xxx_ss
+let isAsmParserOnly = 1 in {
+ defm Int_VCVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si,
+ f32mem, load, "cvtss2si\t{$src, $dst|$dst, $src}">, XS,
+ VEX;
+ defm Int_VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si,
+ f128mem, load, "cvtsd2si\t{$src, $dst|$dst, $src}">, XD,
+ VEX;
+}
+defm Int_CVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si,
+ f32mem, load, "cvtss2si\t{$src, $dst|$dst, $src}">, XS;
+defm Int_CVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si,
+ f128mem, load, "cvtsd2si\t{$src, $dst|$dst, $src}">, XD;
- def SDrr_Int : SDI<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
- !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (!nameconcat<Intrinsic>("int_x86_sse2_",
- !strconcat(OpcodeStr, "_sd")) VR128:$src1,
- VR128:$src2))]>;
- // int_x86_sse2_xxx_sd
-
- // Intrinsic operation, reg+mem.
- def V#NAME#SSrm_Int : VSSI<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, ssmem:$src2),
- !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (!nameconcat<Intrinsic>("int_x86_sse_",
- !strconcat(OpcodeStr, "_ss")) VR128:$src1,
- sse_load_f32:$src2))]> {
- // int_x86_sse_xxx_ss
- let Constraints = "";
- }
- def V#NAME#SDrm_Int : VSDI<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, sdmem:$src2),
- !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (!nameconcat<Intrinsic>("int_x86_sse2_",
- !strconcat(OpcodeStr, "_sd")) VR128:$src1,
- sse_load_f64:$src2))]> {
- // int_x86_sse2_xxx_sd
- let Constraints = "";
- }
+let Constraints = "$src1 = $dst" in {
+ defm Int_CVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
+ int_x86_sse_cvtsi2ss, i32mem, loadi32,
+ "cvtsi2ss\t{$src2, $dst|$dst, $src2}">, XS;
+ defm Int_CVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
+ int_x86_sse2_cvtsi2sd, i32mem, loadi32,
+ "cvtsi2ss\t{$src2, $dst|$dst, $src2}">, XD;
+}
+
+// Instructions below don't have an AVX form.
+defm Int_CVTPS2PI : sse12_cvt_pint<0x2D, VR128, VR64, int_x86_sse_cvtps2pi,
+ f64mem, load, "cvtps2pi\t{$src, $dst|$dst, $src}",
+ SSEPackedSingle>, TB;
+defm Int_CVTPD2PI : sse12_cvt_pint<0x2D, VR128, VR64, int_x86_sse_cvtpd2pi,
+ f128mem, memop, "cvtpd2pi\t{$src, $dst|$dst, $src}",
+ SSEPackedDouble>, TB, OpSize;
+defm Int_CVTTPS2PI : sse12_cvt_pint<0x2C, VR128, VR64, int_x86_sse_cvttps2pi,
+ f64mem, load, "cvttps2pi\t{$src, $dst|$dst, $src}",
+ SSEPackedSingle>, TB;
+defm Int_CVTTPD2PI : sse12_cvt_pint<0x2C, VR128, VR64, int_x86_sse_cvttpd2pi,
+ f128mem, memop, "cvttpd2pi\t{$src, $dst|$dst, $src}",
+ SSEPackedDouble>, TB, OpSize;
+defm Int_CVTPI2PD : sse12_cvt_pint<0x2A, VR64, VR128, int_x86_sse_cvtpi2pd,
+ i64mem, load, "cvtpi2pd\t{$src, $dst|$dst, $src}",
+ SSEPackedDouble>, TB, OpSize;
+let Constraints = "$src1 = $dst" in {
+ defm Int_CVTPI2PS : sse12_cvt_pint_3addr<0x2A, VR64, VR128,
+ int_x86_sse_cvtpi2ps,
+ i64mem, load, "cvtpi2ps\t{$src2, $dst|$dst, $src2}",
+ SSEPackedSingle>, TB;
+}
- def SSrm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, ssmem:$src2),
- !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (!nameconcat<Intrinsic>("int_x86_sse_",
- !strconcat(OpcodeStr, "_ss")) VR128:$src1,
- sse_load_f32:$src2))]>;
- // int_x86_sse_xxx_ss
+/// SSE 1 Only
- def SDrm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, sdmem:$src2),
- !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (!nameconcat<Intrinsic>("int_x86_sse2_",
- !strconcat(OpcodeStr, "_sd")) VR128:$src1,
- sse_load_f64:$src2))]>;
- // int_x86_sse2_xxx_sd
-}
+// Aliases for intrinsics
+let isAsmParserOnly = 1, Pattern = []<dag> in {
+defm Int_VCVTTSS2SI : sse12_cvt_sint_3addr<0x2C, VR128, GR32,
+ int_x86_sse_cvttss2si, f32mem, load,
+ "cvttss2si\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XS;
+defm Int_VCVTTSD2SI : sse12_cvt_sint_3addr<0x2C, VR128, GR32,
+ int_x86_sse2_cvttsd2si, f128mem, load,
+ "cvttss2si\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XD;
+}
+defm Int_CVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si,
+ f32mem, load, "cvttss2si\t{$src, $dst|$dst, $src}">,
+ XS;
+defm Int_CVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse2_cvttsd2si,
+ f128mem, load, "cvttsd2si\t{$src, $dst|$dst, $src}">,
+ XD;
+
+let isAsmParserOnly = 1, Pattern = []<dag> in {
+defm VCVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, undef, f32mem, load,
+ "cvtss2si{l}\t{$src, $dst|$dst, $src}">, XS, VEX;
+defm VCVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, undef, f128mem, load,
+ "cvtdq2ps\t{$src, $dst|$dst, $src}",
+ SSEPackedSingle>, TB, VEX;
+}
+let Pattern = []<dag> in {
+defm CVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, undef, f32mem, load /*dummy*/,
+ "cvtss2si{l}\t{$src, $dst|$dst, $src}">, XS;
+defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, undef, f128mem, load /*dummy*/,
+ "cvtdq2ps\t{$src, $dst|$dst, $src}",
+ SSEPackedSingle>, TB; /* PD SSE3 form is available */
}
-// Arithmetic instructions
-defm ADD : basic_sse12_fp_binop_rm<0x58, "add", fadd, 1>;
-defm MUL : basic_sse12_fp_binop_rm<0x59, "mul", fmul, 1>;
+/// SSE 2 Only
-let isCommutable = 0 in {
- defm SUB : basic_sse12_fp_binop_rm<0x5C, "sub", fsub>;
- defm DIV : basic_sse12_fp_binop_rm<0x5E, "div", fdiv>;
+// Convert scalar double to scalar single
+let isAsmParserOnly = 1 in {
+def VCVTSD2SSrr : VSDI<0x5A, MRMSrcReg, (outs FR32:$dst),
+ (ins FR64:$src1, FR64:$src2),
+ "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
+ VEX_4V;
+def VCVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst),
+ (ins FR64:$src1, f64mem:$src2),
+ "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ []>, XD, Requires<[HasAVX, HasSSE2, OptForSize]>, VEX_4V;
}
-
-/// sse12_fp_binop_rm - Other SSE 1 & 2 binops
-///
-/// This multiclass is like basic_sse12_fp_binop_rm, with the addition of
-/// instructions for a full-vector intrinsic form. Operations that map
-/// onto C operators don't use this form since they just use the plain
-/// vector form instead of having a separate vector intrinsic form.
-///
-/// This provides a total of eight "instructions".
-///
-let Constraints = "$src1 = $dst" in {
-multiclass sse12_fp_binop_rm<bits<8> opc, string OpcodeStr,
- SDNode OpNode, bit Commutable = 0> {
-
- let Constraints = "", isAsmParserOnly = 1, hasVEX_4VPrefix = 1 in {
- // Scalar operation, reg+reg.
- let Prefix = 12 /* XS */ in
- defm V#NAME#SS : sse12_fp_scalar<opc,
- !strconcat(OpcodeStr, "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- OpNode, FR32, f32mem>;
-
- let Prefix = 11 /* XD */ in
- defm V#NAME#SD : sse12_fp_scalar<opc,
- !strconcat(OpcodeStr, "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- OpNode, FR64, f64mem>;
- }
-
- let Constraints = "$src1 = $dst" in {
- // Scalar operation, reg+reg.
- let Prefix = 12 /* XS */ in
- defm SS : sse12_fp_scalar<opc,
- !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
- OpNode, FR32, f32mem>;
- let Prefix = 11 /* XD */ in
- defm SD : sse12_fp_scalar<opc,
- !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
- OpNode, FR64, f64mem>;
- }
-
- // Vector operation, reg+reg.
- def PSrr : PSI<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
- !strconcat(OpcodeStr, "ps\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (v4f32 (OpNode VR128:$src1, VR128:$src2)))]> {
- let isCommutable = Commutable;
- }
-
- def PDrr : PDI<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
- !strconcat(OpcodeStr, "pd\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (v2f64 (OpNode VR128:$src1, VR128:$src2)))]> {
- let isCommutable = Commutable;
- }
-
- // Vector operation, reg+mem.
- def PSrm : PSI<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, f128mem:$src2),
- !strconcat(OpcodeStr, "ps\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (OpNode VR128:$src1, (memopv4f32 addr:$src2)))]>;
-
- def PDrm : PDI<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, f128mem:$src2),
- !strconcat(OpcodeStr, "pd\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (OpNode VR128:$src1, (memopv2f64 addr:$src2)))]>;
-
- // Intrinsic operation, reg+reg.
- def SSrr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
- !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (!nameconcat<Intrinsic>("int_x86_sse_",
- !strconcat(OpcodeStr, "_ss")) VR128:$src1,
- VR128:$src2))]> {
- // int_x86_sse_xxx_ss
- let isCommutable = Commutable;
- }
-
- def SDrr_Int : SDI<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
- !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (!nameconcat<Intrinsic>("int_x86_sse2_",
- !strconcat(OpcodeStr, "_sd")) VR128:$src1,
- VR128:$src2))]> {
- // int_x86_sse2_xxx_sd
- let isCommutable = Commutable;
- }
-
- // Intrinsic operation, reg+mem.
- def SSrm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, ssmem:$src2),
- !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (!nameconcat<Intrinsic>("int_x86_sse_",
- !strconcat(OpcodeStr, "_ss")) VR128:$src1,
- sse_load_f32:$src2))]>;
- // int_x86_sse_xxx_ss
-
- def SDrm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, sdmem:$src2),
- !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (!nameconcat<Intrinsic>("int_x86_sse2_",
- !strconcat(OpcodeStr, "_sd")) VR128:$src1,
- sse_load_f64:$src2))]>;
- // int_x86_sse2_xxx_sd
-
- // Vector intrinsic operation, reg+reg.
- def PSrr_Int : PSI<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
- !strconcat(OpcodeStr, "ps\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (!nameconcat<Intrinsic>("int_x86_sse_",
- !strconcat(OpcodeStr, "_ps")) VR128:$src1,
- VR128:$src2))]> {
- // int_x86_sse_xxx_ps
- let isCommutable = Commutable;
- }
-
- def PDrr_Int : PDI<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
- !strconcat(OpcodeStr, "pd\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (!nameconcat<Intrinsic>("int_x86_sse2_",
- !strconcat(OpcodeStr, "_pd")) VR128:$src1,
- VR128:$src2))]> {
- // int_x86_sse2_xxx_pd
- let isCommutable = Commutable;
- }
-
- // Vector intrinsic operation, reg+mem.
- def PSrm_Int : PSI<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, f128mem:$src2),
- !strconcat(OpcodeStr, "ps\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (!nameconcat<Intrinsic>("int_x86_sse_",
- !strconcat(OpcodeStr, "_ps")) VR128:$src1,
- (memopv4f32 addr:$src2)))]>;
- // int_x86_sse_xxx_ps
-
- def PDrm_Int : PDI<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, f128mem:$src2),
- !strconcat(OpcodeStr, "pd\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (!nameconcat<Intrinsic>("int_x86_sse2_",
- !strconcat(OpcodeStr, "_pd")) VR128:$src1,
- (memopv2f64 addr:$src2)))]>;
- // int_x86_sse2_xxx_pd
-}
-}
-
-let isCommutable = 0 in {
- defm MAX : sse12_fp_binop_rm<0x5F, "max", X86fmax>;
- defm MIN : sse12_fp_binop_rm<0x5D, "min", X86fmin>;
-}
-
-//===----------------------------------------------------------------------===//
-// SSE packed FP Instructions
-
-// Move Instructions
-let neverHasSideEffects = 1 in
-def MOVAPSrr : PSI<0x28, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "movaps\t{$src, $dst|$dst, $src}", []>;
-let canFoldAsLoad = 1, isReMaterializable = 1 in
-def MOVAPSrm : PSI<0x28, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "movaps\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (alignedloadv4f32 addr:$src))]>;
-
-def MOVAPSmr : PSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
- "movaps\t{$src, $dst|$dst, $src}",
- [(alignedstore (v4f32 VR128:$src), addr:$dst)]>;
-
-let neverHasSideEffects = 1 in
-def MOVUPSrr : PSI<0x10, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "movups\t{$src, $dst|$dst, $src}", []>;
-let canFoldAsLoad = 1, isReMaterializable = 1 in
-def MOVUPSrm : PSI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "movups\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (loadv4f32 addr:$src))]>;
-def MOVUPSmr : PSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
- "movups\t{$src, $dst|$dst, $src}",
- [(store (v4f32 VR128:$src), addr:$dst)]>;
-
-// Intrinsic forms of MOVUPS load and store
-let canFoldAsLoad = 1, isReMaterializable = 1 in
-def MOVUPSrm_Int : PSI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "movups\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse_loadu_ps addr:$src))]>;
-def MOVUPSmr_Int : PSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
- "movups\t{$src, $dst|$dst, $src}",
- [(int_x86_sse_storeu_ps addr:$dst, VR128:$src)]>;
-
-let Constraints = "$src1 = $dst" in {
- let AddedComplexity = 20 in {
- def MOVLPSrm : PSI<0x12, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
- "movlps\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (movlp VR128:$src1,
- (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))))]>;
- def MOVHPSrm : PSI<0x16, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
- "movhps\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (movlhps VR128:$src1,
- (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))))]>;
- } // AddedComplexity
-} // Constraints = "$src1 = $dst"
-
-
-def : Pat<(movlhps VR128:$src1, (bc_v4i32 (v2i64 (X86vzload addr:$src2)))),
- (MOVHPSrm (v4i32 VR128:$src1), addr:$src2)>;
-
-def MOVLPSmr : PSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
- "movlps\t{$src, $dst|$dst, $src}",
- [(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128:$src)),
- (iPTR 0))), addr:$dst)]>;
-
-// v2f64 extract element 1 is always custom lowered to unpack high to low
-// and extract element 0 so the non-store version isn't too horrible.
-def MOVHPSmr : PSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
- "movhps\t{$src, $dst|$dst, $src}",
- [(store (f64 (vector_extract
- (unpckh (bc_v2f64 (v4f32 VR128:$src)),
- (undef)), (iPTR 0))), addr:$dst)]>;
-
-let Constraints = "$src1 = $dst" in {
-let AddedComplexity = 20 in {
-def MOVLHPSrr : PSI<0x16, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
- "movlhps\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (v4f32 (movlhps VR128:$src1, VR128:$src2)))]>;
-
-def MOVHLPSrr : PSI<0x12, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
- "movhlps\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (v4f32 (movhlps VR128:$src1, VR128:$src2)))]>;
-} // AddedComplexity
-} // Constraints = "$src1 = $dst"
-
-let AddedComplexity = 20 in {
-def : Pat<(v4f32 (movddup VR128:$src, (undef))),
- (MOVLHPSrr (v4f32 VR128:$src), (v4f32 VR128:$src))>;
-def : Pat<(v2i64 (movddup VR128:$src, (undef))),
- (MOVLHPSrr (v2i64 VR128:$src), (v2i64 VR128:$src))>;
-}
-
-
-
-// Arithmetic
-
-/// sse1_fp_unop_rm - SSE1 unops come in both scalar and vector forms.
-///
-/// In addition, we also have a special variant of the scalar form here to
-/// represent the associated intrinsic operation. This form is unlike the
-/// plain scalar form, in that it takes an entire vector (instead of a
-/// scalar) and leaves the top elements undefined.
-///
-/// And, we have a special variant form for a full-vector intrinsic form.
-///
-/// These four forms can each have a reg or a mem operand, so there are a
-/// total of eight "instructions".
-///
-multiclass sse1_fp_unop_rm<bits<8> opc, string OpcodeStr,
- SDNode OpNode,
- Intrinsic F32Int,
- Intrinsic V4F32Int,
- bit Commutable = 0> {
- // Scalar operation, reg.
- def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
- !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
- [(set FR32:$dst, (OpNode FR32:$src))]> {
- let isCommutable = Commutable;
- }
-
- // Scalar operation, mem.
- def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src),
- !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
- [(set FR32:$dst, (OpNode (load addr:$src)))]>, XS,
- Requires<[HasSSE1, OptForSize]>;
-
- // Vector operation, reg.
- def PSr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))]> {
- let isCommutable = Commutable;
- }
-
- // Vector operation, mem.
- def PSm : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (OpNode (memopv4f32 addr:$src)))]>;
-
- // Intrinsic operation, reg.
- def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (F32Int VR128:$src))]> {
- let isCommutable = Commutable;
- }
-
- // Intrinsic operation, mem.
- def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), (ins ssmem:$src),
- !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (F32Int sse_load_f32:$src))]>;
-
- // Vector intrinsic operation, reg
- def PSr_Int : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (V4F32Int VR128:$src))]> {
- let isCommutable = Commutable;
- }
-
- // Vector intrinsic operation, mem
- def PSm_Int : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (V4F32Int (memopv4f32 addr:$src)))]>;
-}
-
-// Square root.
-defm SQRT : sse1_fp_unop_rm<0x51, "sqrt", fsqrt,
- int_x86_sse_sqrt_ss, int_x86_sse_sqrt_ps>;
-
-// Reciprocal approximations. Note that these typically require refinement
-// in order to obtain suitable precision.
-defm RSQRT : sse1_fp_unop_rm<0x52, "rsqrt", X86frsqrt,
- int_x86_sse_rsqrt_ss, int_x86_sse_rsqrt_ps>;
-defm RCP : sse1_fp_unop_rm<0x53, "rcp", X86frcp,
- int_x86_sse_rcp_ss, int_x86_sse_rcp_ps>;
-
-/// sse12_fp_pack_logical - SSE 1 & 2 packed FP logical ops
-///
-multiclass sse12_fp_pack_logical<bits<8> opc, string OpcodeStr,
- SDNode OpNode, int HasPat = 0,
- bit Commutable = 1,
- list<list<dag>> Pattern = []> {
- def PSrr : PSI<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
- !strconcat(OpcodeStr, "ps\t{$src2, $dst|$dst, $src2}"),
- !if(HasPat, Pattern[0],
- [(set VR128:$dst, (v2i64 (OpNode VR128:$src1,
- VR128:$src2)))])>
- { let isCommutable = Commutable; }
-
- def PDrr : PDI<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
- !strconcat(OpcodeStr, "pd\t{$src2, $dst|$dst, $src2}"),
- !if(HasPat, Pattern[1],
- [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
- (bc_v2i64 (v2f64 VR128:$src2))))])>
- { let isCommutable = Commutable; }
-
- def PSrm : PSI<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, f128mem:$src2),
- !strconcat(OpcodeStr, "ps\t{$src2, $dst|$dst, $src2}"),
- !if(HasPat, Pattern[2],
- [(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)),
- (memopv2i64 addr:$src2)))])>;
-
- def PDrm : PDI<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, f128mem:$src2),
- !strconcat(OpcodeStr, "pd\t{$src2, $dst|$dst, $src2}"),
- !if(HasPat, Pattern[3],
- [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
- (memopv2i64 addr:$src2)))])>;
-}
-
-// Logical
-let Constraints = "$src1 = $dst" in {
- defm AND : sse12_fp_pack_logical<0x54, "and", and>;
- defm OR : sse12_fp_pack_logical<0x56, "or", or>;
- defm XOR : sse12_fp_pack_logical<0x57, "xor", xor>;
- defm ANDN : sse12_fp_pack_logical<0x55, "andn", undef /* dummy */, 1, 0, [
- // single r+r
- [(set VR128:$dst, (v2i64 (and (xor VR128:$src1,
- (bc_v2i64 (v4i32 immAllOnesV))),
- VR128:$src2)))],
- // double r+r
- [(set VR128:$dst, (and (vnot (bc_v2i64 (v2f64 VR128:$src1))),
- (bc_v2i64 (v2f64 VR128:$src2))))],
- // single r+m
- [(set VR128:$dst, (v2i64 (and (xor (bc_v2i64 (v4f32 VR128:$src1)),
- (bc_v2i64 (v4i32 immAllOnesV))),
- (memopv2i64 addr:$src2))))],
- // double r+m
- [(set VR128:$dst, (and (vnot (bc_v2i64 (v2f64 VR128:$src1))),
- (memopv2i64 addr:$src2)))]]>;
-}
-
-let Constraints = "$src1 = $dst" in {
- def CMPPSrri : PSIi8<0xC2, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src, SSECC:$cc),
- "cmp${cc}ps\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse_cmp_ps VR128:$src1,
- VR128:$src, imm:$cc))]>;
- def CMPPSrmi : PSIi8<0xC2, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, f128mem:$src, SSECC:$cc),
- "cmp${cc}ps\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse_cmp_ps VR128:$src1,
- (memop addr:$src), imm:$cc))]>;
-
- // Accept explicit immediate argument form instead of comparison code.
-let isAsmParserOnly = 1 in {
- def CMPPSrri_alt : PSIi8<0xC2, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src, i8imm:$src2),
- "cmpps\t{$src2, $src, $dst|$dst, $src, $src}", []>;
- def CMPPSrmi_alt : PSIi8<0xC2, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, f128mem:$src, i8imm:$src2),
- "cmpps\t{$src2, $src, $dst|$dst, $src, $src}", []>;
-}
-}
-def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), VR128:$src2, imm:$cc)),
- (CMPPSrri (v4f32 VR128:$src1), (v4f32 VR128:$src2), imm:$cc)>;
-def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), (memop addr:$src2), imm:$cc)),
- (CMPPSrmi (v4f32 VR128:$src1), addr:$src2, imm:$cc)>;
-
-// Shuffle and unpack instructions
-let Constraints = "$src1 = $dst" in {
- let isConvertibleToThreeAddress = 1 in // Convert to pshufd
- def SHUFPSrri : PSIi8<0xC6, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1,
- VR128:$src2, i8imm:$src3),
- "shufps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- [(set VR128:$dst,
- (v4f32 (shufp:$src3 VR128:$src1, VR128:$src2)))]>;
- def SHUFPSrmi : PSIi8<0xC6, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1,
- f128mem:$src2, i8imm:$src3),
- "shufps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- [(set VR128:$dst,
- (v4f32 (shufp:$src3
- VR128:$src1, (memopv4f32 addr:$src2))))]>;
-
- let AddedComplexity = 10 in {
- def UNPCKHPSrr : PSI<0x15, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- "unpckhps\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (v4f32 (unpckh VR128:$src1, VR128:$src2)))]>;
- def UNPCKHPSrm : PSI<0x15, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
- "unpckhps\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (v4f32 (unpckh VR128:$src1,
- (memopv4f32 addr:$src2))))]>;
-
- def UNPCKLPSrr : PSI<0x14, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- "unpcklps\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (v4f32 (unpckl VR128:$src1, VR128:$src2)))]>;
- def UNPCKLPSrm : PSI<0x14, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
- "unpcklps\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (unpckl VR128:$src1, (memopv4f32 addr:$src2)))]>;
- } // AddedComplexity
-} // Constraints = "$src1 = $dst"
-
-// Mask creation
-def MOVMSKPSrr : PSI<0x50, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
- "movmskps\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (int_x86_sse_movmsk_ps VR128:$src))]>;
-def MOVMSKPDrr : PDI<0x50, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
- "movmskpd\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (int_x86_sse2_movmsk_pd VR128:$src))]>;
-
-// Prefetch intrinsic.
-def PREFETCHT0 : PSI<0x18, MRM1m, (outs), (ins i8mem:$src),
- "prefetcht0\t$src", [(prefetch addr:$src, imm, (i32 3))]>;
-def PREFETCHT1 : PSI<0x18, MRM2m, (outs), (ins i8mem:$src),
- "prefetcht1\t$src", [(prefetch addr:$src, imm, (i32 2))]>;
-def PREFETCHT2 : PSI<0x18, MRM3m, (outs), (ins i8mem:$src),
- "prefetcht2\t$src", [(prefetch addr:$src, imm, (i32 1))]>;
-def PREFETCHNTA : PSI<0x18, MRM0m, (outs), (ins i8mem:$src),
- "prefetchnta\t$src", [(prefetch addr:$src, imm, (i32 0))]>;
-
-// Non-temporal stores
-def MOVNTPSmr_Int : PSI<0x2B, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
- "movntps\t{$src, $dst|$dst, $src}",
- [(int_x86_sse_movnt_ps addr:$dst, VR128:$src)]>;
-
-let AddedComplexity = 400 in { // Prefer non-temporal versions
-def MOVNTPSmr : PSI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
- "movntps\t{$src, $dst|$dst, $src}",
- [(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)]>;
-
-def MOVNTDQ_64mr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
- "movntdq\t{$src, $dst|$dst, $src}",
- [(alignednontemporalstore (v2f64 VR128:$src), addr:$dst)]>;
-
-def MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
- "movnti\t{$src, $dst|$dst, $src}",
- [(nontemporalstore (i32 GR32:$src), addr:$dst)]>,
- TB, Requires<[HasSSE2]>;
-
-def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
- "movnti\t{$src, $dst|$dst, $src}",
- [(nontemporalstore (i64 GR64:$src), addr:$dst)]>,
- TB, Requires<[HasSSE2]>;
-}
-
-// Load, store, and memory fence
-def SFENCE : I<0xAE, MRM_F8, (outs), (ins), "sfence", [(int_x86_sse_sfence)]>,
- TB, Requires<[HasSSE1]>;
-
-// MXCSR register
-def LDMXCSR : PSI<0xAE, MRM2m, (outs), (ins i32mem:$src),
- "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)]>;
-def STMXCSR : PSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),
- "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)]>;
-
-// Alias instructions that map zero vector to pxor / xorp* for sse.
-// We set canFoldAsLoad because this can be converted to a constant-pool
-// load of an all-zeros value if folding it would be beneficial.
-// FIXME: Change encoding to pseudo!
-let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
- isCodeGenOnly = 1 in {
-def V_SET0PS : PSI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "",
- [(set VR128:$dst, (v4f32 immAllZerosV))]>;
-def V_SET0PD : PDI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "",
- [(set VR128:$dst, (v2f64 immAllZerosV))]>;
-let ExeDomain = SSEPackedInt in
-def V_SET0PI : PDI<0xEF, MRMInitReg, (outs VR128:$dst), (ins), "",
- [(set VR128:$dst, (v4i32 immAllZerosV))]>;
-}
-
-def : Pat<(v2i64 immAllZerosV), (V_SET0PI)>;
-def : Pat<(v8i16 immAllZerosV), (V_SET0PI)>;
-def : Pat<(v16i8 immAllZerosV), (V_SET0PI)>;
-
-def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
- (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
-
-//===---------------------------------------------------------------------===//
-// SSE2 Instructions
-//===---------------------------------------------------------------------===//
-
-// Move Instructions. Register-to-register movsd is not used for FR64
-// register copies because it's a partial register update; FsMOVAPDrr is
-// used instead. Register-to-register movsd is not modeled as an INSERT_SUBREG
-// because INSERT_SUBREG requires that the insert be implementable in terms of
-// a copy, and just mentioned, we don't use movsd for copies.
-let Constraints = "$src1 = $dst" in
-def MOVSDrr : SDI<0x10, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, FR64:$src2),
- "movsd\t{$src2, $dst|$dst, $src2}",
- [(set (v2f64 VR128:$dst),
- (movl VR128:$src1, (scalar_to_vector FR64:$src2)))]>;
-
-// Extract the low 64-bit value from one vector and insert it into another.
-let AddedComplexity = 15 in
-def : Pat<(v2f64 (movl VR128:$src1, VR128:$src2)),
- (MOVSDrr (v2f64 VR128:$src1),
- (EXTRACT_SUBREG (v2f64 VR128:$src2), sub_sd))>;
-
-// Implicitly promote a 64-bit scalar to a vector.
-def : Pat<(v2f64 (scalar_to_vector FR64:$src)),
- (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, sub_sd)>;
-
-// Loading from memory automatically zeroing upper bits.
-let canFoldAsLoad = 1, isReMaterializable = 1, AddedComplexity = 20 in
-def MOVSDrm : SDI<0x10, MRMSrcMem, (outs FR64:$dst), (ins f64mem:$src),
- "movsd\t{$src, $dst|$dst, $src}",
- [(set FR64:$dst, (loadf64 addr:$src))]>;
-
-// MOVSDrm zeros the high parts of the register; represent this
-// with SUBREG_TO_REG.
-let AddedComplexity = 20 in {
-def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
- (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
-def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
- (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
-def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
- (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
-def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
- (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
-def : Pat<(v2f64 (X86vzload addr:$src)),
- (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
-}
-
-// Store scalar value to memory.
-def MOVSDmr : SDI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src),
- "movsd\t{$src, $dst|$dst, $src}",
- [(store FR64:$src, addr:$dst)]>;
-
-// Extract and store.
-def : Pat<(store (f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
- addr:$dst),
- (MOVSDmr addr:$dst,
- (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>;
-
-// Conversion instructions
-def CVTTSD2SIrr : SDI<0x2C, MRMSrcReg, (outs GR32:$dst), (ins FR64:$src),
- "cvttsd2si\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (fp_to_sint FR64:$src))]>;
-def CVTTSD2SIrm : SDI<0x2C, MRMSrcMem, (outs GR32:$dst), (ins f64mem:$src),
- "cvttsd2si\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (fp_to_sint (loadf64 addr:$src)))]>;
def CVTSD2SSrr : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src),
"cvtsd2ss\t{$src, $dst|$dst, $src}",
[(set FR32:$dst, (fround FR64:$src))]>;
@@ -1442,35 +962,28 @@
"cvtsd2ss\t{$src, $dst|$dst, $src}",
[(set FR32:$dst, (fround (loadf64 addr:$src)))]>, XD,
Requires<[HasSSE2, OptForSize]>;
-def CVTSI2SDrr : SDI<0x2A, MRMSrcReg, (outs FR64:$dst), (ins GR32:$src),
- "cvtsi2sd\t{$src, $dst|$dst, $src}",
- [(set FR64:$dst, (sint_to_fp GR32:$src))]>;
-def CVTSI2SDrm : SDI<0x2A, MRMSrcMem, (outs FR64:$dst), (ins i32mem:$src),
- "cvtsi2sd\t{$src, $dst|$dst, $src}",
- [(set FR64:$dst, (sint_to_fp (loadi32 addr:$src)))]>;
-def CVTPD2DQrm : S3DI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "cvtpd2dq\t{$src, $dst|$dst, $src}", []>;
-def CVTPD2DQrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvtpd2dq\t{$src, $dst|$dst, $src}", []>;
-def CVTDQ2PDrm : S3SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "cvtdq2pd\t{$src, $dst|$dst, $src}", []>;
-def CVTDQ2PDrr : S3SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvtdq2pd\t{$src, $dst|$dst, $src}", []>;
-def CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvtps2dq\t{$src, $dst|$dst, $src}", []>;
-def CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "cvtps2dq\t{$src, $dst|$dst, $src}", []>;
-def CVTDQ2PSrr : PSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvtdq2ps\t{$src, $dst|$dst, $src}", []>;
-def CVTDQ2PSrm : PSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "cvtdq2ps\t{$src, $dst|$dst, $src}", []>;
-def COMISDrr: PDI<0x2F, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
- "comisd\t{$src2, $src1|$src1, $src2}", []>;
-def COMISDrm: PDI<0x2F, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
- "comisd\t{$src2, $src1|$src1, $src2}", []>;
-
-// SSE2 instructions with XS prefix
+let isAsmParserOnly = 1 in
+defm Int_VCVTSD2SS: sse12_cvt_sint_3addr<0x5A, VR128, VR128,
+ int_x86_sse2_cvtsd2ss, f64mem, load,
+ "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}">,
+                    XD, VEX_4V;
+let Constraints = "$src1 = $dst" in
+defm Int_CVTSD2SS: sse12_cvt_sint_3addr<0x5A, VR128, VR128,
+ int_x86_sse2_cvtsd2ss, f64mem, load,
+                    "cvtsd2ss\t{$src2, $dst|$dst, $src2}">, XD;
+
+// Convert scalar single to scalar double
+let isAsmParserOnly = 1 in { // SSE2 instructions with XS prefix
+def VCVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst),
+ (ins FR32:$src1, FR32:$src2),
+ "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ []>, XS, Requires<[HasAVX, HasSSE2]>, VEX_4V;
+def VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst),
+ (ins FR32:$src1, f32mem:$src2),
+ "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ []>, XS, VEX_4V, Requires<[HasAVX, HasSSE2, OptForSize]>;
+}
def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src),
"cvtss2sd\t{$src, $dst|$dst, $src}",
[(set FR64:$dst, (fextend FR32:$src))]>, XS,
@@ -1480,208 +993,51 @@
[(set FR64:$dst, (extloadf32 addr:$src))]>, XS,
Requires<[HasSSE2, OptForSize]>;
-def : Pat<(extloadf32 addr:$src),
- (CVTSS2SDrr (MOVSSrm addr:$src))>,
- Requires<[HasSSE2, OptForSpeed]>;
-
-// Match intrinsics which expect XMM operand(s).
-def Int_CVTSD2SIrr : SDI<0x2D, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
- "cvtsd2si\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (int_x86_sse2_cvtsd2si VR128:$src))]>;
-def Int_CVTSD2SIrm : SDI<0x2D, MRMSrcMem, (outs GR32:$dst), (ins f128mem:$src),
- "cvtsd2si\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (int_x86_sse2_cvtsd2si
- (load addr:$src)))]>;
-
-// Match intrinsics which expect MM and XMM operand(s).
-def Int_CVTPD2PIrr : PDI<0x2D, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
- "cvtpd2pi\t{$src, $dst|$dst, $src}",
- [(set VR64:$dst, (int_x86_sse_cvtpd2pi VR128:$src))]>;
-def Int_CVTPD2PIrm : PDI<0x2D, MRMSrcMem, (outs VR64:$dst), (ins f128mem:$src),
- "cvtpd2pi\t{$src, $dst|$dst, $src}",
- [(set VR64:$dst, (int_x86_sse_cvtpd2pi
- (memop addr:$src)))]>;
-def Int_CVTTPD2PIrr: PDI<0x2C, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
- "cvttpd2pi\t{$src, $dst|$dst, $src}",
- [(set VR64:$dst, (int_x86_sse_cvttpd2pi VR128:$src))]>;
-def Int_CVTTPD2PIrm: PDI<0x2C, MRMSrcMem, (outs VR64:$dst), (ins f128mem:$src),
- "cvttpd2pi\t{$src, $dst|$dst, $src}",
- [(set VR64:$dst, (int_x86_sse_cvttpd2pi
- (memop addr:$src)))]>;
-def Int_CVTPI2PDrr : PDI<0x2A, MRMSrcReg, (outs VR128:$dst), (ins VR64:$src),
- "cvtpi2pd\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse_cvtpi2pd VR64:$src))]>;
-def Int_CVTPI2PDrm : PDI<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
- "cvtpi2pd\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse_cvtpi2pd
- (load addr:$src)))]>;
-
-// Aliases for intrinsics
-def Int_CVTTSD2SIrr : SDI<0x2C, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
- "cvttsd2si\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst,
- (int_x86_sse2_cvttsd2si VR128:$src))]>;
-def Int_CVTTSD2SIrm : SDI<0x2C, MRMSrcMem, (outs GR32:$dst), (ins f128mem:$src),
- "cvttsd2si\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (int_x86_sse2_cvttsd2si
- (load addr:$src)))]>;
-
-// Comparison instructions
-let Constraints = "$src1 = $dst", neverHasSideEffects = 1 in {
- def CMPSDrr : SDIi8<0xC2, MRMSrcReg,
- (outs FR64:$dst), (ins FR64:$src1, FR64:$src, SSECC:$cc),
- "cmp${cc}sd\t{$src, $dst|$dst, $src}", []>;
-let mayLoad = 1 in
- def CMPSDrm : SDIi8<0xC2, MRMSrcMem,
- (outs FR64:$dst), (ins FR64:$src1, f64mem:$src, SSECC:$cc),
- "cmp${cc}sd\t{$src, $dst|$dst, $src}", []>;
-
- // Accept explicit immediate argument form instead of comparison code.
let isAsmParserOnly = 1 in {
- def CMPSDrr_alt : SDIi8<0xC2, MRMSrcReg,
- (outs FR64:$dst), (ins FR64:$src1, FR64:$src, i8imm:$src2),
- "cmpsd\t{$src2, $src, $dst|$dst, $src, $src2}", []>;
-let mayLoad = 1 in
- def CMPSDrm_alt : SDIi8<0xC2, MRMSrcMem,
- (outs FR64:$dst), (ins FR64:$src1, f64mem:$src, i8imm:$src2),
- "cmpsd\t{$src2, $src, $dst|$dst, $src, $src2}", []>;
+def Int_VCVTSS2SDrr: I<0x5A, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1,
+ VR128:$src2))]>, XS, VEX_4V,
+ Requires<[HasAVX, HasSSE2]>;
+def Int_VCVTSS2SDrm: I<0x5A, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, f32mem:$src2),
+ "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1,
+ (load addr:$src2)))]>, XS, VEX_4V,
+ Requires<[HasAVX, HasSSE2]>;
}
+let Constraints = "$src1 = $dst" in { // SSE2 instructions with XS prefix
+def Int_CVTSS2SDrr: I<0x5A, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "cvtss2sd\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1,
+ VR128:$src2))]>, XS,
+ Requires<[HasSSE2]>;
+def Int_CVTSS2SDrm: I<0x5A, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, f32mem:$src2),
+ "cvtss2sd\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1,
+ (load addr:$src2)))]>, XS,
+ Requires<[HasSSE2]>;
}
-let Defs = [EFLAGS] in {
-def UCOMISDrr: PDI<0x2E, MRMSrcReg, (outs), (ins FR64:$src1, FR64:$src2),
- "ucomisd\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp FR64:$src1, FR64:$src2))]>;
-def UCOMISDrm: PDI<0x2E, MRMSrcMem, (outs), (ins FR64:$src1, f64mem:$src2),
- "ucomisd\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp FR64:$src1, (loadf64 addr:$src2)))]>;
-} // Defs = [EFLAGS]
+def : Pat<(extloadf32 addr:$src),
+ (CVTSS2SDrr (MOVSSrm addr:$src))>,
+ Requires<[HasSSE2, OptForSpeed]>;
-// Aliases to match intrinsics which expect XMM operand(s).
-let Constraints = "$src1 = $dst" in {
- def Int_CMPSDrr : SDIi8<0xC2, MRMSrcReg,
- (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src, SSECC:$cc),
- "cmp${cc}sd\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cmp_sd VR128:$src1,
- VR128:$src, imm:$cc))]>;
- def Int_CMPSDrm : SDIi8<0xC2, MRMSrcMem,
- (outs VR128:$dst),
- (ins VR128:$src1, f64mem:$src, SSECC:$cc),
- "cmp${cc}sd\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cmp_sd VR128:$src1,
- (load addr:$src), imm:$cc))]>;
+// Convert doubleword to packed single/double fp
+let isAsmParserOnly = 1 in { // SSE2 instructions without OpSize prefix
+def Int_VCVTDQ2PSrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "vcvtdq2ps\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtdq2ps VR128:$src))]>,
+ TB, VEX, Requires<[HasAVX, HasSSE2]>;
+def Int_VCVTDQ2PSrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
+ "vcvtdq2ps\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtdq2ps
+ (bitconvert (memopv2i64 addr:$src))))]>,
+ TB, VEX, Requires<[HasAVX, HasSSE2]>;
}
-
-let Defs = [EFLAGS] in {
-def Int_UCOMISDrr: PDI<0x2E, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
- "ucomisd\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86ucomi (v2f64 VR128:$src1),
- VR128:$src2))]>;
-def Int_UCOMISDrm: PDI<0x2E, MRMSrcMem, (outs),(ins VR128:$src1, f128mem:$src2),
- "ucomisd\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86ucomi (v2f64 VR128:$src1),
- (load addr:$src2)))]>;
-
-def Int_COMISDrr: PDI<0x2F, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
- "comisd\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86comi (v2f64 VR128:$src1),
- VR128:$src2))]>;
-def Int_COMISDrm: PDI<0x2F, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
- "comisd\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86comi (v2f64 VR128:$src1),
- (load addr:$src2)))]>;
-} // Defs = [EFLAGS]
-
-// Aliases of packed SSE2 instructions for scalar use. These all have names
-// that start with 'Fs'.
-
-// Alias instructions that map fld0 to pxor for sse.
-let isReMaterializable = 1, isAsCheapAsAMove = 1, isCodeGenOnly = 1,
- canFoldAsLoad = 1 in
-def FsFLD0SD : I<0xEF, MRMInitReg, (outs FR64:$dst), (ins), "",
- [(set FR64:$dst, fpimm0)]>,
- Requires<[HasSSE2]>, TB, OpSize;
-
-// Alias instruction to do FR64 reg-to-reg copy using movapd. Upper bits are
-// disregarded.
-let neverHasSideEffects = 1 in
-def FsMOVAPDrr : PDI<0x28, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
- "movapd\t{$src, $dst|$dst, $src}", []>;
-
-// Alias instruction to load FR64 from f128mem using movapd. Upper bits are
-// disregarded.
-let canFoldAsLoad = 1, isReMaterializable = 1 in
-def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src),
- "movapd\t{$src, $dst|$dst, $src}",
- [(set FR64:$dst, (alignedloadfsf64 addr:$src))]>;
-
-//===---------------------------------------------------------------------===//
-// SSE packed FP Instructions
-
-// Move Instructions
-let neverHasSideEffects = 1 in
-def MOVAPDrr : PDI<0x28, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "movapd\t{$src, $dst|$dst, $src}", []>;
-let canFoldAsLoad = 1, isReMaterializable = 1 in
-def MOVAPDrm : PDI<0x28, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "movapd\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (alignedloadv2f64 addr:$src))]>;
-
-def MOVAPDmr : PDI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
- "movapd\t{$src, $dst|$dst, $src}",
- [(alignedstore (v2f64 VR128:$src), addr:$dst)]>;
-
-let neverHasSideEffects = 1 in
-def MOVUPDrr : PDI<0x10, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "movupd\t{$src, $dst|$dst, $src}", []>;
-let canFoldAsLoad = 1 in
-def MOVUPDrm : PDI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "movupd\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (loadv2f64 addr:$src))]>;
-def MOVUPDmr : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
- "movupd\t{$src, $dst|$dst, $src}",
- [(store (v2f64 VR128:$src), addr:$dst)]>;
-
-// Intrinsic forms of MOVUPD load and store
-def MOVUPDrm_Int : PDI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "movupd\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_loadu_pd addr:$src))]>;
-def MOVUPDmr_Int : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
- "movupd\t{$src, $dst|$dst, $src}",
- [(int_x86_sse2_storeu_pd addr:$dst, VR128:$src)]>;
-
-let Constraints = "$src1 = $dst" in {
- let AddedComplexity = 20 in {
- def MOVLPDrm : PDI<0x12, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
- "movlpd\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (v2f64 (movlp VR128:$src1,
- (scalar_to_vector (loadf64 addr:$src2)))))]>;
- def MOVHPDrm : PDI<0x16, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
- "movhpd\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (v2f64 (movlhps VR128:$src1,
- (scalar_to_vector (loadf64 addr:$src2)))))]>;
- } // AddedComplexity
-} // Constraints = "$src1 = $dst"
-
-def MOVLPDmr : PDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
- "movlpd\t{$src, $dst|$dst, $src}",
- [(store (f64 (vector_extract (v2f64 VR128:$src),
- (iPTR 0))), addr:$dst)]>;
-
-// v2f64 extract element 1 is always custom lowered to unpack high to low
-// and extract element 0 so the non-store version isn't too horrible.
-def MOVHPDmr : PDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
- "movhpd\t{$src, $dst|$dst, $src}",
- [(store (f64 (vector_extract
- (v2f64 (unpckh VR128:$src, (undef))),
- (iPTR 0))), addr:$dst)]>;
-
-// SSE2 instructions without OpSize prefix
def Int_CVTDQ2PSrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtdq2ps\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtdq2ps VR128:$src))]>,
@@ -1692,7 +1048,18 @@
(bitconvert (memopv2i64 addr:$src))))]>,
TB, Requires<[HasSSE2]>;
-// SSE2 instructions with XS prefix
+// FIXME: why is the non-intrinsic version described as SSE3?
+let isAsmParserOnly = 1 in { // SSE2 instructions with XS prefix
+def Int_VCVTDQ2PDrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "vcvtdq2pd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtdq2pd VR128:$src))]>,
+ XS, VEX, Requires<[HasAVX, HasSSE2]>;
+def Int_VCVTDQ2PDrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
+ "vcvtdq2pd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtdq2pd
+ (bitconvert (memopv2i64 addr:$src))))]>,
+ XS, VEX, Requires<[HasAVX, HasSSE2]>;
+}
def Int_CVTDQ2PDrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtdq2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtdq2pd VR128:$src))]>,
@@ -1703,6 +1070,29 @@
(bitconvert (memopv2i64 addr:$src))))]>,
XS, Requires<[HasSSE2]>;
+// Convert packed single/double fp to doubleword
+let isAsmParserOnly = 1 in {
+def VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvtps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTPS2DQrm : VPDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "cvtps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+}
+def CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvtps2dq\t{$src, $dst|$dst, $src}", []>;
+def CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "cvtps2dq\t{$src, $dst|$dst, $src}", []>;
+
+let isAsmParserOnly = 1 in {
+def Int_VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvtps2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))]>,
+ VEX;
+def Int_VCVTPS2DQrm : VPDI<0x5B, MRMSrcMem, (outs VR128:$dst),
+ (ins f128mem:$src),
+ "cvtps2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtps2dq
+ (memop addr:$src)))]>, VEX;
+}
def Int_CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))]>;
@@ -1710,12 +1100,54 @@
"cvtps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtps2dq
(memop addr:$src)))]>;
-// SSE2 packed instructions with XS prefix
+
+let isAsmParserOnly = 1 in { // SSE2 packed instructions with XD prefix
+def Int_VCVTPD2DQrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "vcvtpd2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))]>,
+ XD, VEX, Requires<[HasAVX, HasSSE2]>;
+def Int_VCVTPD2DQrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "vcvtpd2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtpd2dq
+ (memop addr:$src)))]>,
+ XD, VEX, Requires<[HasAVX, HasSSE2]>;
+}
+def Int_CVTPD2DQrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvtpd2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))]>,
+ XD, Requires<[HasSSE2]>;
+def Int_CVTPD2DQrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "cvtpd2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtpd2dq
+ (memop addr:$src)))]>,
+ XD, Requires<[HasSSE2]>;
+
+
+// Convert packed single/double fp to doubleword with truncation
+let isAsmParserOnly = 1 in { // SSE2 packed instructions with XS prefix
+def VCVTTPS2DQrr : VSSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTTPS2DQrm : VSSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+}
def CVTTPS2DQrr : SSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}", []>;
def CVTTPS2DQrm : SSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}", []>;
+
+let isAsmParserOnly = 1 in {
+def Int_VCVTTPS2DQrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "vcvttps2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (int_x86_sse2_cvttps2dq VR128:$src))]>,
+ XS, VEX, Requires<[HasAVX, HasSSE2]>;
+def Int_VCVTTPS2DQrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "vcvttps2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvttps2dq
+ (memop addr:$src)))]>,
+ XS, VEX, Requires<[HasAVX, HasSSE2]>;
+}
def Int_CVTTPS2DQrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
@@ -1727,17 +1159,18 @@
(memop addr:$src)))]>,
XS, Requires<[HasSSE2]>;
-// SSE2 packed instructions with XD prefix
-def Int_CVTPD2DQrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvtpd2dq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))]>,
- XD, Requires<[HasSSE2]>;
-def Int_CVTPD2DQrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "cvtpd2dq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtpd2dq
- (memop addr:$src)))]>,
- XD, Requires<[HasSSE2]>;
-
+let isAsmParserOnly = 1 in {
+def Int_VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src),
+ "cvttpd2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))]>,
+ VEX;
+def Int_VCVTTPD2DQrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst),
+ (ins f128mem:$src),
+ "cvttpd2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvttpd2dq
+ (memop addr:$src)))]>, VEX;
+}
def Int_CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvttpd2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))]>;
@@ -1746,12 +1179,31 @@
[(set VR128:$dst, (int_x86_sse2_cvttpd2dq
(memop addr:$src)))]>;
-// SSE2 instructions without OpSize prefix
+// Convert packed single to packed double
+let isAsmParserOnly = 1 in { // SSE2 instructions without OpSize prefix
+def VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, VEX,
+ Requires<[HasAVX]>;
+def VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
+ "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, VEX,
+ Requires<[HasAVX]>;
+}
def CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtps2pd\t{$src, $dst|$dst, $src}", []>, TB;
def CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
"cvtps2pd\t{$src, $dst|$dst, $src}", []>, TB;
+let isAsmParserOnly = 1 in {
+def Int_VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvtps2pd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))]>,
+ VEX, Requires<[HasAVX, HasSSE2]>;
+def Int_VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
+ "cvtps2pd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtps2pd
+ (load addr:$src)))]>,
+ VEX, Requires<[HasAVX, HasSSE2]>;
+}
def Int_CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))]>,
@@ -1762,12 +1214,29 @@
(load addr:$src)))]>,
TB, Requires<[HasSSE2]>;
+// Convert packed double to packed single
+let isAsmParserOnly = 1 in {
+def VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvtpd2ps\t{$src, $dst|$dst, $src}", []>, VEX;
+// FIXME: the memory form of this instruction should be described using
+// extra asm syntax.
+}
def CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtpd2ps\t{$src, $dst|$dst, $src}", []>;
def CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvtpd2ps\t{$src, $dst|$dst, $src}", []>;
+let isAsmParserOnly = 1 in {
+def Int_VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+                         "cvtpd2ps\t{$src, $dst|$dst, $src}",
+                         [(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))]>,
+                         VEX;
+def Int_VCVTPD2PSrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst),
+                         (ins f128mem:$src),
+                         "cvtpd2ps\t{$src, $dst|$dst, $src}",
+                         [(set VR128:$dst, (int_x86_sse2_cvtpd2ps
+                                            (memop addr:$src)))]>, VEX;
+}
def Int_CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtpd2ps\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))]>;
@@ -1776,214 +1245,1001 @@
[(set VR128:$dst, (int_x86_sse2_cvtpd2ps
(memop addr:$src)))]>;
-// Match intrinsics which expect XMM operand(s).
-// Aliases for intrinsics
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Compare Instructions
+//===----------------------------------------------------------------------===//
+
+// sse12_cmp_scalar - sse 1 & 2 compare scalar instructions
+multiclass sse12_cmp_scalar<RegisterClass RC, X86MemOperand x86memop,
+ string asm, string asm_alt> {
+ def rr : SIi8<0xC2, MRMSrcReg,
+ (outs RC:$dst), (ins RC:$src1, RC:$src, SSECC:$cc),
+ asm, []>;
+ let mayLoad = 1 in
+ def rm : SIi8<0xC2, MRMSrcMem,
+ (outs RC:$dst), (ins RC:$src1, x86memop:$src, SSECC:$cc),
+ asm, []>;
+ // Accept explicit immediate argument form instead of comparison code.
+ let isAsmParserOnly = 1 in {
+ def rr_alt : SIi8<0xC2, MRMSrcReg,
+ (outs RC:$dst), (ins RC:$src1, RC:$src, i8imm:$src2),
+ asm_alt, []>;
+ let mayLoad = 1 in
+ def rm_alt : SIi8<0xC2, MRMSrcMem,
+ (outs RC:$dst), (ins RC:$src1, x86memop:$src, i8imm:$src2),
+ asm_alt, []>;
+ }
+}
+
+let neverHasSideEffects = 1, isAsmParserOnly = 1 in {
+ defm VCMPSS : sse12_cmp_scalar<FR32, f32mem,
+ "cmp${cc}ss\t{$src, $src1, $dst|$dst, $src1, $src}",
+ "cmpss\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}">,
+ XS, VEX_4V;
+ defm VCMPSD : sse12_cmp_scalar<FR64, f64mem,
+ "cmp${cc}sd\t{$src, $src1, $dst|$dst, $src1, $src}",
+ "cmpsd\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}">,
+ XD, VEX_4V;
+}
+
+let Constraints = "$src1 = $dst", neverHasSideEffects = 1 in {
+ defm CMPSS : sse12_cmp_scalar<FR32, f32mem,
+ "cmp${cc}ss\t{$src, $dst|$dst, $src}",
+ "cmpss\t{$src2, $src, $dst|$dst, $src, $src2}">, XS;
+ defm CMPSD : sse12_cmp_scalar<FR64, f64mem,
+ "cmp${cc}sd\t{$src, $dst|$dst, $src}",
+ "cmpsd\t{$src2, $src, $dst|$dst, $src, $src2}">, XD;
+}
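+// A minimal usage sketch of the two accepted syntaxes (AT&T operand order
+// assumed): the ${cc} form folds the condition code into the mnemonic,
+// while the isAsmParserOnly "_alt" form takes an explicit immediate:
+//   cmpeqss %xmm1, %xmm0        ; condition "eq" encoded in the mnemonic
+//   cmpss   $0, %xmm1, %xmm0    ; same compare, immediate 0 = eq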
+
+multiclass sse12_cmp_scalar_int<RegisterClass RC, X86MemOperand x86memop,
+ Intrinsic Int, string asm> {
+ def rr : SIi8<0xC2, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src, SSECC:$cc), asm,
+ [(set VR128:$dst, (Int VR128:$src1,
+ VR128:$src, imm:$cc))]>;
+ def rm : SIi8<0xC2, MRMSrcMem, (outs VR128:$dst),
+                    (ins VR128:$src1, x86memop:$src, SSECC:$cc), asm,
+ [(set VR128:$dst, (Int VR128:$src1,
+ (load addr:$src), imm:$cc))]>;
+}
+
+// Aliases to match intrinsics which expect XMM operand(s).
+let isAsmParserOnly = 1 in {
+ defm Int_VCMPSS : sse12_cmp_scalar_int<VR128, f32mem, int_x86_sse_cmp_ss,
+ "cmp${cc}ss\t{$src, $src1, $dst|$dst, $src1, $src}">,
+ XS, VEX_4V;
+ defm Int_VCMPSD : sse12_cmp_scalar_int<VR128, f64mem, int_x86_sse2_cmp_sd,
+ "cmp${cc}sd\t{$src, $src1, $dst|$dst, $src1, $src}">,
+ XD, VEX_4V;
+}
let Constraints = "$src1 = $dst" in {
-def Int_CVTSI2SDrr: SDI<0x2A, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, GR32:$src2),
- "cvtsi2sd\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst, (int_x86_sse2_cvtsi2sd VR128:$src1,
- GR32:$src2))]>;
-def Int_CVTSI2SDrm: SDI<0x2A, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, i32mem:$src2),
- "cvtsi2sd\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst, (int_x86_sse2_cvtsi2sd VR128:$src1,
- (loadi32 addr:$src2)))]>;
-def Int_CVTSD2SSrr: SDI<0x5A, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- "cvtsd2ss\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst, (int_x86_sse2_cvtsd2ss VR128:$src1,
- VR128:$src2))]>;
-def Int_CVTSD2SSrm: SDI<0x5A, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
- "cvtsd2ss\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst, (int_x86_sse2_cvtsd2ss VR128:$src1,
- (load addr:$src2)))]>;
-def Int_CVTSS2SDrr: I<0x5A, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- "cvtss2sd\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1,
- VR128:$src2))]>, XS,
- Requires<[HasSSE2]>;
-def Int_CVTSS2SDrm: I<0x5A, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, f32mem:$src2),
- "cvtss2sd\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1,
- (load addr:$src2)))]>, XS,
- Requires<[HasSSE2]>;
+ defm Int_CMPSS : sse12_cmp_scalar_int<VR128, f32mem, int_x86_sse_cmp_ss,
+ "cmp${cc}ss\t{$src, $dst|$dst, $src}">, XS;
+ defm Int_CMPSD : sse12_cmp_scalar_int<VR128, f64mem, int_x86_sse2_cmp_sd,
+ "cmp${cc}sd\t{$src, $dst|$dst, $src}">, XD;
+}
+
+
+// sse12_ord_cmp - Unordered/Ordered scalar fp compare and set EFLAGS
+multiclass sse12_ord_cmp<bits<8> opc, RegisterClass RC, SDNode OpNode,
+ ValueType vt, X86MemOperand x86memop,
+ PatFrag ld_frag, string OpcodeStr, Domain d> {
+ def rr: PI<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
+ [(set EFLAGS, (OpNode (vt RC:$src1), RC:$src2))], d>;
+ def rm: PI<opc, MRMSrcMem, (outs), (ins RC:$src1, x86memop:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
+ [(set EFLAGS, (OpNode (vt RC:$src1),
+ (ld_frag addr:$src2)))], d>;
+}
+
+let Defs = [EFLAGS] in {
+ let isAsmParserOnly = 1 in {
+ defm VUCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32,
+ "ucomiss", SSEPackedSingle>, VEX;
+ defm VUCOMISD : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64,
+ "ucomisd", SSEPackedDouble>, OpSize, VEX;
+ let Pattern = []<dag> in {
+ defm VCOMISS : sse12_ord_cmp<0x2F, VR128, undef, v4f32, f128mem, load,
+ "comiss", SSEPackedSingle>, VEX;
+ defm VCOMISD : sse12_ord_cmp<0x2F, VR128, undef, v2f64, f128mem, load,
+ "comisd", SSEPackedDouble>, OpSize, VEX;
+ }
+
+ defm Int_VUCOMISS : sse12_ord_cmp<0x2E, VR128, X86ucomi, v4f32, f128mem,
+ load, "ucomiss", SSEPackedSingle>, VEX;
+ defm Int_VUCOMISD : sse12_ord_cmp<0x2E, VR128, X86ucomi, v2f64, f128mem,
+ load, "ucomisd", SSEPackedDouble>, OpSize, VEX;
+
+ defm Int_VCOMISS : sse12_ord_cmp<0x2F, VR128, X86comi, v4f32, f128mem,
+ load, "comiss", SSEPackedSingle>, VEX;
+ defm Int_VCOMISD : sse12_ord_cmp<0x2F, VR128, X86comi, v2f64, f128mem,
+ load, "comisd", SSEPackedDouble>, OpSize, VEX;
+ }
+ defm UCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32,
+ "ucomiss", SSEPackedSingle>, TB;
+ defm UCOMISD : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64,
+ "ucomisd", SSEPackedDouble>, TB, OpSize;
+
+ let Pattern = []<dag> in {
+ defm COMISS : sse12_ord_cmp<0x2F, VR128, undef, v4f32, f128mem, load,
+ "comiss", SSEPackedSingle>, TB;
+ defm COMISD : sse12_ord_cmp<0x2F, VR128, undef, v2f64, f128mem, load,
+ "comisd", SSEPackedDouble>, TB, OpSize;
+ }
+
+ defm Int_UCOMISS : sse12_ord_cmp<0x2E, VR128, X86ucomi, v4f32, f128mem,
+ load, "ucomiss", SSEPackedSingle>, TB;
+ defm Int_UCOMISD : sse12_ord_cmp<0x2E, VR128, X86ucomi, v2f64, f128mem,
+ load, "ucomisd", SSEPackedDouble>, TB, OpSize;
+
+ defm Int_COMISS : sse12_ord_cmp<0x2F, VR128, X86comi, v4f32, f128mem, load,
+ "comiss", SSEPackedSingle>, TB;
+ defm Int_COMISD : sse12_ord_cmp<0x2F, VR128, X86comi, v2f64, f128mem, load,
+ "comisd", SSEPackedDouble>, TB, OpSize;
+} // Defs = [EFLAGS]
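+// A minimal usage sketch (assuming standard x86 semantics): these compares
+// write EFLAGS directly, so codegen can branch on the result, e.g.
+//   ucomiss %xmm1, %xmm0
+//   jae     .LBB0_2        ; taken when %xmm0 >= %xmm1 (CF = 0); an
+//                          ; unordered result sets CF and falls through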
+
+// sse12_cmp_packed - sse 1 & 2 compare packed instructions
+multiclass sse12_cmp_packed<RegisterClass RC, X86MemOperand x86memop,
+ Intrinsic Int, string asm, string asm_alt,
+ Domain d> {
+ def rri : PIi8<0xC2, MRMSrcReg,
+ (outs RC:$dst), (ins RC:$src1, RC:$src, SSECC:$cc), asm,
+ [(set RC:$dst, (Int RC:$src1, RC:$src, imm:$cc))], d>;
+ def rmi : PIi8<0xC2, MRMSrcMem,
+ (outs RC:$dst), (ins RC:$src1, f128mem:$src, SSECC:$cc), asm,
+ [(set RC:$dst, (Int RC:$src1, (memop addr:$src), imm:$cc))], d>;
+ // Accept explicit immediate argument form instead of comparison code.
+ let isAsmParserOnly = 1 in {
+ def rri_alt : PIi8<0xC2, MRMSrcReg,
+ (outs RC:$dst), (ins RC:$src1, RC:$src, i8imm:$src2),
+ asm_alt, [], d>;
+ def rmi_alt : PIi8<0xC2, MRMSrcMem,
+ (outs RC:$dst), (ins RC:$src1, f128mem:$src, i8imm:$src2),
+ asm_alt, [], d>;
+ }
+}
+
+let isAsmParserOnly = 1 in {
+ defm VCMPPS : sse12_cmp_packed<VR128, f128mem, int_x86_sse_cmp_ps,
+ "cmp${cc}ps\t{$src, $src1, $dst|$dst, $src1, $src}",
+ "cmpps\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}",
+ SSEPackedSingle>, VEX_4V;
+ defm VCMPPD : sse12_cmp_packed<VR128, f128mem, int_x86_sse2_cmp_pd,
+ "cmp${cc}pd\t{$src, $src1, $dst|$dst, $src1, $src}",
+ "cmppd\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}",
+ SSEPackedDouble>, OpSize, VEX_4V;
+}
+let Constraints = "$src1 = $dst" in {
+ defm CMPPS : sse12_cmp_packed<VR128, f128mem, int_x86_sse_cmp_ps,
+ "cmp${cc}ps\t{$src, $dst|$dst, $src}",
+ "cmpps\t{$src2, $src, $dst|$dst, $src, $src2}",
+ SSEPackedSingle>, TB;
+ defm CMPPD : sse12_cmp_packed<VR128, f128mem, int_x86_sse2_cmp_pd,
+ "cmp${cc}pd\t{$src, $dst|$dst, $src}",
+ "cmppd\t{$src2, $src, $dst|$dst, $src, $src2}",
+ SSEPackedDouble>, TB, OpSize;
+}
+
+def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), VR128:$src2, imm:$cc)),
+ (CMPPSrri (v4f32 VR128:$src1), (v4f32 VR128:$src2), imm:$cc)>;
+def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), (memop addr:$src2), imm:$cc)),
+ (CMPPSrmi (v4f32 VR128:$src1), addr:$src2, imm:$cc)>;
+def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), VR128:$src2, imm:$cc)),
+ (CMPPDrri VR128:$src1, VR128:$src2, imm:$cc)>;
+def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), (memop addr:$src2), imm:$cc)),
+ (CMPPDrmi VR128:$src1, addr:$src2, imm:$cc)>;
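+// Note that the Pat<> entries above let the X86cmpps/X86cmppd DAG nodes
+// reuse the intrinsic-pattern CMPPS/CMPPD definitions, so the vector
+// compares need no separate instruction definitions.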
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Shuffle Instructions
+//===----------------------------------------------------------------------===//
+
+/// sse12_shuffle - sse 1 & 2 shuffle instructions
+multiclass sse12_shuffle<RegisterClass RC, X86MemOperand x86memop,
+ ValueType vt, string asm, PatFrag mem_frag,
+ Domain d, bit IsConvertibleToThreeAddress = 0> {
+ def rmi : PIi8<0xC6, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, f128mem:$src2, i8imm:$src3), asm,
+ [(set VR128:$dst, (vt (shufp:$src3
+ VR128:$src1, (mem_frag addr:$src2))))], d>;
+ let isConvertibleToThreeAddress = IsConvertibleToThreeAddress in
+ def rri : PIi8<0xC6, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, i8imm:$src3), asm,
+ [(set VR128:$dst,
+ (vt (shufp:$src3 VR128:$src1, VR128:$src2)))], d>;
+}
+
+let isAsmParserOnly = 1 in {
+ defm VSHUFPS : sse12_shuffle<VR128, f128mem, v4f32,
+ "shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+ memopv4f32, SSEPackedSingle>, VEX_4V;
+ defm VSHUFPD : sse12_shuffle<VR128, f128mem, v2f64,
+                 "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+ memopv2f64, SSEPackedDouble>, OpSize, VEX_4V;
+}
+
+let Constraints = "$src1 = $dst" in {
+ defm SHUFPS : sse12_shuffle<VR128, f128mem, v4f32,
+ "shufps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ memopv4f32, SSEPackedSingle, 1 /* cvt to pshufd */>,
+ TB;
+ defm SHUFPD : sse12_shuffle<VR128, f128mem, v2f64,
+ "shufpd\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ memopv2f64, SSEPackedDouble>, TB, OpSize;
+}
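+// For reference, "defm SHUFPS" above expands to SHUFPSrri and SHUFPSrmi,
+// i.e. the same reg/reg and reg/mem shufps definitions as the hand-written
+// forms this patch removes, with the shufp:$src3 pattern applied to both.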
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Unpack Instructions
+//===----------------------------------------------------------------------===//
+
+/// sse12_unpack_interleave - sse 1 & 2 unpack and interleave
+multiclass sse12_unpack_interleave<bits<8> opc, PatFrag OpNode, ValueType vt,
+ PatFrag mem_frag, RegisterClass RC,
+ X86MemOperand x86memop, string asm,
+ Domain d> {
+ def rr : PI<opc, MRMSrcReg,
+ (outs RC:$dst), (ins RC:$src1, RC:$src2),
+ asm, [(set RC:$dst,
+ (vt (OpNode RC:$src1, RC:$src2)))], d>;
+ def rm : PI<opc, MRMSrcMem,
+ (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
+ asm, [(set RC:$dst,
+ (vt (OpNode RC:$src1,
+ (mem_frag addr:$src2))))], d>;
+}
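+
+// A sketch of the semantics the unpckl/unpckh fragments match: for v4f32
+// operands A = {a0,a1,a2,a3} and B = {b0,b1,b2,b3},
+//   unpcklps: dst = {a0,b0,a1,b1}   (interleave the low halves)
+//   unpckhps: dst = {a2,b2,a3,b3}   (interleave the high halves)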
+
+let AddedComplexity = 10 in {
+ let isAsmParserOnly = 1 in {
+ defm VUNPCKHPS: sse12_unpack_interleave<0x15, unpckh, v4f32, memopv4f32,
+ VR128, f128mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ SSEPackedSingle>, VEX_4V;
+ defm VUNPCKHPD: sse12_unpack_interleave<0x15, unpckh, v2f64, memopv2f64,
+ VR128, f128mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ SSEPackedDouble>, OpSize, VEX_4V;
+ defm VUNPCKLPS: sse12_unpack_interleave<0x14, unpckl, v4f32, memopv4f32,
+ VR128, f128mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ SSEPackedSingle>, VEX_4V;
+ defm VUNPCKLPD: sse12_unpack_interleave<0x14, unpckl, v2f64, memopv2f64,
+ VR128, f128mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ SSEPackedDouble>, OpSize, VEX_4V;
+ }
+
+ let Constraints = "$src1 = $dst" in {
+ defm UNPCKHPS: sse12_unpack_interleave<0x15, unpckh, v4f32, memopv4f32,
+ VR128, f128mem, "unpckhps\t{$src2, $dst|$dst, $src2}",
+ SSEPackedSingle>, TB;
+ defm UNPCKHPD: sse12_unpack_interleave<0x15, unpckh, v2f64, memopv2f64,
+ VR128, f128mem, "unpckhpd\t{$src2, $dst|$dst, $src2}",
+ SSEPackedDouble>, TB, OpSize;
+ defm UNPCKLPS: sse12_unpack_interleave<0x14, unpckl, v4f32, memopv4f32,
+ VR128, f128mem, "unpcklps\t{$src2, $dst|$dst, $src2}",
+ SSEPackedSingle>, TB;
+ defm UNPCKLPD: sse12_unpack_interleave<0x14, unpckl, v2f64, memopv2f64,
+ VR128, f128mem, "unpcklpd\t{$src2, $dst|$dst, $src2}",
+ SSEPackedDouble>, TB, OpSize;
+ } // Constraints = "$src1 = $dst"
+} // AddedComplexity
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Extract Floating-Point Sign mask
+//===----------------------------------------------------------------------===//
+
+/// sse12_extr_sign_mask - sse 1 & 2 packed FP sign mask extraction
+multiclass sse12_extr_sign_mask<RegisterClass RC, Intrinsic Int, string asm,
+ Domain d> {
+ def rr : PI<0x50, MRMSrcReg, (outs GR32:$dst), (ins RC:$src),
+ !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
+ [(set GR32:$dst, (Int RC:$src))], d>;
+}
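+
+// movmsk copies the sign bit of each packed element into the low bits of the
+// GR32 result (bit i = sign of element i); e.g. movmskps on
+// {-1.0, 2.0, -3.0, 4.0} produces 0b0101 = 5, and movmskpd likewise yields a
+// 2-bit mask.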
+
+// Mask creation
+defm MOVMSKPS : sse12_extr_sign_mask<VR128, int_x86_sse_movmsk_ps, "movmskps",
+ SSEPackedSingle>, TB;
+defm MOVMSKPD : sse12_extr_sign_mask<VR128, int_x86_sse2_movmsk_pd, "movmskpd",
+ SSEPackedDouble>, TB, OpSize;
+
+let isAsmParserOnly = 1 in {
+ defm VMOVMSKPS : sse12_extr_sign_mask<VR128, int_x86_sse_movmsk_ps,
+ "movmskps", SSEPackedSingle>, VEX;
+ defm VMOVMSKPD : sse12_extr_sign_mask<VR128, int_x86_sse2_movmsk_pd,
+ "movmskpd", SSEPackedDouble>, OpSize,
+ VEX;
+}
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Misc aliasing of packed SSE 1 & 2 instructions
+//===----------------------------------------------------------------------===//
+
+// Aliases of packed SSE1 & SSE2 instructions for scalar use. These all have
+// names that start with 'Fs'.
+
+// Alias instructions that map fld0 to pxor for sse.
+let isReMaterializable = 1, isAsCheapAsAMove = 1, isCodeGenOnly = 1,
+ canFoldAsLoad = 1 in {
+ // FIXME: Set encoding to pseudo!
+def FsFLD0SS : I<0xEF, MRMInitReg, (outs FR32:$dst), (ins), "",
+ [(set FR32:$dst, fp32imm0)]>,
+ Requires<[HasSSE1]>, TB, OpSize;
+def FsFLD0SD : I<0xEF, MRMInitReg, (outs FR64:$dst), (ins), "",
+ [(set FR64:$dst, fpimm0)]>,
+ Requires<[HasSSE2]>, TB, OpSize;
+}
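+
+// These reuse the pxor opcode (0xEF) because xor-ing a register with itself
+// yields zero regardless of its prior contents, which is what lets the
+// "load 0.0" be flagged above as rematerializable and as cheap as a move.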
+
+// Alias instruction to do FR32 or FR64 reg-to-reg copy using movaps. Upper
+// bits are disregarded.
+let neverHasSideEffects = 1 in {
+def FsMOVAPSrr : PSI<0x28, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
+ "movaps\t{$src, $dst|$dst, $src}", []>;
+def FsMOVAPDrr : PDI<0x28, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
+ "movapd\t{$src, $dst|$dst, $src}", []>;
+}
+
+// Alias instruction to load FR32 or FR64 from f128mem using movaps. Upper
+// bits are disregarded.
+let canFoldAsLoad = 1, isReMaterializable = 1 in {
+def FsMOVAPSrm : PSI<0x28, MRMSrcMem, (outs FR32:$dst), (ins f128mem:$src),
+ "movaps\t{$src, $dst|$dst, $src}",
+ [(set FR32:$dst, (alignedloadfsf32 addr:$src))]>;
+def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src),
+ "movapd\t{$src, $dst|$dst, $src}",
+ [(set FR64:$dst, (alignedloadfsf64 addr:$src))]>;
+}
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Logical Instructions
+//===----------------------------------------------------------------------===//
+
+/// sse12_fp_alias_pack_logical - SSE 1 & 2 aliased packed FP logical ops
+///
+multiclass sse12_fp_alias_pack_logical<bits<8> opc, string OpcodeStr,
+ SDNode OpNode, bit MayLoad = 0> {
+ let isAsmParserOnly = 1 in {
+ defm V#NAME#PS : sse12_fp_packed<opc, !strconcat(OpcodeStr,
+ "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"), OpNode, FR32,
+ f32, f128mem, memopfsf32, SSEPackedSingle, MayLoad>, VEX_4V;
+
+ defm V#NAME#PD : sse12_fp_packed<opc, !strconcat(OpcodeStr,
+ "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), OpNode, FR64,
+ f64, f128mem, memopfsf64, SSEPackedDouble, MayLoad>, OpSize,
+ VEX_4V;
+ }
+
+ let Constraints = "$src1 = $dst" in {
+ defm PS : sse12_fp_packed<opc, !strconcat(OpcodeStr,
+ "ps\t{$src2, $dst|$dst, $src2}"), OpNode, FR32, f32,
+ f128mem, memopfsf32, SSEPackedSingle, MayLoad>, TB;
+
+ defm PD : sse12_fp_packed<opc, !strconcat(OpcodeStr,
+ "pd\t{$src2, $dst|$dst, $src2}"), OpNode, FR64, f64,
+ f128mem, memopfsf64, SSEPackedDouble, MayLoad>, TB, OpSize;
+ }
+}
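+
+// A note on the V#NAME# paste above: within a defm, NAME is the
+// instantiation's own name, so defm FsAND below produces FsANDPS/FsANDPD
+// from the PS/PD members plus VFsANDPS/VFsANDPD for the AVX variants.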
+
+// Alias bitwise logical operations using SSE logical ops on packed FP values.
+defm FsAND : sse12_fp_alias_pack_logical<0x54, "and", X86fand>;
+defm FsOR : sse12_fp_alias_pack_logical<0x56, "or", X86for>;
+defm FsXOR : sse12_fp_alias_pack_logical<0x57, "xor", X86fxor>;
+
+let neverHasSideEffects = 1, Pattern = []<dag>, isCommutable = 0 in
+ defm FsANDN : sse12_fp_alias_pack_logical<0x55, "andn", undef, 1>;
+
+/// sse12_fp_packed_logical - SSE 1 & 2 packed FP logical ops
+///
+multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr,
+ SDNode OpNode, int HasPat = 0,
+ list<list<dag>> Pattern = []> {
+ let isAsmParserOnly = 1 in {
+ defm V#NAME#PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle,
+ !strconcat(OpcodeStr, "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ f128mem,
+ !if(HasPat, Pattern[0], // rr
+ [(set VR128:$dst, (v2i64 (OpNode VR128:$src1,
+ VR128:$src2)))]),
+ !if(HasPat, Pattern[2], // rm
+ [(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)),
+ (memopv2i64 addr:$src2)))])>,
+ VEX_4V;
+
+ defm V#NAME#PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble,
+ !strconcat(OpcodeStr, "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ f128mem,
+ !if(HasPat, Pattern[1], // rr
+ [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
+ (bc_v2i64 (v2f64
+ VR128:$src2))))]),
+ !if(HasPat, Pattern[3], // rm
+ [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
+ (memopv2i64 addr:$src2)))])>,
+ OpSize, VEX_4V;
+ }
+ let Constraints = "$src1 = $dst" in {
+ defm PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle,
+ !strconcat(OpcodeStr, "ps\t{$src2, $dst|$dst, $src2}"), f128mem,
+ !if(HasPat, Pattern[0], // rr
+ [(set VR128:$dst, (v2i64 (OpNode VR128:$src1,
+ VR128:$src2)))]),
+ !if(HasPat, Pattern[2], // rm
+ [(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)),
+ (memopv2i64 addr:$src2)))])>, TB;
+
+ defm PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble,
+ !strconcat(OpcodeStr, "pd\t{$src2, $dst|$dst, $src2}"), f128mem,
+ !if(HasPat, Pattern[1], // rr
+ [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
+ (bc_v2i64 (v2f64
+ VR128:$src2))))]),
+ !if(HasPat, Pattern[3], // rm
+ [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
+ (memopv2i64 addr:$src2)))])>,
+ TB, OpSize;
+ }
+}
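+
+// When HasPat is set, Pattern supplies the four match lists in the order the
+// !if comments indicate: [0] = PS rr, [1] = PD rr, [2] = PS rm, [3] = PD rm.
+// The ANDN instantiation below uses this to express (and (vnot x), y),
+// which has no single SDNode operator.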
+
+defm AND : sse12_fp_packed_logical<0x54, "and", and>;
+defm OR : sse12_fp_packed_logical<0x56, "or", or>;
+defm XOR : sse12_fp_packed_logical<0x57, "xor", xor>;
+let isCommutable = 0 in
+ defm ANDN : sse12_fp_packed_logical<0x55, "andn", undef /* dummy */, 1, [
+ // single r+r
+ [(set VR128:$dst, (v2i64 (and (xor VR128:$src1,
+ (bc_v2i64 (v4i32 immAllOnesV))),
+ VR128:$src2)))],
+ // double r+r
+ [(set VR128:$dst, (and (vnot (bc_v2i64 (v2f64 VR128:$src1))),
+ (bc_v2i64 (v2f64 VR128:$src2))))],
+ // single r+m
+ [(set VR128:$dst, (v2i64 (and (xor (bc_v2i64 (v4f32 VR128:$src1)),
+ (bc_v2i64 (v4i32 immAllOnesV))),
+ (memopv2i64 addr:$src2))))],
+ // double r+m
+ [(set VR128:$dst, (and (vnot (bc_v2i64 (v2f64 VR128:$src1))),
+ (memopv2i64 addr:$src2)))]]>;
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Arithmetic Instructions
+//===----------------------------------------------------------------------===//
+
+/// basic_sse12_fp_binop_rm - SSE 1 & 2 binops come in both scalar and
+/// vector forms.
+///
+/// In addition, we also have a special variant of the scalar form here to
+/// represent the associated intrinsic operation. This form is unlike the
+/// plain scalar form, in that it takes an entire vector (instead of a scalar)
+/// and leaves the top elements unmodified (therefore these cannot be commuted).
+///
+/// These three forms can each be reg+reg or reg+mem.
+///
+multiclass basic_sse12_fp_binop_rm<bits<8> opc, string OpcodeStr,
+ SDNode OpNode> {
+
+ let isAsmParserOnly = 1 in {
+ defm V#NAME#SS : sse12_fp_scalar<opc,
+ !strconcat(OpcodeStr, "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ OpNode, FR32, f32mem>, XS, VEX_4V;
+
+ defm V#NAME#SD : sse12_fp_scalar<opc,
+ !strconcat(OpcodeStr, "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ OpNode, FR64, f64mem>, XD, VEX_4V;
+
+ defm V#NAME#PS : sse12_fp_packed<opc, !strconcat(OpcodeStr,
+ "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"), OpNode,
+ VR128, v4f32, f128mem, memopv4f32, SSEPackedSingle>,
+ VEX_4V;
+
+ defm V#NAME#PD : sse12_fp_packed<opc, !strconcat(OpcodeStr,
+ "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), OpNode,
+ VR128, v2f64, f128mem, memopv2f64, SSEPackedDouble>,
+ OpSize, VEX_4V;
+
+ defm V#NAME#SS : sse12_fp_scalar_int<opc, OpcodeStr, VR128,
+ !strconcat(OpcodeStr, "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ "", "_ss", ssmem, sse_load_f32>, XS, VEX_4V;
+
+ defm V#NAME#SD : sse12_fp_scalar_int<opc, OpcodeStr, VR128,
+ !strconcat(OpcodeStr, "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ "2", "_sd", sdmem, sse_load_f64>, XD, VEX_4V;
+ }
+
+ let Constraints = "$src1 = $dst" in {
+ defm SS : sse12_fp_scalar<opc,
+ !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
+ OpNode, FR32, f32mem>, XS;
+
+ defm SD : sse12_fp_scalar<opc,
+ !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
+ OpNode, FR64, f64mem>, XD;
+
+ defm PS : sse12_fp_packed<opc, !strconcat(OpcodeStr,
+ "ps\t{$src2, $dst|$dst, $src2}"), OpNode, VR128, v4f32,
+ f128mem, memopv4f32, SSEPackedSingle>, TB;
+
+ defm PD : sse12_fp_packed<opc, !strconcat(OpcodeStr,
+ "pd\t{$src2, $dst|$dst, $src2}"), OpNode, VR128, v2f64,
+ f128mem, memopv2f64, SSEPackedDouble>, TB, OpSize;
+
+ defm SS : sse12_fp_scalar_int<opc, OpcodeStr, VR128,
+ !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
+ "", "_ss", ssmem, sse_load_f32>, XS;
+
+ defm SD : sse12_fp_scalar_int<opc, OpcodeStr, VR128,
+ !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
+ "2", "_sd", sdmem, sse_load_f64>, XD;
+ }
+}
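+
+// So, for illustration, defm ADD below creates ADDSS/ADDSD (scalar),
+// ADDPS/ADDPD (packed), and the scalar intrinsic variants; the "", "_ss"
+// and "2", "_sd" arguments presumably splice into the intrinsic names,
+// selecting int_x86_sse_add_ss and int_x86_sse2_add_sd respectively.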
+
+// Arithmetic instructions
+defm ADD : basic_sse12_fp_binop_rm<0x58, "add", fadd>;
+defm MUL : basic_sse12_fp_binop_rm<0x59, "mul", fmul>;
+
+let isCommutable = 0 in {
+ defm SUB : basic_sse12_fp_binop_rm<0x5C, "sub", fsub>;
+ defm DIV : basic_sse12_fp_binop_rm<0x5E, "div", fdiv>;
+}
+
+/// sse12_fp_binop_rm - Other SSE 1 & 2 binops
+///
+/// This multiclass is like basic_sse12_fp_binop_rm, with the addition of
+/// instructions for a full-vector intrinsic form. Operations that map
+/// onto C operators don't use this form since they just use the plain
+/// vector form instead of having a separate vector intrinsic form.
+///
+multiclass sse12_fp_binop_rm<bits<8> opc, string OpcodeStr,
+ SDNode OpNode> {
+
+ let isAsmParserOnly = 1 in {
+    // Scalar, packed, and intrinsic forms, each reg+reg and reg+mem.
+ defm V#NAME#SS : sse12_fp_scalar<opc,
+ !strconcat(OpcodeStr, "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ OpNode, FR32, f32mem>, XS, VEX_4V;
+
+ defm V#NAME#SD : sse12_fp_scalar<opc,
+ !strconcat(OpcodeStr, "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ OpNode, FR64, f64mem>, XD, VEX_4V;
+
+ defm V#NAME#PS : sse12_fp_packed<opc, !strconcat(OpcodeStr,
+ "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"), OpNode,
+ VR128, v4f32, f128mem, memopv4f32, SSEPackedSingle>,
+ VEX_4V;
+
+ defm V#NAME#PD : sse12_fp_packed<opc, !strconcat(OpcodeStr,
+ "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), OpNode,
+ VR128, v2f64, f128mem, memopv2f64, SSEPackedDouble>,
+ OpSize, VEX_4V;
+
+ defm V#NAME#SS : sse12_fp_scalar_int<opc, OpcodeStr, VR128,
+ !strconcat(OpcodeStr, "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ "", "_ss", ssmem, sse_load_f32>, XS, VEX_4V;
+
+ defm V#NAME#SD : sse12_fp_scalar_int<opc, OpcodeStr, VR128,
+ !strconcat(OpcodeStr, "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ "2", "_sd", sdmem, sse_load_f64>, XD, VEX_4V;
+
+ defm V#NAME#PS : sse12_fp_packed_int<opc, OpcodeStr, VR128,
+ !strconcat(OpcodeStr, "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ "", "_ps", f128mem, memopv4f32, SSEPackedSingle>, VEX_4V;
+
+ defm V#NAME#PD : sse12_fp_packed_int<opc, OpcodeStr, VR128,
+ !strconcat(OpcodeStr, "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ "2", "_pd", f128mem, memopv2f64, SSEPackedDouble>, OpSize,
+ VEX_4V;
+ }
+
+ let Constraints = "$src1 = $dst" in {
+    // Scalar, packed, and intrinsic forms, each reg+reg and reg+mem.
+ defm SS : sse12_fp_scalar<opc,
+ !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
+ OpNode, FR32, f32mem>, XS;
+ defm SD : sse12_fp_scalar<opc,
+ !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
+ OpNode, FR64, f64mem>, XD;
+ defm PS : sse12_fp_packed<opc, !strconcat(OpcodeStr,
+ "ps\t{$src2, $dst|$dst, $src2}"), OpNode, VR128, v4f32,
+ f128mem, memopv4f32, SSEPackedSingle>, TB;
+
+ defm PD : sse12_fp_packed<opc, !strconcat(OpcodeStr,
+ "pd\t{$src2, $dst|$dst, $src2}"), OpNode, VR128, v2f64,
+ f128mem, memopv2f64, SSEPackedDouble>, TB, OpSize;
+
+ defm SS : sse12_fp_scalar_int<opc, OpcodeStr, VR128,
+ !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
+ "", "_ss", ssmem, sse_load_f32>, XS;
+
+ defm SD : sse12_fp_scalar_int<opc, OpcodeStr, VR128,
+ !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
+ "2", "_sd", sdmem, sse_load_f64>, XD;
+
+ defm PS : sse12_fp_packed_int<opc, OpcodeStr, VR128,
+ !strconcat(OpcodeStr, "ps\t{$src2, $dst|$dst, $src2}"),
+ "", "_ps", f128mem, memopv4f32, SSEPackedSingle>, TB;
+
+ defm PD : sse12_fp_packed_int<opc, OpcodeStr, VR128,
+ !strconcat(OpcodeStr, "pd\t{$src2, $dst|$dst, $src2}"),
+ "2", "_pd", f128mem, memopv2f64, SSEPackedDouble>, TB, OpSize;
+ }
}
-// Arithmetic
+let isCommutable = 0 in {
+ defm MAX : sse12_fp_binop_rm<0x5F, "max", X86fmax>;
+ defm MIN : sse12_fp_binop_rm<0x5D, "min", X86fmin>;
+}
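+
+// MAX and MIN stay non-commutable because the x86 semantics are
+// order-sensitive for NaNs (and signed zeros): when a NaN is involved the
+// second source is returned, so e.g. maxss(1.0, NaN) = NaN while
+// maxss(NaN, 1.0) = 1.0.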
-/// sse2_fp_unop_rm - SSE2 unops come in both scalar and vector forms.
-///
+/// Unop Arithmetic: SSE 1 & 2 unary ops come in both scalar and vector forms.
/// In addition, we also have a special variant of the scalar form here to
/// represent the associated intrinsic operation. This form is unlike the
/// plain scalar form, in that it takes an entire vector (instead of a
/// scalar) and leaves the top elements undefined.
///
/// And, we have a special variant form for a full-vector intrinsic form.
-///
-/// These four forms can each have a reg or a mem operand, so there are a
-/// total of eight "instructions".
-///
-multiclass sse2_fp_unop_rm<bits<8> opc, string OpcodeStr,
- SDNode OpNode,
- Intrinsic F64Int,
- Intrinsic V2F64Int,
- bit Commutable = 0> {
- // Scalar operation, reg.
+
+/// sse1_fp_unop_s - SSE1 unops in scalar form.
+multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr,
+ SDNode OpNode, Intrinsic F32Int> {
+ def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
+ !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
+ [(set FR32:$dst, (OpNode FR32:$src))]>;
+ def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src),
+ !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
+ [(set FR32:$dst, (OpNode (load addr:$src)))]>, XS,
+ Requires<[HasSSE1, OptForSize]>;
+ def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (F32Int VR128:$src))]>;
+ def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), (ins ssmem:$src),
+ !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (F32Int sse_load_f32:$src))]>;
+}
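+
+// The _Int forms above take and return a whole v4f32, but only element 0 is
+// computed (the upper elements are the ones the comment above calls
+// undefined); that is why they use VR128 rather than FR32 operands.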
+
+/// sse1_fp_unop_p - SSE1 unops in vector forms.
+multiclass sse1_fp_unop_p<bits<8> opc, string OpcodeStr,
+ SDNode OpNode, Intrinsic V4F32Int> {
+ def PSr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))]>;
+ def PSm : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (OpNode (memopv4f32 addr:$src)))]>;
+ def PSr_Int : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (V4F32Int VR128:$src))]>;
+ def PSm_Int : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (V4F32Int (memopv4f32 addr:$src)))]>;
+}
+
+/// sse1_fp_unop_s_avx - AVX SSE1 unops in scalar form.
+multiclass sse1_fp_unop_s_avx<bits<8> opc, string OpcodeStr,
+ SDNode OpNode, Intrinsic F32Int> {
+ def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src1, FR32:$src2),
+ !strconcat(!strconcat("v", OpcodeStr),
+ "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
+ def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins FR32:$src1, f32mem:$src2),
+ !strconcat(!strconcat("v", OpcodeStr),
+ "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, XS, Requires<[HasAVX, HasSSE1, OptForSize]>;
+ def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !strconcat(!strconcat("v", OpcodeStr),
+ "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
+ def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, ssmem:$src2),
+ !strconcat(!strconcat("v", OpcodeStr),
+ "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
+}
+
+/// sse2_fp_unop_s - SSE2 unops in scalar form.
+multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr,
+ SDNode OpNode, Intrinsic F64Int> {
def SDr : SDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
!strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
- [(set FR64:$dst, (OpNode FR64:$src))]> {
- let isCommutable = Commutable;
- }
-
- // Scalar operation, mem.
+ [(set FR64:$dst, (OpNode FR64:$src))]>;
def SDm : SDI<opc, MRMSrcMem, (outs FR64:$dst), (ins f64mem:$src),
!strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
[(set FR64:$dst, (OpNode (load addr:$src)))]>;
-
- // Vector operation, reg.
- def PDr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (v2f64 (OpNode VR128:$src)))]> {
- let isCommutable = Commutable;
- }
-
- // Vector operation, mem.
- def PDm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (OpNode (memopv2f64 addr:$src)))]>;
-
- // Intrinsic operation, reg.
def SDr_Int : SDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (F64Int VR128:$src))]> {
- let isCommutable = Commutable;
- }
-
- // Intrinsic operation, mem.
+ [(set VR128:$dst, (F64Int VR128:$src))]>;
def SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst), (ins sdmem:$src),
!strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (F64Int sse_load_f64:$src))]>;
+}
- // Vector intrinsic operation, reg
+/// sse2_fp_unop_p - SSE2 unops in vector forms.
+multiclass sse2_fp_unop_p<bits<8> opc, string OpcodeStr,
+ SDNode OpNode, Intrinsic V2F64Int> {
+ def PDr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (v2f64 (OpNode VR128:$src)))]>;
+ def PDm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (OpNode (memopv2f64 addr:$src)))]>;
def PDr_Int : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (V2F64Int VR128:$src))]> {
- let isCommutable = Commutable;
- }
-
- // Vector intrinsic operation, mem
+ [(set VR128:$dst, (V2F64Int VR128:$src))]>;
def PDm_Int : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
!strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (V2F64Int (memopv2f64 addr:$src)))]>;
}
+/// sse2_fp_unop_s_avx - AVX SSE2 unops in scalar form.
+multiclass sse2_fp_unop_s_avx<bits<8> opc, string OpcodeStr,
+ SDNode OpNode, Intrinsic F64Int> {
+ def SDr : VSDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src1, FR64:$src2),
+ !strconcat(OpcodeStr,
+ "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
+ def SDm : VSDI<opc, MRMSrcMem, (outs FR64:$dst),
+ (ins FR64:$src1, f64mem:$src2),
+ !strconcat(OpcodeStr,
+ "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
+ def SDr_Int : VSDI<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>;
+ def SDm_Int : VSDI<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, sdmem:$src2),
+ !strconcat(OpcodeStr, "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>;
+}
+
+let isAsmParserOnly = 1 in {
+ // Square root.
+ let Predicates = [HasAVX, HasSSE2] in {
+ defm VSQRT : sse2_fp_unop_s_avx<0x51, "sqrt", fsqrt, int_x86_sse2_sqrt_sd>,
+ VEX_4V;
+
+ defm VSQRT : sse2_fp_unop_p<0x51, "vsqrt", fsqrt, int_x86_sse2_sqrt_pd>, VEX;
+ }
+
+ let Predicates = [HasAVX, HasSSE1] in {
+ defm VSQRT : sse1_fp_unop_s_avx<0x51, "sqrt", fsqrt, int_x86_sse_sqrt_ss>,
+ VEX_4V;
+ defm VSQRT : sse1_fp_unop_p<0x51, "vsqrt", fsqrt, int_x86_sse_sqrt_ps>, VEX;
+ // Reciprocal approximations. Note that these typically require refinement
+ // in order to obtain suitable precision.
+ defm VRSQRT : sse1_fp_unop_s_avx<0x52, "rsqrt", X86frsqrt,
+ int_x86_sse_rsqrt_ss>, VEX_4V;
+ defm VRSQRT : sse1_fp_unop_p<0x52, "vrsqrt", X86frsqrt, int_x86_sse_rsqrt_ps>,
+ VEX;
+ defm VRCP : sse1_fp_unop_s_avx<0x53, "rcp", X86frcp, int_x86_sse_rcp_ss>,
+ VEX_4V;
+ defm VRCP : sse1_fp_unop_p<0x53, "vrcp", X86frcp, int_x86_sse_rcp_ps>,
+ VEX;
+ }
+}
+
// Square root.
-defm SQRT : sse2_fp_unop_rm<0x51, "sqrt", fsqrt,
- int_x86_sse2_sqrt_sd, int_x86_sse2_sqrt_pd>;
+defm SQRT : sse1_fp_unop_s<0x51, "sqrt", fsqrt, int_x86_sse_sqrt_ss>,
+ sse1_fp_unop_p<0x51, "sqrt", fsqrt, int_x86_sse_sqrt_ps>,
+ sse2_fp_unop_s<0x51, "sqrt", fsqrt, int_x86_sse2_sqrt_sd>,
+ sse2_fp_unop_p<0x51, "sqrt", fsqrt, int_x86_sse2_sqrt_pd>;
+
+// Reciprocal approximations. Note that these typically require refinement
+// in order to obtain suitable precision.
+defm RSQRT : sse1_fp_unop_s<0x52, "rsqrt", X86frsqrt, int_x86_sse_rsqrt_ss>,
+ sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt, int_x86_sse_rsqrt_ps>;
+defm RCP : sse1_fp_unop_s<0x53, "rcp", X86frcp, int_x86_sse_rcp_ss>,
+ sse1_fp_unop_p<0x53, "rcp", X86frcp, int_x86_sse_rcp_ps>;
// There is no f64 version of the reciprocal approximation instructions.
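+
+// As a worked example of that refinement: rsqrtps returns roughly 12 bits of
+// precision, and one Newton-Raphson step,
+//   x1 = x0 * (1.5 - 0.5 * a * x0 * x0),
+// approximately doubles the number of correct bits.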
-let Constraints = "$src1 = $dst" in {
- def CMPPDrri : PDIi8<0xC2, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src, SSECC:$cc),
- "cmp${cc}pd\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cmp_pd VR128:$src1,
- VR128:$src, imm:$cc))]>;
- def CMPPDrmi : PDIi8<0xC2, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, f128mem:$src, SSECC:$cc),
- "cmp${cc}pd\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cmp_pd VR128:$src1,
- (memop addr:$src), imm:$cc))]>;
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Non-temporal stores
+//===----------------------------------------------------------------------===//
- // Accept explicit immediate argument form instead of comparison code.
let isAsmParserOnly = 1 in {
- def CMPPDrri_alt : PDIi8<0xC2, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src, i8imm:$src2),
- "cmppd\t{$src2, $src, $dst|$dst, $src, $src2}", []>;
- def CMPPDrmi_alt : PDIi8<0xC2, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, f128mem:$src, i8imm:$src2),
- "cmppd\t{$src2, $src, $dst|$dst, $src, $src2}", []>;
+ def VMOVNTPSmr_Int : VPSI<0x2B, MRMDestMem, (outs),
+ (ins i128mem:$dst, VR128:$src),
+ "movntps\t{$src, $dst|$dst, $src}",
+ [(int_x86_sse_movnt_ps addr:$dst, VR128:$src)]>, VEX;
+ def VMOVNTPDmr_Int : VPDI<0x2B, MRMDestMem, (outs),
+ (ins i128mem:$dst, VR128:$src),
+ "movntpd\t{$src, $dst|$dst, $src}",
+ [(int_x86_sse2_movnt_pd addr:$dst, VR128:$src)]>, VEX;
+
+ let ExeDomain = SSEPackedInt in
+ def VMOVNTDQmr_Int : VPDI<0xE7, MRMDestMem, (outs),
+ (ins f128mem:$dst, VR128:$src),
+ "movntdq\t{$src, $dst|$dst, $src}",
+ [(int_x86_sse2_movnt_dq addr:$dst, VR128:$src)]>, VEX;
+
+ let AddedComplexity = 400 in { // Prefer non-temporal versions
+ def VMOVNTPSmr : VPSI<0x2B, MRMDestMem, (outs),
+ (ins f128mem:$dst, VR128:$src),
+ "movntps\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v4f32 VR128:$src),
+ addr:$dst)]>, VEX;
+ def VMOVNTPDmr : VPDI<0x2B, MRMDestMem, (outs),
+ (ins f128mem:$dst, VR128:$src),
+ "movntpd\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v2f64 VR128:$src),
+ addr:$dst)]>, VEX;
+ def VMOVNTDQ_64mr : VPDI<0xE7, MRMDestMem, (outs),
+ (ins f128mem:$dst, VR128:$src),
+ "movntdq\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v2f64 VR128:$src),
+ addr:$dst)]>, VEX;
+ let ExeDomain = SSEPackedInt in
+ def VMOVNTDQmr : VPDI<0xE7, MRMDestMem, (outs),
+ (ins f128mem:$dst, VR128:$src),
+ "movntdq\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v4f32 VR128:$src),
+ addr:$dst)]>, VEX;
+ }
}
+
+def MOVNTPSmr_Int : PSI<0x2B, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
+ "movntps\t{$src, $dst|$dst, $src}",
+ [(int_x86_sse_movnt_ps addr:$dst, VR128:$src)]>;
+def MOVNTPDmr_Int : PDI<0x2B, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
+ "movntpd\t{$src, $dst|$dst, $src}",
+ [(int_x86_sse2_movnt_pd addr:$dst, VR128:$src)]>;
+
+let ExeDomain = SSEPackedInt in
+def MOVNTDQmr_Int : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movntdq\t{$src, $dst|$dst, $src}",
+ [(int_x86_sse2_movnt_dq addr:$dst, VR128:$src)]>;
+
+let AddedComplexity = 400 in { // Prefer non-temporal versions
+def MOVNTPSmr : PSI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movntps\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)]>;
+def MOVNTPDmr : PDI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movntpd\t{$src, $dst|$dst, $src}",
+                    [(alignednontemporalstore (v2f64 VR128:$src), addr:$dst)]>;
+
+def MOVNTDQ_64mr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movntdq\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v2f64 VR128:$src), addr:$dst)]>;
+
+let ExeDomain = SSEPackedInt in
+def MOVNTDQmr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movntdq\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)]>;
+
+// There is no AVX form for instructions below this point
+def MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
+ "movnti\t{$src, $dst|$dst, $src}",
+ [(nontemporalstore (i32 GR32:$src), addr:$dst)]>,
+ TB, Requires<[HasSSE2]>;
+
+def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
+ "movnti\t{$src, $dst|$dst, $src}",
+ [(nontemporalstore (i64 GR64:$src), addr:$dst)]>,
+ TB, Requires<[HasSSE2]>;
+
}
-def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), VR128:$src2, imm:$cc)),
- (CMPPDrri VR128:$src1, VR128:$src2, imm:$cc)>;
-def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), (memop addr:$src2), imm:$cc)),
- (CMPPDrmi VR128:$src1, addr:$src2, imm:$cc)>;
+def MOVNTImr_Int : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
+ "movnti\t{$src, $dst|$dst, $src}",
+ [(int_x86_sse2_movnt_i addr:$dst, GR32:$src)]>,
+ TB, Requires<[HasSSE2]>;
-// Shuffle and unpack instructions
-let Constraints = "$src1 = $dst" in {
- def SHUFPDrri : PDIi8<0xC6, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i8imm:$src3),
- "shufpd\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- [(set VR128:$dst,
- (v2f64 (shufp:$src3 VR128:$src1, VR128:$src2)))]>;
- def SHUFPDrmi : PDIi8<0xC6, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1,
- f128mem:$src2, i8imm:$src3),
- "shufpd\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- [(set VR128:$dst,
- (v2f64 (shufp:$src3
- VR128:$src1, (memopv2f64 addr:$src2))))]>;
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Misc Instructions (No AVX form)
+//===----------------------------------------------------------------------===//
- let AddedComplexity = 10 in {
- def UNPCKHPDrr : PDI<0x15, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- "unpckhpd\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (v2f64 (unpckh VR128:$src1, VR128:$src2)))]>;
- def UNPCKHPDrm : PDI<0x15, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
- "unpckhpd\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (v2f64 (unpckh VR128:$src1,
- (memopv2f64 addr:$src2))))]>;
+// Prefetch intrinsic.
+def PREFETCHT0 : PSI<0x18, MRM1m, (outs), (ins i8mem:$src),
+ "prefetcht0\t$src", [(prefetch addr:$src, imm, (i32 3))]>;
+def PREFETCHT1 : PSI<0x18, MRM2m, (outs), (ins i8mem:$src),
+ "prefetcht1\t$src", [(prefetch addr:$src, imm, (i32 2))]>;
+def PREFETCHT2 : PSI<0x18, MRM3m, (outs), (ins i8mem:$src),
+ "prefetcht2\t$src", [(prefetch addr:$src, imm, (i32 1))]>;
+def PREFETCHNTA : PSI<0x18, MRM0m, (outs), (ins i8mem:$src),
+ "prefetchnta\t$src", [(prefetch addr:$src, imm, (i32 0))]>;
- def UNPCKLPDrr : PDI<0x14, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- "unpcklpd\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (v2f64 (unpckl VR128:$src1, VR128:$src2)))]>;
- def UNPCKLPDrm : PDI<0x14, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
- "unpcklpd\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (unpckl VR128:$src1, (memopv2f64 addr:$src2)))]>;
- } // AddedComplexity
-} // Constraints = "$src1 = $dst"
+// Load, store, and memory fence
+def SFENCE : I<0xAE, MRM_F8, (outs), (ins), "sfence", [(int_x86_sse_sfence)]>,
+ TB, Requires<[HasSSE1]>;
+
+// Alias instructions that map zero vector to pxor / xorp* for sse.
+// We set canFoldAsLoad because this can be converted to a constant-pool
+// load of an all-zeros value if folding it would be beneficial.
+// FIXME: Change encoding to pseudo!
+let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
+ isCodeGenOnly = 1 in {
+def V_SET0PS : PSI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "",
+ [(set VR128:$dst, (v4f32 immAllZerosV))]>;
+def V_SET0PD : PDI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "",
+ [(set VR128:$dst, (v2f64 immAllZerosV))]>;
+let ExeDomain = SSEPackedInt in
+def V_SET0PI : PDI<0xEF, MRMInitReg, (outs VR128:$dst), (ins), "",
+ [(set VR128:$dst, (v4i32 immAllZerosV))]>;
+}
+def : Pat<(v2i64 immAllZerosV), (V_SET0PI)>;
+def : Pat<(v8i16 immAllZerosV), (V_SET0PI)>;
+def : Pat<(v16i8 immAllZerosV), (V_SET0PI)>;
+
+def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
+ (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Load/Store MXCSR register
+//===----------------------------------------------------------------------===//
+
+let isAsmParserOnly = 1 in {
+ def VLDMXCSR : VPSI<0xAE, MRM2m, (outs), (ins i32mem:$src),
+ "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)]>, VEX;
+ def VSTMXCSR : VPSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),
+ "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)]>, VEX;
+}
+
+def LDMXCSR : PSI<0xAE, MRM2m, (outs), (ins i32mem:$src),
+ "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)]>;
+def STMXCSR : PSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),
+ "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)]>;
//===---------------------------------------------------------------------===//
-// SSE integer instructions
-let ExeDomain = SSEPackedInt in {
+// SSE2 - Move Aligned/Unaligned Packed Integer Instructions
+//===---------------------------------------------------------------------===//
+let ExeDomain = SSEPackedInt in { // SSE integer instructions
+
+let isAsmParserOnly = 1 in {
+ let neverHasSideEffects = 1 in
+ def VMOVDQArr : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
+ def VMOVDQUrr : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "movdqu\t{$src, $dst|$dst, $src}", []>, XS, VEX;
+
+ let canFoldAsLoad = 1, mayLoad = 1 in {
+ def VMOVDQArm : VPDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
+ "movdqa\t{$src, $dst|$dst, $src}",
+ [/*(set VR128:$dst, (alignedloadv2i64 addr:$src))*/]>,
+ VEX;
+ def VMOVDQUrm : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
+ "vmovdqu\t{$src, $dst|$dst, $src}",
+ [/*(set VR128:$dst, (loadv2i64 addr:$src))*/]>,
+ XS, VEX, Requires<[HasAVX, HasSSE2]>;
+ }
+
+ let mayStore = 1 in {
+ def VMOVDQAmr : VPDI<0x7F, MRMDestMem, (outs),
+ (ins i128mem:$dst, VR128:$src),
+ "movdqa\t{$src, $dst|$dst, $src}",
+ [/*(alignedstore (v2i64 VR128:$src), addr:$dst)*/]>, VEX;
+ def VMOVDQUmr : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
+ "vmovdqu\t{$src, $dst|$dst, $src}",
+ [/*(store (v2i64 VR128:$src), addr:$dst)*/]>,
+ XS, VEX, Requires<[HasAVX, HasSSE2]>;
+ }
+}
-// Move Instructions
let neverHasSideEffects = 1 in
def MOVDQArr : PDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"movdqa\t{$src, $dst|$dst, $src}", []>;
-let canFoldAsLoad = 1, mayLoad = 1 in
+
+let canFoldAsLoad = 1, mayLoad = 1 in {
def MOVDQArm : PDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"movdqa\t{$src, $dst|$dst, $src}",
[/*(set VR128:$dst, (alignedloadv2i64 addr:$src))*/]>;
-let mayStore = 1 in
-def MOVDQAmr : PDI<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
- "movdqa\t{$src, $dst|$dst, $src}",
- [/*(alignedstore (v2i64 VR128:$src), addr:$dst)*/]>;
-let canFoldAsLoad = 1, mayLoad = 1 in
def MOVDQUrm : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"movdqu\t{$src, $dst|$dst, $src}",
[/*(set VR128:$dst, (loadv2i64 addr:$src))*/]>,
XS, Requires<[HasSSE2]>;
-let mayStore = 1 in
+}
+
+let mayStore = 1 in {
+def MOVDQAmr : PDI<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
+ "movdqa\t{$src, $dst|$dst, $src}",
+ [/*(alignedstore (v2i64 VR128:$src), addr:$dst)*/]>;
def MOVDQUmr : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
"movdqu\t{$src, $dst|$dst, $src}",
[/*(store (v2i64 VR128:$src), addr:$dst)*/]>,
XS, Requires<[HasSSE2]>;
+}
// Intrinsic forms of MOVDQU load and store
+let isAsmParserOnly = 1 in {
+let canFoldAsLoad = 1 in
+def VMOVDQUrm_Int : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
+ "vmovdqu\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_loadu_dq addr:$src))]>,
+ XS, VEX, Requires<[HasAVX, HasSSE2]>;
+def VMOVDQUmr_Int : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
+ "vmovdqu\t{$src, $dst|$dst, $src}",
+ [(int_x86_sse2_storeu_dq addr:$dst, VR128:$src)]>,
+ XS, VEX, Requires<[HasAVX, HasSSE2]>;
+}
+
let canFoldAsLoad = 1 in
def MOVDQUrm_Int : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"movdqu\t{$src, $dst|$dst, $src}",
@@ -1994,55 +2250,72 @@
[(int_x86_sse2_storeu_dq addr:$dst, VR128:$src)]>,
XS, Requires<[HasSSE2]>;
-let Constraints = "$src1 = $dst" in {
+} // ExeDomain = SSEPackedInt
+
+//===---------------------------------------------------------------------===//
+// SSE2 - Packed Integer Arithmetic Instructions
+//===---------------------------------------------------------------------===//
+
+let ExeDomain = SSEPackedInt in { // SSE integer instructions
multiclass PDI_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId,
- bit Commutable = 0> {
+ bit IsCommutable = 0, bit Is2Addr = 1> {
+ let isCommutable = IsCommutable in
def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2))]> {
- let isCommutable = Commutable;
- }
+ (ins VR128:$src1, VR128:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2))]>;
def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, i128mem:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (IntId VR128:$src1,
- (bitconvert (memopv2i64
- addr:$src2))))]>;
+ (ins VR128:$src1, i128mem:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (IntId VR128:$src1,
+ (bitconvert (memopv2i64 addr:$src2))))]>;
}
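+
+// The Is2Addr flag selects the asm string: with Is2Addr = 1 the destination
+// doubles as the first source ("paddb $src2, $dst"), matching the
+// "$src1 = $dst" constraint below, while Is2Addr = 0 emits the
+// three-operand AVX form ("vpaddb $src2, $src1, $dst").
+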
multiclass PDI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm,
- string OpcodeStr,
- Intrinsic IntId, Intrinsic IntId2> {
+ string OpcodeStr, Intrinsic IntId,
+ Intrinsic IntId2, bit Is2Addr = 1> {
def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2))]>;
+ (ins VR128:$src1, VR128:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2))]>;
def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, i128mem:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (IntId VR128:$src1,
+ (ins VR128:$src1, i128mem:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (IntId VR128:$src1,
(bitconvert (memopv2i64 addr:$src2))))]>;
def ri : PDIi8<opc2, ImmForm, (outs VR128:$dst),
- (ins VR128:$src1, i32i8imm:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (IntId2 VR128:$src1, (i32 imm:$src2)))]>;
+ (ins VR128:$src1, i32i8imm:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (IntId2 VR128:$src1, (i32 imm:$src2)))]>;
}
/// PDI_binop_rm - Simple SSE2 binary operator.
multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
- ValueType OpVT, bit Commutable = 0> {
+ ValueType OpVT, bit IsCommutable = 0, bit Is2Addr = 1> {
+ let isCommutable = IsCommutable in
def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (OpVT (OpNode VR128:$src1, VR128:$src2)))]> {
- let isCommutable = Commutable;
- }
+ (ins VR128:$src1, VR128:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (OpVT (OpNode VR128:$src1, VR128:$src2)))]>;
def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, i128mem:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (OpVT (OpNode VR128:$src1,
+ (ins VR128:$src1, i128mem:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (OpVT (OpNode VR128:$src1,
(bitconvert (memopv2i64 addr:$src2)))))]>;
}
@@ -2052,64 +2325,177 @@
/// to collapse (bitconvert VT to VT) into its operand.
///
multiclass PDI_binop_rm_v2i64<bits<8> opc, string OpcodeStr, SDNode OpNode,
- bit Commutable = 0> {
+ bit IsCommutable = 0, bit Is2Addr = 1> {
+ let isCommutable = IsCommutable in
def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (v2i64 (OpNode VR128:$src1, VR128:$src2)))]> {
- let isCommutable = Commutable;
- }
+ (ins VR128:$src1, VR128:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (v2i64 (OpNode VR128:$src1, VR128:$src2)))]>;
def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, i128mem:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (OpNode VR128:$src1,
- (memopv2i64 addr:$src2)))]>;
+ (ins VR128:$src1, i128mem:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (OpNode VR128:$src1, (memopv2i64 addr:$src2)))]>;
}
-} // Constraints = "$src1 = $dst"
} // ExeDomain = SSEPackedInt
// 128-bit Integer Arithmetic
-defm PADDB : PDI_binop_rm<0xFC, "paddb", add, v16i8, 1>;
-defm PADDW : PDI_binop_rm<0xFD, "paddw", add, v8i16, 1>;
-defm PADDD : PDI_binop_rm<0xFE, "paddd", add, v4i32, 1>;
-defm PADDQ : PDI_binop_rm_v2i64<0xD4, "paddq", add, 1>;
-
-defm PADDSB : PDI_binop_rm_int<0xEC, "paddsb" , int_x86_sse2_padds_b, 1>;
-defm PADDSW : PDI_binop_rm_int<0xED, "paddsw" , int_x86_sse2_padds_w, 1>;
-defm PADDUSB : PDI_binop_rm_int<0xDC, "paddusb", int_x86_sse2_paddus_b, 1>;
-defm PADDUSW : PDI_binop_rm_int<0xDD, "paddusw", int_x86_sse2_paddus_w, 1>;
+let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE2] in {
+defm VPADDB : PDI_binop_rm<0xFC, "vpaddb", add, v16i8, 1, 0 /*3addr*/>, VEX_4V;
+defm VPADDW : PDI_binop_rm<0xFD, "vpaddw", add, v8i16, 1, 0>, VEX_4V;
+defm VPADDD : PDI_binop_rm<0xFE, "vpaddd", add, v4i32, 1, 0>, VEX_4V;
+defm VPADDQ : PDI_binop_rm_v2i64<0xD4, "vpaddq", add, 1, 0>, VEX_4V;
+defm VPMULLW : PDI_binop_rm<0xD5, "vpmullw", mul, v8i16, 1, 0>, VEX_4V;
+defm VPSUBB : PDI_binop_rm<0xF8, "vpsubb", sub, v16i8, 0, 0>, VEX_4V;
+defm VPSUBW : PDI_binop_rm<0xF9, "vpsubw", sub, v8i16, 0, 0>, VEX_4V;
+defm VPSUBD : PDI_binop_rm<0xFA, "vpsubd", sub, v4i32, 0, 0>, VEX_4V;
+defm VPSUBQ : PDI_binop_rm_v2i64<0xFB, "vpsubq", sub, 0, 0>, VEX_4V;
+
+// Intrinsic forms
+defm VPSUBSB : PDI_binop_rm_int<0xE8, "vpsubsb" , int_x86_sse2_psubs_b, 0, 0>,
+ VEX_4V;
+defm VPSUBSW : PDI_binop_rm_int<0xE9, "vpsubsw" , int_x86_sse2_psubs_w, 0, 0>,
+ VEX_4V;
+defm VPSUBUSB : PDI_binop_rm_int<0xD8, "vpsubusb", int_x86_sse2_psubus_b, 0, 0>,
+ VEX_4V;
+defm VPSUBUSW : PDI_binop_rm_int<0xD9, "vpsubusw", int_x86_sse2_psubus_w, 0, 0>,
+ VEX_4V;
+defm VPADDSB : PDI_binop_rm_int<0xEC, "vpaddsb" , int_x86_sse2_padds_b, 1, 0>,
+ VEX_4V;
+defm VPADDSW : PDI_binop_rm_int<0xED, "vpaddsw" , int_x86_sse2_padds_w, 1, 0>,
+ VEX_4V;
+defm VPADDUSB : PDI_binop_rm_int<0xDC, "vpaddusb", int_x86_sse2_paddus_b, 1, 0>,
+ VEX_4V;
+defm VPADDUSW : PDI_binop_rm_int<0xDD, "vpaddusw", int_x86_sse2_paddus_w, 1, 0>,
+ VEX_4V;
+defm VPMULHUW : PDI_binop_rm_int<0xE4, "vpmulhuw", int_x86_sse2_pmulhu_w, 1, 0>,
+ VEX_4V;
+defm VPMULHW : PDI_binop_rm_int<0xE5, "vpmulhw" , int_x86_sse2_pmulh_w, 1, 0>,
+ VEX_4V;
+defm VPMULUDQ : PDI_binop_rm_int<0xF4, "vpmuludq", int_x86_sse2_pmulu_dq, 1, 0>,
+ VEX_4V;
+defm VPMADDWD : PDI_binop_rm_int<0xF5, "vpmaddwd", int_x86_sse2_pmadd_wd, 1, 0>,
+ VEX_4V;
+defm VPAVGB : PDI_binop_rm_int<0xE0, "vpavgb", int_x86_sse2_pavg_b, 1, 0>,
+ VEX_4V;
+defm VPAVGW : PDI_binop_rm_int<0xE3, "vpavgw", int_x86_sse2_pavg_w, 1, 0>,
+ VEX_4V;
+defm VPMINUB : PDI_binop_rm_int<0xDA, "vpminub", int_x86_sse2_pminu_b, 1, 0>,
+ VEX_4V;
+defm VPMINSW : PDI_binop_rm_int<0xEA, "vpminsw", int_x86_sse2_pmins_w, 1, 0>,
+ VEX_4V;
+defm VPMAXUB : PDI_binop_rm_int<0xDE, "vpmaxub", int_x86_sse2_pmaxu_b, 1, 0>,
+ VEX_4V;
+defm VPMAXSW : PDI_binop_rm_int<0xEE, "vpmaxsw", int_x86_sse2_pmaxs_w, 1, 0>,
+ VEX_4V;
+defm VPSADBW : PDI_binop_rm_int<0xF6, "vpsadbw", int_x86_sse2_psad_bw, 1, 0>,
+ VEX_4V;
+}
+let Constraints = "$src1 = $dst" in {
+defm PADDB : PDI_binop_rm<0xFC, "paddb", add, v16i8, 1>;
+defm PADDW : PDI_binop_rm<0xFD, "paddw", add, v8i16, 1>;
+defm PADDD : PDI_binop_rm<0xFE, "paddd", add, v4i32, 1>;
+defm PADDQ : PDI_binop_rm_v2i64<0xD4, "paddq", add, 1>;
+defm PMULLW : PDI_binop_rm<0xD5, "pmullw", mul, v8i16, 1>;
defm PSUBB : PDI_binop_rm<0xF8, "psubb", sub, v16i8>;
defm PSUBW : PDI_binop_rm<0xF9, "psubw", sub, v8i16>;
defm PSUBD : PDI_binop_rm<0xFA, "psubd", sub, v4i32>;
defm PSUBQ : PDI_binop_rm_v2i64<0xFB, "psubq", sub>;
+// Intrinsic forms
defm PSUBSB : PDI_binop_rm_int<0xE8, "psubsb" , int_x86_sse2_psubs_b>;
defm PSUBSW : PDI_binop_rm_int<0xE9, "psubsw" , int_x86_sse2_psubs_w>;
defm PSUBUSB : PDI_binop_rm_int<0xD8, "psubusb", int_x86_sse2_psubus_b>;
defm PSUBUSW : PDI_binop_rm_int<0xD9, "psubusw", int_x86_sse2_psubus_w>;
-
-defm PMULLW : PDI_binop_rm<0xD5, "pmullw", mul, v8i16, 1>;
-
+defm PADDSB : PDI_binop_rm_int<0xEC, "paddsb" , int_x86_sse2_padds_b, 1>;
+defm PADDSW : PDI_binop_rm_int<0xED, "paddsw" , int_x86_sse2_padds_w, 1>;
+defm PADDUSB : PDI_binop_rm_int<0xDC, "paddusb", int_x86_sse2_paddus_b, 1>;
+defm PADDUSW : PDI_binop_rm_int<0xDD, "paddusw", int_x86_sse2_paddus_w, 1>;
defm PMULHUW : PDI_binop_rm_int<0xE4, "pmulhuw", int_x86_sse2_pmulhu_w, 1>;
-defm PMULHW : PDI_binop_rm_int<0xE5, "pmulhw" , int_x86_sse2_pmulh_w , 1>;
+defm PMULHW : PDI_binop_rm_int<0xE5, "pmulhw" , int_x86_sse2_pmulh_w, 1>;
defm PMULUDQ : PDI_binop_rm_int<0xF4, "pmuludq", int_x86_sse2_pmulu_dq, 1>;
-
defm PMADDWD : PDI_binop_rm_int<0xF5, "pmaddwd", int_x86_sse2_pmadd_wd, 1>;
+defm PAVGB : PDI_binop_rm_int<0xE0, "pavgb", int_x86_sse2_pavg_b, 1>;
+defm PAVGW : PDI_binop_rm_int<0xE3, "pavgw", int_x86_sse2_pavg_w, 1>;
+defm PMINUB : PDI_binop_rm_int<0xDA, "pminub", int_x86_sse2_pminu_b, 1>;
+defm PMINSW : PDI_binop_rm_int<0xEA, "pminsw", int_x86_sse2_pmins_w, 1>;
+defm PMAXUB : PDI_binop_rm_int<0xDE, "pmaxub", int_x86_sse2_pmaxu_b, 1>;
+defm PMAXSW : PDI_binop_rm_int<0xEE, "pmaxsw", int_x86_sse2_pmaxs_w, 1>;
+defm PSADBW : PDI_binop_rm_int<0xF6, "psadbw", int_x86_sse2_psad_bw, 1>;
-defm PAVGB : PDI_binop_rm_int<0xE0, "pavgb", int_x86_sse2_pavg_b, 1>;
-defm PAVGW : PDI_binop_rm_int<0xE3, "pavgw", int_x86_sse2_pavg_w, 1>;
+} // Constraints = "$src1 = $dst"
+
+//===---------------------------------------------------------------------===//
+// SSE2 - Packed Integer Logical Instructions
+//===---------------------------------------------------------------------===//
+let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE2] in {
+defm VPSLLW : PDI_binop_rmi_int<0xF1, 0x71, MRM6r, "vpsllw",
+ int_x86_sse2_psll_w, int_x86_sse2_pslli_w, 0>,
+ VEX_4V;
+defm VPSLLD : PDI_binop_rmi_int<0xF2, 0x72, MRM6r, "vpslld",
+ int_x86_sse2_psll_d, int_x86_sse2_pslli_d, 0>,
+ VEX_4V;
+defm VPSLLQ : PDI_binop_rmi_int<0xF3, 0x73, MRM6r, "vpsllq",
+ int_x86_sse2_psll_q, int_x86_sse2_pslli_q, 0>,
+ VEX_4V;
+
+defm VPSRLW : PDI_binop_rmi_int<0xD1, 0x71, MRM2r, "vpsrlw",
+ int_x86_sse2_psrl_w, int_x86_sse2_psrli_w, 0>,
+ VEX_4V;
+defm VPSRLD : PDI_binop_rmi_int<0xD2, 0x72, MRM2r, "vpsrld",
+ int_x86_sse2_psrl_d, int_x86_sse2_psrli_d, 0>,
+ VEX_4V;
+defm VPSRLQ : PDI_binop_rmi_int<0xD3, 0x73, MRM2r, "vpsrlq",
+ int_x86_sse2_psrl_q, int_x86_sse2_psrli_q, 0>,
+ VEX_4V;
+
+defm VPSRAW : PDI_binop_rmi_int<0xE1, 0x71, MRM4r, "vpsraw",
+ int_x86_sse2_psra_w, int_x86_sse2_psrai_w, 0>,
+ VEX_4V;
+defm VPSRAD : PDI_binop_rmi_int<0xE2, 0x72, MRM4r, "vpsrad",
+ int_x86_sse2_psra_d, int_x86_sse2_psrai_d, 0>,
+ VEX_4V;
+
+defm VPAND : PDI_binop_rm_v2i64<0xDB, "vpand", and, 1, 0>, VEX_4V;
+defm VPOR : PDI_binop_rm_v2i64<0xEB, "vpor" , or, 1, 0>, VEX_4V;
+defm VPXOR : PDI_binop_rm_v2i64<0xEF, "vpxor", xor, 1, 0>, VEX_4V;
-defm PMINUB : PDI_binop_rm_int<0xDA, "pminub", int_x86_sse2_pminu_b, 1>;
-defm PMINSW : PDI_binop_rm_int<0xEA, "pminsw", int_x86_sse2_pmins_w, 1>;
-defm PMAXUB : PDI_binop_rm_int<0xDE, "pmaxub", int_x86_sse2_pmaxu_b, 1>;
-defm PMAXSW : PDI_binop_rm_int<0xEE, "pmaxsw", int_x86_sse2_pmaxs_w, 1>;
-defm PSADBW : PDI_binop_rm_int<0xF6, "psadbw", int_x86_sse2_psad_bw, 1>;
+let ExeDomain = SSEPackedInt in {
+ let neverHasSideEffects = 1 in {
+ // 128-bit logical shifts.
+ def VPSLLDQri : PDIi8<0x73, MRM7r,
+ (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
+ "vpslldq\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
+ VEX_4V;
+ def VPSRLDQri : PDIi8<0x73, MRM3r,
+ (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
+ "vpsrldq\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
+ VEX_4V;
+ // PSRADQri doesn't exist in SSE[1-3].
+ }
+ def VPANDNrr : PDI<0xDF, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "vpandn\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst, (v2i64 (and (vnot VR128:$src1),
+ VR128:$src2)))]>, VEX_4V;
+ def VPANDNrm : PDI<0xDF, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
+ "vpandn\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst, (v2i64 (and (vnot VR128:$src1),
+ (memopv2i64 addr:$src2))))]>,
+ VEX_4V;
+}
+}
+let Constraints = "$src1 = $dst" in {
defm PSLLW : PDI_binop_rmi_int<0xF1, 0x71, MRM6r, "psllw",
int_x86_sse2_psll_w, int_x86_sse2_pslli_w>;
defm PSLLD : PDI_binop_rmi_int<0xF2, 0x72, MRM6r, "pslld",
@@ -2129,17 +2515,34 @@
defm PSRAD : PDI_binop_rmi_int<0xE2, 0x72, MRM4r, "psrad",
int_x86_sse2_psra_d, int_x86_sse2_psrai_d>;
-// 128-bit logical shifts.
-let Constraints = "$src1 = $dst", neverHasSideEffects = 1,
- ExeDomain = SSEPackedInt in {
- def PSLLDQri : PDIi8<0x73, MRM7r,
- (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
- "pslldq\t{$src2, $dst|$dst, $src2}", []>;
- def PSRLDQri : PDIi8<0x73, MRM3r,
- (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
- "psrldq\t{$src2, $dst|$dst, $src2}", []>;
- // PSRADQri doesn't exist in SSE[1-3].
+defm PAND : PDI_binop_rm_v2i64<0xDB, "pand", and, 1>;
+defm POR : PDI_binop_rm_v2i64<0xEB, "por" , or, 1>;
+defm PXOR : PDI_binop_rm_v2i64<0xEF, "pxor", xor, 1>;
+
+let ExeDomain = SSEPackedInt in {
+ let neverHasSideEffects = 1 in {
+ // 128-bit logical shifts.
+ def PSLLDQri : PDIi8<0x73, MRM7r,
+ (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
+ "pslldq\t{$src2, $dst|$dst, $src2}", []>;
+ def PSRLDQri : PDIi8<0x73, MRM3r,
+ (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
+ "psrldq\t{$src2, $dst|$dst, $src2}", []>;
+ // PSRADQri doesn't exist in SSE[1-3].
+ }
+ def PANDNrr : PDI<0xDF, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "pandn\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (v2i64 (and (vnot VR128:$src1),
+ VR128:$src2)))]>;
+
+ def PANDNrm : PDI<0xDF, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
+ "pandn\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (v2i64 (and (vnot VR128:$src1),
+ (memopv2i64 addr:$src2))))]>;
}
+} // Constraints = "$src1 = $dst"
let Predicates = [HasSSE2] in {
def : Pat<(int_x86_sse2_psll_dq VR128:$src1, imm:$src2),
@@ -2160,32 +2563,33 @@
(v2i64 (PSRLDQri VR128:$src, (BYTE_imm imm:$amt)))>;
}
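+
+// Note: the psll_dq/psrl_dq intrinsics express the shift amount in bits,
+// while pslldq/psrldq shift whole bytes, so BYTE_imm presumably rewrites
+// the immediate as amt >> 3 before it is encoded.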
-// Logical
-defm PAND : PDI_binop_rm_v2i64<0xDB, "pand", and, 1>;
-defm POR : PDI_binop_rm_v2i64<0xEB, "por" , or , 1>;
-defm PXOR : PDI_binop_rm_v2i64<0xEF, "pxor", xor, 1>;
-
-let Constraints = "$src1 = $dst", ExeDomain = SSEPackedInt in {
- def PANDNrr : PDI<0xDF, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- "pandn\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst, (v2i64 (and (vnot VR128:$src1),
- VR128:$src2)))]>;
+//===---------------------------------------------------------------------===//
+// SSE2 - Packed Integer Comparison Instructions
+//===---------------------------------------------------------------------===//
- def PANDNrm : PDI<0xDF, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
- "pandn\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst, (v2i64 (and (vnot VR128:$src1),
- (memopv2i64 addr:$src2))))]>;
+let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE2] in {
+ defm VPCMPEQB : PDI_binop_rm_int<0x74, "vpcmpeqb", int_x86_sse2_pcmpeq_b, 1,
+ 0>, VEX_4V;
+ defm VPCMPEQW : PDI_binop_rm_int<0x75, "vpcmpeqw", int_x86_sse2_pcmpeq_w, 1,
+ 0>, VEX_4V;
+ defm VPCMPEQD : PDI_binop_rm_int<0x76, "vpcmpeqd", int_x86_sse2_pcmpeq_d, 1,
+ 0>, VEX_4V;
+ defm VPCMPGTB : PDI_binop_rm_int<0x64, "vpcmpgtb", int_x86_sse2_pcmpgt_b, 0,
+ 0>, VEX_4V;
+ defm VPCMPGTW : PDI_binop_rm_int<0x65, "vpcmpgtw", int_x86_sse2_pcmpgt_w, 0,
+ 0>, VEX_4V;
+ defm VPCMPGTD : PDI_binop_rm_int<0x66, "vpcmpgtd", int_x86_sse2_pcmpgt_d, 0,
+ 0>, VEX_4V;
}
-// SSE2 Integer comparison
-defm PCMPEQB : PDI_binop_rm_int<0x74, "pcmpeqb", int_x86_sse2_pcmpeq_b>;
-defm PCMPEQW : PDI_binop_rm_int<0x75, "pcmpeqw", int_x86_sse2_pcmpeq_w>;
-defm PCMPEQD : PDI_binop_rm_int<0x76, "pcmpeqd", int_x86_sse2_pcmpeq_d>;
-defm PCMPGTB : PDI_binop_rm_int<0x64, "pcmpgtb", int_x86_sse2_pcmpgt_b>;
-defm PCMPGTW : PDI_binop_rm_int<0x65, "pcmpgtw", int_x86_sse2_pcmpgt_w>;
-defm PCMPGTD : PDI_binop_rm_int<0x66, "pcmpgtd", int_x86_sse2_pcmpgt_d>;
+let Constraints = "$src1 = $dst" in {
+ defm PCMPEQB : PDI_binop_rm_int<0x74, "pcmpeqb", int_x86_sse2_pcmpeq_b, 1>;
+ defm PCMPEQW : PDI_binop_rm_int<0x75, "pcmpeqw", int_x86_sse2_pcmpeq_w, 1>;
+ defm PCMPEQD : PDI_binop_rm_int<0x76, "pcmpeqd", int_x86_sse2_pcmpeq_d, 1>;
+ defm PCMPGTB : PDI_binop_rm_int<0x64, "pcmpgtb", int_x86_sse2_pcmpgt_b>;
+ defm PCMPGTW : PDI_binop_rm_int<0x65, "pcmpgtw", int_x86_sse2_pcmpgt_w>;
+ defm PCMPGTD : PDI_binop_rm_int<0x66, "pcmpgtd", int_x86_sse2_pcmpgt_d>;
+} // Constraints = "$src1 = $dst"
def : Pat<(v16i8 (X86pcmpeqb VR128:$src1, VR128:$src2)),
(PCMPEQBrr VR128:$src1, VR128:$src2)>;
@@ -2213,72 +2617,138 @@
def : Pat<(v4i32 (X86pcmpgtd VR128:$src1, (memop addr:$src2))),
(PCMPGTDrm VR128:$src1, addr:$src2)>;
+//===---------------------------------------------------------------------===//
+// SSE2 - Packed Integer Pack Instructions
+//===---------------------------------------------------------------------===//
+
+let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE2] in {
+defm VPACKSSWB : PDI_binop_rm_int<0x63, "vpacksswb", int_x86_sse2_packsswb_128,
+ 0, 0>, VEX_4V;
+defm VPACKSSDW : PDI_binop_rm_int<0x6B, "vpackssdw", int_x86_sse2_packssdw_128,
+ 0, 0>, VEX_4V;
+defm VPACKUSWB : PDI_binop_rm_int<0x67, "vpackuswb", int_x86_sse2_packuswb_128,
+ 0, 0>, VEX_4V;
+}
-// Pack instructions
+let Constraints = "$src1 = $dst" in {
defm PACKSSWB : PDI_binop_rm_int<0x63, "packsswb", int_x86_sse2_packsswb_128>;
defm PACKSSDW : PDI_binop_rm_int<0x6B, "packssdw", int_x86_sse2_packssdw_128>;
defm PACKUSWB : PDI_binop_rm_int<0x67, "packuswb", int_x86_sse2_packuswb_128>;
+} // Constraints = "$src1 = $dst"
+
+//===---------------------------------------------------------------------===//
+// SSE2 - Packed Integer Shuffle Instructions
+//===---------------------------------------------------------------------===//
let ExeDomain = SSEPackedInt in {
+multiclass sse2_pshuffle<string OpcodeStr, ValueType vt, PatFrag pshuf_frag,
+ PatFrag bc_frag> {
+def ri : Ii8<0x70, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128:$dst, (vt (pshuf_frag:$src2 VR128:$src1,
+ (undef))))]>;
+def mi : Ii8<0x70, MRMSrcMem,
+ (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128:$dst, (vt (pshuf_frag:$src2
+ (bc_frag (memopv2i64 addr:$src1)),
+ (undef))))]>;
+}
+} // ExeDomain = SSEPackedInt
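+
+// Worked example of the shuffle immediate these match: dst element i is
+// src element imm[2i+1:2i], so pshufd with imm = 0x1B (fields 3,2,1,0)
+// produces {src[3], src[2], src[1], src[0]}, a full lane reversal.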
-// Shuffle and unpack instructions
-let AddedComplexity = 5 in {
-def PSHUFDri : PDIi8<0x70, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, i8imm:$src2),
- "pshufd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set VR128:$dst, (v4i32 (pshufd:$src2
- VR128:$src1, (undef))))]>;
-def PSHUFDmi : PDIi8<0x70, MRMSrcMem,
- (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2),
- "pshufd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set VR128:$dst, (v4i32 (pshufd:$src2
- (bc_v4i32 (memopv2i64 addr:$src1)),
- (undef))))]>;
-}
-
-// SSE2 with ImmT == Imm8 and XS prefix.
-def PSHUFHWri : Ii8<0x70, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, i8imm:$src2),
- "pshufhw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set VR128:$dst, (v8i16 (pshufhw:$src2 VR128:$src1,
- (undef))))]>,
- XS, Requires<[HasSSE2]>;
-def PSHUFHWmi : Ii8<0x70, MRMSrcMem,
- (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2),
- "pshufhw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set VR128:$dst, (v8i16 (pshufhw:$src2
- (bc_v8i16 (memopv2i64 addr:$src1)),
- (undef))))]>,
- XS, Requires<[HasSSE2]>;
-
-// SSE2 with ImmT == Imm8 and XD prefix.
-def PSHUFLWri : Ii8<0x70, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, i8imm:$src2),
- "pshuflw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set VR128:$dst, (v8i16 (pshuflw:$src2 VR128:$src1,
- (undef))))]>,
- XD, Requires<[HasSSE2]>;
-def PSHUFLWmi : Ii8<0x70, MRMSrcMem,
- (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2),
- "pshuflw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set VR128:$dst, (v8i16 (pshuflw:$src2
- (bc_v8i16 (memopv2i64 addr:$src1)),
- (undef))))]>,
- XD, Requires<[HasSSE2]>;
+let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE2] in {
+ let AddedComplexity = 5 in
+ defm VPSHUFD : sse2_pshuffle<"vpshufd", v4i32, pshufd, bc_v4i32>, OpSize,
+ VEX;
+
+ // SSE2 with ImmT == Imm8 and XS prefix.
+ defm VPSHUFHW : sse2_pshuffle<"vpshufhw", v8i16, pshufhw, bc_v8i16>, XS,
+ VEX;
+
+ // SSE2 with ImmT == Imm8 and XD prefix.
+ defm VPSHUFLW : sse2_pshuffle<"vpshuflw", v8i16, pshuflw, bc_v8i16>, XD,
+ VEX;
+}
+
+let Predicates = [HasSSE2] in {
+ let AddedComplexity = 5 in
+ defm PSHUFD : sse2_pshuffle<"pshufd", v4i32, pshufd, bc_v4i32>, TB, OpSize;
+
+ // SSE2 with ImmT == Imm8 and XS prefix.
+ defm PSHUFHW : sse2_pshuffle<"pshufhw", v8i16, pshufhw, bc_v8i16>, XS;
+
+ // SSE2 with ImmT == Imm8 and XD prefix.
+ defm PSHUFLW : sse2_pshuffle<"pshuflw", v8i16, pshuflw, bc_v8i16>, XD;
+}
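
For reference, the 8-bit $src2 immediate of these shuffles selects source
lanes two bits at a time. A minimal C++ model of the pshufd operation
(illustrative only, not part of the patch; the function name is made up):

  #include <cstdint>
  #include <cstdio>

  // PSHUFD: each 2-bit field of the immediate picks one of the four
  // 32-bit source lanes for the corresponding destination lane.
  static void pshufd(uint32_t dst[4], const uint32_t src[4], uint8_t imm) {
    for (int i = 0; i != 4; ++i)
      dst[i] = src[(imm >> (2 * i)) & 3];
  }

  int main() {
    uint32_t src[4] = {10, 11, 12, 13}, dst[4];
    pshufd(dst, src, 0x1B); // 0b00011011: reverse the lanes
    for (uint32_t v : dst)
      printf("%u ", v);     // prints: 13 12 11 10
    return 0;
  }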
+
+//===---------------------------------------------------------------------===//
+// SSE2 - Packed Integer Unpack Instructions
+//===---------------------------------------------------------------------===//
-// Unpack instructions
+let ExeDomain = SSEPackedInt in {
multiclass sse2_unpack<bits<8> opc, string OpcodeStr, ValueType vt,
- PatFrag unp_frag, PatFrag bc_frag> {
+ PatFrag unp_frag, PatFrag bc_frag, bit Is2Addr = 1> {
def rr : PDI<opc, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- !strconcat(OpcodeStr,"\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (vt (unp_frag VR128:$src1, VR128:$src2)))]>;
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr,"\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (vt (unp_frag VR128:$src1, VR128:$src2)))]>;
def rm : PDI<opc, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
- !strconcat(OpcodeStr,"\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (unp_frag VR128:$src1,
- (bc_frag (memopv2i64
- addr:$src2))))]>;
+ (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr,"\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (unp_frag VR128:$src1,
+ (bc_frag (memopv2i64
+ addr:$src2))))]>;
+}
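
The !if(Is2Addr, ...) above picks between the tied two-operand SSE syntax
and the three-operand AVX syntax. A plain C++ sketch of that string
selection (the helper name is illustrative, not LLVM API):

  #include <cstdio>
  #include <string>

  static std::string asmString(const std::string &Opc, bool Is2Addr) {
    // SSE form: $src1 is tied to $dst, so only $src2 is printed.
    // AVX form: a separate destination, so all three operands are printed.
    return Is2Addr ? Opc + "\t{$src2, $dst|$dst, $src2}"
                   : Opc + "\t{$src2, $src1, $dst|$dst, $src1, $src2}";
  }

  int main() {
    printf("%s\n", asmString("punpcklbw", true).c_str());
    printf("%s\n", asmString("vpunpcklbw", false).c_str());
    return 0;
  }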
+
+let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE2] in {
+ defm VPUNPCKLBW : sse2_unpack<0x60, "vpunpcklbw", v16i8, unpckl, bc_v16i8,
+ 0>, VEX_4V;
+ defm VPUNPCKLWD : sse2_unpack<0x61, "vpunpcklwd", v8i16, unpckl, bc_v8i16,
+ 0>, VEX_4V;
+ defm VPUNPCKLDQ : sse2_unpack<0x62, "vpunpckldq", v4i32, unpckl, bc_v4i32,
+ 0>, VEX_4V;
+
+ /// FIXME: we could eliminate this and use sse2_unpack instead if tblgen
+ /// knew to collapse (bitconvert VT to VT) into its operand.
+ def VPUNPCKLQDQrr : PDI<0x6C, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "vpunpcklqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst,
+ (v2i64 (unpckl VR128:$src1, VR128:$src2)))]>, VEX_4V;
+ def VPUNPCKLQDQrm : PDI<0x6C, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
+ "vpunpcklqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst,
+ (v2i64 (unpckl VR128:$src1,
+ (memopv2i64 addr:$src2))))]>, VEX_4V;
+
+ defm VPUNPCKHBW : sse2_unpack<0x68, "vpunpckhbw", v16i8, unpckh, bc_v16i8,
+ 0>, VEX_4V;
+ defm VPUNPCKHWD : sse2_unpack<0x69, "vpunpckhwd", v8i16, unpckh, bc_v8i16,
+ 0>, VEX_4V;
+ defm VPUNPCKHDQ : sse2_unpack<0x6A, "vpunpckhdq", v4i32, unpckh, bc_v4i32,
+ 0>, VEX_4V;
+
+ /// FIXME: we could eliminate this and use sse2_unpack instead if tblgen
+ /// knew to collapse (bitconvert VT to VT) into its operand.
+ def VPUNPCKHQDQrr : PDI<0x6D, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "vpunpckhqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst,
+ (v2i64 (unpckh VR128:$src1, VR128:$src2)))]>, VEX_4V;
+ def VPUNPCKHQDQrm : PDI<0x6D, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
+ "vpunpckhqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst,
+ (v2i64 (unpckh VR128:$src1,
+ (memopv2i64 addr:$src2))))]>, VEX_4V;
}
let Constraints = "$src1 = $dst" in {
@@ -2319,102 +2789,117 @@
(memopv2i64 addr:$src2))))]>;
}
-// Extract / Insert
+} // ExeDomain = SSEPackedInt
+
+//===---------------------------------------------------------------------===//
+// SSE2 - Packed Integer Extract and Insert
+//===---------------------------------------------------------------------===//
+
+let ExeDomain = SSEPackedInt in {
+multiclass sse2_pinsrw<bit Is2Addr = 1> {
+ def rri : Ii8<0xC4, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1,
+ GR32:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ "vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set VR128:$dst,
+ (X86pinsrw VR128:$src1, GR32:$src2, imm:$src3))]>;
+ def rmi : Ii8<0xC4, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1,
+ i16mem:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ "vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set VR128:$dst,
+ (X86pinsrw VR128:$src1, (extloadi16 addr:$src2),
+ imm:$src3))]>;
+}
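
For reference, pinsrw replaces the 16-bit lane selected by the immediate
with the low 16 bits of the GR32 source. A minimal model (illustrative
only):

  #include <cstdint>

  // PINSRW: imm picks which of the eight 16-bit lanes is replaced;
  // only the low 16 bits of the 32-bit source are used.
  static void pinsrw(uint16_t xmm[8], uint32_t src, unsigned imm) {
    xmm[imm & 7] = (uint16_t)src;
  }

  int main() {
    uint16_t x[8] = {0, 1, 2, 3, 4, 5, 6, 7};
    pinsrw(x, 0xABCD1234, 2); // lane 2 becomes 0x1234
    return x[2] == 0x1234 ? 0 : 1;
  }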
+
+// Extract
+let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE2] in
+def VPEXTRWri : Ii8<0xC5, MRMSrcReg,
+ (outs GR32:$dst), (ins VR128:$src1, i32i8imm:$src2),
+ "vpextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR32:$dst, (X86pextrw (v8i16 VR128:$src1),
+ imm:$src2))]>, OpSize, VEX;
def PEXTRWri : PDIi8<0xC5, MRMSrcReg,
(outs GR32:$dst), (ins VR128:$src1, i32i8imm:$src2),
"pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR32:$dst, (X86pextrw (v8i16 VR128:$src1),
imm:$src2))]>;
-let Constraints = "$src1 = $dst" in {
- def PINSRWrri : PDIi8<0xC4, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1,
- GR32:$src2, i32i8imm:$src3),
- "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- [(set VR128:$dst,
- (X86pinsrw VR128:$src1, GR32:$src2, imm:$src3))]>;
- def PINSRWrmi : PDIi8<0xC4, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1,
- i16mem:$src2, i32i8imm:$src3),
- "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- [(set VR128:$dst,
- (X86pinsrw VR128:$src1, (extloadi16 addr:$src2),
- imm:$src3))]>;
-}
-// Mask creation
-def PMOVMSKBrr : PDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
- "pmovmskb\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))]>;
-
-// Conditional store
-let Uses = [EDI] in
-def MASKMOVDQU : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
- "maskmovdqu\t{$mask, $src|$src, $mask}",
- [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)]>;
+// Insert
+let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE2] in
+ defm VPINSRW : sse2_pinsrw<0>, OpSize, VEX_4V;
-let Uses = [RDI] in
-def MASKMOVDQU64 : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
- "maskmovdqu\t{$mask, $src|$src, $mask}",
- [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)]>;
+let Constraints = "$src1 = $dst" in
+ defm PINSRW : sse2_pinsrw, TB, OpSize;
} // ExeDomain = SSEPackedInt
-// Non-temporal stores
-def MOVNTPDmr_Int : PDI<0x2B, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
- "movntpd\t{$src, $dst|$dst, $src}",
- [(int_x86_sse2_movnt_pd addr:$dst, VR128:$src)]>;
-let ExeDomain = SSEPackedInt in
-def MOVNTDQmr_Int : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
- "movntdq\t{$src, $dst|$dst, $src}",
- [(int_x86_sse2_movnt_dq addr:$dst, VR128:$src)]>;
-def MOVNTImr_Int : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
- "movnti\t{$src, $dst|$dst, $src}",
- [(int_x86_sse2_movnt_i addr:$dst, GR32:$src)]>,
- TB, Requires<[HasSSE2]>;
+//===---------------------------------------------------------------------===//
+// SSE2 - Packed Mask Creation
+//===---------------------------------------------------------------------===//
-let AddedComplexity = 400 in { // Prefer non-temporal versions
-def MOVNTPDmr : PDI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
- "movntpd\t{$src, $dst|$dst, $src}",
- [(alignednontemporalstore(v2f64 VR128:$src), addr:$dst)]>;
+let ExeDomain = SSEPackedInt in {
-let ExeDomain = SSEPackedInt in
-def MOVNTDQmr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
- "movntdq\t{$src, $dst|$dst, $src}",
- [(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)]>;
-}
+let isAsmParserOnly = 1 in
+def VPMOVMSKBrr : VPDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
+ "pmovmskb\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))]>, VEX;
+def PMOVMSKBrr : PDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
+ "pmovmskb\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))]>;
-// Flush cache
-def CLFLUSH : I<0xAE, MRM7m, (outs), (ins i8mem:$src),
- "clflush\t$src", [(int_x86_sse2_clflush addr:$src)]>,
- TB, Requires<[HasSSE2]>;
+} // ExeDomain = SSEPackedInt
-// Load, store, and memory fence
-def LFENCE : I<0xAE, MRM_E8, (outs), (ins),
- "lfence", [(int_x86_sse2_lfence)]>, TB, Requires<[HasSSE2]>;
-def MFENCE : I<0xAE, MRM_F0, (outs), (ins),
- "mfence", [(int_x86_sse2_mfence)]>, TB, Requires<[HasSSE2]>;
+//===---------------------------------------------------------------------===//
+// SSE2 - Conditional Store
+//===---------------------------------------------------------------------===//
-// Pause. This "instruction" is encoded as "rep; nop", so even though it
-// was introduced with SSE2, it's backward compatible.
-def PAUSE : I<0x90, RawFrm, (outs), (ins), "pause", []>, REP;
+let ExeDomain = SSEPackedInt in {
-//TODO: custom lower this so as to never even generate the noop
-def : Pat<(membarrier (i8 imm), (i8 imm), (i8 imm), (i8 imm),
- (i8 0)), (NOOP)>;
-def : Pat<(membarrier (i8 0), (i8 0), (i8 0), (i8 1), (i8 1)), (SFENCE)>;
-def : Pat<(membarrier (i8 1), (i8 0), (i8 0), (i8 0), (i8 1)), (LFENCE)>;
-def : Pat<(membarrier (i8 imm), (i8 imm), (i8 imm), (i8 imm),
- (i8 1)), (MFENCE)>;
+let isAsmParserOnly = 1 in {
+let Uses = [EDI] in
+def VMASKMOVDQU : VPDI<0xF7, MRMSrcReg, (outs),
+ (ins VR128:$src, VR128:$mask),
+ "maskmovdqu\t{$mask, $src|$src, $mask}",
+ [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)]>, VEX;
+let Uses = [RDI] in
+def VMASKMOVDQU64 : VPDI<0xF7, MRMSrcReg, (outs),
+ (ins VR128:$src, VR128:$mask),
+ "maskmovdqu\t{$mask, $src|$src, $mask}",
+ [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)]>, VEX;
+}
-// Alias instructions that map zero vector to pxor / xorp* for sse.
-// We set canFoldAsLoad because this can be converted to a constant-pool
-// load of an all-ones value if folding it would be beneficial.
-let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
- isCodeGenOnly = 1, ExeDomain = SSEPackedInt in
- // FIXME: Change encoding to pseudo.
- def V_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins), "",
- [(set VR128:$dst, (v4i32 immAllOnesV))]>;
+let Uses = [EDI] in
+def MASKMOVDQU : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
+ "maskmovdqu\t{$mask, $src|$src, $mask}",
+ [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)]>;
+let Uses = [RDI] in
+def MASKMOVDQU64 : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
+ "maskmovdqu\t{$mask, $src|$src, $mask}",
+ [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)]>;
+
+} // ExeDomain = SSEPackedInt
+
+//===---------------------------------------------------------------------===//
+// SSE2 - Move Doubleword
+//===---------------------------------------------------------------------===//
+// Move Int Doubleword to Packed Double Int
+let isAsmParserOnly = 1 in {
+def VMOVDI2PDIrr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v4i32 (scalar_to_vector GR32:$src)))]>, VEX;
+def VMOVDI2PDIrm : VPDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>,
+ VEX;
+}
def MOVDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
@@ -2424,6 +2909,18 @@
[(set VR128:$dst,
(v4i32 (scalar_to_vector (loadi32 addr:$src))))]>;
+
+// Move Int Doubleword to Single Scalar
+let isAsmParserOnly = 1 in {
+def VMOVDI2SSrr : VPDI<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set FR32:$dst, (bitconvert GR32:$src))]>, VEX;
+
+def VMOVDI2SSrm : VPDI<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))]>,
+ VEX;
+}
def MOVDI2SSrr : PDI<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set FR32:$dst, (bitconvert GR32:$src))]>;
@@ -2432,20 +2929,18 @@
"movd\t{$src, $dst|$dst, $src}",
[(set FR32:$dst, (bitconvert (loadi32 addr:$src)))]>;
-// SSE2 instructions with XS prefix
-def MOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
- "movq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst,
- (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>, XS,
- Requires<[HasSSE2]>;
-def MOVPQI2QImr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
- "movq\t{$src, $dst|$dst, $src}",
- [(store (i64 (vector_extract (v2i64 VR128:$src),
- (iPTR 0))), addr:$dst)]>;
-
-def : Pat<(f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
- (f64 (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>;
-
+// Move Packed Double Int to Int Doubleword
+let isAsmParserOnly = 1 in {
+def VMOVPDI2DIrr : VPDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (vector_extract (v4i32 VR128:$src),
+ (iPTR 0)))]>, VEX;
+def VMOVPDI2DImr : VPDI<0x7E, MRMDestMem, (outs),
+ (ins i32mem:$dst, VR128:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(store (i32 (vector_extract (v4i32 VR128:$src),
+ (iPTR 0))), addr:$dst)]>, VEX;
+}
def MOVPDI2DIrr : PDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (vector_extract (v4i32 VR128:$src),
@@ -2455,6 +2950,15 @@
[(store (i32 (vector_extract (v4i32 VR128:$src),
(iPTR 0))), addr:$dst)]>;
+// Move Scalar Single to Double Int
+let isAsmParserOnly = 1 in {
+def VMOVSS2DIrr : VPDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (bitconvert FR32:$src))]>, VEX;
+def VMOVSS2DImr : VPDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(store (i32 (bitconvert FR32:$src)), addr:$dst)]>, VEX;
+}
def MOVSS2DIrr : PDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (bitconvert FR32:$src))]>;
@@ -2462,44 +2966,107 @@
"movd\t{$src, $dst|$dst, $src}",
[(store (i32 (bitconvert FR32:$src)), addr:$dst)]>;
+// movd / movq to XMM register zero-extends
+let AddedComplexity = 15, isAsmParserOnly = 1 in {
+def VMOVZDI2PDIrr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (v4i32 (X86vzmovl
+ (v4i32 (scalar_to_vector GR32:$src)))))]>,
+ VEX;
+def VMOVZQI2PQIrr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
+ "mov{d|q}\t{$src, $dst|$dst, $src}", // X86-64 only
+ [(set VR128:$dst, (v2i64 (X86vzmovl
+ (v2i64 (scalar_to_vector GR64:$src)))))]>,
+ VEX, VEX_W;
+}
+let AddedComplexity = 15 in {
+def MOVZDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (v4i32 (X86vzmovl
+ (v4i32 (scalar_to_vector GR32:$src)))))]>;
+def MOVZQI2PQIrr : RPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
+ "mov{d|q}\t{$src, $dst|$dst, $src}", // X86-64 only
+ [(set VR128:$dst, (v2i64 (X86vzmovl
+ (v2i64 (scalar_to_vector GR64:$src)))))]>;
+}
+
+let AddedComplexity = 20 in {
+let isAsmParserOnly = 1 in
+def VMOVZDI2PDIrm : VPDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v4i32 (X86vzmovl (v4i32 (scalar_to_vector
+ (loadi32 addr:$src))))))]>,
+ VEX;
+def MOVZDI2PDIrm : PDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v4i32 (X86vzmovl (v4i32 (scalar_to_vector
+ (loadi32 addr:$src))))))]>;
+
+def : Pat<(v4i32 (X86vzmovl (loadv4i32 addr:$src))),
+ (MOVZDI2PDIrm addr:$src)>;
+def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))),
+ (MOVZDI2PDIrm addr:$src)>;
+def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
+ (MOVZDI2PDIrm addr:$src)>;
+}
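
The X86vzmovl patterns above express that these moves zero every lane
except the low one. A minimal model of the register form (illustrative
only):

  #include <cstdint>

  // movd GR32 -> XMM with zero extension: the low lane gets the source,
  // the remaining three 32-bit lanes are cleared.
  static void movd_zx(uint32_t xmm[4], uint32_t src) {
    xmm[0] = src;
    xmm[1] = xmm[2] = xmm[3] = 0;
  }

  int main() {
    uint32_t x[4] = {1, 2, 3, 4};
    movd_zx(x, 42);
    return (x[0] == 42 && x[1] == 0 && x[2] == 0 && x[3] == 0) ? 0 : 1;
  }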
+
+//===---------------------------------------------------------------------===//
+// SSE2 - Move Quadword
+//===---------------------------------------------------------------------===//
+
+// Move Quadword Int to Packed Quadword Int
+let isAsmParserOnly = 1 in
+def VMOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
+ "vmovq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>, XS,
+ VEX, Requires<[HasAVX, HasSSE2]>;
+def MOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
+ "movq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>, XS,
+ Requires<[HasSSE2]>; // SSE2 instruction with XS Prefix
+
+// Move Packed Quadword Int to Quadword Int
+let isAsmParserOnly = 1 in
+def VMOVPQI2QImr : VPDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
+ "movq\t{$src, $dst|$dst, $src}",
+ [(store (i64 (vector_extract (v2i64 VR128:$src),
+ (iPTR 0))), addr:$dst)]>, VEX;
+def MOVPQI2QImr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
+ "movq\t{$src, $dst|$dst, $src}",
+ [(store (i64 (vector_extract (v2i64 VR128:$src),
+ (iPTR 0))), addr:$dst)]>;
+
+def : Pat<(f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
+ (f64 (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>;
+
// Store / copy lower 64-bits of a XMM register.
+let isAsmParserOnly = 1 in
+def VMOVLQ128mr : VPDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
+ "movq\t{$src, $dst|$dst, $src}",
+ [(int_x86_sse2_storel_dq addr:$dst, VR128:$src)]>, VEX;
def MOVLQ128mr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
"movq\t{$src, $dst|$dst, $src}",
[(int_x86_sse2_storel_dq addr:$dst, VR128:$src)]>;
-// movd / movq to XMM register zero-extends
-let AddedComplexity = 15 in {
-def MOVZDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
- "movd\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (v4i32 (X86vzmovl
- (v4i32 (scalar_to_vector GR32:$src)))))]>;
-// This is X86-64 only.
-def MOVZQI2PQIrr : RPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
- "mov{d|q}\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (v2i64 (X86vzmovl
- (v2i64 (scalar_to_vector GR64:$src)))))]>;
-}
+let AddedComplexity = 20, isAsmParserOnly = 1 in
+def VMOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
+ "vmovq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v2i64 (X86vzmovl (v2i64 (scalar_to_vector
+ (loadi64 addr:$src))))))]>,
+ XS, VEX, Requires<[HasAVX, HasSSE2]>;
let AddedComplexity = 20 in {
-def MOVZDI2PDIrm : PDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
- "movd\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst,
- (v4i32 (X86vzmovl (v4i32 (scalar_to_vector
- (loadi32 addr:$src))))))]>;
-
-def : Pat<(v4i32 (X86vzmovl (loadv4i32 addr:$src))),
- (MOVZDI2PDIrm addr:$src)>;
-def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))),
- (MOVZDI2PDIrm addr:$src)>;
-def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
- (MOVZDI2PDIrm addr:$src)>;
-
def MOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
"movq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v2i64 (X86vzmovl (v2i64 (scalar_to_vector
- (loadi64 addr:$src))))))]>, XS,
- Requires<[HasSSE2]>;
+ (loadi64 addr:$src))))))]>,
+ XS, Requires<[HasSSE2]>;
def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
(MOVZQI2PQIrm addr:$src)>;
@@ -2510,12 +3077,23 @@
// Moving from XMM to XMM and clear upper 64 bits. Note, there is a bug in
// IA32 document. movq xmm1, xmm2 does clear the high bits.
+let isAsmParserOnly = 1, AddedComplexity = 15 in
+def VMOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "vmovq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))]>,
+ XS, VEX, Requires<[HasAVX, HasSSE2]>;
let AddedComplexity = 15 in
def MOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"movq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))]>,
XS, Requires<[HasSSE2]>;
+let AddedComplexity = 20, isAsmParserOnly = 1 in
+def VMOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
+ "vmovq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (v2i64 (X86vzmovl
+ (loadv2i64 addr:$src))))]>,
+ XS, VEX, Requires<[HasAVX, HasSSE2]>;
let AddedComplexity = 20 in {
def MOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"movq\t{$src, $dst|$dst, $src}",
@@ -2527,49 +3105,136 @@
(MOVZPQILo2PQIrm addr:$src)>;
}
+// Instructions to match in the assembler
+let isAsmParserOnly = 1 in {
+// These instructions are in fact aliases of movd with a 64-bit GPR operand.
+def VMOVQs64rr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
+ "movq\t{$src, $dst|$dst, $src}", []>, VEX, VEX_W;
+def VMOVQd64rr : VPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
+ "movq\t{$src, $dst|$dst, $src}", []>, VEX, VEX_W;
+}
+
// Instructions for the disassembler
// xr = XMM register
// xm = mem64
+let isAsmParserOnly = 1 in
+def VMOVQxrxr: I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "vmovq\t{$src, $dst|$dst, $src}", []>, VEX, XS;
def MOVQxrxr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"movq\t{$src, $dst|$dst, $src}", []>, XS;
//===---------------------------------------------------------------------===//
-// SSE3 Instructions
+// SSE2 - Misc Instructions
//===---------------------------------------------------------------------===//
-// Move Instructions
-def MOVSHDUPrr : S3SI<0x16, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "movshdup\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (v4f32 (movshdup
- VR128:$src, (undef))))]>;
-def MOVSHDUPrm : S3SI<0x16, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "movshdup\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (movshdup
- (memopv4f32 addr:$src), (undef)))]>;
+// Flush cache
+def CLFLUSH : I<0xAE, MRM7m, (outs), (ins i8mem:$src),
+ "clflush\t$src", [(int_x86_sse2_clflush addr:$src)]>,
+ TB, Requires<[HasSSE2]>;
+
+// Load, store, and memory fence
+def LFENCE : I<0xAE, MRM_E8, (outs), (ins),
+ "lfence", [(int_x86_sse2_lfence)]>, TB, Requires<[HasSSE2]>;
+def MFENCE : I<0xAE, MRM_F0, (outs), (ins),
+ "mfence", [(int_x86_sse2_mfence)]>, TB, Requires<[HasSSE2]>;
+
+// Pause. This "instruction" is encoded as "rep; nop", so even though it
+// was introduced with SSE2, it's backward compatible.
+def PAUSE : I<0x90, RawFrm, (outs), (ins), "pause", []>, REP;
+
+//TODO: custom lower this so as to never even generate the noop
+def : Pat<(membarrier (i8 imm), (i8 imm), (i8 imm), (i8 imm),
+ (i8 0)), (NOOP)>;
+def : Pat<(membarrier (i8 0), (i8 0), (i8 0), (i8 1), (i8 1)), (SFENCE)>;
+def : Pat<(membarrier (i8 1), (i8 0), (i8 0), (i8 0), (i8 1)), (LFENCE)>;
+def : Pat<(membarrier (i8 imm), (i8 imm), (i8 imm), (i8 imm),
+ (i8 1)), (MFENCE)>;
+
+// Alias instruction that maps an all-ones vector to pcmpeqd for SSE.
+// We set canFoldAsLoad because this can be converted to a constant-pool
+// load of an all-ones value if folding it would be beneficial.
+let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
+ isCodeGenOnly = 1, ExeDomain = SSEPackedInt in
+ // FIXME: Change encoding to pseudo.
+ def V_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins), "",
+ [(set VR128:$dst, (v4i32 immAllOnesV))]>;
+
+//===---------------------------------------------------------------------===//
+// SSE3 - Conversion Instructions
+//===---------------------------------------------------------------------===//
+
+let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE3] in {
+def VCVTPD2DQrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "vcvtpd2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTDQ2PDrm : S3SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTDQ2PDrr : S3SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
+}
+
+def CVTPD2DQrm : S3DI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "cvtpd2dq\t{$src, $dst|$dst, $src}", []>;
+def CVTPD2DQrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvtpd2dq\t{$src, $dst|$dst, $src}", []>;
+def CVTDQ2PDrm : S3SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "cvtdq2pd\t{$src, $dst|$dst, $src}", []>;
+def CVTDQ2PDrr : S3SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvtdq2pd\t{$src, $dst|$dst, $src}", []>;
-def MOVSLDUPrr : S3SI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "movsldup\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (v4f32 (movsldup
+//===---------------------------------------------------------------------===//
+// SSE3 - Move Instructions
+//===---------------------------------------------------------------------===//
+
+// Replicate Single FP
+multiclass sse3_replicate_sfp<bits<8> op, PatFrag rep_frag, string OpcodeStr> {
+def rr : S3SI<op, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (v4f32 (rep_frag
VR128:$src, (undef))))]>;
-def MOVSLDUPrm : S3SI<0x12, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "movsldup\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (movsldup
+def rm : S3SI<op, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (rep_frag
(memopv4f32 addr:$src), (undef)))]>;
+}
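
The two rep_frag instantiations below are movshdup and movsldup, which
duplicate the odd and even lane of each pair, respectively. A minimal
model (illustrative only):

  // movshdup: {s0,s1,s2,s3} -> {s1,s1,s3,s3}
  static void movshdup(float d[4], const float s[4]) {
    d[0] = d[1] = s[1];
    d[2] = d[3] = s[3];
  }
  // movsldup: {s0,s1,s2,s3} -> {s0,s0,s2,s2}
  static void movsldup(float d[4], const float s[4]) {
    d[0] = d[1] = s[0];
    d[2] = d[3] = s[2];
  }

  int main() {
    float s[4] = {0.f, 1.f, 2.f, 3.f}, d[4];
    movshdup(d, s); // d = {1, 1, 3, 3}
    movsldup(d, s); // d = {0, 0, 2, 2}
    return d[0] == 0.f ? 0 : 1;
  }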
-def MOVDDUPrr : S3DI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "movddup\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst,(v2f64 (movddup VR128:$src, (undef))))]>;
-def MOVDDUPrm : S3DI<0x12, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
- "movddup\t{$src, $dst|$dst, $src}",
+let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE3] in {
+defm VMOVSHDUP : sse3_replicate_sfp<0x16, movshdup, "vmovshdup">, VEX;
+defm VMOVSLDUP : sse3_replicate_sfp<0x12, movsldup, "vmovsldup">, VEX;
+}
+defm MOVSHDUP : sse3_replicate_sfp<0x16, movshdup, "movshdup">;
+defm MOVSLDUP : sse3_replicate_sfp<0x12, movsldup, "movsldup">;
+
+// Replicate Double FP
+multiclass sse3_replicate_dfp<string OpcodeStr> {
+def rr : S3DI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst,(v2f64 (movddup VR128:$src, (undef))))]>;
+def rm : S3DI<0x12, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst,
(v2f64 (movddup (scalar_to_vector (loadf64 addr:$src)),
(undef))))]>;
+}
+
+let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE3] in
+ defm VMOVDDUP : sse3_replicate_dfp<"vmovddup">, VEX;
+defm MOVDDUP : sse3_replicate_dfp<"movddup">;
+
+// Move Unaligned Integer
+let isAsmParserOnly = 1 in
+ def VLDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
+ "vlddqu\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>, VEX;
+def LDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
+ "lddqu\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>;
def : Pat<(movddup (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src)))),
(undef)),
(MOVDDUPrm addr:$src)>, Requires<[HasSSE3]>;
+// Additional move patterns
let AddedComplexity = 5 in {
def : Pat<(movddup (memopv2f64 addr:$src), (undef)),
(MOVDDUPrm addr:$src)>, Requires<[HasSSE3]>;
@@ -2581,52 +3246,98 @@
(MOVDDUPrm addr:$src)>, Requires<[HasSSE3]>;
}
-// Arithmetic
-let Constraints = "$src1 = $dst" in {
- def ADDSUBPSrr : S3DI<0xD0, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- "addsubps\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst, (int_x86_sse3_addsub_ps VR128:$src1,
- VR128:$src2))]>;
- def ADDSUBPSrm : S3DI<0xD0, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
- "addsubps\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst, (int_x86_sse3_addsub_ps VR128:$src1,
- (memop addr:$src2)))]>;
- def ADDSUBPDrr : S3I<0xD0, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- "addsubpd\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst, (int_x86_sse3_addsub_pd VR128:$src1,
- VR128:$src2))]>;
- def ADDSUBPDrm : S3I<0xD0, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
- "addsubpd\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst, (int_x86_sse3_addsub_pd VR128:$src1,
- (memop addr:$src2)))]>;
+// vector_shuffle v1, <undef> <1, 1, 3, 3>
+let AddedComplexity = 15 in
+def : Pat<(v4i32 (movshdup VR128:$src, (undef))),
+ (MOVSHDUPrr VR128:$src)>, Requires<[HasSSE3]>;
+let AddedComplexity = 20 in
+def : Pat<(v4i32 (movshdup (bc_v4i32 (memopv2i64 addr:$src)), (undef))),
+ (MOVSHDUPrm addr:$src)>, Requires<[HasSSE3]>;
+
+// vector_shuffle v1, <undef> <0, 0, 2, 2>
+let AddedComplexity = 15 in
+ def : Pat<(v4i32 (movsldup VR128:$src, (undef))),
+ (MOVSLDUPrr VR128:$src)>, Requires<[HasSSE3]>;
+let AddedComplexity = 20 in
+ def : Pat<(v4i32 (movsldup (bc_v4i32 (memopv2i64 addr:$src)), (undef))),
+ (MOVSLDUPrm addr:$src)>, Requires<[HasSSE3]>;
+
+//===---------------------------------------------------------------------===//
+// SSE3 - Arithmetic
+//===---------------------------------------------------------------------===//
+
+multiclass sse3_addsub<Intrinsic Int, string OpcodeStr, bit Is2Addr = 1> {
+ def rr : I<0xD0, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (Int VR128:$src1,
+ VR128:$src2))]>;
+ def rm : I<0xD0, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (Int VR128:$src1,
+ (memop addr:$src2)))]>;
+
}
-def LDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
- "lddqu\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>;
+let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE3],
+ ExeDomain = SSEPackedDouble in {
+ defm VADDSUBPS : sse3_addsub<int_x86_sse3_addsub_ps, "vaddsubps", 0>, XD,
+ VEX_4V;
+ defm VADDSUBPD : sse3_addsub<int_x86_sse3_addsub_pd, "vaddsubpd", 0>, OpSize,
+ VEX_4V;
+}
+let Constraints = "$src1 = $dst", Predicates = [HasSSE3],
+ ExeDomain = SSEPackedDouble in {
+ defm ADDSUBPS : sse3_addsub<int_x86_sse3_addsub_ps, "addsubps">, XD;
+ defm ADDSUBPD : sse3_addsub<int_x86_sse3_addsub_pd, "addsubpd">, TB, OpSize;
+}
+
+//===---------------------------------------------------------------------===//
+// SSE3 Instructions
+//===---------------------------------------------------------------------===//
// Horizontal ops
-class S3D_Intrr<bits<8> o, string OpcodeStr, Intrinsic IntId>
+class S3D_Intrr<bits<8> o, string OpcodeStr, Intrinsic IntId, bit Is2Addr = 1>
: S3DI<o, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ !if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set VR128:$dst, (v4f32 (IntId VR128:$src1, VR128:$src2)))]>;
-class S3D_Intrm<bits<8> o, string OpcodeStr, Intrinsic IntId>
+class S3D_Intrm<bits<8> o, string OpcodeStr, Intrinsic IntId, bit Is2Addr = 1>
: S3DI<o, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
+ !if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set VR128:$dst, (v4f32 (IntId VR128:$src1, (memop addr:$src2))))]>;
-class S3_Intrr<bits<8> o, string OpcodeStr, Intrinsic IntId>
+class S3_Intrr<bits<8> o, string OpcodeStr, Intrinsic IntId, bit Is2Addr = 1>
: S3I<o, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set VR128:$dst, (v2f64 (IntId VR128:$src1, VR128:$src2)))]>;
-class S3_Intrm<bits<8> o, string OpcodeStr, Intrinsic IntId>
+class S3_Intrm<bits<8> o, string OpcodeStr, Intrinsic IntId, bit Is2Addr = 1>
: S3I<o, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set VR128:$dst, (v2f64 (IntId VR128:$src1, (memopv2f64 addr:$src2))))]>;
+let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE3] in {
+ def VHADDPSrr : S3D_Intrr<0x7C, "vhaddps", int_x86_sse3_hadd_ps, 0>, VEX_4V;
+ def VHADDPSrm : S3D_Intrm<0x7C, "vhaddps", int_x86_sse3_hadd_ps, 0>, VEX_4V;
+ def VHADDPDrr : S3_Intrr <0x7C, "vhaddpd", int_x86_sse3_hadd_pd, 0>, VEX_4V;
+ def VHADDPDrm : S3_Intrm <0x7C, "vhaddpd", int_x86_sse3_hadd_pd, 0>, VEX_4V;
+ def VHSUBPSrr : S3D_Intrr<0x7D, "vhsubps", int_x86_sse3_hsub_ps, 0>, VEX_4V;
+ def VHSUBPSrm : S3D_Intrm<0x7D, "vhsubps", int_x86_sse3_hsub_ps, 0>, VEX_4V;
+ def VHSUBPDrr : S3_Intrr <0x7D, "vhsubpd", int_x86_sse3_hsub_pd, 0>, VEX_4V;
+ def VHSUBPDrm : S3_Intrm <0x7D, "vhsubpd", int_x86_sse3_hsub_pd, 0>, VEX_4V;
+}
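
For reference, the horizontal ops sum adjacent pairs, taking the low half
of the result from $src1 and the high half from $src2. A minimal model of
haddps (illustrative only):

  // HADDPS: d = {a0+a1, a2+a3, b0+b1, b2+b3}
  static void haddps(float d[4], const float a[4], const float b[4]) {
    d[0] = a[0] + a[1];
    d[1] = a[2] + a[3];
    d[2] = b[0] + b[1];
    d[3] = b[2] + b[3];
  }

  int main() {
    float a[4] = {1, 2, 3, 4}, b[4] = {10, 20, 30, 40}, d[4];
    haddps(d, a, b); // d = {3, 7, 30, 70}
    return d[0] == 3.f ? 0 : 1;
  }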
+
let Constraints = "$src1 = $dst" in {
def HADDPSrr : S3D_Intrr<0x7C, "haddps", int_x86_sse3_hadd_ps>;
def HADDPSrm : S3D_Intrm<0x7C, "haddps", int_x86_sse3_hadd_ps>;
@@ -2638,35 +3349,14 @@
def HSUBPDrm : S3_Intrm <0x7D, "hsubpd", int_x86_sse3_hsub_pd>;
}
-// Thread synchronization
-def MONITOR : I<0x01, MRM_C8, (outs), (ins), "monitor",
- [(int_x86_sse3_monitor EAX, ECX, EDX)]>,TB, Requires<[HasSSE3]>;
-def MWAIT : I<0x01, MRM_C9, (outs), (ins), "mwait",
- [(int_x86_sse3_mwait ECX, EAX)]>, TB, Requires<[HasSSE3]>;
-
-// vector_shuffle v1, <undef> <1, 1, 3, 3>
-let AddedComplexity = 15 in
-def : Pat<(v4i32 (movshdup VR128:$src, (undef))),
- (MOVSHDUPrr VR128:$src)>, Requires<[HasSSE3]>;
-let AddedComplexity = 20 in
-def : Pat<(v4i32 (movshdup (bc_v4i32 (memopv2i64 addr:$src)), (undef))),
- (MOVSHDUPrm addr:$src)>, Requires<[HasSSE3]>;
-
-// vector_shuffle v1, <undef> <0, 0, 2, 2>
-let AddedComplexity = 15 in
- def : Pat<(v4i32 (movsldup VR128:$src, (undef))),
- (MOVSLDUPrr VR128:$src)>, Requires<[HasSSE3]>;
-let AddedComplexity = 20 in
- def : Pat<(v4i32 (movsldup (bc_v4i32 (memopv2i64 addr:$src)), (undef))),
- (MOVSLDUPrm addr:$src)>, Requires<[HasSSE3]>;
-
//===---------------------------------------------------------------------===//
-// SSSE3 Instructions
+// SSSE3 - Packed Absolute Instructions
//===---------------------------------------------------------------------===//
-/// SS3I_unop_rm_int_8 - Simple SSSE3 unary operator whose type is v*i8.
-multiclass SS3I_unop_rm_int_8<bits<8> opc, string OpcodeStr,
- Intrinsic IntId64, Intrinsic IntId128> {
+/// SS3I_unop_rm_int - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}.
+multiclass SS3I_unop_rm_int<bits<8> opc, string OpcodeStr,
+ PatFrag mem_frag64, PatFrag mem_frag128,
+ Intrinsic IntId64, Intrinsic IntId128> {
def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR64:$dst, (IntId64 VR64:$src))]>;
@@ -2674,7 +3364,7 @@
def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR64:$dst,
- (IntId64 (bitconvert (memopv8i8 addr:$src))))]>;
+ (IntId64 (bitconvert (mem_frag64 addr:$src))))]>;
def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src),
@@ -2687,220 +3377,113 @@
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst,
(IntId128
- (bitconvert (memopv16i8 addr:$src))))]>, OpSize;
+ (bitconvert (mem_frag128 addr:$src))))]>, OpSize;
}
-/// SS3I_unop_rm_int_16 - Simple SSSE3 unary operator whose type is v*i16.
-multiclass SS3I_unop_rm_int_16<bits<8> opc, string OpcodeStr,
- Intrinsic IntId64, Intrinsic IntId128> {
- def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst),
- (ins VR64:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR64:$dst, (IntId64 VR64:$src))]>;
-
- def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst),
- (ins i64mem:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR64:$dst,
- (IntId64
- (bitconvert (memopv4i16 addr:$src))))]>;
-
- def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (IntId128 VR128:$src))]>,
- OpSize;
+defm PABSB : SS3I_unop_rm_int<0x1C, "pabsb", memopv8i8, memopv16i8,
+ int_x86_ssse3_pabs_b,
+ int_x86_ssse3_pabs_b_128>;
+defm PABSW : SS3I_unop_rm_int<0x1D, "pabsw", memopv4i16, memopv8i16,
+ int_x86_ssse3_pabs_w,
+ int_x86_ssse3_pabs_w_128>;
+defm PABSD : SS3I_unop_rm_int<0x1E, "pabsd", memopv2i32, memopv4i32,
+ int_x86_ssse3_pabs_d,
+ int_x86_ssse3_pabs_d_128>;
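
A reference model of the lane-wise absolute value these intrinsics compute
(illustrative only; note the INT16_MIN edge case):

  #include <cstdint>

  // PABSW: lane-wise absolute value; 0x8000 has no positive counterpart
  // in 16 bits, so it is returned unchanged.
  static void pabsw(int16_t v[8]) {
    for (int i = 0; i != 8; ++i)
      v[i] = (int16_t)(v[i] < 0 ? -v[i] : v[i]);
  }

  int main() {
    int16_t v[8] = {-1, 2, -3, 4, -5, 6, -32768, 8};
    pabsw(v); // v[0] == 1, v[6] == -32768
    return v[0] == 1 ? 0 : 1;
  }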
- def rm128 : SS38I<opc, MRMSrcMem, (outs VR128:$dst),
- (ins i128mem:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst,
- (IntId128
- (bitconvert (memopv8i16 addr:$src))))]>, OpSize;
-}
+//===---------------------------------------------------------------------===//
+// SSSE3 - Packed Binary Operator Instructions
+//===---------------------------------------------------------------------===//
-/// SS3I_unop_rm_int_32 - Simple SSSE3 unary operator whose type is v*i32.
-multiclass SS3I_unop_rm_int_32<bits<8> opc, string OpcodeStr,
- Intrinsic IntId64, Intrinsic IntId128> {
+/// SS3I_binop_rm_int - Simple SSSE3 bin op whose type can be v*{i8,i16,i32}.
+multiclass SS3I_binop_rm_int<bits<8> opc, string OpcodeStr,
+ PatFrag mem_frag64, PatFrag mem_frag128,
+ Intrinsic IntId64, Intrinsic IntId128,
+ bit Is2Addr = 1> {
+ let isCommutable = 1 in
def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst),
- (ins VR64:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR64:$dst, (IntId64 VR64:$src))]>;
-
+ (ins VR64:$src1, VR64:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR64:$dst, (IntId64 VR64:$src1, VR64:$src2))]>;
def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst),
- (ins i64mem:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR64:$dst,
- (IntId64
- (bitconvert (memopv2i32 addr:$src))))]>;
+ (ins VR64:$src1, i64mem:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR64:$dst,
+ (IntId64 VR64:$src1,
+ (bitconvert (memopv8i8 addr:$src2))))]>;
+ let isCommutable = 1 in
def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (IntId128 VR128:$src))]>,
- OpSize;
-
+ (ins VR128:$src1, VR128:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>,
+ OpSize;
def rm128 : SS38I<opc, MRMSrcMem, (outs VR128:$dst),
- (ins i128mem:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst,
- (IntId128
- (bitconvert (memopv4i32 addr:$src))))]>, OpSize;
-}
-
-defm PABSB : SS3I_unop_rm_int_8 <0x1C, "pabsb",
- int_x86_ssse3_pabs_b,
- int_x86_ssse3_pabs_b_128>;
-defm PABSW : SS3I_unop_rm_int_16<0x1D, "pabsw",
- int_x86_ssse3_pabs_w,
- int_x86_ssse3_pabs_w_128>;
-defm PABSD : SS3I_unop_rm_int_32<0x1E, "pabsd",
- int_x86_ssse3_pabs_d,
- int_x86_ssse3_pabs_d_128>;
-
-/// SS3I_binop_rm_int_8 - Simple SSSE3 binary operator whose type is v*i8.
-let Constraints = "$src1 = $dst" in {
- multiclass SS3I_binop_rm_int_8<bits<8> opc, string OpcodeStr,
- Intrinsic IntId64, Intrinsic IntId128,
- bit Commutable = 0> {
- def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst),
- (ins VR64:$src1, VR64:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR64:$dst, (IntId64 VR64:$src1, VR64:$src2))]> {
- let isCommutable = Commutable;
- }
- def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst),
- (ins VR64:$src1, i64mem:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR64:$dst,
- (IntId64 VR64:$src1,
- (bitconvert (memopv8i8 addr:$src2))))]>;
-
- def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>,
- OpSize {
- let isCommutable = Commutable;
- }
- def rm128 : SS38I<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, i128mem:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst,
- (IntId128 VR128:$src1,
- (bitconvert (memopv16i8 addr:$src2))))]>, OpSize;
- }
+ (ins VR128:$src1, i128mem:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst,
+ (IntId128 VR128:$src1,
+ (bitconvert (memopv16i8 addr:$src2))))]>, OpSize;
}
-/// SS3I_binop_rm_int_16 - Simple SSSE3 binary operator whose type is v*i16.
-let Constraints = "$src1 = $dst" in {
- multiclass SS3I_binop_rm_int_16<bits<8> opc, string OpcodeStr,
- Intrinsic IntId64, Intrinsic IntId128,
- bit Commutable = 0> {
- def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst),
- (ins VR64:$src1, VR64:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR64:$dst, (IntId64 VR64:$src1, VR64:$src2))]> {
- let isCommutable = Commutable;
- }
- def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst),
- (ins VR64:$src1, i64mem:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR64:$dst,
- (IntId64 VR64:$src1,
- (bitconvert (memopv4i16 addr:$src2))))]>;
-
- def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>,
- OpSize {
- let isCommutable = Commutable;
- }
- def rm128 : SS38I<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, i128mem:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst,
- (IntId128 VR128:$src1,
- (bitconvert (memopv8i16 addr:$src2))))]>, OpSize;
- }
+// None of these have i8 immediate fields.
+let ImmT = NoImm, Constraints = "$src1 = $dst" in {
+let isCommutable = 0 in {
+ defm PHADDW : SS3I_binop_rm_int<0x01, "phaddw", memopv4i16, memopv8i16,
+ int_x86_ssse3_phadd_w,
+ int_x86_ssse3_phadd_w_128>;
+ defm PHADDD : SS3I_binop_rm_int<0x02, "phaddd", memopv2i32, memopv4i32,
+ int_x86_ssse3_phadd_d,
+ int_x86_ssse3_phadd_d_128>;
+ defm PHADDSW : SS3I_binop_rm_int<0x03, "phaddsw", memopv4i16, memopv8i16,
+ int_x86_ssse3_phadd_sw,
+ int_x86_ssse3_phadd_sw_128>;
+ defm PHSUBW : SS3I_binop_rm_int<0x05, "phsubw", memopv4i16, memopv8i16,
+ int_x86_ssse3_phsub_w,
+ int_x86_ssse3_phsub_w_128>;
+ defm PHSUBD : SS3I_binop_rm_int<0x06, "phsubd", memopv2i32, memopv4i32,
+ int_x86_ssse3_phsub_d,
+ int_x86_ssse3_phsub_d_128>;
+ defm PHSUBSW : SS3I_binop_rm_int<0x07, "phsubsw", memopv4i16, memopv8i16,
+ int_x86_ssse3_phsub_sw,
+ int_x86_ssse3_phsub_sw_128>;
+ defm PMADDUBSW : SS3I_binop_rm_int<0x04, "pmaddubsw", memopv8i8, memopv16i8,
+ int_x86_ssse3_pmadd_ub_sw,
+ int_x86_ssse3_pmadd_ub_sw_128>;
+ defm PSHUFB : SS3I_binop_rm_int<0x00, "pshufb", memopv8i8, memopv16i8,
+ int_x86_ssse3_pshuf_b,
+ int_x86_ssse3_pshuf_b_128>;
+ defm PSIGNB : SS3I_binop_rm_int<0x08, "psignb", memopv8i8, memopv16i8,
+ int_x86_ssse3_psign_b,
+ int_x86_ssse3_psign_b_128>;
+ defm PSIGNW : SS3I_binop_rm_int<0x09, "psignw", memopv4i16, memopv8i16,
+ int_x86_ssse3_psign_w,
+ int_x86_ssse3_psign_w_128>;
+ defm PSIGND : SS3I_binop_rm_int<0x0A, "psignd", memopv2i32, memopv4i32,
+ int_x86_ssse3_psign_d,
+ int_x86_ssse3_psign_d_128>;
+}
+defm PMULHRSW : SS3I_binop_rm_int<0x0B, "pmulhrsw", memopv4i16, memopv8i16,
+ int_x86_ssse3_pmul_hr_sw,
+ int_x86_ssse3_pmul_hr_sw_128>;
}
-/// SS3I_binop_rm_int_32 - Simple SSSE3 binary operator whose type is v*i32.
-let Constraints = "$src1 = $dst" in {
- multiclass SS3I_binop_rm_int_32<bits<8> opc, string OpcodeStr,
- Intrinsic IntId64, Intrinsic IntId128,
- bit Commutable = 0> {
- def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst),
- (ins VR64:$src1, VR64:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR64:$dst, (IntId64 VR64:$src1, VR64:$src2))]> {
- let isCommutable = Commutable;
- }
- def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst),
- (ins VR64:$src1, i64mem:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR64:$dst,
- (IntId64 VR64:$src1,
- (bitconvert (memopv2i32 addr:$src2))))]>;
-
- def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>,
- OpSize {
- let isCommutable = Commutable;
- }
- def rm128 : SS38I<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, i128mem:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst,
- (IntId128 VR128:$src1,
- (bitconvert (memopv4i32 addr:$src2))))]>, OpSize;
- }
-}
+def : Pat<(X86pshufb VR128:$src, VR128:$mask),
+ (PSHUFBrr128 VR128:$src, VR128:$mask)>, Requires<[HasSSSE3]>;
+def : Pat<(X86pshufb VR128:$src, (bc_v16i8 (memopv2i64 addr:$mask))),
+ (PSHUFBrm128 VR128:$src, addr:$mask)>, Requires<[HasSSSE3]>;
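
The X86pshufb node matched above is a byte shuffle with per-lane zeroing.
A minimal model (illustrative only):

  #include <cstdint>

  // PSHUFB: each mask byte selects a source byte by its low 4 bits;
  // if the mask byte's high bit is set, the result byte is zeroed.
  static void pshufb(uint8_t dst[16], const uint8_t src[16],
                     const uint8_t mask[16]) {
    for (int i = 0; i != 16; ++i)
      dst[i] = (mask[i] & 0x80) ? 0 : src[mask[i] & 0x0f];
  }

  int main() {
    uint8_t src[16], mask[16], dst[16];
    for (int i = 0; i != 16; ++i) {
      src[i] = (uint8_t)(i + 1);
      mask[i] = (uint8_t)(15 - i);
    }
    pshufb(dst, src, mask); // reverses the vector
    return dst[0] == 16 ? 0 : 1;
  }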
-let ImmT = NoImm in { // None of these have i8 immediate fields.
-defm PHADDW : SS3I_binop_rm_int_16<0x01, "phaddw",
- int_x86_ssse3_phadd_w,
- int_x86_ssse3_phadd_w_128>;
-defm PHADDD : SS3I_binop_rm_int_32<0x02, "phaddd",
- int_x86_ssse3_phadd_d,
- int_x86_ssse3_phadd_d_128>;
-defm PHADDSW : SS3I_binop_rm_int_16<0x03, "phaddsw",
- int_x86_ssse3_phadd_sw,
- int_x86_ssse3_phadd_sw_128>;
-defm PHSUBW : SS3I_binop_rm_int_16<0x05, "phsubw",
- int_x86_ssse3_phsub_w,
- int_x86_ssse3_phsub_w_128>;
-defm PHSUBD : SS3I_binop_rm_int_32<0x06, "phsubd",
- int_x86_ssse3_phsub_d,
- int_x86_ssse3_phsub_d_128>;
-defm PHSUBSW : SS3I_binop_rm_int_16<0x07, "phsubsw",
- int_x86_ssse3_phsub_sw,
- int_x86_ssse3_phsub_sw_128>;
-defm PMADDUBSW : SS3I_binop_rm_int_8 <0x04, "pmaddubsw",
- int_x86_ssse3_pmadd_ub_sw,
- int_x86_ssse3_pmadd_ub_sw_128>;
-defm PMULHRSW : SS3I_binop_rm_int_16<0x0B, "pmulhrsw",
- int_x86_ssse3_pmul_hr_sw,
- int_x86_ssse3_pmul_hr_sw_128, 1>;
-
-defm PSHUFB : SS3I_binop_rm_int_8 <0x00, "pshufb",
- int_x86_ssse3_pshuf_b,
- int_x86_ssse3_pshuf_b_128>;
-defm PSIGNB : SS3I_binop_rm_int_8 <0x08, "psignb",
- int_x86_ssse3_psign_b,
- int_x86_ssse3_psign_b_128>;
-defm PSIGNW : SS3I_binop_rm_int_16<0x09, "psignw",
- int_x86_ssse3_psign_w,
- int_x86_ssse3_psign_w_128>;
-defm PSIGND : SS3I_binop_rm_int_32<0x0A, "psignd",
- int_x86_ssse3_psign_d,
- int_x86_ssse3_psign_d_128>;
-}
+//===---------------------------------------------------------------------===//
+// SSSE3 - Packed Align Instruction Patterns
+//===---------------------------------------------------------------------===//
-// palignr patterns.
let Constraints = "$src1 = $dst" in {
def PALIGNR64rr : SS3AI<0x0F, MRMSrcReg, (outs VR64:$dst),
(ins VR64:$src1, VR64:$src2, i8imm:$src3),
@@ -2962,10 +3545,15 @@
Requires<[HasSSSE3]>;
}
-def : Pat<(X86pshufb VR128:$src, VR128:$mask),
- (PSHUFBrr128 VR128:$src, VR128:$mask)>, Requires<[HasSSSE3]>;
-def : Pat<(X86pshufb VR128:$src, (bc_v16i8 (memopv2i64 addr:$mask))),
- (PSHUFBrm128 VR128:$src, addr:$mask)>, Requires<[HasSSSE3]>;
+//===---------------------------------------------------------------------===//
+// SSSE3 - Misc Instructions
+//===---------------------------------------------------------------------===//
+
+// Thread synchronization
+def MONITOR : I<0x01, MRM_C8, (outs), (ins), "monitor",
+ [(int_x86_sse3_monitor EAX, ECX, EDX)]>,TB, Requires<[HasSSE3]>;
+def MWAIT : I<0x01, MRM_C9, (outs), (ins), "mwait",
+ [(int_x86_sse3_mwait ECX, EAX)]>, TB, Requires<[HasSSE3]>;
//===---------------------------------------------------------------------===//
// Non-Instruction Patterns
Modified: llvm/branches/wendling/eh/lib/Target/X86/X86MCCodeEmitter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/X86/X86MCCodeEmitter.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/X86/X86MCCodeEmitter.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/X86/X86MCCodeEmitter.cpp Fri Jul 2 04:57:13 2010
@@ -60,6 +60,27 @@
static unsigned GetX86RegNum(const MCOperand &MO) {
return X86RegisterInfo::getX86RegNum(MO.getReg());
}
+
+ // On regular x86, both XMM0-XMM7 and XMM8-XMM15 are encoded in the range
+ // 0-7, and the difference between the two groups is given by the REX prefix.
+ // In the VEX prefix, registers are numbered sequentially from 0-15 and
+ // encoded in 1's complement form, for example:
+ //
+ // ModRM field => XMM9 => 1
+ // VEX.VVVV => XMM9 => ~9
+ //
+ // See table 4-35 of Intel AVX Programming Reference for details.
+ static unsigned char getVEXRegisterEncoding(const MCInst &MI,
+ unsigned OpNum) {
+ unsigned SrcReg = MI.getOperand(OpNum).getReg();
+ unsigned SrcRegNum = GetX86RegNum(MI.getOperand(OpNum));
+ if (SrcReg >= X86::XMM8 && SrcReg <= X86::XMM15)
+ SrcRegNum += 8;
+
+ // The registers represented through VEX_VVVV should
+ // be encoded in 1's complement form.
+ return (~SrcRegNum) & 0xf;
+ }
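
A standalone check of the 1's complement encoding described above (plain
C++, no LLVM headers; the helper name is made up):

  #include <cassert>

  static unsigned char vexVVVV(unsigned RegNum) {
    return (~RegNum) & 0xf; // 4-bit 1's complement
  }

  int main() {
    assert(vexVVVV(9) == 0x6); // XMM9 -> 0b0110, as in the comment above
    assert(vexVVVV(0) == 0xf); // an unused VVVV field reads as all-ones
    return 0;
  }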
void EmitByte(unsigned char C, unsigned &CurByte, raw_ostream &OS) const {
OS << (char)C;
@@ -99,6 +120,9 @@
}
+ void EmitSegmentOverridePrefix(const MCOperand &Op, unsigned TSFlags,
+ unsigned &CurByte, raw_ostream &OS) const;
+
void EmitMemModRMByte(const MCInst &MI, unsigned Op,
unsigned RegOpcodeField,
uint64_t TSFlags, unsigned &CurByte, raw_ostream &OS,
@@ -131,7 +155,6 @@
return new X86MCCodeEmitter(TM, Ctx, true);
}
-
/// isDisp8 - Return true if this signed displacement fits in an 8-bit
/// sign-extended field.
static bool isDisp8(int Value) {
@@ -188,6 +211,26 @@
EmitConstant(0, Size, CurByte, OS);
}
+void X86MCCodeEmitter::EmitSegmentOverridePrefix(const MCOperand &Op,
+ unsigned TSFlags,
+ unsigned &CurByte,
+ raw_ostream &OS) const {
+ // If no segment register is present, we don't need anything.
+ if (Op.getReg() == 0)
+ return;
+
+ // Check if we need an override.
+ switch (Op.getReg()) {
+ case X86::CS: EmitByte(0x2E, CurByte, OS); return;
+ case X86::SS: EmitByte(0x36, CurByte, OS); return;
+ case X86::DS: EmitByte(0x3E, CurByte, OS); return;
+ case X86::ES: EmitByte(0x26, CurByte, OS); return;
+ case X86::FS: EmitByte(0x64, CurByte, OS); return;
+ case X86::GS: EmitByte(0x65, CurByte, OS); return;
+ }
+
+ assert(0 && "Invalid segment register!");
+}
void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op,
unsigned RegOpcodeField,
@@ -341,6 +384,10 @@
if ((TSFlags & X86II::FormMask) == X86II::Pseudo)
return;
+ bool HasVEX_4V = false;
+ if ((TSFlags >> 32) & X86II::VEX_4V)
+ HasVEX_4V = true;
+
// VEX_R: opcode extension equivalent to REX.R in
// 1's complement (inverted) form
//
@@ -402,9 +449,11 @@
if (TSFlags & X86II::OpSize)
VEX_PP = 0x01;
+ if ((TSFlags >> 32) & X86II::VEX_W)
+ VEX_W = 1;
+
switch (TSFlags & X86II::Op0Mask) {
default: assert(0 && "Invalid prefix!");
- case 0: break; // No prefix!
case X86II::T8: // 0F 38
VEX_5M = 0x2;
break;
@@ -421,52 +470,63 @@
case X86II::XD: // F2 0F
VEX_PP = 0x3;
break;
+ case X86II::TB: // Bypass: Not used by VEX
+ case 0:
+ break; // No prefix!
}
unsigned NumOps = MI.getNumOperands();
- unsigned i = 0;
- unsigned SrcReg = 0, SrcRegNum = 0;
- bool IsSrcMem = false;
+ unsigned CurOp = 0;
switch (TSFlags & X86II::FormMask) {
case X86II::MRMInitReg: assert(0 && "FIXME: Remove this!");
+ case X86II::MRM0m: case X86II::MRM1m:
+ case X86II::MRM2m: case X86II::MRM3m:
+ case X86II::MRM4m: case X86II::MRM5m:
+ case X86II::MRM6m: case X86II::MRM7m:
+ case X86II::MRMDestMem:
+ NumOps = CurOp = X86AddrNumOperands;
case X86II::MRMSrcMem:
- IsSrcMem = true;
case X86II::MRMSrcReg:
- if (MI.getOperand(0).isReg() &&
- X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0).getReg()))
+ if (MI.getNumOperands() > CurOp && MI.getOperand(CurOp).isReg() &&
+ X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
VEX_R = 0x0;
- // On regular x86, both XMM0-XMM7 and XMM8-XMM15 are encoded in the
- // range 0-7 and the difference between the 2 groups is given by the
- // REX prefix. In the VEX prefix, registers are seen sequencially
- // from 0-15 and encoded in 1's complement form, example:
- //
- // ModRM field => XMM9 => 1
- // VEX.VVVV => XMM9 => ~9
- //
- // See table 4-35 of Intel AVX Programming Reference for details.
- SrcReg = MI.getOperand(1).getReg();
- SrcRegNum = GetX86RegNum(MI.getOperand(1));
- if (SrcReg >= X86::XMM8 && SrcReg <= X86::XMM15)
- SrcRegNum += 8;
-
- // The registers represented through VEX_VVVV should
- // be encoded in 1's complement form.
- if ((TSFlags >> 32) & X86II::VEX_4V)
- VEX_4V = (~SrcRegNum) & 0xf;
+ // CurOp and NumOps are equal when VEX_R represents a register used
+ // to index a memory destination (which is the last operand)
+ CurOp = (CurOp == NumOps) ? 0 : CurOp+1;
+
+ if (HasVEX_4V) {
+ VEX_4V = getVEXRegisterEncoding(MI, CurOp);
+ CurOp++;
+ }
- i = 2; // Skip the VEX.VVVV operand.
- for (; i != NumOps; ++i) {
- const MCOperand &MO = MI.getOperand(i);
+ for (; CurOp != NumOps; ++CurOp) {
+ const MCOperand &MO = MI.getOperand(CurOp);
if (MO.isReg() && X86InstrInfo::isX86_64ExtendedReg(MO.getReg()))
VEX_B = 0x0;
- if (!VEX_B && MO.isReg() && IsSrcMem &&
+ if (!VEX_B && MO.isReg() &&
+ ((TSFlags & X86II::FormMask) == X86II::MRMSrcMem) &&
X86InstrInfo::isX86_64ExtendedReg(MO.getReg()))
VEX_X = 0x0;
}
break;
- default:
+ default: // MRMDestReg, MRM0r-MRM7r
+ if (MI.getOperand(CurOp).isReg() &&
+ X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
+ VEX_B = 0;
+
+ if (HasVEX_4V)
+ VEX_4V = getVEXRegisterEncoding(MI, CurOp);
+
+ CurOp++;
+ for (; CurOp != NumOps; ++CurOp) {
+ const MCOperand &MO = MI.getOperand(CurOp);
+ if (MO.isReg() && !HasVEX_4V &&
+ X86InstrInfo::isX86_64ExtendedReg(MO.getReg()))
+ VEX_R = 0x0;
+ }
+ break;
assert(0 && "Not implemented!");
}
@@ -483,7 +543,7 @@
//
unsigned char LastByte = VEX_PP | (VEX_L << 2) | (VEX_4V << 3);
- if (VEX_B && VEX_X) { // 2 byte VEX prefix
+ if (VEX_B && VEX_X && !VEX_W) { // 2 byte VEX prefix
EmitByte(0xC5, CurByte, OS);
EmitByte(LastByte | (VEX_R << 7), CurByte, OS);
return;
@@ -491,7 +551,7 @@
// 3 byte VEX prefix
EmitByte(0xC4, CurByte, OS);
- EmitByte(VEX_R << 7 | VEX_X << 6 | VEX_5M, CurByte, OS);
+ EmitByte(VEX_R << 7 | VEX_X << 6 | VEX_B << 5 | VEX_5M, CurByte, OS);
EmitByte(LastByte | (VEX_W << 7), CurByte, OS);
}
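
As a worked example of the byte assembly above: for a hypothetical VEX
instruction with a low destination register, xmm9 in VEX.VVVV, a 128-bit
operation and a 66 opcode prefix, the field values below exercise the
2-byte path (all values are illustrative):

  #include <cstdint>
  #include <cstdio>

  int main() {
    uint8_t VEX_R = 1, VEX_X = 1, VEX_B = 1, VEX_W = 0;
    uint8_t VEX_L = 0, VEX_PP = 1, VEX_4V = 6, VEX_5M = 1;
    uint8_t LastByte = VEX_PP | (VEX_L << 2) | (VEX_4V << 3);
    if (VEX_B && VEX_X && !VEX_W) {
      // 2-byte form: B, X and the opcode map (5M) take their defaults.
      printf("C5 %02X\n", (unsigned)(LastByte | (VEX_R << 7))); // C5 B1
    } else {
      // 3-byte form carries B, X and the opcode map explicitly.
      printf("C4 %02X %02X\n",
             (unsigned)((VEX_R << 7) | (VEX_X << 6) | (VEX_B << 5) | VEX_5M),
             (unsigned)(LastByte | (VEX_W << 7)));
    }
    return 0;
  }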
@@ -691,15 +751,21 @@
// Keep track of the current byte being emitted.
unsigned CurByte = 0;
- // Is this instruction encoded in AVX form?
- bool IsAVXForm = false;
+ // Is this instruction encoded using the AVX VEX prefix?
+ bool HasVEXPrefix = false;
+
+ // Does this instruction use the VEX.VVVV field?
+ bool HasVEX_4V = false;
+
+ if ((TSFlags >> 32) & X86II::VEX)
+ HasVEXPrefix = true;
if ((TSFlags >> 32) & X86II::VEX_4V)
- IsAVXForm = true;
+ HasVEX_4V = true;
// FIXME: We should emit the prefixes in exactly the same order as GAS does,
// in order to provide diffability.
- if (!IsAVXForm)
+ if (!HasVEXPrefix)
EmitOpcodePrefix(TSFlags, CurByte, MI, Desc, OS);
else
EmitVEXOpcodePrefix(TSFlags, CurByte, MI, Desc, OS);
@@ -737,6 +803,7 @@
break;
case X86II::MRMDestMem:
+ EmitSegmentOverridePrefix(MI.getOperand(CurOp + 4), TSFlags, CurByte, OS);
EmitByte(BaseOpcode, CurByte, OS);
EmitMemModRMByte(MI, CurOp,
GetX86RegNum(MI.getOperand(CurOp + X86AddrNumOperands)),
@@ -748,7 +815,7 @@
EmitByte(BaseOpcode, CurByte, OS);
SrcRegNum = CurOp + 1;
- if (IsAVXForm) // Skip 1st src (which is encoded in VEX_VVVV)
+ if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV)
SrcRegNum++;
EmitRegModRMByte(MI.getOperand(SrcRegNum),
@@ -757,22 +824,25 @@
break;
case X86II::MRMSrcMem: {
- EmitByte(BaseOpcode, CurByte, OS);
+ int AddrOperands = X86AddrNumOperands;
+ unsigned FirstMemOp = CurOp+1;
+ if (HasVEX_4V) {
+ ++AddrOperands;
+ ++FirstMemOp; // Skip the register source (which is encoded in VEX_VVVV).
+ }
// FIXME: Maybe lea should have its own form? This is a horrible hack.
- int AddrOperands;
if (Opcode == X86::LEA64r || Opcode == X86::LEA64_32r ||
Opcode == X86::LEA16r || Opcode == X86::LEA32r)
- AddrOperands = X86AddrNumOperands - 1; // No segment register
+ --AddrOperands; // No segment register
else
- AddrOperands = X86AddrNumOperands;
+ EmitSegmentOverridePrefix(MI.getOperand(FirstMemOp+4),
+ TSFlags, CurByte, OS);
- if (IsAVXForm)
- AddrOperands++;
+ EmitByte(BaseOpcode, CurByte, OS);
- // Skip the register source (which is encoded in VEX_VVVV)
- EmitMemModRMByte(MI, IsAVXForm ? CurOp+2 : CurOp+1,
- GetX86RegNum(MI.getOperand(CurOp)),
+
+ EmitMemModRMByte(MI, FirstMemOp, GetX86RegNum(MI.getOperand(CurOp)),
TSFlags, CurByte, OS, Fixups);
CurOp += AddrOperands + 1;
break;
@@ -782,6 +852,8 @@
case X86II::MRM2r: case X86II::MRM3r:
case X86II::MRM4r: case X86II::MRM5r:
case X86II::MRM6r: case X86II::MRM7r:
+ if (HasVEX_4V) // Skip the register dst (which is encoded in VEX_VVVV).
+ CurOp++;
EmitByte(BaseOpcode, CurByte, OS);
EmitRegModRMByte(MI.getOperand(CurOp++),
(TSFlags & X86II::FormMask)-X86II::MRM0r,
@@ -791,6 +863,7 @@
case X86II::MRM2m: case X86II::MRM3m:
case X86II::MRM4m: case X86II::MRM5m:
case X86II::MRM6m: case X86II::MRM7m:
+ EmitSegmentOverridePrefix(MI.getOperand(CurOp+4), TSFlags, CurByte, OS);
EmitByte(BaseOpcode, CurByte, OS);
EmitMemModRMByte(MI, CurOp, (TSFlags & X86II::FormMask)-X86II::MRM0m,
TSFlags, CurByte, OS, Fixups);
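
For readers following the VEX hunks above: the two-byte VEX prefix (0xC5)
can only encode VEX.R, vvvv, L and pp; it implicitly fixes X = B = 1 and
W = 0, which is why the emission now also checks !VEX_W before taking the
short form. A minimal standalone sketch of the selection logic (names
mirror the patch; the byte vector stands in for the real MCCodeEmitter
output stream, so this is an illustration, not the actual API):

    #include <cstdint>
    #include <vector>

    // Fall back to the 3-byte 0xC4 form whenever X, B or W cannot keep
    // their 2-byte-form defaults (X = B = 1, W = 0).
    static void emitVEXPrefix(uint8_t R, uint8_t X, uint8_t B, uint8_t W,
                              uint8_t M5, uint8_t LastByte,
                              std::vector<uint8_t> &Out) {
      if (B && X && !W) {               // 2-byte VEX prefix
        Out.push_back(0xC5);
        Out.push_back(LastByte | (R << 7));
        return;
      }
      Out.push_back(0xC4);              // 3-byte VEX prefix
      Out.push_back(R << 7 | X << 6 | B << 5 | M5);
      Out.push_back(LastByte | (W << 7));
    }
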
Modified: llvm/branches/wendling/eh/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp Fri Jul 2 04:57:13 2010
@@ -138,7 +138,6 @@
// FALL THROUGH
case GlobalValue::InternalLinkage:
case GlobalValue::PrivateLinkage:
- case GlobalValue::LinkerPrivateLinkage:
break;
case GlobalValue::DLLImportLinkage:
llvm_unreachable("DLLImport linkage is not supported by this target!");
Modified: llvm/branches/wendling/eh/lib/Target/XCore/XCoreISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/XCore/XCoreISelLowering.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/XCore/XCoreISelLowering.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/XCore/XCoreISelLowering.cpp Fri Jul 2 04:57:13 2010
@@ -1379,7 +1379,6 @@
SDValue Mul0, Mul1, Addend0, Addend1;
if (N->getValueType(0) == MVT::i32 &&
isADDADDMUL(SDValue(N, 0), Mul0, Mul1, Addend0, Addend1, true)) {
- SDValue Zero = DAG.getConstant(0, MVT::i32);
SDValue Ignored = DAG.getNode(XCoreISD::LMUL, dl,
DAG.getVTList(MVT::i32, MVT::i32), Mul0,
Mul1, Addend0, Addend1);
Modified: llvm/branches/wendling/eh/lib/Target/XCore/XCoreInstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/XCore/XCoreInstrInfo.td?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/XCore/XCoreInstrInfo.td (original)
+++ llvm/branches/wendling/eh/lib/Target/XCore/XCoreInstrInfo.td Fri Jul 2 04:57:13 2010
@@ -733,7 +733,7 @@
// TODO setd, eet, eef, getts, setpt, outct, inct, chkct, outt, intt, out,
// in, outshr, inshr, testct, testwct, tinitpc, tinitdp, tinitsp, tinitcp,
// tsetmr, sext (reg), zext (reg)
-let isTwoAddress = 1 in {
+let Constraints = "$src1 = $dst" in {
let neverHasSideEffects = 1 in
def SEXT_rus : _FRUS<(outs GRRegs:$dst), (ins GRRegs:$src1, i32imm:$src2),
"sext $dst, $src2",
Modified: llvm/branches/wendling/eh/lib/Transforms/Hello/Hello.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Hello/Hello.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Hello/Hello.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Hello/Hello.cpp Fri Jul 2 04:57:13 2010
@@ -28,7 +28,7 @@
Hello() : FunctionPass(&ID) {}
virtual bool runOnFunction(Function &F) {
- HelloCounter++;
+ ++HelloCounter;
errs() << "Hello: ";
errs().write_escaped(F.getName()) << '\n';
return false;
@@ -46,7 +46,7 @@
Hello2() : FunctionPass(&ID) {}
virtual bool runOnFunction(Function &F) {
- HelloCounter++;
+ ++HelloCounter;
errs() << "Hello: ";
errs().write_escaped(F.getName()) << '\n';
return false;
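
The HelloCounter hunks are part of a tree-wide cleanup: LLVM style prefers
pre-increment. For a plain integer counter the two forms are equivalent;
the habit pays off for iterator-like types, where post-increment implies a
temporary copy. A trivial, self-contained illustration (a plain uint64_t
standing in for the STATISTIC macro):

    #include <cstdint>

    static uint64_t HelloCounter = 0;

    // Equivalent to HelloCounter++ here, but pre-increment never costs
    // a temporary, so it is the uniform choice across the codebase.
    static void countOne() {
      ++HelloCounter;
    }
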
Modified: llvm/branches/wendling/eh/lib/Transforms/Hello/Makefile
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Hello/Makefile?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Hello/Makefile (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Hello/Makefile Fri Jul 2 04:57:13 2010
@@ -12,5 +12,13 @@
LOADABLE_MODULE = 1
USEDLIBS =
+# If we don't need RTTI or EH, there's no reason to export anything
+# from the hello plugin.
+ifneq ($(REQUIRES_RTTI), 1)
+ifneq ($(REQUIRES_EH), 1)
+EXPORTED_SYMBOL_FILE = $(PROJ_SRC_DIR)/Hello.exports
+endif
+endif
+
include $(LEVEL)/Makefile.common
Modified: llvm/branches/wendling/eh/lib/Transforms/IPO/GlobalOpt.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/IPO/GlobalOpt.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/IPO/GlobalOpt.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/IPO/GlobalOpt.cpp Fri Jul 2 04:57:13 2010
@@ -221,13 +221,16 @@
if (AnalyzeGlobal(I, GS, PHIUsers)) return true;
GS.HasPHIUser = true;
} else if (isa<CmpInst>(I)) {
+ // Nothing to analyse.
} else if (isa<MemTransferInst>(I)) {
- if (I->getOperand(1) == V)
+ const MemTransferInst *MTI = cast<MemTransferInst>(I);
+ if (MTI->getArgOperand(0) == V)
GS.StoredType = GlobalStatus::isStored;
- if (I->getOperand(2) == V)
+ if (MTI->getArgOperand(1) == V)
GS.isLoaded = true;
} else if (isa<MemSetInst>(I)) {
- assert(I->getOperand(1) == V && "Memset only takes one pointer!");
+ assert(cast<MemSetInst>(I)->getArgOperand(0) == V &&
+ "Memset only takes one pointer!");
GS.StoredType = GlobalStatus::isStored;
} else {
return true; // Any other non-load instruction might take address!
@@ -1323,8 +1326,8 @@
// if (F2) { free(F2); F2 = 0; }
// }
// The malloc can also fail if its argument is too large.
- Constant *ConstantZero = ConstantInt::get(CI->getOperand(1)->getType(), 0);
- Value *RunningOr = new ICmpInst(CI, ICmpInst::ICMP_SLT, CI->getOperand(1),
+ Constant *ConstantZero = ConstantInt::get(CI->getArgOperand(0)->getType(), 0);
+ Value *RunningOr = new ICmpInst(CI, ICmpInst::ICMP_SLT, CI->getArgOperand(0),
ConstantZero, "isneg");
for (unsigned i = 0, e = FieldMallocs.size(); i != e; ++i) {
Value *Cond = new ICmpInst(CI, ICmpInst::ICMP_EQ, FieldMallocs[i],
@@ -1511,10 +1514,10 @@
// If this is an allocation of a fixed size array of structs, analyze as a
// variable size array. malloc [100 x struct],1 -> malloc struct, 100
- if (NElems == ConstantInt::get(CI->getOperand(1)->getType(), 1))
+ if (NElems == ConstantInt::get(CI->getArgOperand(0)->getType(), 1))
if (const ArrayType *AT = dyn_cast<ArrayType>(AllocTy))
AllocTy = AT->getElementType();
-
+
const StructType *AllocSTy = dyn_cast<StructType>(AllocTy);
if (!AllocSTy)
return false;
@@ -1641,7 +1644,7 @@
// bool.
Instruction *StoredVal = cast<Instruction>(SI->getOperand(0));
- // If we're already replaced the input, StoredVal will be a cast or
+ // If we've already replaced the input, StoredVal will be a cast or
// select instruction. If not, it will be a load of the original
// global.
if (LoadInst *LI = dyn_cast<LoadInst>(StoredVal)) {
@@ -2260,8 +2263,7 @@
getVal(Values, CI->getOperand(0)),
CI->getType());
} else if (SelectInst *SI = dyn_cast<SelectInst>(CurInst)) {
- InstResult =
- ConstantExpr::getSelect(getVal(Values, SI->getOperand(0)),
+ InstResult = ConstantExpr::getSelect(getVal(Values, SI->getOperand(0)),
getVal(Values, SI->getOperand(1)),
getVal(Values, SI->getOperand(2)));
} else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(CurInst)) {
@@ -2302,7 +2304,8 @@
if (!Callee) return false; // Cannot resolve.
SmallVector<Constant*, 8> Formals;
- for (User::op_iterator i = CI->op_begin() + 1, e = CI->op_end();
+ CallSite CS(CI);
+ for (User::op_iterator i = CS.arg_begin(), e = CS.arg_end();
i != e; ++i)
Formals.push_back(getVal(Values, *i));
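
Most of the mechanical churn in this patch is the substitution seen in
this file: call arguments are now read through the 0-based getArgOperand
interface instead of raw getOperand indexing, so passes no longer
hard-code where the callee sits in the operand list (see the
CallInst::ArgOffset uses later in the patch). A minimal before/after
sketch, assuming a memcpy-style call whose third argument is the length:

    #include "llvm/Instructions.h"
    using namespace llvm;

    // Old style: slot 0 of the operand list held the callee, so
    // argument i had to be addressed as operand i+1.
    static Value *getLengthOld(CallInst *CI) { return CI->getOperand(3); }

    // New style: getArgOperand is 0-based and hides the callee's slot.
    static Value *getLengthNew(CallInst *CI) { return CI->getArgOperand(2); }
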
Modified: llvm/branches/wendling/eh/lib/Transforms/IPO/LowerSetJmp.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/IPO/LowerSetJmp.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/IPO/LowerSetJmp.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/IPO/LowerSetJmp.cpp Fri Jul 2 04:57:13 2010
@@ -42,6 +42,7 @@
#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
#include "llvm/Pass.h"
+#include "llvm/Support/CallSite.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/InstVisitor.h"
#include "llvm/Transforms/Utils/Local.h"
@@ -262,8 +263,8 @@
// char*. It returns "void", so it doesn't need to replace any of
// Inst's uses and doesn't get a name.
CastInst* CI =
- new BitCastInst(Inst->getOperand(1), SBPTy, "LJBuf", Inst);
- Value *Args[] = { CI, Inst->getOperand(2) };
+ new BitCastInst(Inst->getArgOperand(0), SBPTy, "LJBuf", Inst);
+ Value *Args[] = { CI, Inst->getArgOperand(1) };
CallInst::Create(ThrowLongJmp, Args, Args + 2, "", Inst);
SwitchValuePair& SVP = SwitchValMap[Inst->getParent()->getParent()];
@@ -378,7 +379,7 @@
const Type* SBPTy =
Type::getInt8PtrTy(Inst->getContext());
CastInst* BufPtr =
- new BitCastInst(Inst->getOperand(1), SBPTy, "SBJmpBuf", Inst);
+ new BitCastInst(Inst->getArgOperand(0), SBPTy, "SBJmpBuf", Inst);
Value *Args[] = {
GetSetJmpMap(Func), BufPtr,
ConstantInt::get(Type::getInt32Ty(Inst->getContext()), SetJmpIDMap[Func]++)
@@ -473,7 +474,8 @@
// Construct the new "invoke" instruction.
TerminatorInst* Term = OldBB->getTerminator();
- std::vector<Value*> Params(CI.op_begin() + 1, CI.op_end());
+ CallSite CS(&CI);
+ std::vector<Value*> Params(CS.arg_begin(), CS.arg_end());
InvokeInst* II =
InvokeInst::Create(CI.getCalledValue(), NewBB, PrelimBBMap[Func],
0, 0, 0, 0, // EH-FIXME!
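
The CallSite change above is the idiomatic replacement for the old
op_begin() + 1 trick: CallSite wraps either a CallInst or an InvokeInst
and exposes just the argument range. A small sketch of the pattern, under
the CallSite API this branch uses:

    #include "llvm/Instructions.h"
    #include "llvm/Support/CallSite.h"
    #include <vector>
    using namespace llvm;

    // Collect the actual arguments of a call or invoke without caring
    // where the callee (or an invoke's destinations) sit in the
    // operand list.
    static void collectArgs(Instruction *I, std::vector<Value*> &Args) {
      CallSite CS(I);
      Args.assign(CS.arg_begin(), CS.arg_end());
    }
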
Modified: llvm/branches/wendling/eh/lib/Transforms/IPO/MergeFunctions.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/IPO/MergeFunctions.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/IPO/MergeFunctions.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/IPO/MergeFunctions.cpp Fri Jul 2 04:57:13 2010
@@ -535,6 +535,7 @@
case GlobalValue::WeakAnyLinkage:
case GlobalValue::WeakODRLinkage:
case GlobalValue::ExternalWeakLinkage:
+ case GlobalValue::LinkerPrivateWeakLinkage:
return ExternalWeak;
case GlobalValue::ExternalLinkage:
Modified: llvm/branches/wendling/eh/lib/Transforms/IPO/PartialInlining.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/IPO/PartialInlining.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/IPO/PartialInlining.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/IPO/PartialInlining.cpp Fri Jul 2 04:57:13 2010
@@ -66,13 +66,13 @@
return 0;
// Clone the function, so that we can hack away on it.
- DenseMap<const Value*, Value*> ValueMap;
- Function* duplicateFunction = CloneFunction(F, ValueMap);
+ ValueMap<const Value*, Value*> VMap;
+ Function* duplicateFunction = CloneFunction(F, VMap);
duplicateFunction->setLinkage(GlobalValue::InternalLinkage);
F->getParent()->getFunctionList().push_back(duplicateFunction);
- BasicBlock* newEntryBlock = cast<BasicBlock>(ValueMap[entryBlock]);
- BasicBlock* newReturnBlock = cast<BasicBlock>(ValueMap[returnBlock]);
- BasicBlock* newNonReturnBlock = cast<BasicBlock>(ValueMap[nonReturnBlock]);
+ BasicBlock* newEntryBlock = cast<BasicBlock>(VMap[entryBlock]);
+ BasicBlock* newReturnBlock = cast<BasicBlock>(VMap[returnBlock]);
+ BasicBlock* newNonReturnBlock = cast<BasicBlock>(VMap[nonReturnBlock]);
// Go ahead and update all uses to the duplicate, so that we can just
// use the inliner functionality when we're done hacking.
Modified: llvm/branches/wendling/eh/lib/Transforms/IPO/PartialSpecialization.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/IPO/PartialSpecialization.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/IPO/PartialSpecialization.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/IPO/PartialSpecialization.cpp Fri Jul 2 04:57:13 2010
@@ -64,10 +64,10 @@
// a call to the specialized function. Returns the specialized function
static Function*
SpecializeFunction(Function* F,
- DenseMap<const Value*, Value*>& replacements) {
+ ValueMap<const Value*, Value*>& replacements) {
// arg numbers of deleted arguments
DenseMap<unsigned, const Argument*> deleted;
- for (DenseMap<const Value*, Value*>::iterator
+ for (ValueMap<const Value*, Value*>::iterator
repb = replacements.begin(), repe = replacements.end();
repb != repe; ++repb) {
Argument const *arg = cast<const Argument>(repb->first);
@@ -155,7 +155,7 @@
ee = distribution.end(); ii != ee; ++ii)
if (total > ii->second && ii->first &&
ii->second > total * ConstValPercent) {
- DenseMap<const Value*, Value*> m;
+ ValueMap<const Value*, Value*> m;
Function::arg_iterator arg = F.arg_begin();
for (int y = 0; y < interestingArgs[x]; ++y)
++arg;
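
The DenseMap-to-ValueMap switch in these two IPO passes is more than a
rename: ValueMap installs callbacks on its keys, so the original-to-clone
mapping stays coherent if a key is RAUW'd or deleted while the clone is
being rewritten, which a plain DenseMap cannot guarantee. A minimal
sketch of the cloning idiom, assuming the two-argument CloneFunction
overload used above:

    #include "llvm/ADT/ValueMap.h"
    #include "llvm/Function.h"
    #include "llvm/Transforms/Utils/Cloning.h"
    using namespace llvm;

    // Clone a function for local surgery; VMap tracks original-to-clone
    // correspondences and stays valid under RAUW of its keys.
    static Function *cloneForHacking(Function *F) {
      ValueMap<const Value*, Value*> VMap;
      Function *Clone = CloneFunction(F, VMap);
      Clone->setLinkage(GlobalValue::InternalLinkage);
      return Clone;
    }
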
Modified: llvm/branches/wendling/eh/lib/Transforms/IPO/StripSymbols.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/IPO/StripSymbols.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/IPO/StripSymbols.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/IPO/StripSymbols.cpp Fri Jul 2 04:57:13 2010
@@ -73,6 +73,19 @@
AU.setPreservesAll();
}
};
+
+ class StripDeadDebugInfo : public ModulePass {
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ explicit StripDeadDebugInfo()
+ : ModulePass(&ID) {}
+
+ virtual bool runOnModule(Module &M);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+ };
}
char StripSymbols::ID = 0;
@@ -99,6 +112,14 @@
return new StripDebugDeclare();
}
+char StripDeadDebugInfo::ID = 0;
+static RegisterPass<StripDeadDebugInfo>
+A("strip-dead-debug-info", "Strip debug info for unused symbols");
+
+ModulePass *llvm::createStripDeadDebugInfoPass() {
+ return new StripDeadDebugInfo();
+}
+
/// OnlyUsedBy - Return true if V is only used by Usr.
static bool OnlyUsedBy(Value *V, Value *Usr) {
for(Value::use_iterator I = V->use_begin(), E = V->use_end(); I != E; ++I) {
@@ -223,27 +244,27 @@
Changed = true;
}
- NamedMDNode *NMD = M.getNamedMetadata("llvm.dbg.gv");
- if (NMD) {
- Changed = true;
- NMD->eraseFromParent();
- }
-
- NMD = M.getNamedMetadata("llvm.dbg.lv");
- if (NMD) {
- Changed = true;
- NMD->eraseFromParent();
+ for (Module::named_metadata_iterator NMI = M.named_metadata_begin(),
+ NME = M.named_metadata_end(); NMI != NME;) {
+ NamedMDNode *NMD = NMI;
+ ++NMI;
+ if (NMD->getName().startswith("llvm.dbg.")) {
+ NMD->eraseFromParent();
+ Changed = true;
+ }
}
-
+
unsigned MDDbgKind = M.getMDKindID("dbg");
- for (Module::iterator MI = M.begin(), ME = M.end(); MI != ME; ++MI)
+ for (Module::iterator MI = M.begin(), ME = M.end(); MI != ME; ++MI)
for (Function::iterator FI = MI->begin(), FE = MI->end(); FI != FE;
++FI)
for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE;
- ++BI)
+ ++BI) {
+ Changed = true; // FIXME: Only set if there was debug metadata.
BI->setMetadata(MDDbgKind, 0);
+ }
- return true;
+ return Changed;
}
bool StripSymbols::runOnModule(Module &M) {
@@ -266,8 +287,8 @@
if (Declare) {
while (!Declare->use_empty()) {
CallInst *CI = cast<CallInst>(Declare->use_back());
- Value *Arg1 = CI->getOperand(1);
- Value *Arg2 = CI->getOperand(2);
+ Value *Arg1 = CI->getArgOperand(0);
+ Value *Arg2 = CI->getArgOperand(1);
assert(CI->use_empty() && "llvm.dbg intrinsic should have void result");
CI->eraseFromParent();
if (Arg1->use_empty()) {
@@ -295,3 +316,83 @@
return true;
}
+
+/// getRealLinkageName - If the linkage name carries the special LLVM prefix
+/// that tells the asm printer not to emit the usual symbol prefix before the
+/// symbol name, return the linkage name with that prefix stripped.
+static StringRef getRealLinkageName(StringRef LinkageName) {
+ char One = '\1';
+ if (LinkageName.startswith(StringRef(&One, 1)))
+ return LinkageName.substr(1);
+ return LinkageName;
+}
+
+bool StripDeadDebugInfo::runOnModule(Module &M) {
+ bool Changed = false;
+
+ // Debugging information is encoded in LLVM IR using metadata. This is
+ // designed in such a way that debug info for symbols is preserved even if
+ // the symbols are optimized away by the optimizer. This special pass
+ // removes debug info for such symbols.
+
+ // llvm.dbg.gv keeps track of debug info for global variables.
+ if (NamedMDNode *NMD = M.getNamedMetadata("llvm.dbg.gv")) {
+ SmallVector<MDNode *, 8> MDs;
+ for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i)
+ if (DIGlobalVariable(NMD->getOperand(i)).Verify())
+ MDs.push_back(NMD->getOperand(i));
+ else
+ Changed = true;
+ NMD->eraseFromParent();
+ NMD = NULL;
+
+ for (SmallVector<MDNode *, 8>::iterator I = MDs.begin(),
+ E = MDs.end(); I != E; ++I) {
+ if (M.getGlobalVariable(DIGlobalVariable(*I).getGlobal()->getName(),
+ true)) {
+ if (!NMD)
+ NMD = M.getOrInsertNamedMetadata("llvm.dbg.gv");
+ NMD->addOperand(*I);
+ }
+ else
+ Changed = true;
+ }
+ }
+
+ // llvm.dbg.sp keeps track of debug info for subprograms.
+ if (NamedMDNode *NMD = M.getNamedMetadata("llvm.dbg.sp")) {
+ SmallVector<MDNode *, 8> MDs;
+ for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i)
+ if (DISubprogram(NMD->getOperand(i)).Verify())
+ MDs.push_back(NMD->getOperand(i));
+ else
+ Changed = true;
+ NMD->eraseFromParent();
+ NMD = NULL;
+
+ for (SmallVector<MDNode *, 8>::iterator I = MDs.begin(),
+ E = MDs.end(); I != E; ++I) {
+ bool FnIsLive = false;
+ if (Function *F = DISubprogram(*I).getFunction())
+ if (M.getFunction(F->getName()))
+ FnIsLive = true;
+ if (FnIsLive) {
+ if (!NMD)
+ NMD = M.getOrInsertNamedMetadata("llvm.dbg.sp");
+ NMD->addOperand(*I);
+ } else {
+ // Remove the llvm.dbg.lv.<fnname> named mdnode, which may have been used
+ // to hold debug info for the dead function's local variables.
+ StringRef FName = DISubprogram(*I).getLinkageName();
+ if (FName.empty())
+ FName = DISubprogram(*I).getName();
+ if (NamedMDNode *LVNMD =
+ M.getNamedMetadata(Twine("llvm.dbg.lv.",
+ getRealLinkageName(FName))))
+ LVNMD->eraseFromParent();
+ }
+ }
+ }
+
+ return Changed;
+}
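
The new pass registers under the name strip-dead-debug-info, so it can be
driven from opt (opt -strip-dead-debug-info in.bc -o out.bc) or built into
a pipeline through the factory added above. A sketch of the programmatic
route, assuming the factory is declared in the usual IPO header:

    #include "llvm/Module.h"
    #include "llvm/PassManager.h"
    #include "llvm/Transforms/IPO.h"
    using namespace llvm;

    // Drop debug info that refers to symbols the optimizer has already
    // removed from the module.
    static void stripDeadDebugInfo(Module &M) {
      PassManager PM;
      PM.add(createStripDeadDebugInfoPass());
      PM.run(M);
    }
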
Modified: llvm/branches/wendling/eh/lib/Transforms/IPO/StructRetPromotion.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/IPO/StructRetPromotion.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/IPO/StructRetPromotion.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/IPO/StructRetPromotion.cpp Fri Jul 2 04:57:13 2010
@@ -107,12 +107,12 @@
// Check if it is ok to perform this promotion.
if (isSafeToUpdateAllCallers(F) == false) {
DEBUG(dbgs() << "SretPromotion: Not all callers can be updated\n");
- NumRejectedSRETUses++;
+ ++NumRejectedSRETUses;
return 0;
}
DEBUG(dbgs() << "SretPromotion: sret argument will be promoted\n");
- NumSRET++;
+ ++NumSRET;
// [1] Replace use of sret parameter
AllocaInst *TheAlloca = new AllocaInst(STy, NULL, "mrv",
F->getEntryBlock().begin());
Modified: llvm/branches/wendling/eh/lib/Transforms/InstCombine/InstCombine.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/InstCombine/InstCombine.h?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/InstCombine/InstCombine.h (original)
+++ llvm/branches/wendling/eh/lib/Transforms/InstCombine/InstCombine.h Fri Jul 2 04:57:13 2010
@@ -179,7 +179,7 @@
Instruction *visitGetElementPtrInst(GetElementPtrInst &GEP);
Instruction *visitAllocaInst(AllocaInst &AI);
Instruction *visitMalloc(Instruction &FI);
- Instruction *visitFree(Instruction &FI);
+ Instruction *visitFree(CallInst &FI);
Instruction *visitLoadInst(LoadInst &LI);
Instruction *visitStoreInst(StoreInst &SI);
Instruction *visitBranchInst(BranchInst &BI);
Modified: llvm/branches/wendling/eh/lib/Transforms/InstCombine/InstCombineCalls.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/InstCombine/InstCombineCalls.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/InstCombine/InstCombineCalls.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/InstCombine/InstCombineCalls.cpp Fri Jul 2 04:57:13 2010
@@ -112,8 +112,8 @@
}
Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
- unsigned DstAlign = GetOrEnforceKnownAlignment(MI->getOperand(1));
- unsigned SrcAlign = GetOrEnforceKnownAlignment(MI->getOperand(2));
+ unsigned DstAlign = GetOrEnforceKnownAlignment(MI->getArgOperand(0));
+ unsigned SrcAlign = GetOrEnforceKnownAlignment(MI->getArgOperand(1));
unsigned MinAlign = std::min(DstAlign, SrcAlign);
unsigned CopyAlign = MI->getAlignment();
@@ -125,7 +125,7 @@
// If MemCpyInst length is 1/2/4/8 bytes then replace memcpy with
// load/store.
- ConstantInt *MemOpLength = dyn_cast<ConstantInt>(MI->getOperand(3));
+ ConstantInt *MemOpLength = dyn_cast<ConstantInt>(MI->getArgOperand(2));
if (MemOpLength == 0) return 0;
// Source and destination pointer types are always "i8*" for intrinsic. See
@@ -140,9 +140,9 @@
// Use an integer load+store unless we can find something better.
unsigned SrcAddrSp =
- cast<PointerType>(MI->getOperand(2)->getType())->getAddressSpace();
+ cast<PointerType>(MI->getArgOperand(1)->getType())->getAddressSpace();
unsigned DstAddrSp =
- cast<PointerType>(MI->getOperand(1)->getType())->getAddressSpace();
+ cast<PointerType>(MI->getArgOperand(0)->getType())->getAddressSpace();
const IntegerType* IntType = IntegerType::get(MI->getContext(), Size<<3);
Type *NewSrcPtrTy = PointerType::get(IntType, SrcAddrSp);
@@ -154,8 +154,8 @@
// an i64 load+store, here because this improves the odds that the source or
// dest address will be promotable. See if we can find a better type than the
// integer datatype.
- Value *StrippedDest = MI->getOperand(1)->stripPointerCasts();
- if (StrippedDest != MI->getOperand(1)) {
+ Value *StrippedDest = MI->getArgOperand(0)->stripPointerCasts();
+ if (StrippedDest != MI->getArgOperand(0)) {
const Type *SrcETy = cast<PointerType>(StrippedDest->getType())
->getElementType();
if (TD && SrcETy->isSized() && TD->getTypeStoreSize(SrcETy) == Size) {
@@ -189,15 +189,15 @@
SrcAlign = std::max(SrcAlign, CopyAlign);
DstAlign = std::max(DstAlign, CopyAlign);
- Value *Src = Builder->CreateBitCast(MI->getOperand(2), NewSrcPtrTy);
- Value *Dest = Builder->CreateBitCast(MI->getOperand(1), NewDstPtrTy);
+ Value *Src = Builder->CreateBitCast(MI->getArgOperand(1), NewSrcPtrTy);
+ Value *Dest = Builder->CreateBitCast(MI->getArgOperand(0), NewDstPtrTy);
Instruction *L = new LoadInst(Src, "tmp", MI->isVolatile(), SrcAlign);
InsertNewInstBefore(L, *MI);
InsertNewInstBefore(new StoreInst(L, Dest, MI->isVolatile(), DstAlign),
*MI);
// Set the size of the copy to 0, it will be deleted on the next iteration.
- MI->setOperand(3, Constant::getNullValue(MemOpLength->getType()));
+ MI->setArgOperand(2, Constant::getNullValue(MemOpLength->getType()));
return MI;
}
@@ -263,7 +263,7 @@
IntrinsicInst *II = dyn_cast<IntrinsicInst>(&CI);
if (!II) return visitCallSite(&CI);
-
+
// Intrinsics cannot occur in an invoke, so handle them here instead of in
// visitCallSite.
if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(II)) {
@@ -289,11 +289,10 @@
if (GVSrc->isConstant()) {
Module *M = CI.getParent()->getParent()->getParent();
Intrinsic::ID MemCpyID = Intrinsic::memcpy;
- const Type *Tys[3] = { CI.getOperand(1)->getType(),
- CI.getOperand(2)->getType(),
- CI.getOperand(3)->getType() };
- CI.setCalledFunction(
- Intrinsic::getDeclaration(M, MemCpyID, Tys, 3));
+ const Type *Tys[3] = { CI.getArgOperand(0)->getType(),
+ CI.getArgOperand(1)->getType(),
+ CI.getArgOperand(2)->getType() };
+ CI.setCalledFunction(Intrinsic::getDeclaration(M, MemCpyID, Tys, 3));
Changed = true;
}
}
@@ -313,7 +312,7 @@
if (Instruction *I = SimplifyMemSet(MSI))
return I;
}
-
+
if (Changed) return II;
}
@@ -324,10 +323,10 @@
if (!TD) break;
const Type *ReturnTy = CI.getType();
- bool Min = (cast<ConstantInt>(II->getOperand(2))->getZExtValue() == 1);
+ bool Min = (cast<ConstantInt>(II->getArgOperand(1))->getZExtValue() == 1);
// Get to the real allocated thing and offset as fast as possible.
- Value *Op1 = II->getOperand(1)->stripPointerCasts();
+ Value *Op1 = II->getArgOperand(0)->stripPointerCasts();
// If we've stripped down to a single global variable that we
// can know the size of then just return that.
@@ -395,7 +394,6 @@
Constant *RetVal = ConstantInt::get(ReturnTy, Size-Offset);
return ReplaceInstUsesWith(CI, RetVal);
-
}
// Do not return "I don't know" here. Later optimization passes could
@@ -404,45 +402,45 @@
}
case Intrinsic::bswap:
// bswap(bswap(x)) -> x
- if (IntrinsicInst *Operand = dyn_cast<IntrinsicInst>(II->getOperand(1)))
+ if (IntrinsicInst *Operand = dyn_cast<IntrinsicInst>(II->getArgOperand(0)))
if (Operand->getIntrinsicID() == Intrinsic::bswap)
- return ReplaceInstUsesWith(CI, Operand->getOperand(1));
+ return ReplaceInstUsesWith(CI, Operand->getArgOperand(0));
// bswap(trunc(bswap(x))) -> trunc(lshr(x, c))
- if (TruncInst *TI = dyn_cast<TruncInst>(II->getOperand(1))) {
+ if (TruncInst *TI = dyn_cast<TruncInst>(II->getArgOperand(0))) {
if (IntrinsicInst *Operand = dyn_cast<IntrinsicInst>(TI->getOperand(0)))
if (Operand->getIntrinsicID() == Intrinsic::bswap) {
unsigned C = Operand->getType()->getPrimitiveSizeInBits() -
TI->getType()->getPrimitiveSizeInBits();
Value *CV = ConstantInt::get(Operand->getType(), C);
- Value *V = Builder->CreateLShr(Operand->getOperand(1), CV);
+ Value *V = Builder->CreateLShr(Operand->getArgOperand(0), CV);
return new TruncInst(V, TI->getType());
}
}
break;
case Intrinsic::powi:
- if (ConstantInt *Power = dyn_cast<ConstantInt>(II->getOperand(2))) {
+ if (ConstantInt *Power = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
// powi(x, 0) -> 1.0
if (Power->isZero())
return ReplaceInstUsesWith(CI, ConstantFP::get(CI.getType(), 1.0));
// powi(x, 1) -> x
if (Power->isOne())
- return ReplaceInstUsesWith(CI, II->getOperand(1));
+ return ReplaceInstUsesWith(CI, II->getArgOperand(0));
// powi(x, -1) -> 1/x
if (Power->isAllOnesValue())
return BinaryOperator::CreateFDiv(ConstantFP::get(CI.getType(), 1.0),
- II->getOperand(1));
+ II->getArgOperand(0));
}
break;
case Intrinsic::cttz: {
// If all bits below the first known one are known zero,
// this value is constant.
- const IntegerType *IT = cast<IntegerType>(II->getOperand(1)->getType());
+ const IntegerType *IT = cast<IntegerType>(II->getArgOperand(0)->getType());
uint32_t BitWidth = IT->getBitWidth();
APInt KnownZero(BitWidth, 0);
APInt KnownOne(BitWidth, 0);
- ComputeMaskedBits(II->getOperand(1), APInt::getAllOnesValue(BitWidth),
+ ComputeMaskedBits(II->getArgOperand(0), APInt::getAllOnesValue(BitWidth),
KnownZero, KnownOne);
unsigned TrailingZeros = KnownOne.countTrailingZeros();
APInt Mask(APInt::getLowBitsSet(BitWidth, TrailingZeros));
@@ -455,11 +453,11 @@
case Intrinsic::ctlz: {
// If all bits above the first known one are known zero,
// this value is constant.
- const IntegerType *IT = cast<IntegerType>(II->getOperand(1)->getType());
+ const IntegerType *IT = cast<IntegerType>(II->getArgOperand(0)->getType());
uint32_t BitWidth = IT->getBitWidth();
APInt KnownZero(BitWidth, 0);
APInt KnownOne(BitWidth, 0);
- ComputeMaskedBits(II->getOperand(1), APInt::getAllOnesValue(BitWidth),
+ ComputeMaskedBits(II->getArgOperand(0), APInt::getAllOnesValue(BitWidth),
KnownZero, KnownOne);
unsigned LeadingZeros = KnownOne.countLeadingZeros();
APInt Mask(APInt::getHighBitsSet(BitWidth, LeadingZeros));
@@ -470,8 +468,8 @@
}
break;
case Intrinsic::uadd_with_overflow: {
- Value *LHS = II->getOperand(1), *RHS = II->getOperand(2);
- const IntegerType *IT = cast<IntegerType>(II->getOperand(1)->getType());
+ Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
+ const IntegerType *IT = cast<IntegerType>(II->getArgOperand(0)->getType());
uint32_t BitWidth = IT->getBitWidth();
APInt Mask = APInt::getSignBit(BitWidth);
APInt LHSKnownZero(BitWidth, 0);
@@ -515,19 +513,19 @@
// FALL THROUGH uadd into sadd
case Intrinsic::sadd_with_overflow:
// Canonicalize constants into the RHS.
- if (isa<Constant>(II->getOperand(1)) &&
- !isa<Constant>(II->getOperand(2))) {
- Value *LHS = II->getOperand(1);
- II->setOperand(1, II->getOperand(2));
- II->setOperand(2, LHS);
+ if (isa<Constant>(II->getArgOperand(0)) &&
+ !isa<Constant>(II->getArgOperand(1))) {
+ Value *LHS = II->getArgOperand(0);
+ II->setArgOperand(0, II->getArgOperand(1));
+ II->setArgOperand(1, LHS);
return II;
}
// X + undef -> undef
- if (isa<UndefValue>(II->getOperand(2)))
+ if (isa<UndefValue>(II->getArgOperand(1)))
return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));
- if (ConstantInt *RHS = dyn_cast<ConstantInt>(II->getOperand(2))) {
+ if (ConstantInt *RHS = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
// X + 0 -> {X, false}
if (RHS->isZero()) {
Constant *V[] = {
@@ -535,7 +533,7 @@
ConstantInt::getFalse(II->getContext())
};
Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false);
- return InsertValueInst::Create(Struct, II->getOperand(1), 0);
+ return InsertValueInst::Create(Struct, II->getArgOperand(0), 0);
}
}
break;
@@ -543,38 +541,38 @@
case Intrinsic::ssub_with_overflow:
// undef - X -> undef
// X - undef -> undef
- if (isa<UndefValue>(II->getOperand(1)) ||
- isa<UndefValue>(II->getOperand(2)))
+ if (isa<UndefValue>(II->getArgOperand(0)) ||
+ isa<UndefValue>(II->getArgOperand(1)))
return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));
- if (ConstantInt *RHS = dyn_cast<ConstantInt>(II->getOperand(2))) {
+ if (ConstantInt *RHS = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
// X - 0 -> {X, false}
if (RHS->isZero()) {
Constant *V[] = {
- UndefValue::get(II->getOperand(1)->getType()),
+ UndefValue::get(II->getArgOperand(0)->getType()),
ConstantInt::getFalse(II->getContext())
};
Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false);
- return InsertValueInst::Create(Struct, II->getOperand(1), 0);
+ return InsertValueInst::Create(Struct, II->getArgOperand(0), 0);
}
}
break;
case Intrinsic::umul_with_overflow:
case Intrinsic::smul_with_overflow:
// Canonicalize constants into the RHS.
- if (isa<Constant>(II->getOperand(1)) &&
- !isa<Constant>(II->getOperand(2))) {
- Value *LHS = II->getOperand(1);
- II->setOperand(1, II->getOperand(2));
- II->setOperand(2, LHS);
+ if (isa<Constant>(II->getArgOperand(0)) &&
+ !isa<Constant>(II->getArgOperand(1))) {
+ Value *LHS = II->getArgOperand(0);
+ II->setArgOperand(0, II->getArgOperand(1));
+ II->setArgOperand(1, LHS);
return II;
}
// X * undef -> undef
- if (isa<UndefValue>(II->getOperand(2)))
+ if (isa<UndefValue>(II->getArgOperand(1)))
return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));
- if (ConstantInt *RHSI = dyn_cast<ConstantInt>(II->getOperand(2))) {
+ if (ConstantInt *RHSI = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
// X*0 -> {0, false}
if (RHSI->isZero())
return ReplaceInstUsesWith(CI, Constant::getNullValue(II->getType()));
@@ -582,11 +580,11 @@
// X * 1 -> {X, false}
if (RHSI->equalsInt(1)) {
Constant *V[] = {
- UndefValue::get(II->getOperand(1)->getType()),
+ UndefValue::get(II->getArgOperand(0)->getType()),
ConstantInt::getFalse(II->getContext())
};
Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false);
- return InsertValueInst::Create(Struct, II->getOperand(1), 0);
+ return InsertValueInst::Create(Struct, II->getArgOperand(0), 0);
}
}
break;
@@ -597,8 +595,8 @@
case Intrinsic::x86_sse2_loadu_dq:
// Turn PPC lvx -> load if the pointer is known aligned.
// Turn X86 loadups -> load if the pointer is known aligned.
- if (GetOrEnforceKnownAlignment(II->getOperand(1), 16) >= 16) {
- Value *Ptr = Builder->CreateBitCast(II->getOperand(1),
+ if (GetOrEnforceKnownAlignment(II->getArgOperand(0), 16) >= 16) {
+ Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0),
PointerType::getUnqual(II->getType()));
return new LoadInst(Ptr);
}
@@ -606,22 +604,22 @@
case Intrinsic::ppc_altivec_stvx:
case Intrinsic::ppc_altivec_stvxl:
// Turn stvx -> store if the pointer is known aligned.
- if (GetOrEnforceKnownAlignment(II->getOperand(2), 16) >= 16) {
+ if (GetOrEnforceKnownAlignment(II->getArgOperand(1), 16) >= 16) {
const Type *OpPtrTy =
- PointerType::getUnqual(II->getOperand(1)->getType());
- Value *Ptr = Builder->CreateBitCast(II->getOperand(2), OpPtrTy);
- return new StoreInst(II->getOperand(1), Ptr);
+ PointerType::getUnqual(II->getArgOperand(0)->getType());
+ Value *Ptr = Builder->CreateBitCast(II->getArgOperand(1), OpPtrTy);
+ return new StoreInst(II->getArgOperand(0), Ptr);
}
break;
case Intrinsic::x86_sse_storeu_ps:
case Intrinsic::x86_sse2_storeu_pd:
case Intrinsic::x86_sse2_storeu_dq:
// Turn X86 storeu -> store if the pointer is known aligned.
- if (GetOrEnforceKnownAlignment(II->getOperand(1), 16) >= 16) {
+ if (GetOrEnforceKnownAlignment(II->getArgOperand(0), 16) >= 16) {
const Type *OpPtrTy =
- PointerType::getUnqual(II->getOperand(2)->getType());
- Value *Ptr = Builder->CreateBitCast(II->getOperand(1), OpPtrTy);
- return new StoreInst(II->getOperand(2), Ptr);
+ PointerType::getUnqual(II->getArgOperand(1)->getType());
+ Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0), OpPtrTy);
+ return new StoreInst(II->getArgOperand(1), Ptr);
}
break;
@@ -629,12 +627,12 @@
// These intrinsics only demands the 0th element of its input vector. If
// we can simplify the input based on that, do so now.
unsigned VWidth =
- cast<VectorType>(II->getOperand(1)->getType())->getNumElements();
+ cast<VectorType>(II->getArgOperand(0)->getType())->getNumElements();
APInt DemandedElts(VWidth, 1);
APInt UndefElts(VWidth, 0);
- if (Value *V = SimplifyDemandedVectorElts(II->getOperand(1), DemandedElts,
+ if (Value *V = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts,
UndefElts)) {
- II->setOperand(1, V);
+ II->setArgOperand(0, V);
return II;
}
break;
@@ -642,7 +640,7 @@
case Intrinsic::ppc_altivec_vperm:
// Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant.
- if (ConstantVector *Mask = dyn_cast<ConstantVector>(II->getOperand(3))) {
+ if (ConstantVector *Mask = dyn_cast<ConstantVector>(II->getArgOperand(2))) {
assert(Mask->getNumOperands() == 16 && "Bad type for intrinsic!");
// Check that all of the elements are integer constants or undefs.
@@ -657,8 +655,8 @@
if (AllEltsOk) {
// Cast the input vectors to byte vectors.
- Value *Op0 = Builder->CreateBitCast(II->getOperand(1), Mask->getType());
- Value *Op1 = Builder->CreateBitCast(II->getOperand(2), Mask->getType());
+ Value *Op0 = Builder->CreateBitCast(II->getArgOperand(0), Mask->getType());
+ Value *Op1 = Builder->CreateBitCast(II->getArgOperand(1), Mask->getType());
Value *Result = UndefValue::get(Op0->getType());
// Only extract each element once.
@@ -691,7 +689,7 @@
case Intrinsic::stackrestore: {
// If the save is right next to the restore, remove the restore. This can
// happen when variable allocas are DCE'd.
- if (IntrinsicInst *SS = dyn_cast<IntrinsicInst>(II->getOperand(1))) {
+ if (IntrinsicInst *SS = dyn_cast<IntrinsicInst>(II->getArgOperand(0))) {
if (SS->getIntrinsicID() == Intrinsic::stacksave) {
BasicBlock::iterator BI = SS;
if (&*++BI == II)
@@ -774,13 +772,13 @@
NewInstruction = IC->ReplaceInstUsesWith(*CI, With);
}
bool isFoldable(unsigned SizeCIOp, unsigned SizeArgOp, bool isString) const {
- if (ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getOperand(SizeCIOp))) {
+ if (ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getArgOperand(SizeCIOp - CallInst::ArgOffset))) {
if (SizeCI->isAllOnesValue())
return true;
if (isString)
return SizeCI->getZExtValue() >=
- GetStringLength(CI->getOperand(SizeArgOp));
- if (ConstantInt *Arg = dyn_cast<ConstantInt>(CI->getOperand(SizeArgOp)))
+ GetStringLength(CI->getArgOperand(SizeArgOp - CallInst::ArgOffset));
+ if (ConstantInt *Arg = dyn_cast<ConstantInt>(CI->getArgOperand(SizeArgOp - CallInst::ArgOffset)))
return SizeCI->getZExtValue() >= Arg->getZExtValue();
}
return false;
@@ -848,7 +846,7 @@
UndefValue::get(Type::getInt1PtrTy(Callee->getContext())),
CS.getInstruction());
- // If CS dues not return void then replaceAllUsesWith undef.
+ // If CS does not return void then replaceAllUsesWith undef.
// This allows ValueHandlers and custom metadata to adjust itself.
if (!CS.getInstruction()->getType()->isVoidTy())
CS.getInstruction()->
@@ -1145,7 +1143,7 @@
IntrinsicInst *Tramp =
cast<IntrinsicInst>(cast<BitCastInst>(Callee)->getOperand(0));
- Function *NestF = cast<Function>(Tramp->getOperand(2)->stripPointerCasts());
+ Function *NestF = cast<Function>(Tramp->getArgOperand(1)->stripPointerCasts());
const PointerType *NestFPTy = cast<PointerType>(NestF->getType());
const FunctionType *NestFTy = cast<FunctionType>(NestFPTy->getElementType());
@@ -1186,7 +1184,7 @@
do {
if (Idx == NestIdx) {
// Add the chain argument and attributes.
- Value *NestVal = Tramp->getOperand(3);
+ Value *NestVal = Tramp->getArgOperand(2);
if (NestVal->getType() != NestTy)
NestVal = new BitCastInst(NestVal, NestTy, "nest", Caller);
NewArgs.push_back(NestVal);
Modified: llvm/branches/wendling/eh/lib/Transforms/InstCombine/InstCombineCompares.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/InstCombine/InstCombineCompares.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/InstCombine/InstCombineCompares.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/InstCombine/InstCombineCompares.cpp Fri Jul 2 04:57:13 2010
@@ -1423,7 +1423,7 @@
switch (II->getIntrinsicID()) {
case Intrinsic::bswap:
Worklist.Add(II);
- ICI.setOperand(0, II->getOperand(1));
+ ICI.setOperand(0, II->getArgOperand(0));
ICI.setOperand(1, ConstantInt::get(II->getContext(), RHSV.byteSwap()));
return &ICI;
case Intrinsic::ctlz:
@@ -1431,7 +1431,7 @@
// ctz(A) == bitwidth(a) -> A == 0 and likewise for !=
if (RHSV == RHS->getType()->getBitWidth()) {
Worklist.Add(II);
- ICI.setOperand(0, II->getOperand(1));
+ ICI.setOperand(0, II->getArgOperand(0));
ICI.setOperand(1, ConstantInt::get(RHS->getType(), 0));
return &ICI;
}
@@ -1440,7 +1440,7 @@
// popcount(A) == 0 -> A == 0 and likewise for !=
if (RHS->isZero()) {
Worklist.Add(II);
- ICI.setOperand(0, II->getOperand(1));
+ ICI.setOperand(0, II->getArgOperand(0));
ICI.setOperand(1, RHS);
return &ICI;
}
Modified: llvm/branches/wendling/eh/lib/Transforms/InstCombine/InstCombineShifts.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/InstCombine/InstCombineShifts.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/InstCombine/InstCombineShifts.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/InstCombine/InstCombineShifts.cpp Fri Jul 2 04:57:13 2010
@@ -404,7 +404,7 @@
isPowerOf2_32(BitWidth) && Log2_32(BitWidth) == Op1C->getZExtValue()){
bool isCtPop = II->getIntrinsicID() == Intrinsic::ctpop;
Constant *RHS = ConstantInt::getSigned(Op0->getType(), isCtPop ? -1:0);
- Value *Cmp = Builder->CreateICmpEQ(II->getOperand(1), RHS);
+ Value *Cmp = Builder->CreateICmpEQ(II->getArgOperand(0), RHS);
return new ZExtInst(Cmp, II->getType());
}
}
Modified: llvm/branches/wendling/eh/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp Fri Jul 2 04:57:13 2010
@@ -732,10 +732,10 @@
// the right place.
Instruction *NewVal;
if (InputBit > ResultBit)
- NewVal = BinaryOperator::CreateLShr(I->getOperand(1),
+ NewVal = BinaryOperator::CreateLShr(II->getArgOperand(0),
ConstantInt::get(I->getType(), InputBit-ResultBit));
else
- NewVal = BinaryOperator::CreateShl(I->getOperand(1),
+ NewVal = BinaryOperator::CreateShl(II->getArgOperand(0),
ConstantInt::get(I->getType(), ResultBit-InputBit));
NewVal->takeName(I);
return InsertNewInstBefore(NewVal, *I);
@@ -1052,12 +1052,12 @@
case Intrinsic::x86_sse2_mul_sd:
case Intrinsic::x86_sse2_min_sd:
case Intrinsic::x86_sse2_max_sd:
- TmpV = SimplifyDemandedVectorElts(II->getOperand(1), DemandedElts,
+ TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts,
UndefElts, Depth+1);
- if (TmpV) { II->setOperand(1, TmpV); MadeChange = true; }
- TmpV = SimplifyDemandedVectorElts(II->getOperand(2), DemandedElts,
+ if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; }
+ TmpV = SimplifyDemandedVectorElts(II->getArgOperand(1), DemandedElts,
UndefElts2, Depth+1);
- if (TmpV) { II->setOperand(2, TmpV); MadeChange = true; }
+ if (TmpV) { II->setArgOperand(1, TmpV); MadeChange = true; }
// If only the low elt is demanded and this is a scalarizable intrinsic,
// scalarize it now.
@@ -1069,8 +1069,8 @@
case Intrinsic::x86_sse2_sub_sd:
case Intrinsic::x86_sse2_mul_sd:
// TODO: Lower MIN/MAX/ABS/etc
- Value *LHS = II->getOperand(1);
- Value *RHS = II->getOperand(2);
+ Value *LHS = II->getArgOperand(0);
+ Value *RHS = II->getArgOperand(1);
// Extract the element as scalars.
LHS = InsertNewInstBefore(ExtractElementInst::Create(LHS,
ConstantInt::get(Type::getInt32Ty(I->getContext()), 0U)), *II);
Modified: llvm/branches/wendling/eh/lib/Transforms/InstCombine/InstructionCombining.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/InstCombine/InstructionCombining.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/InstCombine/InstructionCombining.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/InstCombine/InstructionCombining.cpp Fri Jul 2 04:57:13 2010
@@ -756,8 +756,8 @@
-Instruction *InstCombiner::visitFree(Instruction &FI) {
- Value *Op = FI.getOperand(1);
+Instruction *InstCombiner::visitFree(CallInst &FI) {
+ Value *Op = FI.getArgOperand(0);
// free undef -> unreachable.
if (isa<UndefValue>(Op)) {
@@ -925,7 +925,7 @@
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Agg)) {
// We're extracting from an intrinsic, see if we're the only user, which
// allows us to simplify multiple result intrinsics to simpler things that
- // just get one value..
+ // just get one value.
if (II->hasOneUse()) {
// Check if we're grabbing the overflow bit or the result of a 'with
// overflow' intrinsic. If it's the latter we can remove the intrinsic
@@ -934,7 +934,7 @@
case Intrinsic::uadd_with_overflow:
case Intrinsic::sadd_with_overflow:
if (*EV.idx_begin() == 0) { // Normal result.
- Value *LHS = II->getOperand(1), *RHS = II->getOperand(2);
+ Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
II->replaceAllUsesWith(UndefValue::get(II->getType()));
EraseInstFromFunction(*II);
return BinaryOperator::CreateAdd(LHS, RHS);
@@ -943,7 +943,7 @@
case Intrinsic::usub_with_overflow:
case Intrinsic::ssub_with_overflow:
if (*EV.idx_begin() == 0) { // Normal result.
- Value *LHS = II->getOperand(1), *RHS = II->getOperand(2);
+ Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
II->replaceAllUsesWith(UndefValue::get(II->getType()));
EraseInstFromFunction(*II);
return BinaryOperator::CreateSub(LHS, RHS);
@@ -952,7 +952,7 @@
case Intrinsic::umul_with_overflow:
case Intrinsic::smul_with_overflow:
if (*EV.idx_begin() == 0) { // Normal result.
- Value *LHS = II->getOperand(1), *RHS = II->getOperand(2);
+ Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
II->replaceAllUsesWith(UndefValue::get(II->getType()));
EraseInstFromFunction(*II);
return BinaryOperator::CreateMul(LHS, RHS);
Modified: llvm/branches/wendling/eh/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp Fri Jul 2 04:57:13 2010
@@ -143,7 +143,7 @@
ProfileInfo::Edge edge = ProfileInfo::getEdge(0,entry);
if (!std::binary_search(MST.begin(), MST.end(), edge)) {
printEdgeCounter(edge,entry,i);
- IncrementCounterInBlock(entry, i, Counters); NumEdgesInserted++;
+ IncrementCounterInBlock(entry, i, Counters); ++NumEdgesInserted;
Initializer[i++] = (Zero);
} else{
Initializer[i++] = (Uncounted);
@@ -166,7 +166,7 @@
ProfileInfo::Edge edge = ProfileInfo::getEdge(BB,0);
if (!std::binary_search(MST.begin(), MST.end(), edge)) {
printEdgeCounter(edge,BB,i);
- IncrementCounterInBlock(BB, i, Counters); NumEdgesInserted++;
+ IncrementCounterInBlock(BB, i, Counters); ++NumEdgesInserted;
Initializer[i++] = (Zero);
} else{
Initializer[i++] = (Uncounted);
@@ -189,11 +189,11 @@
if (TI->getNumSuccessors() == 1) {
// Insert counter at the start of the block
printEdgeCounter(edge,BB,i);
- IncrementCounterInBlock(BB, i, Counters); NumEdgesInserted++;
+ IncrementCounterInBlock(BB, i, Counters); ++NumEdgesInserted;
} else {
// Insert counter at the start of the block
printEdgeCounter(edge,Succ,i);
- IncrementCounterInBlock(Succ, i, Counters); NumEdgesInserted++;
+ IncrementCounterInBlock(Succ, i, Counters); ++NumEdgesInserted;
}
Initializer[i++] = (Zero);
} else {
Modified: llvm/branches/wendling/eh/lib/Transforms/Instrumentation/ProfilingUtils.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Instrumentation/ProfilingUtils.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Instrumentation/ProfilingUtils.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Instrumentation/ProfilingUtils.cpp Fri Jul 2 04:57:13 2010
@@ -61,8 +61,8 @@
}
Args[3] = ConstantInt::get(Type::getInt32Ty(Context), NumElements);
- Instruction *InitCall = CallInst::Create(InitFn, Args.begin(), Args.end(),
- "newargc", InsertPos);
+ CallInst *InitCall = CallInst::Create(InitFn, Args.begin(), Args.end(),
+ "newargc", InsertPos);
// If argc or argv are not available in main, just pass null values in.
Function::arg_iterator AI;
@@ -73,10 +73,10 @@
if (AI->getType() != ArgVTy) {
Instruction::CastOps opcode = CastInst::getCastOpcode(AI, false, ArgVTy,
false);
- InitCall->setOperand(2,
+ InitCall->setArgOperand(1,
CastInst::Create(opcode, AI, ArgVTy, "argv.cast", InitCall));
} else {
- InitCall->setOperand(2, AI);
+ InitCall->setArgOperand(1, AI);
}
/* FALL THROUGH */
@@ -93,12 +93,12 @@
}
opcode = CastInst::getCastOpcode(AI, true,
Type::getInt32Ty(Context), true);
- InitCall->setOperand(1,
+ InitCall->setArgOperand(0,
CastInst::Create(opcode, AI, Type::getInt32Ty(Context),
"argc.cast", InitCall));
} else {
AI->replaceAllUsesWith(InitCall);
- InitCall->setOperand(1, AI);
+ InitCall->setArgOperand(0, AI);
}
case 0: break;
Modified: llvm/branches/wendling/eh/lib/Transforms/Scalar/ADCE.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Scalar/ADCE.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Scalar/ADCE.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Scalar/ADCE.cpp Fri Jul 2 04:57:13 2010
@@ -83,7 +83,7 @@
for (SmallVector<Instruction*, 1024>::iterator I = worklist.begin(),
E = worklist.end(); I != E; ++I) {
- NumRemoved++;
+ ++NumRemoved;
(*I)->eraseFromParent();
}
Modified: llvm/branches/wendling/eh/lib/Transforms/Scalar/CodeGenPrepare.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Scalar/CodeGenPrepare.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Scalar/CodeGenPrepare.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Scalar/CodeGenPrepare.cpp Fri Jul 2 04:57:13 2010
@@ -548,7 +548,8 @@
CI->eraseFromParent();
}
bool isFoldable(unsigned SizeCIOp, unsigned, bool) const {
- if (ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getOperand(SizeCIOp)))
+ if (ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getArgOperand(SizeCIOp
+ - CallInst::ArgOffset)))
return SizeCI->isAllOnesValue();
return false;
}
@@ -559,7 +560,7 @@
// Lower all uses of llvm.objectsize.*
IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
if (II && II->getIntrinsicID() == Intrinsic::objectsize) {
- bool Min = (cast<ConstantInt>(II->getOperand(2))->getZExtValue() == 1);
+ bool Min = (cast<ConstantInt>(II->getArgOperand(1))->getZExtValue() == 1);
const Type *ReturnTy = CI->getType();
Constant *RetVal = ConstantInt::get(ReturnTy, Min ? 0 : -1ULL);
CI->replaceAllUsesWith(RetVal);
@@ -759,8 +760,7 @@
}
// Compute the constraint code and ConstraintType to use.
- TLI->ComputeConstraintToUse(OpInfo, SDValue(),
- OpInfo.ConstraintType == TargetLowering::C_Memory);
+ TLI->ComputeConstraintToUse(OpInfo, SDValue());
if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
OpInfo.isIndirect) {
Modified: llvm/branches/wendling/eh/lib/Transforms/Scalar/DeadStoreElimination.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Scalar/DeadStoreElimination.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Scalar/DeadStoreElimination.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Scalar/DeadStoreElimination.cpp Fri Jul 2 04:57:13 2010
@@ -56,7 +56,8 @@
}
bool runOnBasicBlock(BasicBlock &BB);
- bool handleFreeWithNonTrivialDependency(Instruction *F, MemDepResult Dep);
+ bool handleFreeWithNonTrivialDependency(const CallInst *F,
+ MemDepResult Dep);
bool handleEndBlock(BasicBlock &BB);
bool RemoveUndeadPointers(Value *Ptr, uint64_t killPointerSize,
BasicBlock::iterator &BBI,
@@ -123,14 +124,15 @@
if (StoreInst *SI = dyn_cast<StoreInst>(I))
return SI->getPointerOperand();
if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I))
- return MI->getOperand(1);
-
- switch (cast<IntrinsicInst>(I)->getIntrinsicID()) {
+ return MI->getArgOperand(0);
+
+ IntrinsicInst *II = cast<IntrinsicInst>(I);
+ switch (II->getIntrinsicID()) {
default: assert(false && "Unexpected intrinsic!");
case Intrinsic::init_trampoline:
- return I->getOperand(1);
+ return II->getArgOperand(0);
case Intrinsic::lifetime_end:
- return I->getOperand(2);
+ return II->getArgOperand(1);
}
}
@@ -147,12 +149,13 @@
if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I)) {
Len = MI->getLength();
} else {
- switch (cast<IntrinsicInst>(I)->getIntrinsicID()) {
+ IntrinsicInst *II = cast<IntrinsicInst>(I);
+ switch (II->getIntrinsicID()) {
default: assert(false && "Unexpected intrinsic!");
case Intrinsic::init_trampoline:
return -1u;
case Intrinsic::lifetime_end:
- Len = I->getOperand(1);
+ Len = II->getArgOperand(0);
break;
}
}
@@ -201,8 +204,8 @@
if (InstDep.isNonLocal()) continue;
// Handle frees whose dependencies are non-trivial.
- if (isFreeCall(Inst)) {
- MadeChange |= handleFreeWithNonTrivialDependency(Inst, InstDep);
+ if (const CallInst *F = isFreeCall(Inst)) {
+ MadeChange |= handleFreeWithNonTrivialDependency(F, InstDep);
continue;
}
@@ -218,7 +221,7 @@
isElidable(DepStore)) {
// Delete the store and now-dead instructions that feed it.
DeleteDeadInstruction(DepStore);
- NumFastStores++;
+ ++NumFastStores;
MadeChange = true;
// DeleteDeadInstruction can delete the current instruction in loop
@@ -249,7 +252,7 @@
BBI = BB.begin();
else if (BBI != BB.begin()) // Revisit this instruction if possible.
--BBI;
- NumFastStores++;
+ ++NumFastStores;
MadeChange = true;
continue;
}
@@ -270,7 +273,7 @@
BBI = BB.begin();
else if (BBI != BB.begin()) // Revisit this instruction if possible.
--BBI;
- NumFastStores++;
+ ++NumFastStores;
MadeChange = true;
continue;
}
@@ -287,7 +290,8 @@
/// handleFreeWithNonTrivialDependency - Handle frees of entire structures whose
/// dependency is a store to a field of that structure.
-bool DSE::handleFreeWithNonTrivialDependency(Instruction *F, MemDepResult Dep) {
+bool DSE::handleFreeWithNonTrivialDependency(const CallInst *F,
+ MemDepResult Dep) {
AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
Instruction *Dependency = Dep.getInst();
@@ -297,13 +301,13 @@
Value *DepPointer = getPointerOperand(Dependency)->getUnderlyingObject();
// Check for aliasing.
- if (AA.alias(F->getOperand(1), 1, DepPointer, 1) !=
+ if (AA.alias(F->getArgOperand(0), 1, DepPointer, 1) !=
AliasAnalysis::MustAlias)
return false;
// DCE instructions only used to calculate that store
DeleteDeadInstruction(Dependency);
- NumFastStores++;
+ ++NumFastStores;
return true;
}
@@ -349,9 +353,9 @@
if (deadPointers.count(pointerOperand)) {
// DCE instructions only used to calculate that store.
Instruction *Dead = BBI;
- BBI++;
+ ++BBI;
DeleteDeadInstruction(Dead, &deadPointers);
- NumFastStores++;
+ ++NumFastStores;
MadeChange = true;
continue;
}
@@ -371,9 +375,9 @@
// However, if this load is unused and not volatile, we can go ahead and
// remove it, and not have to worry about it making our pointer undead!
if (L->use_empty() && !L->isVolatile()) {
- BBI++;
+ ++BBI;
DeleteDeadInstruction(L, &deadPointers);
- NumFastOther++;
+ ++NumFastOther;
MadeChange = true;
continue;
}
@@ -391,9 +395,9 @@
// Dead alloca's can be DCE'd when we reach them
if (A->use_empty()) {
- BBI++;
+ ++BBI;
DeleteDeadInstruction(A, &deadPointers);
- NumFastOther++;
+ ++NumFastOther;
MadeChange = true;
}
@@ -426,9 +430,9 @@
getPointerSize(*I));
if (A == AliasAnalysis::ModRef)
- modRef++;
+ ++modRef;
else
- other++;
+ ++other;
if (A == AliasAnalysis::ModRef || A == AliasAnalysis::Ref)
dead.push_back(*I);
@@ -442,9 +446,9 @@
} else if (isInstructionTriviallyDead(BBI)) {
// For any non-memory-affecting non-terminators, DCE them as we reach them
Instruction *Inst = BBI;
- BBI++;
+ ++BBI;
DeleteDeadInstruction(Inst, &deadPointers);
- NumFastOther++;
+ ++NumFastOther;
MadeChange = true;
continue;
}
@@ -497,7 +501,7 @@
// Remove it!
++BBI;
DeleteDeadInstruction(S, &deadPointers);
- NumFastStores++;
+ ++NumFastStores;
MadeChange = true;
continue;
Modified: llvm/branches/wendling/eh/lib/Transforms/Scalar/GVN.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Scalar/GVN.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Scalar/GVN.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Scalar/GVN.cpp Fri Jul 2 04:57:13 2010
@@ -272,7 +272,8 @@
e.function = C->getCalledFunction();
e.opcode = Expression::CALL;
- for (CallInst::op_iterator I = C->op_begin()+1, E = C->op_end();
+ CallSite CS(C);
+ for (CallInst::op_iterator I = CS.arg_begin(), E = CS.arg_end();
I != E; ++I)
e.varargs.push_back(lookup_or_add(*I));
@@ -448,14 +449,14 @@
if (local_dep.isDef()) {
CallInst* local_cdep = cast<CallInst>(local_dep.getInst());
- if (local_cdep->getNumOperands() != C->getNumOperands()) {
+ if (local_cdep->getNumArgOperands() != C->getNumArgOperands()) {
valueNumbering[C] = nextValueNumber;
return nextValueNumber++;
}
- for (unsigned i = 1; i < C->getNumOperands(); ++i) {
- uint32_t c_vn = lookup_or_add(C->getOperand(i));
- uint32_t cd_vn = lookup_or_add(local_cdep->getOperand(i));
+ for (unsigned i = 0, e = C->getNumArgOperands(); i < e; ++i) {
+ uint32_t c_vn = lookup_or_add(C->getArgOperand(i));
+ uint32_t cd_vn = lookup_or_add(local_cdep->getArgOperand(i));
if (c_vn != cd_vn) {
valueNumbering[C] = nextValueNumber;
return nextValueNumber++;
@@ -505,13 +506,13 @@
return nextValueNumber++;
}
- if (cdep->getNumOperands() != C->getNumOperands()) {
+ if (cdep->getNumArgOperands() != C->getNumArgOperands()) {
valueNumbering[C] = nextValueNumber;
return nextValueNumber++;
}
- for (unsigned i = 1; i < C->getNumOperands(); ++i) {
- uint32_t c_vn = lookup_or_add(C->getOperand(i));
- uint32_t cd_vn = lookup_or_add(cdep->getOperand(i));
+ for (unsigned i = 0, e = C->getNumArgOperands(); i < e; ++i) {
+ uint32_t c_vn = lookup_or_add(C->getArgOperand(i));
+ uint32_t cd_vn = lookup_or_add(cdep->getArgOperand(i));
if (c_vn != cd_vn) {
valueNumbering[C] = nextValueNumber;
return nextValueNumber++;
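For reference, a compressed sketch of the shape of the comparison both value-numbering hunks now perform (illustrative only; the helper name is made up): argument counts and argument operands come from the call-argument API instead of looping over raw operands from index 1 to skip the callee.

    #include "llvm/Instructions.h"
    using namespace llvm;

    // Do two calls pass pairwise-identical argument Values?
    static bool haveSameArgs(const CallInst *A, const CallInst *B) {
      if (A->getNumArgOperands() != B->getNumArgOperands())
        return false;
      for (unsigned i = 0, e = A->getNumArgOperands(); i != e; ++i)
        if (A->getArgOperand(i) != B->getArgOperand(i))
          return false;
      return true;
    }

(GVN itself compares the operands' value numbers rather than the Values, but the iteration pattern is the same.)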
@@ -1501,7 +1502,7 @@
MD->invalidateCachedPointerInfo(V);
VN.erase(LI);
toErase.push_back(LI);
- NumGVNLoad++;
+ ++NumGVNLoad;
return true;
}
@@ -1724,7 +1725,7 @@
MD->invalidateCachedPointerInfo(V);
VN.erase(LI);
toErase.push_back(LI);
- NumPRELoad++;
+ ++NumPRELoad;
return true;
}
@@ -1785,7 +1786,7 @@
MD->invalidateCachedPointerInfo(AvailVal);
VN.erase(L);
toErase.push_back(L);
- NumGVNLoad++;
+ ++NumGVNLoad;
return true;
}
@@ -1831,7 +1832,7 @@
MD->invalidateCachedPointerInfo(StoredVal);
VN.erase(L);
toErase.push_back(L);
- NumGVNLoad++;
+ ++NumGVNLoad;
return true;
}
@@ -1861,7 +1862,7 @@
MD->invalidateCachedPointerInfo(DepLI);
VN.erase(L);
toErase.push_back(L);
- NumGVNLoad++;
+ ++NumGVNLoad;
return true;
}
@@ -1872,7 +1873,7 @@
L->replaceAllUsesWith(UndefValue::get(L->getType()));
VN.erase(L);
toErase.push_back(L);
- NumGVNLoad++;
+ ++NumGVNLoad;
return true;
}
@@ -1883,7 +1884,7 @@
L->replaceAllUsesWith(UndefValue::get(L->getType()));
VN.erase(L);
toErase.push_back(L);
- NumGVNLoad++;
+ ++NumGVNLoad;
return true;
}
}
@@ -2015,7 +2016,7 @@
BasicBlock *BB = FI;
++FI;
bool removedBlock = MergeBlockIntoPredecessor(BB, this);
- if (removedBlock) NumGVNBlocks++;
+ if (removedBlock) ++NumGVNBlocks;
Changed |= removedBlock;
}
@@ -2142,12 +2143,12 @@
localAvail[*PI]->table.find(ValNo);
if (predV == localAvail[*PI]->table.end()) {
PREPred = *PI;
- NumWithout++;
+ ++NumWithout;
} else if (predV->second == CurInst) {
NumWithout = 2;
} else {
predMap[*PI] = predV->second;
- NumWith++;
+ ++NumWith;
}
}
@@ -2202,7 +2203,7 @@
PREInstr->setName(CurInst->getName() + ".pre");
predMap[PREPred] = PREInstr;
VN.add(PREInstr, ValNo);
- NumGVNPRE++;
+ ++NumGVNPRE;
// Update the availability map to include the new instruction.
localAvail[PREPred]->table.insert(std::make_pair(ValNo, PREInstr));
Modified: llvm/branches/wendling/eh/lib/Transforms/Scalar/LoopDeletion.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Scalar/LoopDeletion.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Scalar/LoopDeletion.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Scalar/LoopDeletion.cpp Fri Jul 2 04:57:13 2010
@@ -83,7 +83,7 @@
if (!L->makeLoopInvariant(I, Changed, Preheader->getTerminator()))
return false;
- BI++;
+ ++BI;
}
// Make sure that no instructions in the block have potential side-effects.
@@ -176,7 +176,7 @@
BasicBlock::iterator BI = exitBlock->begin();
while (PHINode* P = dyn_cast<PHINode>(BI)) {
P->replaceUsesOfWith(exitingBlock, preheader);
- BI++;
+ ++BI;
}
// Update the dominator tree and remove the instructions and blocks that will
@@ -226,7 +226,7 @@
LPM.deleteLoopFromQueue(L);
Changed = true;
- NumDeleted++;
+ ++NumDeleted;
return Changed;
}
Modified: llvm/branches/wendling/eh/lib/Transforms/Scalar/LoopIndexSplit.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Scalar/LoopIndexSplit.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Scalar/LoopIndexSplit.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Scalar/LoopIndexSplit.cpp Fri Jul 2 04:57:13 2010
@@ -649,7 +649,7 @@
}
}
}
- NumRestrictBounds++;
+ ++NumRestrictBounds;
return true;
}
@@ -1016,13 +1016,13 @@
BSV = getMax(BSV, IVStartValue, Sign, PHTerm);
// [*] Clone Loop
- DenseMap<const Value *, Value *> ValueMap;
- Loop *BLoop = CloneLoop(L, LPM, LI, ValueMap, this);
+ ValueMap<const Value *, Value *> VMap;
+ Loop *BLoop = CloneLoop(L, LPM, LI, VMap, this);
Loop *ALoop = L;
// [*] ALoop's exiting edge enters BLoop's header.
// ALoop's original exit block becomes BLoop's exit block.
- PHINode *B_IndVar = cast<PHINode>(ValueMap[IndVar]);
+ PHINode *B_IndVar = cast<PHINode>(VMap[IndVar]);
BasicBlock *A_ExitingBlock = ExitCondition->getParent();
BranchInst *A_ExitInsn =
dyn_cast<BranchInst>(A_ExitingBlock->getTerminator());
@@ -1047,7 +1047,7 @@
for (BasicBlock::iterator BI = ALoop->getHeader()->begin(),
BE = ALoop->getHeader()->end(); BI != BE; ++BI) {
if (PHINode *PN = dyn_cast<PHINode>(BI)) {
- PHINode *PNClone = cast<PHINode>(ValueMap[PN]);
+ PHINode *PNClone = cast<PHINode>(VMap[PN]);
InverseMap[PNClone] = PN;
} else
break;
@@ -1085,11 +1085,11 @@
// block. Remove incoming PHINode values from ALoop's exiting block.
// Add new incoming values from BLoop's incoming exiting value.
// Update BLoop exit block's dominator info.
- BasicBlock *B_ExitingBlock = cast<BasicBlock>(ValueMap[A_ExitingBlock]);
+ BasicBlock *B_ExitingBlock = cast<BasicBlock>(VMap[A_ExitingBlock]);
for (BasicBlock::iterator BI = B_ExitBlock->begin(), BE = B_ExitBlock->end();
BI != BE; ++BI) {
if (PHINode *PN = dyn_cast<PHINode>(BI)) {
- PN->addIncoming(ValueMap[PN->getIncomingValueForBlock(A_ExitingBlock)],
+ PN->addIncoming(VMap[PN->getIncomingValueForBlock(A_ExitingBlock)],
B_ExitingBlock);
PN->removeIncomingValue(A_ExitingBlock);
} else
@@ -1131,7 +1131,7 @@
removeBlocks(A_InactiveBranch, L, A_ActiveBranch);
//[*] Eliminate split condition's inactive branch in from BLoop.
- BasicBlock *B_SplitCondBlock = cast<BasicBlock>(ValueMap[A_SplitCondBlock]);
+ BasicBlock *B_SplitCondBlock = cast<BasicBlock>(VMap[A_SplitCondBlock]);
BranchInst *B_BR = cast<BranchInst>(B_SplitCondBlock->getTerminator());
BasicBlock *B_InactiveBranch = NULL;
BasicBlock *B_ActiveBranch = NULL;
@@ -1146,9 +1146,9 @@
//[*] Move exit condition into split condition block to avoid
// executing a dead loop iteration.
- ICmpInst *B_ExitCondition = cast<ICmpInst>(ValueMap[ExitCondition]);
- Instruction *B_IndVarIncrement = cast<Instruction>(ValueMap[IVIncrement]);
- ICmpInst *B_SplitCondition = cast<ICmpInst>(ValueMap[SplitCondition]);
+ ICmpInst *B_ExitCondition = cast<ICmpInst>(VMap[ExitCondition]);
+ Instruction *B_IndVarIncrement = cast<Instruction>(VMap[IVIncrement]);
+ ICmpInst *B_SplitCondition = cast<ICmpInst>(VMap[SplitCondition]);
moveExitCondition(A_SplitCondBlock, A_ActiveBranch, A_ExitBlock, ExitCondition,
cast<ICmpInst>(SplitCondition), IndVar, IVIncrement,
@@ -1159,7 +1159,7 @@
B_SplitCondition, B_IndVar, B_IndVarIncrement,
BLoop, EVOpNum);
- NumIndexSplit++;
+ ++NumIndexSplit;
return true;
}
Modified: llvm/branches/wendling/eh/lib/Transforms/Scalar/LoopRotation.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Scalar/LoopRotation.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Scalar/LoopRotation.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Scalar/LoopRotation.cpp Fri Jul 2 04:57:13 2010
@@ -147,7 +147,7 @@
continue; // PHI nodes don't count.
if (isa<DbgInfoIntrinsic>(OI))
continue; // Debug intrinsics don't count as size.
- Size++;
+ ++Size;
}
if (Size > MAX_HEADER_SIZE)
@@ -263,7 +263,7 @@
preserveCanonicalLoopForm(LPM);
- NumRotated++;
+ ++NumRotated;
return true;
}
Modified: llvm/branches/wendling/eh/lib/Transforms/Scalar/LoopStrengthReduce.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Scalar/LoopStrengthReduce.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Scalar/LoopStrengthReduce.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Scalar/LoopStrengthReduce.cpp Fri Jul 2 04:57:13 2010
@@ -392,12 +392,13 @@
return isa<SCEVAddExpr>(SE.getSignExtendExpr(A, WideTy));
}
-/// isMulSExtable - Return true if the given add can be sign-extended
+/// isMulSExtable - Return true if the given mul can be sign-extended
/// without changing its value.
-static bool isMulSExtable(const SCEVMulExpr *A, ScalarEvolution &SE) {
+static bool isMulSExtable(const SCEVMulExpr *M, ScalarEvolution &SE) {
const Type *WideTy =
- IntegerType::get(SE.getContext(), SE.getTypeSizeInBits(A->getType()) + 1);
- return isa<SCEVMulExpr>(SE.getSignExtendExpr(A, WideTy));
+ IntegerType::get(SE.getContext(),
+ SE.getTypeSizeInBits(M->getType()) * M->getNumOperands());
+ return isa<SCEVMulExpr>(SE.getSignExtendExpr(M, WideTy));
}
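The widening factor is the point of this fix: one extra bit (copied from the add case) is not enough for a product. Each w-bit factor v satisfies |v| <= 2^(w-1), so an N-operand product is bounded by 2^(N(w-1)) <= 2^(Nw-1), which always fits in N*w signed bits — hence the new WideTy of width getTypeSizeInBits(M->getType()) * M->getNumOperands().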
/// getExactSDiv - Return an expression for LHS /s RHS, if it can be determined
@@ -413,20 +414,28 @@
if (LHS == RHS)
return SE.getConstant(LHS->getType(), 1);
- // Handle x /s -1 as x * -1, to give ScalarEvolution a chance to do some
- // folding.
- if (RHS->isAllOnesValue())
- return SE.getMulExpr(LHS, RHS);
+ // Handle a few RHS special cases.
+ const SCEVConstant *RC = dyn_cast<SCEVConstant>(RHS);
+ if (RC) {
+ const APInt &RA = RC->getValue()->getValue();
+ // Handle x /s -1 as x * -1, to give ScalarEvolution a chance to do
+ // some folding.
+ if (RA.isAllOnesValue())
+ return SE.getMulExpr(LHS, RC);
+ // Handle x /s 1 as x.
+ if (RA == 1)
+ return LHS;
+ }
// Check for a division of a constant by a constant.
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(LHS)) {
- const SCEVConstant *RC = dyn_cast<SCEVConstant>(RHS);
if (!RC)
return 0;
- if (C->getValue()->getValue().srem(RC->getValue()->getValue()) != 0)
+ const APInt &LA = C->getValue()->getValue();
+ const APInt &RA = RC->getValue()->getValue();
+ if (LA.srem(RA) != 0)
return 0;
- return SE.getConstant(C->getValue()->getValue()
- .sdiv(RC->getValue()->getValue()));
+ return SE.getConstant(LA.sdiv(RA));
}
// Distribute the sdiv over addrec operands, if the addrec doesn't overflow.
@@ -440,6 +449,7 @@
if (!Step) return 0;
return SE.getAddRecExpr(Start, Step, AR->getLoop());
}
+ return 0;
}
// Distribute the sdiv over add operands, if the add doesn't overflow.
@@ -455,10 +465,11 @@
}
return SE.getAddExpr(Ops);
}
+ return 0;
}
// Check for a multiply operand that we can pull RHS out of.
- if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(LHS))
+ if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(LHS)) {
if (IgnoreSignificantBits || isMulSExtable(Mul, SE)) {
SmallVector<const SCEV *, 4> Ops;
bool Found = false;
@@ -475,6 +486,8 @@
}
return Found ? SE.getMulExpr(Ops) : 0;
}
+ return 0;
+ }
// Otherwise we don't know.
return 0;
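The constant/constant arm above now divides only when the division is exact; standalone, the check looks like this (illustrative sketch using the same APInt calls):

    #include "llvm/ADT/APInt.h"
    using namespace llvm;

    // Set Res = LA /s RA and return true only when RA divides LA exactly.
    static bool divideExactly(const APInt &LA, const APInt &RA, APInt &Res) {
      if (LA.srem(RA) != 0)  // nonzero remainder: not an exact sdiv
        return false;
      Res = LA.sdiv(RA);
      return true;
    }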
@@ -546,7 +559,7 @@
case Intrinsic::x86_sse2_storeu_pd:
case Intrinsic::x86_sse2_storeu_dq:
case Intrinsic::x86_sse2_storel_dq:
- if (II->getOperand(1) == OperandVal)
+ if (II->getArgOperand(0) == OperandVal)
isAddress = true;
break;
}
@@ -568,7 +581,7 @@
case Intrinsic::x86_sse2_storeu_pd:
case Intrinsic::x86_sse2_storeu_dq:
case Intrinsic::x86_sse2_storel_dq:
- AccessTy = II->getOperand(1)->getType();
+ AccessTy = II->getArgOperand(0)->getType();
break;
}
}
@@ -976,6 +989,8 @@
void dump() const;
};
+}
+
/// HasFormula - Test whether this use has a formula which has the same
/// registers as the given formula.
bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const {
@@ -1203,6 +1218,32 @@
return isLegalUse(AM, MinOffset, MaxOffset, Kind, AccessTy, TLI);
}
+namespace {
+
+/// UseMapDenseMapInfo - A DenseMapInfo implementation for holding
+/// DenseMaps and DenseSets of pairs of const SCEV* and LSRUse::Kind.
+struct UseMapDenseMapInfo {
+ static std::pair<const SCEV *, LSRUse::KindType> getEmptyKey() {
+ return std::make_pair(reinterpret_cast<const SCEV *>(-1), LSRUse::Basic);
+ }
+
+ static std::pair<const SCEV *, LSRUse::KindType> getTombstoneKey() {
+ return std::make_pair(reinterpret_cast<const SCEV *>(-2), LSRUse::Basic);
+ }
+
+ static unsigned
+ getHashValue(const std::pair<const SCEV *, LSRUse::KindType> &V) {
+ unsigned Result = DenseMapInfo<const SCEV *>::getHashValue(V.first);
+ Result ^= DenseMapInfo<unsigned>::getHashValue(unsigned(V.second));
+ return Result;
+ }
+
+ static bool isEqual(const std::pair<const SCEV *, LSRUse::KindType> &LHS,
+ const std::pair<const SCEV *, LSRUse::KindType> &RHS) {
+ return LHS == RHS;
+ }
+};
+
/// FormulaSorter - This class implements an ordering for formulae which sorts
/// them by their standalone cost.
class FormulaSorter {
@@ -1275,7 +1316,9 @@
}
// Support for sharing of LSRUses between LSRFixups.
- typedef DenseMap<const SCEV *, size_t> UseMapTy;
+ typedef DenseMap<std::pair<const SCEV *, LSRUse::KindType>,
+ size_t,
+ UseMapDenseMapInfo> UseMapTy;
UseMapTy UseMap;
bool reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg,
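The DenseMapInfo contract behind UseMapDenseMapInfo is easy to get wrong, so for reference (sketch, types as in the patch): getEmptyKey and getTombstoneKey must return values that can never appear as real keys — here, pairs built from pointers forged out of -1 and -2 — and getHashValue/isEqual must treat both pair members as significant, so that (Expr, Address) and (Expr, ICmpZero) land in different buckets instead of sharing one LSRUse.

    // Usage, as wired up in the later hunk:
    //   UseMap.insert(std::make_pair(std::make_pair(Expr, Kind), 0));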
@@ -1613,8 +1656,11 @@
NewRHS = Sel->getOperand(1);
else if (SE.getSCEV(Sel->getOperand(2)) == MaxRHS)
NewRHS = Sel->getOperand(2);
+ else if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(MaxRHS))
+ NewRHS = SU->getValue();
else
- llvm_unreachable("Max doesn't match expected pattern!");
+ // Max doesn't match expected pattern.
+ return Cond;
// Determine the new comparison opcode. It may be signed or unsigned,
// and the original comparison may be either equality or inequality.
@@ -1805,6 +1851,8 @@
NewMaxOffset = NewOffset;
}
// Check for a mismatched access type, and fall back conservatively as needed.
+ // TODO: Be less conservative when the type is similar and can use the same
+ // addressing modes.
if (Kind == LSRUse::Address && AccessTy != LU.AccessTy)
NewAccessTy = Type::getVoidTy(AccessTy->getContext());
@@ -1833,7 +1881,7 @@
}
std::pair<UseMapTy::iterator, bool> P =
- UseMap.insert(std::make_pair(Expr, 0));
+ UseMap.insert(std::make_pair(std::make_pair(Expr, Kind), 0));
if (!P.second) {
// A use already existed with this base.
size_t LUIdx = P.first->second;
@@ -1919,7 +1967,7 @@
Strides.insert(AR->getStepRecurrence(SE));
Worklist.push_back(AR->getStart());
} else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
- Worklist.insert(Worklist.end(), Add->op_begin(), Add->op_end());
+ Worklist.append(Add->op_begin(), Add->op_end());
}
} while (!Worklist.empty());
}
@@ -2086,7 +2134,7 @@
const SCEV *S = Worklist.pop_back_val();
if (const SCEVNAryExpr *N = dyn_cast<SCEVNAryExpr>(S))
- Worklist.insert(Worklist.end(), N->op_begin(), N->op_end());
+ Worklist.append(N->op_begin(), N->op_end());
else if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S))
Worklist.push_back(C->getOperand());
else if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) {
@@ -2159,20 +2207,23 @@
/// separate registers. If C is non-null, multiply each subexpression by C.
static void CollectSubexprs(const SCEV *S, const SCEVConstant *C,
SmallVectorImpl<const SCEV *> &Ops,
+ SmallVectorImpl<const SCEV *> &UninterestingOps,
+ const Loop *L,
ScalarEvolution &SE) {
if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
// Break out add operands.
for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
I != E; ++I)
- CollectSubexprs(*I, C, Ops, SE);
+ CollectSubexprs(*I, C, Ops, UninterestingOps, L, SE);
return;
} else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
// Split a non-zero base out of an addrec.
if (!AR->getStart()->isZero()) {
CollectSubexprs(SE.getAddRecExpr(SE.getConstant(AR->getType(), 0),
AR->getStepRecurrence(SE),
- AR->getLoop()), C, Ops, SE);
- CollectSubexprs(AR->getStart(), C, Ops, SE);
+ AR->getLoop()),
+ C, Ops, UninterestingOps, L, SE);
+ CollectSubexprs(AR->getStart(), C, Ops, UninterestingOps, L, SE);
return;
}
} else if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
@@ -2182,13 +2233,17 @@
dyn_cast<SCEVConstant>(Mul->getOperand(0))) {
CollectSubexprs(Mul->getOperand(1),
C ? cast<SCEVConstant>(SE.getMulExpr(C, Op0)) : Op0,
- Ops, SE);
+ Ops, UninterestingOps, L, SE);
return;
}
}
- // Otherwise use the value itself.
- Ops.push_back(C ? SE.getMulExpr(C, S) : S);
+ // Otherwise use the value itself. Loop-variant "unknown" values are
+ // uninteresting; we won't be able to do anything meaningful with them.
+ if (!C && isa<SCEVUnknown>(S) && !S->isLoopInvariant(L))
+ UninterestingOps.push_back(S);
+ else
+ Ops.push_back(C ? SE.getMulExpr(C, S) : S);
}
/// GenerateReassociations - Split out subexpressions from adds and the bases of
@@ -2202,8 +2257,15 @@
for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) {
const SCEV *BaseReg = Base.BaseRegs[i];
- SmallVector<const SCEV *, 8> AddOps;
- CollectSubexprs(BaseReg, 0, AddOps, SE);
+ SmallVector<const SCEV *, 8> AddOps, UninterestingAddOps;
+ CollectSubexprs(BaseReg, 0, AddOps, UninterestingAddOps, L, SE);
+
+ // Add any uninteresting values as one register, as we won't be able to
+ // form any interesting reassociation opportunities with them. They'll
+ // just have to be added inside the loop no matter what we do.
+ if (!UninterestingAddOps.empty())
+ AddOps.push_back(SE.getAddExpr(UninterestingAddOps));
+
if (AddOps.size() == 1) continue;
for (SmallVectorImpl<const SCEV *>::const_iterator J = AddOps.begin(),
@@ -2216,11 +2278,10 @@
continue;
// Collect all operands except *J.
- SmallVector<const SCEV *, 8> InnerAddOps;
- for (SmallVectorImpl<const SCEV *>::const_iterator K = AddOps.begin(),
- KE = AddOps.end(); K != KE; ++K)
- if (K != J)
- InnerAddOps.push_back(*K);
+ SmallVector<const SCEV *, 8> InnerAddOps
+ ( ((const SmallVector<const SCEV *, 8> &)AddOps).begin(), J);
+ InnerAddOps.append
+ (next(J), ((const SmallVector<const SCEV *, 8> &)AddOps).end());
// Don't leave just a constant behind in a register if the constant could
// be folded into an immediate field.
@@ -2354,13 +2415,12 @@
for (SmallSetVector<int64_t, 8>::const_iterator
I = Factors.begin(), E = Factors.end(); I != E; ++I) {
int64_t Factor = *I;
- Formula F = Base;
// Check that the multiplication doesn't overflow.
- if (F.AM.BaseOffs == INT64_MIN && Factor == -1)
+ if (Base.AM.BaseOffs == INT64_MIN && Factor == -1)
continue;
- F.AM.BaseOffs = (uint64_t)Base.AM.BaseOffs * Factor;
- if (F.AM.BaseOffs / Factor != Base.AM.BaseOffs)
+ int64_t NewBaseOffs = (uint64_t)Base.AM.BaseOffs * Factor;
+ if (NewBaseOffs / Factor != Base.AM.BaseOffs)
continue;
// Check that multiplying with the use offset doesn't overflow.
@@ -2371,6 +2431,9 @@
if (Offset / Factor != LU.MinOffset)
continue;
+ Formula F = Base;
+ F.AM.BaseOffs = NewBaseOffs;
+
// Check that this scale is legal.
if (!isLegalUse(F.AM, Offset, Offset, LU.Kind, LU.AccessTy, TLI))
continue;
Modified: llvm/branches/wendling/eh/lib/Transforms/Scalar/LoopUnswitch.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Scalar/LoopUnswitch.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Scalar/LoopUnswitch.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Scalar/LoopUnswitch.cpp Fri Jul 2 04:57:13 2010
@@ -457,21 +457,21 @@
}
// RemapInstruction - Convert the instruction operands from referencing the
-// current values into those specified by ValueMap.
+// current values into those specified by VMap.
//
static inline void RemapInstruction(Instruction *I,
- DenseMap<const Value *, Value*> &ValueMap) {
+ ValueMap<const Value *, Value*> &VMap) {
for (unsigned op = 0, E = I->getNumOperands(); op != E; ++op) {
Value *Op = I->getOperand(op);
- DenseMap<const Value *, Value*>::iterator It = ValueMap.find(Op);
- if (It != ValueMap.end()) Op = It->second;
+ ValueMap<const Value *, Value*>::iterator It = VMap.find(Op);
+ if (It != VMap.end()) Op = It->second;
I->setOperand(op, Op);
}
}
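A note on why this is more than a rename (sketch, assuming llvm/ADT/ValueMap.h as of this branch): ValueMap registers callbacks on its key Values, so entries whose keys are deleted or RAUW'd are kept in sync automatically, where a DenseMap<const Value*, Value*> would silently hold dangling pointers. The lookup idiom itself is unchanged:

    #include "llvm/ADT/ValueMap.h"
    using namespace llvm;

    // Map Op through VMap if present; otherwise leave it as-is.
    static Value *mapValue(Value *Op, ValueMap<const Value *, Value *> &VMap) {
      ValueMap<const Value *, Value *>::iterator It = VMap.find(Op);
      return It != VMap.end() ? It->second : Op;
    }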
/// CloneLoop - Recursively clone the specified loop and all of its children,
/// mapping the blocks with the specified map.
-static Loop *CloneLoop(Loop *L, Loop *PL, DenseMap<const Value*, Value*> &VM,
+static Loop *CloneLoop(Loop *L, Loop *PL, ValueMap<const Value*, Value*> &VM,
LoopInfo *LI, LPPassManager *LPM) {
Loop *New = new Loop();
LPM->insertLoop(New, PL);
@@ -615,11 +615,11 @@
// the loop preheader and exit blocks), keeping track of the mapping between
// the instructions and blocks.
NewBlocks.reserve(LoopBlocks.size());
- DenseMap<const Value*, Value*> ValueMap;
+ ValueMap<const Value*, Value*> VMap;
for (unsigned i = 0, e = LoopBlocks.size(); i != e; ++i) {
- BasicBlock *NewBB = CloneBasicBlock(LoopBlocks[i], ValueMap, ".us", F);
+ BasicBlock *NewBB = CloneBasicBlock(LoopBlocks[i], VMap, ".us", F);
NewBlocks.push_back(NewBB);
- ValueMap[LoopBlocks[i]] = NewBB; // Keep the BB mapping.
+ VMap[LoopBlocks[i]] = NewBB; // Keep the BB mapping.
LPM->cloneBasicBlockSimpleAnalysis(LoopBlocks[i], NewBB, L);
}
@@ -629,7 +629,7 @@
NewBlocks[0], F->end());
// Now we create the new Loop object for the versioned loop.
- Loop *NewLoop = CloneLoop(L, L->getParentLoop(), ValueMap, LI, LPM);
+ Loop *NewLoop = CloneLoop(L, L->getParentLoop(), VMap, LI, LPM);
Loop *ParentLoop = L->getParentLoop();
if (ParentLoop) {
// Make sure to add the cloned preheader and exit blocks to the parent loop
@@ -638,7 +638,7 @@
}
for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
- BasicBlock *NewExit = cast<BasicBlock>(ValueMap[ExitBlocks[i]]);
+ BasicBlock *NewExit = cast<BasicBlock>(VMap[ExitBlocks[i]]);
// The new exit block should be in the same loop as the old one.
if (Loop *ExitBBLoop = LI->getLoopFor(ExitBlocks[i]))
ExitBBLoop->addBasicBlockToLoop(NewExit, LI->getBase());
@@ -653,8 +653,8 @@
for (BasicBlock::iterator I = ExitSucc->begin(); isa<PHINode>(I); ++I) {
PN = cast<PHINode>(I);
Value *V = PN->getIncomingValueForBlock(ExitBlocks[i]);
- DenseMap<const Value *, Value*>::iterator It = ValueMap.find(V);
- if (It != ValueMap.end()) V = It->second;
+ ValueMap<const Value *, Value*>::iterator It = VMap.find(V);
+ if (It != VMap.end()) V = It->second;
PN->addIncoming(V, NewExit);
}
}
@@ -663,7 +663,7 @@
for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i)
for (BasicBlock::iterator I = NewBlocks[i]->begin(),
E = NewBlocks[i]->end(); I != E; ++I)
- RemapInstruction(I, ValueMap);
+ RemapInstruction(I, VMap);
// Rewrite the original preheader to select between versions of the loop.
BranchInst *OldBR = cast<BranchInst>(loopPreheader->getTerminator());
Modified: llvm/branches/wendling/eh/lib/Transforms/Scalar/MemCpyOptimizer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Scalar/MemCpyOptimizer.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Scalar/MemCpyOptimizer.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Scalar/MemCpyOptimizer.cpp Fri Jul 2 04:57:13 2010
@@ -632,7 +632,7 @@
// Remove the memcpy
MD.removeInstruction(cpy);
cpy->eraseFromParent();
- NumMemCpyInstr++;
+ ++NumMemCpyInstr;
return true;
}
@@ -710,7 +710,7 @@
if (MD.getDependency(C) == dep) {
MD.removeInstruction(M);
M->eraseFromParent();
- NumMemCpyInstr++;
+ ++NumMemCpyInstr;
return true;
}
Modified: llvm/branches/wendling/eh/lib/Transforms/Scalar/ScalarReplAggregates.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Scalar/ScalarReplAggregates.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Scalar/ScalarReplAggregates.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Scalar/ScalarReplAggregates.cpp Fri Jul 2 04:57:13 2010
@@ -926,7 +926,7 @@
DeleteDeadInstructions();
AI->eraseFromParent();
- NumReplaced++;
+ ++NumReplaced;
}
/// DeleteDeadInstructions - Erase instructions on the DeadInstrs list,
@@ -965,11 +965,11 @@
isSafeGEP(GEPI, AI, GEPOffset, Info);
if (!Info.isUnsafe)
isSafeForScalarRepl(GEPI, AI, GEPOffset, Info);
- } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(UI)) {
+ } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) {
ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength());
if (Length)
isSafeMemAccess(AI, Offset, Length->getZExtValue(), 0,
- UI.getOperandNo() == 1, Info);
+ UI.getOperandNo() == CallInst::ArgOffset, Info);
else
MarkUnsafe(Info);
} else if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
@@ -1373,7 +1373,7 @@
// If the stored element is zero (common case), just store a null
// constant.
Constant *StoreVal;
- if (ConstantInt *CI = dyn_cast<ConstantInt>(MI->getOperand(2))) {
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(MI->getArgOperand(1))) {
if (CI->isZero()) {
StoreVal = Constant::getNullValue(EltTy); // 0.0, null, 0, <0,0>
} else {
@@ -1436,7 +1436,7 @@
Value *Ops[] = {
SROADest ? EltPtr : OtherElt, // Dest ptr
SROADest ? OtherElt : EltPtr, // Src ptr
- ConstantInt::get(MI->getOperand(3)->getType(), EltSize), // Size
+ ConstantInt::get(MI->getArgOperand(2)->getType(), EltSize), // Size
// Align
ConstantInt::get(Type::getInt32Ty(MI->getContext()), OtherEltAlign),
MI->getVolatileCst()
@@ -1451,8 +1451,8 @@
} else {
assert(isa<MemSetInst>(MI));
Value *Ops[] = {
- EltPtr, MI->getOperand(2), // Dest, Value,
- ConstantInt::get(MI->getOperand(3)->getType(), EltSize), // Size
+ EltPtr, MI->getArgOperand(1), // Dest, Value,
+ ConstantInt::get(MI->getArgOperand(2)->getType(), EltSize), // Size
Zero, // Align
ConstantInt::get(Type::getInt1Ty(MI->getContext()), 0) // isVolatile
};
@@ -1655,7 +1655,12 @@
SrcField = BinaryOperator::CreateShl(SrcField, ShiftVal, "", LI);
}
- ResultVal = BinaryOperator::CreateOr(SrcField, ResultVal, "", LI);
+ // Don't create an 'or x, 0' on the first iteration.
+ if (!isa<Constant>(ResultVal) ||
+ !cast<Constant>(ResultVal)->isNullValue())
+ ResultVal = BinaryOperator::CreateOr(SrcField, ResultVal, "", LI);
+ else
+ ResultVal = SrcField;
}
// Handle tail padding by truncating the result
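What changes on the first loop iteration, where ResultVal starts as the null constant (for reference):

    // before: the first iteration emitted a no-op (ResultVal is still zero here)
    //   ResultVal = BinaryOperator::CreateOr(SrcField, ResultVal, "", LI);  // or x, 0
    // after: the first field's value is forwarded directly
    //   ResultVal = SrcField;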
@@ -1794,7 +1799,7 @@
if (isOffset) return false;
// If the memintrinsic isn't using the alloca as the dest, reject it.
- if (UI.getOperandNo() != 1) return false;
+ if (UI.getOperandNo() != CallInst::ArgOffset) return false;
// If the source of the memcpy/move is not a constant global, reject it.
if (!PointsToConstantGlobal(MI->getSource()))
Modified: llvm/branches/wendling/eh/lib/Transforms/Scalar/SimplifyLibCalls.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Scalar/SimplifyLibCalls.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Scalar/SimplifyLibCalls.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Scalar/SimplifyLibCalls.cpp Fri Jul 2 04:57:13 2010
@@ -129,8 +129,8 @@
return 0;
// Extract some information from the instruction
- Value *Dst = CI->getOperand(1);
- Value *Src = CI->getOperand(2);
+ Value *Dst = CI->getArgOperand(0);
+ Value *Src = CI->getArgOperand(1);
// See if we can get the length of the input string.
uint64_t Len = GetStringLength(Src);
@@ -181,12 +181,12 @@
return 0;
// Extract some information from the instruction
- Value *Dst = CI->getOperand(1);
- Value *Src = CI->getOperand(2);
+ Value *Dst = CI->getArgOperand(0);
+ Value *Src = CI->getArgOperand(1);
uint64_t Len;
// We don't do anything if length is not constant
- if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(CI->getOperand(3)))
+ if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(CI->getArgOperand(2)))
Len = LengthArg->getZExtValue();
else
return 0;
@@ -226,11 +226,11 @@
FT->getParamType(0) != FT->getReturnType())
return 0;
- Value *SrcStr = CI->getOperand(1);
+ Value *SrcStr = CI->getArgOperand(0);
// If the second operand is non-constant, see if we can compute the length
// of the input string and turn this into memchr.
- ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getOperand(2));
+ ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
if (CharC == 0) {
// These optimizations require TargetData.
if (!TD) return 0;
@@ -239,7 +239,7 @@
if (Len == 0 || !FT->getParamType(1)->isIntegerTy(32))// memchr needs i32.
return 0;
- return EmitMemChr(SrcStr, CI->getOperand(2), // include nul.
+ return EmitMemChr(SrcStr, CI->getArgOperand(1), // include nul.
ConstantInt::get(TD->getIntPtrType(*Context), Len),
B, TD);
}
@@ -284,7 +284,7 @@
FT->getParamType(0) != Type::getInt8PtrTy(*Context))
return 0;
- Value *Str1P = CI->getOperand(1), *Str2P = CI->getOperand(2);
+ Value *Str1P = CI->getArgOperand(0), *Str2P = CI->getArgOperand(1);
if (Str1P == Str2P) // strcmp(x,x) -> 0
return ConstantInt::get(CI->getType(), 0);
@@ -333,13 +333,13 @@
!FT->getParamType(2)->isIntegerTy())
return 0;
- Value *Str1P = CI->getOperand(1), *Str2P = CI->getOperand(2);
+ Value *Str1P = CI->getArgOperand(0), *Str2P = CI->getArgOperand(1);
if (Str1P == Str2P) // strncmp(x,x,n) -> 0
return ConstantInt::get(CI->getType(), 0);
// Get the length argument if it is constant.
uint64_t Length;
- if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(CI->getOperand(3)))
+ if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(CI->getArgOperand(2)))
Length = LengthArg->getZExtValue();
else
return 0;
@@ -348,7 +348,7 @@
return ConstantInt::get(CI->getType(), 0);
if (TD && Length == 1) // strncmp(x,y,1) -> memcmp(x,y,1)
- return EmitMemCmp(Str1P, Str2P, CI->getOperand(3), B, TD);
+ return EmitMemCmp(Str1P, Str2P, CI->getArgOperand(2), B, TD);
std::string Str1, Str2;
bool HasStr1 = GetConstantStringInfo(Str1P, Str1);
@@ -387,7 +387,7 @@
FT->getParamType(0) != Type::getInt8PtrTy(*Context))
return 0;
- Value *Dst = CI->getOperand(1), *Src = CI->getOperand(2);
+ Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1);
if (Dst == Src) // strcpy(x,x) -> x
return Src;
@@ -403,7 +403,7 @@
if (OptChkCall)
EmitMemCpyChk(Dst, Src,
ConstantInt::get(TD->getIntPtrType(*Context), Len),
- CI->getOperand(3), B, TD);
+ CI->getArgOperand(2), B, TD);
else
EmitMemCpy(Dst, Src,
ConstantInt::get(TD->getIntPtrType(*Context), Len),
@@ -424,9 +424,9 @@
!FT->getParamType(2)->isIntegerTy())
return 0;
- Value *Dst = CI->getOperand(1);
- Value *Src = CI->getOperand(2);
- Value *LenOp = CI->getOperand(3);
+ Value *Dst = CI->getArgOperand(0);
+ Value *Src = CI->getArgOperand(1);
+ Value *LenOp = CI->getArgOperand(2);
// See if we can get the length of the input string.
uint64_t SrcLen = GetStringLength(Src);
@@ -474,7 +474,7 @@
!FT->getReturnType()->isIntegerTy())
return 0;
- Value *Src = CI->getOperand(1);
+ Value *Src = CI->getArgOperand(0);
// Constant folding: strlen("xyz") -> 3
if (uint64_t Len = GetStringLength(Src))
@@ -499,7 +499,7 @@
!FT->getParamType(1)->isPointerTy())
return 0;
- Value *EndPtr = CI->getOperand(2);
+ Value *EndPtr = CI->getArgOperand(1);
if (isa<ConstantPointerNull>(EndPtr)) {
CI->setOnlyReadsMemory();
CI->addAttribute(1, Attribute::NoCapture);
@@ -522,13 +522,13 @@
return 0;
// fold strstr(x, x) -> x.
- if (CI->getOperand(1) == CI->getOperand(2))
- return B.CreateBitCast(CI->getOperand(1), CI->getType());
+ if (CI->getArgOperand(0) == CI->getArgOperand(1))
+ return B.CreateBitCast(CI->getArgOperand(0), CI->getType());
// fold strstr(a, b) == a -> strncmp(a, b, strlen(b)) == 0
- if (TD && IsOnlyUsedInEqualityComparison(CI, CI->getOperand(1))) {
- Value *StrLen = EmitStrLen(CI->getOperand(2), B, TD);
- Value *StrNCmp = EmitStrNCmp(CI->getOperand(1), CI->getOperand(2),
+ if (TD && IsOnlyUsedInEqualityComparison(CI, CI->getArgOperand(0))) {
+ Value *StrLen = EmitStrLen(CI->getArgOperand(1), B, TD);
+ Value *StrNCmp = EmitStrNCmp(CI->getArgOperand(0), CI->getArgOperand(1),
StrLen, B, TD);
for (Value::use_iterator UI = CI->use_begin(), UE = CI->use_end();
UI != UE; ) {
@@ -544,12 +544,12 @@
// See if either input string is a constant string.
std::string SearchStr, ToFindStr;
- bool HasStr1 = GetConstantStringInfo(CI->getOperand(1), SearchStr);
- bool HasStr2 = GetConstantStringInfo(CI->getOperand(2), ToFindStr);
+ bool HasStr1 = GetConstantStringInfo(CI->getArgOperand(0), SearchStr);
+ bool HasStr2 = GetConstantStringInfo(CI->getArgOperand(1), ToFindStr);
// fold strstr(x, "") -> x.
if (HasStr2 && ToFindStr.empty())
- return B.CreateBitCast(CI->getOperand(1), CI->getType());
+ return B.CreateBitCast(CI->getArgOperand(0), CI->getType());
// If both strings are known, constant fold it.
if (HasStr1 && HasStr2) {
@@ -559,14 +559,14 @@
return Constant::getNullValue(CI->getType());
// strstr("abcd", "bc") -> gep((char*)"abcd", 1)
- Value *Result = CastToCStr(CI->getOperand(1), B);
+ Value *Result = CastToCStr(CI->getArgOperand(0), B);
Result = B.CreateConstInBoundsGEP1_64(Result, Offset, "strstr");
return B.CreateBitCast(Result, CI->getType());
}
// fold strstr(x, "y") -> strchr(x, 'y').
if (HasStr2 && ToFindStr.size() == 1)
- return B.CreateBitCast(EmitStrChr(CI->getOperand(1), ToFindStr[0], B, TD),
+ return B.CreateBitCast(EmitStrChr(CI->getArgOperand(0), ToFindStr[0], B, TD),
CI->getType());
return 0;
}
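Collected in one place, the strstr folds this simplifier performs (C-level equivalences, for reference):

    // strstr(x, x)         -> x
    // strstr(s, "")        -> s
    // strstr("abcd", "bc") -> (char*)"abcd" + 1        (both strings constant)
    // strstr(s, "y")       -> strchr(s, 'y')           (single-char needle)
    // strstr(a, b) == a    -> strncmp(a, b, strlen(b)) == 0
    //                          (only when the result is used solely in ==/!=)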
@@ -584,13 +584,13 @@
!FT->getReturnType()->isIntegerTy(32))
return 0;
- Value *LHS = CI->getOperand(1), *RHS = CI->getOperand(2);
+ Value *LHS = CI->getArgOperand(0), *RHS = CI->getArgOperand(1);
if (LHS == RHS) // memcmp(s,s,x) -> 0
return Constant::getNullValue(CI->getType());
// Make sure we have a constant length.
- ConstantInt *LenC = dyn_cast<ConstantInt>(CI->getOperand(3));
+ ConstantInt *LenC = dyn_cast<ConstantInt>(CI->getArgOperand(2));
if (!LenC) return 0;
uint64_t Len = LenC->getZExtValue();
@@ -637,9 +637,9 @@
return 0;
// memcpy(x, y, n) -> llvm.memcpy(x, y, n, 1)
- EmitMemCpy(CI->getOperand(1), CI->getOperand(2),
- CI->getOperand(3), 1, false, B, TD);
- return CI->getOperand(1);
+ EmitMemCpy(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2), 1, false, B, TD);
+ return CI->getArgOperand(0);
}
};
@@ -659,9 +659,9 @@
return 0;
// memmove(x, y, n) -> llvm.memmove(x, y, n, 1)
- EmitMemMove(CI->getOperand(1), CI->getOperand(2),
- CI->getOperand(3), 1, false, B, TD);
- return CI->getOperand(1);
+ EmitMemMove(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2), 1, false, B, TD);
+ return CI->getArgOperand(0);
}
};
@@ -681,10 +681,10 @@
return 0;
// memset(p, v, n) -> llvm.memset(p, v, n, 1)
- Value *Val = B.CreateIntCast(CI->getOperand(2), Type::getInt8Ty(*Context),
- false);
- EmitMemSet(CI->getOperand(1), Val, CI->getOperand(3), false, B, TD);
- return CI->getOperand(1);
+ Value *Val = B.CreateIntCast(CI->getArgOperand(1), Type::getInt8Ty(*Context),
+ false);
+ EmitMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), false, B, TD);
+ return CI->getArgOperand(0);
}
};
@@ -705,7 +705,7 @@
!FT->getParamType(0)->isFloatingPointTy())
return 0;
- Value *Op1 = CI->getOperand(1), *Op2 = CI->getOperand(2);
+ Value *Op1 = CI->getArgOperand(0), *Op2 = CI->getArgOperand(1);
if (ConstantFP *Op1C = dyn_cast<ConstantFP>(Op1)) {
if (Op1C->isExactlyValue(1.0)) // pow(1.0, x) -> 1.0
return Op1C;
@@ -759,7 +759,7 @@
!FT->getParamType(0)->isFloatingPointTy())
return 0;
- Value *Op = CI->getOperand(1);
+ Value *Op = CI->getArgOperand(0);
// Turn exp2(sitofp(x)) -> ldexp(1.0, sext(x)) if sizeof(x) <= 32
// Turn exp2(uitofp(x)) -> ldexp(1.0, zext(x)) if sizeof(x) < 32
Value *LdExpArg = 0;
@@ -811,7 +811,7 @@
return 0;
// If this is something like 'floor((double)floatval)', convert to floorf.
- FPExtInst *Cast = dyn_cast<FPExtInst>(CI->getOperand(1));
+ FPExtInst *Cast = dyn_cast<FPExtInst>(CI->getArgOperand(0));
if (Cast == 0 || !Cast->getOperand(0)->getType()->isFloatTy())
return 0;
@@ -840,7 +840,7 @@
!FT->getParamType(0)->isIntegerTy())
return 0;
- Value *Op = CI->getOperand(1);
+ Value *Op = CI->getArgOperand(0);
// Constant fold.
if (ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
@@ -876,7 +876,7 @@
return 0;
// isdigit(c) -> (c-'0') <u 10
- Value *Op = CI->getOperand(1);
+ Value *Op = CI->getArgOperand(0);
Op = B.CreateSub(Op, ConstantInt::get(Type::getInt32Ty(*Context), '0'),
"isdigittmp");
Op = B.CreateICmpULT(Op, ConstantInt::get(Type::getInt32Ty(*Context), 10),
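The trick above as standalone C++ (illustrative; ASCII assumed): the subtraction wraps negative and over-large inputs around, so a single unsigned compare replaces both signed bounds checks.

    // Equivalent to: c >= '0' && c <= '9' for ASCII inputs.
    static bool isDigitOpt(unsigned c) {
      return (c - '0') < 10u;  // unsigned compare == the icmp ult built above
    }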
@@ -897,7 +897,7 @@
return 0;
// isascii(c) -> c <u 128
- Value *Op = CI->getOperand(1);
+ Value *Op = CI->getArgOperand(0);
Op = B.CreateICmpULT(Op, ConstantInt::get(Type::getInt32Ty(*Context), 128),
"isascii");
return B.CreateZExt(Op, CI->getType());
@@ -916,7 +916,7 @@
return 0;
// abs(x) -> x >s -1 ? x : -x
- Value *Op = CI->getOperand(1);
+ Value *Op = CI->getArgOperand(0);
Value *Pos = B.CreateICmpSGT(Op,
Constant::getAllOnesValue(Op->getType()),
"ispos");
@@ -938,7 +938,7 @@
return 0;
// isascii(c) -> c & 0x7f
- return B.CreateAnd(CI->getOperand(1),
+ return B.CreateAnd(CI->getArgOperand(0),
ConstantInt::get(CI->getType(),0x7F));
}
};
@@ -961,7 +961,7 @@
// Check for a fixed format string.
std::string FormatStr;
- if (!GetConstantStringInfo(CI->getOperand(1), FormatStr))
+ if (!GetConstantStringInfo(CI->getArgOperand(0), FormatStr))
return 0;
// Empty format string -> noop.
@@ -993,20 +993,20 @@
}
// Optimize specific format strings.
- // printf("%c", chr) --> putchar(*(i8*)dst)
- if (FormatStr == "%c" && CI->getNumOperands() > 2 &&
- CI->getOperand(2)->getType()->isIntegerTy()) {
- Value *Res = EmitPutChar(CI->getOperand(2), B, TD);
+ // printf("%c", chr) --> putchar(chr)
+ if (FormatStr == "%c" && CI->getNumArgOperands() > 1 &&
+ CI->getArgOperand(1)->getType()->isIntegerTy()) {
+ Value *Res = EmitPutChar(CI->getArgOperand(1), B, TD);
if (CI->use_empty()) return CI;
return B.CreateIntCast(Res, CI->getType(), true);
}
// printf("%s\n", str) --> puts(str)
- if (FormatStr == "%s\n" && CI->getNumOperands() > 2 &&
- CI->getOperand(2)->getType()->isPointerTy() &&
+ if (FormatStr == "%s\n" && CI->getNumArgOperands() > 1 &&
+ CI->getArgOperand(1)->getType()->isPointerTy() &&
CI->use_empty()) {
- EmitPutS(CI->getOperand(2), B, TD);
+ EmitPutS(CI->getArgOperand(1), B, TD);
return CI;
}
return 0;
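Summarized, the printf rewrites in this block (C-level equivalences; the argument-index shift is the only functional change in the hunk):

    // printf("%c", chr)  -> putchar(chr)   (result cast back if it has uses)
    // printf("%s\n", s)  -> puts(s)        (only when the result is unused)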
@@ -1027,11 +1027,11 @@
// Check for a fixed format string.
std::string FormatStr;
- if (!GetConstantStringInfo(CI->getOperand(2), FormatStr))
+ if (!GetConstantStringInfo(CI->getArgOperand(1), FormatStr))
return 0;
// If we just have a format string (nothing else crazy) transform it.
- if (CI->getNumOperands() == 3) {
+ if (CI->getNumArgOperands() == 2) {
// Make sure there's no % in the constant array. We could try to handle
// %% -> % in the future if we cared.
for (unsigned i = 0, e = FormatStr.size(); i != e; ++i)
@@ -1042,7 +1042,7 @@
if (!TD) return 0;
// sprintf(str, fmt) -> llvm.memcpy(str, fmt, strlen(fmt)+1, 1)
- EmitMemCpy(CI->getOperand(1), CI->getOperand(2), // Copy the nul byte.
+ EmitMemCpy(CI->getArgOperand(0), CI->getArgOperand(1), // Copy the nul byte.
ConstantInt::get(TD->getIntPtrType(*Context),
FormatStr.size()+1), 1, false, B, TD);
return ConstantInt::get(CI->getType(), FormatStr.size());
@@ -1050,16 +1050,17 @@
// The remaining optimizations require the format string to be "%s" or "%c"
// and have an extra operand.
- if (FormatStr.size() != 2 || FormatStr[0] != '%' || CI->getNumOperands() <4)
+ if (FormatStr.size() != 2 || FormatStr[0] != '%' ||
+ CI->getNumArgOperands() < 3)
return 0;
// Decode the second character of the format string.
if (FormatStr[1] == 'c') {
// sprintf(dst, "%c", chr) --> *(i8*)dst = chr; *((i8*)dst+1) = 0
- if (!CI->getOperand(3)->getType()->isIntegerTy()) return 0;
- Value *V = B.CreateTrunc(CI->getOperand(3),
+ if (!CI->getArgOperand(2)->getType()->isIntegerTy()) return 0;
+ Value *V = B.CreateTrunc(CI->getArgOperand(2),
Type::getInt8Ty(*Context), "char");
- Value *Ptr = CastToCStr(CI->getOperand(1), B);
+ Value *Ptr = CastToCStr(CI->getArgOperand(0), B);
B.CreateStore(V, Ptr);
Ptr = B.CreateGEP(Ptr, ConstantInt::get(Type::getInt32Ty(*Context), 1),
"nul");
@@ -1073,13 +1074,13 @@
if (!TD) return 0;
// sprintf(dest, "%s", str) -> llvm.memcpy(dest, str, strlen(str)+1, 1)
- if (!CI->getOperand(3)->getType()->isPointerTy()) return 0;
+ if (!CI->getArgOperand(2)->getType()->isPointerTy()) return 0;
- Value *Len = EmitStrLen(CI->getOperand(3), B, TD);
+ Value *Len = EmitStrLen(CI->getArgOperand(2), B, TD);
Value *IncLen = B.CreateAdd(Len,
ConstantInt::get(Len->getType(), 1),
"leninc");
- EmitMemCpy(CI->getOperand(1), CI->getOperand(3), IncLen, 1, false, B, TD);
+ EmitMemCpy(CI->getArgOperand(0), CI->getArgOperand(2), IncLen, 1, false, B, TD);
// The sprintf result is the unincremented number of bytes in the string.
return B.CreateIntCast(Len, CI->getType(), false);
@@ -1103,8 +1104,8 @@
return 0;
// Get the element size and count.
- ConstantInt *SizeC = dyn_cast<ConstantInt>(CI->getOperand(2));
- ConstantInt *CountC = dyn_cast<ConstantInt>(CI->getOperand(3));
+ ConstantInt *SizeC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
+ ConstantInt *CountC = dyn_cast<ConstantInt>(CI->getArgOperand(2));
if (!SizeC || !CountC) return 0;
uint64_t Bytes = SizeC->getZExtValue()*CountC->getZExtValue();
@@ -1114,8 +1115,8 @@
// If this is writing one byte, turn it into fputc.
if (Bytes == 1) { // fwrite(S,1,1,F) -> fputc(S[0],F)
- Value *Char = B.CreateLoad(CastToCStr(CI->getOperand(1), B), "char");
- EmitFPutC(Char, CI->getOperand(4), B, TD);
+ Value *Char = B.CreateLoad(CastToCStr(CI->getArgOperand(0), B), "char");
+ EmitFPutC(Char, CI->getArgOperand(3), B, TD);
return ConstantInt::get(CI->getType(), 1);
}
@@ -1139,11 +1140,11 @@
return 0;
// fputs(s,F) --> fwrite(s,1,strlen(s),F)
- uint64_t Len = GetStringLength(CI->getOperand(1));
+ uint64_t Len = GetStringLength(CI->getArgOperand(0));
if (!Len) return 0;
- EmitFWrite(CI->getOperand(1),
+ EmitFWrite(CI->getArgOperand(0),
ConstantInt::get(TD->getIntPtrType(*Context), Len-1),
- CI->getOperand(2), B, TD);
+ CI->getArgOperand(1), B, TD);
return CI; // Known to have no uses (see above).
}
};
@@ -1162,11 +1163,11 @@
// All the optimizations depend on the format string.
std::string FormatStr;
- if (!GetConstantStringInfo(CI->getOperand(2), FormatStr))
+ if (!GetConstantStringInfo(CI->getArgOperand(1), FormatStr))
return 0;
// fprintf(F, "foo") --> fwrite("foo", 3, 1, F)
- if (CI->getNumOperands() == 3) {
+ if (CI->getNumArgOperands() == 2) {
for (unsigned i = 0, e = FormatStr.size(); i != e; ++i)
if (FormatStr[i] == '%') // Could handle %% -> % if we cared.
return 0; // We found a format specifier.
@@ -1174,31 +1175,32 @@
// These optimizations require TargetData.
if (!TD) return 0;
- EmitFWrite(CI->getOperand(2),
+ EmitFWrite(CI->getArgOperand(1),
ConstantInt::get(TD->getIntPtrType(*Context),
FormatStr.size()),
- CI->getOperand(1), B, TD);
+ CI->getArgOperand(0), B, TD);
return ConstantInt::get(CI->getType(), FormatStr.size());
}
// The remaining optimizations require the format string to be "%s" or "%c"
// and have an extra operand.
- if (FormatStr.size() != 2 || FormatStr[0] != '%' || CI->getNumOperands() <4)
+ if (FormatStr.size() != 2 || FormatStr[0] != '%' ||
+ CI->getNumArgOperands() < 3)
return 0;
// Decode the second character of the format string.
if (FormatStr[1] == 'c') {
- // fprintf(F, "%c", chr) --> *(i8*)dst = chr
- if (!CI->getOperand(3)->getType()->isIntegerTy()) return 0;
- EmitFPutC(CI->getOperand(3), CI->getOperand(1), B, TD);
+ // fprintf(F, "%c", chr) --> fputc(chr, F)
+ if (!CI->getArgOperand(2)->getType()->isIntegerTy()) return 0;
+ EmitFPutC(CI->getArgOperand(2), CI->getArgOperand(0), B, TD);
return ConstantInt::get(CI->getType(), 1);
}
if (FormatStr[1] == 's') {
- // fprintf(F, "%s", str) -> fputs(str, F)
- if (!CI->getOperand(3)->getType()->isPointerTy() || !CI->use_empty())
+ // fprintf(F, "%s", str) --> fputs(str, F)
+ if (!CI->getArgOperand(2)->getType()->isPointerTy() || !CI->use_empty())
return 0;
- EmitFPutS(CI->getOperand(3), CI->getOperand(1), B, TD);
+ EmitFPutS(CI->getArgOperand(2), CI->getArgOperand(0), B, TD);
return CI;
}
return 0;
Modified: llvm/branches/wendling/eh/lib/Transforms/Scalar/TailRecursionElimination.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Scalar/TailRecursionElimination.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Scalar/TailRecursionElimination.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Scalar/TailRecursionElimination.cpp Fri Jul 2 04:57:13 2010
@@ -253,7 +253,7 @@
// If we are passing this argument into call as the corresponding
// argument operand, then the argument is dynamically constant.
// Otherwise, we cannot transform this function safely.
- if (CI->getOperand(ArgNo+1) == Arg)
+ if (CI->getArgOperand(ArgNo) == Arg)
return true;
}
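Restated (sketch): an argument is "dynamically constant" for TRE purposes when every recursive call passes it straight through in its own position, e.g.

    // int f(int n, int base) { ... return f(n - 1, base); }
    //   argument 1 is passed as itself at the call, so the check holds for base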
@@ -270,16 +270,16 @@
}
// getCommonReturnValue - Check to see if the function containing the specified
-// return instruction and tail call consistently returns the same
-// runtime-constant value at all exit points. If so, return the returned value.
+// tail call consistently returns the same runtime-constant value at all exit
+// points except for IgnoreRI. If so, return the returned value.
//
-static Value *getCommonReturnValue(ReturnInst *TheRI, CallInst *CI) {
- Function *F = TheRI->getParent()->getParent();
+static Value *getCommonReturnValue(ReturnInst *IgnoreRI, CallInst *CI) {
+ Function *F = CI->getParent()->getParent();
Value *ReturnedValue = 0;
for (Function::iterator BBI = F->begin(), E = F->end(); BBI != E; ++BBI)
if (ReturnInst *RI = dyn_cast<ReturnInst>(BBI->getTerminator()))
- if (RI != TheRI) {
+ if (RI != IgnoreRI) {
Value *RetOp = RI->getOperand(0);
// We can only perform this transformation if the value returned is
@@ -404,7 +404,7 @@
if (Ret->getNumOperands() == 1 && Ret->getReturnValue() != CI &&
!isa<UndefValue>(Ret->getReturnValue()) &&
AccumulatorRecursionEliminationInitVal == 0 &&
- !getCommonReturnValue(Ret, CI))
+ !getCommonReturnValue(0, CI))
return false;
// OK! We can transform this tail call. If this is the first one found,
@@ -454,8 +454,8 @@
// Ok, now that we know we have a pseudo-entry block WITH all of the
// required PHI nodes, add entries into the PHI node for the actual
// parameters passed into the tail-recursive call.
- for (unsigned i = 0, e = CI->getNumOperands()-1; i != e; ++i)
- ArgumentPHIs[i]->addIncoming(CI->getOperand(i+1), BB);
+ for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i)
+ ArgumentPHIs[i]->addIncoming(CI->getArgOperand(i), BB);
// If we are introducing an accumulator variable to eliminate the recursion,
// do so now. Note that we _know_ that no subsequent tail recursion
Modified: llvm/branches/wendling/eh/lib/Transforms/Utils/AddrModeMatcher.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Utils/AddrModeMatcher.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Utils/AddrModeMatcher.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Utils/AddrModeMatcher.cpp Fri Jul 2 04:57:13 2010
@@ -381,29 +381,28 @@
const TargetLowering &TLI) {
std::vector<InlineAsm::ConstraintInfo>
Constraints = IA->ParseConstraints();
-
- unsigned ArgNo = 1; // ArgNo - The operand of the CallInst.
+
+ unsigned ArgNo = 0; // The argument of the CallInst.
for (unsigned i = 0, e = Constraints.size(); i != e; ++i) {
TargetLowering::AsmOperandInfo OpInfo(Constraints[i]);
-
+
// Compute the value type for each operand.
switch (OpInfo.Type) {
case InlineAsm::isOutput:
if (OpInfo.isIndirect)
- OpInfo.CallOperandVal = CI->getOperand(ArgNo++);
+ OpInfo.CallOperandVal = CI->getArgOperand(ArgNo++);
break;
case InlineAsm::isInput:
- OpInfo.CallOperandVal = CI->getOperand(ArgNo++);
+ OpInfo.CallOperandVal = CI->getArgOperand(ArgNo++);
break;
case InlineAsm::isClobber:
// Nothing to do.
break;
}
-
+
// Compute the constraint code and ConstraintType to use.
- TLI.ComputeConstraintToUse(OpInfo, SDValue(),
- OpInfo.ConstraintType == TargetLowering::C_Memory);
-
+ TLI.ComputeConstraintToUse(OpInfo, SDValue());
+
// If this asm operand is our Value*, and if it isn't an indirect memory
// operand, we can't fold it!
if (OpInfo.CallOperandVal == OpVal &&
@@ -411,7 +410,7 @@
!OpInfo.isIndirect))
return false;
}
-
+
return true;
}
@@ -450,7 +449,7 @@
if (CallInst *CI = dyn_cast<CallInst>(U)) {
InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue());
- if (IA == 0) return true;
+ if (!IA) return true;
// If this is a memory operand, we're cool, otherwise bail out.
if (!IsOperandAMemoryOperand(CI, IA, I, TLI))
Modified: llvm/branches/wendling/eh/lib/Transforms/Utils/BuildLibCalls.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Utils/BuildLibCalls.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Utils/BuildLibCalls.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Utils/BuildLibCalls.cpp Fri Jul 2 04:57:13 2010
@@ -420,11 +420,11 @@
FT->getParamType(2) != TD->getIntPtrType(Context) ||
FT->getParamType(3) != TD->getIntPtrType(Context))
return false;
-
- if (isFoldable(4, 3, false)) {
- EmitMemCpy(CI->getOperand(1), CI->getOperand(2), CI->getOperand(3),
+
+ if (isFoldable(3 + CallInst::ArgOffset, 2 + CallInst::ArgOffset, false)) {
+ EmitMemCpy(CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
1, false, B, TD);
- replaceCall(CI->getOperand(1));
+ replaceCall(CI->getArgOperand(0));
return true;
}
return false;
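A note on the index arithmetic introduced here (sketch of the transitional API): isFoldable takes raw operand numbers, while the rewritten bodies use argument numbers; CallInst::ArgOffset is the distance between the two, so argument k of a call sits at operand k + CallInst::ArgOffset.

    // argument k  <->  operand (k + CallInst::ArgOffset)
    // old: isFoldable(4, 3, ...)
    // new: isFoldable(3 + CallInst::ArgOffset, 2 + CallInst::ArgOffset, ...)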
@@ -443,11 +443,11 @@
FT->getParamType(2) != TD->getIntPtrType(Context) ||
FT->getParamType(3) != TD->getIntPtrType(Context))
return false;
-
- if (isFoldable(4, 3, false)) {
- EmitMemMove(CI->getOperand(1), CI->getOperand(2), CI->getOperand(3),
+
+ if (isFoldable(3 + CallInst::ArgOffset, 2 + CallInst::ArgOffset, false)) {
+ EmitMemMove(CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
1, false, B, TD);
- replaceCall(CI->getOperand(1));
+ replaceCall(CI->getArgOperand(0));
return true;
}
return false;
@@ -461,12 +461,12 @@
FT->getParamType(2) != TD->getIntPtrType(Context) ||
FT->getParamType(3) != TD->getIntPtrType(Context))
return false;
-
- if (isFoldable(4, 3, false)) {
- Value *Val = B.CreateIntCast(CI->getOperand(2), B.getInt8Ty(),
+
+ if (isFoldable(3 + CallInst::ArgOffset, 2 + CallInst::ArgOffset, false)) {
+ Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(),
false);
- EmitMemSet(CI->getOperand(1), Val, CI->getOperand(3), false, B, TD);
- replaceCall(CI->getOperand(1));
+ EmitMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), false, B, TD);
+ replaceCall(CI->getArgOperand(0));
return true;
}
return false;
@@ -487,8 +487,8 @@
// st[rp]cpy_chk call which may fail at runtime if the size is too long.
// TODO: It might be nice to get a maximum length out of the possible
// string lengths for varying.
- if (isFoldable(3, 2, true)) {
- Value *Ret = EmitStrCpy(CI->getOperand(1), CI->getOperand(2), B, TD,
+ if (isFoldable(2 + CallInst::ArgOffset, 1 + CallInst::ArgOffset, true)) {
+ Value *Ret = EmitStrCpy(CI->getArgOperand(0), CI->getArgOperand(1), B, TD,
Name.substr(2, 6));
replaceCall(Ret);
return true;
@@ -504,10 +504,10 @@
!FT->getParamType(2)->isIntegerTy() ||
FT->getParamType(3) != TD->getIntPtrType(Context))
return false;
-
- if (isFoldable(4, 3, false)) {
- Value *Ret = EmitStrNCpy(CI->getOperand(1), CI->getOperand(2),
- CI->getOperand(3), B, TD, Name.substr(2, 7));
+
+ if (isFoldable(3 + CallInst::ArgOffset, 2 + CallInst::ArgOffset, false)) {
+ Value *Ret = EmitStrNCpy(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2), B, TD, Name.substr(2, 7));
replaceCall(Ret);
return true;
}
Modified: llvm/branches/wendling/eh/lib/Transforms/Utils/CloneFunction.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Utils/CloneFunction.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Utils/CloneFunction.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Utils/CloneFunction.cpp Fri Jul 2 04:57:13 2010
@@ -32,7 +32,7 @@
// CloneBasicBlock - See comments in Cloning.h
BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB,
- DenseMap<const Value*, Value*> &ValueMap,
+ ValueToValueMapTy &VMap,
const Twine &NameSuffix, Function *F,
ClonedCodeInfo *CodeInfo) {
BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "", F);
@@ -47,7 +47,7 @@
if (II->hasName())
NewInst->setName(II->getName()+NameSuffix);
NewBB->getInstList().push_back(NewInst);
- ValueMap[II] = NewInst; // Add instruction map to value.
+ VMap[II] = NewInst; // Add instruction map to value.
hasCalls |= (isa<CallInst>(II) && !isa<DbgInfoIntrinsic>(II));
if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) {
@@ -72,7 +72,7 @@
// ArgMap values.
//
void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
- DenseMap<const Value*, Value*> &ValueMap,
+ ValueToValueMapTy &VMap,
SmallVectorImpl<ReturnInst*> &Returns,
const char *NameSuffix, ClonedCodeInfo *CodeInfo) {
assert(NameSuffix && "NameSuffix cannot be null!");
@@ -80,17 +80,17 @@
#ifndef NDEBUG
for (Function::const_arg_iterator I = OldFunc->arg_begin(),
E = OldFunc->arg_end(); I != E; ++I)
- assert(ValueMap.count(I) && "No mapping from source argument specified!");
+ assert(VMap.count(I) && "No mapping from source argument specified!");
#endif
// Clone any attributes.
if (NewFunc->arg_size() == OldFunc->arg_size())
NewFunc->copyAttributesFrom(OldFunc);
else {
- //Some arguments were deleted with the ValueMap. Copy arguments one by one
+ //Some arguments were deleted with the VMap. Copy arguments one by one
for (Function::const_arg_iterator I = OldFunc->arg_begin(),
E = OldFunc->arg_end(); I != E; ++I)
- if (Argument* Anew = dyn_cast<Argument>(ValueMap[I]))
+ if (Argument* Anew = dyn_cast<Argument>(VMap[I]))
Anew->addAttr( OldFunc->getAttributes()
.getParamAttributes(I->getArgNo() + 1));
NewFunc->setAttributes(NewFunc->getAttributes()
@@ -111,43 +111,43 @@
const BasicBlock &BB = *BI;
// Create a new basic block and copy instructions into it!
- BasicBlock *CBB = CloneBasicBlock(&BB, ValueMap, NameSuffix, NewFunc,
+ BasicBlock *CBB = CloneBasicBlock(&BB, VMap, NameSuffix, NewFunc,
CodeInfo);
- ValueMap[&BB] = CBB; // Add basic block mapping.
+ VMap[&BB] = CBB; // Add basic block mapping.
if (ReturnInst *RI = dyn_cast<ReturnInst>(CBB->getTerminator()))
Returns.push_back(RI);
}
// Loop over all of the instructions in the function, fixing up operand
- // references as we go. This uses ValueMap to do all the hard work.
+ // references as we go. This uses VMap to do all the hard work.
//
- for (Function::iterator BB = cast<BasicBlock>(ValueMap[OldFunc->begin()]),
+ for (Function::iterator BB = cast<BasicBlock>(VMap[OldFunc->begin()]),
BE = NewFunc->end(); BB != BE; ++BB)
// Loop over all instructions, fixing each one as we find it...
for (BasicBlock::iterator II = BB->begin(); II != BB->end(); ++II)
- RemapInstruction(II, ValueMap);
+ RemapInstruction(II, VMap);
}
/// CloneFunction - Return a copy of the specified function, but without
/// embedding the function into another module. Also, any references specified
-/// in the ValueMap are changed to refer to their mapped value instead of the
-/// original one. If any of the arguments to the function are in the ValueMap,
-/// the arguments are deleted from the resultant function. The ValueMap is
+/// in the VMap are changed to refer to their mapped value instead of the
+/// original one. If any of the arguments to the function are in the VMap,
+/// the arguments are deleted from the resultant function. The VMap is
/// updated to include mappings from all of the instructions and basicblocks in
/// the function from their old to new values.
///
Function *llvm::CloneFunction(const Function *F,
- DenseMap<const Value*, Value*> &ValueMap,
+ ValueToValueMapTy &VMap,
ClonedCodeInfo *CodeInfo) {
std::vector<const Type*> ArgTypes;
// The user might be deleting arguments to the function by specifying them in
- // the ValueMap. If so, we need to not add the arguments to the arg ty vector
+ // the VMap. If so, we need to not add the arguments to the arg ty vector
//
for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
I != E; ++I)
- if (ValueMap.count(I) == 0) // Haven't mapped the argument to anything yet?
+ if (VMap.count(I) == 0) // Haven't mapped the argument to anything yet?
ArgTypes.push_back(I->getType());
// Create a new function type...
@@ -161,13 +161,13 @@
Function::arg_iterator DestI = NewF->arg_begin();
for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
I != E; ++I)
- if (ValueMap.count(I) == 0) { // Is this argument preserved?
+ if (VMap.count(I) == 0) { // Is this argument preserved?
DestI->setName(I->getName()); // Copy the name over...
- ValueMap[I] = DestI++; // Add mapping to ValueMap
+ VMap[I] = DestI++; // Add mapping to VMap
}
SmallVector<ReturnInst*, 8> Returns; // Ignore returns cloned.
- CloneFunctionInto(NewF, F, ValueMap, Returns, "", CodeInfo);
+ CloneFunctionInto(NewF, F, VMap, Returns, "", CodeInfo);
return NewF;
}
@@ -179,19 +179,19 @@
struct PruningFunctionCloner {
Function *NewFunc;
const Function *OldFunc;
- DenseMap<const Value*, Value*> &ValueMap;
+ ValueToValueMapTy &VMap;
SmallVectorImpl<ReturnInst*> &Returns;
const char *NameSuffix;
ClonedCodeInfo *CodeInfo;
const TargetData *TD;
public:
PruningFunctionCloner(Function *newFunc, const Function *oldFunc,
- DenseMap<const Value*, Value*> &valueMap,
+ ValueToValueMapTy &valueMap,
SmallVectorImpl<ReturnInst*> &returns,
const char *nameSuffix,
ClonedCodeInfo *codeInfo,
const TargetData *td)
- : NewFunc(newFunc), OldFunc(oldFunc), ValueMap(valueMap), Returns(returns),
+ : NewFunc(newFunc), OldFunc(oldFunc), VMap(valueMap), Returns(returns),
NameSuffix(nameSuffix), CodeInfo(codeInfo), TD(td) {
}
@@ -202,7 +202,7 @@
public:
/// ConstantFoldMappedInstruction - Constant fold the specified instruction,
- /// mapping its operands through ValueMap if they are available.
+ /// mapping its operands through VMap if they are available.
Constant *ConstantFoldMappedInstruction(const Instruction *I);
};
}
@@ -211,7 +211,7 @@
/// anything that it can reach.
void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
std::vector<const BasicBlock*> &ToClone){
- Value *&BBEntry = ValueMap[BB];
+ Value *&BBEntry = VMap[BB];
// Have we already cloned this block?
if (BBEntry) return;
@@ -230,7 +230,7 @@
// If this instruction constant folds, don't bother cloning the instruction,
// instead, just add the constant to the value map.
if (Constant *C = ConstantFoldMappedInstruction(II)) {
- ValueMap[II] = C;
+ VMap[II] = C;
continue;
}
@@ -238,7 +238,7 @@
if (II->hasName())
NewInst->setName(II->getName()+NameSuffix);
NewBB->getInstList().push_back(NewInst);
- ValueMap[II] = NewInst; // Add instruction map to value.
+ VMap[II] = NewInst; // Add instruction map to value.
hasCalls |= (isa<CallInst>(II) && !isa<DbgInfoIntrinsic>(II));
if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) {
@@ -258,12 +258,12 @@
ConstantInt *Cond = dyn_cast<ConstantInt>(BI->getCondition());
// Or is a known constant in the caller...
if (Cond == 0)
- Cond = dyn_cast_or_null<ConstantInt>(ValueMap[BI->getCondition()]);
+ Cond = dyn_cast_or_null<ConstantInt>(VMap[BI->getCondition()]);
// Constant fold to uncond branch!
if (Cond) {
BasicBlock *Dest = BI->getSuccessor(!Cond->getZExtValue());
- ValueMap[OldTI] = BranchInst::Create(Dest, NewBB);
+ VMap[OldTI] = BranchInst::Create(Dest, NewBB);
ToClone.push_back(Dest);
TerminatorDone = true;
}
@@ -272,10 +272,10 @@
// If switching on a value known constant in the caller.
ConstantInt *Cond = dyn_cast<ConstantInt>(SI->getCondition());
if (Cond == 0) // Or known constant after constant prop in the callee...
- Cond = dyn_cast_or_null<ConstantInt>(ValueMap[SI->getCondition()]);
+ Cond = dyn_cast_or_null<ConstantInt>(VMap[SI->getCondition()]);
if (Cond) { // Constant fold to uncond branch!
BasicBlock *Dest = SI->getSuccessor(SI->findCaseValue(Cond));
- ValueMap[OldTI] = BranchInst::Create(Dest, NewBB);
+ VMap[OldTI] = BranchInst::Create(Dest, NewBB);
ToClone.push_back(Dest);
TerminatorDone = true;
}
@@ -286,7 +286,7 @@
if (OldTI->hasName())
NewInst->setName(OldTI->getName()+NameSuffix);
NewBB->getInstList().push_back(NewInst);
- ValueMap[OldTI] = NewInst; // Add instruction map to value.
+ VMap[OldTI] = NewInst; // Add instruction map to value.
// Recursively clone any reachable successor blocks.
const TerminatorInst *TI = BB->getTerminator();
@@ -307,13 +307,13 @@
}
/// ConstantFoldMappedInstruction - Constant fold the specified instruction,
-/// mapping its operands through ValueMap if they are available.
+/// mapping its operands through VMap if they are available.
Constant *PruningFunctionCloner::
ConstantFoldMappedInstruction(const Instruction *I) {
SmallVector<Constant*, 8> Ops;
for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
if (Constant *Op = dyn_cast_or_null<Constant>(MapValue(I->getOperand(i),
- ValueMap)))
+ VMap)))
Ops.push_back(Op);
else
return 0; // All operands not constant!
@@ -363,7 +363,7 @@
/// dead. Since this doesn't produce an exact copy of the input, it can't be
/// used for things like CloneFunction or CloneModule.
void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
- DenseMap<const Value*, Value*> &ValueMap,
+ ValueToValueMapTy &VMap,
SmallVectorImpl<ReturnInst*> &Returns,
const char *NameSuffix,
ClonedCodeInfo *CodeInfo,
@@ -374,10 +374,10 @@
#ifndef NDEBUG
for (Function::const_arg_iterator II = OldFunc->arg_begin(),
E = OldFunc->arg_end(); II != E; ++II)
- assert(ValueMap.count(II) && "No mapping from source argument specified!");
+ assert(VMap.count(II) && "No mapping from source argument specified!");
#endif
- PruningFunctionCloner PFC(NewFunc, OldFunc, ValueMap, Returns,
+ PruningFunctionCloner PFC(NewFunc, OldFunc, VMap, Returns,
NameSuffix, CodeInfo, TD);
// Clone the entry block, and anything recursively reachable from it.
@@ -397,14 +397,14 @@
SmallVector<const PHINode*, 16> PHIToResolve;
for (Function::const_iterator BI = OldFunc->begin(), BE = OldFunc->end();
BI != BE; ++BI) {
- BasicBlock *NewBB = cast_or_null<BasicBlock>(ValueMap[BI]);
+ BasicBlock *NewBB = cast_or_null<BasicBlock>(VMap[BI]);
if (NewBB == 0) continue; // Dead block.
// Add the new block to the new function.
NewFunc->getBasicBlockList().push_back(NewBB);
// Loop over all of the instructions in the block, fixing up operand
- // references as we go. This uses ValueMap to do all the hard work.
+ // references as we go. This uses VMap to do all the hard work.
//
BasicBlock::iterator I = NewBB->begin();
@@ -455,7 +455,7 @@
I->setMetadata(DbgKind, 0);
}
}
- RemapInstruction(I, ValueMap);
+ RemapInstruction(I, VMap);
}
}
@@ -465,19 +465,19 @@
const PHINode *OPN = PHIToResolve[phino];
unsigned NumPreds = OPN->getNumIncomingValues();
const BasicBlock *OldBB = OPN->getParent();
- BasicBlock *NewBB = cast<BasicBlock>(ValueMap[OldBB]);
+ BasicBlock *NewBB = cast<BasicBlock>(VMap[OldBB]);
// Map operands for blocks that are live and remove operands for blocks
// that are dead.
for (; phino != PHIToResolve.size() &&
PHIToResolve[phino]->getParent() == OldBB; ++phino) {
OPN = PHIToResolve[phino];
- PHINode *PN = cast<PHINode>(ValueMap[OPN]);
+ PHINode *PN = cast<PHINode>(VMap[OPN]);
for (unsigned pred = 0, e = NumPreds; pred != e; ++pred) {
if (BasicBlock *MappedBlock =
- cast_or_null<BasicBlock>(ValueMap[PN->getIncomingBlock(pred)])) {
+ cast_or_null<BasicBlock>(VMap[PN->getIncomingBlock(pred)])) {
Value *InVal = MapValue(PN->getIncomingValue(pred),
- ValueMap);
+ VMap);
assert(InVal && "Unknown input value?");
PN->setIncomingValue(pred, InVal);
PN->setIncomingBlock(pred, MappedBlock);
@@ -531,15 +531,15 @@
while ((PN = dyn_cast<PHINode>(I++))) {
Value *NV = UndefValue::get(PN->getType());
PN->replaceAllUsesWith(NV);
- assert(ValueMap[OldI] == PN && "ValueMap mismatch");
- ValueMap[OldI] = NV;
+ assert(VMap[OldI] == PN && "VMap mismatch");
+ VMap[OldI] = NV;
PN->eraseFromParent();
++OldI;
}
}
// NOTE: We cannot eliminate single entry phi nodes here, because of
- // ValueMap. Single entry phi nodes can have multiple ValueMap entries
- // pointing at them. Thus, deleting one would require scanning the ValueMap
+ // VMap. Single entry phi nodes can have multiple VMap entries
+ // pointing at them. Thus, deleting one would require scanning the VMap
// to update any entries in it that would require that. This would be
// really slow.
}
@@ -548,14 +548,14 @@
// and zap unconditional fall-through branches. This happen all the time when
// specializing code: code specialization turns conditional branches into
// uncond branches, and this code folds them.
- Function::iterator I = cast<BasicBlock>(ValueMap[&OldFunc->getEntryBlock()]);
+ Function::iterator I = cast<BasicBlock>(VMap[&OldFunc->getEntryBlock()]);
while (I != NewFunc->end()) {
BranchInst *BI = dyn_cast<BranchInst>(I->getTerminator());
if (!BI || BI->isConditional()) { ++I; continue; }
// Note that we can't eliminate uncond branches if the destination has
// single-entry PHI nodes. Eliminating the single-entry phi nodes would
- // require scanning the ValueMap to update any entries that point to the phi
+ // require scanning the VMap to update any entries that point to the phi
// node.
BasicBlock *Dest = BI->getSuccessor(0);
if (!Dest->getSinglePredecessor() || isa<PHINode>(Dest->begin())) {
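
A note on the API change above: callers of CloneFunction now hand in the new
map type instead of a raw DenseMap. A rough sketch of a client after this
patch (cloneWithSuffix is a hypothetical helper, not part of the patch, and
the includes assume this revision's headers):

#include "llvm/Function.h"
#include "llvm/Module.h"
#include "llvm/ADT/ValueMap.h"
#include "llvm/Transforms/Utils/Cloning.h"
using namespace llvm;

// Clone F into its parent module under a fresh name, using the new map type.
static Function *cloneWithSuffix(Function *F) {
  ValueMap<const Value*, Value*> VMap;         // old value -> new value
  Function *NewF = CloneFunction(F, VMap, 0);  // 0 = no ClonedCodeInfo
  NewF->setName(F->getName() + ".clone");
  F->getParent()->getFunctionList().push_back(NewF);
  return NewF;
}
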
Modified: llvm/branches/wendling/eh/lib/Transforms/Utils/CloneLoop.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Utils/CloneLoop.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Utils/CloneLoop.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Utils/CloneLoop.cpp Fri Jul 2 04:57:13 2010
@@ -15,7 +15,6 @@
#include "llvm/BasicBlock.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/Dominators.h"
-#include "llvm/ADT/DenseMap.h"
using namespace llvm;
@@ -23,13 +22,13 @@
/// CloneDominatorInfo - Clone basicblock's dominator tree and, if available,
/// dominance info. It is expected that the basic block is already cloned.
static void CloneDominatorInfo(BasicBlock *BB,
- DenseMap<const Value *, Value *> &ValueMap,
+ ValueMap<const Value *, Value *> &VMap,
DominatorTree *DT,
DominanceFrontier *DF) {
assert (DT && "DominatorTree is not available");
- DenseMap<const Value *, Value*>::iterator BI = ValueMap.find(BB);
- assert (BI != ValueMap.end() && "BasicBlock clone is missing");
+ ValueMap<const Value *, Value*>::iterator BI = VMap.find(BB);
+ assert (BI != VMap.end() && "BasicBlock clone is missing");
BasicBlock *NewBB = cast<BasicBlock>(BI->second);
// NewBB already got dominator info.
@@ -43,11 +42,11 @@
// NewBB's dominator is either BB's dominator or BB's dominator's clone.
BasicBlock *NewBBDom = BBDom;
- DenseMap<const Value *, Value*>::iterator BBDomI = ValueMap.find(BBDom);
- if (BBDomI != ValueMap.end()) {
+ ValueMap<const Value *, Value*>::iterator BBDomI = VMap.find(BBDom);
+ if (BBDomI != VMap.end()) {
NewBBDom = cast<BasicBlock>(BBDomI->second);
if (!DT->getNode(NewBBDom))
- CloneDominatorInfo(BBDom, ValueMap, DT, DF);
+ CloneDominatorInfo(BBDom, VMap, DT, DF);
}
DT->addNewBlock(NewBB, NewBBDom);
@@ -60,8 +59,8 @@
for (DominanceFrontier::DomSetType::iterator I = S.begin(), E = S.end();
I != E; ++I) {
BasicBlock *DB = *I;
- DenseMap<const Value*, Value*>::iterator IDM = ValueMap.find(DB);
- if (IDM != ValueMap.end())
+ ValueMap<const Value*, Value*>::iterator IDM = VMap.find(DB);
+ if (IDM != VMap.end())
NewDFSet.insert(cast<BasicBlock>(IDM->second));
else
NewDFSet.insert(DB);
@@ -71,10 +70,10 @@
}
}
-/// CloneLoop - Clone Loop. Clone dominator info. Populate ValueMap
+/// CloneLoop - Clone Loop. Clone dominator info. Populate VMap
/// using old blocks to new blocks mapping.
Loop *llvm::CloneLoop(Loop *OrigL, LPPassManager *LPM, LoopInfo *LI,
- DenseMap<const Value *, Value *> &ValueMap, Pass *P) {
+ ValueMap<const Value *, Value *> &VMap, Pass *P) {
DominatorTree *DT = NULL;
DominanceFrontier *DF = NULL;
@@ -104,8 +103,8 @@
for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
I != E; ++I) {
BasicBlock *BB = *I;
- BasicBlock *NewBB = CloneBasicBlock(BB, ValueMap, ".clone");
- ValueMap[BB] = NewBB;
+ BasicBlock *NewBB = CloneBasicBlock(BB, VMap, ".clone");
+ VMap[BB] = NewBB;
if (P)
LPM->cloneBasicBlockSimpleAnalysis(BB, NewBB, L);
NewLoop->addBasicBlockToLoop(NewBB, LI->getBase());
@@ -117,7 +116,7 @@
for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
I != E; ++I) {
BasicBlock *BB = *I;
- CloneDominatorInfo(BB, ValueMap, DT, DF);
+ CloneDominatorInfo(BB, VMap, DT, DF);
}
// Process sub loops
@@ -125,7 +124,7 @@
LoopNest.push_back(*I);
} while (!LoopNest.empty());
- // Remap instructions to reference operands from ValueMap.
+ // Remap instructions to reference operands from VMap.
for(SmallVector<BasicBlock *, 16>::iterator NBItr = NewBlocks.begin(),
NBE = NewBlocks.end(); NBItr != NBE; ++NBItr) {
BasicBlock *NB = *NBItr;
@@ -135,8 +134,8 @@
for (unsigned index = 0, num_ops = Insn->getNumOperands();
index != num_ops; ++index) {
Value *Op = Insn->getOperand(index);
- DenseMap<const Value *, Value *>::iterator OpItr = ValueMap.find(Op);
- if (OpItr != ValueMap.end())
+ ValueMap<const Value *, Value *>::iterator OpItr = VMap.find(Op);
+ if (OpItr != VMap.end())
Insn->setOperand(index, OpItr->second);
}
}
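
For context, the CloneLoop entry point now takes the same handle-based map.
A minimal sketch of a call site, assuming the declarations in Cloning.h and
LoopPass.h at this revision (cloneLoopWithMap is a hypothetical wrapper):

#include "llvm/Analysis/LoopPass.h"
#include "llvm/ADT/ValueMap.h"
#include "llvm/Transforms/Utils/Cloning.h"
using namespace llvm;

static Loop *cloneLoopWithMap(Loop *L, LPPassManager *LPM, LoopInfo *LI,
                              Pass *P) {
  ValueMap<const Value *, Value *> VMap;
  Loop *NewL = CloneLoop(L, LPM, LI, VMap, P);
  // On return, VMap names the clone of every original block and
  // instruction, and the clones' dominator info has been rebuilt.
  return NewL;
}
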
Modified: llvm/branches/wendling/eh/lib/Transforms/Utils/CloneModule.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Utils/CloneModule.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Utils/CloneModule.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Utils/CloneModule.cpp Fri Jul 2 04:57:13 2010
@@ -28,12 +28,12 @@
Module *llvm::CloneModule(const Module *M) {
// Create the value map that maps things from the old module over to the new
// module.
- DenseMap<const Value*, Value*> ValueMap;
- return CloneModule(M, ValueMap);
+ ValueToValueMapTy VMap;
+ return CloneModule(M, VMap);
}
Module *llvm::CloneModule(const Module *M,
- DenseMap<const Value*, Value*> &ValueMap) {
+ ValueToValueMapTy &VMap) {
// First off, we need to create the new module...
Module *New = new Module(M->getModuleIdentifier(), M->getContext());
New->setDataLayout(M->getDataLayout());
@@ -51,7 +51,7 @@
New->addLibrary(*I);
// Loop over all of the global variables, making corresponding globals in the
- // new module. Here we add them to the ValueMap and to the new Module. We
+ // new module. Here we add them to the VMap and to the new Module. We
// don't worry about attributes or initializers, they will come later.
//
for (Module::const_global_iterator I = M->global_begin(), E = M->global_end();
@@ -62,7 +62,7 @@
GlobalValue::ExternalLinkage, 0,
I->getName());
GV->setAlignment(I->getAlignment());
- ValueMap[I] = GV;
+ VMap[I] = GV;
}
// Loop over the functions in the module, making external functions as before
@@ -71,13 +71,13 @@
Function::Create(cast<FunctionType>(I->getType()->getElementType()),
GlobalValue::ExternalLinkage, I->getName(), New);
NF->copyAttributesFrom(I);
- ValueMap[I] = NF;
+ VMap[I] = NF;
}
// Loop over the aliases in the module
for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end();
I != E; ++I)
- ValueMap[I] = new GlobalAlias(I->getType(), GlobalAlias::ExternalLinkage,
+ VMap[I] = new GlobalAlias(I->getType(), GlobalAlias::ExternalLinkage,
I->getName(), NULL, New);
// Now that all of the things that global variable initializer can refer to
@@ -86,10 +86,10 @@
//
for (Module::const_global_iterator I = M->global_begin(), E = M->global_end();
I != E; ++I) {
- GlobalVariable *GV = cast<GlobalVariable>(ValueMap[I]);
+ GlobalVariable *GV = cast<GlobalVariable>(VMap[I]);
if (I->hasInitializer())
GV->setInitializer(cast<Constant>(MapValue(I->getInitializer(),
- ValueMap)));
+ VMap)));
GV->setLinkage(I->getLinkage());
GV->setThreadLocal(I->isThreadLocal());
GV->setConstant(I->isConstant());
@@ -98,17 +98,17 @@
// Similarly, copy over function bodies now...
//
for (Module::const_iterator I = M->begin(), E = M->end(); I != E; ++I) {
- Function *F = cast<Function>(ValueMap[I]);
+ Function *F = cast<Function>(VMap[I]);
if (!I->isDeclaration()) {
Function::arg_iterator DestI = F->arg_begin();
for (Function::const_arg_iterator J = I->arg_begin(); J != I->arg_end();
++J) {
DestI->setName(J->getName());
- ValueMap[J] = DestI++;
+ VMap[J] = DestI++;
}
SmallVector<ReturnInst*, 8> Returns; // Ignore returns cloned.
- CloneFunctionInto(F, I, ValueMap, Returns);
+ CloneFunctionInto(F, I, VMap, Returns);
}
F->setLinkage(I->getLinkage());
@@ -117,11 +117,37 @@
// And aliases
for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end();
I != E; ++I) {
- GlobalAlias *GA = cast<GlobalAlias>(ValueMap[I]);
+ GlobalAlias *GA = cast<GlobalAlias>(VMap[I]);
GA->setLinkage(I->getLinkage());
if (const Constant* C = I->getAliasee())
- GA->setAliasee(cast<Constant>(MapValue(C, ValueMap)));
+ GA->setAliasee(cast<Constant>(MapValue(C, VMap)));
}
-
+
+ // And named metadata....
+ for (Module::const_named_metadata_iterator I = M->named_metadata_begin(),
+ E = M->named_metadata_end(); I != E; ++I) {
+ const NamedMDNode &NMD = *I;
+ SmallVector<MDNode*, 4> MDs;
+ for (unsigned i = 0, e = NMD.getNumOperands(); i != e; ++i)
+ MDs.push_back(cast<MDNode>(MapValue(NMD.getOperand(i), VMap)));
+ NamedMDNode::Create(New->getContext(), NMD.getName(),
+ MDs.data(), MDs.size(), New);
+ }
+
+ // Update metadata attached to instructions.
+ for (Module::iterator MI = New->begin(), ME = New->end(); MI != ME; ++MI)
+ for (Function::iterator FI = MI->begin(), FE = MI->end();
+ FI != FE; ++FI)
+ for (BasicBlock::iterator BI = FI->begin(), BE = FI->end();
+ BI != BE; ++BI) {
+ SmallVector<std::pair<unsigned, MDNode *>, 4 > MDs;
+ BI->getAllMetadata(MDs);
+ for (SmallVector<std::pair<unsigned, MDNode *>, 4>::iterator
+ MDI = MDs.begin(), MDE = MDs.end(); MDI != MDE; ++MDI) {
+ Value *MappedValue = MapValue(MDI->second, VMap);
+ if (MDI->second != MappedValue && MappedValue)
+ BI->setMetadata(MDI->first, cast<MDNode>(MappedValue));
+ }
+ }
return New;
}
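
The two loops appended here are new behavior: CloneModule previously dropped
named metadata and per-instruction attachments such as !dbg. A sketch of what
a caller can now rely on ("my.md" and cloneCheckingMD are placeholders, not
from the patch):

#include "llvm/Module.h"
#include "llvm/ADT/ValueMap.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include <cassert>
using namespace llvm;

static Module *cloneCheckingMD(Module *M) {
  ValueMap<const Value*, Value*> VMap;
  Module *New = CloneModule(M, VMap);
  // "my.md" stands in for any named metadata present in M.
  if (M->getNamedMetadata("my.md"))
    assert(New->getNamedMetadata("my.md") && "named MD survives the clone");
  return New;
}
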
Modified: llvm/branches/wendling/eh/lib/Transforms/Utils/InlineFunction.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Utils/InlineFunction.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Utils/InlineFunction.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Utils/InlineFunction.cpp Fri Jul 2 04:57:13 2010
@@ -63,7 +63,8 @@
// Next, create the new invoke instruction, inserting it at the end
// of the old basic block.
- SmallVector<Value*, 8> InvokeArgs(CI->op_begin()+1, CI->op_end());
+ ImmutableCallSite CS(CI);
+ SmallVector<Value*, 8> InvokeArgs(CS.arg_begin(), CS.arg_end());
InvokeInst *II =
InvokeInst::Create(CI->getCalledValue(), Split, InvokeDest,
0, 0, 0, 0, // EH-FIXME!
@@ -170,7 +171,7 @@
/// some edges of the callgraph may remain.
static void UpdateCallGraphAfterInlining(CallSite CS,
Function::iterator FirstNewBlock,
- DenseMap<const Value*, Value*> &ValueMap,
+ ValueMap<const Value*, Value*> &VMap,
InlineFunctionInfo &IFI) {
CallGraph &CG = *IFI.CG;
const Function *Caller = CS.getInstruction()->getParent()->getParent();
@@ -193,9 +194,9 @@
for (; I != E; ++I) {
const Value *OrigCall = I->first;
- DenseMap<const Value*, Value*>::iterator VMI = ValueMap.find(OrigCall);
+ ValueMap<const Value*, Value*>::iterator VMI = VMap.find(OrigCall);
// Only copy the edge if the call was inlined!
- if (VMI == ValueMap.end() || VMI->second == 0)
+ if (VMI == VMap.end() || VMI->second == 0)
continue;
// If the call was inlined, but then constant folded, there is no edge to
@@ -286,8 +287,8 @@
ClonedCodeInfo InlinedFunctionInfo;
Function::iterator FirstNewBlock;
- { // Scope to destroy ValueMap after cloning.
- DenseMap<const Value*, Value*> ValueMap;
+ { // Scope to destroy VMap after cloning.
+ ValueMap<const Value*, Value*> VMap;
assert(CalledFunc->arg_size() == CS.arg_size() &&
"No varargs calls can be inlined!");
@@ -358,14 +359,14 @@
MustClearTailCallFlags = true;
}
- ValueMap[I] = ActualArg;
+ VMap[I] = ActualArg;
}
// We want the inliner to prune the code as it copies. We would LOVE to
// have no dead or constant instructions leftover after inlining occurs
// (which can happen, e.g., because an argument was constant), but we'll be
// happy with whatever the cloner can do.
- CloneAndPruneFunctionInto(Caller, CalledFunc, ValueMap, Returns, ".i",
+ CloneAndPruneFunctionInto(Caller, CalledFunc, VMap, Returns, ".i",
&InlinedFunctionInfo, IFI.TD, TheCall);
// Remember the first block that is newly cloned over.
@@ -373,7 +374,7 @@
// Update the callgraph if requested.
if (IFI.CG)
- UpdateCallGraphAfterInlining(CS, FirstNewBlock, ValueMap, IFI);
+ UpdateCallGraphAfterInlining(CS, FirstNewBlock, VMap, IFI);
}
// If there are any alloca instructions in the block that used to be the entry
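
Both hunks above replace op_begin()+1 pointer arithmetic with CallSite's
argument iterators, which stay correct however the operand list is laid out.
The reusable shape of that idiom, as a sketch (collectArgs is hypothetical;
the Instruction-taking ImmutableCallSite constructor is assumed):

#include "llvm/Instruction.h"
#include "llvm/Support/CallSite.h"
#include "llvm/ADT/SmallVector.h"
using namespace llvm;

// Collect the explicit arguments of a call or invoke without hard-coding
// where the callee pointer lives in the operand list.
static void collectArgs(const Instruction *I, SmallVectorImpl<Value*> &Args) {
  ImmutableCallSite CS(I);
  Args.append(CS.arg_begin(), CS.arg_end());
}
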
Modified: llvm/branches/wendling/eh/lib/Transforms/Utils/LoopSimplify.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Utils/LoopSimplify.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Utils/LoopSimplify.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Utils/LoopSimplify.cpp Fri Jul 2 04:57:13 2010
@@ -192,7 +192,7 @@
if (!Preheader) {
Preheader = InsertPreheaderForLoop(L);
if (Preheader) {
- NumInserted++;
+ ++NumInserted;
Changed = true;
}
}
@@ -215,7 +215,7 @@
// allowed.
if (!L->contains(*PI)) {
if (RewriteLoopExitBlock(L, ExitBlock)) {
- NumInserted++;
+ ++NumInserted;
Changed = true;
}
break;
@@ -244,7 +244,7 @@
// loop header.
LoopLatch = InsertUniqueBackedgeBlock(L, Preheader);
if (LoopLatch) {
- NumInserted++;
+ ++NumInserted;
Changed = true;
}
}
Modified: llvm/branches/wendling/eh/lib/Transforms/Utils/LoopUnroll.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Utils/LoopUnroll.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Utils/LoopUnroll.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Utils/LoopUnroll.cpp Fri Jul 2 04:57:13 2010
@@ -37,13 +37,13 @@
STATISTIC(NumUnrolled, "Number of loops unrolled (completely or otherwise)");
/// RemapInstruction - Convert the instruction operands from referencing the
-/// current values into those specified by ValueMap.
+/// current values into those specified by VMap.
static inline void RemapInstruction(Instruction *I,
- DenseMap<const Value *, Value*> &ValueMap) {
+ ValueMap<const Value *, Value*> &VMap) {
for (unsigned op = 0, E = I->getNumOperands(); op != E; ++op) {
Value *Op = I->getOperand(op);
- DenseMap<const Value *, Value*>::iterator It = ValueMap.find(Op);
- if (It != ValueMap.end())
+ ValueMap<const Value *, Value*>::iterator It = VMap.find(Op);
+ if (It != VMap.end())
I->setOperand(op, It->second);
}
}
@@ -183,7 +183,7 @@
// For the first iteration of the loop, we should use the precloned values for
// PHI nodes. Insert associations now.
- typedef DenseMap<const Value*, Value*> ValueToValueMapTy;
+ typedef ValueMap<const Value*, Value*> ValueToValueMapTy;
ValueToValueMapTy LastValueMap;
std::vector<PHINode*> OrigPHINode;
for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
@@ -205,26 +205,26 @@
for (std::vector<BasicBlock*>::iterator BB = LoopBlocks.begin(),
E = LoopBlocks.end(); BB != E; ++BB) {
- ValueToValueMapTy ValueMap;
- BasicBlock *New = CloneBasicBlock(*BB, ValueMap, "." + Twine(It));
+ ValueToValueMapTy VMap;
+ BasicBlock *New = CloneBasicBlock(*BB, VMap, "." + Twine(It));
Header->getParent()->getBasicBlockList().push_back(New);
// Loop over all of the PHI nodes in the block, changing them to use the
// incoming values from the previous block.
if (*BB == Header)
for (unsigned i = 0, e = OrigPHINode.size(); i != e; ++i) {
- PHINode *NewPHI = cast<PHINode>(ValueMap[OrigPHINode[i]]);
+ PHINode *NewPHI = cast<PHINode>(VMap[OrigPHINode[i]]);
Value *InVal = NewPHI->getIncomingValueForBlock(LatchBlock);
if (Instruction *InValI = dyn_cast<Instruction>(InVal))
if (It > 1 && L->contains(InValI))
InVal = LastValueMap[InValI];
- ValueMap[OrigPHINode[i]] = InVal;
+ VMap[OrigPHINode[i]] = InVal;
New->getInstList().erase(NewPHI);
}
// Update our running map of newest clones
LastValueMap[*BB] = New;
- for (ValueToValueMapTy::iterator VI = ValueMap.begin(), VE = ValueMap.end();
+ for (ValueToValueMapTy::iterator VI = VMap.begin(), VE = VMap.end();
VI != VE; ++VI)
LastValueMap[VI->first] = VI->second;
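
The unroller keeps two maps: a fresh per-iteration VMap from CloneBasicBlock,
folded into a running LastValueMap so that iteration N can resolve values
defined in iteration N-1. That merge step, isolated as a sketch
(mergeIterationMap is a hypothetical name for the loop shown above):

#include "llvm/ADT/ValueMap.h"
#include "llvm/Value.h"
using namespace llvm;

static void mergeIterationMap(ValueMap<const Value*, Value*> &LastValueMap,
                              const ValueMap<const Value*, Value*> &VMap) {
  for (ValueMap<const Value*, Value*>::const_iterator VI = VMap.begin(),
       VE = VMap.end(); VI != VE; ++VI)
    LastValueMap[VI->first] = VI->second;  // newest clone wins
}
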
Modified: llvm/branches/wendling/eh/lib/Transforms/Utils/LowerInvoke.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Utils/LowerInvoke.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Utils/LowerInvoke.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Utils/LowerInvoke.cpp Fri Jul 2 04:57:13 2010
@@ -310,15 +310,15 @@
for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end();
AI != E; ++AI) {
const Type *Ty = AI->getType();
- // StructType can't be cast, but is a legal argument type, so we have
+ // Aggregate types can't be cast, but are legal argument types, so we have
// to handle them differently. We use an extract/insert pair as a
// lightweight method to achieve the same goal.
- if (isa<StructType>(Ty)) {
- Instruction *EI = ExtractValueInst::Create(AI, 0, "", AfterAllocaInsertPt);
+ if (isa<StructType>(Ty) || isa<ArrayType>(Ty) || isa<VectorType>(Ty)) {
+ Instruction *EI = ExtractValueInst::Create(AI, 0, "",AfterAllocaInsertPt);
Instruction *NI = InsertValueInst::Create(AI, EI, 0);
NI->insertAfter(EI);
AI->replaceAllUsesWith(NI);
- // Set the struct operand of the instructions back to the AllocaInst.
+ // Set the operand of the instructions back to the AllocaInst.
EI->setOperand(0, AI);
NI->setOperand(0, AI);
} else {
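
The extract/insert pair deserves a word: first-class aggregates cannot be
bitcast, but insertvalue(Agg, extractvalue(Agg, 0), 0) rebuilds a value
identical to Agg, giving replaceAllUsesWith a safe target. As a standalone
sketch (makeAggregateCopy is hypothetical):

#include "llvm/Instructions.h"
using namespace llvm;

// NI recomputes a value identical to Agg member-for-member, so uses of
// Agg can be redirected to NI without an illegal bitcast; the caller then
// points the pair's own operands back at the original, as above.
static Instruction *makeAggregateCopy(Value *Agg, Instruction *InsertPt) {
  Instruction *EI = ExtractValueInst::Create(Agg, 0, "", InsertPt);
  Instruction *NI = InsertValueInst::Create(Agg, EI, 0);
  NI->insertAfter(EI);
  return NI;
}
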
Modified: llvm/branches/wendling/eh/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Utils/PromoteMemoryToRegister.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Utils/PromoteMemoryToRegister.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Utils/PromoteMemoryToRegister.cpp Fri Jul 2 04:57:13 2010
@@ -603,9 +603,8 @@
// To determine liveness, we must iterate through the predecessors of blocks
// where the def is live. Blocks are added to the worklist if we need to
// check their predecessors. Start with all the using blocks.
- SmallVector<BasicBlock*, 64> LiveInBlockWorklist;
- LiveInBlockWorklist.insert(LiveInBlockWorklist.end(),
- Info.UsingBlocks.begin(), Info.UsingBlocks.end());
+ SmallVector<BasicBlock*, 64> LiveInBlockWorklist(Info.UsingBlocks.begin(),
+ Info.UsingBlocks.end());
// If any of the using blocks is also a definition block, check to see if the
// definition occurs before or after the use. If it happens before the use,
Modified: llvm/branches/wendling/eh/lib/Transforms/Utils/ValueMapper.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Utils/ValueMapper.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Utils/ValueMapper.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Utils/ValueMapper.cpp Fri Jul 2 04:57:13 2010
@@ -28,7 +28,7 @@
// DenseMap. This includes any recursive calls to MapValue.
// Global values and non-function-local metadata do not need to be seeded into
- // the ValueMap if they are using the identity mapping.
+ // the VM if they are using the identity mapping.
if (isa<GlobalValue>(V) || isa<InlineAsm>(V) || isa<MDString>(V) ||
(isa<MDNode>(V) && !cast<MDNode>(V)->isFunctionLocal()))
return VMSlot = const_cast<Value*>(V);
@@ -125,11 +125,11 @@
}
/// RemapInstruction - Convert the instruction operands from referencing the
-/// current values into those specified by ValueMap.
+/// current values into those specified by VMap.
///
-void llvm::RemapInstruction(Instruction *I, ValueToValueMapTy &ValueMap) {
+void llvm::RemapInstruction(Instruction *I, ValueToValueMapTy &VMap) {
for (User::op_iterator op = I->op_begin(), E = I->op_end(); op != E; ++op) {
- Value *V = MapValue(*op, ValueMap);
+ Value *V = MapValue(*op, VMap);
assert(V && "Referenced value not in value map!");
*op = V;
}
Modified: llvm/branches/wendling/eh/lib/Transforms/Utils/ValueMapper.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Utils/ValueMapper.h?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Utils/ValueMapper.h (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Utils/ValueMapper.h Fri Jul 2 04:57:13 2010
@@ -15,12 +15,12 @@
#ifndef VALUEMAPPER_H
#define VALUEMAPPER_H
-#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/ValueMap.h"
namespace llvm {
class Value;
class Instruction;
- typedef DenseMap<const Value *, Value *> ValueToValueMapTy;
+ typedef ValueMap<const Value *, Value *> ValueToValueMapTy;
Value *MapValue(const Value *V, ValueToValueMapTy &VM);
void RemapInstruction(Instruction *I, ValueToValueMapTy &VM);
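
This typedef switch is the heart of the whole patch: DenseMap keys are raw
pointers, so a key that is RAUW'd or deleted mid-clone leaves a stale entry,
while ValueMap keys are value handles that track those events. A sketch of
the difference, assuming ValueMap's default config (which follows RAUW):

#include "llvm/ADT/ValueMap.h"
#include "llvm/Value.h"
#include <cassert>
using namespace llvm;

// Old and New must have the same type for RAUW to be legal.
static void demoHandleSemantics(Value *Old, Value *New, Value *Clone) {
  ValueMap<const Value*, Value*> VM;
  VM[Old] = Clone;
  Old->replaceAllUsesWith(New);
  // Default ValueMap config: the entry is re-keyed from Old to New.
  // A DenseMap would still index the now-meaningless Old pointer.
  assert(VM.count(New) && "handle followed the RAUW");
}
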
Modified: llvm/branches/wendling/eh/lib/VMCore/AsmWriter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/VMCore/AsmWriter.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/VMCore/AsmWriter.cpp (original)
+++ llvm/branches/wendling/eh/lib/VMCore/AsmWriter.cpp Fri Jul 2 04:57:13 2010
@@ -1419,6 +1419,9 @@
case GlobalValue::ExternalLinkage: break;
case GlobalValue::PrivateLinkage: Out << "private "; break;
case GlobalValue::LinkerPrivateLinkage: Out << "linker_private "; break;
+ case GlobalValue::LinkerPrivateWeakLinkage:
+ Out << "linker_private_weak ";
+ break;
case GlobalValue::InternalLinkage: Out << "internal "; break;
case GlobalValue::LinkOnceAnyLinkage: Out << "linkonce "; break;
case GlobalValue::LinkOnceODRLinkage: Out << "linkonce_odr "; break;
@@ -1854,6 +1857,7 @@
default: Out << " cc" << CI->getCallingConv(); break;
}
+ Operand = CI->getCalledValue();
const PointerType *PTy = cast<PointerType>(Operand->getType());
const FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
const Type *RetTy = FTy->getReturnType();
@@ -1877,10 +1881,10 @@
writeOperand(Operand, true);
}
Out << '(';
- for (unsigned op = 1, Eop = I.getNumOperands(); op < Eop; ++op) {
- if (op > 1)
+ for (unsigned op = 0, Eop = CI->getNumArgOperands(); op < Eop; ++op) {
+ if (op > 0)
Out << ", ";
- writeParamOperand(I.getOperand(op), PAL.getParamAttributes(op));
+ writeParamOperand(CI->getArgOperand(op), PAL.getParamAttributes(op + 1));
}
Out << ')';
if (PAL.getFnAttributes() != Attribute::None)
@@ -1925,10 +1929,10 @@
writeOperand(Operand, true);
}
Out << '(';
- for (unsigned op = 0, Eop = I.getNumOperands() - 4; op < Eop; ++op) {
+ for (unsigned op = 0, Eop = II->getNumArgOperands(); op < Eop; ++op) {
if (op)
Out << ", ";
- writeParamOperand(I.getOperand(op), PAL.getParamAttributes(op + 1));
+ writeParamOperand(II->getArgOperand(op), PAL.getParamAttributes(op + 1));
}
Out << ')';
Modified: llvm/branches/wendling/eh/lib/VMCore/AutoUpgrade.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/VMCore/AutoUpgrade.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/VMCore/AutoUpgrade.cpp (original)
+++ llvm/branches/wendling/eh/lib/VMCore/AutoUpgrade.cpp Fri Jul 2 04:57:13 2010
@@ -18,6 +18,7 @@
#include "llvm/Module.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/CallSite.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/IRBuilder.h"
#include <cstring>
@@ -314,7 +315,8 @@
void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Function *F = CI->getCalledFunction();
LLVMContext &C = CI->getContext();
-
+ ImmutableCallSite CS(CI);
+
assert(F && "CallInst has no function associated with it.");
if (!NewFn) {
@@ -344,11 +346,11 @@
if (isLoadH || isLoadL || isMovL || isMovSD || isShufPD ||
isUnpckhPD || isUnpcklPD || isPunpckhQPD || isPunpcklQPD) {
std::vector<Constant*> Idxs;
- Value *Op0 = CI->getOperand(1);
+ Value *Op0 = CI->getArgOperand(0);
ShuffleVectorInst *SI = NULL;
if (isLoadH || isLoadL) {
Value *Op1 = UndefValue::get(Op0->getType());
- Value *Addr = new BitCastInst(CI->getOperand(2),
+ Value *Addr = new BitCastInst(CI->getArgOperand(1),
Type::getDoublePtrTy(C),
"upgraded.", CI);
Value *Load = new LoadInst(Addr, "upgraded.", false, 8, CI);
@@ -381,7 +383,7 @@
SI = new ShuffleVectorInst(ZeroV, Op0, Mask, "upgraded.", CI);
} else if (isMovSD ||
isUnpckhPD || isUnpcklPD || isPunpckhQPD || isPunpcklQPD) {
- Value *Op1 = CI->getOperand(2);
+ Value *Op1 = CI->getArgOperand(1);
if (isMovSD) {
Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 2));
Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 1));
@@ -395,8 +397,8 @@
Value *Mask = ConstantVector::get(Idxs);
SI = new ShuffleVectorInst(Op0, Op1, Mask, "upgraded.", CI);
} else if (isShufPD) {
- Value *Op1 = CI->getOperand(2);
- unsigned MaskVal = cast<ConstantInt>(CI->getOperand(3))->getZExtValue();
+ Value *Op1 = CI->getArgOperand(1);
+ unsigned MaskVal = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), MaskVal & 1));
Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C),
((MaskVal >> 1) & 1)+2));
@@ -416,8 +418,8 @@
CI->eraseFromParent();
} else if (F->getName() == "llvm.x86.sse41.pmulld") {
// Upgrade this set of intrinsics into vector multiplies.
- Instruction *Mul = BinaryOperator::CreateMul(CI->getOperand(1),
- CI->getOperand(2),
+ Instruction *Mul = BinaryOperator::CreateMul(CI->getArgOperand(0),
+ CI->getArgOperand(1),
CI->getName(),
CI);
// Fix up all the uses with our new multiply.
@@ -427,9 +429,9 @@
// Remove upgraded multiply.
CI->eraseFromParent();
} else if (F->getName() == "llvm.x86.ssse3.palign.r") {
- Value *Op1 = CI->getOperand(1);
- Value *Op2 = CI->getOperand(2);
- Value *Op3 = CI->getOperand(3);
+ Value *Op1 = CI->getArgOperand(0);
+ Value *Op2 = CI->getArgOperand(1);
+ Value *Op3 = CI->getArgOperand(2);
unsigned shiftVal = cast<ConstantInt>(Op3)->getZExtValue();
Value *Rep;
IRBuilder<> Builder(C);
@@ -483,9 +485,9 @@
CI->eraseFromParent();
} else if (F->getName() == "llvm.x86.ssse3.palign.r.128") {
- Value *Op1 = CI->getOperand(1);
- Value *Op2 = CI->getOperand(2);
- Value *Op3 = CI->getOperand(3);
+ Value *Op1 = CI->getArgOperand(0);
+ Value *Op2 = CI->getArgOperand(1);
+ Value *Op3 = CI->getArgOperand(2);
unsigned shiftVal = cast<ConstantInt>(Op3)->getZExtValue();
Value *Rep;
IRBuilder<> Builder(C);
@@ -556,10 +558,10 @@
case Intrinsic::x86_mmx_psrl_w: {
Value *Operands[2];
- Operands[0] = CI->getOperand(1);
+ Operands[0] = CI->getArgOperand(0);
// Cast the second parameter to the correct type.
- BitCastInst *BC = new BitCastInst(CI->getOperand(2),
+ BitCastInst *BC = new BitCastInst(CI->getArgOperand(1),
NewFn->getFunctionType()->getParamType(1),
"upgraded.", CI);
Operands[1] = BC;
@@ -583,9 +585,8 @@
case Intrinsic::ctlz:
case Intrinsic::ctpop:
case Intrinsic::cttz: {
- // Build a small vector of the 1..(N-1) operands, which are the
- // parameters.
- SmallVector<Value*, 8> Operands(CI->op_begin()+1, CI->op_end());
+ // Build a small vector of the original arguments.
+ SmallVector<Value*, 8> Operands(CS.arg_begin(), CS.arg_end());
// Construct a new CallInst
CallInst *NewCI = CallInst::Create(NewFn, Operands.begin(), Operands.end(),
@@ -620,7 +621,7 @@
case Intrinsic::eh_selector:
case Intrinsic::eh_typeid_for: {
// Only the return type changed.
- SmallVector<Value*, 8> Operands(CI->op_begin() + 1, CI->op_end());
+ SmallVector<Value*, 8> Operands(CS.arg_begin(), CS.arg_end());
CallInst *NewCI = CallInst::Create(NewFn, Operands.begin(), Operands.end(),
"upgraded." + CI->getName(), CI);
NewCI->setTailCall(CI->isTailCall());
@@ -643,8 +644,8 @@
case Intrinsic::memset: {
// Add isVolatile
const llvm::Type *I1Ty = llvm::Type::getInt1Ty(CI->getContext());
- Value *Operands[5] = { CI->getOperand(1), CI->getOperand(2),
- CI->getOperand(3), CI->getOperand(4),
+ Value *Operands[5] = { CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2), CI->getArgOperand(3),
llvm::ConstantInt::get(I1Ty, 0) };
CallInst *NewCI = CallInst::Create(NewFn, Operands, Operands+5,
CI->getName(), CI);
@@ -726,7 +727,8 @@
if (Function *Declare = M->getFunction("llvm.dbg.declare")) {
if (!Declare->use_empty()) {
DbgDeclareInst *DDI = cast<DbgDeclareInst>(Declare->use_back());
- if (!isa<MDNode>(DDI->getOperand(1)) ||!isa<MDNode>(DDI->getOperand(2))) {
+ if (!isa<MDNode>(DDI->getArgOperand(0)) ||
+ !isa<MDNode>(DDI->getArgOperand(1))) {
while (!Declare->use_empty()) {
CallInst *CI = cast<CallInst>(Declare->use_back());
CI->eraseFromParent();
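
Every hunk in this file is the same mechanical rewrite: the old accessors
counted the callee as operand 0, so argument i lived at getOperand(i+1);
getArgOperand(i) names argument i directly and keeps working when the callee
moves to the other end of the operand list (see the Instructions.cpp change
below). In miniature (firstArg is a hypothetical helper):

#include "llvm/Instructions.h"
using namespace llvm;

static Value *firstArg(CallInst *CI) {
  // Before: CI->getOperand(1), with the callee occupying slot 0.
  return CI->getArgOperand(0);  // 0-based and layout-independent
}
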
Modified: llvm/branches/wendling/eh/lib/VMCore/ConstantFold.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/VMCore/ConstantFold.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/VMCore/ConstantFold.cpp (original)
+++ llvm/branches/wendling/eh/lib/VMCore/ConstantFold.cpp Fri Jul 2 04:57:13 2010
@@ -1817,8 +1817,15 @@
return Constant::getAllOnesValue(ResultTy);
// Handle some degenerate cases first
- if (isa<UndefValue>(C1) || isa<UndefValue>(C2))
+ if (isa<UndefValue>(C1) || isa<UndefValue>(C2)) {
+ // For EQ and NE, we can always pick a value for the undef to make the
+ // predicate pass or fail, so we can return undef.
+ if (ICmpInst::isEquality(ICmpInst::Predicate(pred)))
+ return UndefValue::get(ResultTy);
+ // Otherwise, pick the same value as the non-undef operand, and fold
+ // it to true or false.
return ConstantInt::get(ResultTy, CmpInst::isTrueWhenEqual(pred));
+ }
// No compile-time operations on this type yet.
if (C1->getType()->isPPC_FP128Ty())
@@ -2194,7 +2201,7 @@
}
NewIndices.push_back(Combined);
- NewIndices.insert(NewIndices.end(), Idxs+1, Idxs+NumIdx);
+ NewIndices.append(Idxs+1, Idxs+NumIdx);
return (inBounds && cast<GEPOperator>(CE)->isInBounds()) ?
ConstantExpr::getInBoundsGetElementPtr(CE->getOperand(0),
&NewIndices[0],
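
The new undef handling merits spelling out: for eq/ne an undef operand can be
chosen to make the predicate go either way, hence the result is undef; for
the ordering predicates, choosing undef equal to the other operand pins the
result to isTrueWhenEqual(pred). Concrete instances, as a hedged illustration
rather than patch code:

#include "llvm/InstrTypes.h"
#include <cassert>
using namespace llvm;

// icmp eq  i32 %x, undef --> undef (undef may or may not equal %x)
// icmp ule i32 %x, undef --> true  (choose undef == %x; ule holds)
// icmp ult i32 %x, undef --> false (choose undef == %x; ult fails)
static void checkFoldDirections() {
  assert( CmpInst::isTrueWhenEqual(CmpInst::ICMP_ULE));
  assert(!CmpInst::isTrueWhenEqual(CmpInst::ICMP_ULT));
}
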
Modified: llvm/branches/wendling/eh/lib/VMCore/Core.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/VMCore/Core.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/VMCore/Core.cpp (original)
+++ llvm/branches/wendling/eh/lib/VMCore/Core.cpp Fri Jul 2 04:57:13 2010
@@ -1058,6 +1058,8 @@
return LLVMPrivateLinkage;
case GlobalValue::LinkerPrivateLinkage:
return LLVMLinkerPrivateLinkage;
+ case GlobalValue::LinkerPrivateWeakLinkage:
+ return LLVMLinkerPrivateWeakLinkage;
case GlobalValue::DLLImportLinkage:
return LLVMDLLImportLinkage;
case GlobalValue::DLLExportLinkage:
@@ -1108,6 +1110,9 @@
case LLVMLinkerPrivateLinkage:
GV->setLinkage(GlobalValue::LinkerPrivateLinkage);
break;
+ case LLVMLinkerPrivateWeakLinkage:
+ GV->setLinkage(GlobalValue::LinkerPrivateWeakLinkage);
+ break;
case LLVMDLLImportLinkage:
GV->setLinkage(GlobalValue::DLLImportLinkage);
break;
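
These Core.cpp hunks keep the C bindings' linkage enum in sync in both
directions. A round-trip sketch using the enumerator this patch adds
(roundTrip is hypothetical; GV is any global obtained from, e.g.,
LLVMAddGlobal):

#include "llvm-c/Core.h"
#include <assert.h>

static void roundTrip(LLVMValueRef GV) {
  LLVMSetLinkage(GV, LLVMLinkerPrivateWeakLinkage);
  assert(LLVMGetLinkage(GV) == LLVMLinkerPrivateWeakLinkage);
}
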
Modified: llvm/branches/wendling/eh/lib/VMCore/Instruction.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/VMCore/Instruction.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/VMCore/Instruction.cpp (original)
+++ llvm/branches/wendling/eh/lib/VMCore/Instruction.cpp Fri Jul 2 04:57:13 2010
@@ -421,6 +421,7 @@
case Store:
case Ret:
case Br:
+ case IndirectBr:
case Switch:
case Unwind:
case Unreachable:
Modified: llvm/branches/wendling/eh/lib/VMCore/Instructions.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/VMCore/Instructions.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/VMCore/Instructions.cpp (original)
+++ llvm/branches/wendling/eh/lib/VMCore/Instructions.cpp Fri Jul 2 04:57:13 2010
@@ -33,7 +33,9 @@
User::op_iterator CallSite::getCallee() const {
Instruction *II(getInstruction());
return isCall()
- ? cast<CallInst>(II)->op_begin()
+ ? (CallInst::ArgOffset
+ ? cast</*FIXME: CallInst*/User>(II)->op_begin()
+ : cast</*FIXME: CallInst*/User>(II)->op_end() - 1)
: cast<InvokeInst>(II)->op_end() - 4; // Skip PersFn, BB, BB, Function
}
@@ -231,8 +233,7 @@
void CallInst::init(Value *Func, Value* const *Params, unsigned NumParams) {
assert(NumOperands == NumParams+1 && "NumOperands not set up?");
- Use *OL = OperandList;
- OL[0] = Func;
+ Op<ArgOffset -1>() = Func;
const FunctionType *FTy =
cast<FunctionType>(cast<PointerType>(Func->getType())->getElementType());
@@ -245,16 +246,15 @@
assert((i >= FTy->getNumParams() ||
FTy->getParamType(i) == Params[i]->getType()) &&
"Calling a function with a bad signature!");
- OL[i+1] = Params[i];
+ OperandList[i + ArgOffset] = Params[i];
}
}
void CallInst::init(Value *Func, Value *Actual1, Value *Actual2) {
assert(NumOperands == 3 && "NumOperands not set up?");
- Use *OL = OperandList;
- OL[0] = Func;
- OL[1] = Actual1;
- OL[2] = Actual2;
+ Op<ArgOffset -1>() = Func;
+ Op<ArgOffset + 0>() = Actual1;
+ Op<ArgOffset + 1>() = Actual2;
const FunctionType *FTy =
cast<FunctionType>(cast<PointerType>(Func->getType())->getElementType());
@@ -273,9 +273,8 @@
void CallInst::init(Value *Func, Value *Actual) {
assert(NumOperands == 2 && "NumOperands not set up?");
- Use *OL = OperandList;
- OL[0] = Func;
- OL[1] = Actual;
+ Op<ArgOffset -1>() = Func;
+ Op<ArgOffset + 0>() = Actual;
const FunctionType *FTy =
cast<FunctionType>(cast<PointerType>(Func->getType())->getElementType());
@@ -291,8 +290,7 @@
void CallInst::init(Value *Func) {
assert(NumOperands == 1 && "NumOperands not set up?");
- Use *OL = OperandList;
- OL[0] = Func;
+ Op<ArgOffset -1>() = Func;
const FunctionType *FTy =
cast<FunctionType>(cast<PointerType>(Func->getType())->getElementType());
@@ -1479,7 +1477,7 @@
Op<0>() = Agg;
Op<1>() = Val;
- Indices.insert(Indices.end(), Idx, Idx + NumIdx);
+ Indices.append(Idx, Idx + NumIdx);
setName(Name);
}
@@ -1532,7 +1530,7 @@
const Twine &Name) {
assert(NumOperands == 1 && "NumOperands not initialized?");
- Indices.insert(Indices.end(), Idx, Idx + NumIdx);
+ Indices.append(Idx, Idx + NumIdx);
setName(Name);
}
Modified: llvm/branches/wendling/eh/lib/VMCore/IntrinsicInst.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/VMCore/IntrinsicInst.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/VMCore/IntrinsicInst.cpp (original)
+++ llvm/branches/wendling/eh/lib/VMCore/IntrinsicInst.cpp Fri Jul 2 04:57:13 2010
@@ -54,7 +54,7 @@
///
Value *DbgDeclareInst::getAddress() const {
- if (MDNode* MD = cast_or_null<MDNode>(getOperand(1)))
+ if (MDNode* MD = cast_or_null<MDNode>(getArgOperand(0)))
return MD->getOperand(0);
else
return NULL;
@@ -65,9 +65,9 @@
///
const Value *DbgValueInst::getValue() const {
- return cast<MDNode>(getOperand(1))->getOperand(0);
+ return cast<MDNode>(getArgOperand(0))->getOperand(0);
}
Value *DbgValueInst::getValue() {
- return cast<MDNode>(getOperand(1))->getOperand(0);
+ return cast<MDNode>(getArgOperand(0))->getOperand(0);
}
Modified: llvm/branches/wendling/eh/lib/VMCore/Module.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/VMCore/Module.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/VMCore/Module.cpp (original)
+++ llvm/branches/wendling/eh/lib/VMCore/Module.cpp Fri Jul 2 04:57:13 2010
@@ -312,15 +312,11 @@
/// getNamedMetadata - Return the first NamedMDNode in the module with the
/// specified name. This method returns null if a NamedMDNode with the
-//// specified name is not found.
-NamedMDNode *Module::getNamedMetadata(StringRef Name) const {
- return NamedMDSymTab->lookup(Name);
-}
-
-NamedMDNode *Module::getNamedMetadataUsingTwine(Twine Name) const {
+/// specified name is not found.
+NamedMDNode *Module::getNamedMetadata(const Twine &Name) const {
SmallString<256> NameData;
StringRef NameRef = Name.toStringRef(NameData);
- return NamedMDSymTab->lookup(NameRef);
+ return NamedMDSymTab->lookup(NameRef);
}
/// getOrInsertNamedMetadata - Return the first named MDNode in the module
Modified: llvm/branches/wendling/eh/lib/VMCore/Pass.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/VMCore/Pass.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/VMCore/Pass.cpp (original)
+++ llvm/branches/wendling/eh/lib/VMCore/Pass.cpp Fri Jul 2 04:57:13 2010
@@ -35,6 +35,15 @@
// Pass Implementation
//
+Pass::Pass(PassKind K, intptr_t pid) : Resolver(0), PassID(pid), Kind(K) {
+ assert(pid && "pid cannot be 0");
+}
+
+Pass::Pass(PassKind K, const void *pid)
+ : Resolver(0), PassID((intptr_t)pid), Kind(K) {
+ assert(pid && "pid cannot be 0");
+}
+
// Force out-of-line virtual method.
Pass::~Pass() {
delete Resolver;
@@ -92,6 +101,23 @@
// By default, don't do anything.
}
+void *Pass::getAdjustedAnalysisPointer(const PassInfo *) {
+ return this;
+}
+
+ImmutablePass *Pass::getAsImmutablePass() {
+ return 0;
+}
+
+PMDataManager *Pass::getAsPMDataManager() {
+ return 0;
+}
+
+void Pass::setResolver(AnalysisResolver *AR) {
+ assert(!Resolver && "Resolver is already set");
+ Resolver = AR;
+}
+
// print - Print out the internal state of the pass. This is called by Analyze
// to print out the contents of an analysis. Otherwise it is not necessary to
// implement this method.
@@ -364,6 +390,14 @@
getPassRegistrar()->UnregisterPass(*this);
}
+Pass *PassInfo::createPass() const {
+ assert((!isAnalysisGroup() || NormalCtor) &&
+ "No default implementation found for analysis group!");
+ assert(NormalCtor &&
+ "Cannot call createPass on PassInfo without default ctor!");
+ return NormalCtor();
+}
+
//===----------------------------------------------------------------------===//
// Analysis Group Implementation Code
//===----------------------------------------------------------------------===//
@@ -467,4 +501,15 @@
GetCFGOnlyPasses(Preserved).enumeratePasses();
}
+AnalysisUsage &AnalysisUsage::addRequiredID(AnalysisID ID) {
+ assert(ID && "Pass class not registered!");
+ Required.push_back(ID);
+ return *this;
+}
+AnalysisUsage &AnalysisUsage::addRequiredTransitiveID(AnalysisID ID) {
+ assert(ID && "Pass class not registered!");
+ Required.push_back(ID);
+ RequiredTransitive.push_back(ID);
+ return *this;
+}
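
addRequiredID and addRequiredTransitiveID move out of line here. A sketch of
their typical caller, a pass requiring another pass by AnalysisID rather than
by type (MyPass is hypothetical; LoopSimplifyID's home header is assumed to
be Scalar.h at this revision):

#include "llvm/Pass.h"
#include "llvm/Transforms/Scalar.h"  // assumed declaration of LoopSimplifyID
using namespace llvm;

namespace {
struct MyPass : public FunctionPass {
  static char ID;
  MyPass() : FunctionPass(&ID) {}
  virtual void getAnalysisUsage(AnalysisUsage &AU) const {
    AU.addRequiredID(LoopSimplifyID);  // require by AnalysisID, not by type
    AU.setPreservesCFG();
  }
  virtual bool runOnFunction(Function &) { return false; }
};
}
char MyPass::ID = 0;
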
Modified: llvm/branches/wendling/eh/lib/VMCore/PassManager.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/VMCore/PassManager.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/VMCore/PassManager.cpp (original)
+++ llvm/branches/wendling/eh/lib/VMCore/PassManager.cpp Fri Jul 2 04:57:13 2010
@@ -1147,6 +1147,11 @@
llvm_unreachable("Unable to schedule pass");
}
+Pass *PMDataManager::getOnTheFlyPass(Pass *P, const PassInfo *PI, Function &F) {
+ assert(0 && "Unable to find on the fly pass");
+ return NULL;
+}
+
// Destructor
PMDataManager::~PMDataManager() {
for (SmallVector<Pass *, 8>::iterator I = PassVector.begin(),
Modified: llvm/branches/wendling/eh/lib/VMCore/Value.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/VMCore/Value.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/VMCore/Value.cpp (original)
+++ llvm/branches/wendling/eh/lib/VMCore/Value.cpp Fri Jul 2 04:57:13 2010
@@ -322,7 +322,13 @@
Value *Value::stripPointerCasts() {
if (!getType()->isPointerTy())
return this;
+
+ // Even though we don't look through PHI nodes, we could be called on an
+ // instruction in an unreachable block, which may be on a cycle.
+ SmallPtrSet<Value *, 4> Visited;
+
Value *V = this;
+ Visited.insert(V);
do {
if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
if (!GEP->hasAllZeroIndices())
@@ -338,7 +344,9 @@
return V;
}
assert(V->getType()->isPointerTy() && "Unexpected operand type!");
- } while (1);
+ } while (Visited.insert(V));
+
+ return V;
}
Value *Value::getUnderlyingObject(unsigned MaxLookup) {
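
The Visited set fixes a potential hang: inside an unreachable block an
instruction can be (transitively) its own operand, so the old do-while could
spin forever. The guard generalizes to any look-through walk; a simplified
sketch, where lookThroughOneStep is a cut-down stand-in that omits the
ConstantExpr and GlobalAlias cases the real code handles:

#include "llvm/Instructions.h"
#include "llvm/Operator.h"
#include "llvm/ADT/SmallPtrSet.h"
using namespace llvm;

// One look-through hop: all-zero-index GEPs and bitcasts only.
static Value *lookThroughOneStep(Value *V) {
  if (GEPOperator *GEP = dyn_cast<GEPOperator>(V))
    return GEP->hasAllZeroIndices() ? GEP->getPointerOperand() : 0;
  if (BitCastInst *BC = dyn_cast<BitCastInst>(V))
    return BC->getOperand(0);
  return 0;
}

static Value *walkCycleSafe(Value *V) {
  SmallPtrSet<Value *, 4> Visited;
  Visited.insert(V);
  while (Value *Next = lookThroughOneStep(V)) {
    if (!Visited.insert(Next))  // false on a repeat: we hit a cycle, stop
      break;
    V = Next;
  }
  return V;
}
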
Modified: llvm/branches/wendling/eh/lib/VMCore/Verifier.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/VMCore/Verifier.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/VMCore/Verifier.cpp (original)
+++ llvm/branches/wendling/eh/lib/VMCore/Verifier.cpp Fri Jul 2 04:57:13 2010
@@ -1641,16 +1641,16 @@
default:
break;
case Intrinsic::dbg_declare: { // llvm.dbg.declare
- Assert1(CI.getOperand(1) && isa<MDNode>(CI.getOperand(1)),
+ Assert1(CI.getArgOperand(0) && isa<MDNode>(CI.getArgOperand(0)),
"invalid llvm.dbg.declare intrinsic call 1", &CI);
- MDNode *MD = cast<MDNode>(CI.getOperand(1));
+ MDNode *MD = cast<MDNode>(CI.getArgOperand(0));
Assert1(MD->getNumOperands() == 1,
"invalid llvm.dbg.declare intrinsic call 2", &CI);
} break;
case Intrinsic::memcpy:
case Intrinsic::memmove:
case Intrinsic::memset:
- Assert1(isa<ConstantInt>(CI.getOperand(4)),
+ Assert1(isa<ConstantInt>(CI.getArgOperand(3)),
"alignment argument of memory intrinsics must be a constant int",
&CI);
break;
@@ -1659,10 +1659,10 @@
case Intrinsic::gcread:
if (ID == Intrinsic::gcroot) {
AllocaInst *AI =
- dyn_cast<AllocaInst>(CI.getOperand(1)->stripPointerCasts());
+ dyn_cast<AllocaInst>(CI.getArgOperand(0)->stripPointerCasts());
Assert1(AI && AI->getType()->getElementType()->isPointerTy(),
"llvm.gcroot parameter #1 must be a pointer alloca.", &CI);
- Assert1(isa<Constant>(CI.getOperand(2)),
+ Assert1(isa<Constant>(CI.getArgOperand(1)),
"llvm.gcroot parameter #2 must be a constant.", &CI);
}
@@ -1670,32 +1670,32 @@
"Enclosing function does not use GC.", &CI);
break;
case Intrinsic::init_trampoline:
- Assert1(isa<Function>(CI.getOperand(2)->stripPointerCasts()),
+ Assert1(isa<Function>(CI.getArgOperand(1)->stripPointerCasts()),
"llvm.init_trampoline parameter #2 must resolve to a function.",
&CI);
break;
case Intrinsic::prefetch:
- Assert1(isa<ConstantInt>(CI.getOperand(2)) &&
- isa<ConstantInt>(CI.getOperand(3)) &&
- cast<ConstantInt>(CI.getOperand(2))->getZExtValue() < 2 &&
- cast<ConstantInt>(CI.getOperand(3))->getZExtValue() < 4,
+ Assert1(isa<ConstantInt>(CI.getArgOperand(1)) &&
+ isa<ConstantInt>(CI.getArgOperand(2)) &&
+ cast<ConstantInt>(CI.getArgOperand(1))->getZExtValue() < 2 &&
+ cast<ConstantInt>(CI.getArgOperand(2))->getZExtValue() < 4,
"invalid arguments to llvm.prefetch",
&CI);
break;
case Intrinsic::stackprotector:
- Assert1(isa<AllocaInst>(CI.getOperand(2)->stripPointerCasts()),
+ Assert1(isa<AllocaInst>(CI.getArgOperand(1)->stripPointerCasts()),
"llvm.stackprotector parameter #2 must resolve to an alloca.",
&CI);
break;
case Intrinsic::lifetime_start:
case Intrinsic::lifetime_end:
case Intrinsic::invariant_start:
- Assert1(isa<ConstantInt>(CI.getOperand(1)),
+ Assert1(isa<ConstantInt>(CI.getArgOperand(0)),
"size argument of memory use markers must be a constant integer",
&CI);
break;
case Intrinsic::invariant_end:
- Assert1(isa<ConstantInt>(CI.getOperand(2)),
+ Assert1(isa<ConstantInt>(CI.getArgOperand(1)),
"llvm.invariant.end parameter #2 must be a constant integer", &CI);
break;
}
Modified: llvm/branches/wendling/eh/test/Analysis/ScalarEvolution/scev-aa.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/Analysis/ScalarEvolution/scev-aa.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/Analysis/ScalarEvolution/scev-aa.ll (original)
+++ llvm/branches/wendling/eh/test/Analysis/ScalarEvolution/scev-aa.ll Fri Jul 2 04:57:13 2010
@@ -1,8 +1,9 @@
; RUN: opt < %s -scev-aa -aa-eval -print-all-alias-modref-info \
; RUN: |& FileCheck %s
-; At the time of this writing, -basicaa only misses the example of the form
-; A[i+(j+1)] != A[i+j], which can arise from multi-dimensional array references.
+; At the time of this writing, -basicaa misses the example of the form
+; A[i+(j+1)] != A[i+j], which can arise from multi-dimensional array references,
+; and the example of the form A[0] != A[i+1], where i+1 is known to be positive.
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64"
@@ -189,6 +190,27 @@
ret void
}
+; TODO: This is theoretically provable to be NoAlias.
+; CHECK: Function: nonnegative: 2 pointers, 0 call sites
+; CHECK: MayAlias: i64* %arrayidx, i64* %p
+
+define void @nonnegative(i64* %p) nounwind {
+entry:
+ br label %for.body
+
+for.body: ; preds = %entry, %for.body
+ %i = phi i64 [ %inc, %for.body ], [ 0, %entry ] ; <i64> [#uses=2]
+ %inc = add nsw i64 %i, 1 ; <i64> [#uses=2]
+ %arrayidx = getelementptr inbounds i64* %p, i64 %inc
+ store i64 0, i64* %arrayidx
+ %tmp6 = load i64* %p ; <i64> [#uses=1]
+ %cmp = icmp slt i64 %inc, %tmp6 ; <i1> [#uses=1]
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
; CHECK: 13 no alias responses
-; CHECK: 26 may alias responses
+; CHECK: 27 may alias responses
; CHECK: 18 must alias responses
Modified: llvm/branches/wendling/eh/test/Analysis/ScalarEvolution/trip-count10.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/Analysis/ScalarEvolution/trip-count10.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/Analysis/ScalarEvolution/trip-count10.ll (original)
+++ llvm/branches/wendling/eh/test/Analysis/ScalarEvolution/trip-count10.ll Fri Jul 2 04:57:13 2010
@@ -74,3 +74,53 @@
return:
ret void
}
+
+; Trip counts for non-polynomial iterations. It's theoretically possible
+; to compute a maximum count for these, but short of that, ScalarEvolution
+; should return unknown.
+
+; PR7416
+; CHECK: Determining loop execution counts for: @nonpolynomial
+; CHECK-NEXT: Loop %loophead: Unpredictable backedge-taken count
+; CHECK-NEXT: Loop %loophead: Unpredictable max backedge-taken count
+
+declare i1 @g() nounwind
+
+define void @nonpolynomial() {
+entry:
+ br label %loophead
+loophead:
+ %x = phi i32 [0, %entry], [%x.1, %bb1], [%x.2, %bb2]
+ %y = icmp slt i32 %x, 100
+ br i1 %y, label %loopbody, label %retbb
+loopbody:
+ %z = call i1 @g()
+ br i1 %z, label %bb1, label %bb2
+bb1:
+ %x.1 = add i32 %x, 2
+ br label %loophead
+bb2:
+ %x.2 = add i32 %x, 3
+ br label %loophead
+retbb:
+ ret void
+}
+
+; PHI nodes with all constant operands.
+
+; CHECK: Determining loop execution counts for: @constant_phi_operands
+; CHECK: Loop %loop: backedge-taken count is 1
+; CHECK: Loop %loop: max backedge-taken count is 1
+
+define void @constant_phi_operands() nounwind {
+entry:
+ br label %loop
+
+loop:
+ %i = phi i64 [ 1, %loop ], [ 0, %entry ]
+ %exitcond = icmp eq i64 %i, 1
+ br i1 %exitcond, label %return, label %loop
+
+return:
+ ret void
+}
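To see why the expected count is 1, here is a short trace of @constant_phi_operands (worked out here; the patch itself only states the CHECK lines):

; first trip through %loop:  %i = 0, %exitcond = (0 == 1) -> false, backedge taken
; second trip through %loop: %i = 1, %exitcond = (1 == 1) -> true, loop exits
; The backedge executes exactly once, so the backedge-taken count is 1.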
Modified: llvm/branches/wendling/eh/test/CodeGen/ARM/2009-08-23-linkerprivate.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/ARM/2009-08-23-linkerprivate.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/ARM/2009-08-23-linkerprivate.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/ARM/2009-08-23-linkerprivate.ll Fri Jul 2 04:57:13 2010
@@ -2,7 +2,7 @@
; ModuleID = '/Volumes/MacOS9/tests/WebKit/JavaScriptCore/profiler/ProfilerServer.mm'
-@"\01l_objc_msgSend_fixup_alloc" = linker_private hidden global i32 0, section "__DATA, __objc_msgrefs, coalesced", align 16 ; <i32*> [#uses=0]
+@"\01l_objc_msgSend_fixup_alloc" = linker_private_weak hidden global i32 0, section "__DATA, __objc_msgrefs, coalesced", align 16
; CHECK: .globl l_objc_msgSend_fixup_alloc
; CHECK: .weak_definition l_objc_msgSend_fixup_alloc
Modified: llvm/branches/wendling/eh/test/CodeGen/ARM/arm-returnaddr.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/ARM/arm-returnaddr.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/ARM/arm-returnaddr.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/ARM/arm-returnaddr.ll Fri Jul 2 04:57:13 2010
@@ -1,11 +1,12 @@
; RUN: llc < %s -mtriple=arm-apple-darwin | FileCheck %s
-; RUN: llc < %s -mtriple=thumbv6-apple-darwin
+; RUN: llc < %s -mtriple=thumbv6-apple-darwin | FileCheck %s
; rdar://8015977
; rdar://8020118
define i8* @rt0(i32 %x) nounwind readnone {
entry:
; CHECK: rt0:
+; CHECK: {r7, lr}
; CHECK: mov r0, lr
%0 = tail call i8* @llvm.returnaddress(i32 0)
ret i8* %0
@@ -14,6 +15,7 @@
define i8* @rt2() nounwind readnone {
entry:
; CHECK: rt2:
+; CHECK: {r7, lr}
; CHECK: ldr r0, [r7]
; CHECK: ldr r0, [r0]
; CHECK: ldr r0, [r0, #4]
Modified: llvm/branches/wendling/eh/test/CodeGen/ARM/call-tc.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/ARM/call-tc.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/ARM/call-tc.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/ARM/call-tc.ll Fri Jul 2 04:57:13 2010
@@ -7,22 +7,25 @@
declare void @g(i32, i32, i32, i32)
-define void @f() {
+define void @t1() {
+; CHECKELF: t1:
; CHECKELF: PLT
call void @g( i32 1, i32 2, i32 3, i32 4 )
ret void
}
-define void @g.upgrd.1() {
+define void @t2() {
+; CHECKV4: t2:
; CHECKV4: bx r0 @ TAILCALL
+; CHECKV5: t2:
; CHECKV5: bx r0 @ TAILCALL
%tmp = load i32 ()** @t ; <i32 ()*> [#uses=1]
%tmp.upgrd.2 = tail call i32 %tmp( ) ; <i32> [#uses=0]
ret void
}
-define i32* @m_231b(i32, i32, i32*, i32*, i32*) nounwind {
-; CHECKV4: m_231b
+define i32* @t3(i32, i32, i32*, i32*, i32*) nounwind {
+; CHECKV4: t3:
; CHECKV4: bx r{{.*}}
BB0:
%5 = inttoptr i32 %0 to i32* ; <i32*> [#uses=1]
Modified: llvm/branches/wendling/eh/test/CodeGen/ARM/crash-O0.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/ARM/crash-O0.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/ARM/crash-O0.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/ARM/crash-O0.ll Fri Jul 2 04:57:13 2010
@@ -10,3 +10,19 @@
%asmtmp = call %struct0 asm sideeffect "...", "=&r,=&r,r,Ir,r,~{cc},~{memory}"(i32* undef, i32 undef, i32 1) nounwind ; <%0> [#uses=0]
unreachable
}
+
+@.str523 = private constant [256 x i8] c"<Unknown>\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00", align 4 ; <[256 x i8]*> [#uses=1]
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+
+; This function uses the scavenger for an ADDri instruction.
+; ARMBaseRegisterInfo::estimateRSStackSizeLimit must return a 255 limit.
+define arm_apcscc void @scavenge_ADDri() nounwind {
+entry:
+ %letter = alloca i8 ; <i8*> [#uses=0]
+ %prodvers = alloca [256 x i8] ; <[256 x i8]*> [#uses=1]
+ %buildver = alloca [256 x i8] ; <[256 x i8]*> [#uses=0]
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* undef, i8* getelementptr inbounds ([256 x i8]* @.str523, i32 0, i32 0), i32 256, i32 1, i1 false)
+ %prodvers2 = bitcast [256 x i8]* %prodvers to i8* ; <i8*> [#uses=1]
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %prodvers2, i8* getelementptr inbounds ([256 x i8]* @.str523, i32 0, i32 0), i32 256, i32 1, i1 false)
+ unreachable
+}
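Rough arithmetic behind this test (my reading of the comment above; the patch does not spell it out): the two 256-byte arrays push local offsets beyond what a single ADDri immediate can encode, so forming a frame address needs a spare register from the scavenger, and estimateRSStackSizeLimit returning 255 is what tells frame lowering to reserve a scavenging slot.

; Assumed frame layout, for illustration only:
;   %letter     1 byte
;   %prodvers   256 bytes
;   %buildver   256 bytes   -> offsets past 255 are no longer reachable by
;                              one ADDri, triggering the scavenging path.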
Modified: llvm/branches/wendling/eh/test/CodeGen/ARM/ifcvt2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/ARM/ifcvt2.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/ARM/ifcvt2.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/ARM/ifcvt2.ll Fri Jul 2 04:57:13 2010
@@ -1,10 +1,8 @@
-; RUN: llc < %s -march=arm > %t
-; RUN: grep bxlt %t | count 1
-; RUN: grep bxgt %t | count 1
-; RUN: not grep bxge %t
-; RUN: not grep bxle %t
+; RUN: llc < %s -march=arm | FileCheck %s
define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) {
+; CHECK: t1:
+; CHECK: bxlt lr
%tmp2 = icmp sgt i32 %c, 10
%tmp5 = icmp slt i32 %d, 4
%tmp8 = or i1 %tmp5, %tmp2
@@ -21,6 +19,13 @@
}
define i32 @t2(i32 %a, i32 %b, i32 %c, i32 %d) {
+; CHECK: t2:
+; CHECK: bxgt lr
+; CHECK: cmp
+; CHECK: addge
+; CHECK: subge
+; CHECK-NOT: bxge lr
+; CHECK: bx lr
%tmp2 = icmp sgt i32 %c, 10
%tmp5 = icmp slt i32 %d, 4
%tmp8 = and i1 %tmp5, %tmp2
Removed: llvm/branches/wendling/eh/test/CodeGen/ARM/ifcvt6-tc.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/ARM/ifcvt6-tc.ll?rev=107464&view=auto
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/ARM/ifcvt6-tc.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/ARM/ifcvt6-tc.ll (removed)
@@ -1,23 +0,0 @@
-; RUN: llc < %s -march=arm -mtriple=arm-apple-darwin | \
-; RUN: grep cmpne | count 1
-; RUN: llc < %s -march=arm -mtriple=arm-apple-darwin | \
-; RUN: grep bhi | count 1
-; Here, tail call wins over eliminating branches. It is 1 fewer instruction
-; and removes all stack accesses, so seems like a win.
-
-define void @foo(i32 %X, i32 %Y) {
-entry:
- %tmp1 = icmp ult i32 %X, 4 ; <i1> [#uses=1]
- %tmp4 = icmp eq i32 %Y, 0 ; <i1> [#uses=1]
- %tmp7 = or i1 %tmp4, %tmp1 ; <i1> [#uses=1]
- br i1 %tmp7, label %cond_true, label %UnifiedReturnBlock
-
-cond_true: ; preds = %entry
- %tmp10 = tail call i32 (...)* @bar( ) ; <i32> [#uses=0]
- ret void
-
-UnifiedReturnBlock: ; preds = %entry
- ret void
-}
-
-declare i32 @bar(...)
Removed: llvm/branches/wendling/eh/test/CodeGen/ARM/insn-sched1-tc.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/ARM/insn-sched1-tc.ll?rev=107464&view=auto
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/ARM/insn-sched1-tc.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/ARM/insn-sched1-tc.ll (removed)
@@ -1,11 +0,0 @@
-; RUN: llc < %s -march=arm -mattr=+v6
-; RUN: llc < %s -mtriple=arm-apple-darwin -mattr=+v6 |\
-; RUN: grep mov | count 2
-
-define i32 @test(i32 %x) {
- %tmp = trunc i32 %x to i16 ; <i16> [#uses=1]
- %tmp2 = tail call i32 @f( i32 1, i16 %tmp ) ; <i32> [#uses=1]
- ret i32 %tmp2
-}
-
-declare i32 @f(i32, i16)
Removed: llvm/branches/wendling/eh/test/CodeGen/ARM/ldm-tc.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/ARM/ldm-tc.ll?rev=107464&view=auto
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/ARM/ldm-tc.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/ARM/ldm-tc.ll (removed)
@@ -1,37 +0,0 @@
-; RUN: llc < %s -mtriple=arm-apple-darwin | FileCheck %s
-
-@X = external global [0 x i32] ; <[0 x i32]*> [#uses=5]
-
-define i32 @t1() {
-; CHECK: t1:
-; CHECK: ldmia
- %tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 0) ; <i32> [#uses=1]
- %tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 1) ; <i32> [#uses=1]
- %tmp4 = tail call i32 @f1( i32 %tmp, i32 %tmp3 ) ; <i32> [#uses=1]
- ret i32 %tmp4
-}
-
-define i32 @t2() {
-; CHECK: t2:
-; CHECK: ldmia
- %tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 2) ; <i32> [#uses=1]
- %tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 3) ; <i32> [#uses=1]
- %tmp5 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 4) ; <i32> [#uses=1]
- %tmp6 = tail call i32 @f2( i32 %tmp, i32 %tmp3, i32 %tmp5 ) ; <i32> [#uses=1]
- ret i32 %tmp6
-}
-
-define i32 @t3() {
-; CHECK: t3:
-; CHECK: ldmib
-; CHECK: b.w _f2 @ TAILCALL
- %tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 1) ; <i32> [#uses=1]
- %tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 2) ; <i32> [#uses=1]
- %tmp5 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 3) ; <i32> [#uses=1]
- %tmp6 = tail call i32 @f2( i32 %tmp, i32 %tmp3, i32 %tmp5 ) ; <i32> [#uses=1]
- ret i32 %tmp6
-}
-
-declare i32 @f1(i32, i32)
-
-declare i32 @f2(i32, i32, i32)
Modified: llvm/branches/wendling/eh/test/CodeGen/ARM/long_shift.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/ARM/long_shift.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/ARM/long_shift.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/ARM/long_shift.ll Fri Jul 2 04:57:13 2010
@@ -23,10 +23,10 @@
define i32 @f2(i64 %x, i64 %y) {
; CHECK: f2
; CHECK: mov r0, r0, lsr r2
-; CHECK-NEXT: rsb r12, r2, #32
+; CHECK-NEXT: rsb r3, r2, #32
; CHECK-NEXT: sub r2, r2, #32
; CHECK-NEXT: cmp r2, #0
-; CHECK-NEXT: orr r0, r0, r1, lsl r12
+; CHECK-NEXT: orr r0, r0, r1, lsl r3
; CHECK-NEXT: movge r0, r1, asr r2
%a = ashr i64 %x, %y
%b = trunc i64 %a to i32
@@ -36,10 +36,10 @@
define i32 @f3(i64 %x, i64 %y) {
; CHECK: f3
; CHECK: mov r0, r0, lsr r2
-; CHECK-NEXT: rsb r12, r2, #32
+; CHECK-NEXT: rsb r3, r2, #32
; CHECK-NEXT: sub r2, r2, #32
; CHECK-NEXT: cmp r2, #0
-; CHECK-NEXT: orr r0, r0, r1, lsl r12
+; CHECK-NEXT: orr r0, r0, r1, lsl r3
; CHECK-NEXT: movge r0, r1, lsr r2
%a = lshr i64 %x, %y
%b = trunc i64 %a to i32
Modified: llvm/branches/wendling/eh/test/CodeGen/ARM/lsr-code-insertion.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/ARM/lsr-code-insertion.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/ARM/lsr-code-insertion.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/ARM/lsr-code-insertion.ll Fri Jul 2 04:57:13 2010
@@ -1,4 +1,4 @@
-; RUN: llc < %s -stats |& grep {39.*Number of machine instrs printed}
+; RUN: llc < %s -stats |& grep {38.*Number of machine instrs printed}
; RUN: llc < %s -stats |& not grep {.*Number of re-materialization}
; This test really wants to check that the resultant "cond_true" block only
; has a single store in it, and that cond_true55 only has code to materialize
Modified: llvm/branches/wendling/eh/test/CodeGen/ARM/lsr-on-unrolled-loops.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/ARM/lsr-on-unrolled-loops.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/ARM/lsr-on-unrolled-loops.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/ARM/lsr-on-unrolled-loops.ll Fri Jul 2 04:57:13 2010
@@ -626,9 +626,11 @@
; LSR should use count-down iteration to avoid requiring the trip count
; in a register, and it shouldn't require any reloads here.
-; CHECK: subs r3, #1
-; CHECK-NEXT: cmp r3, #0
-; CHECK-NEXT: bne.w
+; CHECK: @ %bb24
+; CHECK-NEXT: @ in Loop: Header=BB1_1 Depth=1
+; CHECK-NEXT: sub{{.*}} [[REGISTER:r[0-9]+]], #1
+; CHECK-NEXT: cmp{{.*}} [[REGISTER]], #0
+; CHECK-NEXT: bne.w
%92 = icmp eq i32 %tmp81, %indvar78 ; <i1> [#uses=1]
%indvar.next79 = add i32 %indvar78, 1 ; <i32> [#uses=1]
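The count-down shape being checked for is worth spelling out (a sketch inferred from the CHECK lines, not taken from elsewhere):

; up-counting: needs the counter, the trip count n, and a compare against n,
; so n must stay live in a register across the loop body:
;   add  rI, #1 ; cmp rI, rN ; bne <loop head>
; down-counting: the counter itself carries the exit condition, so the trip
; count can be dead after loop setup:
;   subs rC, #1 ; cmp rC, #0 ; bne <loop head>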
Modified: llvm/branches/wendling/eh/test/CodeGen/ARM/reg_sequence.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/ARM/reg_sequence.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/ARM/reg_sequence.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/ARM/reg_sequence.ll Fri Jul 2 04:57:13 2010
@@ -250,13 +250,13 @@
br label %8
; <label>:6 ; preds = %8
- br i1 undef, label %7, label %10
+ br label %7
; <label>:7 ; preds = %6
br label %8
; <label>:8 ; preds = %7, %2
- br i1 undef, label %6, label %9
+ br label %6
; <label>:9 ; preds = %8
ret float undef
Modified: llvm/branches/wendling/eh/test/CodeGen/ARM/vget_lane.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/ARM/vget_lane.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/ARM/vget_lane.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/ARM/vget_lane.ll Fri Jul 2 04:57:13 2010
@@ -204,8 +204,8 @@
define arm_aapcs_vfpcc <2 x float> @test_vset_lanef32(float %arg0_float32_t, <2 x float> %arg1_float32x2_t) nounwind {
;CHECK: test_vset_lanef32:
-;CHECK: vmov.f32
-;CHECK: vmov.f32
+;CHECK: vmov.f32 s3, s0
+;CHECK: vmov d0, d1
entry:
%0 = insertelement <2 x float> %arg1_float32x2_t, float %arg0_float32_t, i32 1 ; <<2 x float>> [#uses=1]
ret <2 x float> %0
Modified: llvm/branches/wendling/eh/test/CodeGen/CellSPU/call.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/CellSPU/call.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/CellSPU/call.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/CellSPU/call.ll Fri Jul 2 04:57:13 2010
@@ -1,7 +1,8 @@
-; RUN: llc < %s -march=cellspu > %t1.s
+; RUN: llc < %s -march=cellspu -regalloc=linearscan > %t1.s
; RUN: grep brsl %t1.s | count 1
; RUN: grep brasl %t1.s | count 1
; RUN: grep stqd %t1.s | count 80
+; RUN: llc < %s -march=cellspu | FileCheck %s
target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
target triple = "spu"
@@ -16,6 +17,8 @@
declare void @extern_stub_1(i32, i32)
define i32 @stub_1(i32 %x, float %y) {
+ ; CHECK: il $3, 0
+ ; CHECK: bi $lr
entry:
ret i32 0
}
Modified: llvm/branches/wendling/eh/test/CodeGen/CellSPU/call_indirect.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/CellSPU/call_indirect.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/CellSPU/call_indirect.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/CellSPU/call_indirect.ll Fri Jul 2 04:57:13 2010
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=cellspu -asm-verbose=0 > %t1.s
-; RUN: llc < %s -march=cellspu -mattr=large_mem -asm-verbose=0 > %t2.s
+; RUN: llc < %s -march=cellspu -asm-verbose=0 -regalloc=linearscan > %t1.s
+; RUN: llc < %s -march=cellspu -mattr=large_mem -asm-verbose=0 -regalloc=linearscan > %t2.s
; RUN: grep bisl %t1.s | count 7
; RUN: grep ila %t1.s | count 1
; RUN: grep rotqby %t1.s | count 5
Modified: llvm/branches/wendling/eh/test/CodeGen/CellSPU/jumptable.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/CellSPU/jumptable.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/CellSPU/jumptable.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/CellSPU/jumptable.ll Fri Jul 2 04:57:13 2010
@@ -2,9 +2,9 @@
; This is to check that emitting jumptables doesn't crash llc
define i32 @test(i32 %param) {
entry:
-;CHECK: ai $4, $3, -1
-;CHECK: clgti $5, $4, 3
-;CHECK: brnz $5,.LBB0_2
+;CHECK: ai {{\$.}}, $3, -1
+;CHECK: clgti {{\$., \$.}}, 3
+;CHECK: brnz {{\$.}},.LBB0_2
switch i32 %param, label %bb1 [
i32 1, label %bb3
i32 2, label %bb2
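A note on the pattern syntax the fix relies on: FileCheck treats {{...}} as a regular expression, so these lines now accept whichever register the allocator happens to pick. For example (illustrative only):

;CHECK: ai {{\$.}}, $3, -1
; matches "ai $4, $3, -1" as well as "ai $5, $3, -1"; only the $3 and -1
; operands are pinned down.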
Modified: llvm/branches/wendling/eh/test/CodeGen/CellSPU/loads.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/CellSPU/loads.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/CellSPU/loads.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/CellSPU/loads.ll Fri Jul 2 04:57:13 2010
@@ -22,17 +22,19 @@
declare <4 x i32>* @getv4f32ptr()
define <4 x i32> @func() {
- ;CHECK: brasl
- ;CHECK: lr {{\$[0-9]*, \$3}}
- ;CHECK: brasl
- %rv1 = call <4 x i32>* @getv4f32ptr()
- %rv2 = call <4 x i32>* @getv4f32ptr()
- %rv3 = load <4 x i32>* %rv1
- ret <4 x i32> %rv3
+ ;CHECK: brasl
+ ; We need some instruction to move the result to safety;
+ ; which one (lr, stqd, ...) depends on the register allocator.
+ ;CHECK: {{.*}}
+ ;CHECK: brasl
+ %rv1 = call <4 x i32>* @getv4f32ptr()
+ %rv2 = call <4 x i32>* @getv4f32ptr()
+ %rv3 = load <4 x i32>* %rv1
+ ret <4 x i32> %rv3
}
define <4 x float> @load_undef(){
- ;CHECK lqd $3, 0($3)
+ ; CHECK: lqd $3, 0($3)
%val = load <4 x float>* undef
ret <4 x float> %val
}
Removed: llvm/branches/wendling/eh/test/CodeGen/Generic/v-binop-widen.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/Generic/v-binop-widen.ll?rev=107464&view=auto
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/Generic/v-binop-widen.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/Generic/v-binop-widen.ll (removed)
@@ -1,8 +0,0 @@
-; RUN: llc -march=x86 < %s
-
-%vec = type <9 x float>
-define %vec @vecdiv( %vec %p1, %vec %p2)
-{
- %result = fdiv %vec %p1, %p2
- ret %vec %result
-}
Removed: llvm/branches/wendling/eh/test/CodeGen/Generic/v-binop-widen2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/Generic/v-binop-widen2.ll?rev=107464&view=auto
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/Generic/v-binop-widen2.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/Generic/v-binop-widen2.ll (removed)
@@ -1,37 +0,0 @@
-; RUN: llvm-as < %s | lli
-
-%vec = type <6 x float>
-
-define %vec @vecdiv( %vec %p1, %vec %p2)
-{
- %result = fdiv %vec %p1, %p2
- ret %vec %result
-}
-
-@a = constant %vec < float 2.0, float 4.0, float 8.0, float 16.0, float 32.0, float 64.0 >
-@b = constant %vec < float 2.0, float 2.0, float 2.0, float 2.0, float 2.0, float 2.0 >
-
-; Expected result: < 1.0, 2.0, 4.0, ..., 2.0^(n-1) >
-; main() returns 0 if the result is expected and 1 otherwise
-define i32 @main() nounwind {
-entry:
- %avec = load %vec* @a
- %bvec = load %vec* @b
-
- %res = call %vec @vecdiv(%vec %avec, %vec %bvec)
- br label %loop
-loop:
- %idx = phi i32 [0, %entry], [%nextInd, %looptail]
- %expected = phi float [1.0, %entry], [%nextExpected, %looptail]
- %elem = extractelement %vec %res, i32 %idx
- %expcmp = fcmp oeq float %elem, %expected
- br i1 %expcmp, label %looptail, label %return
-looptail:
- %nextExpected = fmul float %expected, 2.0
- %nextInd = add i32 %idx, 1
- %cmp = icmp slt i32 %nextInd, 6
- br i1 %cmp, label %loop, label %return
-return:
- %retval = phi i32 [0, %looptail], [1, %loop]
- ret i32 %retval
-}
Removed: llvm/branches/wendling/eh/test/CodeGen/PowerPC/2008-03-06-KillInfo.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/PowerPC/2008-03-06-KillInfo.ll?rev=107464&view=auto
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/PowerPC/2008-03-06-KillInfo.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/PowerPC/2008-03-06-KillInfo.ll (removed)
@@ -1,21 +0,0 @@
-; RUN: llc < %s -march=ppc64 -enable-ppc64-regscavenger
-@.str242 = external constant [3 x i8] ; <[3 x i8]*> [#uses=1]
-
-define fastcc void @ParseContent(i8* %buf, i32 %bufsize) {
-entry:
- %items = alloca [10000 x i8*], align 16 ; <[10000 x i8*]*> [#uses=0]
- %tmp86 = add i32 0, -1 ; <i32> [#uses=1]
- br i1 false, label %cond_true94, label %cond_next99
-cond_true94: ; preds = %entry
- %tmp98 = call i32 (i8*, ...)* @printf( i8* getelementptr ([3 x i8]* @.str242, i32 0, i32 0), i8* null ) ; <i32> [#uses=0]
- %tmp20971 = icmp sgt i32 %tmp86, 0 ; <i1> [#uses=1]
- br i1 %tmp20971, label %bb101, label %bb212
-cond_next99: ; preds = %entry
- ret void
-bb101: ; preds = %cond_true94
- ret void
-bb212: ; preds = %cond_true94
- ret void
-}
-
-declare i32 @printf(i8*, ...)
Modified: llvm/branches/wendling/eh/test/CodeGen/PowerPC/2009-08-23-linkerprivate.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/PowerPC/2009-08-23-linkerprivate.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/PowerPC/2009-08-23-linkerprivate.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/PowerPC/2009-08-23-linkerprivate.ll Fri Jul 2 04:57:13 2010
@@ -2,7 +2,7 @@
; ModuleID = '/Volumes/MacOS9/tests/WebKit/JavaScriptCore/profiler/ProfilerServer.mm'
-@"\01l_objc_msgSend_fixup_alloc" = linker_private hidden global i32 0, section "__DATA, __objc_msgrefs, coalesced", align 16 ; <i32*> [#uses=0]
+@"\01l_objc_msgSend_fixup_alloc" = linker_private_weak hidden global i32 0, section "__DATA, __objc_msgrefs, coalesced", align 16
; CHECK: .globl l_objc_msgSend_fixup_alloc
; CHECK: .weak_definition l_objc_msgSend_fixup_alloc
Removed: llvm/branches/wendling/eh/test/CodeGen/PowerPC/2010-02-26-FoldFloats.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/PowerPC/2010-02-26-FoldFloats.ll?rev=107464&view=auto
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/PowerPC/2010-02-26-FoldFloats.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/PowerPC/2010-02-26-FoldFloats.ll (removed)
@@ -1,433 +0,0 @@
-; RUN: llc < %s -O3 | FileCheck %s
-target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128-n32"
-target triple = "powerpc-apple-darwin9.6"
-
-; There should be no stfs spills
-; CHECK: main:
-; CHECK-NOT: stfs
-; CHECK: .section
-
-@.str66 = external constant [3 x i8], align 4 ; <[3 x i8]*> [#uses=1]
-@.str31 = external constant [6 x i8], align 4 ; <[6 x i8]*> [#uses=1]
-@.str61 = external constant [21 x i8], align 4 ; <[21 x i8]*> [#uses=1]
-@.str101 = external constant [61 x i8], align 4 ; <[61 x i8]*> [#uses=1]
-@.str104 = external constant [31 x i8], align 4 ; <[31 x i8]*> [#uses=1]
-@.str105 = external constant [45 x i8], align 4 ; <[45 x i8]*> [#uses=1]
-@.str112 = external constant [38 x i8], align 4 ; <[38 x i8]*> [#uses=1]
-@.str121 = external constant [36 x i8], align 4 ; <[36 x i8]*> [#uses=1]
-@.str12293 = external constant [67 x i8], align 4 ; <[67 x i8]*> [#uses=1]
-@.str123 = external constant [68 x i8], align 4 ; <[68 x i8]*> [#uses=1]
-@.str124 = external constant [52 x i8], align 4 ; <[52 x i8]*> [#uses=1]
-@.str125 = external constant [51 x i8], align 4 ; <[51 x i8]*> [#uses=1]
-
-define i32 @main(i32 %argc, i8** %argv) noreturn nounwind {
-entry:
- br i1 undef, label %bb4.i1, label %my_fopen.exit
-
-bb4.i1: ; preds = %entry
- unreachable
-
-my_fopen.exit: ; preds = %entry
- br i1 undef, label %bb.i, label %bb1.i
-
-bb.i: ; preds = %my_fopen.exit
- unreachable
-
-bb1.i: ; preds = %my_fopen.exit
- br label %bb134.i
-
-bb2.i: ; preds = %bb134.i
- %0 = icmp eq i32 undef, 0 ; <i1> [#uses=1]
- br i1 %0, label %bb20.i, label %bb21.i
-
-bb20.i: ; preds = %bb2.i
- br label %bb134.i
-
-bb21.i: ; preds = %bb2.i
- %1 = call i32 @strcmp(i8* undef, i8* getelementptr inbounds ([6 x i8]* @.str31, i32 0, i32 0)) nounwind readonly ; <i32> [#uses=0]
- br i1 undef, label %bb30.i, label %bb31.i
-
-bb30.i: ; preds = %bb21.i
- br label %bb134.i
-
-bb31.i: ; preds = %bb21.i
- br i1 undef, label %bb41.i, label %bb44.i
-
-bb41.i: ; preds = %bb31.i
- %2 = icmp slt i32 undef, %argc ; <i1> [#uses=1]
- br i1 %2, label %bb1.i77.i, label %bb2.i78.i
-
-bb1.i77.i: ; preds = %bb41.i
- %3 = load float* undef, align 4 ; <float> [#uses=2]
- %4 = fcmp ugt float %3, 0.000000e+00 ; <i1> [#uses=1]
- br i1 %4, label %bb43.i, label %bb42.i
-
-bb2.i78.i: ; preds = %bb41.i
- unreachable
-
-bb42.i: ; preds = %bb1.i77.i
- unreachable
-
-bb43.i: ; preds = %bb1.i77.i
- br label %bb134.i
-
-bb44.i: ; preds = %bb31.i
- br i1 undef, label %bb45.i, label %bb49.i
-
-bb45.i: ; preds = %bb44.i
- %5 = icmp slt i32 undef, %argc ; <i1> [#uses=1]
- br i1 %5, label %bb1.i72.i, label %bb2.i73.i
-
-bb1.i72.i: ; preds = %bb45.i
- %6 = load float* undef, align 4 ; <float> [#uses=3]
- %7 = fcmp ult float %6, 1.000000e+00 ; <i1> [#uses=1]
- %or.cond.i = and i1 undef, %7 ; <i1> [#uses=1]
- br i1 %or.cond.i, label %bb48.i, label %bb47.i
-
-bb2.i73.i: ; preds = %bb45.i
- unreachable
-
-bb47.i: ; preds = %bb1.i72.i
- unreachable
-
-bb48.i: ; preds = %bb1.i72.i
- br label %bb134.i
-
-bb49.i: ; preds = %bb44.i
- br i1 undef, label %bb50.i, label %bb53.i
-
-bb50.i: ; preds = %bb49.i
- br i1 false, label %bb1.i67.i, label %bb2.i68.i
-
-bb1.i67.i: ; preds = %bb50.i
- br i1 false, label %read_float_option.exit69.i, label %bb1.i67.bb2.i68_crit_edge.i
-
-bb1.i67.bb2.i68_crit_edge.i: ; preds = %bb1.i67.i
- br label %bb2.i68.i
-
-bb2.i68.i: ; preds = %bb1.i67.bb2.i68_crit_edge.i, %bb50.i
- unreachable
-
-read_float_option.exit69.i: ; preds = %bb1.i67.i
- br i1 undef, label %bb52.i, label %bb51.i
-
-bb51.i: ; preds = %read_float_option.exit69.i
- unreachable
-
-bb52.i: ; preds = %read_float_option.exit69.i
- br label %bb134.i
-
-bb53.i: ; preds = %bb49.i
- %8 = call i32 @strcmp(i8* undef, i8* getelementptr inbounds ([21 x i8]* @.str61, i32 0, i32 0)) nounwind readonly ; <i32> [#uses=0]
- br i1 false, label %bb89.i, label %bb92.i
-
-bb89.i: ; preds = %bb53.i
- br i1 undef, label %bb1.i27.i, label %bb2.i28.i
-
-bb1.i27.i: ; preds = %bb89.i
- unreachable
-
-bb2.i28.i: ; preds = %bb89.i
- unreachable
-
-bb92.i: ; preds = %bb53.i
- br i1 undef, label %bb93.i, label %bb96.i
-
-bb93.i: ; preds = %bb92.i
- br i1 undef, label %bb1.i22.i, label %bb2.i23.i
-
-bb1.i22.i: ; preds = %bb93.i
- br i1 undef, label %bb95.i, label %bb94.i
-
-bb2.i23.i: ; preds = %bb93.i
- unreachable
-
-bb94.i: ; preds = %bb1.i22.i
- unreachable
-
-bb95.i: ; preds = %bb1.i22.i
- br label %bb134.i
-
-bb96.i: ; preds = %bb92.i
- br i1 undef, label %bb97.i, label %bb100.i
-
-bb97.i: ; preds = %bb96.i
- %9 = icmp slt i32 undef, %argc ; <i1> [#uses=1]
- br i1 %9, label %bb1.i17.i, label %bb2.i18.i
-
-bb1.i17.i: ; preds = %bb97.i
- %10 = call i32 (i8*, i8*, ...)* @"\01_sscanf$LDBL128"(i8* undef, i8* getelementptr inbounds ([3 x i8]* @.str66, i32 0, i32 0), float* undef) nounwind ; <i32> [#uses=1]
- %phitmp.i16.i = icmp eq i32 %10, 1 ; <i1> [#uses=1]
- br i1 %phitmp.i16.i, label %read_float_option.exit19.i, label %bb1.i17.bb2.i18_crit_edge.i
-
-bb1.i17.bb2.i18_crit_edge.i: ; preds = %bb1.i17.i
- br label %bb2.i18.i
-
-bb2.i18.i: ; preds = %bb1.i17.bb2.i18_crit_edge.i, %bb97.i
- unreachable
-
-read_float_option.exit19.i: ; preds = %bb1.i17.i
- br i1 false, label %bb99.i, label %bb98.i
-
-bb98.i: ; preds = %read_float_option.exit19.i
- unreachable
-
-bb99.i: ; preds = %read_float_option.exit19.i
- br label %bb134.i
-
-bb100.i: ; preds = %bb96.i
- br i1 false, label %bb101.i, label %bb104.i
-
-bb101.i: ; preds = %bb100.i
- br i1 false, label %bb1.i12.i, label %bb2.i13.i
-
-bb1.i12.i: ; preds = %bb101.i
- br i1 undef, label %bb102.i, label %bb103.i
-
-bb2.i13.i: ; preds = %bb101.i
- unreachable
-
-bb102.i: ; preds = %bb1.i12.i
- unreachable
-
-bb103.i: ; preds = %bb1.i12.i
- br label %bb134.i
-
-bb104.i: ; preds = %bb100.i
- unreachable
-
-bb134.i: ; preds = %bb103.i, %bb99.i, %bb95.i, %bb52.i, %bb48.i, %bb43.i, %bb30.i, %bb20.i, %bb1.i
- %annealing_sched.1.0 = phi float [ 1.000000e+01, %bb1.i ], [ %annealing_sched.1.0, %bb20.i ], [ 1.000000e+00, %bb30.i ], [ %annealing_sched.1.0, %bb43.i ], [ %annealing_sched.1.0, %bb48.i ], [ %annealing_sched.1.0, %bb52.i ], [ %annealing_sched.1.0, %bb95.i ], [ %annealing_sched.1.0, %bb99.i ], [ %annealing_sched.1.0, %bb103.i ] ; <float> [#uses=8]
- %annealing_sched.2.0 = phi float [ 1.000000e+02, %bb1.i ], [ %annealing_sched.2.0, %bb20.i ], [ %annealing_sched.2.0, %bb30.i ], [ %3, %bb43.i ], [ %annealing_sched.2.0, %bb48.i ], [ %annealing_sched.2.0, %bb52.i ], [ %annealing_sched.2.0, %bb95.i ], [ %annealing_sched.2.0, %bb99.i ], [ %annealing_sched.2.0, %bb103.i ] ; <float> [#uses=8]
- %annealing_sched.3.0 = phi float [ 0x3FE99999A0000000, %bb1.i ], [ %annealing_sched.3.0, %bb20.i ], [ %annealing_sched.3.0, %bb30.i ], [ %annealing_sched.3.0, %bb43.i ], [ %6, %bb48.i ], [ %annealing_sched.3.0, %bb52.i ], [ %annealing_sched.3.0, %bb95.i ], [ %annealing_sched.3.0, %bb99.i ], [ %annealing_sched.3.0, %bb103.i ] ; <float> [#uses=8]
- %annealing_sched.4.0 = phi float [ 0x3F847AE140000000, %bb1.i ], [ %annealing_sched.4.0, %bb20.i ], [ %annealing_sched.4.0, %bb30.i ], [ %annealing_sched.4.0, %bb43.i ], [ %annealing_sched.4.0, %bb48.i ], [ 0.000000e+00, %bb52.i ], [ %annealing_sched.4.0, %bb95.i ], [ %annealing_sched.4.0, %bb99.i ], [ %annealing_sched.4.0, %bb103.i ] ; <float> [#uses=8]
- %router_opts.0.0 = phi float [ 0.000000e+00, %bb1.i ], [ %router_opts.0.0, %bb20.i ], [ 1.000000e+04, %bb30.i ], [ %router_opts.0.0, %bb43.i ], [ %router_opts.0.0, %bb48.i ], [ %router_opts.0.0, %bb52.i ], [ %router_opts.0.0, %bb95.i ], [ %router_opts.0.0, %bb99.i ], [ %router_opts.0.0, %bb103.i ] ; <float> [#uses=8]
- %router_opts.1.0 = phi float [ 5.000000e-01, %bb1.i ], [ %router_opts.1.0, %bb20.i ], [ 1.000000e+04, %bb30.i ], [ %router_opts.1.0, %bb43.i ], [ %router_opts.1.0, %bb48.i ], [ %router_opts.1.0, %bb52.i ], [ undef, %bb95.i ], [ %router_opts.1.0, %bb99.i ], [ %router_opts.1.0, %bb103.i ] ; <float> [#uses=7]
- %router_opts.2.0 = phi float [ 1.500000e+00, %bb1.i ], [ %router_opts.2.0, %bb20.i ], [ %router_opts.2.0, %bb30.i ], [ %router_opts.2.0, %bb43.i ], [ %router_opts.2.0, %bb48.i ], [ %router_opts.2.0, %bb52.i ], [ %router_opts.2.0, %bb95.i ], [ undef, %bb99.i ], [ %router_opts.2.0, %bb103.i ] ; <float> [#uses=8]
- %router_opts.3.0 = phi float [ 0x3FC99999A0000000, %bb1.i ], [ %router_opts.3.0, %bb20.i ], [ %router_opts.3.0, %bb30.i ], [ %router_opts.3.0, %bb43.i ], [ %router_opts.3.0, %bb48.i ], [ %router_opts.3.0, %bb52.i ], [ %router_opts.3.0, %bb95.i ], [ %router_opts.3.0, %bb99.i ], [ 0.000000e+00, %bb103.i ] ; <float> [#uses=8]
- %11 = phi float [ 0x3FC99999A0000000, %bb1.i ], [ %11, %bb20.i ], [ %11, %bb30.i ], [ %11, %bb43.i ], [ %11, %bb48.i ], [ %11, %bb52.i ], [ %11, %bb95.i ], [ %11, %bb99.i ], [ 0.000000e+00, %bb103.i ] ; <float> [#uses=8]
- %12 = phi float [ 1.500000e+00, %bb1.i ], [ %12, %bb20.i ], [ %12, %bb30.i ], [ %12, %bb43.i ], [ %12, %bb48.i ], [ %12, %bb52.i ], [ %12, %bb95.i ], [ undef, %bb99.i ], [ %12, %bb103.i ] ; <float> [#uses=8]
- %13 = phi float [ 5.000000e-01, %bb1.i ], [ %13, %bb20.i ], [ 1.000000e+04, %bb30.i ], [ %13, %bb43.i ], [ %13, %bb48.i ], [ %13, %bb52.i ], [ undef, %bb95.i ], [ %13, %bb99.i ], [ %13, %bb103.i ] ; <float> [#uses=7]
- %14 = phi float [ 0.000000e+00, %bb1.i ], [ %14, %bb20.i ], [ 1.000000e+04, %bb30.i ], [ %14, %bb43.i ], [ %14, %bb48.i ], [ %14, %bb52.i ], [ %14, %bb95.i ], [ %14, %bb99.i ], [ %14, %bb103.i ] ; <float> [#uses=8]
- %15 = phi float [ 0x3FE99999A0000000, %bb1.i ], [ %15, %bb20.i ], [ %15, %bb30.i ], [ %15, %bb43.i ], [ %6, %bb48.i ], [ %15, %bb52.i ], [ %15, %bb95.i ], [ %15, %bb99.i ], [ %15, %bb103.i ] ; <float> [#uses=8]
- %16 = phi float [ 0x3F847AE140000000, %bb1.i ], [ %16, %bb20.i ], [ %16, %bb30.i ], [ %16, %bb43.i ], [ %16, %bb48.i ], [ 0.000000e+00, %bb52.i ], [ %16, %bb95.i ], [ %16, %bb99.i ], [ %16, %bb103.i ] ; <float> [#uses=8]
- %17 = phi float [ 1.000000e+01, %bb1.i ], [ %17, %bb20.i ], [ 1.000000e+00, %bb30.i ], [ %17, %bb43.i ], [ %17, %bb48.i ], [ %17, %bb52.i ], [ %17, %bb95.i ], [ %17, %bb99.i ], [ %17, %bb103.i ] ; <float> [#uses=8]
- %18 = icmp slt i32 undef, %argc ; <i1> [#uses=1]
- br i1 %18, label %bb2.i, label %bb135.i
-
-bb135.i: ; preds = %bb134.i
- br i1 undef, label %bb141.i, label %bb142.i
-
-bb141.i: ; preds = %bb135.i
- unreachable
-
-bb142.i: ; preds = %bb135.i
- br i1 undef, label %bb145.i, label %bb144.i
-
-bb144.i: ; preds = %bb142.i
- unreachable
-
-bb145.i: ; preds = %bb142.i
- br i1 undef, label %bb146.i, label %bb147.i
-
-bb146.i: ; preds = %bb145.i
- unreachable
-
-bb147.i: ; preds = %bb145.i
- br i1 undef, label %bb148.i, label %bb155.i
-
-bb148.i: ; preds = %bb147.i
- br label %bb155.i
-
-bb155.i: ; preds = %bb148.i, %bb147.i
- br i1 undef, label %bb156.i, label %bb161.i
-
-bb156.i: ; preds = %bb155.i
- unreachable
-
-bb161.i: ; preds = %bb155.i
- br i1 undef, label %bb162.i, label %bb163.i
-
-bb162.i: ; preds = %bb161.i
- %19 = fpext float %17 to double ; <double> [#uses=1]
- %20 = call i32 (i8*, ...)* @"\01_printf$LDBL128"(i8* getelementptr inbounds ([61 x i8]* @.str101, i32 0, i32 0), double %19) nounwind ; <i32> [#uses=0]
- unreachable
-
-bb163.i: ; preds = %bb161.i
- %21 = fpext float %16 to double ; <double> [#uses=1]
- %22 = call i32 (i8*, ...)* @"\01_printf$LDBL128"(i8* getelementptr inbounds ([31 x i8]* @.str104, i32 0, i32 0), double %21) nounwind ; <i32> [#uses=0]
- %23 = fpext float %15 to double ; <double> [#uses=1]
- %24 = call i32 (i8*, ...)* @"\01_printf$LDBL128"(i8* getelementptr inbounds ([45 x i8]* @.str105, i32 0, i32 0), double %23) nounwind ; <i32> [#uses=0]
- %25 = call i32 (i8*, ...)* @"\01_printf$LDBL128"(i8* getelementptr inbounds ([38 x i8]* @.str112, i32 0, i32 0), double undef) nounwind ; <i32> [#uses=0]
- br i1 undef, label %parse_command.exit, label %bb176.i
-
-bb176.i: ; preds = %bb163.i
- br i1 undef, label %bb177.i, label %bb178.i
-
-bb177.i: ; preds = %bb176.i
- unreachable
-
-bb178.i: ; preds = %bb176.i
- %26 = call i32 (i8*, ...)* @"\01_printf$LDBL128"(i8* getelementptr inbounds ([36 x i8]* @.str121, i32 0, i32 0), double undef) nounwind ; <i32> [#uses=0]
- %27 = fpext float %14 to double ; <double> [#uses=1]
- %28 = call i32 (i8*, ...)* @"\01_printf$LDBL128"(i8* getelementptr inbounds ([67 x i8]* @.str12293, i32 0, i32 0), double %27) nounwind ; <i32> [#uses=0]
- %29 = fpext float %13 to double ; <double> [#uses=1]
- %30 = call i32 (i8*, ...)* @"\01_printf$LDBL128"(i8* getelementptr inbounds ([68 x i8]* @.str123, i32 0, i32 0), double %29) nounwind ; <i32> [#uses=0]
- %31 = fpext float %12 to double ; <double> [#uses=1]
- %32 = call i32 (i8*, ...)* @"\01_printf$LDBL128"(i8* getelementptr inbounds ([52 x i8]* @.str124, i32 0, i32 0), double %31) nounwind ; <i32> [#uses=0]
- %33 = fpext float %11 to double ; <double> [#uses=1]
- %34 = call i32 (i8*, ...)* @"\01_printf$LDBL128"(i8* getelementptr inbounds ([51 x i8]* @.str125, i32 0, i32 0), double %33) nounwind ; <i32> [#uses=0]
- unreachable
-
-parse_command.exit: ; preds = %bb163.i
- br i1 undef, label %bb4.i152.i, label %my_fopen.exit.i
-
-bb4.i152.i: ; preds = %parse_command.exit
- unreachable
-
-my_fopen.exit.i: ; preds = %parse_command.exit
- br i1 undef, label %bb.i6.i99, label %bb49.preheader.i.i
-
-bb.i6.i99: ; preds = %my_fopen.exit.i
- br i1 undef, label %bb3.i.i100, label %bb1.i8.i
-
-bb1.i8.i: ; preds = %bb.i6.i99
- unreachable
-
-bb3.i.i100: ; preds = %bb.i6.i99
- unreachable
-
-bb49.preheader.i.i: ; preds = %my_fopen.exit.i
- br i1 undef, label %bb7.i11.i, label %bb50.i.i
-
-bb7.i11.i: ; preds = %bb49.preheader.i.i
- unreachable
-
-bb50.i.i: ; preds = %bb49.preheader.i.i
- br i1 undef, label %bb.i.i.i20.i, label %my_calloc.exit.i.i.i
-
-bb.i.i.i20.i: ; preds = %bb50.i.i
- unreachable
-
-my_calloc.exit.i.i.i: ; preds = %bb50.i.i
- br i1 undef, label %bb.i.i37.i.i, label %alloc_hash_table.exit.i21.i
-
-bb.i.i37.i.i: ; preds = %my_calloc.exit.i.i.i
- unreachable
-
-alloc_hash_table.exit.i21.i: ; preds = %my_calloc.exit.i.i.i
- br i1 undef, label %bb51.i.i, label %bb3.i23.i.i
-
-bb51.i.i: ; preds = %alloc_hash_table.exit.i21.i
- unreachable
-
-bb3.i23.i.i: ; preds = %alloc_hash_table.exit.i21.i
- br i1 undef, label %bb.i8.i.i, label %bb.nph.i.i
-
-bb.nph.i.i: ; preds = %bb3.i23.i.i
- unreachable
-
-bb.i8.i.i: ; preds = %bb3.i.i34.i, %bb3.i23.i.i
- br i1 undef, label %bb3.i.i34.i, label %bb1.i.i32.i
-
-bb1.i.i32.i: ; preds = %bb.i8.i.i
- unreachable
-
-bb3.i.i34.i: ; preds = %bb.i8.i.i
- br i1 undef, label %free_hash_table.exit.i.i, label %bb.i8.i.i
-
-free_hash_table.exit.i.i: ; preds = %bb3.i.i34.i
- br i1 undef, label %check_netlist.exit.i, label %bb59.i.i
-
-bb59.i.i: ; preds = %free_hash_table.exit.i.i
- unreachable
-
-check_netlist.exit.i: ; preds = %free_hash_table.exit.i.i
- br label %bb.i.i3.i
-
-bb.i.i3.i: ; preds = %bb3.i.i4.i, %check_netlist.exit.i
- br i1 false, label %bb3.i.i4.i, label %bb1.i.i.i122
-
-bb1.i.i.i122: ; preds = %bb1.i.i.i122, %bb.i.i3.i
- br i1 false, label %bb3.i.i4.i, label %bb1.i.i.i122
-
-bb3.i.i4.i: ; preds = %bb1.i.i.i122, %bb.i.i3.i
- br i1 undef, label %read_net.exit, label %bb.i.i3.i
-
-read_net.exit: ; preds = %bb3.i.i4.i
- br i1 undef, label %bb.i44, label %bb3.i47
-
-bb.i44: ; preds = %read_net.exit
- unreachable
-
-bb3.i47: ; preds = %read_net.exit
- br i1 false, label %bb9.i50, label %bb8.i49
-
-bb8.i49: ; preds = %bb3.i47
- unreachable
-
-bb9.i50: ; preds = %bb3.i47
- br i1 undef, label %bb11.i51, label %bb12.i52
-
-bb11.i51: ; preds = %bb9.i50
- unreachable
-
-bb12.i52: ; preds = %bb9.i50
- br i1 undef, label %bb.i.i53, label %my_malloc.exit.i54
-
-bb.i.i53: ; preds = %bb12.i52
- unreachable
-
-my_malloc.exit.i54: ; preds = %bb12.i52
- br i1 undef, label %bb.i2.i55, label %my_malloc.exit3.i56
-
-bb.i2.i55: ; preds = %my_malloc.exit.i54
- unreachable
-
-my_malloc.exit3.i56: ; preds = %my_malloc.exit.i54
- br i1 undef, label %bb.i.i.i57, label %my_malloc.exit.i.i
-
-bb.i.i.i57: ; preds = %my_malloc.exit3.i56
- unreachable
-
-my_malloc.exit.i.i: ; preds = %my_malloc.exit3.i56
- br i1 undef, label %bb, label %bb10
-
-bb: ; preds = %my_malloc.exit.i.i
- unreachable
-
-bb10: ; preds = %my_malloc.exit.i.i
- br i1 false, label %bb12, label %bb11
-
-bb11: ; preds = %bb10
- unreachable
-
-bb12: ; preds = %bb10
- store float %annealing_sched.1.0, float* null, align 4
- store float %annealing_sched.2.0, float* undef, align 8
- store float %annealing_sched.3.0, float* undef, align 4
- store float %annealing_sched.4.0, float* undef, align 8
- store float %router_opts.0.0, float* undef, align 8
- store float %router_opts.1.0, float* undef, align 4
- store float %router_opts.2.0, float* null, align 8
- store float %router_opts.3.0, float* undef, align 4
- br i1 undef, label %place_and_route.exit, label %bb7.i22
-
-bb7.i22: ; preds = %bb12
- br i1 false, label %bb8.i23, label %bb9.i26
-
-bb8.i23: ; preds = %bb7.i22
- unreachable
-
-bb9.i26: ; preds = %bb7.i22
- unreachable
-
-place_and_route.exit: ; preds = %bb12
- unreachable
-}
-
-declare i32 @"\01_printf$LDBL128"(i8*, ...) nounwind
-
-declare i32 @strcmp(i8* nocapture, i8* nocapture) nounwind readonly
-
-declare i32 @"\01_sscanf$LDBL128"(i8*, i8*, ...) nounwind
Modified: llvm/branches/wendling/eh/test/CodeGen/Thumb/push.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/Thumb/push.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/Thumb/push.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/Thumb/push.ll Fri Jul 2 04:57:13 2010
@@ -2,8 +2,8 @@
; rdar://7268481
define void @t() nounwind {
-; CHECK: t:
-; CHECK-NEXT : push {r7}
+; CHECK: t:
+; CHECK: push {r7}
entry:
call void asm sideeffect ".long 0xe7ffdefe", ""() nounwind
ret void
Modified: llvm/branches/wendling/eh/test/CodeGen/Thumb2/2009-10-15-ITBlockBranch.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/Thumb2/2009-10-15-ITBlockBranch.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/Thumb2/2009-10-15-ITBlockBranch.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/Thumb2/2009-10-15-ITBlockBranch.ll Fri Jul 2 04:57:13 2010
@@ -12,8 +12,6 @@
; CHECK: _ZNKSs7compareERKSs:
; CHECK: it eq
; CHECK-NEXT: subeq.w r0, r6, r8
-; CHECK-NEXT: %bb
-; CHECK-NEXT: %bb1
; CHECK-NEXT: ldmia.w sp, {r4, r5, r6, r8, r9, pc}
entry:
%0 = tail call arm_aapcs_vfpcc i32 @_ZNKSs4sizeEv(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %this) ; <i32> [#uses=3]
Modified: llvm/branches/wendling/eh/test/CodeGen/Thumb2/crash.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/Thumb2/crash.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/Thumb2/crash.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/Thumb2/crash.ll Fri Jul 2 04:57:13 2010
@@ -19,3 +19,31 @@
}
declare void @llvm.arm.neon.vst4.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>) nounwind
+
+@sbuf = common global [16 x i32] zeroinitializer, align 16 ; <[16 x i32]*> [#uses=5]
+@dbuf = common global [16 x i32] zeroinitializer ; <[16 x i32]*> [#uses=2]
+
+; This function creates 4 chained INSERT_SUBREGS and then invokes the register scavenger.
+; The first INSERT_SUBREG needs an <undef> use operand for that to work.
+define arm_apcscc i32 @main() nounwind {
+bb.nph:
+ br label %bb
+
+bb: ; preds = %bb, %bb.nph
+ %0 = phi i32 [ 0, %bb.nph ], [ %1, %bb ] ; <i32> [#uses=4]
+ %scevgep = getelementptr [16 x i32]* @sbuf, i32 0, i32 %0 ; <i32*> [#uses=1]
+ %scevgep5 = getelementptr [16 x i32]* @dbuf, i32 0, i32 %0 ; <i32*> [#uses=1]
+ store i32 %0, i32* %scevgep, align 4
+ store i32 -1, i32* %scevgep5, align 4
+ %1 = add nsw i32 %0, 1 ; <i32> [#uses=2]
+ %exitcond = icmp eq i32 %1, 16 ; <i1> [#uses=1]
+ br i1 %exitcond, label %bb2, label %bb
+
+bb2: ; preds = %bb
+ %2 = load <4 x i32>* bitcast ([16 x i32]* @sbuf to <4 x i32>*), align 16 ; <<4 x i32>> [#uses=1]
+ %3 = load <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32]* @sbuf, i32 0, i32 4) to <4 x i32>*), align 16 ; <<4 x i32>> [#uses=1]
+ %4 = load <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32]* @sbuf, i32 0, i32 8) to <4 x i32>*), align 16 ; <<4 x i32>> [#uses=1]
+ %5 = load <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32]* @sbuf, i32 0, i32 12) to <4 x i32>*), align 16 ; <<4 x i32>> [#uses=1]
+ tail call void @llvm.arm.neon.vst4.v4i32(i8* bitcast ([16 x i32]* @dbuf to i8*), <4 x i32> %2, <4 x i32> %3, <4 x i32> %4, <4 x i32> %5) nounwind
+ ret i32 0
+}
Modified: llvm/branches/wendling/eh/test/CodeGen/Thumb2/ldr-str-imm12.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/Thumb2/ldr-str-imm12.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/Thumb2/ldr-str-imm12.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/Thumb2/ldr-str-imm12.ll Fri Jul 2 04:57:13 2010
@@ -25,10 +25,7 @@
; CHECK: ldr.w r9, [r7, #28]
%xgaps.i = alloca [32 x %union.rec*], align 4 ; <[32 x %union.rec*]*> [#uses=0]
%ycomp.i = alloca [32 x %union.rec*], align 4 ; <[32 x %union.rec*]*> [#uses=0]
- br i1 false, label %bb, label %bb20
-
-bb: ; preds = %entry
- unreachable
+ br label %bb20
bb20: ; preds = %entry
switch i32 undef, label %bb1287 [
Removed: llvm/branches/wendling/eh/test/CodeGen/Thumb2/sign_extend_inreg.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/Thumb2/sign_extend_inreg.ll?rev=107464&view=auto
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/Thumb2/sign_extend_inreg.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/Thumb2/sign_extend_inreg.ll (removed)
@@ -1,22 +0,0 @@
-; RUN: llc < %s -mcpu=cortex-a8 | FileCheck %s -check-prefix=CHECK-A8
-; RUN: llc < %s -mcpu=cortex-m3 | FileCheck %s -check-prefix=CHECK-M3
-
-target triple = "thumbv7-apple-darwin10"
-
-define i32 @f1(i16* %ptr) nounwind {
-; CHECK-A8: f1
-; CHECK-A8: sxth
-; CHECK-M3: f1
-; CHECK-M3-NOT: sxth
-; CHECK-M3: bx lr
- %1 = load i16* %ptr
- %2 = icmp eq i16 %1, 1
- %3 = sext i16 %1 to i32
- br i1 %2, label %.next, label %.exit
-
-.next:
- br label %.exit
-
-.exit:
- ret i32 %3
-}
Modified: llvm/branches/wendling/eh/test/CodeGen/Thumb2/thumb2-call-tc.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/Thumb2/thumb2-call-tc.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/Thumb2/thumb2-call-tc.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/Thumb2/thumb2-call-tc.ll Fri Jul 2 04:57:13 2010
@@ -11,7 +11,7 @@
; LINUX: f:
; LINUX: bl g
- call void @g( i32 1, i32 2, i32 3, i32 4 )
+ tail call void @g( i32 1, i32 2, i32 3, i32 4 )
ret void
}
Modified: llvm/branches/wendling/eh/test/CodeGen/Thumb2/thumb2-cbnz.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/Thumb2/thumb2-cbnz.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/Thumb2/thumb2-cbnz.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/Thumb2/thumb2-cbnz.ll Fri Jul 2 04:57:13 2010
@@ -21,7 +21,7 @@
bb9: ; preds = %bb7
; CHECK: cmp r0, #0
-; CHECK-NEXT: cmp r0, #0
+; CHECK: cmp r0, #0
; CHECK-NEXT: cbnz
%0 = tail call double @floor(double %b) nounwind readnone ; <double> [#uses=0]
br label %bb11
Modified: llvm/branches/wendling/eh/test/CodeGen/Thumb2/thumb2-eor.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/Thumb2/thumb2-eor.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/Thumb2/thumb2-eor.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/Thumb2/thumb2-eor.ll Fri Jul 2 04:57:13 2010
@@ -9,11 +9,18 @@
define i32 @f2(i32 %a, i32 %b) {
; CHECK: f2:
-; CHECK: eor.w r0, r1, r0
+; CHECK: eors r0, r1
%tmp = xor i32 %b, %a
ret i32 %tmp
}
+define i32 @f2b(i32 %a, i32 %b, i32 %c) {
+; CHECK: f2b:
+; CHECK: eor.w r0, r1, r2
+ %tmp = xor i32 %b, %c
+ ret i32 %tmp
+}
+
define i32 @f3(i32 %a, i32 %b) {
; CHECK: f3:
; CHECK: eor.w r0, r0, r1, lsl #5
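The new f2b case pins down the encoding distinction (my summary; the patch itself only adds the CHECK lines): the narrow 16-bit Thumb encoding "eors rdn, rm" requires the destination to also be a source, so f2, which xors into r0, can use it, while f2b, with three distinct registers, must use the 32-bit "eor.w rd, rn, rm".

; 16-bit encoding, destination == source:    eors r0, r1       (f2)
; 32-bit encoding, three distinct registers: eor.w r0, r1, r2  (f2b)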
Modified: llvm/branches/wendling/eh/test/CodeGen/Thumb2/thumb2-ifcvt2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/Thumb2/thumb2-ifcvt2.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/Thumb2/thumb2-ifcvt2.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/Thumb2/thumb2-ifcvt2.ll Fri Jul 2 04:57:13 2010
@@ -31,7 +31,8 @@
; CHECK: CountTree:
; CHECK: it eq
; CHECK: cmpeq
-; CHECK: beq
+; CHECK: bne
+; CHECK: cmp
; CHECK: itt eq
; CHECK: moveq
; CHECK: popeq
Modified: llvm/branches/wendling/eh/test/CodeGen/Thumb2/thumb2-ifcvt3.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/Thumb2/thumb2-ifcvt3.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/Thumb2/thumb2-ifcvt3.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/Thumb2/thumb2-ifcvt3.ll Fri Jul 2 04:57:13 2010
@@ -23,7 +23,7 @@
; CHECK: movne
; CHECK: moveq
; CHECK: pop
-; CHECK-NEXT: LBB0_1:
+; CHECK-NEXT: @ BB#1:
%0 = load i64* @posed, align 4 ; <i64> [#uses=3]
%1 = sub i64 %0, %.reload78 ; <i64> [#uses=1]
%2 = ashr i64 %1, 1 ; <i64> [#uses=3]
Modified: llvm/branches/wendling/eh/test/CodeGen/Thumb2/thumb2-select_xform.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/Thumb2/thumb2-select_xform.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/Thumb2/thumb2-select_xform.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/Thumb2/thumb2-select_xform.ll Fri Jul 2 04:57:13 2010
@@ -3,8 +3,8 @@
define i32 @t1(i32 %a, i32 %b, i32 %c) nounwind {
; CHECK: t1
; CHECK: sub.w r0, r1, #-2147483648
+; CHECK: subs r0, #1
; CHECK: cmp r2, #10
-; CHECK: sub.w r0, r0, #1
; CHECK: it gt
; CHECK: movgt r0, r1
%tmp1 = icmp sgt i32 %c, 10
Modified: llvm/branches/wendling/eh/test/CodeGen/Thumb2/thumb2-uxtb.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/Thumb2/thumb2-uxtb.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/Thumb2/thumb2-uxtb.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/Thumb2/thumb2-uxtb.ll Fri Jul 2 04:57:13 2010
@@ -1,47 +1,72 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2,+t2xtpk | FileCheck %s
+; RUN: llc < %s -march=thumb -mcpu=cortex-a8 | FileCheck %s -check-prefix=ARMv7A
+; RUN: llc < %s -march=thumb -mcpu=cortex-m3 | FileCheck %s -check-prefix=ARMv7M
define i32 @test1(i32 %x) {
-; CHECK: test1
-; CHECK: uxtb16 r0, r0
+; ARMv7A: test1
+; ARMv7A: uxtb16 r0, r0
+
+; ARMv7M: test1
+; ARMv7M: and r0, r0, #16711935
%tmp1 = and i32 %x, 16711935 ; <i32> [#uses=1]
ret i32 %tmp1
}
+; PR7503
define i32 @test2(i32 %x) {
-; CHECK: test2
-; CHECK: uxtb16 r0, r0, ror #8
+; ARMv7A: test2
+; ARMv7A: uxtb16 r0, r0, ror #8
+
+; ARMv7M: test2
+; ARMv7M: mov.w r1, #16711935
+; ARMv7M: and.w r0, r1, r0, lsr #8
%tmp1 = lshr i32 %x, 8 ; <i32> [#uses=1]
%tmp2 = and i32 %tmp1, 16711935 ; <i32> [#uses=1]
ret i32 %tmp2
}
define i32 @test3(i32 %x) {
-; CHECK: test3
-; CHECK: uxtb16 r0, r0, ror #8
+; ARMv7A: test3
+; ARMv7A: uxtb16 r0, r0, ror #8
+
+; ARMv7M: test3
+; ARMv7M: mov.w r1, #16711935
+; ARMv7M: and.w r0, r1, r0, lsr #8
%tmp1 = lshr i32 %x, 8 ; <i32> [#uses=1]
%tmp2 = and i32 %tmp1, 16711935 ; <i32> [#uses=1]
ret i32 %tmp2
}
define i32 @test4(i32 %x) {
-; CHECK: test4
-; CHECK: uxtb16 r0, r0, ror #8
+; ARMv7A: test4
+; ARMv7A: uxtb16 r0, r0, ror #8
+
+; ARMv7M: test4
+; ARMv7M: mov.w r1, #16711935
+; ARMv7M: and.w r0, r1, r0, lsr #8
%tmp1 = lshr i32 %x, 8 ; <i32> [#uses=1]
%tmp6 = and i32 %tmp1, 16711935 ; <i32> [#uses=1]
ret i32 %tmp6
}
define i32 @test5(i32 %x) {
-; CHECK: test5
-; CHECK: uxtb16 r0, r0, ror #8
+; ARMv7A: test5
+; ARMv7A: uxtb16 r0, r0, ror #8
+
+; ARMv7M: test5
+; ARMv7M: mov.w r1, #16711935
+; ARMv7M: and.w r0, r1, r0, lsr #8
%tmp1 = lshr i32 %x, 8 ; <i32> [#uses=1]
%tmp2 = and i32 %tmp1, 16711935 ; <i32> [#uses=1]
ret i32 %tmp2
}
define i32 @test6(i32 %x) {
-; CHECK: test6
-; CHECK: uxtb16 r0, r0, ror #16
+; ARMv7A: test6
+; ARMv7A: uxtb16 r0, r0, ror #16
+
+; ARMv7M: test6
+; ARMv7M: mov.w r1, #16711935
+; ARMv7M: and.w r0, r1, r0, ror #16
%tmp1 = lshr i32 %x, 16 ; <i32> [#uses=1]
%tmp2 = and i32 %tmp1, 255 ; <i32> [#uses=1]
%tmp4 = shl i32 %x, 16 ; <i32> [#uses=1]
@@ -51,8 +76,12 @@
}
define i32 @test7(i32 %x) {
-; CHECK: test7
-; CHECK: uxtb16 r0, r0, ror #16
+; ARMv7A: test7
+; ARMv7A: uxtb16 r0, r0, ror #16
+
+; ARMv7M: test7
+; ARMv7M: mov.w r1, #16711935
+; ARMv7M: and.w r0, r1, r0, ror #16
%tmp1 = lshr i32 %x, 16 ; <i32> [#uses=1]
%tmp2 = and i32 %tmp1, 255 ; <i32> [#uses=1]
%tmp4 = shl i32 %x, 16 ; <i32> [#uses=1]
@@ -62,8 +91,12 @@
}
define i32 @test8(i32 %x) {
-; CHECK: test8
-; CHECK: uxtb16 r0, r0, ror #24
+; ARMv7A: test8
+; ARMv7A: uxtb16 r0, r0, ror #24
+
+; ARMv7M: test8
+; ARMv7M: mov.w r1, #16711935
+; ARMv7M: and.w r0, r1, r0, ror #24
%tmp1 = shl i32 %x, 8 ; <i32> [#uses=1]
%tmp2 = and i32 %tmp1, 16711680 ; <i32> [#uses=1]
%tmp5 = lshr i32 %x, 24 ; <i32> [#uses=1]
@@ -72,8 +105,12 @@
}
define i32 @test9(i32 %x) {
-; CHECK: test9
-; CHECK: uxtb16 r0, r0, ror #24
+; ARMv7A: test9
+; ARMv7A: uxtb16 r0, r0, ror #24
+
+; ARMv7M: test9
+; ARMv7M: mov.w r1, #16711935
+; ARMv7M: and.w r0, r1, r0, ror #24
%tmp1 = lshr i32 %x, 24 ; <i32> [#uses=1]
%tmp4 = shl i32 %x, 8 ; <i32> [#uses=1]
%tmp5 = and i32 %tmp4, 16711680 ; <i32> [#uses=1]
@@ -82,13 +119,19 @@
}
define i32 @test10(i32 %p0) {
-; CHECK: test10
-; CHECK: mov.w r1, #16253176
-; CHECK: and.w r0, r1, r0, lsr #7
-; CHECK: lsrs r1, r0, #5
-; CHECK: uxtb16 r1, r1
-; CHECK: orr.w r0, r1, r0
-
+; ARMv7A: test10
+; ARMv7A: mov.w r1, #16253176
+; ARMv7A: and.w r0, r1, r0, lsr #7
+; ARMv7A: lsrs r1, r0, #5
+; ARMv7A: uxtb16 r1, r1
+; ARMv7A: orrs r0, r1
+
+; ARMv7M: test10
+; ARMv7M: mov.w r1, #16253176
+; ARMv7M: and.w r0, r1, r0, lsr #7
+; ARMv7M: mov.w r1, #458759
+; ARMv7M: and.w r1, r1, r0, lsr #5
+; ARMv7M: orrs r0, r1
%tmp1 = lshr i32 %p0, 7 ; <i32> [#uses=1]
%tmp2 = and i32 %tmp1, 16253176 ; <i32> [#uses=2]
%tmp4 = lshr i32 %tmp2, 5 ; <i32> [#uses=1]
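A note on the magic constant (worked out here, not stated in the patch): 16711935 is 0x00FF00FF, the mask uxtb16 applies when it zero-extends bytes 0 and 2 of its operand. Cortex-A8 implements the t2xtpk instructions, so it can use uxtb16 directly; Cortex-M3 omits them, so it materializes the mask and ANDs, which is what the ARMv7M prefix now checks for.

; uxtb16 r0, r0          <=>  and r0, r0, #0x00FF00FF   (= 16711935)
; uxtb16 r0, r0, ror #8  <=>  the same mask applied after rotating by 8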
Modified: llvm/branches/wendling/eh/test/CodeGen/X86/2006-11-17-IllegalMove.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/X86/2006-11-17-IllegalMove.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/X86/2006-11-17-IllegalMove.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/X86/2006-11-17-IllegalMove.ll Fri Jul 2 04:57:13 2010
@@ -15,14 +15,14 @@
%tmp99 = udiv i64 0, 0 ; <i64> [#uses=1]
%tmp = load i8* null ; <i8> [#uses=1]
%tmp114 = icmp eq i64 0, 0 ; <i1> [#uses=1]
- br i1 %tmp114, label %cond_true115, label %cond_next136
+ br label %cond_true115
bb84: ; preds = %entry
ret void
cond_true115: ; preds = %bb77
%tmp118 = load i8* null ; <i8> [#uses=1]
- br i1 false, label %cond_next129, label %cond_true120
+ br label %cond_true120
cond_true120: ; preds = %cond_true115
%tmp127 = udiv i8 %tmp, %tmp118 ; <i8> [#uses=1]
@@ -30,7 +30,7 @@
br label %cond_next129
cond_next129: ; preds = %cond_true120, %cond_true115
- %iftmp.30.0 = phi i64 [ %tmp127.upgrd.1, %cond_true120 ], [ 0, %cond_true115 ] ; <i64> [#uses=1]
+ %iftmp.30.0 = phi i64 [ %tmp127.upgrd.1, %cond_true120 ] ; <i64> [#uses=1]
%tmp132 = icmp eq i64 %iftmp.30.0, %tmp99 ; <i1> [#uses=1]
br i1 %tmp132, label %cond_false148, label %cond_next136
Modified: llvm/branches/wendling/eh/test/CodeGen/X86/2007-01-08-InstrSched.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/X86/2007-01-08-InstrSched.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/X86/2007-01-08-InstrSched.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/X86/2007-01-08-InstrSched.ll Fri Jul 2 04:57:13 2010
@@ -11,12 +11,12 @@
%tmp14 = fadd float %tmp12, %tmp7
ret float %tmp14
-; CHECK: mulss LCPI0_0(%rip)
-; CHECK: mulss LCPI0_1(%rip)
+; CHECK: mulss
+; CHECK: mulss
; CHECK: addss
-; CHECK: mulss LCPI0_2(%rip)
+; CHECK: mulss
; CHECK: addss
-; CHECK: mulss LCPI0_3(%rip)
+; CHECK: mulss
; CHECK: addss
; CHECK: ret
}
Modified: llvm/branches/wendling/eh/test/CodeGen/X86/2007-10-12-CoalesceExtSubReg.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/X86/2007-10-12-CoalesceExtSubReg.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/X86/2007-10-12-CoalesceExtSubReg.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/X86/2007-10-12-CoalesceExtSubReg.ll Fri Jul 2 04:57:13 2010
@@ -9,10 +9,7 @@
%tmp149 = mul i32 0, %v.1 ; <i32> [#uses=0]
%tmp254 = and i32 0, 15 ; <i32> [#uses=1]
%tmp256 = and i32 0, 15 ; <i32> [#uses=2]
- br i1 false, label %cond_true267, label %cond_next391
-
-cond_true267: ; preds = %cond_next127
- ret i16 0
+ br label %cond_next391
cond_next391: ; preds = %cond_next127
%tmp393 = load i32* %ss, align 4 ; <i32> [#uses=1]
Modified: llvm/branches/wendling/eh/test/CodeGen/X86/2007-11-04-rip-immediate-constant.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/X86/2007-11-04-rip-immediate-constant.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/X86/2007-11-04-rip-immediate-constant.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/X86/2007-11-04-rip-immediate-constant.ll Fri Jul 2 04:57:13 2010
@@ -1,7 +1,7 @@
-; RUN: llc < %s -relocation-model=static | grep {foo _str$}
+; RUN: llc < %s -relocation-model=static | grep {foo str$}
; PR1761
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
-target triple = "x86_64-apple-darwin8"
+target triple = "x86_64-pc-linux"
@str = internal constant [12 x i8] c"init/main.c\00" ; <[12 x i8]*> [#uses=1]
define i32 @unknown_bootoption() {
Removed: llvm/branches/wendling/eh/test/CodeGen/X86/2007-11-14-Coalescer-Bug.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/X86/2007-11-14-Coalescer-Bug.ll?rev=107464&view=auto
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/X86/2007-11-14-Coalescer-Bug.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/X86/2007-11-14-Coalescer-Bug.ll (removed)
@@ -1,68 +0,0 @@
-; RUN: llc < %s -march=x86 -x86-asm-syntax=att | grep movl | count 2
-; RUN: llc < %s -march=x86 -x86-asm-syntax=att | not grep movb
-
- %struct.double_int = type { i64, i64 }
- %struct.tree_common = type <{ i8, [3 x i8] }>
- %struct.tree_int_cst = type { %struct.tree_common, %struct.double_int }
- %struct.tree_node = type { %struct.tree_int_cst }
-@tree_code_type = external constant [0 x i32] ; <[0 x i32]*> [#uses=1]
-
-define i32 @simple_cst_equal(%struct.tree_node* %t1, %struct.tree_node* %t2) nounwind {
-entry:
- %tmp2526 = bitcast %struct.tree_node* %t1 to i32* ; <i32*> [#uses=1]
- br i1 false, label %UnifiedReturnBlock, label %bb21
-
-bb21: ; preds = %entry
- %tmp27 = load i32* %tmp2526, align 4 ; <i32> [#uses=1]
- %tmp29 = and i32 %tmp27, 255 ; <i32> [#uses=3]
- %tmp2930 = trunc i32 %tmp29 to i8 ; <i8> [#uses=1]
- %tmp37 = load i32* null, align 4 ; <i32> [#uses=1]
- %tmp39 = and i32 %tmp37, 255 ; <i32> [#uses=2]
- %tmp3940 = trunc i32 %tmp39 to i8 ; <i8> [#uses=1]
- %tmp43 = add i32 %tmp29, -3 ; <i32> [#uses=1]
- %tmp44 = icmp ult i32 %tmp43, 3 ; <i1> [#uses=1]
- br i1 %tmp44, label %bb47.split, label %bb76
-
-bb47.split: ; preds = %bb21
- ret i32 0
-
-bb76: ; preds = %bb21
- br i1 false, label %bb82, label %bb146.split
-
-bb82: ; preds = %bb76
- %tmp94 = getelementptr [0 x i32]* @tree_code_type, i32 0, i32 %tmp39 ; <i32*> [#uses=1]
- %tmp95 = load i32* %tmp94, align 4 ; <i32> [#uses=1]
- %tmp9596 = trunc i32 %tmp95 to i8 ; <i8> [#uses=1]
- %tmp98 = add i8 %tmp9596, -4 ; <i8> [#uses=1]
- %tmp99 = icmp ugt i8 %tmp98, 5 ; <i1> [#uses=1]
- br i1 %tmp99, label %bb102, label %bb106
-
-bb102: ; preds = %bb82
- ret i32 0
-
-bb106: ; preds = %bb82
- ret i32 0
-
-bb146.split: ; preds = %bb76
- %tmp149 = icmp eq i8 %tmp2930, %tmp3940 ; <i1> [#uses=1]
- br i1 %tmp149, label %bb153, label %UnifiedReturnBlock
-
-bb153: ; preds = %bb146.split
- switch i32 %tmp29, label %UnifiedReturnBlock [
- i32 0, label %bb155
- i32 1, label %bb187
- ]
-
-bb155: ; preds = %bb153
- ret i32 0
-
-bb187: ; preds = %bb153
- %tmp198 = icmp eq %struct.tree_node* %t1, %t2 ; <i1> [#uses=1]
- br i1 %tmp198, label %bb201, label %UnifiedReturnBlock
-
-bb201: ; preds = %bb187
- ret i32 0
-
-UnifiedReturnBlock: ; preds = %bb187, %bb153, %bb146.split, %entry
- ret i32 0
-}
Modified: llvm/branches/wendling/eh/test/CodeGen/X86/2008-03-18-CoalescerBug.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/X86/2008-03-18-CoalescerBug.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/X86/2008-03-18-CoalescerBug.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/X86/2008-03-18-CoalescerBug.ll Fri Jul 2 04:57:13 2010
@@ -19,7 +19,7 @@
br i1 %tmp35, label %bb38, label %bb87.preheader
bb38: ; preds = %bb33
%tmp53 = add i32 %tmp19, %delta ; <i32> [#uses=2]
- br i1 false, label %bb50, label %bb43
+ br label %bb43
bb43: ; preds = %bb38
store i32 %tmp53, i32* null, align 4
ret void
Modified: llvm/branches/wendling/eh/test/CodeGen/X86/2008-04-09-BranchFolding.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/X86/2008-04-09-BranchFolding.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/X86/2008-04-09-BranchFolding.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/X86/2008-04-09-BranchFolding.ll Fri Jul 2 04:57:13 2010
@@ -10,7 +10,7 @@
define fastcc %struct.tree_node* @pushdecl(%struct.tree_node* %x) nounwind {
entry:
%tmp3.i40 = icmp eq %struct.binding_level* null, null ; <i1> [#uses=2]
- br i1 false, label %bb143, label %bb140
+ br label %bb140
bb140: ; preds = %entry
br i1 %tmp3.i40, label %bb160, label %bb17.i
bb17.i: ; preds = %bb140
Modified: llvm/branches/wendling/eh/test/CodeGen/X86/2008-08-23-X86-64AsmBug.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/X86/2008-08-23-X86-64AsmBug.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/X86/2008-08-23-X86-64AsmBug.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/X86/2008-08-23-X86-64AsmBug.ll Fri Jul 2 04:57:13 2010
@@ -7,7 +7,7 @@
define double @_Z7qstrtodPKcPS0_Pb(i8* %s00, i8** %se, i8* %ok) nounwind {
entry:
- br i1 false, label %bb151, label %bb163
+ br label %bb163
bb151: ; preds = %entry
br label %bb163
@@ -19,13 +19,13 @@
br label %bb5.i
bb5.i: ; preds = %bb5.i57.i, %bb163
- %b.0.i = phi %struct.Bigint* [ null, %bb163 ], [ %tmp9.i.i41.i, %bb5.i57.i ] ; <%struct.Bigint*> [#uses=1]
+ %b.0.i = phi %struct.Bigint* [ null, %bb163 ] ; <%struct.Bigint*> [#uses=1]
%tmp3.i7.i728 = load i32* null, align 4 ; <i32> [#uses=1]
br label %bb.i27.i
bb.i27.i: ; preds = %bb.i27.i, %bb5.i
%tmp23.i20.i = lshr i32 0, 16 ; <i32> [#uses=1]
- br i1 false, label %bb.i27.i, label %bb5.i57.i
+ br label %bb5.i57.i
bb5.i57.i: ; preds = %bb.i27.i
%tmp50.i35.i = load i32* null, align 4 ; <i32> [#uses=1]
@@ -41,7 +41,7 @@
store i32 %tmp23.i20.i, i32* null, align 4
%tmp74.i61.i = add i32 %tmp3.i7.i728, 1 ; <i32> [#uses=1]
store i32 %tmp74.i61.i, i32* null, align 4
- br i1 false, label %bb5.i, label %bb7.i
+ br label %bb7.i
bb7.i: ; preds = %bb5.i57.i
%tmp514 = load i32* null, align 4 ; <i32> [#uses=1]
Modified: llvm/branches/wendling/eh/test/CodeGen/X86/2008-09-18-inline-asm-2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/X86/2008-09-18-inline-asm-2.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/X86/2008-09-18-inline-asm-2.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/X86/2008-09-18-inline-asm-2.ll Fri Jul 2 04:57:13 2010
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=x86 | grep "#%ebp %esi %edi 8(%edx) %eax (%ebx)"
-; RUN: llc < %s -march=x86 -regalloc=fast | grep "#%edi %ebp %edx 8(%ebx) %eax (%esi)"
+; RUN: llc < %s -march=x86 | grep "#%ebp %edi %ebx 8(%esi) %eax %dl"
+; RUN: llc < %s -march=x86 -regalloc=fast | grep "#%ebx %esi %edi 8(%ebp) %eax %dl"
; The 1st, 2nd, 3rd and 5th registers above must all be different. The registers
; referenced in the 4th and 6th operands must not be the same as the 1st or 5th
Removed: llvm/branches/wendling/eh/test/CodeGen/X86/2008-10-16-SpillerBug.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/X86/2008-10-16-SpillerBug.ll?rev=107464&view=auto
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/X86/2008-10-16-SpillerBug.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/X86/2008-10-16-SpillerBug.ll (removed)
@@ -1,160 +0,0 @@
-; RUN: llc < %s -relocation-model=pic -disable-fp-elim -mtriple=i386-apple-darwin -stats |& grep asm-printer | grep 41
-; RUN: llc < %s -relocation-model=pic -disable-fp-elim -mtriple=i386-apple-darwin | FileCheck %s
-
- %struct.XXDActiveTextureTargets = type { i64, i64, i64, i64, i64, i64 }
- %struct.XXDAlphaTest = type { float, i16, i8, i8 }
- %struct.XXDArrayRange = type { i8, i8, i8, i8 }
- %struct.XXDBlendMode = type { i16, i16, i16, i16, %struct.XXTColor4, i16, i16, i8, i8, i8, i8 }
- %struct.XXDClearColor = type { double, %struct.XXTColor4, %struct.XXTColor4, float, i32 }
- %struct.XXDClipPlane = type { i32, [6 x %struct.XXTColor4] }
- %struct.XXDColorBuffer = type { i16, i8, i8, [8 x i16], i8, i8, i8, i8 }
- %struct.XXDColorMatrix = type { [16 x float]*, %struct.XXDImagingCC }
- %struct.XXDConvolution = type { %struct.XXTColor4, %struct.XXDImagingCC, i16, i16, [0 x i32], float*, i32, i32 }
- %struct.XXDDepthTest = type { i16, i16, i8, i8, i8, i8, double, double }
- %struct.XXDFixedFunction = type { %struct.YYToken* }
- %struct.XXDFogMode = type { %struct.XXTColor4, float, float, float, float, float, i16, i16, i16, i8, i8 }
- %struct.XXDHintMode = type { i16, i16, i16, i16, i16, i16, i16, i16, i16, i16 }
- %struct.XXDHistogram = type { %struct.XXTFixedColor4*, i32, i16, i8, i8 }
- %struct.XXDImagingCC = type { { float, float }, { float, float }, { float, float }, { float, float } }
- %struct.XXDImagingSubset = type { %struct.XXDConvolution, %struct.XXDConvolution, %struct.XXDConvolution, %struct.XXDColorMatrix, %struct.XXDMinmax, %struct.XXDHistogram, %struct.XXDImagingCC, %struct.XXDImagingCC, %struct.XXDImagingCC, %struct.XXDImagingCC, i32, [0 x i32] }
- %struct.XXDLight = type { %struct.XXTColor4, %struct.XXTColor4, %struct.XXTColor4, %struct.XXTColor4, %struct.XXTCoord3, float, float, float, float, float, %struct.XXTCoord3, float, %struct.XXTCoord3, float, %struct.XXTCoord3, float, float, float, float, float }
- %struct.XXDLightModel = type { %struct.XXTColor4, [8 x %struct.XXDLight], [2 x %struct.XXDMaterial], i32, i16, i16, i16, i8, i8, i8, i8, i8, i8 }
- %struct.XXDLightProduct = type { %struct.XXTColor4, %struct.XXTColor4, %struct.XXTColor4 }
- %struct.XXDLineMode = type { float, i32, i16, i16, i8, i8, i8, i8 }
- %struct.XXDLogicOp = type { i16, i8, i8 }
- %struct.XXDMaskMode = type { i32, [3 x i32], i8, i8, i8, i8, i8, i8, i8, i8 }
- %struct.XXDMaterial = type { %struct.XXTColor4, %struct.XXTColor4, %struct.XXTColor4, %struct.XXTColor4, float, float, float, float, [8 x %struct.XXDLightProduct], %struct.XXTColor4, [8 x i32] }
- %struct.XXDMinmax = type { %struct.XXDMinmaxTable*, i16, i8, i8, [0 x i32] }
- %struct.XXDMinmaxTable = type { %struct.XXTColor4, %struct.XXTColor4 }
- %struct.XXDMultisample = type { float, i8, i8, i8, i8, i8, i8, i8, i8 }
- %struct.XXDPipelineProgramState = type { i8, i8, i8, i8, [0 x i32], %struct.XXTColor4* }
- %struct.XXDPixelMap = type { i32*, float*, float*, float*, float*, float*, float*, float*, float*, i32*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
- %struct.XXDPixelMode = type { float, float, %struct.XXDPixelStore, %struct.XXDPixelTransfer, %struct.XXDPixelMap, %struct.XXDImagingSubset, i32, i32 }
- %struct.XXDPixelPack = type { i32, i32, i32, i32, i32, i32, i32, i32, i8, i8, i8, i8 }
- %struct.XXDPixelStore = type { %struct.XXDPixelPack, %struct.XXDPixelPack }
- %struct.XXDPixelTransfer = type { float, float, float, float, float, float, float, float, float, float, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float }
- %struct.XXDPointMode = type { float, float, float, float, %struct.XXTCoord3, float, i8, i8, i8, i8, i16, i16, i32, i16, i16 }
- %struct.XXDPolygonMode = type { [128 x i8], float, float, i16, i16, i16, i16, i8, i8, i8, i8, i8, i8, i8, i8 }
- %struct.XXDRegisterCombiners = type { i8, i8, i8, i8, i32, [2 x %struct.XXTColor4], [8 x %struct.XXDRegisterCombinersPerStageState], %struct.XXDRegisterCombinersFinalStageState }
- %struct.XXDRegisterCombinersFinalStageState = type { i8, i8, i8, i8, [7 x %struct.XXDRegisterCombinersPerVariableState] }
- %struct.XXDRegisterCombinersPerPortionState = type { [4 x %struct.XXDRegisterCombinersPerVariableState], i8, i8, i8, i8, i16, i16, i16, i16, i16, i16 }
- %struct.XXDRegisterCombinersPerStageState = type { [2 x %struct.XXDRegisterCombinersPerPortionState], [2 x %struct.XXTColor4] }
- %struct.XXDRegisterCombinersPerVariableState = type { i16, i16, i16, i16 }
- %struct.XXDScissorTest = type { %struct.XXTFixedColor4, i8, i8, i8, i8 }
- %struct.XXDState = type <{ i16, i16, i16, i16, i32, i32, [256 x %struct.XXTColor4], [128 x %struct.XXTColor4], %struct.XXDViewport, %struct.XXDTransform, %struct.XXDLightModel, %struct.XXDActiveTextureTargets, %struct.XXDAlphaTest, %struct.XXDBlendMode, %struct.XXDClearColor, %struct.XXDColorBuffer, %struct.XXDDepthTest, %struct.XXDArrayRange, %struct.XXDFogMode, %struct.XXDHintMode, %struct.XXDLineMode, %struct.XXDLogicOp, %struct.XXDMaskMode, %struct.XXDPixelMode, %struct.XXDPointMode, %struct.XXDPolygonMode, %struct.XXDScissorTest, i32, %struct.XXDStencilTest, [8 x %struct.XXDTextureMode], [16 x %struct.XXDTextureImageMode], %struct.XXDArrayRange, [8 x %struct.XXDTextureCoordGen], %struct.XXDClipPlane, %struct.XXDMultisample, %struct.XXDRegisterCombiners, %struct.XXDArrayRange, %struct.XXDArrayRange, [3 x %struct.XXDPipelineProgramState], %struct.XXDArrayRange, %struct.XXDTransformFeedback, i32*, %struct.XXDFixedFunction, [3 x i32], [2 x i32] }>
- %struct.XXDStencilTest = type { [3 x { i32, i32, i16, i16, i16, i16 }], i32, [4 x i8] }
- %struct.XXDTextureCoordGen = type { { i16, i16, %struct.XXTColor4, %struct.XXTColor4 }, { i16, i16, %struct.XXTColor4, %struct.XXTColor4 }, { i16, i16, %struct.XXTColor4, %struct.XXTColor4 }, { i16, i16, %struct.XXTColor4, %struct.XXTColor4 }, i8, i8, i8, i8 }
- %struct.XXDTextureImageMode = type { float }
- %struct.XXDTextureMode = type { %struct.XXTColor4, i32, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, float, float, i16, i16, i16, i16, i16, i16, [4 x i16], i8, i8, i8, i8, [3 x float], [4 x float], float, float }
- %struct.XXDTextureRec = type opaque
- %struct.XXDTransform = type <{ [24 x [16 x float]], [24 x [16 x float]], [16 x float], float, float, float, float, float, i8, i8, i8, i8, i32, i32, i32, i16, i16, i8, i8, i8, i8, i32 }>
- %struct.XXDTransformFeedback = type { i8, i8, i8, i8, [0 x i32], [16 x i32], [16 x i32] }
- %struct.XXDViewport = type { float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, double, double, i32, i32, i32, i32, float, float, float, float }
- %struct.XXTColor4 = type { float, float, float, float }
- %struct.XXTCoord3 = type { float, float, float }
- %struct.XXTFixedColor4 = type { i32, i32, i32, i32 }
- %struct.XXVMTextures = type { [16 x %struct.XXDTextureRec*] }
- %struct.XXVMVPContext = type { i32 }
- %struct.XXVMVPStack = type { i32, i32 }
- %struct.YYToken = type { { i16, i16, i32 } }
- %struct._XXVMConstants = type { <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, float, float, float, float, float, float, float, float, float, float, float, float, [256 x float], [4096 x i8], [8 x float], [48 x float], [128 x float], [528 x i8], { void (i8*, i8*, i32, i8*)*, float (float)*, float (float)*, float (float)*, i32 (float)* } }
-@llvm.used = appending global [1 x i8*] [ i8* bitcast (void (%struct.XXDState*, <4 x float>*, <4 x float>**, %struct._XXVMConstants*, %struct.YYToken*, %struct.XXVMVPContext*, %struct.XXVMTextures*, %struct.XXVMVPStack*, <4 x float>*, <4 x float>*, <4 x float>*, <4 x float>*, <4 x float>*, <4 x float>*, <4 x float>*, <4 x float>*, [4 x <4 x float>]*, i32*, <4 x i32>*, i64)* @t to i8*) ], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0]
-
-define void @t(%struct.XXDState* %gldst, <4 x float>* %prgrm, <4 x float>** %buffs, %struct._XXVMConstants* %cnstn, %struct.YYToken* %pstrm, %struct.XXVMVPContext* %vmctx, %struct.XXVMTextures* %txtrs, %struct.XXVMVPStack* %vpstk, <4 x float>* %atr0, <4 x float>* %atr1, <4 x float>* %atr2, <4 x float>* %atr3, <4 x float>* %vtx0, <4 x float>* %vtx1, <4 x float>* %vtx2, <4 x float>* %vtx3, [4 x <4 x float>]* %tmpGbl, i32* %oldMsk, <4 x i32>* %adrGbl, i64 %key_token) nounwind {
-entry:
-; CHECK: t:
- %0 = trunc i64 %key_token to i32 ; <i32> [#uses=1]
- %1 = getelementptr %struct.YYToken* %pstrm, i32 %0 ; <%struct.YYToken*> [#uses=5]
- br label %bb1132
-
-bb51: ; preds = %bb1132
-; CHECK: .align 4
-; CHECK: xorl %ecx, %ecx
-; CHECK: andl $7
- %2 = getelementptr %struct.YYToken* %1, i32 %operation.0.rec, i32 0, i32 0 ; <i16*> [#uses=1]
- %3 = load i16* %2, align 1 ; <i16> [#uses=3]
- %4 = lshr i16 %3, 6 ; <i16> [#uses=1]
- %5 = trunc i16 %4 to i8 ; <i8> [#uses=1]
- %6 = zext i8 %5 to i32 ; <i32> [#uses=1]
- %7 = trunc i16 %3 to i8 ; <i8> [#uses=1]
- %8 = and i8 %7, 7 ; <i8> [#uses=1]
- %mask5556 = zext i8 %8 to i32 ; <i32> [#uses=3]
- %.sum1324 = add i32 %mask5556, 2 ; <i32> [#uses=1]
- %.rec = add i32 %operation.0.rec, %.sum1324 ; <i32> [#uses=1]
- %9 = bitcast %struct.YYToken* %operation.0 to i32* ; <i32*> [#uses=1]
- %10 = load i32* %9, align 1 ; <i32> [#uses=1]
- %11 = lshr i32 %10, 16 ; <i32> [#uses=2]
- %12 = trunc i32 %11 to i8 ; <i8> [#uses=1]
- %13 = and i8 %12, 1 ; <i8> [#uses=1]
- %14 = lshr i16 %3, 15 ; <i16> [#uses=1]
- %15 = trunc i16 %14 to i8 ; <i8> [#uses=1]
- %16 = or i8 %13, %15 ; <i8> [#uses=1]
- %17 = icmp eq i8 %16, 0 ; <i1> [#uses=1]
- br i1 %17, label %bb94, label %bb75
-
-bb75: ; preds = %bb51
- %18 = getelementptr %struct.YYToken* %1, i32 0, i32 0, i32 0 ; <i16*> [#uses=1]
- %19 = load i16* %18, align 4 ; <i16> [#uses=1]
- %20 = load i16* null, align 2 ; <i16> [#uses=1]
- %21 = zext i16 %19 to i64 ; <i64> [#uses=1]
- %22 = zext i16 %20 to i64 ; <i64> [#uses=1]
- %23 = shl i64 %22, 16 ; <i64> [#uses=1]
- %.ins1177 = or i64 %23, %21 ; <i64> [#uses=1]
- %.ins1175 = or i64 %.ins1177, 0 ; <i64> [#uses=1]
- %24 = and i32 %11, 1 ; <i32> [#uses=1]
- %.neg1333 = sub i32 %mask5556, %24 ; <i32> [#uses=1]
- %.neg1335 = sub i32 %.neg1333, 0 ; <i32> [#uses=1]
- %25 = sub i32 %.neg1335, 0 ; <i32> [#uses=1]
- br label %bb94
-
-bb94: ; preds = %bb75, %bb51
- %extraToken.0 = phi i64 [ %.ins1175, %bb75 ], [ %extraToken.1, %bb51 ] ; <i64> [#uses=1]
- %argCount.0 = phi i32 [ %25, %bb75 ], [ %mask5556, %bb51 ] ; <i32> [#uses=1]
- %operation.0.sum1392 = add i32 %operation.0.rec, 1 ; <i32> [#uses=2]
- %26 = getelementptr %struct.YYToken* %1, i32 %operation.0.sum1392, i32 0, i32 0 ; <i16*> [#uses=1]
- %27 = load i16* %26, align 4 ; <i16> [#uses=1]
- %28 = getelementptr %struct.YYToken* %1, i32 %operation.0.sum1392, i32 0, i32 1 ; <i16*> [#uses=1]
- %29 = load i16* %28, align 2 ; <i16> [#uses=1]
- store i16 %27, i16* null, align 8
- store i16 %29, i16* null, align 2
- br i1 false, label %bb1132, label %bb110
-
-bb110: ; preds = %bb94
- switch i32 %6, label %bb1078 [
- i32 30, label %bb960
- i32 32, label %bb801
- i32 38, label %bb809
- i32 78, label %bb1066
- ]
-
-bb801: ; preds = %bb110
- unreachable
-
-bb809: ; preds = %bb110
- unreachable
-
-bb960: ; preds = %bb110
- %30 = icmp eq i32 %argCount.0, 1 ; <i1> [#uses=1]
- br i1 %30, label %bb962, label %bb965
-
-bb962: ; preds = %bb960
- unreachable
-
-bb965: ; preds = %bb960
- unreachable
-
-bb1066: ; preds = %bb110
- unreachable
-
-bb1078: ; preds = %bb110
- unreachable
-
-bb1132: ; preds = %bb94, %entry
- %extraToken.1 = phi i64 [ undef, %entry ], [ %extraToken.0, %bb94 ] ; <i64> [#uses=1]
- %operation.0.rec = phi i32 [ 0, %entry ], [ %.rec, %bb94 ] ; <i32> [#uses=4]
- %operation.0 = getelementptr %struct.YYToken* %1, i32 %operation.0.rec ; <%struct.YYToken*> [#uses=1]
- br i1 false, label %bb1134, label %bb51
-
-bb1134: ; preds = %bb1132
- ret void
-}
Removed: llvm/branches/wendling/eh/test/CodeGen/X86/2009-01-12-CoalescerBug.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/X86/2009-01-12-CoalescerBug.ll?rev=107464&view=auto
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/X86/2009-01-12-CoalescerBug.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/X86/2009-01-12-CoalescerBug.ll (removed)
@@ -1,84 +0,0 @@
-; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | grep movq | count 2
-; PR3311
-
- %struct.CUMULATIVE_ARGS = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
- %struct.VEC_basic_block_base = type { i32, i32, [1 x %struct.basic_block_def*] }
- %struct.VEC_basic_block_gc = type { %struct.VEC_basic_block_base }
- %struct.VEC_edge_base = type { i32, i32, [1 x %struct.edge_def*] }
- %struct.VEC_edge_gc = type { %struct.VEC_edge_base }
- %struct.VEC_rtx_base = type { i32, i32, [1 x %struct.rtx_def*] }
- %struct.VEC_rtx_gc = type { %struct.VEC_rtx_base }
- %struct.VEC_temp_slot_p_base = type { i32, i32, [1 x %struct.temp_slot*] }
- %struct.VEC_temp_slot_p_gc = type { %struct.VEC_temp_slot_p_base }
- %struct.VEC_tree_base = type { i32, i32, [1 x %struct.tree_node*] }
- %struct.VEC_tree_gc = type { %struct.VEC_tree_base }
- %struct._obstack_chunk = type { i8*, %struct._obstack_chunk*, [4 x i8] }
- %struct.basic_block_def = type { %struct.tree_node*, %struct.VEC_edge_gc*, %struct.VEC_edge_gc*, i8*, %struct.loop*, [2 x %struct.et_node*], %struct.basic_block_def*, %struct.basic_block_def*, %struct.basic_block_il_dependent, %struct.tree_node*, %struct.edge_prediction*, i64, i32, i32, i32, i32 }
- %struct.basic_block_il_dependent = type { %struct.rtl_bb_info* }
- %struct.bitmap_element_def = type { %struct.bitmap_element_def*, %struct.bitmap_element_def*, i32, [2 x i64] }
- %struct.bitmap_head_def = type { %struct.bitmap_element_def*, %struct.bitmap_element_def*, i32, %struct.bitmap_obstack* }
- %struct.bitmap_obstack = type { %struct.bitmap_element_def*, %struct.bitmap_head_def*, %struct.obstack }
- %struct.block_symbol = type { [3 x %struct.rtunion], %struct.object_block*, i64 }
- %struct.c_arg_info = type { %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, i8 }
- %struct.c_language_function = type { %struct.stmt_tree_s }
- %struct.c_switch = type opaque
- %struct.control_flow_graph = type { %struct.basic_block_def*, %struct.basic_block_def*, %struct.VEC_basic_block_gc*, i32, i32, i32, %struct.VEC_basic_block_gc*, i32 }
- %struct.edge_def = type { %struct.basic_block_def*, %struct.basic_block_def*, %struct.edge_def_insns, i8*, %struct.location_t*, i32, i32, i64, i32 }
- %struct.edge_def_insns = type { %struct.rtx_def* }
- %struct.edge_prediction = type opaque
- %struct.eh_status = type opaque
- %struct.emit_status = type { i32, i32, %struct.rtx_def*, %struct.rtx_def*, %struct.sequence_stack*, i32, %struct.location_t, i32, i8*, %struct.rtx_def** }
- %struct.et_node = type opaque
- %struct.expr_status = type { i32, i32, i32, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def* }
- %struct.function = type { %struct.eh_status*, %struct.expr_status*, %struct.emit_status*, %struct.varasm_status*, %struct.control_flow_graph*, %struct.tree_node*, %struct.function*, i32, i32, i32, i32, %struct.rtx_def*, %struct.CUMULATIVE_ARGS, %struct.rtx_def*, %struct.rtx_def*, %struct.initial_value_struct*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, i8, i32, i64, %struct.tree_node*, %struct.tree_node*, %struct.rtx_def*, %struct.VEC_temp_slot_p_gc*, %struct.temp_slot*, %struct.var_refs_queue*, i32, i32, i32, i32, %struct.machine_function*, i32, i32, %struct.language_function*, %struct.htab*, %struct.rtx_def*, i32, i32, i32, %struct.location_t, %struct.VEC_tree_gc*, %struct.tree_node*, i8*, i8*, i8*, i8*, i8*, %struct.tree_node*, i8, i8, i8, i8, i8, i8 }
- %struct.htab = type { i32 (i8*)*, i32 (i8*, i8*)*, void (i8*)*, i8**, i64, i64, i64, i32, i32, i8* (i64, i64)*, void (i8*)*, i8*, i8* (i8*, i64, i64)*, void (i8*, i8*)*, i32 }
- %struct.initial_value_struct = type opaque
- %struct.lang_decl = type { i8 }
- %struct.language_function = type { %struct.c_language_function, %struct.tree_node*, %struct.tree_node*, %struct.c_switch*, %struct.c_arg_info*, i32, i32, i32, i32 }
- %struct.location_t = type { i8*, i32 }
- %struct.loop = type opaque
- %struct.machine_function = type { %struct.stack_local_entry*, i8*, %struct.rtx_def*, i32, i32, [4 x i32], i32, i32, i32 }
- %struct.object_block = type { %struct.section*, i32, i64, %struct.VEC_rtx_gc*, %struct.VEC_rtx_gc* }
- %struct.obstack = type { i64, %struct._obstack_chunk*, i8*, i8*, i8*, i64, i32, %struct._obstack_chunk* (i8*, i64)*, void (i8*, %struct._obstack_chunk*)*, i8*, i8 }
- %struct.omp_clause_subcode = type { i32 }
- %struct.rtl_bb_info = type { %struct.rtx_def*, %struct.rtx_def*, %struct.bitmap_head_def*, %struct.bitmap_head_def*, %struct.rtx_def*, %struct.rtx_def*, i32 }
- %struct.rtunion = type { i8* }
- %struct.rtx_def = type { i16, i8, i8, %struct.u }
- %struct.section = type { %struct.unnamed_section }
- %struct.sequence_stack = type { %struct.rtx_def*, %struct.rtx_def*, %struct.sequence_stack* }
- %struct.stack_local_entry = type opaque
- %struct.stmt_tree_s = type { %struct.tree_node*, i32 }
- %struct.temp_slot = type opaque
- %struct.tree_common = type { %struct.tree_node*, %struct.tree_node*, %union.tree_ann_d*, i8, i8, i8, i8, i8 }
- %struct.tree_decl_common = type { %struct.tree_decl_minimal, %struct.tree_node*, i8, i8, i8, i8, i8, i32, i32, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, i64, %struct.lang_decl* }
- %struct.tree_decl_minimal = type { %struct.tree_common, %struct.location_t, i32, %struct.tree_node*, %struct.tree_node* }
- %struct.tree_decl_non_common = type { %struct.tree_decl_with_vis, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node* }
- %struct.tree_decl_with_rtl = type { %struct.tree_decl_common, %struct.rtx_def*, i32 }
- %struct.tree_decl_with_vis = type { %struct.tree_decl_with_rtl, %struct.tree_node*, %struct.tree_node*, i8, i8, i8, i8 }
- %struct.tree_function_decl = type { %struct.tree_decl_non_common, i32, i8, i8, i64, %struct.function* }
- %struct.tree_node = type { %struct.tree_function_decl }
- %struct.u = type { %struct.block_symbol }
- %struct.unnamed_section = type { %struct.omp_clause_subcode, void (i8*)*, i8*, %struct.section* }
- %struct.var_refs_queue = type { %struct.rtx_def*, i32, i32, %struct.var_refs_queue* }
- %struct.varasm_status = type opaque
- %union.tree_ann_d = type opaque
-@.str1 = external constant [31 x i8] ; <[31 x i8]*> [#uses=1]
-@integer_types = external global [11 x %struct.tree_node*] ; <[11 x %struct.tree_node*]*> [#uses=1]
-@__FUNCTION__.31164 = external constant [23 x i8], align 16 ; <[23 x i8]*> [#uses=1]
-@llvm.used = appending global [1 x i8*] [ i8* bitcast (i32 (i32, i32)* @c_common_type_for_size to i8*) ], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0]
-
-define i32 @c_common_type_for_size(i32 %bits, i32 %unsignedp) nounwind {
-entry:
- %0 = load %struct.tree_node** getelementptr ([11 x %struct.tree_node*]* @integer_types, i32 0, i64 5), align 8 ; <%struct.tree_node*> [#uses=1]
- br i1 false, label %bb16, label %bb
-
-bb: ; preds = %entry
- tail call void @tree_class_check_failed(%struct.tree_node* %0, i32 2, i8* getelementptr ([31 x i8]* @.str1, i32 0, i64 0), i32 1785, i8* getelementptr ([23 x i8]* @__FUNCTION__.31164, i32 0, i32 0)) noreturn nounwind
- unreachable
-
-bb16: ; preds = %entry
- %tmp = add i32 %bits, %unsignedp ; <i32> [#uses=1]
- ret i32 %tmp
-}
-
-declare void @tree_class_check_failed(%struct.tree_node*, i32, i8*, i32, i8*) noreturn
Modified: llvm/branches/wendling/eh/test/CodeGen/X86/2009-08-23-linkerprivate.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/X86/2009-08-23-linkerprivate.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/X86/2009-08-23-linkerprivate.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/X86/2009-08-23-linkerprivate.ll Fri Jul 2 04:57:13 2010
@@ -2,7 +2,7 @@
; ModuleID = '/Volumes/MacOS9/tests/WebKit/JavaScriptCore/profiler/ProfilerServer.mm'
-@"\01l_objc_msgSend_fixup_alloc" = linker_private hidden global i32 0, section "__DATA, __objc_msgrefs, coalesced", align 16 ; <i32*> [#uses=0]
+@"\01l_objc_msgSend_fixup_alloc" = linker_private_weak hidden global i32 0, section "__DATA, __objc_msgrefs, coalesced", align 16
; CHECK: .globl l_objc_msgSend_fixup_alloc
; CHECK: .weak_definition l_objc_msgSend_fixup_alloc
Removed: llvm/branches/wendling/eh/test/CodeGen/X86/2009-09-07-CoalescerBug.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/X86/2009-09-07-CoalescerBug.ll?rev=107464&view=auto
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/X86/2009-09-07-CoalescerBug.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/X86/2009-09-07-CoalescerBug.ll (removed)
@@ -1,47 +0,0 @@
-; RUN: llc < %s -mtriple=x86_64-unknown-freebsd7.2 -code-model=kernel | FileCheck %s
-; PR4689
-
-%struct.__s = type { [8 x i8] }
-%struct.pcb = type { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i16, i8* }
-%struct.pcpu = type { i32*, i32*, i32*, i32*, %struct.pcb*, i64, i32, i32, i32, i32 }
-
-define i64 @hammer_time(i64 %modulep, i64 %physfree) nounwind ssp noredzone noimplicitfloat {
-; CHECK: hammer_time:
-; CHECK: movq $Xrsvd, %rax
-; CHECK: movq $Xrsvd, %rcx
-entry:
- br i1 undef, label %if.then, label %if.end
-
-if.then: ; preds = %entry
- br label %if.end
-
-if.end: ; preds = %if.then, %entry
- br label %for.body
-
-for.body: ; preds = %for.inc, %if.end
- switch i32 undef, label %if.then76 [
- i32 9, label %for.inc
- i32 10, label %for.inc
- i32 11, label %for.inc
- i32 12, label %for.inc
- ]
-
-if.then76: ; preds = %for.body
- unreachable
-
-for.inc: ; preds = %for.body, %for.body, %for.body, %for.body
- br i1 undef, label %for.end, label %for.body
-
-for.end: ; preds = %for.inc
- call void asm sideeffect "mov $1,%gs:$0", "=*m,r,~{dirflag},~{fpsr},~{flags}"(%struct.__s* bitcast (%struct.pcb** getelementptr (%struct.pcpu* null, i32 0, i32 4) to %struct.__s*), i64 undef) nounwind
- br label %for.body170
-
-for.body170: ; preds = %for.body170, %for.end
- store i64 or (i64 and (i64 or (i64 ptrtoint (void (i32, i32, i32, i32)* @Xrsvd to i64), i64 2097152), i64 2162687), i64 or (i64 or (i64 and (i64 shl (i64 ptrtoint (void (i32, i32, i32, i32)* @Xrsvd to i64), i64 32), i64 -281474976710656), i64 140737488355328), i64 15393162788864)), i64* undef
- br i1 undef, label %for.end175, label %for.body170
-
-for.end175: ; preds = %for.body170
- unreachable
-}
-
-declare void @Xrsvd(i32, i32, i32, i32) ssp noredzone noimplicitfloat
Removed: llvm/branches/wendling/eh/test/CodeGen/X86/2009-09-19-SchedCustomLoweringBug.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/X86/2009-09-19-SchedCustomLoweringBug.ll?rev=107464&view=auto
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/X86/2009-09-19-SchedCustomLoweringBug.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/X86/2009-09-19-SchedCustomLoweringBug.ll (removed)
@@ -1,29 +0,0 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin10 -post-RA-scheduler=true | FileCheck %s
-
-; PR4958
-
-define i32 @main() nounwind ssp {
-entry:
-; CHECK: main:
- %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
- br label %bb
-
-bb: ; preds = %bb1, %entry
-; CHECK: addl $1
-; CHECK-NEXT: movl %e
-; CHECK-NEXT: adcl $0
- %i.0 = phi i64 [ 0, %entry ], [ %0, %bb1 ] ; <i64> [#uses=1]
- %0 = add nsw i64 %i.0, 1 ; <i64> [#uses=2]
- %1 = icmp sgt i32 0, 0 ; <i1> [#uses=1]
- br i1 %1, label %bb2, label %bb1
-
-bb1: ; preds = %bb
- %2 = icmp sle i64 %0, 1 ; <i1> [#uses=1]
- br i1 %2, label %bb, label %bb2
-
-bb2: ; preds = %bb1, %bb
- br label %return
-
-return: ; preds = %bb2
- ret i32 0
-}
Removed: llvm/branches/wendling/eh/test/CodeGen/X86/2009-12-12-CoalescerBug.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/X86/2009-12-12-CoalescerBug.ll?rev=107464&view=auto
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/X86/2009-12-12-CoalescerBug.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/X86/2009-12-12-CoalescerBug.ll (removed)
@@ -1,40 +0,0 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin | FileCheck %s
-
-define i32 @do_loop(i32* nocapture %sdp, i32* nocapture %ddp, i8* %mdp, i8* nocapture %cdp, i32 %w) nounwind readonly optsize ssp {
-entry:
- br label %bb
-
-bb: ; preds = %bb5, %entry
- %mask.1.in = load i8* undef, align 1 ; <i8> [#uses=3]
- %0 = icmp eq i8 %mask.1.in, 0 ; <i1> [#uses=1]
- br i1 %0, label %bb5, label %bb1
-
-bb1: ; preds = %bb
- br i1 undef, label %bb2, label %bb3
-
-bb2: ; preds = %bb1
-; CHECK: %bb2
-; CHECK: movb %ch, %al
- %1 = zext i8 %mask.1.in to i32 ; <i32> [#uses=1]
- %2 = zext i8 undef to i32 ; <i32> [#uses=1]
- %3 = mul i32 %2, %1 ; <i32> [#uses=1]
- %4 = add i32 %3, 1 ; <i32> [#uses=1]
- %5 = add i32 %4, 0 ; <i32> [#uses=1]
- %6 = lshr i32 %5, 8 ; <i32> [#uses=1]
- %retval12.i = trunc i32 %6 to i8 ; <i8> [#uses=1]
- br label %bb3
-
-bb3: ; preds = %bb2, %bb1
- %mask.0.in = phi i8 [ %retval12.i, %bb2 ], [ %mask.1.in, %bb1 ] ; <i8> [#uses=1]
- %7 = icmp eq i8 %mask.0.in, 0 ; <i1> [#uses=1]
- br i1 %7, label %bb5, label %bb4
-
-bb4: ; preds = %bb3
- br label %bb5
-
-bb5: ; preds = %bb4, %bb3, %bb
- br i1 undef, label %bb6, label %bb
-
-bb6: ; preds = %bb5
- ret i32 undef
-}
Modified: llvm/branches/wendling/eh/test/CodeGen/X86/2010-02-19-TailCallRetAddrBug.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/X86/2010-02-19-TailCallRetAddrBug.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/X86/2010-02-19-TailCallRetAddrBug.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/X86/2010-02-19-TailCallRetAddrBug.ll Fri Jul 2 04:57:13 2010
@@ -9,11 +9,11 @@
; lowering of arguments potentially overwrites the value.
;
; Move return address (76(%esp)) to a temporary register
-; CHECK: movl 76(%esp), %ebp
+; CHECK: movl 76(%esp), [[REGISTER:%[a-z]+]]
; Overwrite return address
-; CHECK: movl %ecx, 76(%esp)
+; CHECK: movl %ebx, 76(%esp)
; Move return address from the temporary register to its new stack location (60(%esp))
-; CHECK: movl %ebp, 60(%esp)
+; CHECK: movl [[REGISTER]], 60(%esp)
%tupl_p = type [9 x i32]*
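The CHECK rewrite above replaces a hard-coded %ebp with a FileCheck pattern variable, so the test no longer depends on which scratch register the allocator happens to pick. [[REGISTER:%[a-z]+]] binds the name REGISTER to whatever text matches the regex, and a later bare [[REGISTER]] must match that same text:

    ; CHECK: movl 76(%esp), [[REGISTER:%[a-z]+]]   defines REGISTER
    ; CHECK: movl [[REGISTER]], 60(%esp)           must reuse the same register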
Modified: llvm/branches/wendling/eh/test/CodeGen/X86/break-sse-dep.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/X86/break-sse-dep.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/X86/break-sse-dep.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/X86/break-sse-dep.ll Fri Jul 2 04:57:13 2010
@@ -4,7 +4,7 @@
entry:
; CHECK: t1:
; CHECK: movss (%rdi), %xmm0
-; CHECK; cvtss2sd %xmm0, %xmm0
+; CHECK: cvtss2sd %xmm0, %xmm0
%0 = load float* %x, align 4
%1 = fpext float %0 to double
@@ -14,7 +14,7 @@
define float @t2(double* nocapture %x) nounwind readonly ssp optsize {
entry:
; CHECK: t2:
-; CHECK; cvtsd2ss (%rdi), %xmm0
+; CHECK: cvtsd2ss (%rdi), %xmm0
%0 = load double* %x, align 8
%1 = fptrunc double %0 to float
ret float %1
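For context on the two one-character fixes above: FileCheck only recognizes "; CHECK:" (with a colon) as a directive. The old "; CHECK;" lines were plain comments as far as FileCheck was concerned, so the cvtss2sd and cvtsd2ss patterns were silently never verified until this change.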
Modified: llvm/branches/wendling/eh/test/CodeGen/X86/fast-isel-gep.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/X86/fast-isel-gep.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/X86/fast-isel-gep.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/X86/fast-isel-gep.ll Fri Jul 2 04:57:13 2010
@@ -51,3 +51,22 @@
; X64: ret
}
+
+define double @test4(i64 %x, double* %p) nounwind {
+entry:
+ %x.addr = alloca i64, align 8 ; <i64*> [#uses=2]
+ %p.addr = alloca double*, align 8 ; <double**> [#uses=2]
+ store i64 %x, i64* %x.addr
+ store double* %p, double** %p.addr
+ %tmp = load i64* %x.addr ; <i64> [#uses=1]
+ %add = add nsw i64 %tmp, 16 ; <i64> [#uses=1]
+ %tmp1 = load double** %p.addr ; <double*> [#uses=1]
+ %arrayidx = getelementptr inbounds double* %tmp1, i64 %add ; <double*> [#uses=1]
+ %tmp2 = load double* %arrayidx ; <double> [#uses=1]
+ ret double %tmp2
+
+; X32: test4:
+; X32: 128(%e{{.*}},%e{{.*}},8)
+; X64: test4:
+; X64: 128(%r{{.*}},%r{{.*}},8)
+}
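The addressing mode expected by the new test4 checks follows from the IR itself: the index is %add = %x + 16 and the element type is double (8 bytes), so folding the constant into the displacement yields 16 * 8 = 128, i.e. 128(%base,%index,8) on both X32 and X64. The test thus verifies that the constant part of an add feeding a GEP index is folded into the address rather than computed in a register.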
Modified: llvm/branches/wendling/eh/test/CodeGen/X86/fast-isel-shift-imm.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/X86/fast-isel-shift-imm.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/X86/fast-isel-shift-imm.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/X86/fast-isel-shift-imm.ll Fri Jul 2 04:57:13 2010
@@ -1,7 +1,8 @@
; RUN: llc < %s -march=x86 -O0 | grep {sarl \$80, %eax}
; PR3242
-define i32 @foo(i32 %x) nounwind {
+define void @foo(i32 %x, i32* %p) nounwind {
%y = ashr i32 %x, 50000
- ret i32 %y
+ store i32 %y, i32* %p
+ ret void
}
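The grep in the RUN line rests on a small arithmetic fact: the shift amount 50000 does not fit in the 8-bit immediate field of sarl, and truncation gives 50000 mod 256 = 80, hence {sarl $80, %eax}. At execution x86 additionally masks 32-bit shift counts to 5 bits, so the instruction behaves as a shift by 16; per PR3242 the test only cares that the oversized shift is emitted rather than crashing the selector.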
Removed: llvm/branches/wendling/eh/test/CodeGen/X86/imp-def-copies.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/X86/imp-def-copies.ll?rev=107464&view=auto
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/X86/imp-def-copies.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/X86/imp-def-copies.ll (removed)
@@ -1,29 +0,0 @@
-; RUN: llc < %s -march=x86 | not grep mov
-
- %struct.active_line = type { %struct.gs_fixed_point, %struct.gs_fixed_point, i32, i32, i32, %struct.line_segment*, i32, i16, i16, %struct.active_line*, %struct.active_line* }
- %struct.gs_fixed_point = type { i32, i32 }
- %struct.line_list = type { %struct.active_line*, i32, i16, %struct.active_line*, %struct.active_line*, %struct.active_line*, %struct.active_line, i32 }
- %struct.line_segment = type { %struct.line_segment*, %struct.line_segment*, i32, %struct.gs_fixed_point }
- %struct.subpath = type { %struct.line_segment*, %struct.line_segment*, i32, %struct.gs_fixed_point, %struct.line_segment*, i32, i32, i8 }
-
-define fastcc void @add_y_list(%struct.subpath* %ppath.0.4.val, i16 signext %tag, %struct.line_list* %ll, i32 %pbox.0.0.1.val, i32 %pbox.0.1.0.val, i32 %pbox.0.1.1.val) nounwind {
-entry:
- br i1 false, label %return, label %bb
-bb: ; preds = %bb280, %entry
- %psub.1.reg2mem.0 = phi %struct.subpath* [ %psub.0.reg2mem.0, %bb280 ], [ undef, %entry ] ; <%struct.subpath*> [#uses=1]
- %plast.1.reg2mem.0 = phi %struct.line_segment* [ %plast.0.reg2mem.0, %bb280 ], [ undef, %entry ] ; <%struct.line_segment*> [#uses=1]
- %prev_dir.0.reg2mem.0 = phi i32 [ %dir.0.reg2mem.0, %bb280 ], [ undef, %entry ] ; <i32> [#uses=1]
- br i1 false, label %bb280, label %bb109
-bb109: ; preds = %bb
- %tmp113 = icmp sgt i32 0, %prev_dir.0.reg2mem.0 ; <i1> [#uses=1]
- br i1 %tmp113, label %bb116, label %bb280
-bb116: ; preds = %bb109
- ret void
-bb280: ; preds = %bb109, %bb
- %psub.0.reg2mem.0 = phi %struct.subpath* [ null, %bb ], [ %psub.1.reg2mem.0, %bb109 ] ; <%struct.subpath*> [#uses=1]
- %plast.0.reg2mem.0 = phi %struct.line_segment* [ null, %bb ], [ %plast.1.reg2mem.0, %bb109 ] ; <%struct.line_segment*> [#uses=1]
- %dir.0.reg2mem.0 = phi i32 [ 0, %bb ], [ 0, %bb109 ] ; <i32> [#uses=1]
- br i1 false, label %return, label %bb
-return: ; preds = %bb280, %entry
- ret void
-}
Modified: llvm/branches/wendling/eh/test/CodeGen/X86/ins_subreg_coalesce-3.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/X86/ins_subreg_coalesce-3.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/X86/ins_subreg_coalesce-3.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/X86/ins_subreg_coalesce-3.ll Fri Jul 2 04:57:13 2010
@@ -39,8 +39,7 @@
%tmp659 = icmp eq i8 %tmp658, 0 ; <i1> [#uses=1]
br i1 %tmp659, label %bb650, label %bb662
bb662: ; preds = %bb650
- %tmp685 = icmp eq %struct.rec* null, null ; <i1> [#uses=1]
- br i1 %tmp685, label %bb761, label %bb688
+ br label %bb761
bb688: ; preds = %bb662
ret void
bb761: ; preds = %bb662
Modified: llvm/branches/wendling/eh/test/CodeGen/X86/loop-strength-reduce6.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/X86/loop-strength-reduce6.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/X86/loop-strength-reduce6.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/X86/loop-strength-reduce6.ll Fri Jul 2 04:57:13 2010
@@ -2,22 +2,22 @@
define fastcc i32 @decodeMP3(i32 %isize, i32* %done) nounwind {
entry:
- br i1 false, label %cond_next191, label %cond_true189
+ br label %cond_true189
cond_true189: ; preds = %entry
ret i32 0
cond_next191: ; preds = %entry
- br i1 false, label %cond_next37.i, label %cond_false.i9
+ br label %cond_false.i9
cond_false.i9: ; preds = %cond_next191
ret i32 0
cond_next37.i: ; preds = %cond_next191
- br i1 false, label %cond_false50.i, label %cond_true44.i
+ br label %cond_true44.i
cond_true44.i: ; preds = %cond_next37.i
- br i1 false, label %cond_true11.i.i, label %bb414.preheader.i
+ br label %bb414.preheader.i
cond_true11.i.i: ; preds = %cond_true44.i
ret i32 0
@@ -26,19 +26,19 @@
ret i32 0
bb414.preheader.i: ; preds = %cond_true44.i
- br i1 false, label %bb.i18, label %do_layer3.exit
+ br label %do_layer3.exit
bb.i18: ; preds = %bb414.preheader.i
- br i1 false, label %bb358.i, label %cond_true79.i
+ br label %cond_true79.i
cond_true79.i: ; preds = %bb.i18
ret i32 0
bb331.i: ; preds = %bb358.i, %cond_true.i149.i
- br i1 false, label %cond_true.i149.i, label %cond_false.i151.i
+ br label %cond_false.i151.i
cond_true.i149.i: ; preds = %bb331.i
- br i1 false, label %bb178.preheader.i.i, label %bb331.i
+ br label %bb331.i
cond_false.i151.i: ; preds = %bb331.i
ret i32 0
@@ -56,7 +56,7 @@
br label %bb163.i.i
bb358.i: ; preds = %bb.i18
- br i1 false, label %bb331.i, label %bb406.i
+ br label %bb406.i
bb406.i: ; preds = %bb358.i
ret i32 0
Modified: llvm/branches/wendling/eh/test/CodeGen/X86/lsr-reuse.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/X86/lsr-reuse.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/X86/lsr-reuse.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/X86/lsr-reuse.ll Fri Jul 2 04:57:13 2010
@@ -440,3 +440,312 @@
%s.1.lcssa = phi i32 [ 0, %entry ], [ %s.0.lcssa, %bb3 ] ; <i32> [#uses=1]
ret i32 %s.1.lcssa
}
+
+; Two loops here are of particular interest; the one at %bb21, where
+; we don't want to leave extra induction variables around, or use an
+; lea to compute an exit condition inside the loop:
+
+; CHECK: test:
+
+; CHECK: BB10_4:
+; CHECK-NEXT: movaps %xmm{{.*}}, %xmm{{.*}}
+; CHECK-NEXT: addss %xmm{{.*}}, %xmm{{.*}}
+; CHECK-NEXT: mulss (%r{{[^,]*}}), %xmm{{.*}}
+; CHECK-NEXT: movss %xmm{{.*}}, (%r{{[^,]*}})
+; CHECK-NEXT: addq $4, %r{{.*}}
+; CHECK-NEXT: decq %r{{.*}}
+; CHECK-NEXT: addq $4, %r{{.*}}
+; CHECK-NEXT: movaps %xmm{{.*}}, %xmm{{.*}}
+; CHECK-NEXT: BB10_2:
+; CHECK-NEXT: testq %r{{.*}}, %r{{.*}}
+; CHECK-NEXT: jle
+; CHECK-NEXT: testb $15, %r{{.*}}
+; CHECK-NEXT: jne
+
+; And the one at %bb68, where we want to be sure to use superhero mode:
+
+; CHECK: BB10_10:
+; CHECK-NEXT: movaps 48(%r{{[^,]*}}), %xmm{{.*}}
+; CHECK-NEXT: mulps %xmm{{.*}}, %xmm{{.*}}
+; CHECK-NEXT: movaps 32(%r{{[^,]*}}), %xmm{{.*}}
+; CHECK-NEXT: mulps %xmm{{.*}}, %xmm{{.*}}
+; CHECK-NEXT: movaps 16(%r{{[^,]*}}), %xmm{{.*}}
+; CHECK-NEXT: mulps %xmm{{.*}}, %xmm{{.*}}
+; CHECK-NEXT: movaps (%r{{[^,]*}}), %xmm{{.*}}
+; CHECK-NEXT: mulps %xmm{{.*}}, %xmm{{.*}}
+; CHECK-NEXT: movaps %xmm{{.*}}, (%r{{[^,]*}})
+; CHECK-NEXT: movaps %xmm{{.*}}, 16(%r{{[^,]*}})
+; CHECK-NEXT: movaps %xmm{{.*}}, 32(%r{{[^,]*}})
+; CHECK-NEXT: movaps %xmm{{.*}}, 48(%r{{[^,]*}})
+; CHECK-NEXT: addps %xmm{{.*}}, %xmm{{.*}}
+; CHECK-NEXT: addps %xmm{{.*}}, %xmm{{.*}}
+; CHECK-NEXT: addps %xmm{{.*}}, %xmm{{.*}}
+; CHECK-NEXT: addps %xmm{{.*}}, %xmm{{.*}}
+; CHECK-NEXT: addq $64, %r{{.*}}
+; CHECK-NEXT: addq $64, %r{{.*}}
+; CHECK-NEXT: addq $-16, %r{{.*}}
+; CHECK-NEXT: BB10_11:
+; CHECK-NEXT: cmpq $15, %r{{.*}}
+; CHECK-NEXT: jg
+
+define void @test(float* %arg, i64 %arg1, float* nocapture %arg2, float* nocapture %arg3, float* %arg4, i64 %arg5, i64 %arg6) nounwind {
+bb:
+ %t = alloca float, align 4 ; <float*> [#uses=3]
+ %t7 = alloca float, align 4 ; <float*> [#uses=2]
+ %t8 = load float* %arg3 ; <float> [#uses=8]
+ %t9 = ptrtoint float* %arg to i64 ; <i64> [#uses=1]
+ %t10 = ptrtoint float* %arg4 to i64 ; <i64> [#uses=1]
+ %t11 = xor i64 %t10, %t9 ; <i64> [#uses=1]
+ %t12 = and i64 %t11, 15 ; <i64> [#uses=1]
+ %t13 = icmp eq i64 %t12, 0 ; <i1> [#uses=1]
+ %t14 = xor i64 %arg1, 1 ; <i64> [#uses=1]
+ %t15 = xor i64 %arg5, 1 ; <i64> [#uses=1]
+ %t16 = or i64 %t15, %t14 ; <i64> [#uses=1]
+ %t17 = trunc i64 %t16 to i32 ; <i32> [#uses=1]
+ %t18 = icmp eq i32 %t17, 0 ; <i1> [#uses=1]
+ br i1 %t18, label %bb19, label %bb213
+
+bb19: ; preds = %bb
+ %t20 = load float* %arg2 ; <float> [#uses=1]
+ br label %bb21
+
+bb21: ; preds = %bb32, %bb19
+ %t22 = phi i64 [ %t36, %bb32 ], [ 0, %bb19 ] ; <i64> [#uses=21]
+ %t23 = phi float [ %t35, %bb32 ], [ %t20, %bb19 ] ; <float> [#uses=6]
+ %t24 = sub i64 %arg6, %t22 ; <i64> [#uses=4]
+ %t25 = getelementptr float* %arg4, i64 %t22 ; <float*> [#uses=4]
+ %t26 = getelementptr float* %arg, i64 %t22 ; <float*> [#uses=3]
+ %t27 = icmp sgt i64 %t24, 0 ; <i1> [#uses=1]
+ br i1 %t27, label %bb28, label %bb37
+
+bb28: ; preds = %bb21
+ %t29 = ptrtoint float* %t25 to i64 ; <i64> [#uses=1]
+ %t30 = and i64 %t29, 15 ; <i64> [#uses=1]
+ %t31 = icmp eq i64 %t30, 0 ; <i1> [#uses=1]
+ br i1 %t31, label %bb37, label %bb32
+
+bb32: ; preds = %bb28
+ %t33 = load float* %t26 ; <float> [#uses=1]
+ %t34 = fmul float %t23, %t33 ; <float> [#uses=1]
+ store float %t34, float* %t25
+ %t35 = fadd float %t23, %t8 ; <float> [#uses=1]
+ %t36 = add i64 %t22, 1 ; <i64> [#uses=1]
+ br label %bb21
+
+bb37: ; preds = %bb28, %bb21
+ %t38 = fmul float %t8, 4.000000e+00 ; <float> [#uses=1]
+ store float %t38, float* %t
+ %t39 = fmul float %t8, 1.600000e+01 ; <float> [#uses=1]
+ store float %t39, float* %t7
+ %t40 = fmul float %t8, 0.000000e+00 ; <float> [#uses=1]
+ %t41 = fadd float %t23, %t40 ; <float> [#uses=1]
+ %t42 = insertelement <4 x float> undef, float %t41, i32 0 ; <<4 x float>> [#uses=1]
+ %t43 = fadd float %t23, %t8 ; <float> [#uses=1]
+ %t44 = insertelement <4 x float> %t42, float %t43, i32 1 ; <<4 x float>> [#uses=1]
+ %t45 = fmul float %t8, 2.000000e+00 ; <float> [#uses=1]
+ %t46 = fadd float %t23, %t45 ; <float> [#uses=1]
+ %t47 = insertelement <4 x float> %t44, float %t46, i32 2 ; <<4 x float>> [#uses=1]
+ %t48 = fmul float %t8, 3.000000e+00 ; <float> [#uses=1]
+ %t49 = fadd float %t23, %t48 ; <float> [#uses=1]
+ %t50 = insertelement <4 x float> %t47, float %t49, i32 3 ; <<4 x float>> [#uses=5]
+ %t51 = call <4 x float> asm "movss $1, $0\09\0Apshufd $$0, $0, $0", "=x,*m,~{dirflag},~{fpsr},~{flags}"(float* %t) nounwind ; <<4 x float>> [#uses=3]
+ %t52 = fadd <4 x float> %t50, %t51 ; <<4 x float>> [#uses=3]
+ %t53 = fadd <4 x float> %t52, %t51 ; <<4 x float>> [#uses=3]
+ %t54 = fadd <4 x float> %t53, %t51 ; <<4 x float>> [#uses=2]
+ %t55 = call <4 x float> asm "movss $1, $0\09\0Apshufd $$0, $0, $0", "=x,*m,~{dirflag},~{fpsr},~{flags}"(float* %t7) nounwind ; <<4 x float>> [#uses=8]
+ %t56 = icmp sgt i64 %t24, 15 ; <i1> [#uses=2]
+ br i1 %t13, label %bb57, label %bb118
+
+bb57: ; preds = %bb37
+ br i1 %t56, label %bb61, label %bb112
+
+bb58: ; preds = %bb68
+ %t59 = getelementptr float* %arg, i64 %t78 ; <float*> [#uses=1]
+ %t60 = getelementptr float* %arg4, i64 %t78 ; <float*> [#uses=1]
+ br label %bb112
+
+bb61: ; preds = %bb57
+ %t62 = add i64 %t22, 16 ; <i64> [#uses=1]
+ %t63 = add i64 %t22, 4 ; <i64> [#uses=1]
+ %t64 = add i64 %t22, 8 ; <i64> [#uses=1]
+ %t65 = add i64 %t22, 12 ; <i64> [#uses=1]
+ %t66 = add i64 %arg6, -16 ; <i64> [#uses=1]
+ %t67 = sub i64 %t66, %t22 ; <i64> [#uses=1]
+ br label %bb68
+
+bb68: ; preds = %bb68, %bb61
+ %t69 = phi i64 [ 0, %bb61 ], [ %t111, %bb68 ] ; <i64> [#uses=3]
+ %t70 = phi <4 x float> [ %t54, %bb61 ], [ %t107, %bb68 ] ; <<4 x float>> [#uses=2]
+ %t71 = phi <4 x float> [ %t50, %bb61 ], [ %t103, %bb68 ] ; <<4 x float>> [#uses=2]
+ %t72 = phi <4 x float> [ %t53, %bb61 ], [ %t108, %bb68 ] ; <<4 x float>> [#uses=2]
+ %t73 = phi <4 x float> [ %t52, %bb61 ], [ %t109, %bb68 ] ; <<4 x float>> [#uses=2]
+ %t74 = shl i64 %t69, 4 ; <i64> [#uses=5]
+ %t75 = add i64 %t22, %t74 ; <i64> [#uses=2]
+ %t76 = getelementptr float* %arg, i64 %t75 ; <float*> [#uses=1]
+ %t77 = bitcast float* %t76 to <4 x float>* ; <<4 x float>*> [#uses=1]
+ %t78 = add i64 %t62, %t74 ; <i64> [#uses=2]
+ %t79 = add i64 %t63, %t74 ; <i64> [#uses=2]
+ %t80 = getelementptr float* %arg, i64 %t79 ; <float*> [#uses=1]
+ %t81 = bitcast float* %t80 to <4 x float>* ; <<4 x float>*> [#uses=1]
+ %t82 = add i64 %t64, %t74 ; <i64> [#uses=2]
+ %t83 = getelementptr float* %arg, i64 %t82 ; <float*> [#uses=1]
+ %t84 = bitcast float* %t83 to <4 x float>* ; <<4 x float>*> [#uses=1]
+ %t85 = add i64 %t65, %t74 ; <i64> [#uses=2]
+ %t86 = getelementptr float* %arg, i64 %t85 ; <float*> [#uses=1]
+ %t87 = bitcast float* %t86 to <4 x float>* ; <<4 x float>*> [#uses=1]
+ %t88 = getelementptr float* %arg4, i64 %t75 ; <float*> [#uses=1]
+ %t89 = bitcast float* %t88 to <4 x float>* ; <<4 x float>*> [#uses=1]
+ %t90 = getelementptr float* %arg4, i64 %t79 ; <float*> [#uses=1]
+ %t91 = bitcast float* %t90 to <4 x float>* ; <<4 x float>*> [#uses=1]
+ %t92 = getelementptr float* %arg4, i64 %t82 ; <float*> [#uses=1]
+ %t93 = bitcast float* %t92 to <4 x float>* ; <<4 x float>*> [#uses=1]
+ %t94 = getelementptr float* %arg4, i64 %t85 ; <float*> [#uses=1]
+ %t95 = bitcast float* %t94 to <4 x float>* ; <<4 x float>*> [#uses=1]
+ %t96 = mul i64 %t69, -16 ; <i64> [#uses=1]
+ %t97 = add i64 %t67, %t96 ; <i64> [#uses=2]
+ %t98 = load <4 x float>* %t77 ; <<4 x float>> [#uses=1]
+ %t99 = load <4 x float>* %t81 ; <<4 x float>> [#uses=1]
+ %t100 = load <4 x float>* %t84 ; <<4 x float>> [#uses=1]
+ %t101 = load <4 x float>* %t87 ; <<4 x float>> [#uses=1]
+ %t102 = fmul <4 x float> %t98, %t71 ; <<4 x float>> [#uses=1]
+ %t103 = fadd <4 x float> %t71, %t55 ; <<4 x float>> [#uses=2]
+ %t104 = fmul <4 x float> %t99, %t73 ; <<4 x float>> [#uses=1]
+ %t105 = fmul <4 x float> %t100, %t72 ; <<4 x float>> [#uses=1]
+ %t106 = fmul <4 x float> %t101, %t70 ; <<4 x float>> [#uses=1]
+ store <4 x float> %t102, <4 x float>* %t89
+ store <4 x float> %t104, <4 x float>* %t91
+ store <4 x float> %t105, <4 x float>* %t93
+ store <4 x float> %t106, <4 x float>* %t95
+ %t107 = fadd <4 x float> %t70, %t55 ; <<4 x float>> [#uses=1]
+ %t108 = fadd <4 x float> %t72, %t55 ; <<4 x float>> [#uses=1]
+ %t109 = fadd <4 x float> %t73, %t55 ; <<4 x float>> [#uses=1]
+ %t110 = icmp sgt i64 %t97, 15 ; <i1> [#uses=1]
+ %t111 = add i64 %t69, 1 ; <i64> [#uses=1]
+ br i1 %t110, label %bb68, label %bb58
+
+bb112: ; preds = %bb58, %bb57
+ %t113 = phi float* [ %t59, %bb58 ], [ %t26, %bb57 ] ; <float*> [#uses=1]
+ %t114 = phi float* [ %t60, %bb58 ], [ %t25, %bb57 ] ; <float*> [#uses=1]
+ %t115 = phi <4 x float> [ %t103, %bb58 ], [ %t50, %bb57 ] ; <<4 x float>> [#uses=1]
+ %t116 = phi i64 [ %t97, %bb58 ], [ %t24, %bb57 ] ; <i64> [#uses=1]
+ %t117 = call <4 x float> asm "movss $1, $0\09\0Apshufd $$0, $0, $0", "=x,*m,~{dirflag},~{fpsr},~{flags}"(float* %t) nounwind ; <<4 x float>> [#uses=0]
+ br label %bb194
+
+bb118: ; preds = %bb37
+ br i1 %t56, label %bb122, label %bb194
+
+bb119: ; preds = %bb137
+ %t120 = getelementptr float* %arg, i64 %t145 ; <float*> [#uses=1]
+ %t121 = getelementptr float* %arg4, i64 %t145 ; <float*> [#uses=1]
+ br label %bb194
+
+bb122: ; preds = %bb118
+ %t123 = add i64 %t22, -1 ; <i64> [#uses=1]
+ %t124 = getelementptr inbounds float* %arg, i64 %t123 ; <float*> [#uses=1]
+ %t125 = bitcast float* %t124 to <4 x float>* ; <<4 x float>*> [#uses=1]
+ %t126 = load <4 x float>* %t125 ; <<4 x float>> [#uses=1]
+ %t127 = add i64 %t22, 16 ; <i64> [#uses=1]
+ %t128 = add i64 %t22, 3 ; <i64> [#uses=1]
+ %t129 = add i64 %t22, 7 ; <i64> [#uses=1]
+ %t130 = add i64 %t22, 11 ; <i64> [#uses=1]
+ %t131 = add i64 %t22, 15 ; <i64> [#uses=1]
+ %t132 = add i64 %t22, 4 ; <i64> [#uses=1]
+ %t133 = add i64 %t22, 8 ; <i64> [#uses=1]
+ %t134 = add i64 %t22, 12 ; <i64> [#uses=1]
+ %t135 = add i64 %arg6, -16 ; <i64> [#uses=1]
+ %t136 = sub i64 %t135, %t22 ; <i64> [#uses=1]
+ br label %bb137
+
+bb137: ; preds = %bb137, %bb122
+ %t138 = phi i64 [ 0, %bb122 ], [ %t193, %bb137 ] ; <i64> [#uses=3]
+ %t139 = phi <4 x float> [ %t54, %bb122 ], [ %t189, %bb137 ] ; <<4 x float>> [#uses=2]
+ %t140 = phi <4 x float> [ %t50, %bb122 ], [ %t185, %bb137 ] ; <<4 x float>> [#uses=2]
+ %t141 = phi <4 x float> [ %t53, %bb122 ], [ %t190, %bb137 ] ; <<4 x float>> [#uses=2]
+ %t142 = phi <4 x float> [ %t52, %bb122 ], [ %t191, %bb137 ] ; <<4 x float>> [#uses=2]
+ %t143 = phi <4 x float> [ %t126, %bb122 ], [ %t175, %bb137 ] ; <<4 x float>> [#uses=1]
+ %t144 = shl i64 %t138, 4 ; <i64> [#uses=9]
+ %t145 = add i64 %t127, %t144 ; <i64> [#uses=2]
+ %t146 = add i64 %t128, %t144 ; <i64> [#uses=1]
+ %t147 = getelementptr float* %arg, i64 %t146 ; <float*> [#uses=1]
+ %t148 = bitcast float* %t147 to <4 x float>* ; <<4 x float>*> [#uses=1]
+ %t149 = add i64 %t129, %t144 ; <i64> [#uses=1]
+ %t150 = getelementptr float* %arg, i64 %t149 ; <float*> [#uses=1]
+ %t151 = bitcast float* %t150 to <4 x float>* ; <<4 x float>*> [#uses=1]
+ %t152 = add i64 %t130, %t144 ; <i64> [#uses=1]
+ %t153 = getelementptr float* %arg, i64 %t152 ; <float*> [#uses=1]
+ %t154 = bitcast float* %t153 to <4 x float>* ; <<4 x float>*> [#uses=1]
+ %t155 = add i64 %t131, %t144 ; <i64> [#uses=1]
+ %t156 = getelementptr float* %arg, i64 %t155 ; <float*> [#uses=1]
+ %t157 = bitcast float* %t156 to <4 x float>* ; <<4 x float>*> [#uses=1]
+ %t158 = add i64 %t22, %t144 ; <i64> [#uses=1]
+ %t159 = getelementptr float* %arg4, i64 %t158 ; <float*> [#uses=1]
+ %t160 = bitcast float* %t159 to <4 x float>* ; <<4 x float>*> [#uses=1]
+ %t161 = add i64 %t132, %t144 ; <i64> [#uses=1]
+ %t162 = getelementptr float* %arg4, i64 %t161 ; <float*> [#uses=1]
+ %t163 = bitcast float* %t162 to <4 x float>* ; <<4 x float>*> [#uses=1]
+ %t164 = add i64 %t133, %t144 ; <i64> [#uses=1]
+ %t165 = getelementptr float* %arg4, i64 %t164 ; <float*> [#uses=1]
+ %t166 = bitcast float* %t165 to <4 x float>* ; <<4 x float>*> [#uses=1]
+ %t167 = add i64 %t134, %t144 ; <i64> [#uses=1]
+ %t168 = getelementptr float* %arg4, i64 %t167 ; <float*> [#uses=1]
+ %t169 = bitcast float* %t168 to <4 x float>* ; <<4 x float>*> [#uses=1]
+ %t170 = mul i64 %t138, -16 ; <i64> [#uses=1]
+ %t171 = add i64 %t136, %t170 ; <i64> [#uses=2]
+ %t172 = load <4 x float>* %t148 ; <<4 x float>> [#uses=2]
+ %t173 = load <4 x float>* %t151 ; <<4 x float>> [#uses=2]
+ %t174 = load <4 x float>* %t154 ; <<4 x float>> [#uses=2]
+ %t175 = load <4 x float>* %t157 ; <<4 x float>> [#uses=2]
+ %t176 = shufflevector <4 x float> %t143, <4 x float> %t172, <4 x i32> <i32 4, i32 1, i32 2, i32 3> ; <<4 x float>> [#uses=1]
+ %t177 = shufflevector <4 x float> %t176, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0> ; <<4 x float>> [#uses=1]
+ %t178 = shufflevector <4 x float> %t172, <4 x float> %t173, <4 x i32> <i32 4, i32 1, i32 2, i32 3> ; <<4 x float>> [#uses=1]
+ %t179 = shufflevector <4 x float> %t178, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0> ; <<4 x float>> [#uses=1]
+ %t180 = shufflevector <4 x float> %t173, <4 x float> %t174, <4 x i32> <i32 4, i32 1, i32 2, i32 3> ; <<4 x float>> [#uses=1]
+ %t181 = shufflevector <4 x float> %t180, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0> ; <<4 x float>> [#uses=1]
+ %t182 = shufflevector <4 x float> %t174, <4 x float> %t175, <4 x i32> <i32 4, i32 1, i32 2, i32 3> ; <<4 x float>> [#uses=1]
+ %t183 = shufflevector <4 x float> %t182, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0> ; <<4 x float>> [#uses=1]
+ %t184 = fmul <4 x float> %t177, %t140 ; <<4 x float>> [#uses=1]
+ %t185 = fadd <4 x float> %t140, %t55 ; <<4 x float>> [#uses=2]
+ %t186 = fmul <4 x float> %t179, %t142 ; <<4 x float>> [#uses=1]
+ %t187 = fmul <4 x float> %t181, %t141 ; <<4 x float>> [#uses=1]
+ %t188 = fmul <4 x float> %t183, %t139 ; <<4 x float>> [#uses=1]
+ store <4 x float> %t184, <4 x float>* %t160
+ store <4 x float> %t186, <4 x float>* %t163
+ store <4 x float> %t187, <4 x float>* %t166
+ store <4 x float> %t188, <4 x float>* %t169
+ %t189 = fadd <4 x float> %t139, %t55 ; <<4 x float>> [#uses=1]
+ %t190 = fadd <4 x float> %t141, %t55 ; <<4 x float>> [#uses=1]
+ %t191 = fadd <4 x float> %t142, %t55 ; <<4 x float>> [#uses=1]
+ %t192 = icmp sgt i64 %t171, 15 ; <i1> [#uses=1]
+ %t193 = add i64 %t138, 1 ; <i64> [#uses=1]
+ br i1 %t192, label %bb137, label %bb119
+
+bb194: ; preds = %bb119, %bb118, %bb112
+ %t195 = phi i64 [ %t116, %bb112 ], [ %t171, %bb119 ], [ %t24, %bb118 ] ; <i64> [#uses=2]
+ %t196 = phi <4 x float> [ %t115, %bb112 ], [ %t185, %bb119 ], [ %t50, %bb118 ] ; <<4 x float>> [#uses=1]
+ %t197 = phi float* [ %t114, %bb112 ], [ %t121, %bb119 ], [ %t25, %bb118 ] ; <float*> [#uses=1]
+ %t198 = phi float* [ %t113, %bb112 ], [ %t120, %bb119 ], [ %t26, %bb118 ] ; <float*> [#uses=1]
+ %t199 = extractelement <4 x float> %t196, i32 0 ; <float> [#uses=2]
+ %t200 = icmp sgt i64 %t195, 0 ; <i1> [#uses=1]
+ br i1 %t200, label %bb201, label %bb211
+
+bb201: ; preds = %bb201, %bb194
+ %t202 = phi i64 [ %t209, %bb201 ], [ 0, %bb194 ] ; <i64> [#uses=3]
+ %t203 = phi float [ %t208, %bb201 ], [ %t199, %bb194 ] ; <float> [#uses=2]
+ %t204 = getelementptr float* %t198, i64 %t202 ; <float*> [#uses=1]
+ %t205 = getelementptr float* %t197, i64 %t202 ; <float*> [#uses=1]
+ %t206 = load float* %t204 ; <float> [#uses=1]
+ %t207 = fmul float %t203, %t206 ; <float> [#uses=1]
+ store float %t207, float* %t205
+ %t208 = fadd float %t203, %t8 ; <float> [#uses=2]
+ %t209 = add i64 %t202, 1 ; <i64> [#uses=2]
+ %t210 = icmp eq i64 %t209, %t195 ; <i1> [#uses=1]
+ br i1 %t210, label %bb211, label %bb201
+
+bb211: ; preds = %bb201, %bb194
+ %t212 = phi float [ %t199, %bb194 ], [ %t208, %bb201 ] ; <float> [#uses=1]
+ store float %t212, float* %arg2
+ ret void
+
+bb213: ; preds = %bb
+ ret void
+}
Modified: llvm/branches/wendling/eh/test/CodeGen/X86/optimize-max-3.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/X86/optimize-max-3.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/X86/optimize-max-3.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/X86/optimize-max-3.ll Fri Jul 2 04:57:13 2010
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 | FileCheck %s
+; RUN: llc < %s -march=x86-64 -asm-verbose=false | FileCheck %s
; LSR's OptimizeMax should eliminate the select (max).
@@ -30,3 +30,47 @@
for.end: ; preds = %for.body, %entry
ret void
}
+
+; In this case, one of the max operands is itself another max, which folds,
+; leaving a two-operand max that doesn't fit the usual pattern.
+; OptimizeMax should handle this case.
+; PR7454
+
+; CHECK: _Z18GenerateStatusPagei:
+
+; CHECK: jle
+; CHECK-NOT: cmov
+; CHECK: xorl %edi, %edi
+; CHECK-NEXT: align
+; CHECK-NEXT: BB1_2:
+; CHECK-NEXT: callq
+; CHECK-NEXT: incl %ebx
+; CHECK-NEXT: cmpl %r14d, %ebx
+; CHECK-NEXT: movq %rax, %rdi
+; CHECK-NEXT: jl
+
+define void @_Z18GenerateStatusPagei(i32 %jobs_to_display) nounwind {
+entry:
+ %cmp.i = icmp sgt i32 %jobs_to_display, 0 ; <i1> [#uses=1]
+ %tmp = select i1 %cmp.i, i32 %jobs_to_display, i32 0 ; <i32> [#uses=3]
+ %cmp8 = icmp sgt i32 %tmp, 0 ; <i1> [#uses=1]
+ br i1 %cmp8, label %bb.nph, label %for.end
+
+bb.nph: ; preds = %entry
+ %tmp11 = icmp sgt i32 %tmp, 1 ; <i1> [#uses=1]
+ %smax = select i1 %tmp11, i32 %tmp, i32 1 ; <i32> [#uses=1]
+ br label %for.body
+
+for.body: ; preds = %for.body, %bb.nph
+ %i.010 = phi i32 [ 0, %bb.nph ], [ %inc, %for.body ] ; <i32> [#uses=1]
+ %it.0.09 = phi float* [ null, %bb.nph ], [ %call.i, %for.body ] ; <float*> [#uses=1]
+ %call.i = call float* @_ZSt18_Rb_tree_decrementPKSt18_Rb_tree_node_base(float* %it.0.09) ; <float*> [#uses=1]
+ %inc = add nsw i32 %i.010, 1 ; <i32> [#uses=2]
+ %exitcond = icmp eq i32 %inc, %smax ; <i1> [#uses=1]
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+declare float* @_ZSt18_Rb_tree_decrementPKSt18_Rb_tree_node_base(float*)
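For reference, the fold the comment in this test describes is just integer math: for signed values, smax(smax(n, 0), 1) == smax(n, 1), because 1 > 0. A minimal IR sketch of the two select-based maxes involved (the value names %n, %a, %b are hypothetical, not taken from the test):

  ; smax(%n, 0) written as a compare-and-select:
  %c0 = icmp sgt i32 %n, 0
  %a = select i1 %c0, i32 %n, i32 0
  ; smax(%a, 1): since 1 > 0, this collapses to smax(%n, 1), the plain
  ; two-operand max that OptimizeMax is expected to recognize here.
  %c1 = icmp sgt i32 %a, 1
  %b = select i1 %c1, i32 %a, i32 1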
Modified: llvm/branches/wendling/eh/test/CodeGen/X86/pic.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/X86/pic.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/X86/pic.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/X86/pic.ll Fri Jul 2 04:57:13 2010
@@ -189,7 +189,7 @@
; LINUX: call .L7$pb
; LINUX: .L7$pb:
; LINUX: addl $_GLOBAL_OFFSET_TABLE_+(.L{{.*}}-.L7$pb),
-; LINUX: addl .LJTI7_0@GOTOFF(
+; LINUX: .LJTI7_0@GOTOFF(
; LINUX: jmpl *
; LINUX: .LJTI7_0:
Modified: llvm/branches/wendling/eh/test/CodeGen/X86/pr2659.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/X86/pr2659.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/X86/pr2659.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/X86/pr2659.ll Fri Jul 2 04:57:13 2010
@@ -17,7 +17,7 @@
; CHECK: %forcond.preheader.forbody_crit_edge
; CHECK: movl $1
; CHECK-NOT: xorl
-; CHECK-NEXT: movl $1
+; CHECK-NEXT: movl
ifthen: ; preds = %entry
ret i32 0
Modified: llvm/branches/wendling/eh/test/CodeGen/X86/sse3.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/X86/sse3.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/X86/sse3.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/X86/sse3.ll Fri Jul 2 04:57:13 2010
@@ -63,10 +63,10 @@
ret <8 x i16> %tmp
; X64: t4:
; X64: pextrw $7, %xmm0, %eax
-; X64: pshufhw $100, %xmm0, %xmm2
-; X64: pinsrw $1, %eax, %xmm2
+; X64: pshufhw $100, %xmm0, %xmm1
+; X64: pinsrw $1, %eax, %xmm1
; X64: pextrw $1, %xmm0, %eax
-; X64: movdqa %xmm2, %xmm0
+; X64: movdqa %xmm1, %xmm0
; X64: pinsrw $4, %eax, %xmm0
; X64: ret
}
Modified: llvm/branches/wendling/eh/test/CodeGen/X86/stack-align.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/X86/stack-align.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/X86/stack-align.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/X86/stack-align.ll Fri Jul 2 04:57:13 2010
@@ -9,14 +9,15 @@
define void @test({ double, double }* byval %z, double* %P) {
entry:
+ %tmp3 = load double* @G, align 16 ; <double> [#uses=1]
+ %tmp4 = tail call double @fabs( double %tmp3 ) ; <double> [#uses=1]
+ volatile store double %tmp4, double* %P
%tmp = getelementptr { double, double }* %z, i32 0, i32 0 ; <double*> [#uses=1]
- %tmp1 = load double* %tmp, align 8 ; <double> [#uses=1]
+ %tmp1 = volatile load double* %tmp, align 8 ; <double> [#uses=1]
%tmp2 = tail call double @fabs( double %tmp1 ) ; <double> [#uses=1]
; CHECK: andpd{{.*}}4(%esp), %xmm
- %tmp3 = load double* @G, align 16 ; <double> [#uses=1]
- %tmp4 = tail call double @fabs( double %tmp3 ) ; <double> [#uses=1]
%tmp6 = fadd double %tmp4, %tmp2 ; <double> [#uses=1]
- store double %tmp6, double* %P, align 8
+ volatile store double %tmp6, double* %P, align 8
ret void
}
Modified: llvm/branches/wendling/eh/test/CodeGen/X86/tailcallstack64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/X86/tailcallstack64.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/X86/tailcallstack64.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/X86/tailcallstack64.ll Fri Jul 2 04:57:13 2010
@@ -2,9 +2,11 @@
; Check that lowered arguments on the stack do not overwrite each other.
; Move param %in1 to temp register (%eax).
-; CHECK: movl %edi, %eax
+; CHECK: movl 32(%rsp), %eax
; Move param %in2 to temp register (%r10d).
; CHECK: movl 40(%rsp), %r10d
+; Add %p1 to %in1 in the temporary register (%eax).
+; CHECK: addl %edi, %eax
; Move param %in2 to stack.
; CHECK: movl %r10d, 32(%rsp)
; Move result of addition to stack.
Modified: llvm/branches/wendling/eh/test/CodeGen/X86/vec_shuffle-6.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/X86/vec_shuffle-6.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/X86/vec_shuffle-6.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/X86/vec_shuffle-6.ll Fri Jul 2 04:57:13 2010
@@ -4,7 +4,7 @@
; RUN: grep movups %t | count 2
target triple = "i686-apple-darwin"
-@x = global [4 x i32] [ i32 1, i32 2, i32 3, i32 4 ] ; <[4 x i32]*> [#uses=4]
+@x = external global [4 x i32]
define <2 x i64> @test1() {
%tmp = load i32* getelementptr ([4 x i32]* @x, i32 0, i32 0) ; <i32> [#uses=1]
Modified: llvm/branches/wendling/eh/test/DebugInfo/2010-05-28-Crash.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/DebugInfo/2010-05-28-Crash.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/DebugInfo/2010-05-28-Crash.ll (original)
+++ llvm/branches/wendling/eh/test/DebugInfo/2010-05-28-Crash.ll Fri Jul 2 04:57:13 2010
@@ -41,4 +41,4 @@
;CHECK: DEBUG_VALUE: bar:x <- EBX+0
;CHECK-NEXT:Ltmp
-;CHECK-NEXT DEBUG_VALUE: foo:y <- 1+0
+;CHECK-NEXT: DEBUG_VALUE: foo:y <- 1+0
Removed: llvm/branches/wendling/eh/test/FrontendC++/thunk-weak-odr.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/FrontendC%2B%2B/thunk-weak-odr.cpp?rev=107464&view=auto
==============================================================================
--- llvm/branches/wendling/eh/test/FrontendC++/thunk-weak-odr.cpp (original)
+++ llvm/branches/wendling/eh/test/FrontendC++/thunk-weak-odr.cpp (removed)
@@ -1,33 +0,0 @@
-// RUN: %llvmgxx %s -S -o - | FileCheck %s
-// <rdar://problem/7929157>
-
-struct A {
- virtual int f() { return 1; }
-};
-
-struct B {
- virtual int f() { return 2; }
-};
-
-struct C : A, B {
- virtual int f() { return 3; }
-};
-
-struct D : C {
- virtual int f() { return 4; }
-};
-
-static int f(D* d) {
- B* b = d;
- return b->f();
-};
-
-int g() {
- D d;
- return f(&d);
-}
-
-// Thunks should be marked as "weak ODR", not just "weak".
-//
-// CHECK: define weak_odr i32 @_ZThn{{[48]}}_N1C1fEv
-// CHECK: define weak_odr i32 @_ZThn{{[48]}}_N1D1fEv
Modified: llvm/branches/wendling/eh/test/FrontendC/2010-06-17-asmcrash.c
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/FrontendC/2010-06-17-asmcrash.c?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/FrontendC/2010-06-17-asmcrash.c (original)
+++ llvm/branches/wendling/eh/test/FrontendC/2010-06-17-asmcrash.c Fri Jul 2 04:57:13 2010
@@ -12,5 +12,5 @@
:"+g"(h), "+S"(pixels), "+D"(block)
:"r" ((x86_reg)line_size)
:"%""rax", "memory");
-// CHECK: # (%rsp) %rsi %rdi %rcx
+// CHECK: # %ecx %rsi %rdi %rdx
}
Modified: llvm/branches/wendling/eh/test/MC/AsmParser/X86/x86_32-encoding.s
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/MC/AsmParser/X86/x86_32-encoding.s?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/MC/AsmParser/X86/x86_32-encoding.s (original)
+++ llvm/branches/wendling/eh/test/MC/AsmParser/X86/x86_32-encoding.s Fri Jul 2 04:57:13 2010
@@ -9967,12 +9967,6 @@
// CHECK: encoding: [0x66,0x0f,0x3a,0xdf,0x14,0x82,0x80]
aeskeygenassist $128, %xmm1, %xmm2
-// rdar://7840289
-// CHECK: pshufb CPI1_0(%rip), %xmm1
-// CHECK: encoding: [0x66,0x0f,0x38,0x00,0x0d,A,A,A,A]
-// CHECK: fixup A - offset: 5, value: CPI1_0-4
-pshufb CPI1_0(%rip), %xmm1
-
// rdar://7910087
// CHECK: bsfw %bx, %bx
// CHECK: encoding: [0x66,0x0f,0xbc,0xdb]
@@ -10212,3 +10206,1411 @@
// CHECK: encoding: [0xc5,0xeb,0x5d,0x6c,0xcb,0xfc]
vminsd -4(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK: vmaxps %xmm2, %xmm4, %xmm6
+// CHECK: encoding: [0xc5,0xd8,0x5f,0xf2]
+ vmaxps %xmm2, %xmm4, %xmm6
+
+// CHECK: vmaxpd %xmm2, %xmm4, %xmm6
+// CHECK: encoding: [0xc5,0xd9,0x5f,0xf2]
+ vmaxpd %xmm2, %xmm4, %xmm6
+
+// CHECK: vminps %xmm2, %xmm4, %xmm6
+// CHECK: encoding: [0xc5,0xd8,0x5d,0xf2]
+ vminps %xmm2, %xmm4, %xmm6
+
+// CHECK: vminpd %xmm2, %xmm4, %xmm6
+// CHECK: encoding: [0xc5,0xd9,0x5d,0xf2]
+ vminpd %xmm2, %xmm4, %xmm6
+
+// CHECK: vmaxps -4(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xe8,0x5f,0x6c,0xcb,0xfc]
+ vmaxps -4(%ebx,%ecx,8), %xmm2, %xmm5
+
+// CHECK: vmaxpd -4(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xe9,0x5f,0x6c,0xcb,0xfc]
+ vmaxpd -4(%ebx,%ecx,8), %xmm2, %xmm5
+
+// CHECK: vminps -4(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xe8,0x5d,0x6c,0xcb,0xfc]
+ vminps -4(%ebx,%ecx,8), %xmm2, %xmm5
+
+// CHECK: vminpd -4(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xe9,0x5d,0x6c,0xcb,0xfc]
+ vminpd -4(%ebx,%ecx,8), %xmm2, %xmm5
+
+// CHECK: vandps %xmm2, %xmm4, %xmm6
+// CHECK: encoding: [0xc5,0xd8,0x54,0xf2]
+ vandps %xmm2, %xmm4, %xmm6
+
+// CHECK: vandpd %xmm2, %xmm4, %xmm6
+// CHECK: encoding: [0xc5,0xd9,0x54,0xf2]
+ vandpd %xmm2, %xmm4, %xmm6
+
+// CHECK: vandps -4(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xe8,0x54,0x6c,0xcb,0xfc]
+ vandps -4(%ebx,%ecx,8), %xmm2, %xmm5
+
+// CHECK: vandpd -4(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xe9,0x54,0x6c,0xcb,0xfc]
+ vandpd -4(%ebx,%ecx,8), %xmm2, %xmm5
+
+// CHECK: vorps %xmm2, %xmm4, %xmm6
+// CHECK: encoding: [0xc5,0xd8,0x56,0xf2]
+ vorps %xmm2, %xmm4, %xmm6
+
+// CHECK: vorpd %xmm2, %xmm4, %xmm6
+// CHECK: encoding: [0xc5,0xd9,0x56,0xf2]
+ vorpd %xmm2, %xmm4, %xmm6
+
+// CHECK: vorps -4(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xe8,0x56,0x6c,0xcb,0xfc]
+ vorps -4(%ebx,%ecx,8), %xmm2, %xmm5
+
+// CHECK: vorpd -4(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xe9,0x56,0x6c,0xcb,0xfc]
+ vorpd -4(%ebx,%ecx,8), %xmm2, %xmm5
+
+// CHECK: vxorps %xmm2, %xmm4, %xmm6
+// CHECK: encoding: [0xc5,0xd8,0x57,0xf2]
+ vxorps %xmm2, %xmm4, %xmm6
+
+// CHECK: vxorpd %xmm2, %xmm4, %xmm6
+// CHECK: encoding: [0xc5,0xd9,0x57,0xf2]
+ vxorpd %xmm2, %xmm4, %xmm6
+
+// CHECK: vxorps -4(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xe8,0x57,0x6c,0xcb,0xfc]
+ vxorps -4(%ebx,%ecx,8), %xmm2, %xmm5
+
+// CHECK: vxorpd -4(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xe9,0x57,0x6c,0xcb,0xfc]
+ vxorpd -4(%ebx,%ecx,8), %xmm2, %xmm5
+
+// CHECK: vandnps %xmm2, %xmm4, %xmm6
+// CHECK: encoding: [0xc5,0xd8,0x55,0xf2]
+ vandnps %xmm2, %xmm4, %xmm6
+
+// CHECK: vandnpd %xmm2, %xmm4, %xmm6
+// CHECK: encoding: [0xc5,0xd9,0x55,0xf2]
+ vandnpd %xmm2, %xmm4, %xmm6
+
+// CHECK: vandnps -4(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xe8,0x55,0x6c,0xcb,0xfc]
+ vandnps -4(%ebx,%ecx,8), %xmm2, %xmm5
+
+// CHECK: vandnpd -4(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xe9,0x55,0x6c,0xcb,0xfc]
+ vandnpd -4(%ebx,%ecx,8), %xmm2, %xmm5
+
+// CHECK: vmovss -4(%ebx,%ecx,8), %xmm5
+// CHECK: encoding: [0xc5,0xfa,0x10,0x6c,0xcb,0xfc]
+ vmovss -4(%ebx,%ecx,8), %xmm5
+
+// CHECK: vmovss %xmm4, %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xea,0x10,0xec]
+ vmovss %xmm4, %xmm2, %xmm5
+
+// CHECK: vmovsd -4(%ebx,%ecx,8), %xmm5
+// CHECK: encoding: [0xc5,0xfb,0x10,0x6c,0xcb,0xfc]
+ vmovsd -4(%ebx,%ecx,8), %xmm5
+
+// CHECK: vmovsd %xmm4, %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xeb,0x10,0xec]
+ vmovsd %xmm4, %xmm2, %xmm5
+
+// CHECK: vunpckhps %xmm1, %xmm2, %xmm4
+// CHECK: encoding: [0xc5,0xe8,0x15,0xe1]
+ vunpckhps %xmm1, %xmm2, %xmm4
+
+// CHECK: vunpckhpd %xmm1, %xmm2, %xmm4
+// CHECK: encoding: [0xc5,0xe9,0x15,0xe1]
+ vunpckhpd %xmm1, %xmm2, %xmm4
+
+// CHECK: vunpcklps %xmm1, %xmm2, %xmm4
+// CHECK: encoding: [0xc5,0xe8,0x14,0xe1]
+ vunpcklps %xmm1, %xmm2, %xmm4
+
+// CHECK: vunpcklpd %xmm1, %xmm2, %xmm4
+// CHECK: encoding: [0xc5,0xe9,0x14,0xe1]
+ vunpcklpd %xmm1, %xmm2, %xmm4
+
+// CHECK: vunpckhps -4(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xe8,0x15,0x6c,0xcb,0xfc]
+ vunpckhps -4(%ebx,%ecx,8), %xmm2, %xmm5
+
+// CHECK: vunpckhpd -4(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xe9,0x15,0x6c,0xcb,0xfc]
+ vunpckhpd -4(%ebx,%ecx,8), %xmm2, %xmm5
+
+// CHECK: vunpcklps -4(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xe8,0x14,0x6c,0xcb,0xfc]
+ vunpcklps -4(%ebx,%ecx,8), %xmm2, %xmm5
+
+// CHECK: vunpcklpd -4(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xe9,0x14,0x6c,0xcb,0xfc]
+ vunpcklpd -4(%ebx,%ecx,8), %xmm2, %xmm5
+
+// CHECK: vcmpps $0, %xmm0, %xmm6, %xmm1
+// CHECK: encoding: [0xc5,0xc8,0xc2,0xc8,0x00]
+ vcmpps $0, %xmm0, %xmm6, %xmm1
+
+// CHECK: vcmpps $0, (%eax), %xmm6, %xmm1
+// CHECK: encoding: [0xc5,0xc8,0xc2,0x08,0x00]
+ vcmpps $0, (%eax), %xmm6, %xmm1
+
+// CHECK: vcmpps $7, %xmm0, %xmm6, %xmm1
+// CHECK: encoding: [0xc5,0xc8,0xc2,0xc8,0x07]
+ vcmpps $7, %xmm0, %xmm6, %xmm1
+
+// CHECK: vcmppd $0, %xmm0, %xmm6, %xmm1
+// CHECK: encoding: [0xc5,0xc9,0xc2,0xc8,0x00]
+ vcmppd $0, %xmm0, %xmm6, %xmm1
+
+// CHECK: vcmppd $0, (%eax), %xmm6, %xmm1
+// CHECK: encoding: [0xc5,0xc9,0xc2,0x08,0x00]
+ vcmppd $0, (%eax), %xmm6, %xmm1
+
+// CHECK: vcmppd $7, %xmm0, %xmm6, %xmm1
+// CHECK: encoding: [0xc5,0xc9,0xc2,0xc8,0x07]
+ vcmppd $7, %xmm0, %xmm6, %xmm1
+
+// CHECK: vshufps $8, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc6,0xd9,0x08]
+ vshufps $8, %xmm1, %xmm2, %xmm3
+
+// CHECK: vshufps $8, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc6,0x5c,0xcb,0xfc,0x08]
+ vshufps $8, -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vshufpd $8, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xc6,0xd9,0x08]
+ vshufpd $8, %xmm1, %xmm2, %xmm3
+
+// CHECK: vshufpd $8, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xc6,0x5c,0xcb,0xfc,0x08]
+ vshufpd $8, -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vcmpps $0, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x00]
+ vcmpeqps %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpps $2, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x02]
+ vcmpleps %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpps $1, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x01]
+ vcmpltps %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpps $4, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x04]
+ vcmpneqps %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpps $6, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x06]
+ vcmpnleps %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpps $5, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x05]
+ vcmpnltps %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpps $7, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x07]
+ vcmpordps %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpps $3, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x03]
+ vcmpunordps %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpps $0, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc2,0x5c,0xcb,0xfc,0x00]
+ vcmpeqps -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vcmpps $2, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc2,0x5c,0xcb,0xfc,0x02]
+ vcmpleps -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vcmpps $1, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc2,0x5c,0xcb,0xfc,0x01]
+ vcmpltps -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vcmpps $4, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc2,0x5c,0xcb,0xfc,0x04]
+ vcmpneqps -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vcmpps $6, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc2,0x5c,0xcb,0xfc,0x06]
+ vcmpnleps -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vcmpps $5, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc2,0x5c,0xcb,0xfc,0x05]
+ vcmpnltps -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vcmpps $7, -4(%ebx,%ecx,8), %xmm6, %xmm2
+// CHECK: encoding: [0xc5,0xc8,0xc2,0x54,0xcb,0xfc,0x07]
+ vcmpordps -4(%ebx,%ecx,8), %xmm6, %xmm2
+
+// CHECK: vcmpps $3, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc2,0x5c,0xcb,0xfc,0x03]
+ vcmpunordps -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vcmppd $0, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xc2,0xd9,0x00]
+ vcmpeqpd %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmppd $2, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xc2,0xd9,0x02]
+ vcmplepd %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmppd $1, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xc2,0xd9,0x01]
+ vcmpltpd %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmppd $4, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xc2,0xd9,0x04]
+ vcmpneqpd %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmppd $6, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xc2,0xd9,0x06]
+ vcmpnlepd %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmppd $5, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xc2,0xd9,0x05]
+ vcmpnltpd %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmppd $7, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xc2,0xd9,0x07]
+ vcmpordpd %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmppd $3, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xc2,0xd9,0x03]
+ vcmpunordpd %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmppd $0, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xc2,0x5c,0xcb,0xfc,0x00]
+ vcmpeqpd -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vcmppd $2, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xc2,0x5c,0xcb,0xfc,0x02]
+ vcmplepd -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vcmppd $1, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xc2,0x5c,0xcb,0xfc,0x01]
+ vcmpltpd -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vcmppd $4, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xc2,0x5c,0xcb,0xfc,0x04]
+ vcmpneqpd -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vcmppd $6, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xc2,0x5c,0xcb,0xfc,0x06]
+ vcmpnlepd -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vcmppd $5, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xc2,0x5c,0xcb,0xfc,0x05]
+ vcmpnltpd -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vcmppd $7, -4(%ebx,%ecx,8), %xmm6, %xmm2
+// CHECK: encoding: [0xc5,0xc9,0xc2,0x54,0xcb,0xfc,0x07]
+ vcmpordpd -4(%ebx,%ecx,8), %xmm6, %xmm2
+
+// CHECK: vcmppd $3, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xc2,0x5c,0xcb,0xfc,0x03]
+ vcmpunordpd -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vmovmskps %xmm2, %eax
+// CHECK: encoding: [0xc5,0xf8,0x50,0xc2]
+ vmovmskps %xmm2, %eax
+
+// CHECK: vmovmskpd %xmm2, %eax
+// CHECK: encoding: [0xc5,0xf9,0x50,0xc2]
+ vmovmskpd %xmm2, %eax
+
+// CHECK: vcmpss $0, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xea,0xc2,0xd9,0x00]
+ vcmpeqss %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpss $2, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xea,0xc2,0xd9,0x02]
+ vcmpless %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpss $1, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xea,0xc2,0xd9,0x01]
+ vcmpltss %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpss $4, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xea,0xc2,0xd9,0x04]
+ vcmpneqss %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpss $6, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xea,0xc2,0xd9,0x06]
+ vcmpnless %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpss $5, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xea,0xc2,0xd9,0x05]
+ vcmpnltss %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpss $7, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xea,0xc2,0xd9,0x07]
+ vcmpordss %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpss $3, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xea,0xc2,0xd9,0x03]
+ vcmpunordss %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpss $0, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xea,0xc2,0x5c,0xcb,0xfc,0x00]
+ vcmpeqss -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vcmpss $2, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xea,0xc2,0x5c,0xcb,0xfc,0x02]
+ vcmpless -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vcmpss $1, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xea,0xc2,0x5c,0xcb,0xfc,0x01]
+ vcmpltss -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vcmpss $4, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xea,0xc2,0x5c,0xcb,0xfc,0x04]
+ vcmpneqss -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vcmpss $6, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xea,0xc2,0x5c,0xcb,0xfc,0x06]
+ vcmpnless -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vcmpss $5, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xea,0xc2,0x5c,0xcb,0xfc,0x05]
+ vcmpnltss -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vcmpss $7, -4(%ebx,%ecx,8), %xmm6, %xmm2
+// CHECK: encoding: [0xc5,0xca,0xc2,0x54,0xcb,0xfc,0x07]
+ vcmpordss -4(%ebx,%ecx,8), %xmm6, %xmm2
+
+// CHECK: vcmpss $3, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xea,0xc2,0x5c,0xcb,0xfc,0x03]
+ vcmpunordss -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vcmpsd $0, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xeb,0xc2,0xd9,0x00]
+ vcmpeqsd %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpsd $2, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xeb,0xc2,0xd9,0x02]
+ vcmplesd %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpsd $1, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xeb,0xc2,0xd9,0x01]
+ vcmpltsd %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpsd $4, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xeb,0xc2,0xd9,0x04]
+ vcmpneqsd %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpsd $6, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xeb,0xc2,0xd9,0x06]
+ vcmpnlesd %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpsd $5, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xeb,0xc2,0xd9,0x05]
+ vcmpnltsd %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpsd $7, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xeb,0xc2,0xd9,0x07]
+ vcmpordsd %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpsd $3, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xeb,0xc2,0xd9,0x03]
+ vcmpunordsd %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpsd $0, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xeb,0xc2,0x5c,0xcb,0xfc,0x00]
+ vcmpeqsd -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vcmpsd $2, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xeb,0xc2,0x5c,0xcb,0xfc,0x02]
+ vcmplesd -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vcmpsd $1, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xeb,0xc2,0x5c,0xcb,0xfc,0x01]
+ vcmpltsd -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vcmpsd $4, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xeb,0xc2,0x5c,0xcb,0xfc,0x04]
+ vcmpneqsd -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vcmpsd $6, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xeb,0xc2,0x5c,0xcb,0xfc,0x06]
+ vcmpnlesd -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vcmpsd $5, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xeb,0xc2,0x5c,0xcb,0xfc,0x05]
+ vcmpnltsd -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vcmpsd $7, -4(%ebx,%ecx,8), %xmm6, %xmm2
+// CHECK: encoding: [0xc5,0xcb,0xc2,0x54,0xcb,0xfc,0x07]
+ vcmpordsd -4(%ebx,%ecx,8), %xmm6, %xmm2
+
+// CHECK: vcmpsd $3, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xeb,0xc2,0x5c,0xcb,0xfc,0x03]
+ vcmpunordsd -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vucomiss %xmm1, %xmm2
+// CHECK: encoding: [0xc5,0xf8,0x2e,0xd1]
+ vucomiss %xmm1, %xmm2
+
+// CHECK: vucomiss (%eax), %xmm2
+// CHECK: encoding: [0xc5,0xf8,0x2e,0x10]
+ vucomiss (%eax), %xmm2
+
+// CHECK: vcomiss %xmm1, %xmm2
+// CHECK: encoding: [0xc5,0xf8,0x2f,0xd1]
+ vcomiss %xmm1, %xmm2
+
+// CHECK: vcomiss (%eax), %xmm2
+// CHECK: encoding: [0xc5,0xf8,0x2f,0x10]
+ vcomiss (%eax), %xmm2
+
+// CHECK: vucomisd %xmm1, %xmm2
+// CHECK: encoding: [0xc5,0xf9,0x2e,0xd1]
+ vucomisd %xmm1, %xmm2
+
+// CHECK: vucomisd (%eax), %xmm2
+// CHECK: encoding: [0xc5,0xf9,0x2e,0x10]
+ vucomisd (%eax), %xmm2
+
+// CHECK: vcomisd %xmm1, %xmm2
+// CHECK: encoding: [0xc5,0xf9,0x2f,0xd1]
+ vcomisd %xmm1, %xmm2
+
+// CHECK: vcomisd (%eax), %xmm2
+// CHECK: encoding: [0xc5,0xf9,0x2f,0x10]
+ vcomisd (%eax), %xmm2
+
+// CHECK: vcvttss2si %xmm1, %eax
+// CHECK: encoding: [0xc5,0xfa,0x2c,0xc1]
+ vcvttss2si %xmm1, %eax
+
+// CHECK: vcvttss2si (%ecx), %eax
+// CHECK: encoding: [0xc5,0xfa,0x2c,0x01]
+ vcvttss2si (%ecx), %eax
+
+// CHECK: vcvtsi2ss (%eax), %xmm1, %xmm2
+// CHECK: encoding: [0xc5,0xf2,0x2a,0x10]
+ vcvtsi2ss (%eax), %xmm1, %xmm2
+
+// CHECK: vcvtsi2ss (%eax), %xmm1, %xmm2
+// CHECK: encoding: [0xc5,0xf2,0x2a,0x10]
+ vcvtsi2ss (%eax), %xmm1, %xmm2
+
+// CHECK: vcvttsd2si %xmm1, %eax
+// CHECK: encoding: [0xc5,0xfb,0x2c,0xc1]
+ vcvttsd2si %xmm1, %eax
+
+// CHECK: vcvttsd2si (%ecx), %eax
+// CHECK: encoding: [0xc5,0xfb,0x2c,0x01]
+ vcvttsd2si (%ecx), %eax
+
+// CHECK: vcvtsi2sd (%eax), %xmm1, %xmm2
+// CHECK: encoding: [0xc5,0xf3,0x2a,0x10]
+ vcvtsi2sd (%eax), %xmm1, %xmm2
+
+// CHECK: vcvtsi2sd (%eax), %xmm1, %xmm2
+// CHECK: encoding: [0xc5,0xf3,0x2a,0x10]
+ vcvtsi2sd (%eax), %xmm1, %xmm2
+
+// CHECK: vmovaps (%eax), %xmm2
+// CHECK: encoding: [0xc5,0xf8,0x28,0x10]
+ vmovaps (%eax), %xmm2
+
+// CHECK: vmovaps %xmm1, %xmm2
+// CHECK: encoding: [0xc5,0xf8,0x28,0xd1]
+ vmovaps %xmm1, %xmm2
+
+// CHECK: vmovaps %xmm1, (%eax)
+// CHECK: encoding: [0xc5,0xf8,0x29,0x08]
+ vmovaps %xmm1, (%eax)
+
+// CHECK: vmovapd (%eax), %xmm2
+// CHECK: encoding: [0xc5,0xf9,0x28,0x10]
+ vmovapd (%eax), %xmm2
+
+// CHECK: vmovapd %xmm1, %xmm2
+// CHECK: encoding: [0xc5,0xf9,0x28,0xd1]
+ vmovapd %xmm1, %xmm2
+
+// CHECK: vmovapd %xmm1, (%eax)
+// CHECK: encoding: [0xc5,0xf9,0x29,0x08]
+ vmovapd %xmm1, (%eax)
+
+// CHECK: vmovups (%eax), %xmm2
+// CHECK: encoding: [0xc5,0xf8,0x10,0x10]
+ vmovups (%eax), %xmm2
+
+// CHECK: vmovups %xmm1, %xmm2
+// CHECK: encoding: [0xc5,0xf8,0x10,0xd1]
+ vmovups %xmm1, %xmm2
+
+// CHECK: vmovups %xmm1, (%eax)
+// CHECK: encoding: [0xc5,0xf8,0x11,0x08]
+ vmovups %xmm1, (%eax)
+
+// CHECK: vmovupd (%eax), %xmm2
+// CHECK: encoding: [0xc5,0xf9,0x10,0x10]
+ vmovupd (%eax), %xmm2
+
+// CHECK: vmovupd %xmm1, %xmm2
+// CHECK: encoding: [0xc5,0xf9,0x10,0xd1]
+ vmovupd %xmm1, %xmm2
+
+// CHECK: vmovupd %xmm1, (%eax)
+// CHECK: encoding: [0xc5,0xf9,0x11,0x08]
+ vmovupd %xmm1, (%eax)
+
+// CHECK: vmovlps %xmm1, (%eax)
+// CHECK: encoding: [0xc5,0xf8,0x13,0x08]
+ vmovlps %xmm1, (%eax)
+
+// CHECK: vmovlps (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0x12,0x18]
+ vmovlps (%eax), %xmm2, %xmm3
+
+// CHECK: vmovlpd %xmm1, (%eax)
+// CHECK: encoding: [0xc5,0xf9,0x13,0x08]
+ vmovlpd %xmm1, (%eax)
+
+// CHECK: vmovlpd (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x12,0x18]
+ vmovlpd (%eax), %xmm2, %xmm3
+
+// CHECK: vmovhps %xmm1, (%eax)
+// CHECK: encoding: [0xc5,0xf8,0x17,0x08]
+ vmovhps %xmm1, (%eax)
+
+// CHECK: vmovhps (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0x16,0x18]
+ vmovhps (%eax), %xmm2, %xmm3
+
+// CHECK: vmovhpd %xmm1, (%eax)
+// CHECK: encoding: [0xc5,0xf9,0x17,0x08]
+ vmovhpd %xmm1, (%eax)
+
+// CHECK: vmovhpd (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x16,0x18]
+ vmovhpd (%eax), %xmm2, %xmm3
+
+// CHECK: vmovlhps %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0x16,0xd9]
+ vmovlhps %xmm1, %xmm2, %xmm3
+
+// CHECK: vmovhlps %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0x12,0xd9]
+ vmovhlps %xmm1, %xmm2, %xmm3
+
+// CHECK: vcvtss2sil %xmm1, %eax
+// CHECK: encoding: [0xc5,0xfa,0x2d,0xc1]
+ vcvtss2si %xmm1, %eax
+
+// CHECK: vcvtss2sil (%eax), %ebx
+// CHECK: encoding: [0xc5,0xfa,0x2d,0x18]
+ vcvtss2si (%eax), %ebx
+
+// CHECK: vcvtdq2ps %xmm5, %xmm6
+// CHECK: encoding: [0xc5,0xf8,0x5b,0xf5]
+ vcvtdq2ps %xmm5, %xmm6
+
+// CHECK: vcvtdq2ps (%eax), %xmm6
+// CHECK: encoding: [0xc5,0xf8,0x5b,0x30]
+ vcvtdq2ps (%eax), %xmm6
+
+// CHECK: vcvtsd2ss %xmm2, %xmm4, %xmm6
+// CHECK: encoding: [0xc5,0xdb,0x5a,0xf2]
+ vcvtsd2ss %xmm2, %xmm4, %xmm6
+
+// CHECK: vcvtsd2ss (%eax), %xmm4, %xmm6
+// CHECK: encoding: [0xc5,0xdb,0x5a,0x30]
+ vcvtsd2ss (%eax), %xmm4, %xmm6
+
+// CHECK: vcvtps2dq %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xf9,0x5b,0xda]
+ vcvtps2dq %xmm2, %xmm3
+
+// CHECK: vcvtps2dq (%eax), %xmm3
+// CHECK: encoding: [0xc5,0xf9,0x5b,0x18]
+ vcvtps2dq (%eax), %xmm3
+
+// CHECK: vcvtss2sd %xmm2, %xmm4, %xmm6
+// CHECK: encoding: [0xc5,0xda,0x5a,0xf2]
+ vcvtss2sd %xmm2, %xmm4, %xmm6
+
+// CHECK: vcvtss2sd (%eax), %xmm4, %xmm6
+// CHECK: encoding: [0xc5,0xda,0x5a,0x30]
+ vcvtss2sd (%eax), %xmm4, %xmm6
+
+// CHECK: vcvtdq2ps %xmm4, %xmm6
+// CHECK: encoding: [0xc5,0xf8,0x5b,0xf4]
+ vcvtdq2ps %xmm4, %xmm6
+
+// CHECK: vcvtdq2ps (%ecx), %xmm4
+// CHECK: encoding: [0xc5,0xf8,0x5b,0x21]
+ vcvtdq2ps (%ecx), %xmm4
+
+// CHECK: vcvttps2dq %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xfa,0x5b,0xda]
+ vcvttps2dq %xmm2, %xmm3
+
+// CHECK: vcvttps2dq (%eax), %xmm3
+// CHECK: encoding: [0xc5,0xfa,0x5b,0x18]
+ vcvttps2dq (%eax), %xmm3
+
+// CHECK: vcvtps2pd %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xf8,0x5a,0xda]
+ vcvtps2pd %xmm2, %xmm3
+
+// CHECK: vcvtps2pd (%eax), %xmm3
+// CHECK: encoding: [0xc5,0xf8,0x5a,0x18]
+ vcvtps2pd (%eax), %xmm3
+
+// CHECK: vcvtpd2ps %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xf9,0x5a,0xda]
+ vcvtpd2ps %xmm2, %xmm3
+
+// CHECK: vsqrtpd %xmm1, %xmm2
+// CHECK: encoding: [0xc5,0xf9,0x51,0xd1]
+ vsqrtpd %xmm1, %xmm2
+
+// CHECK: vsqrtpd (%eax), %xmm2
+// CHECK: encoding: [0xc5,0xf9,0x51,0x10]
+ vsqrtpd (%eax), %xmm2
+
+// CHECK: vsqrtps %xmm1, %xmm2
+// CHECK: encoding: [0xc5,0xf8,0x51,0xd1]
+ vsqrtps %xmm1, %xmm2
+
+// CHECK: vsqrtps (%eax), %xmm2
+// CHECK: encoding: [0xc5,0xf8,0x51,0x10]
+ vsqrtps (%eax), %xmm2
+
+// CHECK: vsqrtsd %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xeb,0x51,0xd9]
+ vsqrtsd %xmm1, %xmm2, %xmm3
+
+// CHECK: vsqrtsd (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xeb,0x51,0x18]
+ vsqrtsd (%eax), %xmm2, %xmm3
+
+// CHECK: vsqrtss %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xea,0x51,0xd9]
+ vsqrtss %xmm1, %xmm2, %xmm3
+
+// CHECK: vsqrtss (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xea,0x51,0x18]
+ vsqrtss (%eax), %xmm2, %xmm3
+
+// CHECK: vrsqrtps %xmm1, %xmm2
+// CHECK: encoding: [0xc5,0xf8,0x52,0xd1]
+ vrsqrtps %xmm1, %xmm2
+
+// CHECK: vrsqrtps (%eax), %xmm2
+// CHECK: encoding: [0xc5,0xf8,0x52,0x10]
+ vrsqrtps (%eax), %xmm2
+
+// CHECK: vrsqrtss %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xea,0x52,0xd9]
+ vrsqrtss %xmm1, %xmm2, %xmm3
+
+// CHECK: vrsqrtss (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xea,0x52,0x18]
+ vrsqrtss (%eax), %xmm2, %xmm3
+
+// CHECK: vrcpps %xmm1, %xmm2
+// CHECK: encoding: [0xc5,0xf8,0x53,0xd1]
+ vrcpps %xmm1, %xmm2
+
+// CHECK: vrcpps (%eax), %xmm2
+// CHECK: encoding: [0xc5,0xf8,0x53,0x10]
+ vrcpps (%eax), %xmm2
+
+// CHECK: vrcpss %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xea,0x53,0xd9]
+ vrcpss %xmm1, %xmm2, %xmm3
+
+// CHECK: vrcpss (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xea,0x53,0x18]
+ vrcpss (%eax), %xmm2, %xmm3
+
+// CHECK: vmovntdq %xmm1, (%eax)
+// CHECK: encoding: [0xc5,0xf9,0xe7,0x08]
+ vmovntdq %xmm1, (%eax)
+
+// CHECK: vmovntpd %xmm1, (%eax)
+// CHECK: encoding: [0xc5,0xf9,0x2b,0x08]
+ vmovntpd %xmm1, (%eax)
+
+// CHECK: vmovntps %xmm1, (%eax)
+// CHECK: encoding: [0xc5,0xf8,0x2b,0x08]
+ vmovntps %xmm1, (%eax)
+
+// CHECK: vldmxcsr (%eax)
+// CHECK: encoding: [0xc5,0xf8,0xae,0x10]
+ vldmxcsr (%eax)
+
+// CHECK: vstmxcsr (%eax)
+// CHECK: encoding: [0xc5,0xf8,0xae,0x18]
+ vstmxcsr (%eax)
+
+// CHECK: vldmxcsr 3735928559
+// CHECK: encoding: [0xc5,0xf8,0xae,0x15,0xef,0xbe,0xad,0xde]
+ vldmxcsr 0xdeadbeef
+
+// CHECK: vstmxcsr 3735928559
+// CHECK: encoding: [0xc5,0xf8,0xae,0x1d,0xef,0xbe,0xad,0xde]
+ vstmxcsr 0xdeadbeef
+
+// CHECK: vpsubb %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xf8,0xd9]
+ vpsubb %xmm1, %xmm2, %xmm3
+
+// CHECK: vpsubb (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xf8,0x18]
+ vpsubb (%eax), %xmm2, %xmm3
+
+// CHECK: vpsubw %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xf9,0xd9]
+ vpsubw %xmm1, %xmm2, %xmm3
+
+// CHECK: vpsubw (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xf9,0x18]
+ vpsubw (%eax), %xmm2, %xmm3
+
+// CHECK: vpsubd %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xfa,0xd9]
+ vpsubd %xmm1, %xmm2, %xmm3
+
+// CHECK: vpsubd (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xfa,0x18]
+ vpsubd (%eax), %xmm2, %xmm3
+
+// CHECK: vpsubq %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xfb,0xd9]
+ vpsubq %xmm1, %xmm2, %xmm3
+
+// CHECK: vpsubq (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xfb,0x18]
+ vpsubq (%eax), %xmm2, %xmm3
+
+// CHECK: vpsubsb %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xe8,0xd9]
+ vpsubsb %xmm1, %xmm2, %xmm3
+
+// CHECK: vpsubsb (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xe8,0x18]
+ vpsubsb (%eax), %xmm2, %xmm3
+
+// CHECK: vpsubsw %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xe9,0xd9]
+ vpsubsw %xmm1, %xmm2, %xmm3
+
+// CHECK: vpsubsw (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xe9,0x18]
+ vpsubsw (%eax), %xmm2, %xmm3
+
+// CHECK: vpsubusb %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xd8,0xd9]
+ vpsubusb %xmm1, %xmm2, %xmm3
+
+// CHECK: vpsubusb (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xd8,0x18]
+ vpsubusb (%eax), %xmm2, %xmm3
+
+// CHECK: vpsubusw %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xd9,0xd9]
+ vpsubusw %xmm1, %xmm2, %xmm3
+
+// CHECK: vpsubusw (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xd9,0x18]
+ vpsubusw (%eax), %xmm2, %xmm3
+
+// CHECK: vpaddb %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xfc,0xd9]
+ vpaddb %xmm1, %xmm2, %xmm3
+
+// CHECK: vpaddb (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xfc,0x18]
+ vpaddb (%eax), %xmm2, %xmm3
+
+// CHECK: vpaddw %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xfd,0xd9]
+ vpaddw %xmm1, %xmm2, %xmm3
+
+// CHECK: vpaddw (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xfd,0x18]
+ vpaddw (%eax), %xmm2, %xmm3
+
+// CHECK: vpaddd %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xfe,0xd9]
+ vpaddd %xmm1, %xmm2, %xmm3
+
+// CHECK: vpaddd (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xfe,0x18]
+ vpaddd (%eax), %xmm2, %xmm3
+
+// CHECK: vpaddq %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xd4,0xd9]
+ vpaddq %xmm1, %xmm2, %xmm3
+
+// CHECK: vpaddq (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xd4,0x18]
+ vpaddq (%eax), %xmm2, %xmm3
+
+// CHECK: vpaddsb %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xec,0xd9]
+ vpaddsb %xmm1, %xmm2, %xmm3
+
+// CHECK: vpaddsb (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xec,0x18]
+ vpaddsb (%eax), %xmm2, %xmm3
+
+// CHECK: vpaddsw %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xed,0xd9]
+ vpaddsw %xmm1, %xmm2, %xmm3
+
+// CHECK: vpaddsw (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xed,0x18]
+ vpaddsw (%eax), %xmm2, %xmm3
+
+// CHECK: vpaddusb %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xdc,0xd9]
+ vpaddusb %xmm1, %xmm2, %xmm3
+
+// CHECK: vpaddusb (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xdc,0x18]
+ vpaddusb (%eax), %xmm2, %xmm3
+
+// CHECK: vpaddusw %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xdd,0xd9]
+ vpaddusw %xmm1, %xmm2, %xmm3
+
+// CHECK: vpaddusw (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xdd,0x18]
+ vpaddusw (%eax), %xmm2, %xmm3
+
+// CHECK: vpmulhuw %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xe4,0xd9]
+ vpmulhuw %xmm1, %xmm2, %xmm3
+
+// CHECK: vpmulhuw (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xe4,0x18]
+ vpmulhuw (%eax), %xmm2, %xmm3
+
+// CHECK: vpmulhw %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xe5,0xd9]
+ vpmulhw %xmm1, %xmm2, %xmm3
+
+// CHECK: vpmulhw (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xe5,0x18]
+ vpmulhw (%eax), %xmm2, %xmm3
+
+// CHECK: vpmullw %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xd5,0xd9]
+ vpmullw %xmm1, %xmm2, %xmm3
+
+// CHECK: vpmullw (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xd5,0x18]
+ vpmullw (%eax), %xmm2, %xmm3
+
+// CHECK: vpmuludq %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xf4,0xd9]
+ vpmuludq %xmm1, %xmm2, %xmm3
+
+// CHECK: vpmuludq (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xf4,0x18]
+ vpmuludq (%eax), %xmm2, %xmm3
+
+// CHECK: vpavgb %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xe0,0xd9]
+ vpavgb %xmm1, %xmm2, %xmm3
+
+// CHECK: vpavgb (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xe0,0x18]
+ vpavgb (%eax), %xmm2, %xmm3
+
+// CHECK: vpavgw %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xe3,0xd9]
+ vpavgw %xmm1, %xmm2, %xmm3
+
+// CHECK: vpavgw (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xe3,0x18]
+ vpavgw (%eax), %xmm2, %xmm3
+
+// CHECK: vpminsw %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xea,0xd9]
+ vpminsw %xmm1, %xmm2, %xmm3
+
+// CHECK: vpminsw (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xea,0x18]
+ vpminsw (%eax), %xmm2, %xmm3
+
+// CHECK: vpminub %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xda,0xd9]
+ vpminub %xmm1, %xmm2, %xmm3
+
+// CHECK: vpminub (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xda,0x18]
+ vpminub (%eax), %xmm2, %xmm3
+
+// CHECK: vpmaxsw %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xee,0xd9]
+ vpmaxsw %xmm1, %xmm2, %xmm3
+
+// CHECK: vpmaxsw (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xee,0x18]
+ vpmaxsw (%eax), %xmm2, %xmm3
+
+// CHECK: vpmaxub %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xde,0xd9]
+ vpmaxub %xmm1, %xmm2, %xmm3
+
+// CHECK: vpmaxub (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xde,0x18]
+ vpmaxub (%eax), %xmm2, %xmm3
+
+// CHECK: vpsadbw %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xf6,0xd9]
+ vpsadbw %xmm1, %xmm2, %xmm3
+
+// CHECK: vpsadbw (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xf6,0x18]
+ vpsadbw (%eax), %xmm2, %xmm3
+
+// CHECK: vpsllw %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xf1,0xd9]
+ vpsllw %xmm1, %xmm2, %xmm3
+
+// CHECK: vpsllw (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xf1,0x18]
+ vpsllw (%eax), %xmm2, %xmm3
+
+// CHECK: vpslld %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xf2,0xd9]
+ vpslld %xmm1, %xmm2, %xmm3
+
+// CHECK: vpslld (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xf2,0x18]
+ vpslld (%eax), %xmm2, %xmm3
+
+// CHECK: vpsllq %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xf3,0xd9]
+ vpsllq %xmm1, %xmm2, %xmm3
+
+// CHECK: vpsllq (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xf3,0x18]
+ vpsllq (%eax), %xmm2, %xmm3
+
+// CHECK: vpsraw %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xe1,0xd9]
+ vpsraw %xmm1, %xmm2, %xmm3
+
+// CHECK: vpsraw (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xe1,0x18]
+ vpsraw (%eax), %xmm2, %xmm3
+
+// CHECK: vpsrad %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xe2,0xd9]
+ vpsrad %xmm1, %xmm2, %xmm3
+
+// CHECK: vpsrad (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xe2,0x18]
+ vpsrad (%eax), %xmm2, %xmm3
+
+// CHECK: vpsrlw %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xd1,0xd9]
+ vpsrlw %xmm1, %xmm2, %xmm3
+
+// CHECK: vpsrlw (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xd1,0x18]
+ vpsrlw (%eax), %xmm2, %xmm3
+
+// CHECK: vpsrld %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xd2,0xd9]
+ vpsrld %xmm1, %xmm2, %xmm3
+
+// CHECK: vpsrld (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xd2,0x18]
+ vpsrld (%eax), %xmm2, %xmm3
+
+// CHECK: vpsrlq %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xd3,0xd9]
+ vpsrlq %xmm1, %xmm2, %xmm3
+
+// CHECK: vpsrlq (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xd3,0x18]
+ vpsrlq (%eax), %xmm2, %xmm3
+
+// CHECK: vpslld $10, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe1,0x72,0xf2,0x0a]
+ vpslld $10, %xmm2, %xmm3
+
+// CHECK: vpslldq $10, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe1,0x73,0xfa,0x0a]
+ vpslldq $10, %xmm2, %xmm3
+
+// CHECK: vpsllq $10, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe1,0x73,0xf2,0x0a]
+ vpsllq $10, %xmm2, %xmm3
+
+// CHECK: vpsllw $10, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe1,0x71,0xf2,0x0a]
+ vpsllw $10, %xmm2, %xmm3
+
+// CHECK: vpsrad $10, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe1,0x72,0xe2,0x0a]
+ vpsrad $10, %xmm2, %xmm3
+
+// CHECK: vpsraw $10, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe1,0x71,0xe2,0x0a]
+ vpsraw $10, %xmm2, %xmm3
+
+// CHECK: vpsrld $10, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe1,0x72,0xd2,0x0a]
+ vpsrld $10, %xmm2, %xmm3
+
+// CHECK: vpsrldq $10, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe1,0x73,0xda,0x0a]
+ vpsrldq $10, %xmm2, %xmm3
+
+// CHECK: vpsrlq $10, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe1,0x73,0xd2,0x0a]
+ vpsrlq $10, %xmm2, %xmm3
+
+// CHECK: vpsrlw $10, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe1,0x71,0xd2,0x0a]
+ vpsrlw $10, %xmm2, %xmm3
+
+// CHECK: vpslld $10, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe1,0x72,0xf2,0x0a]
+ vpslld $10, %xmm2, %xmm3
+
+// CHECK: vpand %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xdb,0xd9]
+ vpand %xmm1, %xmm2, %xmm3
+
+// CHECK: vpand (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xdb,0x18]
+ vpand (%eax), %xmm2, %xmm3
+
+// CHECK: vpor %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xeb,0xd9]
+ vpor %xmm1, %xmm2, %xmm3
+
+// CHECK: vpor (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xeb,0x18]
+ vpor (%eax), %xmm2, %xmm3
+
+// CHECK: vpxor %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xef,0xd9]
+ vpxor %xmm1, %xmm2, %xmm3
+
+// CHECK: vpxor (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xef,0x18]
+ vpxor (%eax), %xmm2, %xmm3
+
+// CHECK: vpandn %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xdf,0xd9]
+ vpandn %xmm1, %xmm2, %xmm3
+
+// CHECK: vpandn (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xdf,0x18]
+ vpandn (%eax), %xmm2, %xmm3
+
+// CHECK: vpcmpeqb %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x74,0xd9]
+ vpcmpeqb %xmm1, %xmm2, %xmm3
+
+// CHECK: vpcmpeqb (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x74,0x18]
+ vpcmpeqb (%eax), %xmm2, %xmm3
+
+// CHECK: vpcmpeqw %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x75,0xd9]
+ vpcmpeqw %xmm1, %xmm2, %xmm3
+
+// CHECK: vpcmpeqw (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x75,0x18]
+ vpcmpeqw (%eax), %xmm2, %xmm3
+
+// CHECK: vpcmpeqd %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x76,0xd9]
+ vpcmpeqd %xmm1, %xmm2, %xmm3
+
+// CHECK: vpcmpeqd (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x76,0x18]
+ vpcmpeqd (%eax), %xmm2, %xmm3
+
+// CHECK: vpcmpgtb %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x64,0xd9]
+ vpcmpgtb %xmm1, %xmm2, %xmm3
+
+// CHECK: vpcmpgtb (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x64,0x18]
+ vpcmpgtb (%eax), %xmm2, %xmm3
+
+// CHECK: vpcmpgtw %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x65,0xd9]
+ vpcmpgtw %xmm1, %xmm2, %xmm3
+
+// CHECK: vpcmpgtw (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x65,0x18]
+ vpcmpgtw (%eax), %xmm2, %xmm3
+
+// CHECK: vpcmpgtd %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x66,0xd9]
+ vpcmpgtd %xmm1, %xmm2, %xmm3
+
+// CHECK: vpcmpgtd (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x66,0x18]
+ vpcmpgtd (%eax), %xmm2, %xmm3
+
+// CHECK: vpacksswb %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x63,0xd9]
+ vpacksswb %xmm1, %xmm2, %xmm3
+
+// CHECK: vpacksswb (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x63,0x18]
+ vpacksswb (%eax), %xmm2, %xmm3
+
+// CHECK: vpackssdw %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x6b,0xd9]
+ vpackssdw %xmm1, %xmm2, %xmm3
+
+// CHECK: vpackssdw (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x6b,0x18]
+ vpackssdw (%eax), %xmm2, %xmm3
+
+// CHECK: vpackuswb %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x67,0xd9]
+ vpackuswb %xmm1, %xmm2, %xmm3
+
+// CHECK: vpackuswb (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x67,0x18]
+ vpackuswb (%eax), %xmm2, %xmm3
+
+// CHECK: vpshufd $4, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xf9,0x70,0xda,0x04]
+ vpshufd $4, %xmm2, %xmm3
+
+// CHECK: vpshufd $4, (%eax), %xmm3
+// CHECK: encoding: [0xc5,0xf9,0x70,0x18,0x04]
+ vpshufd $4, (%eax), %xmm3
+
+// CHECK: vpshufhw $4, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xfa,0x70,0xda,0x04]
+ vpshufhw $4, %xmm2, %xmm3
+
+// CHECK: vpshufhw $4, (%eax), %xmm3
+// CHECK: encoding: [0xc5,0xfa,0x70,0x18,0x04]
+ vpshufhw $4, (%eax), %xmm3
+
+// CHECK: vpshuflw $4, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xfb,0x70,0xda,0x04]
+ vpshuflw $4, %xmm2, %xmm3
+
+// CHECK: vpshuflw $4, (%eax), %xmm3
+// CHECK: encoding: [0xc5,0xfb,0x70,0x18,0x04]
+ vpshuflw $4, (%eax), %xmm3
+
+// CHECK: vpunpcklbw %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x60,0xd9]
+ vpunpcklbw %xmm1, %xmm2, %xmm3
+
+// CHECK: vpunpcklbw (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x60,0x18]
+ vpunpcklbw (%eax), %xmm2, %xmm3
+
+// CHECK: vpunpcklwd %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x61,0xd9]
+ vpunpcklwd %xmm1, %xmm2, %xmm3
+
+// CHECK: vpunpcklwd (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x61,0x18]
+ vpunpcklwd (%eax), %xmm2, %xmm3
+
+// CHECK: vpunpckldq %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x62,0xd9]
+ vpunpckldq %xmm1, %xmm2, %xmm3
+
+// CHECK: vpunpckldq (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x62,0x18]
+ vpunpckldq (%eax), %xmm2, %xmm3
+
+// CHECK: vpunpcklqdq %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x6c,0xd9]
+ vpunpcklqdq %xmm1, %xmm2, %xmm3
+
+// CHECK: vpunpcklqdq (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x6c,0x18]
+ vpunpcklqdq (%eax), %xmm2, %xmm3
+
+// CHECK: vpunpckhbw %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x68,0xd9]
+ vpunpckhbw %xmm1, %xmm2, %xmm3
+
+// CHECK: vpunpckhbw (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x68,0x18]
+ vpunpckhbw (%eax), %xmm2, %xmm3
+
+// CHECK: vpunpckhwd %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x69,0xd9]
+ vpunpckhwd %xmm1, %xmm2, %xmm3
+
+// CHECK: vpunpckhwd (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x69,0x18]
+ vpunpckhwd (%eax), %xmm2, %xmm3
+
+// CHECK: vpunpckhdq %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x6a,0xd9]
+ vpunpckhdq %xmm1, %xmm2, %xmm3
+
+// CHECK: vpunpckhdq (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x6a,0x18]
+ vpunpckhdq (%eax), %xmm2, %xmm3
+
+// CHECK: vpunpckhqdq %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x6d,0xd9]
+ vpunpckhqdq %xmm1, %xmm2, %xmm3
+
+// CHECK: vpunpckhqdq (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x6d,0x18]
+ vpunpckhqdq (%eax), %xmm2, %xmm3
+
+// CHECK: vpinsrw $7, %eax, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xc4,0xd8,0x07]
+ vpinsrw $7, %eax, %xmm2, %xmm3
+
+// CHECK: vpinsrw $7, (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xc4,0x18,0x07]
+ vpinsrw $7, (%eax), %xmm2, %xmm3
+
+// CHECK: vpextrw $7, %xmm2, %eax
+// CHECK: encoding: [0xc5,0xf9,0xc5,0xc2,0x07]
+ vpextrw $7, %xmm2, %eax
+
+// CHECK: vpmovmskb %xmm1, %eax
+// CHECK: encoding: [0xc5,0xf9,0xd7,0xc1]
+ vpmovmskb %xmm1, %eax
+
+// CHECK: vmaskmovdqu %xmm1, %xmm2
+// CHECK: encoding: [0xc5,0xf9,0xf7,0xd1]
+ vmaskmovdqu %xmm1, %xmm2
+
+// CHECK: vmovd %xmm1, %eax
+// CHECK: encoding: [0xc5,0xf9,0x7e,0xc8]
+ vmovd %xmm1, %eax
+
+// CHECK: vmovd %xmm1, (%eax)
+// CHECK: encoding: [0xc5,0xf9,0x7e,0x08]
+ vmovd %xmm1, (%eax)
+
+// CHECK: vmovd %eax, %xmm1
+// CHECK: encoding: [0xc5,0xf9,0x6e,0xc8]
+ vmovd %eax, %xmm1
+
+// CHECK: vmovd (%eax), %xmm1
+// CHECK: encoding: [0xc5,0xf9,0x6e,0x08]
+ vmovd (%eax), %xmm1
+
+// CHECK: vmovq %xmm1, (%eax)
+// CHECK: encoding: [0xc5,0xf9,0xd6,0x08]
+ vmovq %xmm1, (%eax)
+
+// CHECK: vmovq %xmm1, %xmm2
+// CHECK: encoding: [0xc5,0xfa,0x7e,0xd1]
+ vmovq %xmm1, %xmm2
+
+// CHECK: vmovq (%eax), %xmm1
+// CHECK: encoding: [0xc5,0xfa,0x7e,0x08]
+ vmovq (%eax), %xmm1
+
+// CHECK: vcvtpd2dq %xmm1, %xmm2
+// CHECK: encoding: [0xc5,0xfb,0xe6,0xd1]
+ vcvtpd2dq %xmm1, %xmm2
+
+// CHECK: vcvtdq2pd %xmm1, %xmm2
+// CHECK: encoding: [0xc5,0xfa,0xe6,0xd1]
+ vcvtdq2pd %xmm1, %xmm2
+
+// CHECK: vcvtdq2pd (%eax), %xmm2
+// CHECK: encoding: [0xc5,0xfa,0xe6,0x10]
+ vcvtdq2pd (%eax), %xmm2
+
+// CHECK: vmovshdup %xmm1, %xmm2
+// CHECK: encoding: [0xc5,0xfa,0x16,0xd1]
+ vmovshdup %xmm1, %xmm2
+
+// CHECK: vmovshdup (%eax), %xmm2
+// CHECK: encoding: [0xc5,0xfa,0x16,0x10]
+ vmovshdup (%eax), %xmm2
+
+// CHECK: vmovsldup %xmm1, %xmm2
+// CHECK: encoding: [0xc5,0xfa,0x12,0xd1]
+ vmovsldup %xmm1, %xmm2
+
+// CHECK: vmovsldup (%eax), %xmm2
+// CHECK: encoding: [0xc5,0xfa,0x12,0x10]
+ vmovsldup (%eax), %xmm2
+
+// CHECK: vmovddup %xmm1, %xmm2
+// CHECK: encoding: [0xc5,0xfb,0x12,0xd1]
+ vmovddup %xmm1, %xmm2
+
+// CHECK: vmovddup (%eax), %xmm2
+// CHECK: encoding: [0xc5,0xfb,0x12,0x10]
+ vmovddup (%eax), %xmm2
+
+// CHECK: vaddsubps %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xeb,0xd0,0xd9]
+ vaddsubps %xmm1, %xmm2, %xmm3
+
+// CHECK: vaddsubps (%eax), %xmm1, %xmm2
+// CHECK: encoding: [0xc5,0xf3,0xd0,0x10]
+ vaddsubps (%eax), %xmm1, %xmm2
+
+// CHECK: vaddsubpd %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xd0,0xd9]
+ vaddsubpd %xmm1, %xmm2, %xmm3
+
+// CHECK: vaddsubpd (%eax), %xmm1, %xmm2
+// CHECK: encoding: [0xc5,0xf1,0xd0,0x10]
+ vaddsubpd (%eax), %xmm1, %xmm2
+
+// CHECK: vhaddps %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xeb,0x7c,0xd9]
+ vhaddps %xmm1, %xmm2, %xmm3
+
+// CHECK: vhaddps (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xeb,0x7c,0x18]
+ vhaddps (%eax), %xmm2, %xmm3
+
+// CHECK: vhaddpd %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x7c,0xd9]
+ vhaddpd %xmm1, %xmm2, %xmm3
+
+// CHECK: vhaddpd (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x7c,0x18]
+ vhaddpd (%eax), %xmm2, %xmm3
+
+// CHECK: vhsubps %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xeb,0x7d,0xd9]
+ vhsubps %xmm1, %xmm2, %xmm3
+
+// CHECK: vhsubps (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xeb,0x7d,0x18]
+ vhsubps (%eax), %xmm2, %xmm3
+
+// CHECK: vhsubpd %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x7d,0xd9]
+ vhsubpd %xmm1, %xmm2, %xmm3
+
+// CHECK: vhsubpd (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x7d,0x18]
+ vhsubpd (%eax), %xmm2, %xmm3
+
Modified: llvm/branches/wendling/eh/test/MC/AsmParser/X86/x86_32-new-encoder.s
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/MC/AsmParser/X86/x86_32-new-encoder.s?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/MC/AsmParser/X86/x86_32-new-encoder.s (original)
+++ llvm/branches/wendling/eh/test/MC/AsmParser/X86/x86_32-new-encoder.s Fri Jul 2 04:57:13 2010
@@ -393,3 +393,25 @@
// CHECK: wait
// CHECK: encoding: [0x9b]
fwait
+
+// rdar://7873482
+// CHECK: [0x65,0x8b,0x05,0x7c,0x00,0x00,0x00]
+// FIXME: This is correct but a poor encoding: use 65 a1 7c 00 00 00 instead.
+ movl %gs:124, %eax
+
+// CHECK: pusha
+// CHECK: encoding: [0x60]
+ pusha
+
+// CHECK: popa
+// CHECK: encoding: [0x61]
+ popa
+
+// CHECK: pushal
+// CHECK: encoding: [0x60]
+ pushal
+
+// CHECK: popal
+// CHECK: encoding: [0x61]
+ popal
+
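A note on the FIXME in the %gs:124 test above: the shorter form exists because %eax has a dedicated move-from-memory-offset opcode (a1), one byte smaller than the general ModRM form (8b /r). A rough byte-level comparison, assuming the usual one-byte gs segment-override prefix (65):

  // 65 8b 05 7c 00 00 00    movl %gs:124, %eax  (ModRM r/m form, 7 bytes)
  // 65 a1 7c 00 00 00       movl %gs:124, %eax  (%eax/moffs32 short form, 6 bytes)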
Modified: llvm/branches/wendling/eh/test/MC/AsmParser/X86/x86_64-encoding.s
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/MC/AsmParser/X86/x86_64-encoding.s?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/MC/AsmParser/X86/x86_64-encoding.s (original)
+++ llvm/branches/wendling/eh/test/MC/AsmParser/X86/x86_64-encoding.s Fri Jul 2 04:57:13 2010
@@ -264,3 +264,1401 @@
// CHECK: encoding: [0xc5,0x1b,0x5d,0x54,0xcb,0xfc]
vminsd -4(%rbx,%rcx,8), %xmm12, %xmm10
+// CHECK: vmaxps %xmm10, %xmm14, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x08,0x5f,0xe2]
+ vmaxps %xmm10, %xmm14, %xmm12
+
+// CHECK: vmaxpd %xmm10, %xmm14, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x09,0x5f,0xe2]
+ vmaxpd %xmm10, %xmm14, %xmm12
+
+// CHECK: vminps %xmm10, %xmm14, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x08,0x5d,0xe2]
+ vminps %xmm10, %xmm14, %xmm12
+
+// CHECK: vminpd %xmm10, %xmm14, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x09,0x5d,0xe2]
+ vminpd %xmm10, %xmm14, %xmm12
+
+// CHECK: vmaxps -4(%rbx,%rcx,8), %xmm12, %xmm10
+// CHECK: encoding: [0xc5,0x18,0x5f,0x54,0xcb,0xfc]
+ vmaxps -4(%rbx,%rcx,8), %xmm12, %xmm10
+
+// CHECK: vmaxpd -4(%rbx,%rcx,8), %xmm12, %xmm10
+// CHECK: encoding: [0xc5,0x19,0x5f,0x54,0xcb,0xfc]
+ vmaxpd -4(%rbx,%rcx,8), %xmm12, %xmm10
+
+// CHECK: vminps -4(%rbx,%rcx,8), %xmm12, %xmm10
+// CHECK: encoding: [0xc5,0x18,0x5d,0x54,0xcb,0xfc]
+ vminps -4(%rbx,%rcx,8), %xmm12, %xmm10
+
+// CHECK: vminpd -4(%rbx,%rcx,8), %xmm12, %xmm10
+// CHECK: encoding: [0xc5,0x19,0x5d,0x54,0xcb,0xfc]
+ vminpd -4(%rbx,%rcx,8), %xmm12, %xmm10
+
+// CHECK: vandps %xmm10, %xmm14, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x08,0x54,0xe2]
+ vandps %xmm10, %xmm14, %xmm12
+
+// CHECK: vandpd %xmm10, %xmm14, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x09,0x54,0xe2]
+ vandpd %xmm10, %xmm14, %xmm12
+
+// CHECK: vandps -4(%rbx,%rcx,8), %xmm12, %xmm10
+// CHECK: encoding: [0xc5,0x18,0x54,0x54,0xcb,0xfc]
+ vandps -4(%rbx,%rcx,8), %xmm12, %xmm10
+
+// CHECK: vandpd -4(%rbx,%rcx,8), %xmm12, %xmm10
+// CHECK: encoding: [0xc5,0x19,0x54,0x54,0xcb,0xfc]
+ vandpd -4(%rbx,%rcx,8), %xmm12, %xmm10
+
+// CHECK: vorps %xmm10, %xmm14, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x08,0x56,0xe2]
+ vorps %xmm10, %xmm14, %xmm12
+
+// CHECK: vorpd %xmm10, %xmm14, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x09,0x56,0xe2]
+ vorpd %xmm10, %xmm14, %xmm12
+
+// CHECK: vorps -4(%rbx,%rcx,8), %xmm12, %xmm10
+// CHECK: encoding: [0xc5,0x18,0x56,0x54,0xcb,0xfc]
+ vorps -4(%rbx,%rcx,8), %xmm12, %xmm10
+
+// CHECK: vorpd -4(%rbx,%rcx,8), %xmm12, %xmm10
+// CHECK: encoding: [0xc5,0x19,0x56,0x54,0xcb,0xfc]
+ vorpd -4(%rbx,%rcx,8), %xmm12, %xmm10
+
+// CHECK: vxorps %xmm10, %xmm14, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x08,0x57,0xe2]
+ vxorps %xmm10, %xmm14, %xmm12
+
+// CHECK: vxorpd %xmm10, %xmm14, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x09,0x57,0xe2]
+ vxorpd %xmm10, %xmm14, %xmm12
+
+// CHECK: vxorps -4(%rbx,%rcx,8), %xmm12, %xmm10
+// CHECK: encoding: [0xc5,0x18,0x57,0x54,0xcb,0xfc]
+ vxorps -4(%rbx,%rcx,8), %xmm12, %xmm10
+
+// CHECK: vxorpd -4(%rbx,%rcx,8), %xmm12, %xmm10
+// CHECK: encoding: [0xc5,0x19,0x57,0x54,0xcb,0xfc]
+ vxorpd -4(%rbx,%rcx,8), %xmm12, %xmm10
+
+// CHECK: vandnps %xmm10, %xmm14, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x08,0x55,0xe2]
+ vandnps %xmm10, %xmm14, %xmm12
+
+// CHECK: vandnpd %xmm10, %xmm14, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x09,0x55,0xe2]
+ vandnpd %xmm10, %xmm14, %xmm12
+
+// CHECK: vandnps -4(%rbx,%rcx,8), %xmm12, %xmm10
+// CHECK: encoding: [0xc5,0x18,0x55,0x54,0xcb,0xfc]
+ vandnps -4(%rbx,%rcx,8), %xmm12, %xmm10
+
+// CHECK: vandnpd -4(%rbx,%rcx,8), %xmm12, %xmm10
+// CHECK: encoding: [0xc5,0x19,0x55,0x54,0xcb,0xfc]
+ vandnpd -4(%rbx,%rcx,8), %xmm12, %xmm10
+
+// CHECK: vmovss -4(%rbx,%rcx,8), %xmm10
+// CHECK: encoding: [0xc5,0x7a,0x10,0x54,0xcb,0xfc]
+ vmovss -4(%rbx,%rcx,8), %xmm10
+
+// CHECK: vmovss %xmm14, %xmm10, %xmm15
+// CHECK: encoding: [0xc4,0x41,0x2a,0x10,0xfe]
+ vmovss %xmm14, %xmm10, %xmm15
+
+// CHECK: vmovsd -4(%rbx,%rcx,8), %xmm10
+// CHECK: encoding: [0xc5,0x7b,0x10,0x54,0xcb,0xfc]
+ vmovsd -4(%rbx,%rcx,8), %xmm10
+
+// CHECK: vmovsd %xmm14, %xmm10, %xmm15
+// CHECK: encoding: [0xc4,0x41,0x2b,0x10,0xfe]
+ vmovsd %xmm14, %xmm10, %xmm15
+
+// rdar://7840289
+// CHECK: pshufb CPI1_0(%rip), %xmm1
+// CHECK: encoding: [0x66,0x0f,0x38,0x00,0x0d,A,A,A,A]
+// CHECK: fixup A - offset: 5, value: CPI1_0-4
+pshufb CPI1_0(%rip), %xmm1
+
+// CHECK: vunpckhps %xmm15, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0x15,0xef]
+ vunpckhps %xmm15, %xmm12, %xmm13
+
+// CHECK: vunpckhpd %xmm15, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0x15,0xef]
+ vunpckhpd %xmm15, %xmm12, %xmm13
+
+// CHECK: vunpcklps %xmm15, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0x14,0xef]
+ vunpcklps %xmm15, %xmm12, %xmm13
+
+// CHECK: vunpcklpd %xmm15, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0x14,0xef]
+ vunpcklpd %xmm15, %xmm12, %xmm13
+
+// CHECK: vunpckhps -4(%rbx,%rcx,8), %xmm12, %xmm15
+// CHECK: encoding: [0xc5,0x18,0x15,0x7c,0xcb,0xfc]
+ vunpckhps -4(%rbx,%rcx,8), %xmm12, %xmm15
+
+// CHECK: vunpckhpd -4(%rbx,%rcx,8), %xmm12, %xmm15
+// CHECK: encoding: [0xc5,0x19,0x15,0x7c,0xcb,0xfc]
+ vunpckhpd -4(%rbx,%rcx,8), %xmm12, %xmm15
+
+// CHECK: vunpcklps -4(%rbx,%rcx,8), %xmm12, %xmm15
+// CHECK: encoding: [0xc5,0x18,0x14,0x7c,0xcb,0xfc]
+ vunpcklps -4(%rbx,%rcx,8), %xmm12, %xmm15
+
+// CHECK: vunpcklpd -4(%rbx,%rcx,8), %xmm12, %xmm15
+// CHECK: encoding: [0xc5,0x19,0x14,0x7c,0xcb,0xfc]
+ vunpcklpd -4(%rbx,%rcx,8), %xmm12, %xmm15
+
+// CHECK: vcmpps $0, %xmm10, %xmm12, %xmm15
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xfa,0x00]
+ vcmpps $0, %xmm10, %xmm12, %xmm15
+
+// CHECK: vcmpps $0, (%rax), %xmm12, %xmm15
+// CHECK: encoding: [0xc5,0x18,0xc2,0x38,0x00]
+ vcmpps $0, (%rax), %xmm12, %xmm15
+
+// CHECK: vcmpps $7, %xmm10, %xmm12, %xmm15
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xfa,0x07]
+ vcmpps $7, %xmm10, %xmm12, %xmm15
+
+// CHECK: vcmppd $0, %xmm10, %xmm12, %xmm15
+// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xfa,0x00]
+ vcmppd $0, %xmm10, %xmm12, %xmm15
+
+// CHECK: vcmppd $0, (%rax), %xmm12, %xmm15
+// CHECK: encoding: [0xc5,0x19,0xc2,0x38,0x00]
+ vcmppd $0, (%rax), %xmm12, %xmm15
+
+// CHECK: vcmppd $7, %xmm10, %xmm12, %xmm15
+// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xfa,0x07]
+ vcmppd $7, %xmm10, %xmm12, %xmm15
+
+// CHECK: vshufps $8, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc6,0xeb,0x08]
+ vshufps $8, %xmm11, %xmm12, %xmm13
+
+// CHECK: vshufps $8, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x18,0xc6,0x6c,0xcb,0xfc,0x08]
+ vshufps $8, -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vshufpd $8, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xc6,0xeb,0x08]
+ vshufpd $8, %xmm11, %xmm12, %xmm13
+
+// CHECK: vshufpd $8, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xc6,0x6c,0xcb,0xfc,0x08]
+ vshufpd $8, -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpps $0, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x00]
+ vcmpeqps %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps $2, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x02]
+ vcmpleps %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps $1, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x01]
+ vcmpltps %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps $4, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x04]
+ vcmpneqps %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps $6, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x06]
+ vcmpnleps %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps $5, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x05]
+ vcmpnltps %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps $7, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x07]
+ vcmpordps %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps $3, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x03]
+ vcmpunordps %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps $0, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x00]
+ vcmpeqps -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpps $2, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x02]
+ vcmpleps -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpps $1, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x01]
+ vcmpltps -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpps $4, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x04]
+ vcmpneqps -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpps $6, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x06]
+ vcmpnleps -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpps $5, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x05]
+ vcmpnltps -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpps $7, -4(%rbx,%rcx,8), %xmm6, %xmm2
+// CHECK: encoding: [0xc5,0xc8,0xc2,0x54,0xcb,0xfc,0x07]
+ vcmpordps -4(%rbx,%rcx,8), %xmm6, %xmm2
+
+// CHECK: vcmpps $3, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x03]
+ vcmpunordps -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmppd $0, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x00]
+ vcmpeqpd %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmppd $2, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x02]
+ vcmplepd %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmppd $1, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x01]
+ vcmpltpd %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmppd $4, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x04]
+ vcmpneqpd %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmppd $6, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x06]
+ vcmpnlepd %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmppd $5, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x05]
+ vcmpnltpd %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmppd $7, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x07]
+ vcmpordpd %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmppd $3, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x03]
+ vcmpunordpd %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmppd $0, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x00]
+ vcmpeqpd -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmppd $2, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x02]
+ vcmplepd -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmppd $1, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x01]
+ vcmpltpd -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmppd $4, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x04]
+ vcmpneqpd -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmppd $6, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x06]
+ vcmpnlepd -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmppd $5, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x05]
+ vcmpnltpd -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmppd $7, -4(%rbx,%rcx,8), %xmm6, %xmm2
+// CHECK: encoding: [0xc5,0xc9,0xc2,0x54,0xcb,0xfc,0x07]
+ vcmpordpd -4(%rbx,%rcx,8), %xmm6, %xmm2
+
+// CHECK: vcmppd $3, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x03]
+ vcmpunordpd -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpss $0, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x00]
+ vcmpeqss %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpss $2, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x02]
+ vcmpless %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpss $1, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x01]
+ vcmpltss %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpss $4, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x04]
+ vcmpneqss %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpss $6, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x06]
+ vcmpnless %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpss $5, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x05]
+ vcmpnltss %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpss $7, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x07]
+ vcmpordss %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpss $3, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x03]
+ vcmpunordss %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpss $0, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x00]
+ vcmpeqss -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpss $2, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x02]
+ vcmpless -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpss $1, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x01]
+ vcmpltss -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpss $4, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x04]
+ vcmpneqss -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpss $6, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x06]
+ vcmpnless -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpss $5, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x05]
+ vcmpnltss -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpss $7, -4(%rbx,%rcx,8), %xmm6, %xmm2
+// CHECK: encoding: [0xc5,0xca,0xc2,0x54,0xcb,0xfc,0x07]
+ vcmpordss -4(%rbx,%rcx,8), %xmm6, %xmm2
+
+// CHECK: vcmpss $3, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x03]
+ vcmpunordss -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpsd $0, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x00]
+ vcmpeqsd %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpsd $2, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x02]
+ vcmplesd %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpsd $1, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x01]
+ vcmpltsd %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpsd $4, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x04]
+ vcmpneqsd %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpsd $6, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x06]
+ vcmpnlesd %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpsd $5, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x05]
+ vcmpnltsd %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpsd $7, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x07]
+ vcmpordsd %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpsd $3, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x03]
+ vcmpunordsd %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpsd $0, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x00]
+ vcmpeqsd -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpsd $2, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x02]
+ vcmplesd -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpsd $1, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x01]
+ vcmpltsd -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpsd $4, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x04]
+ vcmpneqsd -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpsd $6, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x06]
+ vcmpnlesd -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpsd $5, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x05]
+ vcmpnltsd -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpsd $7, -4(%rbx,%rcx,8), %xmm6, %xmm2
+// CHECK: encoding: [0xc5,0xcb,0xc2,0x54,0xcb,0xfc,0x07]
+ vcmpordsd -4(%rbx,%rcx,8), %xmm6, %xmm2
+
+// CHECK: vcmpsd $3, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x03]
+ vcmpunordsd -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vucomiss %xmm11, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x78,0x2e,0xe3]
+ vucomiss %xmm11, %xmm12
+
+// CHECK: vucomiss (%rax), %xmm12
+// CHECK: encoding: [0xc5,0x78,0x2e,0x20]
+ vucomiss (%rax), %xmm12
+
+// CHECK: vcomiss %xmm11, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x78,0x2f,0xe3]
+ vcomiss %xmm11, %xmm12
+
+// CHECK: vcomiss (%rax), %xmm12
+// CHECK: encoding: [0xc5,0x78,0x2f,0x20]
+ vcomiss (%rax), %xmm12
+
+// CHECK: vucomisd %xmm11, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x79,0x2e,0xe3]
+ vucomisd %xmm11, %xmm12
+
+// CHECK: vucomisd (%rax), %xmm12
+// CHECK: encoding: [0xc5,0x79,0x2e,0x20]
+ vucomisd (%rax), %xmm12
+
+// CHECK: vcomisd %xmm11, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x79,0x2f,0xe3]
+ vcomisd %xmm11, %xmm12
+
+// CHECK: vcomisd (%rax), %xmm12
+// CHECK: encoding: [0xc5,0x79,0x2f,0x20]
+ vcomisd (%rax), %xmm12
+
+// CHECK: vcvttss2si (%rcx), %eax
+// CHECK: encoding: [0xc5,0xfa,0x2c,0x01]
+ vcvttss2si (%rcx), %eax
+
+// CHECK: vcvtsi2ss (%rax), %xmm11, %xmm12
+// CHECK: encoding: [0xc5,0x22,0x2a,0x20]
+ vcvtsi2ss (%rax), %xmm11, %xmm12
+
+// CHECK: vcvtsi2ss (%rax), %xmm11, %xmm12
+// CHECK: encoding: [0xc5,0x22,0x2a,0x20]
+ vcvtsi2ss (%rax), %xmm11, %xmm12
+
+// CHECK: vcvttsd2si (%rcx), %eax
+// CHECK: encoding: [0xc5,0xfb,0x2c,0x01]
+ vcvttsd2si (%rcx), %eax
+
+// CHECK: vcvtsi2sd (%rax), %xmm11, %xmm12
+// CHECK: encoding: [0xc5,0x23,0x2a,0x20]
+ vcvtsi2sd (%rax), %xmm11, %xmm12
+
+// CHECK: vcvtsi2sd (%rax), %xmm11, %xmm12
+// CHECK: encoding: [0xc5,0x23,0x2a,0x20]
+ vcvtsi2sd (%rax), %xmm11, %xmm12
+
+// CHECK: vmovaps (%rax), %xmm12
+// CHECK: encoding: [0xc5,0x78,0x28,0x20]
+ vmovaps (%rax), %xmm12
+
+// CHECK: vmovaps %xmm11, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x78,0x28,0xe3]
+ vmovaps %xmm11, %xmm12
+
+// CHECK: vmovaps %xmm11, (%rax)
+// CHECK: encoding: [0xc5,0x78,0x29,0x18]
+ vmovaps %xmm11, (%rax)
+
+// CHECK: vmovapd (%rax), %xmm12
+// CHECK: encoding: [0xc5,0x79,0x28,0x20]
+ vmovapd (%rax), %xmm12
+
+// CHECK: vmovapd %xmm11, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x79,0x28,0xe3]
+ vmovapd %xmm11, %xmm12
+
+// CHECK: vmovapd %xmm11, (%rax)
+// CHECK: encoding: [0xc5,0x79,0x29,0x18]
+ vmovapd %xmm11, (%rax)
+
+// CHECK: vmovups (%rax), %xmm12
+// CHECK: encoding: [0xc5,0x78,0x10,0x20]
+ vmovups (%rax), %xmm12
+
+// CHECK: vmovups %xmm11, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x78,0x10,0xe3]
+ vmovups %xmm11, %xmm12
+
+// CHECK: vmovups %xmm11, (%rax)
+// CHECK: encoding: [0xc5,0x78,0x11,0x18]
+ vmovups %xmm11, (%rax)
+
+// CHECK: vmovupd (%rax), %xmm12
+// CHECK: encoding: [0xc5,0x79,0x10,0x20]
+ vmovupd (%rax), %xmm12
+
+// CHECK: vmovupd %xmm11, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x79,0x10,0xe3]
+ vmovupd %xmm11, %xmm12
+
+// CHECK: vmovupd %xmm11, (%rax)
+// CHECK: encoding: [0xc5,0x79,0x11,0x18]
+ vmovupd %xmm11, (%rax)
+
+// CHECK: vmovlps %xmm11, (%rax)
+// CHECK: encoding: [0xc5,0x78,0x13,0x18]
+ vmovlps %xmm11, (%rax)
+
+// CHECK: vmovlps (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x18,0x12,0x28]
+ vmovlps (%rax), %xmm12, %xmm13
+
+// CHECK: vmovlpd %xmm11, (%rax)
+// CHECK: encoding: [0xc5,0x79,0x13,0x18]
+ vmovlpd %xmm11, (%rax)
+
+// CHECK: vmovlpd (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0x12,0x28]
+ vmovlpd (%rax), %xmm12, %xmm13
+
+// CHECK: vmovhps %xmm11, (%rax)
+// CHECK: encoding: [0xc5,0x78,0x17,0x18]
+ vmovhps %xmm11, (%rax)
+
+// CHECK: vmovhps (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x18,0x16,0x28]
+ vmovhps (%rax), %xmm12, %xmm13
+
+// CHECK: vmovhpd %xmm11, (%rax)
+// CHECK: encoding: [0xc5,0x79,0x17,0x18]
+ vmovhpd %xmm11, (%rax)
+
+// CHECK: vmovhpd (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0x16,0x28]
+ vmovhpd (%rax), %xmm12, %xmm13
+
+// CHECK: vmovlhps %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0x16,0xeb]
+ vmovlhps %xmm11, %xmm12, %xmm13
+
+// CHECK: vmovhlps %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0x12,0xeb]
+ vmovhlps %xmm11, %xmm12, %xmm13
+
+// CHECK: vcvtss2sil %xmm11, %eax
+// CHECK: encoding: [0xc4,0xc1,0x7a,0x2d,0xc3]
+ vcvtss2si %xmm11, %eax
+
+// CHECK: vcvtss2sil (%rax), %ebx
+// CHECK: encoding: [0xc5,0xfa,0x2d,0x18]
+ vcvtss2si (%rax), %ebx
+
+// CHECK: vcvtdq2ps %xmm10, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x78,0x5b,0xe2]
+ vcvtdq2ps %xmm10, %xmm12
+
+// CHECK: vcvtdq2ps (%rax), %xmm12
+// CHECK: encoding: [0xc5,0x78,0x5b,0x20]
+ vcvtdq2ps (%rax), %xmm12
+
+// CHECK: vcvtsd2ss %xmm12, %xmm13, %xmm10
+// CHECK: encoding: [0xc4,0x41,0x13,0x5a,0xd4]
+ vcvtsd2ss %xmm12, %xmm13, %xmm10
+
+// CHECK: vcvtsd2ss (%rax), %xmm13, %xmm10
+// CHECK: encoding: [0xc5,0x13,0x5a,0x10]
+ vcvtsd2ss (%rax), %xmm13, %xmm10
+
+// CHECK: vcvtps2dq %xmm12, %xmm11
+// CHECK: encoding: [0xc4,0x41,0x79,0x5b,0xdc]
+ vcvtps2dq %xmm12, %xmm11
+
+// CHECK: vcvtps2dq (%rax), %xmm11
+// CHECK: encoding: [0xc5,0x79,0x5b,0x18]
+ vcvtps2dq (%rax), %xmm11
+
+// CHECK: vcvtss2sd %xmm12, %xmm13, %xmm10
+// CHECK: encoding: [0xc4,0x41,0x12,0x5a,0xd4]
+ vcvtss2sd %xmm12, %xmm13, %xmm10
+
+// CHECK: vcvtss2sd (%rax), %xmm13, %xmm10
+// CHECK: encoding: [0xc5,0x12,0x5a,0x10]
+ vcvtss2sd (%rax), %xmm13, %xmm10
+
+// CHECK: vcvtdq2ps %xmm13, %xmm10
+// CHECK: encoding: [0xc4,0x41,0x78,0x5b,0xd5]
+ vcvtdq2ps %xmm13, %xmm10
+
+// CHECK: vcvtdq2ps (%ecx), %xmm13
+// CHECK: encoding: [0xc5,0x78,0x5b,0x29]
+ vcvtdq2ps (%ecx), %xmm13
+
+// CHECK: vcvttps2dq %xmm12, %xmm11
+// CHECK: encoding: [0xc4,0x41,0x7a,0x5b,0xdc]
+ vcvttps2dq %xmm12, %xmm11
+
+// CHECK: vcvttps2dq (%rax), %xmm11
+// CHECK: encoding: [0xc5,0x7a,0x5b,0x18]
+ vcvttps2dq (%rax), %xmm11
+
+// CHECK: vcvtps2pd %xmm12, %xmm11
+// CHECK: encoding: [0xc4,0x41,0x78,0x5a,0xdc]
+ vcvtps2pd %xmm12, %xmm11
+
+// CHECK: vcvtps2pd (%rax), %xmm11
+// CHECK: encoding: [0xc5,0x78,0x5a,0x18]
+ vcvtps2pd (%rax), %xmm11
+
+// CHECK: vcvtpd2ps %xmm12, %xmm11
+// CHECK: encoding: [0xc4,0x41,0x79,0x5a,0xdc]
+ vcvtpd2ps %xmm12, %xmm11
+
+// CHECK: vsqrtpd %xmm11, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x79,0x51,0xe3]
+ vsqrtpd %xmm11, %xmm12
+
+// CHECK: vsqrtpd (%rax), %xmm12
+// CHECK: encoding: [0xc5,0x79,0x51,0x20]
+ vsqrtpd (%rax), %xmm12
+
+// CHECK: vsqrtps %xmm11, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x78,0x51,0xe3]
+ vsqrtps %xmm11, %xmm12
+
+// CHECK: vsqrtps (%rax), %xmm12
+// CHECK: encoding: [0xc5,0x78,0x51,0x20]
+ vsqrtps (%rax), %xmm12
+
+// CHECK: vsqrtsd %xmm11, %xmm12, %xmm10
+// CHECK: encoding: [0xc4,0x41,0x1b,0x51,0xd3]
+ vsqrtsd %xmm11, %xmm12, %xmm10
+
+// CHECK: vsqrtsd (%rax), %xmm12, %xmm10
+// CHECK: encoding: [0xc5,0x1b,0x51,0x10]
+ vsqrtsd (%rax), %xmm12, %xmm10
+
+// CHECK: vsqrtss %xmm11, %xmm12, %xmm10
+// CHECK: encoding: [0xc4,0x41,0x1a,0x51,0xd3]
+ vsqrtss %xmm11, %xmm12, %xmm10
+
+// CHECK: vsqrtss (%rax), %xmm12, %xmm10
+// CHECK: encoding: [0xc5,0x1a,0x51,0x10]
+ vsqrtss (%rax), %xmm12, %xmm10
+
+// CHECK: vrsqrtps %xmm11, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x78,0x52,0xe3]
+ vrsqrtps %xmm11, %xmm12
+
+// CHECK: vrsqrtps (%rax), %xmm12
+// CHECK: encoding: [0xc5,0x78,0x52,0x20]
+ vrsqrtps (%rax), %xmm12
+
+// CHECK: vrsqrtss %xmm11, %xmm12, %xmm10
+// CHECK: encoding: [0xc4,0x41,0x1a,0x52,0xd3]
+ vrsqrtss %xmm11, %xmm12, %xmm10
+
+// CHECK: vrsqrtss (%rax), %xmm12, %xmm10
+// CHECK: encoding: [0xc5,0x1a,0x52,0x10]
+ vrsqrtss (%rax), %xmm12, %xmm10
+
+// CHECK: vrcpps %xmm11, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x78,0x53,0xe3]
+ vrcpps %xmm11, %xmm12
+
+// CHECK: vrcpps (%rax), %xmm12
+// CHECK: encoding: [0xc5,0x78,0x53,0x20]
+ vrcpps (%rax), %xmm12
+
+// CHECK: vrcpss %xmm11, %xmm12, %xmm10
+// CHECK: encoding: [0xc4,0x41,0x1a,0x53,0xd3]
+ vrcpss %xmm11, %xmm12, %xmm10
+
+// CHECK: vrcpss (%rax), %xmm12, %xmm10
+// CHECK: encoding: [0xc5,0x1a,0x53,0x10]
+ vrcpss (%rax), %xmm12, %xmm10
+
+// CHECK: vmovntdq %xmm11, (%rax)
+// CHECK: encoding: [0xc5,0x79,0xe7,0x18]
+ vmovntdq %xmm11, (%rax)
+
+// CHECK: vmovntpd %xmm11, (%rax)
+// CHECK: encoding: [0xc5,0x79,0x2b,0x18]
+ vmovntpd %xmm11, (%rax)
+
+// CHECK: vmovntps %xmm11, (%rax)
+// CHECK: encoding: [0xc5,0x78,0x2b,0x18]
+ vmovntps %xmm11, (%rax)
+
+// CHECK: vldmxcsr -4(%rip)
+// CHECK: encoding: [0xc5,0xf8,0xae,0x15,0xfc,0xff,0xff,0xff]
+ vldmxcsr -4(%rip)
+
+// CHECK: vstmxcsr -4(%rsp)
+// CHECK: encoding: [0xc5,0xf8,0xae,0x5c,0x24,0xfc]
+ vstmxcsr -4(%rsp)
+
+// CHECK: vpsubb %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xf8,0xeb]
+ vpsubb %xmm11, %xmm12, %xmm13
+
+// CHECK: vpsubb (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xf8,0x28]
+ vpsubb (%rax), %xmm12, %xmm13
+
+// CHECK: vpsubw %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xf9,0xeb]
+ vpsubw %xmm11, %xmm12, %xmm13
+
+// CHECK: vpsubw (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xf9,0x28]
+ vpsubw (%rax), %xmm12, %xmm13
+
+// CHECK: vpsubd %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xfa,0xeb]
+ vpsubd %xmm11, %xmm12, %xmm13
+
+// CHECK: vpsubd (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xfa,0x28]
+ vpsubd (%rax), %xmm12, %xmm13
+
+// CHECK: vpsubq %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xfb,0xeb]
+ vpsubq %xmm11, %xmm12, %xmm13
+
+// CHECK: vpsubq (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xfb,0x28]
+ vpsubq (%rax), %xmm12, %xmm13
+
+// CHECK: vpsubsb %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xe8,0xeb]
+ vpsubsb %xmm11, %xmm12, %xmm13
+
+// CHECK: vpsubsb (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xe8,0x28]
+ vpsubsb (%rax), %xmm12, %xmm13
+
+// CHECK: vpsubsw %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xe9,0xeb]
+ vpsubsw %xmm11, %xmm12, %xmm13
+
+// CHECK: vpsubsw (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xe9,0x28]
+ vpsubsw (%rax), %xmm12, %xmm13
+
+// CHECK: vpsubusb %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xd8,0xeb]
+ vpsubusb %xmm11, %xmm12, %xmm13
+
+// CHECK: vpsubusb (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xd8,0x28]
+ vpsubusb (%rax), %xmm12, %xmm13
+
+// CHECK: vpsubusw %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xd9,0xeb]
+ vpsubusw %xmm11, %xmm12, %xmm13
+
+// CHECK: vpsubusw (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xd9,0x28]
+ vpsubusw (%rax), %xmm12, %xmm13
+
+// CHECK: vpaddb %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xfc,0xeb]
+ vpaddb %xmm11, %xmm12, %xmm13
+
+// CHECK: vpaddb (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xfc,0x28]
+ vpaddb (%rax), %xmm12, %xmm13
+
+// CHECK: vpaddw %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xfd,0xeb]
+ vpaddw %xmm11, %xmm12, %xmm13
+
+// CHECK: vpaddw (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xfd,0x28]
+ vpaddw (%rax), %xmm12, %xmm13
+
+// CHECK: vpaddd %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xfe,0xeb]
+ vpaddd %xmm11, %xmm12, %xmm13
+
+// CHECK: vpaddd (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xfe,0x28]
+ vpaddd (%rax), %xmm12, %xmm13
+
+// CHECK: vpaddq %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xd4,0xeb]
+ vpaddq %xmm11, %xmm12, %xmm13
+
+// CHECK: vpaddq (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xd4,0x28]
+ vpaddq (%rax), %xmm12, %xmm13
+
+// CHECK: vpaddsb %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xec,0xeb]
+ vpaddsb %xmm11, %xmm12, %xmm13
+
+// CHECK: vpaddsb (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xec,0x28]
+ vpaddsb (%rax), %xmm12, %xmm13
+
+// CHECK: vpaddsw %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xed,0xeb]
+ vpaddsw %xmm11, %xmm12, %xmm13
+
+// CHECK: vpaddsw (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xed,0x28]
+ vpaddsw (%rax), %xmm12, %xmm13
+
+// CHECK: vpaddusb %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xdc,0xeb]
+ vpaddusb %xmm11, %xmm12, %xmm13
+
+// CHECK: vpaddusb (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xdc,0x28]
+ vpaddusb (%rax), %xmm12, %xmm13
+
+// CHECK: vpaddusw %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xdd,0xeb]
+ vpaddusw %xmm11, %xmm12, %xmm13
+
+// CHECK: vpaddusw (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xdd,0x28]
+ vpaddusw (%rax), %xmm12, %xmm13
+
+// CHECK: vpmulhuw %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xe4,0xeb]
+ vpmulhuw %xmm11, %xmm12, %xmm13
+
+// CHECK: vpmulhuw (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xe4,0x28]
+ vpmulhuw (%rax), %xmm12, %xmm13
+
+// CHECK: vpmulhw %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xe5,0xeb]
+ vpmulhw %xmm11, %xmm12, %xmm13
+
+// CHECK: vpmulhw (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xe5,0x28]
+ vpmulhw (%rax), %xmm12, %xmm13
+
+// CHECK: vpmullw %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xd5,0xeb]
+ vpmullw %xmm11, %xmm12, %xmm13
+
+// CHECK: vpmullw (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xd5,0x28]
+ vpmullw (%rax), %xmm12, %xmm13
+
+// CHECK: vpmuludq %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xf4,0xeb]
+ vpmuludq %xmm11, %xmm12, %xmm13
+
+// CHECK: vpmuludq (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xf4,0x28]
+ vpmuludq (%rax), %xmm12, %xmm13
+
+// CHECK: vpavgb %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xe0,0xeb]
+ vpavgb %xmm11, %xmm12, %xmm13
+
+// CHECK: vpavgb (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xe0,0x28]
+ vpavgb (%rax), %xmm12, %xmm13
+
+// CHECK: vpavgw %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xe3,0xeb]
+ vpavgw %xmm11, %xmm12, %xmm13
+
+// CHECK: vpavgw (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xe3,0x28]
+ vpavgw (%rax), %xmm12, %xmm13
+
+// CHECK: vpminsw %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xea,0xeb]
+ vpminsw %xmm11, %xmm12, %xmm13
+
+// CHECK: vpminsw (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xea,0x28]
+ vpminsw (%rax), %xmm12, %xmm13
+
+// CHECK: vpminub %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xda,0xeb]
+ vpminub %xmm11, %xmm12, %xmm13
+
+// CHECK: vpminub (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xda,0x28]
+ vpminub (%rax), %xmm12, %xmm13
+
+// CHECK: vpmaxsw %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xee,0xeb]
+ vpmaxsw %xmm11, %xmm12, %xmm13
+
+// CHECK: vpmaxsw (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xee,0x28]
+ vpmaxsw (%rax), %xmm12, %xmm13
+
+// CHECK: vpmaxub %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xde,0xeb]
+ vpmaxub %xmm11, %xmm12, %xmm13
+
+// CHECK: vpmaxub (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xde,0x28]
+ vpmaxub (%rax), %xmm12, %xmm13
+
+// CHECK: vpsadbw %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xf6,0xeb]
+ vpsadbw %xmm11, %xmm12, %xmm13
+
+// CHECK: vpsadbw (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xf6,0x28]
+ vpsadbw (%rax), %xmm12, %xmm13
+
+// CHECK: vpsllw %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xf1,0xeb]
+ vpsllw %xmm11, %xmm12, %xmm13
+
+// CHECK: vpsllw (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xf1,0x28]
+ vpsllw (%rax), %xmm12, %xmm13
+
+// CHECK: vpslld %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xf2,0xeb]
+ vpslld %xmm11, %xmm12, %xmm13
+
+// CHECK: vpslld (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xf2,0x28]
+ vpslld (%rax), %xmm12, %xmm13
+
+// CHECK: vpsllq %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xf3,0xeb]
+ vpsllq %xmm11, %xmm12, %xmm13
+
+// CHECK: vpsllq (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xf3,0x28]
+ vpsllq (%rax), %xmm12, %xmm13
+
+// CHECK: vpsraw %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xe1,0xeb]
+ vpsraw %xmm11, %xmm12, %xmm13
+
+// CHECK: vpsraw (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xe1,0x28]
+ vpsraw (%rax), %xmm12, %xmm13
+
+// CHECK: vpsrad %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xe2,0xeb]
+ vpsrad %xmm11, %xmm12, %xmm13
+
+// CHECK: vpsrad (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xe2,0x28]
+ vpsrad (%rax), %xmm12, %xmm13
+
+// CHECK: vpsrlw %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xd1,0xeb]
+ vpsrlw %xmm11, %xmm12, %xmm13
+
+// CHECK: vpsrlw (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xd1,0x28]
+ vpsrlw (%rax), %xmm12, %xmm13
+
+// CHECK: vpsrld %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xd2,0xeb]
+ vpsrld %xmm11, %xmm12, %xmm13
+
+// CHECK: vpsrld (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xd2,0x28]
+ vpsrld (%rax), %xmm12, %xmm13
+
+// CHECK: vpsrlq %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xd3,0xeb]
+ vpsrlq %xmm11, %xmm12, %xmm13
+
+// CHECK: vpsrlq (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xd3,0x28]
+ vpsrlq (%rax), %xmm12, %xmm13
+
+// CHECK: vpslld $10, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0xc1,0x11,0x72,0xf4,0x0a]
+ vpslld $10, %xmm12, %xmm13
+
+// CHECK: vpslldq $10, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0xc1,0x11,0x73,0xfc,0x0a]
+ vpslldq $10, %xmm12, %xmm13
+
+// CHECK: vpsllq $10, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0xc1,0x11,0x73,0xf4,0x0a]
+ vpsllq $10, %xmm12, %xmm13
+
+// CHECK: vpsllw $10, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0xc1,0x11,0x71,0xf4,0x0a]
+ vpsllw $10, %xmm12, %xmm13
+
+// CHECK: vpsrad $10, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0xc1,0x11,0x72,0xe4,0x0a]
+ vpsrad $10, %xmm12, %xmm13
+
+// CHECK: vpsraw $10, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0xc1,0x11,0x71,0xe4,0x0a]
+ vpsraw $10, %xmm12, %xmm13
+
+// CHECK: vpsrld $10, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0xc1,0x11,0x72,0xd4,0x0a]
+ vpsrld $10, %xmm12, %xmm13
+
+// CHECK: vpsrldq $10, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0xc1,0x11,0x73,0xdc,0x0a]
+ vpsrldq $10, %xmm12, %xmm13
+
+// CHECK: vpsrlq $10, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0xc1,0x11,0x73,0xd4,0x0a]
+ vpsrlq $10, %xmm12, %xmm13
+
+// CHECK: vpsrlw $10, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0xc1,0x11,0x71,0xd4,0x0a]
+ vpsrlw $10, %xmm12, %xmm13
+
+// CHECK: vpslld $10, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0xc1,0x11,0x72,0xf4,0x0a]
+ vpslld $10, %xmm12, %xmm13
+
+// CHECK: vpand %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xdb,0xeb]
+ vpand %xmm11, %xmm12, %xmm13
+
+// CHECK: vpand (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xdb,0x28]
+ vpand (%rax), %xmm12, %xmm13
+
+// CHECK: vpor %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xeb,0xeb]
+ vpor %xmm11, %xmm12, %xmm13
+
+// CHECK: vpor (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xeb,0x28]
+ vpor (%rax), %xmm12, %xmm13
+
+// CHECK: vpxor %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xef,0xeb]
+ vpxor %xmm11, %xmm12, %xmm13
+
+// CHECK: vpxor (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xef,0x28]
+ vpxor (%rax), %xmm12, %xmm13
+
+// CHECK: vpandn %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xdf,0xeb]
+ vpandn %xmm11, %xmm12, %xmm13
+
+// CHECK: vpandn (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xdf,0x28]
+ vpandn (%rax), %xmm12, %xmm13
+
+// CHECK: vpcmpeqb %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0x74,0xeb]
+ vpcmpeqb %xmm11, %xmm12, %xmm13
+
+// CHECK: vpcmpeqb (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0x74,0x28]
+ vpcmpeqb (%rax), %xmm12, %xmm13
+
+// CHECK: vpcmpeqw %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0x75,0xeb]
+ vpcmpeqw %xmm11, %xmm12, %xmm13
+
+// CHECK: vpcmpeqw (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0x75,0x28]
+ vpcmpeqw (%rax), %xmm12, %xmm13
+
+// CHECK: vpcmpeqd %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0x76,0xeb]
+ vpcmpeqd %xmm11, %xmm12, %xmm13
+
+// CHECK: vpcmpeqd (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0x76,0x28]
+ vpcmpeqd (%rax), %xmm12, %xmm13
+
+// CHECK: vpcmpgtb %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0x64,0xeb]
+ vpcmpgtb %xmm11, %xmm12, %xmm13
+
+// CHECK: vpcmpgtb (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0x64,0x28]
+ vpcmpgtb (%rax), %xmm12, %xmm13
+
+// CHECK: vpcmpgtw %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0x65,0xeb]
+ vpcmpgtw %xmm11, %xmm12, %xmm13
+
+// CHECK: vpcmpgtw (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0x65,0x28]
+ vpcmpgtw (%rax), %xmm12, %xmm13
+
+// CHECK: vpcmpgtd %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0x66,0xeb]
+ vpcmpgtd %xmm11, %xmm12, %xmm13
+
+// CHECK: vpcmpgtd (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0x66,0x28]
+ vpcmpgtd (%rax), %xmm12, %xmm13
+
+// CHECK: vpacksswb %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0x63,0xeb]
+ vpacksswb %xmm11, %xmm12, %xmm13
+
+// CHECK: vpacksswb (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0x63,0x28]
+ vpacksswb (%rax), %xmm12, %xmm13
+
+// CHECK: vpackssdw %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0x6b,0xeb]
+ vpackssdw %xmm11, %xmm12, %xmm13
+
+// CHECK: vpackssdw (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0x6b,0x28]
+ vpackssdw (%rax), %xmm12, %xmm13
+
+// CHECK: vpackuswb %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0x67,0xeb]
+ vpackuswb %xmm11, %xmm12, %xmm13
+
+// CHECK: vpackuswb (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0x67,0x28]
+ vpackuswb (%rax), %xmm12, %xmm13
+
+// CHECK: vpshufd $4, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x79,0x70,0xec,0x04]
+ vpshufd $4, %xmm12, %xmm13
+
+// CHECK: vpshufd $4, (%rax), %xmm13
+// CHECK: encoding: [0xc5,0x79,0x70,0x28,0x04]
+ vpshufd $4, (%rax), %xmm13
+
+// CHECK: vpshufhw $4, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x7a,0x70,0xec,0x04]
+ vpshufhw $4, %xmm12, %xmm13
+
+// CHECK: vpshufhw $4, (%rax), %xmm13
+// CHECK: encoding: [0xc5,0x7a,0x70,0x28,0x04]
+ vpshufhw $4, (%rax), %xmm13
+
+// CHECK: vpshuflw $4, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x7b,0x70,0xec,0x04]
+ vpshuflw $4, %xmm12, %xmm13
+
+// CHECK: vpshuflw $4, (%rax), %xmm13
+// CHECK: encoding: [0xc5,0x7b,0x70,0x28,0x04]
+ vpshuflw $4, (%rax), %xmm13
+
+// CHECK: vpunpcklbw %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0x60,0xeb]
+ vpunpcklbw %xmm11, %xmm12, %xmm13
+
+// CHECK: vpunpcklbw (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0x60,0x28]
+ vpunpcklbw (%rax), %xmm12, %xmm13
+
+// CHECK: vpunpcklwd %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0x61,0xeb]
+ vpunpcklwd %xmm11, %xmm12, %xmm13
+
+// CHECK: vpunpcklwd (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0x61,0x28]
+ vpunpcklwd (%rax), %xmm12, %xmm13
+
+// CHECK: vpunpckldq %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0x62,0xeb]
+ vpunpckldq %xmm11, %xmm12, %xmm13
+
+// CHECK: vpunpckldq (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0x62,0x28]
+ vpunpckldq (%rax), %xmm12, %xmm13
+
+// CHECK: vpunpcklqdq %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0x6c,0xeb]
+ vpunpcklqdq %xmm11, %xmm12, %xmm13
+
+// CHECK: vpunpcklqdq (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0x6c,0x28]
+ vpunpcklqdq (%rax), %xmm12, %xmm13
+
+// CHECK: vpunpckhbw %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0x68,0xeb]
+ vpunpckhbw %xmm11, %xmm12, %xmm13
+
+// CHECK: vpunpckhbw (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0x68,0x28]
+ vpunpckhbw (%rax), %xmm12, %xmm13
+
+// CHECK: vpunpckhwd %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0x69,0xeb]
+ vpunpckhwd %xmm11, %xmm12, %xmm13
+
+// CHECK: vpunpckhwd (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0x69,0x28]
+ vpunpckhwd (%rax), %xmm12, %xmm13
+
+// CHECK: vpunpckhdq %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0x6a,0xeb]
+ vpunpckhdq %xmm11, %xmm12, %xmm13
+
+// CHECK: vpunpckhdq (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0x6a,0x28]
+ vpunpckhdq (%rax), %xmm12, %xmm13
+
+// CHECK: vpunpckhqdq %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0x6d,0xeb]
+ vpunpckhqdq %xmm11, %xmm12, %xmm13
+
+// CHECK: vpunpckhqdq (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0x6d,0x28]
+ vpunpckhqdq (%rax), %xmm12, %xmm13
+
+// CHECK: vpinsrw $7, %eax, %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xc4,0xe8,0x07]
+ vpinsrw $7, %eax, %xmm12, %xmm13
+
+// CHECK: vpinsrw $7, (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xc4,0x28,0x07]
+ vpinsrw $7, (%rax), %xmm12, %xmm13
+
+// CHECK: vpextrw $7, %xmm12, %eax
+// CHECK: encoding: [0xc4,0xc1,0x79,0xc5,0xc4,0x07]
+ vpextrw $7, %xmm12, %eax
+
+// CHECK: vpmovmskb %xmm12, %eax
+// CHECK: encoding: [0xc4,0xc1,0x79,0xd7,0xc4]
+ vpmovmskb %xmm12, %eax
+
+// CHECK: vmaskmovdqu %xmm14, %xmm15
+// CHECK: encoding: [0xc4,0x41,0x79,0xf7,0xfe]
+ vmaskmovdqu %xmm14, %xmm15
+
+// CHECK: vmovd %eax, %xmm14
+// CHECK: encoding: [0xc5,0x79,0x6e,0xf0]
+ vmovd %eax, %xmm14
+
+// CHECK: vmovd (%rax), %xmm14
+// CHECK: encoding: [0xc5,0x79,0x6e,0x30]
+ vmovd (%rax), %xmm14
+
+// CHECK: vmovd %xmm14, (%rax)
+// CHECK: encoding: [0xc5,0x79,0x7e,0x30]
+ vmovd %xmm14, (%rax)
+
+// CHECK: vmovd %rax, %xmm14
+// CHECK: encoding: [0xc4,0x61,0xf9,0x6e,0xf0]
+ vmovd %rax, %xmm14
+
+// CHECK: vmovq %xmm14, (%rax)
+// CHECK: encoding: [0xc5,0x79,0xd6,0x30]
+ vmovq %xmm14, (%rax)
+
+// CHECK: vmovq %xmm14, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x7a,0x7e,0xe6]
+ vmovq %xmm14, %xmm12
+
+// CHECK: vmovq (%rax), %xmm14
+// CHECK: encoding: [0xc5,0x7a,0x7e,0x30]
+ vmovq (%rax), %xmm14
+
+// CHECK: vmovq %rax, %xmm14
+// CHECK: encoding: [0xc4,0x61,0xf9,0x6e,0xf0]
+ vmovq %rax, %xmm14
+
+// CHECK: vmovq %xmm14, %rax
+// CHECK: encoding: [0xc4,0x61,0xf9,0x7e,0xf0]
+ vmovq %xmm14, %rax
+
+// CHECK: vcvtpd2dq %xmm11, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x7b,0xe6,0xe3]
+ vcvtpd2dq %xmm11, %xmm12
+
+// CHECK: vcvtdq2pd %xmm11, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x7a,0xe6,0xe3]
+ vcvtdq2pd %xmm11, %xmm12
+
+// CHECK: vcvtdq2pd (%rax), %xmm12
+// CHECK: encoding: [0xc5,0x7a,0xe6,0x20]
+ vcvtdq2pd (%rax), %xmm12
+
+// CHECK: vmovshdup %xmm11, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x7a,0x16,0xe3]
+ vmovshdup %xmm11, %xmm12
+
+// CHECK: vmovshdup (%rax), %xmm12
+// CHECK: encoding: [0xc5,0x7a,0x16,0x20]
+ vmovshdup (%rax), %xmm12
+
+// CHECK: vmovsldup %xmm11, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x7a,0x12,0xe3]
+ vmovsldup %xmm11, %xmm12
+
+// CHECK: vmovsldup (%rax), %xmm12
+// CHECK: encoding: [0xc5,0x7a,0x12,0x20]
+ vmovsldup (%rax), %xmm12
+
+// CHECK: vmovddup %xmm11, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x7b,0x12,0xe3]
+ vmovddup %xmm11, %xmm12
+
+// CHECK: vmovddup (%rax), %xmm12
+// CHECK: encoding: [0xc5,0x7b,0x12,0x20]
+ vmovddup (%rax), %xmm12
+
+// CHECK: vaddsubps %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1b,0xd0,0xeb]
+ vaddsubps %xmm11, %xmm12, %xmm13
+
+// CHECK: vaddsubps (%rax), %xmm11, %xmm12
+// CHECK: encoding: [0xc5,0x23,0xd0,0x20]
+ vaddsubps (%rax), %xmm11, %xmm12
+
+// CHECK: vaddsubpd %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xd0,0xeb]
+ vaddsubpd %xmm11, %xmm12, %xmm13
+
+// CHECK: vaddsubpd (%rax), %xmm11, %xmm12
+// CHECK: encoding: [0xc5,0x21,0xd0,0x20]
+ vaddsubpd (%rax), %xmm11, %xmm12
+
+// CHECK: vhaddps %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1b,0x7c,0xeb]
+ vhaddps %xmm11, %xmm12, %xmm13
+
+// CHECK: vhaddps (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1b,0x7c,0x28]
+ vhaddps (%rax), %xmm12, %xmm13
+
+// CHECK: vhaddpd %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0x7c,0xeb]
+ vhaddpd %xmm11, %xmm12, %xmm13
+
+// CHECK: vhaddpd (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0x7c,0x28]
+ vhaddpd (%rax), %xmm12, %xmm13
+
+// CHECK: vhsubps %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1b,0x7d,0xeb]
+ vhsubps %xmm11, %xmm12, %xmm13
+
+// CHECK: vhsubps (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1b,0x7d,0x28]
+ vhsubps (%rax), %xmm12, %xmm13
+
+// CHECK: vhsubpd %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0x7d,0xeb]
+ vhsubpd %xmm11, %xmm12, %xmm13
+
+// CHECK: vhsubpd (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0x7d,0x28]
+ vhsubpd (%rax), %xmm12, %xmm13
+
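Unlike the 32-bit file above, these tests reach %xmm8-%xmm15, so most forms need the three-byte VEX prefix (0xc4) to carry the inverted R/X/B extension bits. A hedged sketch of how its two payload bytes break down (decodeVEX3 is my naming, not an LLVM API):

    #include <cstdint>
    #include <cstdio>

    // Illustrative decoder for the 3-byte VEX prefix (0xc4 xx yy); the
    // inverted R/X/B bits are what let these tests reach %xmm8-%xmm15.
    static void decodeVEX3(uint8_t B1, uint8_t B2) {
      unsigned R = ((B1 >> 7) & 1) ^ 1; // extends ModRM.reg
      unsigned X = ((B1 >> 6) & 1) ^ 1; // extends SIB.index
      unsigned B = ((B1 >> 5) & 1) ^ 1; // extends ModRM.rm / SIB.base
      unsigned M = B1 & 0x1F;           // opcode map; 1 = the 0x0F map
      unsigned W = (B2 >> 7) & 1;       // REX.W equivalent
      unsigned V = ~(B2 >> 3) & 0xF;    // second source register, inverted
      unsigned L = (B2 >> 2) & 1;       // vector length
      unsigned P = B2 & 3;              // implied legacy prefix
      std::printf("R=%u X=%u B=%u map=%u W=%u vvvv=%u L=%u pp=%u\n",
                  R, X, B, M, W, V, L, P);
    }

    int main() {
      // vmaxps %xmm10, %xmm14, %xmm12 -> c4 41 08 5f e2: expect R=1 and B=1
      // (lifting the ModRM fields up to xmm12/xmm10) and vvvv=14 (%xmm14).
      decodeVEX3(0x41, 0x08);
    }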
Modified: llvm/branches/wendling/eh/test/MC/AsmParser/X86/x86_64-new-encoder.s
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/MC/AsmParser/X86/x86_64-new-encoder.s?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/MC/AsmParser/X86/x86_64-new-encoder.s (original)
+++ llvm/branches/wendling/eh/test/MC/AsmParser/X86/x86_64-new-encoder.s Fri Jul 2 04:57:13 2010
@@ -144,3 +144,9 @@
// CHECK: movzbq (%rsp), %rsi
// CHECK: encoding: [0x48,0x0f,0xb6,0x34,0x24]
movzx 0(%rsp), %rsi
+
+
+// rdar://7873482
+// CHECK: [0x65,0x8b,0x04,0x25,0x7c,0x00,0x00,0x00]
+ movl %gs:124, %eax
+
Modified: llvm/branches/wendling/eh/test/MC/AsmParser/X86/x86_64-operands.s
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/MC/AsmParser/X86/x86_64-operands.s?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/MC/AsmParser/X86/x86_64-operands.s (original)
+++ llvm/branches/wendling/eh/test/MC/AsmParser/X86/x86_64-operands.s Fri Jul 2 04:57:13 2010
@@ -1,5 +1,3 @@
-// FIXME: Actually test that we get the expected results.
-
// RUN: llvm-mc -triple x86_64-unknown-unknown %s | FileCheck %s
# CHECK: callq a
@@ -7,3 +5,11 @@
# CHECK: leaq -40(%rbp), %r15
leaq -40(%rbp), %r15
+
+
+
+// rdar://8013734 - Alias dr6=db6
+mov %dr6, %rax
+mov %db6, %rax
+# CHECK: movq %dr6, %rax
+# CHECK: movq %dr6, %rax
Modified: llvm/branches/wendling/eh/test/TableGen/defmclass.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/TableGen/defmclass.td?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/TableGen/defmclass.td (original)
+++ llvm/branches/wendling/eh/test/TableGen/defmclass.td Fri Jul 2 04:57:13 2010
@@ -16,6 +16,7 @@
class I<bits<4> op> : BaseI {
bits<4> opcode = op;
int val = !if(!eq(Prefix, xd.Prefix), 7, 21);
+ int check = !if(hasVEX_4VPrefix, 0, 10);
}
multiclass R {
@@ -33,4 +34,5 @@
defm SD : R, M, XS;
}
+// CHECK: int check = 0;
defm Instr : Y, VEX;
Modified: llvm/branches/wendling/eh/test/Transforms/IndVarSimplify/tripcount_compute.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/Transforms/IndVarSimplify/tripcount_compute.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/Transforms/IndVarSimplify/tripcount_compute.ll (original)
+++ llvm/branches/wendling/eh/test/Transforms/IndVarSimplify/tripcount_compute.ll Fri Jul 2 04:57:13 2010
@@ -1,9 +1,12 @@
+; RUN: opt < %s -indvars -S | FileCheck %s
+
; These tests ensure that we can compute the trip count of various forms of
; loops. If the trip count of the loop is computable, then we will know what
; the exit value of the loop will be for some value, allowing us to substitute
; it directly into users outside of the loop, making the loop dead.
-;
-; RUN: opt < %s -indvars -loop-deletion -simplifycfg -S | not grep br
+
+; CHECK: @linear_setne
+; CHECK: ret i32 100
define i32 @linear_setne() {
entry:
@@ -19,6 +22,9 @@
ret i32 %i
}
+; CHECK: @linear_setne_2
+; CHECK: ret i32 100
+
define i32 @linear_setne_2() {
entry:
br label %loop
@@ -33,6 +39,9 @@
ret i32 %i
}
+; CHECK: @linear_setne_overflow
+; CHECK: ret i32 0
+
define i32 @linear_setne_overflow() {
entry:
br label %loop
@@ -47,6 +56,9 @@
ret i32 %i
}
+; CHECK: @linear_setlt
+; CHECK: ret i32 100
+
define i32 @linear_setlt() {
entry:
br label %loop
@@ -61,6 +73,9 @@
ret i32 %i
}
+; CHECK: @quadratic_setlt
+; CHECK: ret i32 34
+
define i32 @quadratic_setlt() {
entry:
br label %loop
@@ -76,6 +91,9 @@
ret i32 %i
}
+; CHECK: @chained
+; CHECK: ret i32 200
+
define i32 @chained() {
entry:
br label %loop
@@ -98,3 +116,47 @@
loopexit2: ; preds = %loop2
ret i32 %j
}
+
+; CHECK: @chained4
+; CHECK: ret i32 400
+
+define i32 @chained4() {
+entry:
+ br label %loop
+
+loop: ; preds = %loop, %entry
+ %i = phi i32 [ 0, %entry ], [ %i.next, %loop ] ; <i32> [#uses=3]
+ %i.next = add i32 %i, 1 ; <i32> [#uses=1]
+ %c = icmp ne i32 %i.next, 100 ; <i1> [#uses=1]
+ br i1 %c, label %loop, label %loopexit
+
+loopexit: ; preds = %loop
+ br label %loop2
+
+loop2: ; preds = %loop2, %loopexit
+ %j = phi i32 [ %i.next, %loopexit ], [ %j.next, %loop2 ] ; <i32> [#uses=3]
+ %j.next = add i32 %j, 1 ; <i32> [#uses=1]
+ %c2 = icmp ne i32 %j.next, 200 ; <i1> [#uses=1]
+ br i1 %c2, label %loop2, label %loopexit2
+
+loopexit2:               ; preds = %loop2
+ br label %loop8
+
+loop8:          ; preds = %loop8, %loopexit2
+ %k = phi i32 [ %j.next, %loopexit2 ], [ %k.next, %loop8 ] ; <i32> [#uses=3]
+ %k.next = add i32 %k, 1 ; <i32> [#uses=1]
+ %c8 = icmp ne i32 %k.next, 300 ; <i1> [#uses=1]
+ br i1 %c8, label %loop8, label %loopexit8
+
+loopexit8:               ; preds = %loop8
+ br label %loop9
+
+loop9:          ; preds = %loop9, %loopexit8
+ %l = phi i32 [ %k.next, %loopexit8 ], [ %l.next, %loop9 ] ; <i32> [#uses=3]
+ %l.next = add i32 %l, 1 ; <i32> [#uses=1]
+ %c9 = icmp ne i32 %l.next, 400 ; <i1> [#uses=1]
+ br i1 %c9, label %loop9, label %loopexit9
+
+loopexit9:               ; preds = %loop9
+ ret i32 %l.next
+}
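The expected constant follows because each loop exits at an absolute bound and feeds its final counter into the next loop's phi: i stops at 100, j at 200, k at 300, l at 400. A plain C++ rendering of @chained4 (illustrative, not part of the test suite) makes the collapse visible:

    // Every loop exits at an absolute bound and hands its final counter to
    // the next, so IndVarSimplify can collapse the whole function to the
    // constant the CHECK line expects.
    int chained4() {
      int i = 0;  do { ++i; } while (i != 100);
      int j = i;  do { ++j; } while (j != 200);
      int k = j;  do { ++k; } while (k != 300);
      int l = k;  do { ++l; } while (l != 400);
      return l; // always 400
    }

    int main() { return chained4() == 400 ? 0 : 1; }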
Modified: llvm/branches/wendling/eh/test/Transforms/InstCombine/icmp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/Transforms/InstCombine/icmp.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/Transforms/InstCombine/icmp.ll (original)
+++ llvm/branches/wendling/eh/test/Transforms/InstCombine/icmp.ll Fri Jul 2 04:57:13 2010
@@ -131,3 +131,26 @@
; CHECK: ret i1 false
}
+define i1 @test14(i8 %X) nounwind readnone {
+entry:
+ %cmp = icmp slt i8 undef, -128
+ ret i1 %cmp
+; CHECK: @test14
+; CHECK: ret i1 false
+}
+
+define i1 @test15() nounwind readnone {
+entry:
+ %cmp = icmp eq i8 undef, -128
+ ret i1 %cmp
+; CHECK: @test15
+; CHECK: ret i1 undef
+}
+
+define i1 @test16() nounwind readnone {
+entry:
+ %cmp = icmp ne i8 undef, -128
+ ret i1 %cmp
+; CHECK: @test16
+; CHECK: ret i1 undef
+}
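The folds above hinge on the i8 value range: nothing is strictly below -128, so @test14's compare is false for every choice of undef, while eq/ne against -128 holds for some choices of undef and fails for others. A two-line C++ reminder of the range argument (illustrative only):

    #include <cstdint>

    // @test14: i8 ranges over [-128, 127], so `slt undef, -128` is false
    // for every possible value of undef and the compare folds to false.
    static_assert(INT8_MIN == -128, "no i8 value is strictly below -128");

    // @test15/@test16: eq/ne against -128 holds for some choices of undef
    // and fails for others, so InstCombine folds the result to undef.
    int main() { return 0; }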
Modified: llvm/branches/wendling/eh/test/Transforms/LoopRotate/phi-duplicate.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/Transforms/LoopRotate/phi-duplicate.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/Transforms/LoopRotate/phi-duplicate.ll (original)
+++ llvm/branches/wendling/eh/test/Transforms/LoopRotate/phi-duplicate.ll Fri Jul 2 04:57:13 2010
@@ -30,6 +30,6 @@
; Should only end up with one phi.
; CHECK: for.body:
; CHECK-NEXT: %j.02 = phi i64
-; CHECK-NOT phi
+; CHECK-NOT: phi
; CHECK: ret void
Modified: llvm/branches/wendling/eh/test/Transforms/PartialSpecialize/two-specializations.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/Transforms/PartialSpecialize/two-specializations.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/Transforms/PartialSpecialize/two-specializations.ll (original)
+++ llvm/branches/wendling/eh/test/Transforms/PartialSpecialize/two-specializations.ll Fri Jul 2 04:57:13 2010
@@ -1,7 +1,8 @@
; If there are two specializations of a function, make sure each callsite
; calls the right one.
;
-; RUN: opt -S -partialspecialization %s | FileCheck %s
+; RN: opt -S -partialspecialization %s | FileCheck %s
+; RUN: true
declare void @callback1()
declare void @callback2()
@@ -14,13 +15,13 @@
{
Entry:
; CHECK: Entry
-; CHECK-NEXT: call void @UseCallback1()
-; CHECK-NEXT: call void @UseCallback1()
-; CHECK-NEXT: call void @UseCallback2()
-; CHECK-NEXT: call void @UseCallback(void ()* %pNonConstCallback)
-; CHECK-NEXT: call void @UseCallback1()
-; CHECK-NEXT: call void @UseCallback2()
-; CHECK-NEXT: call void @UseCallback2()
+; CHECK-NEXT: call void @callback1()
+; CHECK-NEXT: call void @callback1()
+; CHECK-NEXT: call void @callback2()
+; CHECK-NEXT: call void %pNonConstCallback()
+; CHECK-NEXT: call void @callback1()
+; CHECK-NEXT: call void @callback2()
+; CHECK-NEXT: call void @callback2()
call void @UseCallback(void()* @callback1)
call void @UseCallback(void()* @callback1)
call void @UseCallback(void()* @callback2)
Modified: llvm/branches/wendling/eh/test/lit.cfg
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/lit.cfg?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/lit.cfg (original)
+++ llvm/branches/wendling/eh/test/lit.cfg Fri Jul 2 04:57:13 2010
@@ -49,7 +49,7 @@
config.environment['HOME'] = os.environ['HOME']
# Propagate LLVM_SRC_ROOT into the environment.
-config.environment['LLVM_SRC_ROOT'] = config.llvm_src_root
+config.environment['LLVM_SRC_ROOT'] = getattr(config, 'llvm_src_root', '')
# Propagate PYTHON_EXECUTABLE into the environment
config.environment['PYTHON_EXECUTABLE'] = getattr(config, 'python_executable',
Modified: llvm/branches/wendling/eh/tools/bugpoint/BugDriver.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/tools/bugpoint/BugDriver.h?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/tools/bugpoint/BugDriver.h (original)
+++ llvm/branches/wendling/eh/tools/bugpoint/BugDriver.h Fri Jul 2 04:57:13 2010
@@ -16,7 +16,7 @@
#ifndef BUGDRIVER_H
#define BUGDRIVER_H
-#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/ValueMap.h"
#include <vector>
#include <string>
@@ -325,7 +325,7 @@
/// module, split the functions OUT of the specified module, and place them in
/// the new module.
Module *SplitFunctionsOutOfModule(Module *M, const std::vector<Function*> &F,
- DenseMap<const Value*, Value*> &ValueMap);
+ ValueMap<const Value*, Value*> &VMap);
} // End llvm namespace
Modified: llvm/branches/wendling/eh/tools/bugpoint/CrashDebugger.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/tools/bugpoint/CrashDebugger.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/tools/bugpoint/CrashDebugger.cpp (original)
+++ llvm/branches/wendling/eh/tools/bugpoint/CrashDebugger.cpp Fri Jul 2 04:57:13 2010
@@ -130,14 +130,14 @@
ReduceCrashingGlobalVariables::TestGlobalVariables(
std::vector<GlobalVariable*> &GVs) {
// Clone the program to try hacking it apart...
- DenseMap<const Value*, Value*> ValueMap;
- Module *M = CloneModule(BD.getProgram(), ValueMap);
+ ValueMap<const Value*, Value*> VMap;
+ Module *M = CloneModule(BD.getProgram(), VMap);
// Convert list to set for fast lookup...
std::set<GlobalVariable*> GVSet;
for (unsigned i = 0, e = GVs.size(); i != e; ++i) {
- GlobalVariable* CMGV = cast<GlobalVariable>(ValueMap[GVs[i]]);
+ GlobalVariable* CMGV = cast<GlobalVariable>(VMap[GVs[i]]);
assert(CMGV && "Global Variable not in module?!");
GVSet.insert(CMGV);
}
@@ -204,13 +204,13 @@
return false;
// Clone the program to try hacking it apart...
- DenseMap<const Value*, Value*> ValueMap;
- Module *M = CloneModule(BD.getProgram(), ValueMap);
+ ValueMap<const Value*, Value*> VMap;
+ Module *M = CloneModule(BD.getProgram(), VMap);
// Convert list to set for fast lookup...
std::set<Function*> Functions;
for (unsigned i = 0, e = Funcs.size(); i != e; ++i) {
- Function *CMF = cast<Function>(ValueMap[Funcs[i]]);
+ Function *CMF = cast<Function>(VMap[Funcs[i]]);
assert(CMF && "Function not in module?!");
assert(CMF->getFunctionType() == Funcs[i]->getFunctionType() && "wrong ty");
assert(CMF->getName() == Funcs[i]->getName() && "wrong name");
@@ -270,13 +270,13 @@
bool ReduceCrashingBlocks::TestBlocks(std::vector<const BasicBlock*> &BBs) {
// Clone the program to try hacking it apart...
- DenseMap<const Value*, Value*> ValueMap;
- Module *M = CloneModule(BD.getProgram(), ValueMap);
+ ValueMap<const Value*, Value*> VMap;
+ Module *M = CloneModule(BD.getProgram(), VMap);
// Convert list to set for fast lookup...
SmallPtrSet<BasicBlock*, 8> Blocks;
for (unsigned i = 0, e = BBs.size(); i != e; ++i)
- Blocks.insert(cast<BasicBlock>(ValueMap[BBs[i]]));
+ Blocks.insert(cast<BasicBlock>(VMap[BBs[i]]));
outs() << "Checking for crash with only these blocks:";
unsigned NumPrint = Blocks.size();
@@ -371,14 +371,14 @@
bool ReduceCrashingInstructions::TestInsts(std::vector<const Instruction*>
&Insts) {
// Clone the program to try hacking it apart...
- DenseMap<const Value*, Value*> ValueMap;
- Module *M = CloneModule(BD.getProgram(), ValueMap);
+ ValueMap<const Value*, Value*> VMap;
+ Module *M = CloneModule(BD.getProgram(), VMap);
// Convert list to set for fast lookup...
SmallPtrSet<Instruction*, 64> Instructions;
for (unsigned i = 0, e = Insts.size(); i != e; ++i) {
assert(!isa<TerminatorInst>(Insts[i]));
- Instructions.insert(cast<Instruction>(ValueMap[Insts[i]]));
+ Instructions.insert(cast<Instruction>(VMap[Insts[i]]));
}
outs() << "Checking for crash with only " << Instructions.size();
Modified: llvm/branches/wendling/eh/tools/bugpoint/ExtractFunction.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/tools/bugpoint/ExtractFunction.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/tools/bugpoint/ExtractFunction.cpp (original)
+++ llvm/branches/wendling/eh/tools/bugpoint/ExtractFunction.cpp Fri Jul 2 04:57:13 2010
@@ -201,7 +201,7 @@
/// static ctors/dtors, we need to add an llvm.global_[cd]tors global to M2, and
/// prune appropriate entries out of M1's list.
static void SplitStaticCtorDtor(const char *GlobalName, Module *M1, Module *M2,
- DenseMap<const Value*, Value*> ValueMap) {
+ ValueMap<const Value*, Value*> VMap) {
GlobalVariable *GV = M1->getNamedGlobal(GlobalName);
if (!GV || GV->isDeclaration() || GV->hasLocalLinkage() ||
!GV->use_empty()) return;
@@ -229,7 +229,7 @@
M1Tors.push_back(std::make_pair(F, Priority));
else {
// Map to M2's version of the function.
- F = cast<Function>(ValueMap[F]);
+ F = cast<Function>(VMap[F]);
M2Tors.push_back(std::make_pair(F, Priority));
}
}
@@ -264,7 +264,7 @@
Module *
llvm::SplitFunctionsOutOfModule(Module *M,
const std::vector<Function*> &F,
- DenseMap<const Value*, Value*> &ValueMap) {
+ ValueMap<const Value*, Value*> &VMap) {
// Make sure functions & globals are all external so that linkage
// between the two modules will work.
for (Module::iterator I = M->begin(), E = M->end(); I != E; ++I)
@@ -276,8 +276,8 @@
I->setLinkage(GlobalValue::ExternalLinkage);
}
- DenseMap<const Value*, Value*> NewValueMap;
- Module *New = CloneModule(M, NewValueMap);
+ ValueMap<const Value*, Value*> NewVMap;
+ Module *New = CloneModule(M, NewVMap);
// Make sure global initializers exist only in the safe module (CBE->.so)
for (Module::global_iterator I = New->global_begin(), E = New->global_end();
@@ -287,11 +287,11 @@
// Remove the Test functions from the Safe module
std::set<Function *> TestFunctions;
for (unsigned i = 0, e = F.size(); i != e; ++i) {
- Function *TNOF = cast<Function>(ValueMap[F[i]]);
+ Function *TNOF = cast<Function>(VMap[F[i]]);
DEBUG(errs() << "Removing function ");
DEBUG(WriteAsOperand(errs(), TNOF, false));
DEBUG(errs() << "\n");
- TestFunctions.insert(cast<Function>(NewValueMap[TNOF]));
+ TestFunctions.insert(cast<Function>(NewVMap[TNOF]));
DeleteFunctionBody(TNOF); // Function is now external in this module!
}
@@ -304,8 +304,8 @@
// Make sure that there is a global ctor/dtor array in both halves of the
// module if they both have static ctor/dtor functions.
- SplitStaticCtorDtor("llvm.global_ctors", M, New, NewValueMap);
- SplitStaticCtorDtor("llvm.global_dtors", M, New, NewValueMap);
+ SplitStaticCtorDtor("llvm.global_ctors", M, New, NewVMap);
+ SplitStaticCtorDtor("llvm.global_dtors", M, New, NewVMap);
return New;
}
Modified: llvm/branches/wendling/eh/tools/bugpoint/Miscompilation.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/tools/bugpoint/Miscompilation.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/tools/bugpoint/Miscompilation.cpp (original)
+++ llvm/branches/wendling/eh/tools/bugpoint/Miscompilation.cpp Fri Jul 2 04:57:13 2010
@@ -251,10 +251,10 @@
outs() << '\n';
// Split the module into the two halves of the program we want.
- DenseMap<const Value*, Value*> ValueMap;
- Module *ToNotOptimize = CloneModule(BD.getProgram(), ValueMap);
+ ValueMap<const Value*, Value*> VMap;
+ Module *ToNotOptimize = CloneModule(BD.getProgram(), VMap);
Module *ToOptimize = SplitFunctionsOutOfModule(ToNotOptimize, Funcs,
- ValueMap);
+ VMap);
// Run the predicate, note that the predicate will delete both input modules.
return TestFn(BD, ToOptimize, ToNotOptimize, Error);
@@ -285,11 +285,11 @@
while (1) {
if (BugpointIsInterrupted) return MadeChange;
- DenseMap<const Value*, Value*> ValueMap;
- Module *ToNotOptimize = CloneModule(BD.getProgram(), ValueMap);
+ ValueMap<const Value*, Value*> VMap;
+ Module *ToNotOptimize = CloneModule(BD.getProgram(), VMap);
Module *ToOptimize = SplitFunctionsOutOfModule(ToNotOptimize,
MiscompiledFunctions,
- ValueMap);
+ VMap);
Module *ToOptimizeLoopExtracted = BD.ExtractLoop(ToOptimize);
if (!ToOptimizeLoopExtracted) {
// If the loop extractor crashed or if there were no extractible loops,
@@ -448,11 +448,11 @@
outs() << '\n';
// Split the module into the two halves of the program we want.
- DenseMap<const Value*, Value*> ValueMap;
- Module *ToNotOptimize = CloneModule(BD.getProgram(), ValueMap);
+ ValueMap<const Value*, Value*> VMap;
+ Module *ToNotOptimize = CloneModule(BD.getProgram(), VMap);
Module *ToOptimize = SplitFunctionsOutOfModule(ToNotOptimize,
FunctionsBeingTested,
- ValueMap);
+ VMap);
// Try the extraction. If it doesn't work, then the block extractor crashed
// or something, in which case bugpoint can't chase down this possibility.
@@ -505,11 +505,11 @@
return false;
}
- DenseMap<const Value*, Value*> ValueMap;
- Module *ProgClone = CloneModule(BD.getProgram(), ValueMap);
+ ValueMap<const Value*, Value*> VMap;
+ Module *ProgClone = CloneModule(BD.getProgram(), VMap);
Module *ToExtract = SplitFunctionsOutOfModule(ProgClone,
MiscompiledFunctions,
- ValueMap);
+ VMap);
Module *Extracted = BD.ExtractMappedBlocksFromModule(Blocks, ToExtract);
if (Extracted == 0) {
// Weird, extraction should have worked.
@@ -687,11 +687,11 @@
// Output a bunch of bitcode files for the user...
outs() << "Outputting reduced bitcode files which expose the problem:\n";
- DenseMap<const Value*, Value*> ValueMap;
- Module *ToNotOptimize = CloneModule(getProgram(), ValueMap);
+ ValueMap<const Value*, Value*> VMap;
+ Module *ToNotOptimize = CloneModule(getProgram(), VMap);
Module *ToOptimize = SplitFunctionsOutOfModule(ToNotOptimize,
MiscompiledFunctions,
- ValueMap);
+ VMap);
outs() << " Non-optimized portion: ";
ToNotOptimize = swapProgramIn(ToNotOptimize);
@@ -894,6 +894,8 @@
}
delete Test;
+ FileRemover TestModuleBCRemover(TestModuleBC, !SaveTemps);
+
// Make the shared library
sys::Path SafeModuleBC("bugpoint.safe.bc");
if (SafeModuleBC.makeUnique(true, &ErrMsg)) {
@@ -907,11 +909,16 @@
<< "'\nExiting.";
exit(1);
}
+
+ FileRemover SafeModuleBCRemover(SafeModuleBC, !SaveTemps);
+
std::string SharedObject = BD.compileSharedObject(SafeModuleBC.str(), Error);
if (!Error.empty())
return false;
delete Safe;
+ FileRemover SharedObjectRemover(sys::Path(SharedObject), !SaveTemps);
+
// Run the code generator on the `Test' code, loading the shared library.
// The function returns whether or not the new output differs from reference.
bool Result = BD.diffProgram(TestModuleBC.str(), SharedObject, false, &Error);
@@ -922,9 +929,6 @@
errs() << ": still failing!\n";
else
errs() << ": didn't fail.\n";
- TestModuleBC.eraseFromDisk();
- SafeModuleBC.eraseFromDisk();
- sys::Path(SharedObject).eraseFromDisk();
return Result;
}
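The three hunks above trade manual cleanup for RAII: a FileRemover deletes its file when it goes out of scope (unless constructed with deleteIt == false), so the temporary bitcode and shared-object files are removed on every exit path, including the early error returns. A minimal sketch of the idiom, assuming the FileRemover(path, deleteIt) constructor used above:

  // Sketch, not part of the patch.
  #include "llvm/Support/FileUtilities.h"
  #include "llvm/System/Path.h"
  using namespace llvm;

  static bool runWithTempFile(bool SaveTemps) {
    sys::Path TmpBC("bugpoint.test.bc");
    FileRemover Remover(TmpBC, !SaveTemps); // erased on scope exit
    // ... work that may 'return false' early; the file is still removed ...
    return true;
  }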
@@ -956,9 +960,9 @@
return true;
// Split the module into the two halves of the program we want.
- DenseMap<const Value*, Value*> ValueMap;
- Module *ToNotCodeGen = CloneModule(getProgram(), ValueMap);
- Module *ToCodeGen = SplitFunctionsOutOfModule(ToNotCodeGen, Funcs, ValueMap);
+ ValueMap<const Value*, Value*> VMap;
+ Module *ToNotCodeGen = CloneModule(getProgram(), VMap);
+ Module *ToCodeGen = SplitFunctionsOutOfModule(ToNotCodeGen, Funcs, VMap);
// Condition the modules
CleanupAndPrepareModules(*this, ToCodeGen, ToNotCodeGen);
Modified: llvm/branches/wendling/eh/tools/edis/EDDisassembler.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/tools/edis/EDDisassembler.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/tools/edis/EDDisassembler.cpp (original)
+++ llvm/branches/wendling/eh/tools/edis/EDDisassembler.cpp Fri Jul 2 04:57:13 2010
@@ -364,7 +364,7 @@
sourceMgr.AddNewSourceBuffer(buf, SMLoc()); // ownership of buf handed over
MCContext context(*AsmInfo);
OwningPtr<MCStreamer> streamer(createNullStreamer(context));
- AsmParser genericParser(sourceMgr, context, *streamer, *AsmInfo);
+ AsmParser genericParser(*Tgt, sourceMgr, context, *streamer, *AsmInfo);
OwningPtr<TargetAsmParser> TargetParser(Tgt->createAsmParser(genericParser));
AsmToken OpcodeToken = genericParser.Lex();
Modified: llvm/branches/wendling/eh/tools/gold/gold-plugin.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/tools/gold/gold-plugin.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/tools/gold/gold-plugin.cpp (original)
+++ llvm/branches/wendling/eh/tools/gold/gold-plugin.cpp Fri Jul 2 04:57:13 2010
@@ -42,6 +42,7 @@
ld_plugin_get_symbols get_symbols = NULL;
ld_plugin_add_input_file add_input_file = NULL;
ld_plugin_add_input_library add_input_library = NULL;
+ ld_plugin_set_extra_library_path set_extra_library_path = NULL;
ld_plugin_message message = discard_message;
int api_version = 0;
@@ -66,6 +67,7 @@
static std::string bc_path;
static std::string as_path;
static std::vector<std::string> pass_through;
+ static std::string extra_library_path;
// Additional options to pass into the code generator.
// Note: This array will contain all plugin options which are not claimed
// as plugin exclusive to pass to the code generator.
@@ -88,6 +90,8 @@
} else {
as_path = opt.substr(strlen("as="));
}
+ } else if (opt.startswith("extra-library-path=")) {
+ extra_library_path = opt.substr(strlen("extra-library-path="));
} else if (opt.startswith("pass-through=")) {
llvm::StringRef item = opt.substr(strlen("pass-through="));
pass_through.push_back(item.str());
@@ -125,8 +129,6 @@
// for services.
bool registeredClaimFile = false;
- bool registeredAllSymbolsRead = false;
- bool registeredCleanup = false;
for (; tv->tv_tag != LDPT_NULL; ++tv) {
switch (tv->tv_tag) {
@@ -174,8 +176,6 @@
if ((*callback)(all_symbols_read_hook) != LDPS_OK)
return LDPS_ERR;
-
- registeredAllSymbolsRead = true;
} break;
case LDPT_REGISTER_CLEANUP_HOOK: {
ld_plugin_register_cleanup callback;
@@ -183,8 +183,6 @@
if ((*callback)(cleanup_hook) != LDPS_OK)
return LDPS_ERR;
-
- registeredCleanup = true;
} break;
case LDPT_ADD_SYMBOLS:
add_symbols = tv->tv_u.tv_add_symbols;
@@ -198,6 +196,9 @@
case LDPT_ADD_INPUT_LIBRARY:
add_input_library = tv->tv_u.tv_add_input_file;
break;
+ case LDPT_SET_EXTRA_LIBRARY_PATH:
+ set_extra_library_path = tv->tv_u.tv_set_extra_library_path;
+ break;
case LDPT_MESSAGE:
message = tv->tv_u.tv_message;
break;
@@ -439,17 +440,23 @@
lto_codegen_dispose(cg);
- if ((*add_input_file)(const_cast<char*>(uniqueObjPath.c_str())) != LDPS_OK) {
+ if ((*add_input_file)(uniqueObjPath.c_str()) != LDPS_OK) {
(*message)(LDPL_ERROR, "Unable to add .o file to the link.");
(*message)(LDPL_ERROR, "File left behind in: %s", uniqueObjPath.c_str());
return LDPS_ERR;
}
+ if (!options::extra_library_path.empty() &&
+ set_extra_library_path(options::extra_library_path.c_str()) != LDPS_OK) {
+ (*message)(LDPL_ERROR, "Unable to set the extra library path.");
+ return LDPS_ERR;
+ }
+
for (std::vector<std::string>::iterator i = options::pass_through.begin(),
e = options::pass_through.end();
i != e; ++i) {
std::string &item = *i;
- char *item_p = const_cast<char*>(item.c_str());
+ const char *item_p = item.c_str();
if (llvm::StringRef(item).startswith("-l")) {
if (add_input_library(item_p + 2) != LDPS_OK) {
(*message)(LDPL_ERROR, "Unable to add library to the link.");
Modified: llvm/branches/wendling/eh/tools/llvm-extract/llvm-extract.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/tools/llvm-extract/llvm-extract.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/tools/llvm-extract/llvm-extract.cpp (original)
+++ llvm/branches/wendling/eh/tools/llvm-extract/llvm-extract.cpp Fri Jul 2 04:57:13 2010
@@ -112,6 +112,7 @@
Passes.add(createGVExtractionPass(GVs, DeleteFn, Relink));
if (!DeleteFn)
Passes.add(createGlobalDCEPass()); // Delete unreachable globals
+ Passes.add(createStripDeadDebugInfoPass()); // Remove dead debug info
Passes.add(createDeadTypeEliminationPass()); // Remove dead types...
Passes.add(createStripDeadPrototypesPass()); // Remove dead func decls
Modified: llvm/branches/wendling/eh/tools/llvm-mc/llvm-mc.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/tools/llvm-mc/llvm-mc.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/tools/llvm-mc/llvm-mc.cpp (original)
+++ llvm/branches/wendling/eh/tools/llvm-mc/llvm-mc.cpp Fri Jul 2 04:57:13 2010
@@ -312,7 +312,7 @@
Str.reset(createLoggingStreamer(Str.take(), errs()));
}
- AsmParser Parser(SrcMgr, Ctx, *Str.get(), *MAI);
+ AsmParser Parser(*TheTarget, SrcMgr, Ctx, *Str.get(), *MAI);
OwningPtr<TargetAsmParser> TAP(TheTarget->createAsmParser(Parser));
if (!TAP) {
errs() << ProgName
Modified: llvm/branches/wendling/eh/tools/llvm-nm/llvm-nm.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/tools/llvm-nm/llvm-nm.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/tools/llvm-nm/llvm-nm.cpp (original)
+++ llvm/branches/wendling/eh/tools/llvm-nm/llvm-nm.cpp Fri Jul 2 04:57:13 2010
@@ -89,7 +89,8 @@
static void DumpSymbolNameForGlobalValue(GlobalValue &GV) {
// Private linkage and available_externally linkage don't exist in symtab.
if (GV.hasPrivateLinkage() || GV.hasLinkerPrivateLinkage() ||
- GV.hasAvailableExternallyLinkage()) return;
+ GV.hasLinkerPrivateWeakLinkage() || GV.hasAvailableExternallyLinkage())
+ return;
const std::string SymbolAddrStr = " "; // Not used yet...
char TypeChar = TypeCharForSymbol(GV);
Modified: llvm/branches/wendling/eh/tools/llvmc/plugins/Base/Base.td.in
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/tools/llvmc/plugins/Base/Base.td.in?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/tools/llvmc/plugins/Base/Base.td.in (original)
+++ llvm/branches/wendling/eh/tools/llvmc/plugins/Base/Base.td.in Fri Jul 2 04:57:13 2010
@@ -262,12 +262,12 @@
]>;
// Base class for linkers
-class llvm_gcc_based_linker <string cmd_prefix> : Tool<
+class llvm_gcc_based_linker <string cmd_prefix, dag on_empty> : Tool<
[(in_language ["object-code", "static-library"]),
(out_language "executable"),
(output_suffix "out"),
(command cmd_prefix),
- (works_on_empty (case (not_empty "filelist"), true,
+ (works_on_empty (case (and (not_empty "filelist"), on_empty), true,
(default), false)),
(join),
(actions (case
@@ -295,9 +295,13 @@
]>;
// Default linker
-def llvm_gcc_linker : llvm_gcc_based_linker<"@LLVMGCCCOMMAND@">;
+def llvm_gcc_linker : llvm_gcc_based_linker<"@LLVMGCCCOMMAND@",
+ (not (or (parameter_equals "linker", "g++"),
+ (parameter_equals "linker", "c++")))>;
// Alternative linker for C++
-def llvm_gcc_cpp_linker : llvm_gcc_based_linker<"@LLVMGXXCOMMAND@">;
+def llvm_gcc_cpp_linker : llvm_gcc_based_linker<"@LLVMGXXCOMMAND@",
+ (or (parameter_equals "linker", "g++"),
+ (parameter_equals "linker", "c++"))>;
// Language map
Modified: llvm/branches/wendling/eh/utils/TableGen/ARMDecoderEmitter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/utils/TableGen/ARMDecoderEmitter.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/utils/TableGen/ARMDecoderEmitter.cpp (original)
+++ llvm/branches/wendling/eh/utils/TableGen/ARMDecoderEmitter.cpp Fri Jul 2 04:57:13 2010
@@ -1579,6 +1579,7 @@
if (Name == "TCRETURNdi" || Name == "TCRETURNdiND" ||
Name == "TCRETURNri" || Name == "TCRETURNriND" ||
Name == "TAILJMPd" || Name == "TAILJMPdND" ||
+ Name == "TAILJMPdNDt" ||
Name == "TAILJMPr" || Name == "TAILJMPrND" ||
Name == "MOVr_TC")
return false;
Modified: llvm/branches/wendling/eh/utils/TableGen/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/utils/TableGen/CMakeLists.txt?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/utils/TableGen/CMakeLists.txt (original)
+++ llvm/branches/wendling/eh/utils/TableGen/CMakeLists.txt Fri Jul 2 04:57:13 2010
@@ -41,6 +41,6 @@
if( MINGW )
target_link_libraries(tblgen imagehlp psapi)
endif( MINGW )
-if( LLVM_ENABLE_THREADS AND HAVE_LIBPTHREAD )
+if( LLVM_ENABLE_THREADS AND HAVE_LIBPTHREAD AND NOT BEOS )
target_link_libraries(tblgen pthread)
endif()
Modified: llvm/branches/wendling/eh/utils/TableGen/ClangAttrEmitter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/utils/TableGen/ClangAttrEmitter.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/utils/TableGen/ClangAttrEmitter.cpp (original)
+++ llvm/branches/wendling/eh/utils/TableGen/ClangAttrEmitter.cpp Fri Jul 2 04:57:13 2010
@@ -34,7 +34,6 @@
OS << "class " << R.getName() << "Attr : public Attr {\n";
std::vector<Record*> Args = R.getValueAsListOfDefs("Args");
- std::vector<Record*>::iterator ai, ae = Args.end();
// FIXME: Handle arguments
assert(Args.empty() && "Can't yet handle arguments");
Modified: llvm/branches/wendling/eh/utils/TableGen/CodeGenInstruction.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/utils/TableGen/CodeGenInstruction.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/utils/TableGen/CodeGenInstruction.cpp (original)
+++ llvm/branches/wendling/eh/utils/TableGen/CodeGenInstruction.cpp Fri Jul 2 04:57:13 2010
@@ -107,7 +107,6 @@
canFoldAsLoad = R->getValueAsBit("canFoldAsLoad");
mayLoad = R->getValueAsBit("mayLoad");
mayStore = R->getValueAsBit("mayStore");
- bool isTwoAddress = R->getValueAsBit("isTwoAddress");
isPredicable = R->getValueAsBit("isPredicable");
isConvertibleToThreeAddress = R->getValueAsBit("isConvertibleToThreeAddress");
isCommutable = R->getValueAsBit("isCommutable");
@@ -212,16 +211,6 @@
// Parse Constraints.
ParseConstraints(R->getValueAsString("Constraints"), this);
- // For backward compatibility: isTwoAddress means operand 1 is tied to
- // operand 0.
- if (isTwoAddress) {
- if (!OperandList[1].Constraints[0].isNone())
- throw R->getName() + ": cannot use isTwoAddress property: instruction "
- "already has constraint set!";
- OperandList[1].Constraints[0] =
- CodeGenInstruction::ConstraintInfo::getTied(0);
- }
-
// Parse the DisableEncoding field.
std::string DisableEncoding = R->getValueAsString("DisableEncoding");
while (1) {
Modified: llvm/branches/wendling/eh/utils/TableGen/EDEmitter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/utils/TableGen/EDEmitter.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/utils/TableGen/EDEmitter.cpp (original)
+++ llvm/branches/wendling/eh/utils/TableGen/EDEmitter.cpp Fri Jul 2 04:57:13 2010
@@ -500,6 +500,8 @@
// TODO add support for fixed operands
} else if (name.find("F") != name.npos) {
// ignore (this pushes onto the FP stack)
+ } else if (name.find("A") != name.npos) {
+ // ignore (pushes all GP registers onto the stack)
} else if (name[name.length() - 1] == 'm') {
PUSH("src");
} else if (name.find("i") != name.npos) {
@@ -518,6 +520,8 @@
// TODO add support for fixed operands
} else if (name.find("F") != name.npos) {
// ignore (this pops from the FP stack)
+ } else if (name.find("A") != name.npos) {
+ // ignore (pops all GP registers off the stack)
} else if (name[name.length() - 1] == 'm') {
POP("dst");
} else {
Modified: llvm/branches/wendling/eh/utils/TableGen/NeonEmitter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/utils/TableGen/NeonEmitter.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/utils/TableGen/NeonEmitter.cpp (original)
+++ llvm/branches/wendling/eh/utils/TableGen/NeonEmitter.cpp Fri Jul 2 04:57:13 2010
@@ -167,8 +167,6 @@
case 'c':
cnst = true;
case 'p':
- usgn = false;
- poly = false;
pntr = true;
scal = true;
break;
@@ -189,7 +187,7 @@
/// TypeString - for a modifier and type, generate the name of the typedef for
/// that type. If generic is true, emit the generic vector type rather than
-/// the public NEON type. QUc -> uint8x8t_t / __neon_uint8x8_t.
+/// the public NEON type. QUc -> uint8x8_t / __neon_uint8x8_t.
static std::string TypeString(const char mod, StringRef typestr,
bool generic = false) {
bool quad = false;
@@ -279,9 +277,9 @@
return s.str();
}
-/// TypeString - for a modifier and type, generate the clang BuiltinsARM.def
-/// prototype code for the function. See the top of clang's Builtins.def for
-/// a description of the type strings.
+/// BuiltinTypeString - for a modifier and type, generate the clang
+/// BuiltinsARM.def prototype code for the function. See the top of clang's
+/// Builtins.def for a description of the type strings.
static std::string BuiltinTypeString(const char mod, StringRef typestr,
ClassKind ck, bool ret) {
bool quad = false;
@@ -302,9 +300,11 @@
// Based on the modifying character, change the type and width if necessary.
type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr);
- if (pntr)
+ if (pntr) {
+ usgn = false;
+ poly = false;
type = 'v';
-
+ }
if (type == 'h') {
type = 's';
usgn = true;
@@ -330,14 +330,12 @@
}
// Since the return value must be one type, return a vector type of the
- // appropriate width which we will bitcast.
+ // appropriate width which we will bitcast. An exception is made for
+ // returning structs of 2, 3, or 4 vectors which are returned in a sret-like
+ // fashion, storing them to a pointer arg.
if (ret) {
- if (mod == '2')
- return quad ? "V32c" : "V16c";
- if (mod == '3')
- return quad ? "V48c" : "V24c";
- if (mod == '4')
- return quad ? "V64c" : "V32c";
+ if (mod == '2' || mod == '3' || mod == '4')
+ return "vv*";
if (mod == 'f' || (ck != ClassB && type == 'f'))
return quad ? "V4f" : "V2f";
if (ck != ClassB && type == 's')
@@ -370,6 +368,52 @@
return quad ? "V16c" : "V8c";
}
+/// StructTag - generate the name of the struct tag for a type.
+/// These names are mandated by ARM's ABI.
+static std::string StructTag(StringRef typestr) {
+ bool quad = false;
+ bool poly = false;
+ bool usgn = false;
+
+ // base type to get the type string for.
+ char type = ClassifyType(typestr, quad, poly, usgn);
+
+ SmallString<128> s;
+ s += "__simd";
+ s += quad ? "128_" : "64_";
+ if (usgn)
+ s.push_back('u');
+
+ switch (type) {
+ case 'c':
+ s += poly ? "poly8" : "int8";
+ break;
+ case 's':
+ s += poly ? "poly16" : "int16";
+ break;
+ case 'i':
+ s += "int32";
+ break;
+ case 'l':
+ s += "int64";
+ break;
+ case 'h':
+ s += "float16";
+ break;
+ case 'f':
+ s += "float32";
+ break;
+ default:
+ throw "unhandled type!";
+ break;
+ }
+
+ // Append _t, finishing the struct tag name.
+ s += "_t";
+
+ return s.str();
+}
+
/// MangleName - Append a type or width suffix to a base neon function name,
/// and insert a 'q' in the appropriate location if the operation works on
/// 128b rather than 64b. E.g. turn "vst2_lane" into "vst2q_lane_f32", etc.
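To make the new StructTag concrete, here are a few inputs and the tags the switch above produces (inferred from the code, assuming the usual NEON type-string letters where a leading 'Q' marks a 128-bit type and 'U' an unsigned one):

  // StructTag("c")  -> "__simd64_int8_t"
  // StructTag("Qf") -> "__simd128_float32_t"
  // StructTag("Us") -> "__simd64_uint16_t"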
@@ -701,7 +745,13 @@
char arg = 'a';
std::string s;
- bool unioning = (proto[0] == '2' || proto[0] == '3' || proto[0] == '4');
+ // If this builtin returns a struct of 2, 3, or 4 vectors, pass it as an implicit
+ // sret-like argument.
+ bool sret = (proto[0] == '2' || proto[0] == '3' || proto[0] == '4');
+
+ // If this builtin takes an immediate argument, we need to #define it rather
+ // than use a standard declaration, so that SemaChecking can range check
+ // the immediate passed by the user.
bool define = proto.find('i') != std::string::npos;
// If all types are the same size, bitcasting the args will take care
@@ -714,19 +764,14 @@
std::string ts = TypeString(proto[0], typestr);
if (define) {
- if (proto[0] != 's')
+ if (sret)
+ s += "({ " + ts + " r; ";
+ else if (proto[0] != 's')
s += "(" + ts + "){(__neon_" + ts + ")";
+ } else if (sret) {
+ s += ts + " r; ";
} else {
- if (unioning) {
- s += "union { ";
- s += TypeString(proto[0], typestr, true) + " val; ";
- s += TypeString(proto[0], typestr, false) + " s; ";
- s += "} r;";
- } else {
- s += ts;
- }
-
- s += " r; r";
+ s += ts + " r; r";
if (structTypes && proto[0] != 's' && proto[0] != 'i' && proto[0] != 'l')
s += ".val";
@@ -744,6 +789,11 @@
s += MangleName(name, typestr, ck);
}
s += "(";
+
+ // Pass the address of the return variable as the first argument to sret-like
+ // builtins.
+ if (sret)
+ s += "&r, ";
for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
std::string args = std::string(&arg, 1);
@@ -754,7 +804,7 @@
// argument to the __builtin.
if (structTypes && (proto[i] == '2' || proto[i] == '3' || proto[i] == '4')){
for (unsigned vi = 0, ve = proto[i] - '0'; vi != ve; ++vi) {
- s += args + ".val[" + utostr(vi) + "]";
+ s += args + ".val[" + utostr(vi) + "].val";
if ((vi + 1) < ve)
s += ", ";
}
@@ -788,13 +838,12 @@
if (proto[0] != 'v') {
if (define) {
- if (proto[0] != 's')
+ if (sret)
+ s += "; r; })";
+ else if (proto[0] != 's')
s += "}";
} else {
- if (unioning)
- s += " return r.s;";
- else
- s += " return r;";
+ s += " return r;";
}
}
return s;
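Taken together with the earlier hunks, the emitted intrinsics change shape roughly as follows (illustrative only, not copied from a generated arm_neon.h):

  // Old: return through a union of the generic and public struct types.
  //   union { __neon_int8x8x2_t val; int8x8x2_t s; } r;
  //   r.val = __builtin_neon_vld2_v(ptr, 0); return r.s;
  // New: sret-style; pass &r as the leading argument and return r directly.
  //   int8x8x2_t r; __builtin_neon_vld2_v(&r, ptr, 0); return r;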
@@ -875,10 +924,11 @@
// Emit struct typedefs.
for (unsigned vi = 1; vi != 5; ++vi) {
for (unsigned i = 0, e = TDTypeVec.size(); i != e; ++i) {
- std::string ts = TypeString('d', TDTypeVec[i]);
- std::string vs = (vi > 1) ? TypeString('0' + vi, TDTypeVec[i]) : ts;
- OS << "typedef struct __" << vs << " {\n";
- OS << " __neon_" << ts << " val";
+ std::string ts = TypeString('d', TDTypeVec[i], vi == 1);
+ std::string vs = TypeString((vi > 1) ? '0' + vi : 'd', TDTypeVec[i]);
+ std::string tag = (vi > 1) ? vs : StructTag(TDTypeVec[i]);
+ OS << "typedef struct " << tag << " {\n";
+ OS << " " << ts << " val";
if (vi > 1)
OS << "[" << utostr(vi) << "]";
OS << ";\n} " << vs << ";\n\n";
@@ -1119,13 +1169,22 @@
} else {
rangestr = "u = " + utostr(RangeFromType(TypeVec[ti]));
}
- // Make sure cases appear only once.
+ // Make sure cases appear only once by uniquing them in a string map.
namestr = MangleName(name, TypeVec[ti], ck);
if (EmittedMap.count(namestr))
continue;
EmittedMap[namestr] = OpNone;
-
+
+ // Calculate the index of the immediate that should be range checked.
unsigned immidx = 0;
+
+ // Builtins that return a struct of multiple vectors have an extra
+ // leading arg for the struct return.
+ if (Proto[0] == '2' || Proto[0] == '3' || Proto[0] == '4')
+ ++immidx;
+
+ // Add one to the index for each argument until we reach the immediate
+ // to be checked. Structs of vectors are passed as multiple arguments.
for (unsigned ii = 1, ie = Proto.size(); ii != ie; ++ii) {
switch (Proto[ii]) {
default: immidx += 1; break;
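A worked example of the index calculation under this scheme, for a hypothetical prototype string "2di" (struct-of-2 return, one vector argument, then the immediate):

  // Proto[0] == '2' -> leading sret pointer argument:  immidx = 1
  // Proto[1] == 'd' -> one ordinary vector argument:   immidx = 2
  // Proto[2] == 'i' -> the immediate; SemaChecking range checks
  //                    builtin argument #2.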
Modified: llvm/branches/wendling/eh/utils/TableGen/Record.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/utils/TableGen/Record.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/utils/TableGen/Record.cpp (original)
+++ llvm/branches/wendling/eh/utils/TableGen/Record.cpp Fri Jul 2 04:57:13 2010
@@ -1262,7 +1262,7 @@
Init *Op = Val->resolveReferences(R, RV);
if (Args != NewArgs || Op != Val)
- return new DagInit(Op, "", NewArgs, ArgNames);
+ return new DagInit(Op, ValName, NewArgs, ArgNames);
return this;
}
Modified: llvm/branches/wendling/eh/utils/TableGen/TGParser.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/utils/TableGen/TGParser.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/utils/TableGen/TGParser.cpp (original)
+++ llvm/branches/wendling/eh/utils/TableGen/TGParser.cpp Fri Jul 2 04:57:13 2010
@@ -2020,7 +2020,6 @@
}
} else {
Records.addDef(CurRec);
- CurRec->resolveReferences();
}
NewRecDefs.push_back(CurRec);
@@ -2064,9 +2063,6 @@
if (SetValue(CurRec, LetStack[i][j].Loc, LetStack[i][j].Name,
LetStack[i][j].Bits, LetStack[i][j].Value))
return true;
-
- if (!CurMultiClass)
- CurRec->resolveReferences();
}
if (Lex.getCode() != tgtok::comma) break;
@@ -2075,6 +2071,10 @@
}
}
+ if (!CurMultiClass)
+ for (unsigned i = 0, e = NewRecDefs.size(); i != e; ++i)
+ NewRecDefs[i]->resolveReferences();
+
if (Lex.getCode() != tgtok::semi)
return TokError("expected ';' at end of defm");
Lex.Lex();
Modified: llvm/branches/wendling/eh/utils/buildit/build_llvm
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/utils/buildit/build_llvm?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/utils/buildit/build_llvm (original)
+++ llvm/branches/wendling/eh/utils/buildit/build_llvm Fri Jul 2 04:57:13 2010
@@ -197,6 +197,7 @@
UNIVERSAL_SDK_PATH=$SDKROOT \
NO_RUNTIME_LIBS=1 \
DISABLE_EDIS=1 \
+ DEBUG_SYMBOLS=1 \
LLVM_SUBMIT_VERSION=$LLVM_SUBMIT_VERSION \
LLVM_SUBMIT_SUBVERSION=$LLVM_SUBMIT_SUBVERSION \
CXXFLAGS="-DLLVM_VERSION_INFO='\" Apple Build #$LLVM_VERSION\"'" \
@@ -222,6 +223,7 @@
make $LOCAL_MAKEFLAGS $OPTIMIZE_OPTS UNIVERSAL=1 UNIVERSAL_ARCH="$HOSTS" \
NO_RUNTIME_LIBS=1 \
DISABLE_EDIS=1 \
+ DEBUG_SYMBOLS=1 \
LLVM_SUBMIT_VERSION=$LLVM_SUBMIT_VERSION \
LLVM_SUBMIT_SUBVERSION=$LLVM_SUBMIT_SUBVERSION \
OPTIMIZE_OPTION='-O3' VERBOSE=1 install
@@ -242,9 +244,12 @@
if [ "x$LLVM_DEBUG" != "x1" ]; then
# Strip local symbols from llvm libraries.
- strip -S $DEST_DIR$DEST_ROOT/lib/*.[oa]
+ #
+ # Use '-l' to strip i386 modules. N.B. that flag doesn't work with kext or
+ # PPC objects!
+ strip -Sl $DEST_DIR$DEST_ROOT/lib/*.[oa]
for f in `ls $DEST_DIR$DEST_ROOT/lib/*.so`; do
- strip -Sx $f
+ strip -Sxl $f
done
fi
@@ -268,16 +273,8 @@
-exec lipo -extract ppc7400 -extract i386 -extract x86_64 {} -output {} \;
fi
-cd $DEST_DIR$DEST_ROOT
-if [ "$INSTALL_LIBLTO" == yes ]; then
- mkdir -p $DT_HOME/lib
- mv lib/libLTO.dylib $DT_HOME/lib/libLTO.dylib
- strip -S $DT_HOME/lib/libLTO.dylib
-fi
-rm -f lib/libLTO.a lib/libLTO.la
-
# The Hello dylib is an example of how to build a pass. No need to install it.
-rm lib/libLLVMHello.dylib
+rm $DEST_DIR$DEST_ROOT/lib/libLLVMHello.dylib
# Compress manpages
MDIR=$DEST_DIR$DEST_ROOT/share/man/man1
@@ -323,10 +320,35 @@
| cpio -pdml $SYM_DIR/src || exit 1
################################################################################
+# Install and strip libLTO.dylib
+
+cd $DEST_DIR$DEST_ROOT
+if [ "$INSTALL_LIBLTO" = "yes" ]; then
+ mkdir -p $DT_HOME/lib
+ mv lib/libLTO.dylib $DT_HOME/lib/libLTO.dylib
+
+ # Use '-l' to strip i386 modules. N.B. that flag doesn't work with kext or
+ # PPC objects!
+ strip -arch all -Sl $DT_HOME/lib/libLTO.dylib
+fi
+rm -f lib/libLTO.a lib/libLTO.la
+
+################################################################################
# Remove debugging information from DEST_DIR.
+cd $DIR || exit 1
+
find $DEST_DIR -name \*.a -print | xargs ranlib || exit 1
find $DEST_DIR -name \*.dSYM -print | xargs rm -r || exit 1
+
+# Strip debugging information from files
+#
+# Use '-l' to strip i386 modules. N.B. that flag doesn't work with kext or
+# PPC objects!
+find $DEST_DIR -perm -0111 -type f \
+ ! \( -name '*.la' -o -name gccas -o -name gccld -o -name llvm-config \) \
+ -print | xargs -n 1 -P ${SYSCTL} strip -arch all -Sl
+
chgrp -h -R wheel $DEST_DIR
chgrp -R wheel $DEST_DIR
@@ -338,7 +360,7 @@
################################################################################
# symlinks so that B&I can find things
-if [ "$INSTALL_LIBLTO" == yes ]; then
+if [ "$INSTALL_LIBLTO" = "yes" ]; then
mkdir -p $DEST_DIR/usr/lib/
cd $DEST_DIR/usr/lib && \
ln -s ../../$DEVELOPER_DIR/usr/lib/libLTO.dylib ./libLTO.dylib
Modified: llvm/branches/wendling/eh/utils/unittest/googletest/README.LLVM
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/utils/unittest/googletest/README.LLVM?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/utils/unittest/googletest/README.LLVM (original)
+++ llvm/branches/wendling/eh/utils/unittest/googletest/README.LLVM Fri Jul 2 04:57:13 2010
@@ -27,3 +27,5 @@
Modified as follows:
* To GTestStreamToHelper in include/gtest/internal/gtest-internal.h,
added the ability to stream with raw_os_ostream.
+* To refresh Haiku support in include/gtest/internal/gtest-port.h,
+ see http://lists.cs.uiuc.edu/pipermail/llvm-commits/Week-of-Mon-20100621/102898.html
Modified: llvm/branches/wendling/eh/utils/unittest/googletest/include/gtest/internal/gtest-port.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/utils/unittest/googletest/include/gtest/internal/gtest-port.h?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/utils/unittest/googletest/include/gtest/internal/gtest-port.h (original)
+++ llvm/branches/wendling/eh/utils/unittest/googletest/include/gtest/internal/gtest-port.h Fri Jul 2 04:57:13 2010
@@ -80,6 +80,7 @@
// the given platform; otherwise undefined):
// GTEST_OS_AIX - IBM AIX
// GTEST_OS_CYGWIN - Cygwin
+// GTEST_OS_HAIKU - Haiku
// GTEST_OS_LINUX - Linux
// GTEST_OS_MAC - Mac OS X
// GTEST_OS_SOLARIS - Sun Solaris
@@ -220,11 +221,11 @@
#elif defined(_AIX)
#define GTEST_OS_AIX 1
#elif defined(__HAIKU__)
-#define GTEST_OS_HAIKU
+#define GTEST_OS_HAIKU 1
#endif // __CYGWIN__
-#if GTEST_OS_CYGWIN || GTEST_OS_LINUX || GTEST_OS_MAC || GTEST_OS_SYMBIAN || \
- GTEST_OS_SOLARIS || GTEST_OS_AIX
+#if GTEST_OS_CYGWIN || GTEST_OS_HAIKU || GTEST_OS_LINUX || GTEST_OS_MAC || \
+ GTEST_OS_SYMBIAN || GTEST_OS_SOLARIS || GTEST_OS_AIX
// On some platforms, <regex.h> needs someone to define size_t, and
// won't compile otherwise. We can #include it here as we already