[llvm-branch-commits] [llvm-branch] r172541 [5/8] - in /llvm/branches/AMDILBackend: ./ autoconf/ bindings/ocaml/executionengine/ bindings/ocaml/llvm/ bindings/ocaml/target/ cmake/ cmake/modules/ cmake/platforms/ docs/ docs/CommandGuide/ docs/_themes/ docs/_themes/llvm-theme/ docs/_themes/llvm-theme/static/ docs/llvm-theme/ docs/llvm-theme/static/ docs/tutorial/ examples/ExceptionDemo/ examples/Fibonacci/ examples/Kaleidoscope/Chapter4/ examples/Kaleidoscope/Chapter5/ examples/Kaleidoscope/Chapter6/ examples/Kaleidoscope/Chapt...
Richard Relph
Richard.Relph at amd.com
Tue Jan 15 09:16:26 PST 2013
Modified: llvm/branches/AMDILBackend/lib/Target/PowerPC/PPCISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/PowerPC/PPCISelLowering.cpp?rev=172541&r1=172540&r2=172541&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/PowerPC/PPCISelLowering.cpp (original)
+++ llvm/branches/AMDILBackend/lib/Target/PowerPC/PPCISelLowering.cpp Tue Jan 15 11:16:16 2013
@@ -106,7 +106,7 @@
// from FP_ROUND: that rounds to nearest, this rounds to zero.
setOperationAction(ISD::FP_ROUND_INREG, MVT::ppcf128, Custom);
- // We do not currently implment this libm ops for PowerPC.
+ // We do not currently implement these libm ops for PowerPC.
setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
setOperationAction(ISD::FCEIL, MVT::ppcf128, Expand);
setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
@@ -361,6 +361,22 @@
setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
setOperationAction(ISD::CTTZ, VT, Expand);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
+
+ for (unsigned j = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
+ j <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++j) {
+ MVT::SimpleValueType InnerVT = (MVT::SimpleValueType)j;
+ setTruncStoreAction(VT, InnerVT, Expand);
+ }
+ setLoadExtAction(ISD::SEXTLOAD, VT, Expand);
+ setLoadExtAction(ISD::ZEXTLOAD, VT, Expand);
+ setLoadExtAction(ISD::EXTLOAD, VT, Expand);
+ }
+
+ for (unsigned i = (unsigned)MVT::FIRST_FP_VECTOR_VALUETYPE;
+ i <= (unsigned)MVT::LAST_FP_VECTOR_VALUETYPE; ++i) {
+ MVT::SimpleValueType VT = (MVT::SimpleValueType)i;
+ setOperationAction(ISD::FSQRT, VT, Expand);
}
// We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
@@ -373,6 +389,10 @@
setOperationAction(ISD::LOAD , MVT::v4i32, Legal);
setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
setOperationAction(ISD::STORE , MVT::v4i32, Legal);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
@@ -392,6 +412,14 @@
setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
+
+ // Altivec does not contain unordered floating-point compare instructions
+ setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
+ setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand);
+ setCondCodeAction(ISD::SETUGT, MVT::v4f32, Expand);
+ setCondCodeAction(ISD::SETUGE, MVT::v4f32, Expand);
+ setCondCodeAction(ISD::SETULT, MVT::v4f32, Expand);
+ setCondCodeAction(ISD::SETULE, MVT::v4f32, Expand);
}
if (Subtarget->has64BitSupport()) {
@@ -449,6 +477,21 @@
setSchedulingPreference(Sched::Hybrid);
computeRegisterProperties();
+
+ // The Freescale cores does better with aggressive inlining of memcpy and
+ // friends. Gcc uses same threshold of 128 bytes (= 32 word stores).
+ if (Subtarget->getDarwinDirective() == PPC::DIR_E500mc ||
+ Subtarget->getDarwinDirective() == PPC::DIR_E5500) {
+ maxStoresPerMemset = 32;
+ maxStoresPerMemsetOptSize = 16;
+ maxStoresPerMemcpy = 32;
+ maxStoresPerMemcpyOptSize = 8;
+ maxStoresPerMemmove = 32;
+ maxStoresPerMemmoveOptSize = 8;
+
+ setPrefFunctionAlignment(4);
+ benefitFromCodePlacementOpt = true;
+ }
}
/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
@@ -517,11 +560,15 @@
case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ";
case PPCISD::MTFSF: return "PPCISD::MTFSF";
case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN";
+ case PPCISD::CR6SET: return "PPCISD::CR6SET";
+ case PPCISD::CR6UNSET: return "PPCISD::CR6UNSET";
}
}
EVT PPCTargetLowering::getSetCCResultType(EVT VT) const {
- return MVT::i32;
+ if (!VT.isVector())
+ return MVT::i32;
+ return VT.changeVectorElementTypeToInteger();
}
//===----------------------------------------------------------------------===//
@@ -811,14 +858,13 @@
}
// Properly sign extend the value.
- int ShAmt = (4-ByteSize)*8;
- int MaskVal = ((int)Value << ShAmt) >> ShAmt;
+ int MaskVal = SignExtend32(Value, ByteSize * 8);
// If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
if (MaskVal == 0) return SDValue();
// Finally, if this value fits in a 5 bit sext field, return it
- if (((MaskVal << (32-5)) >> (32-5)) == MaskVal)
+ if (SignExtend32<5>(MaskVal) == MaskVal)
return DAG.getTargetConstant(MaskVal, MVT::i32);
return SDValue();
}
@@ -1204,6 +1250,14 @@
ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
const Constant *C = CP->getConstVal();
+ // 64-bit SVR4 ABI code is always position-independent.
+ // The actual address of the GlobalValue is stored in the TOC.
+ if (PPCSubTarget.isSVR4ABI() && PPCSubTarget.isPPC64()) {
+ SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0);
+ return DAG.getNode(PPCISD::TOC_ENTRY, CP->getDebugLoc(), MVT::i64, GA,
+ DAG.getRegister(PPC::X2, MVT::i64));
+ }
+
unsigned MOHiFlag, MOLoFlag;
bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag);
SDValue CPIHi =
@@ -1217,6 +1271,14 @@
EVT PtrVT = Op.getValueType();
JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
+ // 64-bit SVR4 ABI code is always position-independent.
+ // The actual address of the GlobalValue is stored in the TOC.
+ if (PPCSubTarget.isSVR4ABI() && PPCSubTarget.isPPC64()) {
+ SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
+ return DAG.getNode(PPCISD::TOC_ENTRY, JT->getDebugLoc(), MVT::i64, GA,
+ DAG.getRegister(PPC::X2, MVT::i64));
+ }
+
unsigned MOHiFlag, MOLoFlag;
bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag);
SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
@@ -1232,8 +1294,8 @@
unsigned MOHiFlag, MOLoFlag;
bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag);
- SDValue TgtBAHi = DAG.getBlockAddress(BA, PtrVT, /*isTarget=*/true, MOHiFlag);
- SDValue TgtBALo = DAG.getBlockAddress(BA, PtrVT, /*isTarget=*/true, MOLoFlag);
+ SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag);
+ SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag);
return LowerLabelRef(TgtBAHi, TgtBALo, isPIC, DAG);
}
@@ -1441,7 +1503,7 @@
MachinePointerInfo(),
MVT::i32, false, false, 0);
- return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo(),
+ return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo(),
false, false, false, 0);
}
@@ -1461,7 +1523,7 @@
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
bool isPPC64 = (PtrVT == MVT::i64);
Type *IntPtrTy =
- DAG.getTargetLoweringInfo().getTargetData()->getIntPtrType(
+ DAG.getTargetLoweringInfo().getDataLayout()->getIntPtrType(
*DAG.getContext());
TargetLowering::ArgListTy Args;
@@ -1684,9 +1746,13 @@
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals)
const {
- if (PPCSubTarget.isSVR4ABI() && !PPCSubTarget.isPPC64()) {
- return LowerFormalArguments_SVR4(Chain, CallConv, isVarArg, Ins,
- dl, DAG, InVals);
+ if (PPCSubTarget.isSVR4ABI()) {
+ if (PPCSubTarget.isPPC64())
+ return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins,
+ dl, DAG, InVals);
+ else
+ return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins,
+ dl, DAG, InVals);
} else {
return LowerFormalArguments_Darwin(Chain, CallConv, isVarArg, Ins,
dl, DAG, InVals);
@@ -1694,7 +1760,7 @@
}
SDValue
-PPCTargetLowering::LowerFormalArguments_SVR4(
+PPCTargetLowering::LowerFormalArguments_32SVR4(
SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg>
@@ -1911,6 +1977,334 @@
return Chain;
}
+// PPC64 passes i8, i16, and i32 values in i64 registers. Promote
+// value to MVT::i64 and then truncate to the correct register size.
+SDValue
+PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags, EVT ObjectVT,
+ SelectionDAG &DAG, SDValue ArgVal,
+ DebugLoc dl) const {
+ if (Flags.isSExt())
+ ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
+ DAG.getValueType(ObjectVT));
+ else if (Flags.isZExt())
+ ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
+ DAG.getValueType(ObjectVT));
+
+ return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
+}
+
+// Set the size that is at least reserved in caller of this function. Tail
+// call optimized functions' reserved stack space needs to be aligned so that
+// taking the difference between two stack areas will result in an aligned
+// stack.
+void
+PPCTargetLowering::setMinReservedArea(MachineFunction &MF, SelectionDAG &DAG,
+ unsigned nAltivecParamsAtEnd,
+ unsigned MinReservedArea,
+ bool isPPC64) const {
+ PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
+ // Add the Altivec parameters at the end, if needed.
+ if (nAltivecParamsAtEnd) {
+ MinReservedArea = ((MinReservedArea+15)/16)*16;
+ MinReservedArea += 16*nAltivecParamsAtEnd;
+ }
+ MinReservedArea =
+ std::max(MinReservedArea,
+ PPCFrameLowering::getMinCallFrameSize(isPPC64, true));
+ unsigned TargetAlign
+ = DAG.getMachineFunction().getTarget().getFrameLowering()->
+ getStackAlignment();
+ unsigned AlignMask = TargetAlign-1;
+ MinReservedArea = (MinReservedArea + AlignMask) & ~AlignMask;
+ FI->setMinReservedArea(MinReservedArea);
+}
+
+SDValue
+PPCTargetLowering::LowerFormalArguments_64SVR4(
+ SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg>
+ &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ // TODO: add description of PPC stack frame format, or at least some docs.
+ //
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ // Potential tail calls could cause overwriting of argument stack slots.
+ bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
+ (CallConv == CallingConv::Fast));
+ unsigned PtrByteSize = 8;
+
+ unsigned ArgOffset = PPCFrameLowering::getLinkageSize(true, true);
+ // Area that is at least reserved in caller of this function.
+ unsigned MinReservedArea = ArgOffset;
+
+ static const uint16_t GPR[] = {
+ PPC::X3, PPC::X4, PPC::X5, PPC::X6,
+ PPC::X7, PPC::X8, PPC::X9, PPC::X10,
+ };
+
+ static const uint16_t *FPR = GetFPR();
+
+ static const uint16_t VR[] = {
+ PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
+ PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
+ };
+
+ const unsigned Num_GPR_Regs = array_lengthof(GPR);
+ const unsigned Num_FPR_Regs = 13;
+ const unsigned Num_VR_Regs = array_lengthof(VR);
+
+ unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
+
+ // Add DAG nodes to load the arguments or copy them out of registers. On
+ // entry to a function on PPC, the arguments start after the linkage area,
+ // although the first ones are often in registers.
+
+ SmallVector<SDValue, 8> MemOps;
+ unsigned nAltivecParamsAtEnd = 0;
+ Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin();
+ for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo, ++FuncArg) {
+ SDValue ArgVal;
+ bool needsLoad = false;
+ EVT ObjectVT = Ins[ArgNo].VT;
+ unsigned ObjSize = ObjectVT.getSizeInBits()/8;
+ unsigned ArgSize = ObjSize;
+ ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
+
+ unsigned CurArgOffset = ArgOffset;
+
+ // Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary.
+ if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 ||
+ ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) {
+ if (isVarArg) {
+ MinReservedArea = ((MinReservedArea+15)/16)*16;
+ MinReservedArea += CalculateStackSlotSize(ObjectVT,
+ Flags,
+ PtrByteSize);
+ } else
+ nAltivecParamsAtEnd++;
+ } else
+ // Calculate min reserved area.
+ MinReservedArea += CalculateStackSlotSize(Ins[ArgNo].VT,
+ Flags,
+ PtrByteSize);
+
+ // FIXME the codegen can be much improved in some cases.
+ // We do not have to keep everything in memory.
+ if (Flags.isByVal()) {
+ // ObjSize is the true size, ArgSize rounded up to multiple of registers.
+ ObjSize = Flags.getByValSize();
+ ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
+ // Empty aggregate parameters do not take up registers. Examples:
+ // struct { } a;
+ // union { } b;
+ // int c[0];
+ // etc. However, we have to provide a place-holder in InVals, so
+ // pretend we have an 8-byte item at the current address for that
+ // purpose.
+ if (!ObjSize) {
+ int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true);
+ SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
+ InVals.push_back(FIN);
+ continue;
+ }
+ // All aggregates smaller than 8 bytes must be passed right-justified.
+ if (ObjSize < PtrByteSize)
+ CurArgOffset = CurArgOffset + (PtrByteSize - ObjSize);
+ // The value of the object is its address.
+ int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, true);
+ SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
+ InVals.push_back(FIN);
+
+ if (ObjSize < 8) {
+ if (GPR_idx != Num_GPR_Regs) {
+ unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
+ SDValue Store;
+
+ if (ObjSize==1 || ObjSize==2 || ObjSize==4) {
+ EVT ObjType = (ObjSize == 1 ? MVT::i8 :
+ (ObjSize == 2 ? MVT::i16 : MVT::i32));
+ Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
+ MachinePointerInfo(FuncArg, CurArgOffset),
+ ObjType, false, false, 0);
+ } else {
+ // For sizes that don't fit a truncating store (3, 5, 6, 7),
+ // store the whole register as-is to the parameter save area
+ // slot. The address of the parameter was already calculated
+ // above (InVals.push_back(FIN)) to be the right-justified
+ // offset within the slot. For this store, we need a new
+ // frame index that points at the beginning of the slot.
+ int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true);
+ SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
+ Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
+ MachinePointerInfo(FuncArg, ArgOffset),
+ false, false, 0);
+ }
+
+ MemOps.push_back(Store);
+ ++GPR_idx;
+ }
+ // Whether we copied from a register or not, advance the offset
+ // into the parameter save area by a full doubleword.
+ ArgOffset += PtrByteSize;
+ continue;
+ }
+
+ for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
+ // Store whatever pieces of the object are in registers
+ // to memory. ArgOffset will be the address of the beginning
+ // of the object.
+ if (GPR_idx != Num_GPR_Regs) {
+ unsigned VReg;
+ VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
+ int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true);
+ SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
+ SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
+ MachinePointerInfo(FuncArg, ArgOffset),
+ false, false, 0);
+ MemOps.push_back(Store);
+ ++GPR_idx;
+ ArgOffset += PtrByteSize;
+ } else {
+ ArgOffset += ArgSize - j;
+ break;
+ }
+ }
+ continue;
+ }
+
+ switch (ObjectVT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unhandled argument type!");
+ case MVT::i32:
+ case MVT::i64:
+ if (GPR_idx != Num_GPR_Regs) {
+ unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
+ ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
+
+ if (ObjectVT == MVT::i32)
+ // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
+ // value to MVT::i64 and then truncate to the correct register size.
+ ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
+
+ ++GPR_idx;
+ } else {
+ needsLoad = true;
+ ArgSize = PtrByteSize;
+ }
+ ArgOffset += 8;
+ break;
+
+ case MVT::f32:
+ case MVT::f64:
+ // Every 8 bytes of argument space consumes one of the GPRs available for
+ // argument passing.
+ if (GPR_idx != Num_GPR_Regs) {
+ ++GPR_idx;
+ }
+ if (FPR_idx != Num_FPR_Regs) {
+ unsigned VReg;
+
+ if (ObjectVT == MVT::f32)
+ VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass);
+ else
+ VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass);
+
+ ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
+ ++FPR_idx;
+ } else {
+ needsLoad = true;
+ ArgSize = PtrByteSize;
+ }
+
+ ArgOffset += 8;
+ break;
+ case MVT::v4f32:
+ case MVT::v4i32:
+ case MVT::v8i16:
+ case MVT::v16i8:
+ // Note that vector arguments in registers don't reserve stack space,
+ // except in varargs functions.
+ if (VR_idx != Num_VR_Regs) {
+ unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
+ ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
+ if (isVarArg) {
+ while ((ArgOffset % 16) != 0) {
+ ArgOffset += PtrByteSize;
+ if (GPR_idx != Num_GPR_Regs)
+ GPR_idx++;
+ }
+ ArgOffset += 16;
+ GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); // FIXME correct for ppc64?
+ }
+ ++VR_idx;
+ } else {
+ // Vectors are aligned.
+ ArgOffset = ((ArgOffset+15)/16)*16;
+ CurArgOffset = ArgOffset;
+ ArgOffset += 16;
+ needsLoad = true;
+ }
+ break;
+ }
+
+ // We need to load the argument to a virtual register if we determined
+ // above that we ran out of physical registers of the appropriate type.
+ if (needsLoad) {
+ int FI = MFI->CreateFixedObject(ObjSize,
+ CurArgOffset + (ArgSize - ObjSize),
+ isImmutable);
+ SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
+ ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(),
+ false, false, false, 0);
+ }
+
+ InVals.push_back(ArgVal);
+ }
+
+ // Set the size that is at least reserved in caller of this function. Tail
+ // call optimized functions' reserved stack space needs to be aligned so that
+ // taking the difference between two stack areas will result in an aligned
+ // stack.
+ setMinReservedArea(MF, DAG, nAltivecParamsAtEnd, MinReservedArea, true);
+
+ // If the function takes variable number of arguments, make a frame index for
+ // the start of the first vararg value... for expansion of llvm.va_start.
+ if (isVarArg) {
+ int Depth = ArgOffset;
+
+ FuncInfo->setVarArgsFrameIndex(
+ MFI->CreateFixedObject(PtrByteSize, Depth, true));
+ SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
+
+ // If this function is vararg, store any remaining integer argument regs
+ // to their spots on the stack so that they may be loaded by deferencing the
+ // result of va_next.
+ for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) {
+ unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
+ SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
+ MachinePointerInfo(), false, false, 0);
+ MemOps.push_back(Store);
+ // Increment the address by four for the next argument to store
+ SDValue PtrOff = DAG.getConstant(PtrByteSize, PtrVT);
+ FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
+ }
+ }
+
+ if (!MemOps.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl,
+ MVT::Other, &MemOps[0], MemOps.size());
+
+ return Chain;
+}
+
SDValue
PPCTargetLowering::LowerFormalArguments_Darwin(
SDValue Chain,
@@ -1987,10 +2381,12 @@
default: llvm_unreachable("Unhandled argument type!");
case MVT::i32:
case MVT::f32:
- VecArgOffset += isPPC64 ? 8 : 4;
+ VecArgOffset += 4;
break;
case MVT::i64: // PPC64
case MVT::f64:
+ // FIXME: We are guaranteed to be !isPPC64 at this point.
+ // Does MVT::i64 apply?
VecArgOffset += 8;
break;
case MVT::v4f32:
@@ -2013,7 +2409,8 @@
SmallVector<SDValue, 8> MemOps;
unsigned nAltivecParamsAtEnd = 0;
- for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
+ Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin();
+ for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo, ++FuncArg) {
SDValue ArgVal;
bool needsLoad = false;
EVT ObjectVT = Ins[ArgNo].VT;
@@ -2061,10 +2458,11 @@
else
VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
+ EVT ObjType = ObjSize == 1 ? MVT::i8 : MVT::i16;
SDValue Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
- MachinePointerInfo(),
- ObjSize==1 ? MVT::i8 : MVT::i16,
- false, false, 0);
+ MachinePointerInfo(FuncArg,
+ CurArgOffset),
+ ObjType, false, false, 0);
MemOps.push_back(Store);
++GPR_idx;
}
@@ -2075,8 +2473,8 @@
}
for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
// Store whatever pieces of the object are in registers
- // to memory. ArgVal will be address of the beginning of
- // the object.
+ // to memory. ArgOffset will be the address of the beginning
+ // of the object.
if (GPR_idx != Num_GPR_Regs) {
unsigned VReg;
if (isPPC64)
@@ -2087,7 +2485,7 @@
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
- MachinePointerInfo(),
+ MachinePointerInfo(FuncArg, ArgOffset),
false, false, 0);
MemOps.push_back(Store);
++GPR_idx;
@@ -2122,18 +2520,10 @@
unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
- if (ObjectVT == MVT::i32) {
+ if (ObjectVT == MVT::i32)
// PPC64 passes i8, i16, and i32 values in i64 registers. Promote
// value to MVT::i64 and then truncate to the correct register size.
- if (Flags.isSExt())
- ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
- DAG.getValueType(ObjectVT));
- else if (Flags.isZExt())
- ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
- DAG.getValueType(ObjectVT));
-
- ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
- }
+ ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
++GPR_idx;
} else {
@@ -2220,23 +2610,10 @@
}
// Set the size that is at least reserved in caller of this function. Tail
- // call optimized function's reserved stack space needs to be aligned so that
+ // call optimized functions' reserved stack space needs to be aligned so that
// taking the difference between two stack areas will result in an aligned
// stack.
- PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
- // Add the Altivec parameters at the end, if needed.
- if (nAltivecParamsAtEnd) {
- MinReservedArea = ((MinReservedArea+15)/16)*16;
- MinReservedArea += 16*nAltivecParamsAtEnd;
- }
- MinReservedArea =
- std::max(MinReservedArea,
- PPCFrameLowering::getMinCallFrameSize(isPPC64, true));
- unsigned TargetAlign = DAG.getMachineFunction().getTarget().getFrameLowering()->
- getStackAlignment();
- unsigned AlignMask = TargetAlign-1;
- MinReservedArea = (MinReservedArea + AlignMask) & ~AlignMask;
- FI->setMinReservedArea(MinReservedArea);
+ setMinReservedArea(MF, DAG, nAltivecParamsAtEnd, MinReservedArea, isPPC64);
// If the function takes variable number of arguments, make a frame index for
// the start of the first vararg value... for expansion of llvm.va_start.
@@ -2276,8 +2653,8 @@
return Chain;
}
-/// CalculateParameterAndLinkageAreaSize - Get the size of the paramter plus
-/// linkage area for the Darwin ABI.
+/// CalculateParameterAndLinkageAreaSize - Get the size of the parameter plus
+/// linkage area for the Darwin ABI, or the 64-bit SVR4 ABI.
static unsigned
CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG,
bool isPPC64,
@@ -2408,7 +2785,7 @@
int Addr = C->getZExtValue();
if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
- (Addr << 6 >> 6) != Addr)
+ SignExtend32<26>(Addr) != Addr)
return 0; // Top 6 bits have to be sext of immediate.
return DAG.getConstant((int)C->getZExtValue() >> 2,
@@ -2686,7 +3063,7 @@
// Thus for a call through a function pointer, the following actions need
// to be performed:
// 1. Save the TOC of the caller in the TOC save area of its stack
- // frame (this is done in LowerCall_Darwin()).
+ // frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()).
// 2. Load the address of the function entry point from the function
// descriptor.
// 3. Load the TOC of the callee from the function descriptor into r2.
@@ -2776,6 +3153,15 @@
return CallOpc;
}
+static
+bool isLocalCall(const SDValue &Callee)
+{
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
+ return !G->getGlobal()->isDeclaration() &&
+ !G->getGlobal()->isWeakForLinker();
+ return false;
+}
+
SDValue
PPCTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
CallingConv::ID CallConv, bool isVarArg,
@@ -2791,12 +3177,32 @@
// Copy all of the result registers out of their specified physreg.
for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
CCValAssign &VA = RVLocs[i];
- EVT VT = VA.getValVT();
assert(VA.isRegLoc() && "Can only return in registers!");
- Chain = DAG.getCopyFromReg(Chain, dl,
- VA.getLocReg(), VT, InFlag).getValue(1);
- InVals.push_back(Chain.getValue(0));
- InFlag = Chain.getValue(2);
+
+ SDValue Val = DAG.getCopyFromReg(Chain, dl,
+ VA.getLocReg(), VA.getLocVT(), InFlag);
+ Chain = Val.getValue(1);
+ InFlag = Val.getValue(2);
+
+ switch (VA.getLocInfo()) {
+ default: llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::AExt:
+ Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
+ break;
+ case CCValAssign::ZExt:
+ Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val,
+ DAG.getValueType(VA.getValVT()));
+ Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
+ break;
+ case CCValAssign::SExt:
+ Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val,
+ DAG.getValueType(VA.getValVT()));
+ Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
+ break;
+ }
+
+ InVals.push_back(Val);
}
return Chain;
@@ -2819,6 +3225,10 @@
isTailCall, RegsToPass, Ops, NodeTys,
PPCSubTarget);
+ // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
+ if (isVarArg && PPCSubTarget.isSVR4ABI() && !PPCSubTarget.isPPC64())
+ Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));
+
// When performing tail call optimization the callee pops its arguments off
// the stack. Account for this here so these bytes can be pushed back on in
// PPCRegisterInfo::eliminateCallFramePseudoInstr.
@@ -2880,8 +3290,8 @@
// from allocating it), resulting in an additional register being
// allocated and an unnecessary move instruction being generated.
needsTOCRestore = true;
- } else if (CallOpc == PPCISD::CALL_SVR4) {
- // Otherwise insert NOP.
+ } else if ((CallOpc == PPCISD::CALL_SVR4) && !isLocalCall(Callee)) {
+ // Otherwise insert NOP for non-local calls.
CallOpc = PPCISD::CALL_NOP_SVR4;
}
}
@@ -2923,10 +3333,16 @@
isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
Ins, DAG);
- if (PPCSubTarget.isSVR4ABI() && !PPCSubTarget.isPPC64())
- return LowerCall_SVR4(Chain, Callee, CallConv, isVarArg,
- isTailCall, Outs, OutVals, Ins,
- dl, DAG, InVals);
+ if (PPCSubTarget.isSVR4ABI()) {
+ if (PPCSubTarget.isPPC64())
+ return LowerCall_64SVR4(Chain, Callee, CallConv, isVarArg,
+ isTailCall, Outs, OutVals, Ins,
+ dl, DAG, InVals);
+ else
+ return LowerCall_32SVR4(Chain, Callee, CallConv, isVarArg,
+ isTailCall, Outs, OutVals, Ins,
+ dl, DAG, InVals);
+ }
return LowerCall_Darwin(Chain, Callee, CallConv, isVarArg,
isTailCall, Outs, OutVals, Ins,
@@ -2934,15 +3350,15 @@
}
SDValue
-PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee,
- CallingConv::ID CallConv, bool isVarArg,
- bool isTailCall,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const {
- // See PPCTargetLowering::LowerFormalArguments_SVR4() for a description
+PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
// of the 32-bit SVR4 ABI stack frame layout.
assert((CallConv == CallingConv::C ||
@@ -3042,73 +3458,455 @@
SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
- SmallVector<SDValue, 8> MemOpChains;
+ SmallVector<SDValue, 8> MemOpChains;
+
+ bool seenFloatArg = false;
+ // Walk the register/memloc assignments, inserting copies/loads.
+ for (unsigned i = 0, j = 0, e = ArgLocs.size();
+ i != e;
+ ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ SDValue Arg = OutVals[i];
+ ISD::ArgFlagsTy Flags = Outs[i].Flags;
+
+ if (Flags.isByVal()) {
+ // Argument is an aggregate which is passed by value, thus we need to
+ // create a copy of it in the local variable space of the current stack
+ // frame (which is the stack frame of the caller) and pass the address of
+ // this copy to the callee.
+ assert((j < ByValArgLocs.size()) && "Index out of bounds!");
+ CCValAssign &ByValVA = ByValArgLocs[j++];
+ assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");
+
+ // Memory reserved in the local variable space of the callers stack frame.
+ unsigned LocMemOffset = ByValVA.getLocMemOffset();
+
+ SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
+ PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
+
+ // Create a copy of the argument in the local area of the current
+ // stack frame.
+ SDValue MemcpyCall =
+ CreateCopyOfByValArgument(Arg, PtrOff,
+ CallSeqStart.getNode()->getOperand(0),
+ Flags, DAG, dl);
+
+ // This must go outside the CALLSEQ_START..END.
+ SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
+ CallSeqStart.getNode()->getOperand(1));
+ DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
+ NewCallSeqStart.getNode());
+ Chain = CallSeqStart = NewCallSeqStart;
+
+ // Pass the address of the aggregate copy on the stack either in a
+ // physical register or in the parameter list area of the current stack
+ // frame to the callee.
+ Arg = PtrOff;
+ }
+
+ if (VA.isRegLoc()) {
+ seenFloatArg |= VA.getLocVT().isFloatingPoint();
+ // Put argument in a physical register.
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ } else {
+ // Put argument in the parameter list area of the current stack frame.
+ assert(VA.isMemLoc());
+ unsigned LocMemOffset = VA.getLocMemOffset();
+
+ if (!isTailCall) {
+ SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
+ PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
+
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
+ MachinePointerInfo(),
+ false, false, 0));
+ } else {
+ // Calculate and remember argument location.
+ CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
+ TailCallArguments);
+ }
+ }
+ }
+
+ if (!MemOpChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOpChains[0], MemOpChains.size());
+
+ // Build a sequence of copy-to-reg nodes chained together with token chain
+ // and flag operands which copy the outgoing args into the appropriate regs.
+ SDValue InFlag;
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ // Set CR bit 6 to true if this is a vararg call with floating args passed in
+ // registers.
+ if (isVarArg) {
+ SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue Ops[] = { Chain, InFlag };
+
+ Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET,
+ dl, VTs, Ops, InFlag.getNode() ? 2 : 1);
+
+ InFlag = Chain.getValue(1);
+ }
+
+ if (isTailCall)
+ PrepareTailCall(DAG, InFlag, Chain, dl, false, SPDiff, NumBytes, LROp, FPOp,
+ false, TailCallArguments);
+
+ return FinishCall(CallConv, dl, isTailCall, isVarArg, DAG,
+ RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes,
+ Ins, InVals);
+}
+
+// Copy an argument into memory, being careful to do this outside the
+// call sequence for the call to which the argument belongs.
+SDValue
+PPCTargetLowering::createMemcpyOutsideCallSeq(SDValue Arg, SDValue PtrOff,
+ SDValue CallSeqStart,
+ ISD::ArgFlagsTy Flags,
+ SelectionDAG &DAG,
+ DebugLoc dl) const {
+ SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
+ CallSeqStart.getNode()->getOperand(0),
+ Flags, DAG, dl);
+ // The MEMCPY must go outside the CALLSEQ_START..END.
+ SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
+ CallSeqStart.getNode()->getOperand(1));
+ DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
+ NewCallSeqStart.getNode());
+ return NewCallSeqStart;
+}
+
+SDValue
+PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+
+ unsigned NumOps = Outs.size();
+
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ unsigned PtrByteSize = 8;
+
+ MachineFunction &MF = DAG.getMachineFunction();
+
+ // Mark this function as potentially containing a function that contains a
+ // tail call. As a consequence the frame pointer will be used for dynamicalloc
+ // and restoring the callers stack pointer in this functions epilog. This is
+ // done because by tail calling the called function might overwrite the value
+ // in this function's (MF) stack pointer stack slot 0(SP).
+ if (getTargetMachine().Options.GuaranteedTailCallOpt &&
+ CallConv == CallingConv::Fast)
+ MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
+
+ unsigned nAltivecParamsAtEnd = 0;
+
+ // Count how many bytes are to be pushed on the stack, including the linkage
+ // area, and parameter passing area. We start with at least 48 bytes, which
+ // is reserved space for [SP][CR][LR][3 x unused].
+ // NOTE: For PPC64, nAltivecParamsAtEnd always remains zero as a result
+ // of this call.
+ unsigned NumBytes =
+ CalculateParameterAndLinkageAreaSize(DAG, true, isVarArg, CallConv,
+ Outs, OutVals, nAltivecParamsAtEnd);
+
+ // Calculate by how many bytes the stack has to be adjusted in case of tail
+ // call optimization.
+ int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
+
+ // To protect arguments on the stack from being clobbered in a tail call,
+ // force all the loads to happen before doing any other lowering.
+ if (isTailCall)
+ Chain = DAG.getStackArgumentTokenFactor(Chain);
+
+ // Adjust the stack pointer for the new arguments...
+ // These operations are automatically eliminated by the prolog/epilog pass
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
+ SDValue CallSeqStart = Chain;
+
+ // Load the return address and frame pointer so it can be move somewhere else
+ // later.
+ SDValue LROp, FPOp;
+ Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, true,
+ dl);
+
+ // Set up a copy of the stack pointer for use loading and storing any
+ // arguments that may not fit in the registers available for argument
+ // passing.
+ SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
+
+ // Figure out which arguments are going to go in registers, and which in
+ // memory. Also, if this is a vararg function, floating point operations
+ // must be stored to our stack, and loaded into integer regs as well, if
+ // any integer regs are available for argument passing.
+ unsigned ArgOffset = PPCFrameLowering::getLinkageSize(true, true);
+ unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
+
+ static const uint16_t GPR[] = {
+ PPC::X3, PPC::X4, PPC::X5, PPC::X6,
+ PPC::X7, PPC::X8, PPC::X9, PPC::X10,
+ };
+ static const uint16_t *FPR = GetFPR();
+
+ static const uint16_t VR[] = {
+ PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
+ PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
+ };
+ const unsigned NumGPRs = array_lengthof(GPR);
+ const unsigned NumFPRs = 13;
+ const unsigned NumVRs = array_lengthof(VR);
+
+ SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
+ SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
- bool seenFloatArg = false;
- // Walk the register/memloc assignments, inserting copies/loads.
- for (unsigned i = 0, j = 0, e = ArgLocs.size();
- i != e;
- ++i) {
- CCValAssign &VA = ArgLocs[i];
+ SmallVector<SDValue, 8> MemOpChains;
+ for (unsigned i = 0; i != NumOps; ++i) {
SDValue Arg = OutVals[i];
ISD::ArgFlagsTy Flags = Outs[i].Flags;
+ // PtrOff will be used to store the current argument to the stack if a
+ // register cannot be found for it.
+ SDValue PtrOff;
+
+ PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
+
+ PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
+
+ // Promote integers to 64-bit values.
+ if (Arg.getValueType() == MVT::i32) {
+ // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
+ unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+ Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
+ }
+
+ // FIXME memcpy is used way more than necessary. Correctness first.
+ // Note: "by value" is code for passing a structure by value, not
+ // basic types.
if (Flags.isByVal()) {
- // Argument is an aggregate which is passed by value, thus we need to
- // create a copy of it in the local variable space of the current stack
- // frame (which is the stack frame of the caller) and pass the address of
- // this copy to the callee.
- assert((j < ByValArgLocs.size()) && "Index out of bounds!");
- CCValAssign &ByValVA = ByValArgLocs[j++];
- assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");
+ // Note: Size includes alignment padding, so
+ // struct x { short a; char b; }
+ // will have Size = 4. With #pragma pack(1), it will have Size = 3.
+ // These are the proper values we need for right-justifying the
+ // aggregate in a parameter register.
+ unsigned Size = Flags.getByValSize();
- // Memory reserved in the local variable space of the callers stack frame.
- unsigned LocMemOffset = ByValVA.getLocMemOffset();
+ // An empty aggregate parameter takes up no storage and no
+ // registers.
+ if (Size == 0)
+ continue;
- SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
- PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
+ // All aggregates smaller than 8 bytes must be passed right-justified.
+ if (Size==1 || Size==2 || Size==4) {
+ EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
+ if (GPR_idx != NumGPRs) {
+ SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
+ MachinePointerInfo(), VT,
+ false, false, 0);
+ MemOpChains.push_back(Load.getValue(1));
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
- // Create a copy of the argument in the local area of the current
- // stack frame.
- SDValue MemcpyCall =
- CreateCopyOfByValArgument(Arg, PtrOff,
- CallSeqStart.getNode()->getOperand(0),
- Flags, DAG, dl);
+ ArgOffset += PtrByteSize;
+ continue;
+ }
+ }
- // This must go outside the CALLSEQ_START..END.
- SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
- CallSeqStart.getNode()->getOperand(1));
- DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
- NewCallSeqStart.getNode());
- Chain = CallSeqStart = NewCallSeqStart;
+ if (GPR_idx == NumGPRs && Size < 8) {
+ SDValue Const = DAG.getConstant(PtrByteSize - Size,
+ PtrOff.getValueType());
+ SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
+ Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
+ CallSeqStart,
+ Flags, DAG, dl);
+ ArgOffset += PtrByteSize;
+ continue;
+ }
+ // Copy entire object into memory. There are cases where gcc-generated
+ // code assumes it is there, even if it could be put entirely into
+ // registers. (This is not what the doc says.)
- // Pass the address of the aggregate copy on the stack either in a
- // physical register or in the parameter list area of the current stack
- // frame to the callee.
- Arg = PtrOff;
+ // FIXME: The above statement is likely due to a misunderstanding of the
+ // documents. All arguments must be copied into the parameter area BY
+ // THE CALLEE in the event that the callee takes the address of any
+ // formal argument. That has not yet been implemented. However, it is
+ // reasonable to use the stack area as a staging area for the register
+ // load.
+
+ // Skip this for small aggregates, as we will use the same slot for a
+ // right-justified copy, below.
+ if (Size >= 8)
+ Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
+ CallSeqStart,
+ Flags, DAG, dl);
+
+ // When a register is available, pass a small aggregate right-justified.
+ if (Size < 8 && GPR_idx != NumGPRs) {
+ // The easiest way to get this right-justified in a register
+ // is to copy the structure into the rightmost portion of a
+ // local variable slot, then load the whole slot into the
+ // register.
+ // FIXME: The memcpy seems to produce pretty awful code for
+ // small aggregates, particularly for packed ones.
+ // FIXME: It would be preferable to use the slot in the
+ // parameter save area instead of a new local variable.
+ SDValue Const = DAG.getConstant(8 - Size, PtrOff.getValueType());
+ SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
+ Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
+ CallSeqStart,
+ Flags, DAG, dl);
+
+ // Load the slot into the register.
+ SDValue Load = DAG.getLoad(PtrVT, dl, Chain, PtrOff,
+ MachinePointerInfo(),
+ false, false, false, 0);
+ MemOpChains.push_back(Load.getValue(1));
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
+
+ // Done with this argument.
+ ArgOffset += PtrByteSize;
+ continue;
+ }
+
+ // For aggregates larger than PtrByteSize, copy the pieces of the
+ // object that fit into registers from the parameter save area.
+ for (unsigned j=0; j<Size; j+=PtrByteSize) {
+ SDValue Const = DAG.getConstant(j, PtrOff.getValueType());
+ SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
+ if (GPR_idx != NumGPRs) {
+ SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
+ MachinePointerInfo(),
+ false, false, false, 0);
+ MemOpChains.push_back(Load.getValue(1));
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
+ ArgOffset += PtrByteSize;
+ } else {
+ ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
+ break;
+ }
+ }
+ continue;
}
- if (VA.isRegLoc()) {
- seenFloatArg |= VA.getLocVT().isFloatingPoint();
- // Put argument in a physical register.
- RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
- } else {
- // Put argument in the parameter list area of the current stack frame.
- assert(VA.isMemLoc());
- unsigned LocMemOffset = VA.getLocMemOffset();
+ switch (Arg.getValueType().getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unexpected ValueType for argument!");
+ case MVT::i32:
+ case MVT::i64:
+ if (GPR_idx != NumGPRs) {
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
+ } else {
+ LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
+ true, isTailCall, false, MemOpChains,
+ TailCallArguments, dl);
+ }
+ ArgOffset += PtrByteSize;
+ break;
+ case MVT::f32:
+ case MVT::f64:
+ if (FPR_idx != NumFPRs) {
+ RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
- if (!isTailCall) {
- SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
- PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
+ if (isVarArg) {
+ // A single float or an aggregate containing only a single float
+ // must be passed right-justified in the stack doubleword, and
+ // in the GPR, if one is available.
+ SDValue StoreOff;
+ if (Arg.getValueType().getSimpleVT().SimpleTy == MVT::f32) {
+ SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType());
+ StoreOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
+ } else
+ StoreOff = PtrOff;
- MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
- MachinePointerInfo(),
- false, false, 0));
+ SDValue Store = DAG.getStore(Chain, dl, Arg, StoreOff,
+ MachinePointerInfo(), false, false, 0);
+ MemOpChains.push_back(Store);
+
+ // Float varargs are always shadowed in available integer registers
+ if (GPR_idx != NumGPRs) {
+ SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff,
+ MachinePointerInfo(), false, false,
+ false, 0);
+ MemOpChains.push_back(Load.getValue(1));
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
+ }
+ } else if (GPR_idx != NumGPRs)
+ // If we have any FPRs remaining, we may also have GPRs remaining.
+ ++GPR_idx;
} else {
- // Calculate and remember argument location.
- CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
- TailCallArguments);
+ // Single-precision floating-point values are mapped to the
+ // second (rightmost) word of the stack doubleword.
+ if (Arg.getValueType() == MVT::f32) {
+ SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType());
+ PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
+ }
+
+ LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
+ true, isTailCall, false, MemOpChains,
+ TailCallArguments, dl);
+ }
+ ArgOffset += 8;
+ break;
+ case MVT::v4f32:
+ case MVT::v4i32:
+ case MVT::v8i16:
+ case MVT::v16i8:
+ if (isVarArg) {
+ // These go aligned on the stack, or in the corresponding R registers
+ // when within range. The Darwin PPC ABI doc claims they also go in
+ // V registers; in fact gcc does this only for arguments that are
+ // prototyped, not for those that match the ... We do it for all
+ // arguments, seems to work.
+ while (ArgOffset % 16 !=0) {
+ ArgOffset += PtrByteSize;
+ if (GPR_idx != NumGPRs)
+ GPR_idx++;
+ }
+ // We could elide this store in the case where the object fits
+ // entirely in R registers. Maybe later.
+ PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
+ DAG.getConstant(ArgOffset, PtrVT));
+ SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff,
+ MachinePointerInfo(), false, false, 0);
+ MemOpChains.push_back(Store);
+ if (VR_idx != NumVRs) {
+ SDValue Load = DAG.getLoad(MVT::v4f32, dl, Store, PtrOff,
+ MachinePointerInfo(),
+ false, false, false, 0);
+ MemOpChains.push_back(Load.getValue(1));
+ RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
+ }
+ ArgOffset += 16;
+ for (unsigned i=0; i<16; i+=PtrByteSize) {
+ if (GPR_idx == NumGPRs)
+ break;
+ SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
+ DAG.getConstant(i, PtrVT));
+ SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo(),
+ false, false, false, 0);
+ MemOpChains.push_back(Load.getValue(1));
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
+ }
+ break;
+ }
+
+ // Non-varargs Altivec params generally go in registers, but have
+ // stack space allocated at the end.
+ if (VR_idx != NumVRs) {
+ // Doesn't have GPR space allocated.
+ RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
+ } else {
+ LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
+ true, isTailCall, true, MemOpChains,
+ TailCallArguments, dl);
+ ArgOffset += 16;
}
+ break;
}
}
@@ -3116,12 +3914,24 @@
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
&MemOpChains[0], MemOpChains.size());
- // Set CR6 to true if this is a vararg call with floating args passed in
- // registers.
- if (isVarArg) {
- SDValue SetCR(DAG.getMachineNode(seenFloatArg ? PPC::CRSET : PPC::CRUNSET,
- dl, MVT::i32), 0);
- RegsToPass.push_back(std::make_pair(unsigned(PPC::CR1EQ), SetCR));
+ // Check if this is an indirect call (MTCTR/BCTRL).
+ // See PrepareCall() for more information about calls through function
+ // pointers in the 64-bit SVR4 ABI.
+ if (!isTailCall &&
+ !dyn_cast<GlobalAddressSDNode>(Callee) &&
+ !dyn_cast<ExternalSymbolSDNode>(Callee) &&
+ !isBLACompatibleAddress(Callee, DAG)) {
+ // Load r2 into a virtual register and store it to the TOC save area.
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
+ // TOC save area offset.
+ SDValue PtrOff = DAG.getIntPtrConstant(40);
+ SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
+ Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr, MachinePointerInfo(),
+ false, false, 0);
+ // R12 must contain the address of an indirect callee. This does not
+ // mean the MTCTR instruction must use R12; it's easier to model this
+ // as an extra parameter, so do that.
+ RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
}
// Build a sequence of copy-to-reg nodes chained together with token chain
@@ -3134,8 +3944,8 @@
}
if (isTailCall)
- PrepareTailCall(DAG, InFlag, Chain, dl, false, SPDiff, NumBytes, LROp, FPOp,
- false, TailCallArguments);
+ PrepareTailCall(DAG, InFlag, Chain, dl, true, SPDiff, NumBytes, LROp,
+ FPOp, true, TailCallArguments);
return FinishCall(CallConv, dl, isTailCall, isVarArg, DAG,
RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes,
@@ -3152,7 +3962,7 @@
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const {
- unsigned NumOps = Outs.size();
+ unsigned NumOps = Outs.size();
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
bool isPPC64 = PtrVT == MVT::i64;
@@ -3259,11 +4069,13 @@
}
// FIXME memcpy is used way more than necessary. Correctness first.
+ // Note: "by value" is code for passing a structure by value, not
+ // basic types.
if (Flags.isByVal()) {
unsigned Size = Flags.getByValSize();
+ // Very small objects are passed right-justified. Everything else is
+ // passed left-justified.
if (Size==1 || Size==2) {
- // Very small objects are passed right-justified.
- // Everything else is passed left-justified.
EVT VT = (Size==1) ? MVT::i8 : MVT::i16;
if (GPR_idx != NumGPRs) {
SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
@@ -3274,17 +4086,12 @@
ArgOffset += PtrByteSize;
} else {
- SDValue Const = DAG.getConstant(4 - Size, PtrOff.getValueType());
+ SDValue Const = DAG.getConstant(PtrByteSize - Size,
+ PtrOff.getValueType());
SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
- SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, AddPtr,
- CallSeqStart.getNode()->getOperand(0),
- Flags, DAG, dl);
- // This must go outside the CALLSEQ_START..END.
- SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
- CallSeqStart.getNode()->getOperand(1));
- DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
- NewCallSeqStart.getNode());
- Chain = CallSeqStart = NewCallSeqStart;
+ Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
+ CallSeqStart,
+ Flags, DAG, dl);
ArgOffset += PtrByteSize;
}
continue;
@@ -3292,15 +4099,13 @@
// Copy entire object into memory. There are cases where gcc-generated
// code assumes it is there, even if it could be put entirely into
// registers. (This is not what the doc says.)
- SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
- CallSeqStart.getNode()->getOperand(0),
- Flags, DAG, dl);
- // This must go outside the CALLSEQ_START..END.
- SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
- CallSeqStart.getNode()->getOperand(1));
- DAG.ReplaceAllUsesWith(CallSeqStart.getNode(), NewCallSeqStart.getNode());
- Chain = CallSeqStart = NewCallSeqStart;
- // And copy the pieces of it that fit into registers.
+ Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
+ CallSeqStart,
+ Flags, DAG, dl);
+
+ // For small aggregates (Darwin only) and aggregates >= PtrByteSize,
+ // copy the pieces of the object that fit into registers from the
+ // parameter save area.
for (unsigned j=0; j<Size; j+=PtrByteSize) {
SDValue Const = DAG.getConstant(j, PtrOff.getValueType());
SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
@@ -3369,11 +4174,10 @@
!isPPC64) // PPC64 has 64-bit GPR's obviously :)
++GPR_idx;
}
- } else {
+ } else
LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
isPPC64, isTailCall, false, MemOpChains,
TailCallArguments, dl);
- }
if (isPPC64)
ArgOffset += 8;
else
@@ -3468,22 +4272,6 @@
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
&MemOpChains[0], MemOpChains.size());
- // Check if this is an indirect call (MTCTR/BCTRL).
- // See PrepareCall() for more information about calls through function
- // pointers in the 64-bit SVR4 ABI.
- if (!isTailCall && isPPC64 && PPCSubTarget.isSVR4ABI() &&
- !dyn_cast<GlobalAddressSDNode>(Callee) &&
- !dyn_cast<ExternalSymbolSDNode>(Callee) &&
- !isBLACompatibleAddress(Callee, DAG)) {
- // Load r2 into a virtual register and store it to the TOC save area.
- SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
- // TOC save area offset.
- SDValue PtrOff = DAG.getIntPtrConstant(40);
- SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
- Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr, MachinePointerInfo(),
- false, false, 0);
- }
-
// On Darwin, R12 must contain the address of an indirect callee. This does
// not mean the MTCTR instruction must use R12; it's easier to model this as
// an extra parameter, so do that.
@@ -3548,8 +4336,24 @@
for (unsigned i = 0; i != RVLocs.size(); ++i) {
CCValAssign &VA = RVLocs[i];
assert(VA.isRegLoc() && "Can only return in registers!");
- Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
- OutVals[i], Flag);
+
+ SDValue Arg = OutVals[i];
+
+ switch (VA.getLocInfo()) {
+ default: llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::AExt:
+ Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::ZExt:
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::SExt:
+ Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ }
+
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
Flag = Chain.getValue(1);
}
@@ -3781,7 +4585,52 @@
return SDValue();
if (Op.getOperand(0).getValueType() == MVT::i64) {
- SDValue Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op.getOperand(0));
+ SDValue SINT = Op.getOperand(0);
+ // When converting to single-precision, we actually need to convert
+ // to double-precision first and then round to single-precision.
+ // To avoid double-rounding effects during that operation, we have
+ // to prepare the input operand. Bits that might be truncated when
+ // converting to double-precision are replaced by a bit that won't
+ // be lost at this stage, but is below the single-precision rounding
+ // position.
+ //
+ // However, if -enable-unsafe-fp-math is in effect, accept double
+ // rounding to avoid the extra overhead.
+ if (Op.getValueType() == MVT::f32 &&
+ !DAG.getTarget().Options.UnsafeFPMath) {
+
+ // Twiddle input to make sure the low 11 bits are zero. (If this
+ // is the case, we are guaranteed the value will fit into the 53 bit
+ // mantissa of an IEEE double-precision value without rounding.)
+ // If any of those low 11 bits were not zero originally, make sure
+ // bit 12 (value 2048) is set instead, so that the final rounding
+ // to single-precision gets the correct result.
+ SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64,
+ SINT, DAG.getConstant(2047, MVT::i64));
+ Round = DAG.getNode(ISD::ADD, dl, MVT::i64,
+ Round, DAG.getConstant(2047, MVT::i64));
+ Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT);
+ Round = DAG.getNode(ISD::AND, dl, MVT::i64,
+ Round, DAG.getConstant(-2048, MVT::i64));
+
+ // However, we cannot use that value unconditionally: if the magnitude
+ // of the input value is small, the bit-twiddling we did above might
+ // end up visibly changing the output. Fortunately, in that case, we
+ // don't need to twiddle bits since the original input will convert
+ // exactly to double-precision floating-point already. Therefore,
+ // construct a conditional to use the original value if the top 11
+ // bits are all sign-bit copies, and use the rounded value computed
+ // above otherwise.
+ SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64,
+ SINT, DAG.getConstant(53, MVT::i32));
+ Cond = DAG.getNode(ISD::ADD, dl, MVT::i64,
+ Cond, DAG.getConstant(1, MVT::i64));
+ Cond = DAG.getSetCC(dl, MVT::i32,
+ Cond, DAG.getConstant(1, MVT::i64), ISD::SETUGT);
+
+ SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
+ }
+ SDValue Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);
SDValue FP = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Bits);
if (Op.getValueType() == MVT::f32)
FP = DAG.getNode(ISD::FP_ROUND, dl,
@@ -4126,7 +4975,7 @@
unsigned TypeShiftAmt = i & (SplatBitSize-1);
// vsplti + shl self.
- if (SextVal == (i << (int)TypeShiftAmt)) {
+ if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {
SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
static const unsigned IIDs[] = { // Intrinsic to use for each size.
Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
@@ -4171,17 +5020,17 @@
}
// t = vsplti c, result = vsldoi t, t, 1
- if (SextVal == ((i << 8) | (i < 0 ? 0xFF : 0))) {
+ if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
return BuildVSLDOI(T, T, 1, Op.getValueType(), DAG, dl);
}
// t = vsplti c, result = vsldoi t, t, 2
- if (SextVal == ((i << 16) | (i < 0 ? 0xFFFF : 0))) {
+ if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
return BuildVSLDOI(T, T, 2, Op.getValueType(), DAG, dl);
}
// t = vsplti c, result = vsldoi t, t, 3
- if (SextVal == ((i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
+ if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
return BuildVSLDOI(T, T, 3, Op.getValueType(), DAG, dl);
}
@@ -5630,6 +6479,14 @@
case 'v':
case 'y':
return C_RegisterClass;
+ case 'Z':
+ // FIXME: While Z does indicate a memory constraint, it specifically
+ // indicates an r+r address (used in conjunction with the 'y' modifier
+ // in the replacement string). Currently, we're forcing the base
+ // register to be r0 in the asm printer (which is interpreted as zero)
+ // and forming the complete address in the second register. This is
+ // suboptimal.
+ return C_Memory;
}
}
return TargetLowering::getConstraintType(Constraint);
@@ -5672,6 +6529,9 @@
case 'y':
weight = CW_Register;
break;
+ case 'Z':
+ weight = CW_Memory;
+ break;
}
return weight;
}
@@ -5688,9 +6548,9 @@
return std::make_pair(0U, &PPC::G8RCRegClass);
return std::make_pair(0U, &PPC::GPRCRegClass);
case 'f':
- if (VT == MVT::f32)
+ if (VT == MVT::f32 || VT == MVT::i32)
return std::make_pair(0U, &PPC::F4RCRegClass);
- if (VT == MVT::f64)
+ if (VT == MVT::f64 || VT == MVT::i64)
return std::make_pair(0U, &PPC::F8RCRegClass);
break;
case 'v':
@@ -5870,7 +6730,8 @@
bool is31 = (getTargetMachine().Options.DisableFramePointerElim(MF) ||
MFI->hasVarSizedObjects()) &&
MFI->getStackSize() &&
- !MF.getFunction()->hasFnAttr(Attribute::Naked);
+ !MF.getFunction()->getFnAttributes().
+ hasAttribute(Attributes::Naked);
unsigned FrameReg = isPPC64 ? (is31 ? PPC::X31 : PPC::X1) :
(is31 ? PPC::R31 : PPC::R1);
SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
Modified: llvm/branches/AMDILBackend/lib/Target/PowerPC/PPCISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/PowerPC/PPCISelLowering.h?rev=172541&r1=172540&r2=172541&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/PowerPC/PPCISelLowering.h (original)
+++ llvm/branches/AMDILBackend/lib/Target/PowerPC/PPCISelLowering.h Tue Jan 15 11:16:16 2013
@@ -174,6 +174,10 @@
/// operand #3 optional in flag
TC_RETURN,
+ /// ch, gl = CR6[UN]SET ch, inglue - Toggle CR bit 6 for SVR4 vararg calls
+ CR6SET,
+ CR6UNSET,
+
/// STD_32 - This is the STD instruction for use with "32-bit" registers.
STD_32 = ISD::FIRST_TARGET_MEMORY_OPCODE,
@@ -463,20 +467,41 @@
DebugLoc dl, SelectionDAG &DAG) const;
SDValue
+ extendArgForPPC64(ISD::ArgFlagsTy Flags, EVT ObjectVT, SelectionDAG &DAG,
+ SDValue ArgVal, DebugLoc dl) const;
+
+ void
+ setMinReservedArea(MachineFunction &MF, SelectionDAG &DAG,
+ unsigned nAltivecParamsAtEnd,
+ unsigned MinReservedArea, bool isPPC64) const;
+
+ SDValue
LowerFormalArguments_Darwin(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const;
SDValue
- LowerFormalArguments_SVR4(SDValue Chain,
- CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const;
+ LowerFormalArguments_64SVR4(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+ SDValue
+ LowerFormalArguments_32SVR4(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ SDValue
+ createMemcpyOutsideCallSeq(SDValue Arg, SDValue PtrOff,
+ SDValue CallSeqStart, ISD::ArgFlagsTy Flags,
+ SelectionDAG &DAG, DebugLoc dl) const;
SDValue
- LowerCall_Darwin(SDValue Chain, SDValue Callee, CallingConv::ID CallConv,
+ LowerCall_Darwin(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv,
bool isVarArg, bool isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
@@ -484,13 +509,22 @@
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const;
SDValue
- LowerCall_SVR4(SDValue Chain, SDValue Callee, CallingConv::ID CallConv,
- bool isVarArg, bool isTailCall,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const;
+ LowerCall_64SVR4(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv,
+ bool isVarArg, bool isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+ SDValue
+ LowerCall_32SVR4(SDValue Chain, SDValue Callee, CallingConv::ID CallConv,
+ bool isVarArg, bool isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
};
}
Modified: llvm/branches/AMDILBackend/lib/Target/PowerPC/PPCInstr64Bit.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/PowerPC/PPCInstr64Bit.td?rev=172541&r1=172540&r2=172541&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/PowerPC/PPCInstr64Bit.td (original)
+++ llvm/branches/AMDILBackend/lib/Target/PowerPC/PPCInstr64Bit.td Tue Jan 15 11:16:16 2013
@@ -29,6 +29,9 @@
let PrintMethod = "printSymbolLo";
let EncoderMethod = "getLO16Encoding";
}
+def tocentry : Operand<iPTR> {
+ let MIOperandInfo = (ops i32imm:$imm);
+}
//===----------------------------------------------------------------------===//
// 64-bit transformation functions.
@@ -60,7 +63,7 @@
//
let Defs = [LR8] in
- def MovePCtoLR8 : Pseudo<(outs), (ins), "", []>,
+ def MovePCtoLR8 : Pseudo<(outs), (ins), "#MovePCtoLR8", []>,
PPC970_Unit_BRU;
// Darwin ABI Calls.
@@ -138,31 +141,31 @@
let usesCustomInserter = 1 in {
let Defs = [CR0] in {
def ATOMIC_LOAD_ADD_I64 : Pseudo<
- (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "",
+ (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "#ATOMIC_LOAD_ADD_I64",
[(set G8RC:$dst, (atomic_load_add_64 xoaddr:$ptr, G8RC:$incr))]>;
def ATOMIC_LOAD_SUB_I64 : Pseudo<
- (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "",
+ (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "#ATOMIC_LOAD_SUB_I64",
[(set G8RC:$dst, (atomic_load_sub_64 xoaddr:$ptr, G8RC:$incr))]>;
def ATOMIC_LOAD_OR_I64 : Pseudo<
- (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "",
+ (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "#ATOMIC_LOAD_OR_I64",
[(set G8RC:$dst, (atomic_load_or_64 xoaddr:$ptr, G8RC:$incr))]>;
def ATOMIC_LOAD_XOR_I64 : Pseudo<
- (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "",
+ (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "#ATOMIC_LOAD_XOR_I64",
[(set G8RC:$dst, (atomic_load_xor_64 xoaddr:$ptr, G8RC:$incr))]>;
def ATOMIC_LOAD_AND_I64 : Pseudo<
- (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "",
+ (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "#ATOMIC_LOAD_AND_i64",
[(set G8RC:$dst, (atomic_load_and_64 xoaddr:$ptr, G8RC:$incr))]>;
def ATOMIC_LOAD_NAND_I64 : Pseudo<
- (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "",
+ (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "#ATOMIC_LOAD_NAND_I64",
[(set G8RC:$dst, (atomic_load_nand_64 xoaddr:$ptr, G8RC:$incr))]>;
def ATOMIC_CMP_SWAP_I64 : Pseudo<
- (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$old, G8RC:$new), "",
+ (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$old, G8RC:$new), "#ATOMIC_CMP_SWAP_I64",
[(set G8RC:$dst,
(atomic_cmp_swap_64 xoaddr:$ptr, G8RC:$old, G8RC:$new))]>;
def ATOMIC_SWAP_I64 : Pseudo<
- (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$new), "",
+ (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$new), "#ATOMIC_SWAP_I64",
[(set G8RC:$dst, (atomic_swap_64 xoaddr:$ptr, G8RC:$new))]>;
}
}
@@ -231,10 +234,10 @@
let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in {
let Defs = [CTR8], Uses = [CTR8] in {
- def BDZ8 : IForm_ext<16, 18, 0, 0, (outs), (ins condbrtarget:$dst),
- "bdz $dst", BrB, []>;
- def BDNZ8 : IForm_ext<16, 16, 0, 0, (outs), (ins condbrtarget:$dst),
- "bdnz $dst", BrB, []>;
+ def BDZ8 : BForm_1<16, 18, 0, 0, (outs), (ins condbrtarget:$dst),
+ "bdz $dst">;
+ def BDNZ8 : BForm_1<16, 16, 0, 0, (outs), (ins condbrtarget:$dst),
+ "bdnz $dst">;
}
}
@@ -244,7 +247,7 @@
PPC970_MicroCode, PPC970_Unit_CRU;
def MFCR8pseud: XFXForm_3<31, 19, (outs G8RC:$rT), (ins crbitm:$FXM),
- "", SprMFCR>,
+ "#MFCR8pseud", SprMFCR>,
PPC970_MicroCode, PPC970_Unit_CRU;
def MFCR8 : XFXForm_3<31, 19, (outs G8RC:$rT), (ins),
@@ -275,7 +278,7 @@
// the POWER3.
let Defs = [X1], Uses = [X1] in
-def DYNALLOC8 : Pseudo<(outs G8RC:$result), (ins G8RC:$negsize, memri:$fpsi),"",
+def DYNALLOC8 : Pseudo<(outs G8RC:$result), (ins G8RC:$negsize, memri:$fpsi),"#DYNALLOC8",
[(set G8RC:$result,
(PPCdynalloc G8RC:$negsize, iaddr:$fpsi))]>;
@@ -296,12 +299,14 @@
let PPC970_Unit = 1 in { // FXU Operations.
+let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in {
def LI8 : DForm_2_r0<14, (outs G8RC:$rD), (ins symbolLo64:$imm),
"li $rD, $imm", IntSimple,
[(set G8RC:$rD, immSExt16:$imm)]>;
def LIS8 : DForm_2_r0<15, (outs G8RC:$rD), (ins symbolHi64:$imm),
"lis $rD, $imm", IntSimple,
[(set G8RC:$rD, imm16ShiftedSExt:$imm)]>;
+}
// Logical ops.
def NAND8: XForm_6<31, 476, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
@@ -459,7 +464,7 @@
let Defs = [CARRY] in {
def SRADI : XSForm_1<31, 413, (outs G8RC:$rA), (ins G8RC:$rS, u6imm:$SH),
- "sradi $rA, $rS, $SH", IntRotateD,
+ "sradi $rA, $rS, $SH", IntRotateDI,
[(set G8RC:$rA, (sra G8RC:$rS, (i32 imm:$SH)))]>, isPPC64;
}
def CNTLZD : XForm_11<31, 58, (outs G8RC:$rA), (ins G8RC:$rS),
@@ -482,23 +487,23 @@
let isCommutable = 1 in {
def RLDIMI : MDForm_1<30, 3,
(outs G8RC:$rA), (ins G8RC:$rSi, G8RC:$rS, u6imm:$SH, u6imm:$MB),
- "rldimi $rA, $rS, $SH, $MB", IntRotateD,
+ "rldimi $rA, $rS, $SH, $MB", IntRotateDI,
[]>, isPPC64, RegConstraint<"$rSi = $rA">,
NoEncode<"$rSi">;
}
// Rotate instructions.
def RLDCL : MDForm_1<30, 0,
- (outs G8RC:$rA), (ins G8RC:$rS, GPRC:$rB, u6imm:$MB),
- "rldcl $rA, $rS, $rB, $MB", IntRotateD,
+ (outs G8RC:$rA), (ins G8RC:$rS, GPRC:$rB, u6imm:$MBE),
+ "rldcl $rA, $rS, $rB, $MBE", IntRotateD,
[]>, isPPC64;
def RLDICL : MDForm_1<30, 0,
- (outs G8RC:$rA), (ins G8RC:$rS, u6imm:$SH, u6imm:$MB),
- "rldicl $rA, $rS, $SH, $MB", IntRotateD,
+ (outs G8RC:$rA), (ins G8RC:$rS, u6imm:$SH, u6imm:$MBE),
+ "rldicl $rA, $rS, $SH, $MBE", IntRotateDI,
[]>, isPPC64;
def RLDICR : MDForm_1<30, 1,
- (outs G8RC:$rA), (ins G8RC:$rS, u6imm:$SH, u6imm:$ME),
- "rldicr $rA, $rS, $SH, $ME", IntRotateD,
+ (outs G8RC:$rA), (ins G8RC:$rS, u6imm:$SH, u6imm:$MBE),
+ "rldicr $rA, $rS, $SH, $MBE", IntRotateDI,
[]>, isPPC64;
def RLWINM8 : MForm_2<21,
@@ -506,7 +511,7 @@
"rlwinm $rA, $rS, $SH, $MB, $ME", IntGeneral,
[]>;
-def ISEL8 : AForm_1<31, 15,
+def ISEL8 : AForm_4<31, 15,
(outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB, pred:$cond),
"isel $rT, $rA, $rB, $cond", IntGeneral,
[]>;
@@ -541,19 +546,19 @@
let mayLoad = 1 in
def LHAU8 : DForm_1a<43, (outs G8RC:$rD, ptr_rc:$ea_result), (ins symbolLo:$disp,
ptr_rc:$rA),
- "lhau $rD, $disp($rA)", LdStLoad,
+ "lhau $rD, $disp($rA)", LdStLHAU,
[]>, RegConstraint<"$rA = $ea_result">,
NoEncode<"$ea_result">;
// NO LWAU!
def LHAUX8 : XForm_1<31, 375, (outs G8RC:$rD, ptr_rc:$ea_result),
(ins memrr:$addr),
- "lhaux $rD, $addr", LdStLoad,
+ "lhaux $rD, $addr", LdStLHAU,
[]>, RegConstraint<"$addr.offreg = $ea_result">,
NoEncode<"$ea_result">;
-def LWAUX : XForm_1<31, 375, (outs G8RC:$rD, ptr_rc:$ea_result),
+def LWAUX : XForm_1<31, 373, (outs G8RC:$rD, ptr_rc:$ea_result),
(ins memrr:$addr),
- "lwaux $rD, $addr", LdStLoad,
+ "lwaux $rD, $addr", LdStLHAU,
[]>, RegConstraint<"$addr.offreg = $ea_result">,
NoEncode<"$ea_result">, isPPC64;
}
@@ -584,31 +589,31 @@
// Update forms.
let mayLoad = 1 in {
def LBZU8 : DForm_1<35, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
- "lbzu $rD, $addr", LdStLoad,
+ "lbzu $rD, $addr", LdStLoadUpd,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
def LHZU8 : DForm_1<41, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
- "lhzu $rD, $addr", LdStLoad,
+ "lhzu $rD, $addr", LdStLoadUpd,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
def LWZU8 : DForm_1<33, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
- "lwzu $rD, $addr", LdStLoad,
+ "lwzu $rD, $addr", LdStLoadUpd,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
def LBZUX8 : XForm_1<31, 119, (outs G8RC:$rD, ptr_rc:$ea_result),
(ins memrr:$addr),
- "lbzux $rD, $addr", LdStLoad,
+ "lbzux $rD, $addr", LdStLoadUpd,
[]>, RegConstraint<"$addr.offreg = $ea_result">,
NoEncode<"$ea_result">;
-def LHZUX8 : XForm_1<31, 331, (outs G8RC:$rD, ptr_rc:$ea_result),
+def LHZUX8 : XForm_1<31, 311, (outs G8RC:$rD, ptr_rc:$ea_result),
(ins memrr:$addr),
- "lhzux $rD, $addr", LdStLoad,
+ "lhzux $rD, $addr", LdStLoadUpd,
[]>, RegConstraint<"$addr.offreg = $ea_result">,
NoEncode<"$ea_result">;
def LWZUX8 : XForm_1<31, 55, (outs G8RC:$rD, ptr_rc:$ea_result),
(ins memrr:$addr),
- "lwzux $rD, $addr", LdStLoad,
+ "lwzux $rD, $addr", LdStLoadUpd,
[]>, RegConstraint<"$addr.offreg = $ea_result">,
NoEncode<"$ea_result">;
}
@@ -621,18 +626,26 @@
"ld $rD, $src", LdStLD,
[(set G8RC:$rD, (load ixaddr:$src))]>, isPPC64;
def LDtoc: Pseudo<(outs G8RC:$rD), (ins tocentry:$disp, G8RC:$reg),
- "",
+ "#LDtoc",
[(set G8RC:$rD,
(PPCtoc_entry tglobaladdr:$disp, G8RC:$reg))]>, isPPC64;
+def LDtocJTI: Pseudo<(outs G8RC:$rD), (ins tocentry:$disp, G8RC:$reg),
+ "#LDtocJTI",
+ [(set G8RC:$rD,
+ (PPCtoc_entry tjumptable:$disp, G8RC:$reg))]>, isPPC64;
+def LDtocCPT: Pseudo<(outs G8RC:$rD), (ins tocentry:$disp, G8RC:$reg),
+ "#LDtocCPT",
+ [(set G8RC:$rD,
+ (PPCtoc_entry tconstpool:$disp, G8RC:$reg))]>, isPPC64;
let hasSideEffects = 1 in {
-let RST = 2, DS_RA = 0 in // FIXME: Should be a pseudo.
-def LDinto_toc: DSForm_1<58, 0, (outs), (ins G8RC:$reg),
+let RST = 2, DS = 2 in
+def LDinto_toc: DSForm_1a<58, 0, (outs), (ins G8RC:$reg),
"ld 2, 8($reg)", LdStLD,
[(PPCload_toc G8RC:$reg)]>, isPPC64;
-let RST = 2, DS_RA = 0 in // FIXME: Should be a pseudo.
-def LDtoc_restore : DSForm_1<58, 0, (outs), (ins),
+let RST = 2, DS = 10, RA = 1 in
+def LDtoc_restore : DSForm_1a<58, 0, (outs), (ins),
"ld 2, 40(1)", LdStLD,
[(PPCtoc_restore)]>, isPPC64;
}
@@ -642,13 +655,13 @@
let mayLoad = 1 in
def LDU : DSForm_1<58, 1, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memrix:$addr),
- "ldu $rD, $addr", LdStLD,
+ "ldu $rD, $addr", LdStLDU,
[]>, RegConstraint<"$addr.reg = $ea_result">, isPPC64,
NoEncode<"$ea_result">;
def LDUX : XForm_1<31, 53, (outs G8RC:$rD, ptr_rc:$ea_result),
(ins memrr:$addr),
- "ldux $rD, $addr", LdStLoad,
+ "ldux $rD, $addr", LdStLDU,
[]>, RegConstraint<"$addr.offreg = $ea_result">,
NoEncode<"$ea_result">, isPPC64;
}
@@ -693,16 +706,16 @@
let PPC970_Unit = 2 in {
-def STBU8 : DForm_1a<38, (outs ptr_rc:$ea_res), (ins G8RC:$rS,
+def STBU8 : DForm_1a<39, (outs ptr_rc:$ea_res), (ins G8RC:$rS,
symbolLo:$ptroff, ptr_rc:$ptrreg),
- "stbu $rS, $ptroff($ptrreg)", LdStStore,
+ "stbu $rS, $ptroff($ptrreg)", LdStStoreUpd,
[(set ptr_rc:$ea_res,
(pre_truncsti8 G8RC:$rS, ptr_rc:$ptrreg,
iaddroff:$ptroff))]>,
RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
def STHU8 : DForm_1a<45, (outs ptr_rc:$ea_res), (ins G8RC:$rS,
symbolLo:$ptroff, ptr_rc:$ptrreg),
- "sthu $rS, $ptroff($ptrreg)", LdStStore,
+ "sthu $rS, $ptroff($ptrreg)", LdStStoreUpd,
[(set ptr_rc:$ea_res,
(pre_truncsti16 G8RC:$rS, ptr_rc:$ptrreg,
iaddroff:$ptroff))]>,
@@ -710,7 +723,7 @@
def STWU8 : DForm_1a<37, (outs ptr_rc:$ea_res), (ins G8RC:$rS,
symbolLo:$ptroff, ptr_rc:$ptrreg),
- "stwu $rS, $ptroff($ptrreg)", LdStStore,
+ "stwu $rS, $ptroff($ptrreg)", LdStStoreUpd,
[(set ptr_rc:$ea_res,
(pre_truncsti32 G8RC:$rS, ptr_rc:$ptrreg,
iaddroff:$ptroff))]>,
@@ -718,7 +731,7 @@
def STDU : DSForm_1a<62, 1, (outs ptr_rc:$ea_res), (ins G8RC:$rS,
s16immX4:$ptroff, ptr_rc:$ptrreg),
- "stdu $rS, $ptroff($ptrreg)", LdStSTD,
+ "stdu $rS, $ptroff($ptrreg)", LdStSTDU,
[(set ptr_rc:$ea_res, (pre_store G8RC:$rS, ptr_rc:$ptrreg,
iaddroff:$ptroff))]>,
RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">,
@@ -727,7 +740,7 @@
def STBUX8 : XForm_8<31, 247, (outs ptr_rc:$ea_res),
(ins G8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
- "stbux $rS, $ptroff, $ptrreg", LdStStore,
+ "stbux $rS, $ptroff, $ptrreg", LdStStoreUpd,
[(set ptr_rc:$ea_res,
(pre_truncsti8 G8RC:$rS,
ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
@@ -736,7 +749,7 @@
def STHUX8 : XForm_8<31, 439, (outs ptr_rc:$ea_res),
(ins G8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
- "sthux $rS, $ptroff, $ptrreg", LdStStore,
+ "sthux $rS, $ptroff, $ptrreg", LdStStoreUpd,
[(set ptr_rc:$ea_res,
(pre_truncsti16 G8RC:$rS,
ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
@@ -745,7 +758,7 @@
def STWUX8 : XForm_8<31, 183, (outs ptr_rc:$ea_res),
(ins G8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
- "stwux $rS, $ptroff, $ptrreg", LdStStore,
+ "stwux $rS, $ptroff, $ptrreg", LdStStoreUpd,
[(set ptr_rc:$ea_res,
(pre_truncsti32 G8RC:$rS,
ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
@@ -754,7 +767,7 @@
def STDUX : XForm_8<31, 181, (outs ptr_rc:$ea_res),
(ins G8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
- "stdux $rS, $ptroff, $ptrreg", LdStStore,
+ "stdux $rS, $ptroff, $ptrreg", LdStSTDU,
[(set ptr_rc:$ea_res,
(pre_store G8RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">,
Modified: llvm/branches/AMDILBackend/lib/Target/PowerPC/PPCInstrAltivec.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/PowerPC/PPCInstrAltivec.td?rev=172541&r1=172540&r2=172541&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/PowerPC/PPCInstrAltivec.td (original)
+++ llvm/branches/AMDILBackend/lib/Target/PowerPC/PPCInstrAltivec.td Tue Jan 15 11:16:16 2013
@@ -340,6 +340,28 @@
"vctuxs $vD, $vB, $UIMM", VecFP,
[(set VRRC:$vD,
(int_ppc_altivec_vctuxs VRRC:$vB, imm:$UIMM))]>;
+
+// Defines with the UIM field set to 0 for floating-point
+// to integer (fp_to_sint/fp_to_uint) conversions and integer
+// to floating-point (sint_to_fp/uint_to_fp) conversions.
+let VA = 0 in {
+def VCFSX_0 : VXForm_1<842, (outs VRRC:$vD), (ins VRRC:$vB),
+ "vcfsx $vD, $vB, 0", VecFP,
+ [(set VRRC:$vD,
+ (int_ppc_altivec_vcfsx VRRC:$vB, 0))]>;
+def VCTUXS_0 : VXForm_1<906, (outs VRRC:$vD), (ins VRRC:$vB),
+ "vctuxs $vD, $vB, 0", VecFP,
+ [(set VRRC:$vD,
+ (int_ppc_altivec_vctuxs VRRC:$vB, 0))]>;
+def VCFUX_0 : VXForm_1<778, (outs VRRC:$vD), (ins VRRC:$vB),
+ "vcfux $vD, $vB, 0", VecFP,
+ [(set VRRC:$vD,
+ (int_ppc_altivec_vcfux VRRC:$vB, 0))]>;
+def VCTSXS_0 : VXForm_1<970, (outs VRRC:$vD), (ins VRRC:$vB),
+ "vctsxs $vD, $vB, 0", VecFP,
+ [(set VRRC:$vD,
+ (int_ppc_altivec_vctsxs VRRC:$vB, 0))]>;
+}
def VEXPTEFP : VX2_Int<394, "vexptefp", int_ppc_altivec_vexptefp>;
def VLOGEFP : VX2_Int<458, "vlogefp", int_ppc_altivec_vlogefp>;
@@ -689,3 +711,13 @@
(v8i16 (VSRAH VRRC:$vA, VRRC:$vB))>;
def : Pat<(v4i32 (sra (v4i32 VRRC:$vA), (v4i32 VRRC:$vB))),
(v4i32 (VSRAW VRRC:$vA, VRRC:$vB))>;
+
+// Float to integer and integer to float conversions
+def : Pat<(v4i32 (fp_to_sint (v4f32 VRRC:$vA))),
+ (VCTSXS_0 VRRC:$vA)>;
+def : Pat<(v4i32 (fp_to_uint (v4f32 VRRC:$vA))),
+ (VCTUXS_0 VRRC:$vA)>;
+def : Pat<(v4f32 (sint_to_fp (v4i32 VRRC:$vA))),
+ (VCFSX_0 VRRC:$vA)>;
+def : Pat<(v4f32 (uint_to_fp (v4i32 VRRC:$vA))),
+ (VCFUX_0 VRRC:$vA)>;
Modified: llvm/branches/AMDILBackend/lib/Target/PowerPC/PPCInstrFormats.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/PowerPC/PPCInstrFormats.td?rev=172541&r1=172540&r2=172541&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/PowerPC/PPCInstrFormats.td (original)
+++ llvm/branches/AMDILBackend/lib/Target/PowerPC/PPCInstrFormats.td Tue Jan 15 11:16:16 2013
@@ -94,12 +94,6 @@
let Inst{31} = lk;
}
-class IForm_ext<bits<6> opcode, bits<5> bo, bit aa, bit lk, dag OOL, dag IOL,
- string asmstr, InstrItinClass itin, list<dag> pattern>
- : IForm<opcode, aa, lk, OOL, IOL, asmstr, itin, pattern> {
- let LI{0-4} = bo;
-}
-
// 1.7.2 B-Form
class BForm<bits<6> opcode, bit aa, bit lk, dag OOL, dag IOL, string asmstr>
: I<opcode, OOL, IOL, asmstr, BrB> {
@@ -118,6 +112,13 @@
let Inst{31} = lk;
}
+class BForm_1<bits<6> opcode, bits<5> bo, bit aa, bit lk, dag OOL, dag IOL,
+ string asmstr>
+ : BForm<opcode, aa, lk, OOL, IOL, asmstr> {
+ let BIBO{4-0} = bo;
+ let BIBO{6-5} = 0;
+ let CR = 0;
+}
// 1.7.4 D-Form
class DForm_base<bits<6> opcode, dag OOL, dag IOL, string asmstr,
@@ -625,9 +626,9 @@
InstrItinClass itin>
: I<opcode, OOL, IOL, asmstr, itin> {
bits<8> FXM;
- bits<5> ST;
+ bits<5> rS;
- let Inst{6-10} = ST;
+ let Inst{6-10} = rS;
let Inst{11} = 0;
let Inst{12-19} = FXM;
let Inst{20} = 0;
@@ -666,7 +667,7 @@
string cstr, InstrItinClass itin, list<dag>pattern>
: I<opcode, OOL, IOL, asmstr, itin> {
bits<8> FM;
- bits<5> RT;
+ bits<5> rT;
bit RC = 0; // set by isDOT
let Pattern = pattern;
@@ -675,7 +676,7 @@
let Inst{6} = 0;
let Inst{7-14} = FM;
let Inst{15} = 0;
- let Inst{16-20} = RT;
+ let Inst{16-20} = rT;
let Inst{21-30} = xo;
let Inst{31} = RC;
}
@@ -758,6 +759,26 @@
let FRB = 0;
}
+class AForm_4<bits<6> opcode, bits<5> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> RT;
+ bits<5> RA;
+ bits<5> RB;
+ bits<7> BIBO; // 2 bits of BI and 5 bits of BO (must be 12).
+ bits<3> CR;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = RT;
+ let Inst{11-15} = RA;
+ let Inst{16-20} = RB;
+ let Inst{21-23} = CR;
+ let Inst{24-25} = BIBO{6-5};
+ let Inst{26-30} = xo;
+ let Inst{31} = 0;
+}
+
// 1.7.13 M-Form
class MForm_1<bits<6> opcode, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
Modified: llvm/branches/AMDILBackend/lib/Target/PowerPC/PPCInstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/PowerPC/PPCInstrInfo.cpp?rev=172541&r1=172540&r2=172541&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/PowerPC/PPCInstrInfo.cpp (original)
+++ llvm/branches/AMDILBackend/lib/Target/PowerPC/PPCInstrInfo.cpp Tue Jan 15 11:16:16 2013
@@ -54,7 +54,8 @@
const TargetMachine *TM,
const ScheduleDAG *DAG) const {
unsigned Directive = TM->getSubtarget<PPCSubtarget>().getDarwinDirective();
- if (Directive == PPC::DIR_440 || Directive == PPC::DIR_A2) {
+ if (Directive == PPC::DIR_440 || Directive == PPC::DIR_A2 ||
+ Directive == PPC::DIR_E500mc || Directive == PPC::DIR_E5500) {
const InstrItineraryData *II = TM->getInstrItineraryData();
return new PPCScoreboardHazardRecognizer(II, DAG);
}
@@ -70,7 +71,8 @@
unsigned Directive = TM.getSubtarget<PPCSubtarget>().getDarwinDirective();
// Most subtargets use a PPC970 recognizer.
- if (Directive != PPC::DIR_440 && Directive != PPC::DIR_A2) {
+ if (Directive != PPC::DIR_440 && Directive != PPC::DIR_A2 &&
+ Directive != PPC::DIR_E500mc && Directive != PPC::DIR_E5500) {
const TargetInstrInfo *TII = TM.getInstrInfo();
assert(TII && "No InstrInfo?");
@@ -568,12 +570,15 @@
// STVX VAL, 0, R0
//
// FIXME: We use R0 here, because it isn't available for RA.
- NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::ADDI), PPC::R0),
+ bool Is64Bit = TM.getSubtargetImpl()->isPPC64();
+ unsigned Instr = Is64Bit ? PPC::ADDI8 : PPC::ADDI;
+ unsigned GPR0 = Is64Bit ? PPC::X0 : PPC::R0;
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(Instr), GPR0),
FrameIdx, 0, 0));
NewMIs.push_back(BuildMI(MF, DL, get(PPC::STVX))
.addReg(SrcReg, getKillRegState(isKill))
- .addReg(PPC::R0)
- .addReg(PPC::R0));
+ .addReg(GPR0)
+ .addReg(GPR0));
} else {
llvm_unreachable("Unknown regclass!");
}
@@ -705,10 +710,13 @@
// Dest = LVX 0, R0
//
// FIXME: We use R0 here, because it isn't available for RA.
- NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::ADDI), PPC::R0),
+ bool Is64Bit = TM.getSubtargetImpl()->isPPC64();
+ unsigned Instr = Is64Bit ? PPC::ADDI8 : PPC::ADDI;
+ unsigned GPR0 = Is64Bit ? PPC::X0 : PPC::R0;
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(Instr), GPR0),
FrameIdx, 0, 0));
- NewMIs.push_back(BuildMI(MF, DL, get(PPC::LVX),DestReg).addReg(PPC::R0)
- .addReg(PPC::R0));
+ NewMIs.push_back(BuildMI(MF, DL, get(PPC::LVX),DestReg).addReg(GPR0)
+ .addReg(GPR0));
} else {
llvm_unreachable("Unknown regclass!");
}
Modified: llvm/branches/AMDILBackend/lib/Target/PowerPC/PPCInstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/PowerPC/PPCInstrInfo.td?rev=172541&r1=172540&r2=172541&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/PowerPC/PPCInstrInfo.td (original)
+++ llvm/branches/AMDILBackend/lib/Target/PowerPC/PPCInstrInfo.td Tue Jan 15 11:16:16 2013
@@ -123,9 +123,11 @@
def PPCload : SDNode<"PPCISD::LOAD", SDTypeProfile<1, 1, []>,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
def PPCload_toc : SDNode<"PPCISD::LOAD_TOC", SDTypeProfile<0, 1, []>,
- [SDNPHasChain, SDNPInGlue, SDNPOutGlue]>;
+ [SDNPHasChain, SDNPSideEffect,
+ SDNPInGlue, SDNPOutGlue]>;
def PPCtoc_restore : SDNode<"PPCISD::TOC_RESTORE", SDTypeProfile<0, 0, []>,
- [SDNPHasChain, SDNPInGlue, SDNPOutGlue]>;
+ [SDNPHasChain, SDNPSideEffect,
+ SDNPInGlue, SDNPOutGlue]>;
def PPCmtctr : SDNode<"PPCISD::MTCTR", SDT_PPCCall,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
def PPCbctrl_Darwin : SDNode<"PPCISD::BCTRL_Darwin", SDTNone,
@@ -153,6 +155,12 @@
def PPCstbrx : SDNode<"PPCISD::STBRX", SDT_PPCstbrx,
[SDNPHasChain, SDNPMayStore]>;
+// Instructions to set/unset CR bit 6 for SVR4 vararg calls
+def PPCcr6set : SDNode<"PPCISD::CR6SET", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+def PPCcr6unset : SDNode<"PPCISD::CR6UNSET", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+
// Instructions to support atomic operations
def PPClarx : SDNode<"PPCISD::LARX", SDT_PPClarx,
[SDNPHasChain, SDNPMayLoad]>;
@@ -330,9 +338,6 @@
let MIOperandInfo = (ops i32imm:$imm, ptr_rc:$reg);
let EncoderMethod = "getMemRIXEncoding";
}
-def tocentry : Operand<iPTR> {
- let MIOperandInfo = (ops i32imm:$imm);
-}
// PowerPC Predicate operand. 20 = (0<<5)|20 = always, CR0 is a dummy reg
// that doesn't matter.
@@ -364,9 +369,9 @@
let hasCtrlDep = 1 in {
let Defs = [R1], Uses = [R1] in {
-def ADJCALLSTACKDOWN : Pseudo<(outs), (ins u16imm:$amt), "",
+def ADJCALLSTACKDOWN : Pseudo<(outs), (ins u16imm:$amt), "#ADJCALLSTACKDOWN $amt",
[(callseq_start timm:$amt)]>;
-def ADJCALLSTACKUP : Pseudo<(outs), (ins u16imm:$amt1, u16imm:$amt2), "",
+def ADJCALLSTACKUP : Pseudo<(outs), (ins u16imm:$amt1, u16imm:$amt2), "#ADJCALLSTACKUP $amt1 $amt2",
[(callseq_end timm:$amt1, timm:$amt2)]>;
}
@@ -375,7 +380,7 @@
}
let Defs = [R1], Uses = [R1] in
-def DYNALLOC : Pseudo<(outs GPRC:$result), (ins GPRC:$negsize, memri:$fpsi), "",
+def DYNALLOC : Pseudo<(outs GPRC:$result), (ins GPRC:$negsize, memri:$fpsi), "#DYNALLOC",
[(set GPRC:$result,
(PPCdynalloc GPRC:$negsize, iaddr:$fpsi))]>;
@@ -384,19 +389,19 @@
let usesCustomInserter = 1, // Expanded after instruction selection.
PPC970_Single = 1 in {
def SELECT_CC_I4 : Pseudo<(outs GPRC:$dst), (ins CRRC:$cond, GPRC:$T, GPRC:$F,
- i32imm:$BROPC), "",
+ i32imm:$BROPC), "#SELECT_CC_I4",
[]>;
def SELECT_CC_I8 : Pseudo<(outs G8RC:$dst), (ins CRRC:$cond, G8RC:$T, G8RC:$F,
- i32imm:$BROPC), "",
+ i32imm:$BROPC), "#SELECT_CC_I8",
[]>;
def SELECT_CC_F4 : Pseudo<(outs F4RC:$dst), (ins CRRC:$cond, F4RC:$T, F4RC:$F,
- i32imm:$BROPC), "",
+ i32imm:$BROPC), "#SELECT_CC_F4",
[]>;
def SELECT_CC_F8 : Pseudo<(outs F8RC:$dst), (ins CRRC:$cond, F8RC:$T, F8RC:$F,
- i32imm:$BROPC), "",
+ i32imm:$BROPC), "#SELECT_CC_F8",
[]>;
def SELECT_CC_VRRC: Pseudo<(outs VRRC:$dst), (ins CRRC:$cond, VRRC:$T, VRRC:$F,
- i32imm:$BROPC), "",
+ i32imm:$BROPC), "#SELECT_CC_VRRC",
[]>;
}
@@ -404,16 +409,16 @@
// scavenge a register for it.
let mayStore = 1 in
def SPILL_CR : Pseudo<(outs), (ins CRRC:$cond, memri:$F),
- "", []>;
+ "#SPILL_CR", []>;
// RESTORE_CR - Indicate that we're restoring the CR register (previously
// spilled), so we'll need to scavenge a register for it.
let mayLoad = 1 in
def RESTORE_CR : Pseudo<(outs CRRC:$cond), (ins memri:$F),
- "", []>;
+ "#RESTORE_CR", []>;
let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7 in {
- let isReturn = 1, Uses = [LR, RM] in
+ let isCodeGenOnly = 1, isReturn = 1, Uses = [LR, RM] in
def BLR : XLForm_2_br<19, 16, 0, (outs), (ins pred:$p),
"b${p:cc}lr ${p:reg}", BrB,
[(retflag)]>;
@@ -422,7 +427,7 @@
}
let Defs = [LR] in
- def MovePCtoLR : Pseudo<(outs), (ins), "", []>,
+ def MovePCtoLR : Pseudo<(outs), (ins), "#MovePCtoLR", []>,
PPC970_Unit_BRU;
let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in {
@@ -434,16 +439,17 @@
// BCC represents an arbitrary conditional branch on a predicate.
// FIXME: should be able to write a pattern for PPCcondbranch, but can't use
- // a two-value operand where a dag node expects two operands. :(
- def BCC : BForm<16, 0, 0, (outs), (ins pred:$cond, condbrtarget:$dst),
- "b${cond:cc} ${cond:reg}, $dst"
- /*[(PPCcondbranch CRRC:$crS, imm:$opc, bb:$dst)]*/>;
+ // a two-value operand where a dag node expects two operands. :(
+ let isCodeGenOnly = 1 in
+ def BCC : BForm<16, 0, 0, (outs), (ins pred:$cond, condbrtarget:$dst),
+ "b${cond:cc} ${cond:reg}, $dst"
+ /*[(PPCcondbranch CRRC:$crS, imm:$opc, bb:$dst)]*/>;
let Defs = [CTR], Uses = [CTR] in {
- def BDZ : IForm_ext<16, 18, 0, 0, (outs), (ins condbrtarget:$dst),
- "bdz $dst", BrB, []>;
- def BDNZ : IForm_ext<16, 16, 0, 0, (outs), (ins condbrtarget:$dst),
- "bdnz $dst", BrB, []>;
+ def BDZ : BForm_1<16, 18, 0, 0, (outs), (ins condbrtarget:$dst),
+ "bdz $dst">;
+ def BDNZ : BForm_1<16, 16, 0, 0, (outs), (ins condbrtarget:$dst),
+ "bdnz $dst">;
}
}
@@ -559,81 +565,81 @@
let usesCustomInserter = 1 in {
let Defs = [CR0] in {
def ATOMIC_LOAD_ADD_I8 : Pseudo<
- (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_ADD_I8",
[(set GPRC:$dst, (atomic_load_add_8 xoaddr:$ptr, GPRC:$incr))]>;
def ATOMIC_LOAD_SUB_I8 : Pseudo<
- (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_SUB_I8",
[(set GPRC:$dst, (atomic_load_sub_8 xoaddr:$ptr, GPRC:$incr))]>;
def ATOMIC_LOAD_AND_I8 : Pseudo<
- (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_AND_I8",
[(set GPRC:$dst, (atomic_load_and_8 xoaddr:$ptr, GPRC:$incr))]>;
def ATOMIC_LOAD_OR_I8 : Pseudo<
- (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_OR_I8",
[(set GPRC:$dst, (atomic_load_or_8 xoaddr:$ptr, GPRC:$incr))]>;
def ATOMIC_LOAD_XOR_I8 : Pseudo<
- (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "ATOMIC_LOAD_XOR_I8",
[(set GPRC:$dst, (atomic_load_xor_8 xoaddr:$ptr, GPRC:$incr))]>;
def ATOMIC_LOAD_NAND_I8 : Pseudo<
- (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_NAND_I8",
[(set GPRC:$dst, (atomic_load_nand_8 xoaddr:$ptr, GPRC:$incr))]>;
def ATOMIC_LOAD_ADD_I16 : Pseudo<
- (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_ADD_I16",
[(set GPRC:$dst, (atomic_load_add_16 xoaddr:$ptr, GPRC:$incr))]>;
def ATOMIC_LOAD_SUB_I16 : Pseudo<
- (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_SUB_I16",
[(set GPRC:$dst, (atomic_load_sub_16 xoaddr:$ptr, GPRC:$incr))]>;
def ATOMIC_LOAD_AND_I16 : Pseudo<
- (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_AND_I16",
[(set GPRC:$dst, (atomic_load_and_16 xoaddr:$ptr, GPRC:$incr))]>;
def ATOMIC_LOAD_OR_I16 : Pseudo<
- (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_OR_I16",
[(set GPRC:$dst, (atomic_load_or_16 xoaddr:$ptr, GPRC:$incr))]>;
def ATOMIC_LOAD_XOR_I16 : Pseudo<
- (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_XOR_I16",
[(set GPRC:$dst, (atomic_load_xor_16 xoaddr:$ptr, GPRC:$incr))]>;
def ATOMIC_LOAD_NAND_I16 : Pseudo<
- (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_NAND_I16",
[(set GPRC:$dst, (atomic_load_nand_16 xoaddr:$ptr, GPRC:$incr))]>;
def ATOMIC_LOAD_ADD_I32 : Pseudo<
- (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_ADD_I32",
[(set GPRC:$dst, (atomic_load_add_32 xoaddr:$ptr, GPRC:$incr))]>;
def ATOMIC_LOAD_SUB_I32 : Pseudo<
- (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_SUB_I32",
[(set GPRC:$dst, (atomic_load_sub_32 xoaddr:$ptr, GPRC:$incr))]>;
def ATOMIC_LOAD_AND_I32 : Pseudo<
- (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_AND_I32",
[(set GPRC:$dst, (atomic_load_and_32 xoaddr:$ptr, GPRC:$incr))]>;
def ATOMIC_LOAD_OR_I32 : Pseudo<
- (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_OR_I32",
[(set GPRC:$dst, (atomic_load_or_32 xoaddr:$ptr, GPRC:$incr))]>;
def ATOMIC_LOAD_XOR_I32 : Pseudo<
- (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_XOR_I32",
[(set GPRC:$dst, (atomic_load_xor_32 xoaddr:$ptr, GPRC:$incr))]>;
def ATOMIC_LOAD_NAND_I32 : Pseudo<
- (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_NAND_I32",
[(set GPRC:$dst, (atomic_load_nand_32 xoaddr:$ptr, GPRC:$incr))]>;
def ATOMIC_CMP_SWAP_I8 : Pseudo<
- (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$old, GPRC:$new), "",
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$old, GPRC:$new), "#ATOMIC_CMP_SWAP_I8",
[(set GPRC:$dst,
(atomic_cmp_swap_8 xoaddr:$ptr, GPRC:$old, GPRC:$new))]>;
def ATOMIC_CMP_SWAP_I16 : Pseudo<
- (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$old, GPRC:$new), "",
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$old, GPRC:$new), "#ATOMIC_CMP_SWAP_I16 $dst $ptr $old $new",
[(set GPRC:$dst,
(atomic_cmp_swap_16 xoaddr:$ptr, GPRC:$old, GPRC:$new))]>;
def ATOMIC_CMP_SWAP_I32 : Pseudo<
- (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$old, GPRC:$new), "",
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$old, GPRC:$new), "#ATOMIC_CMP_SWAP_I32 $dst $ptr $old $new",
[(set GPRC:$dst,
(atomic_cmp_swap_32 xoaddr:$ptr, GPRC:$old, GPRC:$new))]>;
def ATOMIC_SWAP_I8 : Pseudo<
- (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$new), "",
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$new), "#ATOMIC_SWAP_i8",
[(set GPRC:$dst, (atomic_swap_8 xoaddr:$ptr, GPRC:$new))]>;
def ATOMIC_SWAP_I16 : Pseudo<
- (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$new), "",
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$new), "#ATOMIC_SWAP_I16",
[(set GPRC:$dst, (atomic_swap_16 xoaddr:$ptr, GPRC:$new))]>;
def ATOMIC_SWAP_I32 : Pseudo<
- (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$new), "",
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$new), "#ATOMIC_SWAP_I32",
[(set GPRC:$dst, (atomic_swap_32 xoaddr:$ptr, GPRC:$new))]>;
}
}
@@ -673,7 +679,7 @@
[(set GPRC:$rD, (load iaddr:$src))]>;
def LFS : DForm_1<48, (outs F4RC:$rD), (ins memri:$src),
- "lfs $rD, $src", LdStLFDU,
+ "lfs $rD, $src", LdStLFD,
[(set F4RC:$rD, (load iaddr:$src))]>;
def LFD : DForm_1<50, (outs F8RC:$rD), (ins memri:$src),
"lfd $rD, $src", LdStLFD,
@@ -683,32 +689,32 @@
// Unindexed (r+i) Loads with Update (preinc).
let mayLoad = 1 in {
def LBZU : DForm_1<35, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
- "lbzu $rD, $addr", LdStLoad,
+ "lbzu $rD, $addr", LdStLoadUpd,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
def LHAU : DForm_1<43, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
- "lhau $rD, $addr", LdStLoad,
+ "lhau $rD, $addr", LdStLHAU,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
def LHZU : DForm_1<41, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
- "lhzu $rD, $addr", LdStLoad,
+ "lhzu $rD, $addr", LdStLoadUpd,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
def LWZU : DForm_1<33, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
- "lwzu $rD, $addr", LdStLoad,
+ "lwzu $rD, $addr", LdStLoadUpd,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
def LFSU : DForm_1<49, (outs F4RC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
- "lfs $rD, $addr", LdStLFDU,
+ "lfsu $rD, $addr", LdStLFDU,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
def LFDU : DForm_1<51, (outs F8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
- "lfd $rD, $addr", LdStLFD,
+ "lfdu $rD, $addr", LdStLFDU,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
@@ -716,37 +722,37 @@
// Indexed (r+r) Loads with Update (preinc).
def LBZUX : XForm_1<31, 119, (outs GPRC:$rD, ptr_rc:$ea_result),
(ins memrr:$addr),
- "lbzux $rD, $addr", LdStLoad,
+ "lbzux $rD, $addr", LdStLoadUpd,
[]>, RegConstraint<"$addr.offreg = $ea_result">,
NoEncode<"$ea_result">;
def LHAUX : XForm_1<31, 375, (outs GPRC:$rD, ptr_rc:$ea_result),
(ins memrr:$addr),
- "lhaux $rD, $addr", LdStLoad,
+ "lhaux $rD, $addr", LdStLHAU,
[]>, RegConstraint<"$addr.offreg = $ea_result">,
NoEncode<"$ea_result">;
-def LHZUX : XForm_1<31, 331, (outs GPRC:$rD, ptr_rc:$ea_result),
+def LHZUX : XForm_1<31, 311, (outs GPRC:$rD, ptr_rc:$ea_result),
(ins memrr:$addr),
- "lhzux $rD, $addr", LdStLoad,
+ "lhzux $rD, $addr", LdStLoadUpd,
[]>, RegConstraint<"$addr.offreg = $ea_result">,
NoEncode<"$ea_result">;
def LWZUX : XForm_1<31, 55, (outs GPRC:$rD, ptr_rc:$ea_result),
(ins memrr:$addr),
- "lwzux $rD, $addr", LdStLoad,
+ "lwzux $rD, $addr", LdStLoadUpd,
[]>, RegConstraint<"$addr.offreg = $ea_result">,
NoEncode<"$ea_result">;
def LFSUX : XForm_1<31, 567, (outs F4RC:$rD, ptr_rc:$ea_result),
(ins memrr:$addr),
- "lfsux $rD, $addr", LdStLoad,
+ "lfsux $rD, $addr", LdStLFDU,
[]>, RegConstraint<"$addr.offreg = $ea_result">,
NoEncode<"$ea_result">;
def LFDUX : XForm_1<31, 631, (outs F8RC:$rD, ptr_rc:$ea_result),
(ins memrr:$addr),
- "lfdux $rD, $addr", LdStLoad,
+ "lfdux $rD, $addr", LdStLFDU,
[]>, RegConstraint<"$addr.offreg = $ea_result">,
NoEncode<"$ea_result">;
}
@@ -778,10 +784,10 @@
[(set GPRC:$rD, (PPClbrx xoaddr:$src, i32))]>;
def LFSX : XForm_25<31, 535, (outs F4RC:$frD), (ins memrr:$src),
- "lfsx $frD, $src", LdStLFDU,
+ "lfsx $frD, $src", LdStLFD,
[(set F4RC:$frD, (load xaddr:$src))]>;
def LFDX : XForm_25<31, 599, (outs F8RC:$frD), (ins memrr:$src),
- "lfdx $frD, $src", LdStLFDU,
+ "lfdx $frD, $src", LdStLFD,
[(set F8RC:$frD, (load xaddr:$src))]>;
}
@@ -801,10 +807,10 @@
"stw $rS, $src", LdStStore,
[(store GPRC:$rS, iaddr:$src)]>;
def STFS : DForm_1<52, (outs), (ins F4RC:$rS, memri:$dst),
- "stfs $rS, $dst", LdStUX,
+ "stfs $rS, $dst", LdStSTFD,
[(store F4RC:$rS, iaddr:$dst)]>;
def STFD : DForm_1<54, (outs), (ins F8RC:$rS, memri:$dst),
- "stfd $rS, $dst", LdStUX,
+ "stfd $rS, $dst", LdStSTFD,
[(store F8RC:$rS, iaddr:$dst)]>;
}
@@ -812,33 +818,33 @@
let PPC970_Unit = 2 in {
def STBU : DForm_1a<39, (outs ptr_rc:$ea_res), (ins GPRC:$rS,
symbolLo:$ptroff, ptr_rc:$ptrreg),
- "stbu $rS, $ptroff($ptrreg)", LdStStore,
+ "stbu $rS, $ptroff($ptrreg)", LdStStoreUpd,
[(set ptr_rc:$ea_res,
(pre_truncsti8 GPRC:$rS, ptr_rc:$ptrreg,
iaddroff:$ptroff))]>,
RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
def STHU : DForm_1a<45, (outs ptr_rc:$ea_res), (ins GPRC:$rS,
symbolLo:$ptroff, ptr_rc:$ptrreg),
- "sthu $rS, $ptroff($ptrreg)", LdStStore,
+ "sthu $rS, $ptroff($ptrreg)", LdStStoreUpd,
[(set ptr_rc:$ea_res,
(pre_truncsti16 GPRC:$rS, ptr_rc:$ptrreg,
iaddroff:$ptroff))]>,
RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
def STWU : DForm_1a<37, (outs ptr_rc:$ea_res), (ins GPRC:$rS,
symbolLo:$ptroff, ptr_rc:$ptrreg),
- "stwu $rS, $ptroff($ptrreg)", LdStStore,
+ "stwu $rS, $ptroff($ptrreg)", LdStStoreUpd,
[(set ptr_rc:$ea_res, (pre_store GPRC:$rS, ptr_rc:$ptrreg,
iaddroff:$ptroff))]>,
RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
def STFSU : DForm_1a<37, (outs ptr_rc:$ea_res), (ins F4RC:$rS,
symbolLo:$ptroff, ptr_rc:$ptrreg),
- "stfsu $rS, $ptroff($ptrreg)", LdStStore,
+ "stfsu $rS, $ptroff($ptrreg)", LdStSTFDU,
[(set ptr_rc:$ea_res, (pre_store F4RC:$rS, ptr_rc:$ptrreg,
iaddroff:$ptroff))]>,
RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
def STFDU : DForm_1a<37, (outs ptr_rc:$ea_res), (ins F8RC:$rS,
symbolLo:$ptroff, ptr_rc:$ptrreg),
- "stfdu $rS, $ptroff($ptrreg)", LdStStore,
+ "stfdu $rS, $ptroff($ptrreg)", LdStSTFDU,
[(set ptr_rc:$ea_res, (pre_store F8RC:$rS, ptr_rc:$ptrreg,
iaddroff:$ptroff))]>,
RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
@@ -863,7 +869,7 @@
def STBUX : XForm_8<31, 247, (outs ptr_rc:$ea_res),
(ins GPRC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
- "stbux $rS, $ptroff, $ptrreg", LdStStore,
+ "stbux $rS, $ptroff, $ptrreg", LdStStoreUpd,
[(set ptr_rc:$ea_res,
(pre_truncsti8 GPRC:$rS,
ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
@@ -872,7 +878,7 @@
def STHUX : XForm_8<31, 439, (outs ptr_rc:$ea_res),
(ins GPRC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
- "sthux $rS, $ptroff, $ptrreg", LdStStore,
+ "sthux $rS, $ptroff, $ptrreg", LdStStoreUpd,
[(set ptr_rc:$ea_res,
(pre_truncsti16 GPRC:$rS,
ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
@@ -881,7 +887,7 @@
def STWUX : XForm_8<31, 183, (outs ptr_rc:$ea_res),
(ins GPRC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
- "stwux $rS, $ptroff, $ptrreg", LdStStore,
+ "stwux $rS, $ptroff, $ptrreg", LdStStoreUpd,
[(set ptr_rc:$ea_res,
(pre_store GPRC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">,
@@ -889,7 +895,7 @@
def STFSUX : XForm_8<31, 695, (outs ptr_rc:$ea_res),
(ins F4RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
- "stfsux $rS, $ptroff, $ptrreg", LdStStore,
+ "stfsux $rS, $ptroff, $ptrreg", LdStSTFDU,
[(set ptr_rc:$ea_res,
(pre_store F4RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">,
@@ -897,7 +903,7 @@
def STFDUX : XForm_8<31, 759, (outs ptr_rc:$ea_res),
(ins F8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
- "stfdux $rS, $ptroff, $ptrreg", LdStStore,
+ "stfdux $rS, $ptroff, $ptrreg", LdStSTFDU,
[(set ptr_rc:$ea_res,
(pre_store F8RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">,
@@ -913,14 +919,14 @@
PPC970_DGroup_Cracked;
def STFIWX: XForm_28<31, 983, (outs), (ins F8RC:$frS, memrr:$dst),
- "stfiwx $frS, $dst", LdStUX,
+ "stfiwx $frS, $dst", LdStSTFD,
[(PPCstfiwx F8RC:$frS, xoaddr:$dst)]>;
def STFSX : XForm_28<31, 663, (outs), (ins F4RC:$frS, memrr:$dst),
- "stfsx $frS, $dst", LdStUX,
+ "stfsx $frS, $dst", LdStSTFD,
[(store F4RC:$frS, xaddr:$dst)]>;
def STFDX : XForm_28<31, 727, (outs), (ins F8RC:$frS, memrr:$dst),
- "stfdx $frS, $dst", LdStUX,
+ "stfdx $frS, $dst", LdStSTFD,
[(store F8RC:$frS, xaddr:$dst)]>;
}
@@ -964,7 +970,7 @@
[(set GPRC:$rD, (subc immSExt16:$imm, GPRC:$rA))]>;
}
-let isReMaterializable = 1 in {
+let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in {
def LI : DForm_2_r0<14, (outs GPRC:$rD), (ins symbolLo:$imm),
"li $rD, $imm", IntSimple,
[(set GPRC:$rD, immSExt16:$imm)]>;
@@ -1143,6 +1149,16 @@
"crxor $dst, $dst, $dst", BrCR,
[]>;
+let Defs = [CR1EQ], CRD = 6 in {
+def CR6SET : XLForm_1_ext<19, 289, (outs), (ins),
+ "creqv 6, 6, 6", BrCR,
+ [(PPCcr6set)]>;
+
+def CR6UNSET: XLForm_1_ext<19, 193, (outs), (ins),
+ "crxor 6, 6, 6", BrCR,
+ [(PPCcr6unset)]>;
+}
+
// XFX-Form instructions. Instructions that deal with SPRs.
//
let Uses = [CTR] in {
@@ -1192,7 +1208,7 @@
//
// FIXME: Make this a real Pseudo instruction when the JIT switches to MC.
def MFCRpseud: XFXForm_3<31, 19, (outs GPRC:$rT), (ins crbitm:$FXM),
- "", SprMFCR>,
+ "#MFCRpseud", SprMFCR>,
PPC970_MicroCode, PPC970_Unit_CRU;
def MFCR : XFXForm_3<31, 19, (outs GPRC:$rT), (ins),
@@ -1233,7 +1249,7 @@
PPC970_DGroup_Single, PPC970_Unit_FPU;
def FADDrtz: AForm_2<63, 21,
(outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB),
- "fadd $FRT, $FRA, $FRB", FPGeneral,
+ "fadd $FRT, $FRA, $FRB", FPAddSub,
[(set F8RC:$FRT, (PPCfaddrtz F8RC:$FRA, F8RC:$FRB))]>,
PPC970_DGroup_Single, PPC970_Unit_FPU;
}
@@ -1364,7 +1380,7 @@
let Uses = [RM] in {
def FADD : AForm_2<63, 21,
(outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB),
- "fadd $FRT, $FRA, $FRB", FPGeneral,
+ "fadd $FRT, $FRA, $FRB", FPAddSub,
[(set F8RC:$FRT, (fadd F8RC:$FRA, F8RC:$FRB))]>;
def FADDS : AForm_2<59, 21,
(outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRB),
@@ -1379,16 +1395,16 @@
"fdivs $FRT, $FRA, $FRB", FPDivS,
[(set F4RC:$FRT, (fdiv F4RC:$FRA, F4RC:$FRB))]>;
def FMUL : AForm_3<63, 25,
- (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB),
- "fmul $FRT, $FRA, $FRB", FPFused,
- [(set F8RC:$FRT, (fmul F8RC:$FRA, F8RC:$FRB))]>;
+ (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC),
+ "fmul $FRT, $FRA, $FRC", FPFused,
+ [(set F8RC:$FRT, (fmul F8RC:$FRA, F8RC:$FRC))]>;
def FMULS : AForm_3<59, 25,
- (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRB),
- "fmuls $FRT, $FRA, $FRB", FPGeneral,
- [(set F4RC:$FRT, (fmul F4RC:$FRA, F4RC:$FRB))]>;
+ (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC),
+ "fmuls $FRT, $FRA, $FRC", FPGeneral,
+ [(set F4RC:$FRT, (fmul F4RC:$FRA, F4RC:$FRC))]>;
def FSUB : AForm_2<63, 20,
(outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB),
- "fsub $FRT, $FRA, $FRB", FPGeneral,
+ "fsub $FRT, $FRA, $FRB", FPAddSub,
[(set F8RC:$FRT, (fsub F8RC:$FRA, F8RC:$FRB))]>;
def FSUBS : AForm_2<59, 20,
(outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRB),
@@ -1398,7 +1414,7 @@
}
let PPC970_Unit = 1 in { // FXU Operations.
- def ISEL : AForm_1<31, 15,
+ def ISEL : AForm_4<31, 15,
(outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB, pred:$cond),
"isel $rT, $rA, $rB, $cond", IntGeneral,
[]>;
Modified: llvm/branches/AMDILBackend/lib/Target/PowerPC/PPCRegisterInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/PowerPC/PPCRegisterInfo.cpp?rev=172541&r1=172540&r2=172541&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/PowerPC/PPCRegisterInfo.cpp (original)
+++ llvm/branches/AMDILBackend/lib/Target/PowerPC/PPCRegisterInfo.cpp Tue Jan 15 11:16:16 2013
@@ -71,7 +71,7 @@
: PPCGenRegisterInfo(ST.isPPC64() ? PPC::LR8 : PPC::LR,
ST.isPPC64() ? 0 : 1,
ST.isPPC64() ? 0 : 1),
- Subtarget(ST), TII(tii) {
+ Subtarget(ST), TII(tii), CRSpillFrameIdx(0) {
ImmToIdxMap[PPC::LD] = PPC::LDX; ImmToIdxMap[PPC::STD] = PPC::STDX;
ImmToIdxMap[PPC::LBZ] = PPC::LBZX; ImmToIdxMap[PPC::STB] = PPC::STBX;
ImmToIdxMap[PPC::LHZ] = PPC::LHZX; ImmToIdxMap[PPC::LHA] = PPC::LHAX;
@@ -111,10 +111,15 @@
return Subtarget.isPPC64() ? CSR_Darwin64_SaveList :
CSR_Darwin32_SaveList;
+ // For 32-bit SVR4, also initialize the frame index associated with
+ // the CR spill slot.
+ if (!Subtarget.isPPC64())
+ CRSpillFrameIdx = 0;
+
return Subtarget.isPPC64() ? CSR_SVR464_SaveList : CSR_SVR432_SaveList;
}
-const unsigned*
+const uint32_t*
PPCRegisterInfo::getCallPreservedMask(CallingConv::ID CC) const {
if (Subtarget.isDarwinABI())
return Subtarget.isPPC64() ? CSR_Darwin64_RegMask :
@@ -477,6 +482,31 @@
MBB.erase(II);
}
+bool
+PPCRegisterInfo::hasReservedSpillSlot(const MachineFunction &MF,
+ unsigned Reg, int &FrameIdx) const {
+
+ // For the nonvolatile condition registers (CR2, CR3, CR4) in an SVR4
+ // ABI, return true to prevent allocating an additional frame slot.
+ // For 64-bit, the CR save area is at SP+8; the value of FrameIdx = 0
+ // is arbitrary and will be subsequently ignored. For 32-bit, we must
+ // create exactly one stack slot and return its FrameIdx for all
+ // nonvolatiles.
+ if (Subtarget.isSVR4ABI() && PPC::CR2 <= Reg && Reg <= PPC::CR4) {
+ if (Subtarget.isPPC64()) {
+ FrameIdx = 0;
+ } else if (CRSpillFrameIdx) {
+ FrameIdx = CRSpillFrameIdx;
+ } else {
+ MachineFrameInfo *MFI = ((MachineFunction &)MF).getFrameInfo();
+ FrameIdx = MFI->CreateFixedObject((uint64_t)4, (int64_t)-4, true);
+ CRSpillFrameIdx = FrameIdx;
+ }
+ return true;
+ }
+ return false;
+}
+
void
PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
int SPAdj, RegScavenger *RS) const {
@@ -566,7 +596,7 @@
// to Offset to get the correct offset.
// Naked functions have stack size 0, although getStackSize may not reflect that
// because we didn't call all the pieces that compute it for naked functions.
- if (!MF.getFunction()->hasFnAttr(Attribute::Naked))
+ if (!MF.getFunction()->getFnAttributes().hasAttribute(Attributes::Naked))
Offset += MFI->getStackSize();
// If we can, encode the offset directly into the instruction. If this is a
Modified: llvm/branches/AMDILBackend/lib/Target/PowerPC/PPCRegisterInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/PowerPC/PPCRegisterInfo.h?rev=172541&r1=172540&r2=172541&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/PowerPC/PPCRegisterInfo.h (original)
+++ llvm/branches/AMDILBackend/lib/Target/PowerPC/PPCRegisterInfo.h Tue Jan 15 11:16:16 2013
@@ -30,6 +30,7 @@
std::map<unsigned, unsigned> ImmToIdxMap;
const PPCSubtarget &Subtarget;
const TargetInstrInfo &TII;
+ mutable int CRSpillFrameIdx;
public:
PPCRegisterInfo(const PPCSubtarget &SubTarget, const TargetInstrInfo &tii);
@@ -43,7 +44,7 @@
/// Code Generation virtual methods...
const uint16_t *getCalleeSavedRegs(const MachineFunction* MF = 0) const;
- const unsigned *getCallPreservedMask(CallingConv::ID CC) const;
+ const uint32_t *getCallPreservedMask(CallingConv::ID CC) const;
BitVector getReservedRegs(const MachineFunction &MF) const;
@@ -65,6 +66,8 @@
int SPAdj, RegScavenger *RS) const;
void lowerCRRestore(MachineBasicBlock::iterator II, unsigned FrameIndex,
int SPAdj, RegScavenger *RS) const;
+ bool hasReservedSpillSlot(const MachineFunction &MF, unsigned Reg,
+ int &FrameIdx) const;
void eliminateFrameIndex(MachineBasicBlock::iterator II,
int SPAdj, RegScavenger *RS = NULL) const;
Modified: llvm/branches/AMDILBackend/lib/Target/PowerPC/PPCSchedule.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/PowerPC/PPCSchedule.td?rev=172541&r1=172540&r2=172541&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/PowerPC/PPCSchedule.td (original)
+++ llvm/branches/AMDILBackend/lib/Target/PowerPC/PPCSchedule.td Tue Jan 15 11:16:16 2013
@@ -40,6 +40,7 @@
def IntMulLI : InstrItinClass;
def IntRFID : InstrItinClass;
def IntRotateD : InstrItinClass;
+def IntRotateDI : InstrItinClass;
def IntRotate : InstrItinClass;
def IntShift : InstrItinClass;
def IntTrapD : InstrItinClass;
@@ -52,15 +53,18 @@
def LdStDCBF : InstrItinClass;
def LdStDCBI : InstrItinClass;
def LdStLoad : InstrItinClass;
+def LdStLoadUpd : InstrItinClass;
def LdStStore : InstrItinClass;
+def LdStStoreUpd : InstrItinClass;
def LdStDSS : InstrItinClass;
def LdStICBI : InstrItinClass;
-def LdStUX : InstrItinClass;
def LdStLD : InstrItinClass;
+def LdStLDU : InstrItinClass;
def LdStLDARX : InstrItinClass;
def LdStLFD : InstrItinClass;
def LdStLFDU : InstrItinClass;
def LdStLHA : InstrItinClass;
+def LdStLHAU : InstrItinClass;
def LdStLMW : InstrItinClass;
def LdStLVecX : InstrItinClass;
def LdStLWA : InstrItinClass;
@@ -69,6 +73,9 @@
def LdStSLBIE : InstrItinClass;
def LdStSTD : InstrItinClass;
def LdStSTDCX : InstrItinClass;
+def LdStSTDU : InstrItinClass;
+def LdStSTFD : InstrItinClass;
+def LdStSTFDU : InstrItinClass;
def LdStSTVEBX : InstrItinClass;
def LdStSTWCX : InstrItinClass;
def LdStSync : InstrItinClass;
@@ -86,6 +93,7 @@
def SprRFI : InstrItinClass;
def SprSC : InstrItinClass;
def FPGeneral : InstrItinClass;
+def FPAddSub : InstrItinClass;
def FPCompare : InstrItinClass;
def FPDivD : InstrItinClass;
def FPDivS : InstrItinClass;
@@ -110,6 +118,8 @@
include "PPCScheduleG4Plus.td"
include "PPCScheduleG5.td"
include "PPCScheduleA2.td"
+include "PPCScheduleE500mc.td"
+include "PPCScheduleE5500.td"
//===----------------------------------------------------------------------===//
// Instruction to itinerary class map - When add new opcodes to the supported
@@ -171,7 +181,7 @@
// extsh IntSimple
// extsw IntSimple
// fabs FPGeneral
-// fadd FPGeneral
+// fadd FPAddSub
// fadds FPGeneral
// fcfid FPGeneral
// fcmpo FPCompare
@@ -201,35 +211,35 @@
// fsel FPGeneral
// fsqrt FPSqrt
// fsqrts FPSqrt
-// fsub FPGeneral
+// fsub FPAddSub
// fsubs FPGeneral
// icbi LdStICBI
// isync SprISYNC
// lbz LdStLoad
-// lbzu LdStLoad
-// lbzux LdStUX
+// lbzu LdStLoadUpd
+// lbzux LdStLoadUpd
// lbzx LdStLoad
// ld LdStLD
// ldarx LdStLDARX
-// ldu LdStLD
-// ldux LdStLD
+// ldu LdStLDU
+// ldux LdStLDU
// ldx LdStLD
// lfd LdStLFD
// lfdu LdStLFDU
// lfdux LdStLFDU
-// lfdx LdStLFDU
-// lfs LdStLFDU
+// lfdx LdStLFD
+// lfs LdStLFD
// lfsu LdStLFDU
// lfsux LdStLFDU
-// lfsx LdStLFDU
+// lfsx LdStLFD
// lha LdStLHA
-// lhau LdStLHA
-// lhaux LdStLHA
+// lhau LdStLHAU
+// lhaux LdStLHAU
// lhax LdStLHA
// lhbrx LdStLoad
// lhz LdStLoad
-// lhzu LdStLoad
-// lhzux LdStUX
+// lhzu LdStLoadUpd
+// lhzux LdStLoadUpd
// lhzx LdStLoad
// lmw LdStLMW
// lswi LdStLMW
@@ -243,12 +253,12 @@
// lvxl LdStLVecX
// lwa LdStLWA
// lwarx LdStLWARX
-// lwaux LdStLHA
+// lwaux LdStLHAU
// lwax LdStLHA
// lwbrx LdStLoad
// lwz LdStLoad
-// lwzu LdStLoad
-// lwzux LdStUX
+// lwzu LdStLoadUpd
+// lwzux LdStLoadUpd
// lwzx LdStLoad
// mcrf BrMCR
// mcrfs FPGeneral
@@ -292,10 +302,10 @@
// rfid IntRFID
// rldcl IntRotateD
// rldcr IntRotateD
-// rldic IntRotateD
-// rldicl IntRotateD
-// rldicr IntRotateD
-// rldimi IntRotateD
+// rldic IntRotateDI
+// rldicl IntRotateDI
+// rldicr IntRotateDI
+// rldimi IntRotateDI
// rlwimi IntRotate
// rlwinm IntGeneral
// rlwnm IntGeneral
@@ -305,33 +315,33 @@
// sld IntRotateD
// slw IntGeneral
// srad IntRotateD
-// sradi IntRotateD
+// sradi IntRotateDI
// sraw IntShift
// srawi IntShift
// srd IntRotateD
// srw IntGeneral
// stb LdStStore
-// stbu LdStStore
-// stbux LdStStore
+// stbu LdStStoreUpd
+// stbux LdStStoreUpd
// stbx LdStStore
// std LdStSTD
// stdcx. LdStSTDCX
-// stdu LdStSTD
-// stdux LdStSTD
+// stdu LdStSTDU
+// stdux LdStSTDU
// stdx LdStSTD
-// stfd LdStUX
-// stfdu LdStUX
-// stfdux LdStUX
-// stfdx LdStUX
-// stfiwx LdStUX
-// stfs LdStUX
-// stfsu LdStUX
-// stfsux LdStUX
-// stfsx LdStUX
+// stfd LdStSTFD
+// stfdu LdStSTFDU
+// stfdux LdStSTFDU
+// stfdx LdStSTFD
+// stfiwx LdStSTFD
+// stfs LdStSTFD
+// stfsu LdStSTFDU
+// stfsux LdStSTFDU
+// stfsx LdStSTFD
// sth LdStStore
// sthbrx LdStStore
-// sthu LdStStore
-// sthux LdStStore
+// sthu LdStStoreUpd
+// sthux LdStStoreUpd
// sthx LdStStore
// stmw LdStLMW
// stswi LdStLMW
@@ -344,8 +354,8 @@
// stw LdStStore
// stwbrx LdStStore
// stwcx. LdStSTWCX
-// stwu LdStStore
-// stwux LdStStore
+// stwu LdStStoreUpd
+// stwux LdStStoreUpd
// stwx LdStStore
// subf IntGeneral
// subfc IntGeneral
Modified: llvm/branches/AMDILBackend/lib/Target/PowerPC/PPCSchedule440.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/PowerPC/PPCSchedule440.td?rev=172541&r1=172540&r2=172541&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/PowerPC/PPCSchedule440.td (original)
+++ llvm/branches/AMDILBackend/lib/Target/PowerPC/PPCSchedule440.td Tue Jan 15 11:16:16 2013
@@ -288,6 +288,15 @@
InstrStage<2, [LWB]>],
[9, 5],
[GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStLoadUpd , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [LRACC]>,
+ InstrStage<1, [AGEN]>,
+ InstrStage<1, [CRD]>,
+ InstrStage<2, [LWB]>],
+ [9, 5],
+ [GPR_Bypass, GPR_Bypass]>,
InstrItinData<LdStStore , [InstrStage<1, [IFTH1, IFTH2]>,
InstrStage<1, [PDCD1, PDCD2]>,
InstrStage<1, [DISS1, DISS2]>,
@@ -297,6 +306,15 @@
InstrStage<2, [LWB]>],
[8, 5],
[NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStStoreUpd, [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [LRACC]>,
+ InstrStage<1, [AGEN]>,
+ InstrStage<1, [CRD]>,
+ InstrStage<2, [LWB]>],
+ [8, 5],
+ [NoBypass, GPR_Bypass]>,
InstrItinData<LdStICBI , [InstrStage<1, [IFTH1, IFTH2]>,
InstrStage<1, [PDCD1, PDCD2]>,
InstrStage<1, [DISS1, DISS2]>,
@@ -306,7 +324,7 @@
InstrStage<1, [LWB]>],
[8, 5],
[NoBypass, GPR_Bypass]>,
- InstrItinData<LdStUX , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrItinData<LdStSTFD , [InstrStage<1, [IFTH1, IFTH2]>,
InstrStage<1, [PDCD1, PDCD2]>,
InstrStage<1, [DISS1, DISS2]>,
InstrStage<1, [LRACC]>,
@@ -315,6 +333,15 @@
InstrStage<1, [LWB]>],
[8, 5, 5],
[NoBypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStSTFDU , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [LRACC]>,
+ InstrStage<1, [AGEN]>,
+ InstrStage<1, [CRD]>,
+ InstrStage<1, [LWB]>],
+ [8, 5, 5],
+ [NoBypass, GPR_Bypass, GPR_Bypass]>,
InstrItinData<LdStLFD , [InstrStage<1, [IFTH1, IFTH2]>,
InstrStage<1, [PDCD1, PDCD2]>,
InstrStage<1, [DISS1, DISS2]>,
@@ -342,6 +369,15 @@
InstrStage<1, [LWB]>],
[8, 5],
[NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStLHAU , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [LRACC]>,
+ InstrStage<1, [AGEN]>,
+ InstrStage<1, [CRD]>,
+ InstrStage<1, [LWB]>],
+ [8, 5],
+ [NoBypass, GPR_Bypass]>,
InstrItinData<LdStLMW , [InstrStage<1, [IFTH1, IFTH2]>,
InstrStage<1, [PDCD1, PDCD2]>,
InstrStage<1, [DISS1, DISS2]>,
@@ -371,6 +407,15 @@
InstrStage<2, [LWB]>],
[8, 5],
[NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStSTDU , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [LRACC]>,
+ InstrStage<1, [AGEN]>,
+ InstrStage<1, [CRD]>,
+ InstrStage<2, [LWB]>],
+ [8, 5],
+ [NoBypass, GPR_Bypass]>,
InstrItinData<LdStSTDCX , [InstrStage<1, [IFTH1, IFTH2]>,
InstrStage<1, [PDCD1, PDCD2]>,
InstrStage<1, [DISS1]>,
@@ -537,6 +582,19 @@
InstrStage<1, [FWB]>],
[10, 4, 4],
[FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
+ InstrItinData<FPAddSub , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [FRACC]>,
+ InstrStage<1, [FEXE1]>,
+ InstrStage<1, [FEXE2]>,
+ InstrStage<1, [FEXE3]>,
+ InstrStage<1, [FEXE4]>,
+ InstrStage<1, [FEXE5]>,
+ InstrStage<1, [FEXE6]>,
+ InstrStage<1, [FWB]>],
+ [10, 4, 4],
+ [FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
InstrItinData<FPCompare , [InstrStage<1, [IFTH1, IFTH2]>,
InstrStage<1, [PDCD1, PDCD2]>,
InstrStage<1, [DISS1, DISS2]>,
Modified: llvm/branches/AMDILBackend/lib/Target/PowerPC/PPCScheduleA2.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/PowerPC/PPCScheduleA2.td?rev=172541&r1=172540&r2=172541&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/PowerPC/PPCScheduleA2.td (original)
+++ llvm/branches/AMDILBackend/lib/Target/PowerPC/PPCScheduleA2.td Tue Jan 15 11:16:16 2013
@@ -181,6 +181,17 @@
InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
[10, 7, 7],
[GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntRotateDI , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [10, 7, 7],
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
InstrItinData<IntShift , [InstrStage<4,
[IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
@@ -302,7 +313,18 @@
InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
[14, 7],
[GPR_Bypass, GPR_Bypass]>,
- InstrItinData<LdStLD , [InstrStage<4,
+ InstrItinData<LdStLoadUpd , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [14, 7],
+ [GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStLDU , [InstrStage<4,
[IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
IU4_4, IU4_5, IU4_6, IU4_7]>,
@@ -324,6 +346,17 @@
InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
[13, 7],
[GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStStoreUpd, [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [13, 7],
+ [GPR_Bypass, GPR_Bypass]>,
InstrItinData<LdStICBI , [InstrStage<4,
[IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
@@ -335,7 +368,7 @@
InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
[14, 7],
[NoBypass, GPR_Bypass]>,
- InstrItinData<LdStUX , [InstrStage<4,
+ InstrItinData<LdStSTFD , [InstrStage<4,
[IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
IU4_4, IU4_5, IU4_6, IU4_7]>,
@@ -346,6 +379,17 @@
InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
[14, 7, 7],
[NoBypass, FPR_Bypass, FPR_Bypass]>,
+ InstrItinData<LdStSTFDU , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [14, 7, 7],
+ [NoBypass, FPR_Bypass, FPR_Bypass]>,
InstrItinData<LdStLFD , [InstrStage<4,
[IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
@@ -379,6 +423,17 @@
InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
[14, 7],
[NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStLHAU , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [14, 7],
+ [NoBypass, GPR_Bypass]>,
InstrItinData<LdStLMW , [InstrStage<4,
[IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
@@ -412,6 +467,17 @@
InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
[13, 7],
[GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStSTDU , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [13, 7],
+ [GPR_Bypass, GPR_Bypass]>,
InstrItinData<LdStSTDCX , [InstrStage<4,
[IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
@@ -593,6 +659,17 @@
InstrStage<1, [FEX5]>, InstrStage<1, [FEX6]>],
[15, 7, 7],
[FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
+ InstrItinData<FPAddSub , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [FRF1]>,
+ InstrStage<1, [FEX1]>, InstrStage<1, [FEX2]>,
+ InstrStage<1, [FEX3]>, InstrStage<1, [FEX4]>,
+ InstrStage<1, [FEX5]>, InstrStage<1, [FEX6]>],
+ [15, 7, 7],
+ [FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
InstrItinData<FPCompare , [InstrStage<4,
[IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
Modified: llvm/branches/AMDILBackend/lib/Target/PowerPC/PPCScheduleG3.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/PowerPC/PPCScheduleG3.td?rev=172541&r1=172540&r2=172541&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/PowerPC/PPCScheduleG3.td (original)
+++ llvm/branches/AMDILBackend/lib/Target/PowerPC/PPCScheduleG3.td Tue Jan 15 11:16:16 2013
@@ -34,12 +34,16 @@
InstrItinData<LdStDCBF , [InstrStage<3, [SLU]>]>,
InstrItinData<LdStDCBI , [InstrStage<3, [SLU]>]>,
InstrItinData<LdStLoad , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStLoadUpd , [InstrStage<2, [SLU]>]>,
InstrItinData<LdStStore , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStStoreUpd, [InstrStage<2, [SLU]>]>,
InstrItinData<LdStICBI , [InstrStage<3, [SLU]>]>,
- InstrItinData<LdStUX , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStSTFD , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStSTFDU , [InstrStage<2, [SLU]>]>,
InstrItinData<LdStLFD , [InstrStage<2, [SLU]>]>,
InstrItinData<LdStLFDU , [InstrStage<2, [SLU]>]>,
InstrItinData<LdStLHA , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStLHAU , [InstrStage<2, [SLU]>]>,
InstrItinData<LdStLMW , [InstrStage<34, [SLU]>]>,
InstrItinData<LdStLWARX , [InstrStage<3, [SLU]>]>,
InstrItinData<LdStSTWCX , [InstrStage<8, [SLU]>]>,
@@ -58,6 +62,7 @@
InstrItinData<SprRFI , [InstrStage<2, [SRU]>]>,
InstrItinData<SprSC , [InstrStage<2, [SRU]>]>,
InstrItinData<FPGeneral , [InstrStage<1, [FPU1]>]>,
+ InstrItinData<FPAddSub , [InstrStage<1, [FPU1]>]>,
InstrItinData<FPCompare , [InstrStage<1, [FPU1]>]>,
InstrItinData<FPDivD , [InstrStage<31, [FPU1]>]>,
InstrItinData<FPDivS , [InstrStage<17, [FPU1]>]>,
Modified: llvm/branches/AMDILBackend/lib/Target/PowerPC/PPCScheduleG4.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/PowerPC/PPCScheduleG4.td?rev=172541&r1=172540&r2=172541&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/PowerPC/PPCScheduleG4.td (original)
+++ llvm/branches/AMDILBackend/lib/Target/PowerPC/PPCScheduleG4.td Tue Jan 15 11:16:16 2013
@@ -33,13 +33,17 @@
InstrItinData<LdStDCBF , [InstrStage<2, [SLU]>]>,
InstrItinData<LdStDCBI , [InstrStage<2, [SLU]>]>,
InstrItinData<LdStLoad , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStLoadUpd , [InstrStage<2, [SLU]>]>,
InstrItinData<LdStStore , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStStoreUpd, [InstrStage<2, [SLU]>]>,
InstrItinData<LdStDSS , [InstrStage<2, [SLU]>]>,
InstrItinData<LdStICBI , [InstrStage<2, [SLU]>]>,
- InstrItinData<LdStUX , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStSTFD , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStSTFDU , [InstrStage<2, [SLU]>]>,
InstrItinData<LdStLFD , [InstrStage<2, [SLU]>]>,
InstrItinData<LdStLFDU , [InstrStage<2, [SLU]>]>,
InstrItinData<LdStLHA , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStLHAU , [InstrStage<2, [SLU]>]>,
InstrItinData<LdStLMW , [InstrStage<34, [SLU]>]>,
InstrItinData<LdStLVecX , [InstrStage<2, [SLU]>]>,
InstrItinData<LdStLWARX , [InstrStage<3, [SLU]>]>,
@@ -60,6 +64,7 @@
InstrItinData<SprRFI , [InstrStage<2, [SRU]>]>,
InstrItinData<SprSC , [InstrStage<2, [SRU]>]>,
InstrItinData<FPGeneral , [InstrStage<1, [FPU1]>]>,
+ InstrItinData<FPAddSub , [InstrStage<1, [FPU1]>]>,
InstrItinData<FPCompare , [InstrStage<1, [FPU1]>]>,
InstrItinData<FPDivD , [InstrStage<31, [FPU1]>]>,
InstrItinData<FPDivS , [InstrStage<17, [FPU1]>]>,
Modified: llvm/branches/AMDILBackend/lib/Target/PowerPC/PPCScheduleG4Plus.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/PowerPC/PPCScheduleG4Plus.td?rev=172541&r1=172540&r2=172541&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/PowerPC/PPCScheduleG4Plus.td (original)
+++ llvm/branches/AMDILBackend/lib/Target/PowerPC/PPCScheduleG4Plus.td Tue Jan 15 11:16:16 2013
@@ -36,19 +36,24 @@
InstrItinData<LdStDCBF , [InstrStage<3, [SLU]>]>,
InstrItinData<LdStDCBI , [InstrStage<3, [SLU]>]>,
InstrItinData<LdStLoad , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStLoadUpd , [InstrStage<3, [SLU]>]>,
InstrItinData<LdStStore , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStStoreUpd, [InstrStage<3, [SLU]>]>,
InstrItinData<LdStDSS , [InstrStage<3, [SLU]>]>,
InstrItinData<LdStICBI , [InstrStage<3, [IU2]>]>,
- InstrItinData<LdStUX , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStSTFD , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStSTFDU , [InstrStage<3, [SLU]>]>,
InstrItinData<LdStLFD , [InstrStage<4, [SLU]>]>,
InstrItinData<LdStLFDU , [InstrStage<4, [SLU]>]>,
InstrItinData<LdStLHA , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStLHAU , [InstrStage<3, [SLU]>]>,
InstrItinData<LdStLMW , [InstrStage<37, [SLU]>]>,
InstrItinData<LdStLVecX , [InstrStage<3, [SLU]>]>,
InstrItinData<LdStLWA , [InstrStage<3, [SLU]>]>,
InstrItinData<LdStLWARX , [InstrStage<3, [SLU]>]>,
InstrItinData<LdStSTD , [InstrStage<3, [SLU]>]>,
InstrItinData<LdStSTDCX , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStSTDU , [InstrStage<3, [SLU]>]>,
InstrItinData<LdStSTVEBX , [InstrStage<3, [SLU]>]>,
InstrItinData<LdStSTWCX , [InstrStage<3, [SLU]>]>,
InstrItinData<LdStSync , [InstrStage<35, [SLU]>]>,
@@ -66,6 +71,7 @@
InstrItinData<SprRFI , [InstrStage<1, [IU1, IU2, IU3, IU4]>]>,
InstrItinData<SprSC , [InstrStage<0, [IU1, IU2, IU3, IU4]>]>,
InstrItinData<FPGeneral , [InstrStage<5, [FPU1]>]>,
+ InstrItinData<FPAddSub , [InstrStage<5, [FPU1]>]>,
InstrItinData<FPCompare , [InstrStage<5, [FPU1]>]>,
InstrItinData<FPDivD , [InstrStage<35, [FPU1]>]>,
InstrItinData<FPDivS , [InstrStage<21, [FPU1]>]>,
Modified: llvm/branches/AMDILBackend/lib/Target/PowerPC/PPCScheduleG5.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/PowerPC/PPCScheduleG5.td?rev=172541&r1=172540&r2=172541&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/PowerPC/PPCScheduleG5.td (original)
+++ llvm/branches/AMDILBackend/lib/Target/PowerPC/PPCScheduleG5.td Tue Jan 15 11:16:16 2013
@@ -27,6 +27,7 @@
InstrItinData<IntMulLI , [InstrStage<4, [IU1, IU2]>]>,
InstrItinData<IntRFID , [InstrStage<1, [IU2]>]>,
InstrItinData<IntRotateD , [InstrStage<2, [IU1, IU2]>]>,
+ InstrItinData<IntRotateDI , [InstrStage<2, [IU1, IU2]>]>,
InstrItinData<IntRotate , [InstrStage<4, [IU1, IU2]>]>,
InstrItinData<IntShift , [InstrStage<2, [IU1, IU2]>]>,
InstrItinData<IntTrapD , [InstrStage<1, [IU1, IU2]>]>,
@@ -37,15 +38,20 @@
InstrItinData<BrMCRX , [InstrStage<3, [BPU]>]>,
InstrItinData<LdStDCBF , [InstrStage<3, [SLU]>]>,
InstrItinData<LdStLoad , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStLoadUpd , [InstrStage<3, [SLU]>]>,
InstrItinData<LdStStore , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStStoreUpd, [InstrStage<3, [SLU]>]>,
InstrItinData<LdStDSS , [InstrStage<10, [SLU]>]>,
InstrItinData<LdStICBI , [InstrStage<40, [SLU]>]>,
- InstrItinData<LdStUX , [InstrStage<4, [SLU]>]>,
+ InstrItinData<LdStSTFD , [InstrStage<4, [SLU]>]>,
+ InstrItinData<LdStSTFDU , [InstrStage<4, [SLU]>]>,
InstrItinData<LdStLD , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStLDU , [InstrStage<3, [SLU]>]>,
InstrItinData<LdStLDARX , [InstrStage<11, [SLU]>]>,
InstrItinData<LdStLFD , [InstrStage<3, [SLU]>]>,
InstrItinData<LdStLFDU , [InstrStage<5, [SLU]>]>,
InstrItinData<LdStLHA , [InstrStage<5, [SLU]>]>,
+ InstrItinData<LdStLHAU , [InstrStage<5, [SLU]>]>,
InstrItinData<LdStLMW , [InstrStage<64, [SLU]>]>,
InstrItinData<LdStLVecX , [InstrStage<3, [SLU]>]>,
InstrItinData<LdStLWA , [InstrStage<5, [SLU]>]>,
@@ -53,6 +59,7 @@
InstrItinData<LdStSLBIA , [InstrStage<40, [SLU]>]>, // needs work
InstrItinData<LdStSLBIE , [InstrStage<2, [SLU]>]>,
InstrItinData<LdStSTD , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStSTDU , [InstrStage<3, [SLU]>]>,
InstrItinData<LdStSTDCX , [InstrStage<11, [SLU]>]>,
InstrItinData<LdStSTVEBX , [InstrStage<5, [SLU]>]>,
InstrItinData<LdStSTWCX , [InstrStage<11, [SLU]>]>,
@@ -69,6 +76,7 @@
InstrItinData<SprMTSPR , [InstrStage<8, [IU2]>]>,
InstrItinData<SprSC , [InstrStage<1, [IU2]>]>,
InstrItinData<FPGeneral , [InstrStage<6, [FPU1, FPU2]>]>,
+ InstrItinData<FPAddSub , [InstrStage<6, [FPU1, FPU2]>]>,
InstrItinData<FPCompare , [InstrStage<8, [FPU1, FPU2]>]>,
InstrItinData<FPDivD , [InstrStage<33, [FPU1, FPU2]>]>,
InstrItinData<FPDivS , [InstrStage<33, [FPU1, FPU2]>]>,
Modified: llvm/branches/AMDILBackend/lib/Target/PowerPC/PPCSubtarget.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/PowerPC/PPCSubtarget.cpp?rev=172541&r1=172540&r2=172541&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/PowerPC/PPCSubtarget.cpp (original)
+++ llvm/branches/AMDILBackend/lib/Target/PowerPC/PPCSubtarget.cpp Tue Jan 15 11:16:16 2013
@@ -54,19 +54,26 @@
CPUName = sys::getHostCPUName();
#endif
- // Parse features string.
- ParseSubtargetFeatures(CPUName, FS);
-
// Initialize scheduling itinerary for the specified CPU.
InstrItins = getInstrItineraryForCPU(CPUName);
+ // Make sure 64-bit features are available when CPUname is generic
+ std::string FullFS = FS;
+
// If we are generating code for ppc64, verify that options make sense.
if (is64Bit) {
Has64BitSupport = true;
// Silently force 64-bit register use on ppc64.
Use64BitRegs = true;
+ if (!FullFS.empty())
+ FullFS = "+64bit," + FullFS;
+ else
+ FullFS = "+64bit";
}
-
+
+ // Parse features string.
+ ParseSubtargetFeatures(CPUName, FullFS);
+
// If the user requested use of 64-bit regs, but the cpu selected doesn't
// support it, ignore.
if (use64BitRegs() && !has64BitSupport())
Modified: llvm/branches/AMDILBackend/lib/Target/PowerPC/PPCSubtarget.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/PowerPC/PPCSubtarget.h?rev=172541&r1=172540&r2=172541&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/PowerPC/PPCSubtarget.h (original)
+++ llvm/branches/AMDILBackend/lib/Target/PowerPC/PPCSubtarget.h Tue Jan 15 11:16:16 2013
@@ -33,32 +33,34 @@
enum {
DIR_NONE,
DIR_32,
- DIR_440,
- DIR_601,
- DIR_602,
- DIR_603,
+ DIR_440,
+ DIR_601,
+ DIR_602,
+ DIR_603,
DIR_7400,
- DIR_750,
- DIR_970,
+ DIR_750,
+ DIR_970,
DIR_A2,
+ DIR_E500mc,
+ DIR_E5500,
DIR_PWR6,
DIR_PWR7,
- DIR_64
+ DIR_64
};
}
class GlobalValue;
class TargetMachine;
-
+
class PPCSubtarget : public PPCGenSubtargetInfo {
protected:
/// stackAlignment - The minimum alignment known to hold of the stack frame on
/// entry to the function and which must be maintained by every function.
unsigned StackAlignment;
-
+
/// Selected instruction itineraries (one entry per itinerary class.)
InstrItineraryData InstrItins;
-
+
/// Which cpu directive was used.
unsigned DarwinDirective;
@@ -74,7 +76,7 @@
bool IsBookE;
bool HasLazyResolverStubs;
bool IsJITCodeModel;
-
+
/// TargetTriple - What processor and OS we're targeting.
Triple TargetTriple;
@@ -84,11 +86,11 @@
///
PPCSubtarget(const std::string &TT, const std::string &CPU,
const std::string &FS, bool is64Bit);
-
- /// ParseSubtargetFeatures - Parses features string setting specified
+
+ /// ParseSubtargetFeatures - Parses features string setting specified
/// subtarget options. Definition of function is auto generated by tblgen.
void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
-
+
/// SetJITMode - This is called to inform the subtarget info that we are
/// producing code for the JIT.
void SetJITMode();
@@ -97,20 +99,27 @@
/// stack frame on entry to the function and which must be maintained by every
/// function for this subtarget.
unsigned getStackAlignment() const { return StackAlignment; }
-
+
/// getDarwinDirective - Returns the -m directive specified for the cpu.
///
unsigned getDarwinDirective() const { return DarwinDirective; }
-
- /// getInstrItins - Return the instruction itineraies based on subtarget
+
+ /// getInstrItins - Return the instruction itineraies based on subtarget
/// selection.
const InstrItineraryData &getInstrItineraryData() const { return InstrItins; }
- /// getTargetDataString - Return the pointer size and type alignment
+ /// getDataLayoutString - Return the pointer size and type alignment
/// properties of this subtarget.
- const char *getTargetDataString() const {
+ const char *getDataLayoutString() const {
// Note, the alignment values for f64 and i64 on ppc64 in Darwin
// documentation are wrong; these are correct (i.e. "what gcc does").
+ if (isPPC64() && isSVR4ABI()) {
+ if (TargetTriple.getOS() == llvm::Triple::FreeBSD)
+ return "E-p:64:64-f64:64:64-i64:64:64-f128:64:64-v128:128:128-n32:64";
+ else
+ return "E-p:64:64-f64:64:64-i64:64:64-f128:128:128-v128:128:128-n32:64";
+ }
+
return isPPC64() ? "E-p:64:64-f64:64:64-i64:64:64-f128:64:128-n32:64"
: "E-p:32:32-f64:64:64-i64:64:64-f128:64:128-n32";
}
@@ -118,22 +127,22 @@
/// isPPC64 - Return true if we are generating code for 64-bit pointer mode.
///
bool isPPC64() const { return IsPPC64; }
-
+
/// has64BitSupport - Return true if the selected CPU supports 64-bit
/// instructions, regardless of whether we are in 32-bit or 64-bit mode.
bool has64BitSupport() const { return Has64BitSupport; }
-
+
/// use64BitRegs - Return true if in 64-bit mode or if we should use 64-bit
/// registers in 32-bit mode when possible. This can only true if
/// has64BitSupport() returns true.
bool use64BitRegs() const { return Use64BitRegs; }
-
+
/// hasLazyResolverStub - Return true if accesses to the specified global have
/// to go through a dyld lazy resolution stub. This means that an extra load
/// is required to get the address of the global.
- bool hasLazyResolverStub(const GlobalValue *GV,
+ bool hasLazyResolverStub(const GlobalValue *GV,
const TargetMachine &TM) const;
-
+
// isJITCodeModel - True if we're generating code for the JIT
bool isJITCodeModel() const { return IsJITCodeModel; }
Modified: llvm/branches/AMDILBackend/lib/Target/PowerPC/PPCTargetMachine.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/PowerPC/PPCTargetMachine.cpp?rev=172541&r1=172540&r2=172541&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/PowerPC/PPCTargetMachine.cpp (original)
+++ llvm/branches/AMDILBackend/lib/Target/PowerPC/PPCTargetMachine.cpp Tue Jan 15 11:16:16 2013
@@ -40,10 +40,11 @@
bool is64Bit)
: LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
Subtarget(TT, CPU, FS, is64Bit),
- DataLayout(Subtarget.getTargetDataString()), InstrInfo(*this),
+ DL(Subtarget.getDataLayoutString()), InstrInfo(*this),
FrameLowering(Subtarget), JITInfo(*this, is64Bit),
TLInfo(*this), TSInfo(*this),
- InstrItins(Subtarget.getInstrItineraryData()) {
+ InstrItins(Subtarget.getInstrItineraryData()),
+ STTI(&TLInfo), VTTI(&TLInfo) {
// The binutils for the BG/P are too old for CFI.
if (Subtarget.isBGP())
Modified: llvm/branches/AMDILBackend/lib/Target/PowerPC/PPCTargetMachine.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/PowerPC/PPCTargetMachine.h?rev=172541&r1=172540&r2=172541&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/PowerPC/PPCTargetMachine.h (original)
+++ llvm/branches/AMDILBackend/lib/Target/PowerPC/PPCTargetMachine.h Tue Jan 15 11:16:16 2013
@@ -21,7 +21,8 @@
#include "PPCISelLowering.h"
#include "PPCSelectionDAGInfo.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetTransformImpl.h"
+#include "llvm/DataLayout.h"
namespace llvm {
@@ -29,13 +30,15 @@
///
class PPCTargetMachine : public LLVMTargetMachine {
PPCSubtarget Subtarget;
- const TargetData DataLayout; // Calculates type size & alignment
+ const DataLayout DL; // Calculates type size & alignment
PPCInstrInfo InstrInfo;
PPCFrameLowering FrameLowering;
PPCJITInfo JITInfo;
PPCTargetLowering TLInfo;
PPCSelectionDAGInfo TSInfo;
InstrItineraryData InstrItins;
+ ScalarTargetTransformImpl STTI;
+ VectorTargetTransformImpl VTTI;
public:
PPCTargetMachine(const Target &T, StringRef TT,
@@ -58,11 +61,17 @@
return &InstrInfo.getRegisterInfo();
}
- virtual const TargetData *getTargetData() const { return &DataLayout; }
+ virtual const DataLayout *getDataLayout() const { return &DL; }
virtual const PPCSubtarget *getSubtargetImpl() const { return &Subtarget; }
virtual const InstrItineraryData *getInstrItineraryData() const {
return &InstrItins;
}
+ virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const {
+ return &STTI;
+ }
+ virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const {
+ return &VTTI;
+ }
// Pass Pipeline Configuration
virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
Modified: llvm/branches/AMDILBackend/lib/Target/README.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/README.txt?rev=172541&r1=172540&r2=172541&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/README.txt (original)
+++ llvm/branches/AMDILBackend/lib/Target/README.txt Tue Jan 15 11:16:16 2013
@@ -152,7 +152,7 @@
//===---------------------------------------------------------------------===//
-For vector types, TargetData.cpp::getTypeInfo() returns alignment that is equal
+For vector types, DataLayout.cpp::getTypeInfo() returns alignment that is equal
to the type size. It works but can be overly conservative as the alignment of
specific vector types are target dependent.
@@ -2367,8 +2367,3 @@
should fold to x > y.
//===---------------------------------------------------------------------===//
-
-int f(double x) { return __builtin_fabs(x) < 0.0; }
-should fold to false.
-
-//===---------------------------------------------------------------------===//
Modified: llvm/branches/AMDILBackend/lib/Target/Sparc/SparcFrameLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/Sparc/SparcFrameLowering.cpp?rev=172541&r1=172540&r2=172541&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/Sparc/SparcFrameLowering.cpp (original)
+++ llvm/branches/AMDILBackend/lib/Target/Sparc/SparcFrameLowering.cpp Tue Jan 15 11:16:16 2013
@@ -20,7 +20,7 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Support/CommandLine.h"
Modified: llvm/branches/AMDILBackend/lib/Target/Sparc/SparcISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/Sparc/SparcISelLowering.cpp?rev=172541&r1=172540&r2=172541&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/Sparc/SparcISelLowering.cpp (original)
+++ llvm/branches/AMDILBackend/lib/Target/Sparc/SparcISelLowering.cpp Tue Jan 15 11:16:16 2013
@@ -637,7 +637,7 @@
PointerType *Ty = cast<PointerType>(CalleeFn->arg_begin()->getType());
Type *ElementTy = Ty->getElementType();
- return getTargetData()->getTypeAllocSize(ElementTy);
+ return getDataLayout()->getTypeAllocSize(ElementTy);
}
//===----------------------------------------------------------------------===//
Modified: llvm/branches/AMDILBackend/lib/Target/Sparc/SparcInstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/Sparc/SparcInstrInfo.td?rev=172541&r1=172540&r2=172541&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/Sparc/SparcInstrInfo.td (original)
+++ llvm/branches/AMDILBackend/lib/Target/Sparc/SparcInstrInfo.td Tue Jan 15 11:16:16 2013
@@ -129,7 +129,7 @@
[SDNPHasChain, SDNPOptInGlue]>;
def flushw : SDNode<"SPISD::FLUSHW", SDTNone,
- [SDNPHasChain]>;
+ [SDNPHasChain, SDNPSideEffect, SDNPMayStore]>;
def getPCX : Operand<i32> {
let PrintMethod = "printGetPCX";
Modified: llvm/branches/AMDILBackend/lib/Target/Sparc/SparcTargetMachine.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/Sparc/SparcTargetMachine.cpp?rev=172541&r1=172540&r2=172541&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/Sparc/SparcTargetMachine.cpp (original)
+++ llvm/branches/AMDILBackend/lib/Target/Sparc/SparcTargetMachine.cpp Tue Jan 15 11:16:16 2013
@@ -33,10 +33,10 @@
bool is64bit)
: LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
Subtarget(TT, CPU, FS, is64bit),
- DataLayout(Subtarget.getDataLayout()),
+ DL(Subtarget.getDataLayout()),
InstrInfo(Subtarget),
TLInfo(*this), TSInfo(*this),
- FrameLowering(Subtarget) {
+ FrameLowering(Subtarget), STTI(&TLInfo), VTTI(&TLInfo) {
}
namespace {
Modified: llvm/branches/AMDILBackend/lib/Target/Sparc/SparcTargetMachine.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/Sparc/SparcTargetMachine.h?rev=172541&r1=172540&r2=172541&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/Sparc/SparcTargetMachine.h (original)
+++ llvm/branches/AMDILBackend/lib/Target/Sparc/SparcTargetMachine.h Tue Jan 15 11:16:16 2013
@@ -20,18 +20,21 @@
#include "SparcSelectionDAGInfo.h"
#include "SparcSubtarget.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetTransformImpl.h"
namespace llvm {
class SparcTargetMachine : public LLVMTargetMachine {
SparcSubtarget Subtarget;
- const TargetData DataLayout; // Calculates type size & alignment
+ const DataLayout DL; // Calculates type size & alignment
SparcInstrInfo InstrInfo;
SparcTargetLowering TLInfo;
SparcSelectionDAGInfo TSInfo;
SparcFrameLowering FrameLowering;
+ ScalarTargetTransformImpl STTI;
+ VectorTargetTransformImpl VTTI;
public:
SparcTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS, const TargetOptions &Options,
@@ -52,7 +55,13 @@
virtual const SparcSelectionDAGInfo* getSelectionDAGInfo() const {
return &TSInfo;
}
- virtual const TargetData *getTargetData() const { return &DataLayout; }
+ virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const {
+ return &STTI;
+ }
+ virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const {
+ return &VTTI;
+ }
+ virtual const DataLayout *getDataLayout() const { return &DL; }
// Pass Pipeline Configuration
virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
Modified: llvm/branches/AMDILBackend/lib/Target/Target.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/Target.cpp?rev=172541&r1=172540&r2=172541&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/Target.cpp (original)
+++ llvm/branches/AMDILBackend/lib/Target/Target.cpp Tue Jan 15 11:16:16 2013
@@ -16,7 +16,7 @@
#include "llvm-c/Initialization.h"
#include "llvm/InitializePasses.h"
#include "llvm/PassManager.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/LLVMContext.h"
#include <cstring>
@@ -24,8 +24,9 @@
using namespace llvm;
void llvm::initializeTarget(PassRegistry &Registry) {
- initializeTargetDataPass(Registry);
+ initializeDataLayoutPass(Registry);
initializeTargetLibraryInfoPass(Registry);
+ initializeTargetTransformInfoPass(Registry);
}
void LLVMInitializeTarget(LLVMPassRegistryRef R) {
@@ -33,11 +34,11 @@
}
LLVMTargetDataRef LLVMCreateTargetData(const char *StringRep) {
- return wrap(new TargetData(StringRep));
+ return wrap(new DataLayout(StringRep));
}
void LLVMAddTargetData(LLVMTargetDataRef TD, LLVMPassManagerRef PM) {
- unwrap(PM)->add(new TargetData(*unwrap(TD)));
+ unwrap(PM)->add(new DataLayout(*unwrap(TD)));
}
void LLVMAddTargetLibraryInfo(LLVMTargetLibraryInfoRef TLI,
@@ -55,13 +56,21 @@
}
unsigned LLVMPointerSize(LLVMTargetDataRef TD) {
- return unwrap(TD)->getPointerSize();
+ return unwrap(TD)->getPointerSize(0);
+}
+
+unsigned LLVMPointerSizeForAS(LLVMTargetDataRef TD, unsigned AS) {
+ return unwrap(TD)->getPointerSize(AS);
}
LLVMTypeRef LLVMIntPtrType(LLVMTargetDataRef TD) {
return wrap(unwrap(TD)->getIntPtrType(getGlobalContext()));
}
+LLVMTypeRef LLVMIntPtrTypeForAS(LLVMTargetDataRef TD, unsigned AS) {
+ return wrap(unwrap(TD)->getIntPtrType(getGlobalContext(), AS));
+}
+
unsigned long long LLVMSizeOfTypeInBits(LLVMTargetDataRef TD, LLVMTypeRef Ty) {
return unwrap(TD)->getTypeSizeInBits(unwrap(Ty));
}
Removed: llvm/branches/AMDILBackend/lib/Target/TargetData.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/TargetData.cpp?rev=172540&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/TargetData.cpp (original)
+++ llvm/branches/AMDILBackend/lib/Target/TargetData.cpp (removed)
@@ -1,663 +0,0 @@
-//===-- TargetData.cpp - Data size & alignment routines --------------------==//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines target properties related to datatype size/offset/alignment
-// information.
-//
-// This structure should be created once, filled in if the defaults are not
-// correct and then passed around by const&. None of the members functions
-// require modification to the object.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Target/TargetData.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Module.h"
-#include "llvm/Support/GetElementPtrTypeIterator.h"
-#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/ManagedStatic.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/Mutex.h"
-#include "llvm/ADT/DenseMap.h"
-#include <algorithm>
-#include <cstdlib>
-using namespace llvm;
-
-// Handle the Pass registration stuff necessary to use TargetData's.
-
-// Register the default SparcV9 implementation...
-INITIALIZE_PASS(TargetData, "targetdata", "Target Data Layout", false, true)
-char TargetData::ID = 0;
-
-//===----------------------------------------------------------------------===//
-// Support for StructLayout
-//===----------------------------------------------------------------------===//
-
-StructLayout::StructLayout(StructType *ST, const TargetData &TD) {
- assert(!ST->isOpaque() && "Cannot get layout of opaque structs");
- StructAlignment = 0;
- StructSize = 0;
- NumElements = ST->getNumElements();
-
- // Loop over each of the elements, placing them in memory.
- for (unsigned i = 0, e = NumElements; i != e; ++i) {
- Type *Ty = ST->getElementType(i);
- unsigned TyAlign = ST->isPacked() ? 1 : TD.getABITypeAlignment(Ty);
-
- // Add padding if necessary to align the data element properly.
- if ((StructSize & (TyAlign-1)) != 0)
- StructSize = TargetData::RoundUpAlignment(StructSize, TyAlign);
-
- // Keep track of maximum alignment constraint.
- StructAlignment = std::max(TyAlign, StructAlignment);
-
- MemberOffsets[i] = StructSize;
- StructSize += TD.getTypeAllocSize(Ty); // Consume space for this data item
- }
-
- // Empty structures have alignment of 1 byte.
- if (StructAlignment == 0) StructAlignment = 1;
-
- // Add padding to the end of the struct so that it could be put in an array
- // and all array elements would be aligned correctly.
- if ((StructSize & (StructAlignment-1)) != 0)
- StructSize = TargetData::RoundUpAlignment(StructSize, StructAlignment);
-}
-
-
-/// getElementContainingOffset - Given a valid offset into the structure,
-/// return the structure index that contains it.
-unsigned StructLayout::getElementContainingOffset(uint64_t Offset) const {
- const uint64_t *SI =
- std::upper_bound(&MemberOffsets[0], &MemberOffsets[NumElements], Offset);
- assert(SI != &MemberOffsets[0] && "Offset not in structure type!");
- --SI;
- assert(*SI <= Offset && "upper_bound didn't work");
- assert((SI == &MemberOffsets[0] || *(SI-1) <= Offset) &&
- (SI+1 == &MemberOffsets[NumElements] || *(SI+1) > Offset) &&
- "Upper bound didn't work!");
-
- // Multiple fields can have the same offset if any of them are zero sized.
- // For example, in { i32, [0 x i32], i32 }, searching for offset 4 will stop
- // at the i32 element, because it is the last element at that offset. This is
- // the right one to return, because anything after it will have a higher
- // offset, implying that this element is non-empty.
- return SI-&MemberOffsets[0];
-}
-
-//===----------------------------------------------------------------------===//
-// TargetAlignElem, TargetAlign support
-//===----------------------------------------------------------------------===//
-
-TargetAlignElem
-TargetAlignElem::get(AlignTypeEnum align_type, unsigned abi_align,
- unsigned pref_align, uint32_t bit_width) {
- assert(abi_align <= pref_align && "Preferred alignment worse than ABI!");
- TargetAlignElem retval;
- retval.AlignType = align_type;
- retval.ABIAlign = abi_align;
- retval.PrefAlign = pref_align;
- retval.TypeBitWidth = bit_width;
- return retval;
-}
-
-bool
-TargetAlignElem::operator==(const TargetAlignElem &rhs) const {
- return (AlignType == rhs.AlignType
- && ABIAlign == rhs.ABIAlign
- && PrefAlign == rhs.PrefAlign
- && TypeBitWidth == rhs.TypeBitWidth);
-}
-
-const TargetAlignElem
-TargetData::InvalidAlignmentElem = { (AlignTypeEnum)0xFF, 0, 0, 0 };
-
-//===----------------------------------------------------------------------===//
-// TargetData Class Implementation
-//===----------------------------------------------------------------------===//
-
-/// getInt - Get an integer ignoring errors.
-static int getInt(StringRef R) {
- int Result = 0;
- R.getAsInteger(10, Result);
- return Result;
-}
-
-void TargetData::init() {
- initializeTargetDataPass(*PassRegistry::getPassRegistry());
-
- LayoutMap = 0;
- LittleEndian = false;
- PointerMemSize = 8;
- PointerABIAlign = 8;
- PointerPrefAlign = PointerABIAlign;
- StackNaturalAlign = 0;
-
- // Default alignments
- setAlignment(INTEGER_ALIGN, 1, 1, 1); // i1
- setAlignment(INTEGER_ALIGN, 1, 1, 8); // i8
- setAlignment(INTEGER_ALIGN, 2, 2, 16); // i16
- setAlignment(INTEGER_ALIGN, 4, 4, 32); // i32
- setAlignment(INTEGER_ALIGN, 4, 8, 64); // i64
- setAlignment(FLOAT_ALIGN, 2, 2, 16); // half
- setAlignment(FLOAT_ALIGN, 4, 4, 32); // float
- setAlignment(FLOAT_ALIGN, 8, 8, 64); // double
- setAlignment(FLOAT_ALIGN, 16, 16, 128); // ppcf128, quad, ...
- setAlignment(VECTOR_ALIGN, 8, 8, 64); // v2i32, v1i64, ...
- setAlignment(VECTOR_ALIGN, 16, 16, 128); // v16i8, v8i16, v4i32, ...
- setAlignment(AGGREGATE_ALIGN, 0, 8, 0); // struct
-}
-
-std::string TargetData::parseSpecifier(StringRef Desc, TargetData *td) {
-
- if (td)
- td->init();
-
- while (!Desc.empty()) {
- std::pair<StringRef, StringRef> Split = Desc.split('-');
- StringRef Token = Split.first;
- Desc = Split.second;
-
- if (Token.empty())
- continue;
-
- Split = Token.split(':');
- StringRef Specifier = Split.first;
- Token = Split.second;
-
- assert(!Specifier.empty() && "Can't be empty here");
-
- switch (Specifier[0]) {
- case 'E':
- if (td)
- td->LittleEndian = false;
- break;
- case 'e':
- if (td)
- td->LittleEndian = true;
- break;
- case 'p': {
- // Pointer size.
- Split = Token.split(':');
- int PointerMemSizeBits = getInt(Split.first);
- if (PointerMemSizeBits < 0 || PointerMemSizeBits % 8 != 0)
- return "invalid pointer size, must be a positive 8-bit multiple";
- if (td)
- td->PointerMemSize = PointerMemSizeBits / 8;
-
- // Pointer ABI alignment.
- Split = Split.second.split(':');
- int PointerABIAlignBits = getInt(Split.first);
- if (PointerABIAlignBits < 0 || PointerABIAlignBits % 8 != 0) {
- return "invalid pointer ABI alignment, "
- "must be a positive 8-bit multiple";
- }
- if (td)
- td->PointerABIAlign = PointerABIAlignBits / 8;
-
- // Pointer preferred alignment.
- Split = Split.second.split(':');
- int PointerPrefAlignBits = getInt(Split.first);
- if (PointerPrefAlignBits < 0 || PointerPrefAlignBits % 8 != 0) {
- return "invalid pointer preferred alignment, "
- "must be a positive 8-bit multiple";
- }
- if (td) {
- td->PointerPrefAlign = PointerPrefAlignBits / 8;
- if (td->PointerPrefAlign == 0)
- td->PointerPrefAlign = td->PointerABIAlign;
- }
- break;
- }
- case 'i':
- case 'v':
- case 'f':
- case 'a':
- case 's': {
- AlignTypeEnum AlignType;
- char field = Specifier[0];
- switch (field) {
- default:
- case 'i': AlignType = INTEGER_ALIGN; break;
- case 'v': AlignType = VECTOR_ALIGN; break;
- case 'f': AlignType = FLOAT_ALIGN; break;
- case 'a': AlignType = AGGREGATE_ALIGN; break;
- case 's': AlignType = STACK_ALIGN; break;
- }
- int Size = getInt(Specifier.substr(1));
- if (Size < 0) {
- return std::string("invalid ") + field + "-size field, "
- "must be positive";
- }
-
- Split = Token.split(':');
- int ABIAlignBits = getInt(Split.first);
- if (ABIAlignBits < 0 || ABIAlignBits % 8 != 0) {
- return std::string("invalid ") + field +"-abi-alignment field, "
- "must be a positive 8-bit multiple";
- }
- unsigned ABIAlign = ABIAlignBits / 8;
-
- Split = Split.second.split(':');
-
- int PrefAlignBits = getInt(Split.first);
- if (PrefAlignBits < 0 || PrefAlignBits % 8 != 0) {
- return std::string("invalid ") + field +"-preferred-alignment field, "
- "must be a positive 8-bit multiple";
- }
- unsigned PrefAlign = PrefAlignBits / 8;
- if (PrefAlign == 0)
- PrefAlign = ABIAlign;
-
- if (td)
- td->setAlignment(AlignType, ABIAlign, PrefAlign, Size);
- break;
- }
- case 'n': // Native integer types.
- Specifier = Specifier.substr(1);
- do {
- int Width = getInt(Specifier);
- if (Width <= 0) {
- return std::string("invalid native integer size \'") + Specifier.str() +
- "\', must be a positive integer.";
- }
- if (td && Width != 0)
- td->LegalIntWidths.push_back(Width);
- Split = Token.split(':');
- Specifier = Split.first;
- Token = Split.second;
- } while (!Specifier.empty() || !Token.empty());
- break;
- case 'S': { // Stack natural alignment.
- int StackNaturalAlignBits = getInt(Specifier.substr(1));
- if (StackNaturalAlignBits < 0 || StackNaturalAlignBits % 8 != 0) {
- return "invalid natural stack alignment (S-field), "
- "must be a positive 8-bit multiple";
- }
- if (td)
- td->StackNaturalAlign = StackNaturalAlignBits / 8;
- break;
- }
- default:
- break;
- }
- }
-
- return "";
-}
-
-/// Default ctor.
-///
-/// @note This has to exist, because this is a pass, but it should never be
-/// used.
-TargetData::TargetData() : ImmutablePass(ID) {
- report_fatal_error("Bad TargetData ctor used. "
- "Tool did not specify a TargetData to use?");
-}
-
-TargetData::TargetData(const Module *M)
- : ImmutablePass(ID) {
- std::string errMsg = parseSpecifier(M->getDataLayout(), this);
- assert(errMsg == "" && "Module M has malformed target data layout string.");
- (void)errMsg;
-}
-
-void
-TargetData::setAlignment(AlignTypeEnum align_type, unsigned abi_align,
- unsigned pref_align, uint32_t bit_width) {
- assert(abi_align <= pref_align && "Preferred alignment worse than ABI!");
- for (unsigned i = 0, e = Alignments.size(); i != e; ++i) {
- if (Alignments[i].AlignType == align_type &&
- Alignments[i].TypeBitWidth == bit_width) {
- // Update the abi, preferred alignments.
- Alignments[i].ABIAlign = abi_align;
- Alignments[i].PrefAlign = pref_align;
- return;
- }
- }
-
- Alignments.push_back(TargetAlignElem::get(align_type, abi_align,
- pref_align, bit_width));
-}
-
-/// getAlignmentInfo - Return the alignment (either ABI if ABIInfo = true or
-/// preferred if ABIInfo = false) the target wants for the specified datatype.
-unsigned TargetData::getAlignmentInfo(AlignTypeEnum AlignType,
- uint32_t BitWidth, bool ABIInfo,
- Type *Ty) const {
- // Check to see if we have an exact match and remember the best match we see.
- int BestMatchIdx = -1;
- int LargestInt = -1;
- for (unsigned i = 0, e = Alignments.size(); i != e; ++i) {
- if (Alignments[i].AlignType == AlignType &&
- Alignments[i].TypeBitWidth == BitWidth)
- return ABIInfo ? Alignments[i].ABIAlign : Alignments[i].PrefAlign;
-
- // The best match so far depends on what we're looking for.
- if (AlignType == INTEGER_ALIGN &&
- Alignments[i].AlignType == INTEGER_ALIGN) {
- // The "best match" for integers is the smallest size that is larger than
- // the BitWidth requested.
- if (Alignments[i].TypeBitWidth > BitWidth && (BestMatchIdx == -1 ||
- Alignments[i].TypeBitWidth < Alignments[BestMatchIdx].TypeBitWidth))
- BestMatchIdx = i;
- // However, if there isn't one that's larger, then we must use the
- // largest one we have (see below)
- if (LargestInt == -1 ||
- Alignments[i].TypeBitWidth > Alignments[LargestInt].TypeBitWidth)
- LargestInt = i;
- }
- }
-
- // Okay, we didn't find an exact solution. Fall back here depending on what
- // is being looked for.
- if (BestMatchIdx == -1) {
- // If we didn't find an integer alignment, fall back on most conservative.
- if (AlignType == INTEGER_ALIGN) {
- BestMatchIdx = LargestInt;
- } else {
- assert(AlignType == VECTOR_ALIGN && "Unknown alignment type!");
-
- // By default, use natural alignment for vector types. This is consistent
- // with what clang and llvm-gcc do.
- unsigned Align = getTypeAllocSize(cast<VectorType>(Ty)->getElementType());
- Align *= cast<VectorType>(Ty)->getNumElements();
- // If the alignment is not a power of 2, round up to the next power of 2.
- // This happens for non-power-of-2 length vectors.
- if (Align & (Align-1))
- Align = NextPowerOf2(Align);
- return Align;
- }
- }
-
- // Since we got a "best match" index, just return it.
- return ABIInfo ? Alignments[BestMatchIdx].ABIAlign
- : Alignments[BestMatchIdx].PrefAlign;
-}
-
-namespace {
-
-class StructLayoutMap {
- typedef DenseMap<StructType*, StructLayout*> LayoutInfoTy;
- LayoutInfoTy LayoutInfo;
-
-public:
- virtual ~StructLayoutMap() {
- // Remove any layouts.
- for (LayoutInfoTy::iterator I = LayoutInfo.begin(), E = LayoutInfo.end();
- I != E; ++I) {
- StructLayout *Value = I->second;
- Value->~StructLayout();
- free(Value);
- }
- }
-
- StructLayout *&operator[](StructType *STy) {
- return LayoutInfo[STy];
- }
-
- // for debugging...
- virtual void dump() const {}
-};
-
-} // end anonymous namespace
-
-TargetData::~TargetData() {
- delete static_cast<StructLayoutMap*>(LayoutMap);
-}
-
-const StructLayout *TargetData::getStructLayout(StructType *Ty) const {
- if (!LayoutMap)
- LayoutMap = new StructLayoutMap();
-
- StructLayoutMap *STM = static_cast<StructLayoutMap*>(LayoutMap);
- StructLayout *&SL = (*STM)[Ty];
- if (SL) return SL;
-
- // Otherwise, create the struct layout. Because it is variable length, we
- // malloc it, then use placement new.
- int NumElts = Ty->getNumElements();
- StructLayout *L =
- (StructLayout *)malloc(sizeof(StructLayout)+(NumElts-1) * sizeof(uint64_t));
-
- // Set SL before calling StructLayout's ctor. The ctor could cause other
- // entries to be added to TheMap, invalidating our reference.
- SL = L;
-
- new (L) StructLayout(Ty, *this);
-
- return L;
-}
-
-std::string TargetData::getStringRepresentation() const {
- std::string Result;
- raw_string_ostream OS(Result);
-
- OS << (LittleEndian ? "e" : "E")
- << "-p:" << PointerMemSize*8 << ':' << PointerABIAlign*8
- << ':' << PointerPrefAlign*8
- << "-S" << StackNaturalAlign*8;
-
- for (unsigned i = 0, e = Alignments.size(); i != e; ++i) {
- const TargetAlignElem &AI = Alignments[i];
- OS << '-' << (char)AI.AlignType << AI.TypeBitWidth << ':'
- << AI.ABIAlign*8 << ':' << AI.PrefAlign*8;
- }
-
- if (!LegalIntWidths.empty()) {
- OS << "-n" << (unsigned)LegalIntWidths[0];
-
- for (unsigned i = 1, e = LegalIntWidths.size(); i != e; ++i)
- OS << ':' << (unsigned)LegalIntWidths[i];
- }
- return OS.str();
-}
-
-
-uint64_t TargetData::getTypeSizeInBits(Type *Ty) const {
- assert(Ty->isSized() && "Cannot getTypeInfo() on a type that is unsized!");
- switch (Ty->getTypeID()) {
- case Type::LabelTyID:
- case Type::PointerTyID:
- return getPointerSizeInBits();
- case Type::ArrayTyID: {
- ArrayType *ATy = cast<ArrayType>(Ty);
- return getTypeAllocSizeInBits(ATy->getElementType())*ATy->getNumElements();
- }
- case Type::StructTyID:
- // Get the layout annotation... which is lazily created on demand.
- return getStructLayout(cast<StructType>(Ty))->getSizeInBits();
- case Type::IntegerTyID:
- return cast<IntegerType>(Ty)->getBitWidth();
- case Type::VoidTyID:
- return 8;
- case Type::HalfTyID:
- return 16;
- case Type::FloatTyID:
- return 32;
- case Type::DoubleTyID:
- case Type::X86_MMXTyID:
- return 64;
- case Type::PPC_FP128TyID:
- case Type::FP128TyID:
- return 128;
- // In memory objects this is always aligned to a higher boundary, but
- // only 80 bits contain information.
- case Type::X86_FP80TyID:
- return 80;
- case Type::VectorTyID:
- return cast<VectorType>(Ty)->getBitWidth();
- default:
- llvm_unreachable("TargetData::getTypeSizeInBits(): Unsupported type");
- }
-}
-
-/*!
- \param abi_or_pref Flag that determines which alignment is returned. true
- returns the ABI alignment, false returns the preferred alignment.
- \param Ty The underlying type for which alignment is determined.
-
- Get the ABI (\a abi_or_pref == true) or preferred alignment (\a abi_or_pref
- == false) for the requested type \a Ty.
- */
-unsigned TargetData::getAlignment(Type *Ty, bool abi_or_pref) const {
- int AlignType = -1;
-
- assert(Ty->isSized() && "Cannot getTypeInfo() on a type that is unsized!");
- switch (Ty->getTypeID()) {
- // Early escape for the non-numeric types.
- case Type::LabelTyID:
- case Type::PointerTyID:
- return (abi_or_pref
- ? getPointerABIAlignment()
- : getPointerPrefAlignment());
- case Type::ArrayTyID:
- return getAlignment(cast<ArrayType>(Ty)->getElementType(), abi_or_pref);
-
- case Type::StructTyID: {
- // Packed structure types always have an ABI alignment of one.
- if (cast<StructType>(Ty)->isPacked() && abi_or_pref)
- return 1;
-
- // Get the layout annotation... which is lazily created on demand.
- const StructLayout *Layout = getStructLayout(cast<StructType>(Ty));
- unsigned Align = getAlignmentInfo(AGGREGATE_ALIGN, 0, abi_or_pref, Ty);
- return std::max(Align, Layout->getAlignment());
- }
- case Type::IntegerTyID:
- case Type::VoidTyID:
- AlignType = INTEGER_ALIGN;
- break;
- case Type::HalfTyID:
- case Type::FloatTyID:
- case Type::DoubleTyID:
- // PPC_FP128TyID and FP128TyID have different data contents, but the
- // same size and alignment, so they look the same here.
- case Type::PPC_FP128TyID:
- case Type::FP128TyID:
- case Type::X86_FP80TyID:
- AlignType = FLOAT_ALIGN;
- break;
- case Type::X86_MMXTyID:
- case Type::VectorTyID:
- AlignType = VECTOR_ALIGN;
- break;
- default:
- llvm_unreachable("Bad type for getAlignment!!!");
- }
-
- return getAlignmentInfo((AlignTypeEnum)AlignType, getTypeSizeInBits(Ty),
- abi_or_pref, Ty);
-}
-
-unsigned TargetData::getABITypeAlignment(Type *Ty) const {
- return getAlignment(Ty, true);
-}
-
-/// getABIIntegerTypeAlignment - Return the minimum ABI-required alignment for
-/// an integer type of the specified bitwidth.
-unsigned TargetData::getABIIntegerTypeAlignment(unsigned BitWidth) const {
- return getAlignmentInfo(INTEGER_ALIGN, BitWidth, true, 0);
-}
-
-
-unsigned TargetData::getCallFrameTypeAlignment(Type *Ty) const {
- for (unsigned i = 0, e = Alignments.size(); i != e; ++i)
- if (Alignments[i].AlignType == STACK_ALIGN)
- return Alignments[i].ABIAlign;
-
- return getABITypeAlignment(Ty);
-}
-
-unsigned TargetData::getPrefTypeAlignment(Type *Ty) const {
- return getAlignment(Ty, false);
-}
-
-unsigned TargetData::getPreferredTypeAlignmentShift(Type *Ty) const {
- unsigned Align = getPrefTypeAlignment(Ty);
- assert(!(Align & (Align-1)) && "Alignment is not a power of two!");
- return Log2_32(Align);
-}
-
-/// getIntPtrType - Return an unsigned integer type that is the same size or
-/// greater to the host pointer size.
-IntegerType *TargetData::getIntPtrType(LLVMContext &C) const {
- return IntegerType::get(C, getPointerSizeInBits());
-}
-
-
-uint64_t TargetData::getIndexedOffset(Type *ptrTy,
- ArrayRef<Value *> Indices) const {
- Type *Ty = ptrTy;
- assert(Ty->isPointerTy() && "Illegal argument for getIndexedOffset()");
- uint64_t Result = 0;
-
- generic_gep_type_iterator<Value* const*>
- TI = gep_type_begin(ptrTy, Indices);
- for (unsigned CurIDX = 0, EndIDX = Indices.size(); CurIDX != EndIDX;
- ++CurIDX, ++TI) {
- if (StructType *STy = dyn_cast<StructType>(*TI)) {
- assert(Indices[CurIDX]->getType() ==
- Type::getInt32Ty(ptrTy->getContext()) &&
- "Illegal struct idx");
- unsigned FieldNo = cast<ConstantInt>(Indices[CurIDX])->getZExtValue();
-
- // Get structure layout information...
- const StructLayout *Layout = getStructLayout(STy);
-
- // Add in the offset, as calculated by the structure layout info...
- Result += Layout->getElementOffset(FieldNo);
-
- // Update Ty to refer to current element
- Ty = STy->getElementType(FieldNo);
- } else {
- // Update Ty to refer to current element
- Ty = cast<SequentialType>(Ty)->getElementType();
-
- // Get the array index and the size of each array element.
- if (int64_t arrayIdx = cast<ConstantInt>(Indices[CurIDX])->getSExtValue())
- Result += (uint64_t)arrayIdx * getTypeAllocSize(Ty);
- }
- }
-
- return Result;
-}
-
-/// getPreferredAlignment - Return the preferred alignment of the specified
-/// global. This includes an explicitly requested alignment (if the global
-/// has one).
-unsigned TargetData::getPreferredAlignment(const GlobalVariable *GV) const {
- Type *ElemType = GV->getType()->getElementType();
- unsigned Alignment = getPrefTypeAlignment(ElemType);
- unsigned GVAlignment = GV->getAlignment();
- if (GVAlignment >= Alignment) {
- Alignment = GVAlignment;
- } else if (GVAlignment != 0) {
- Alignment = std::max(GVAlignment, getABITypeAlignment(ElemType));
- }
-
- if (GV->hasInitializer() && GVAlignment == 0) {
- if (Alignment < 16) {
- // If the global is not external, see if it is large. If so, give it a
- // larger alignment.
- if (getTypeSizeInBits(ElemType) > 128)
- Alignment = 16; // 16-byte alignment.
- }
- }
- return Alignment;
-}
-
-/// getPreferredAlignmentLog - Return the preferred alignment of the
-/// specified global, returned in log form. This includes an explicitly
-/// requested alignment (if the global has one).
-unsigned TargetData::getPreferredAlignmentLog(const GlobalVariable *GV) const {
- return Log2_32(getPreferredAlignment(GV));
-}
Removed: llvm/branches/AMDILBackend/lib/Target/TargetELFWriterInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/TargetELFWriterInfo.cpp?rev=172540&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/TargetELFWriterInfo.cpp (original)
+++ llvm/branches/AMDILBackend/lib/Target/TargetELFWriterInfo.cpp (removed)
@@ -1,25 +0,0 @@
-//===-- lib/Target/TargetELFWriterInfo.cpp - ELF Writer Info --0-*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the TargetELFWriterInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Function.h"
-#include "llvm/Target/TargetELFWriterInfo.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetMachine.h"
-using namespace llvm;
-
-TargetELFWriterInfo::TargetELFWriterInfo(bool is64Bit_, bool isLittleEndian_) :
- is64Bit(is64Bit_), isLittleEndian(isLittleEndian_) {
-}
-
-TargetELFWriterInfo::~TargetELFWriterInfo() {}
-
Modified: llvm/branches/AMDILBackend/lib/Target/TargetLibraryInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/TargetLibraryInfo.cpp?rev=172541&r1=172540&r2=172541&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/TargetLibraryInfo.cpp (original)
+++ llvm/branches/AMDILBackend/lib/Target/TargetLibraryInfo.cpp Tue Jan 15 11:16:16 2013
@@ -24,6 +24,16 @@
const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] =
{
+ "_ZdaPv",
+ "_ZdlPv",
+ "_Znaj",
+ "_ZnajRKSt9nothrow_t",
+ "_Znam",
+ "_ZnamRKSt9nothrow_t",
+ "_Znwj",
+ "_ZnwjRKSt9nothrow_t",
+ "_Znwm",
+ "_ZnwmRKSt9nothrow_t",
"__cxa_atexit",
"__cxa_guard_abort",
"__cxa_guard_acquire",
@@ -31,16 +41,29 @@
"__memcpy_chk",
"acos",
"acosf",
+ "acosh",
+ "acoshf",
+ "acoshl",
"acosl",
"asin",
"asinf",
+ "asinh",
+ "asinhf",
+ "asinhl",
"asinl",
"atan",
"atan2",
"atan2f",
"atan2l",
"atanf",
+ "atanh",
+ "atanhf",
+ "atanhl",
"atanl",
+ "calloc",
+ "cbrt",
+ "cbrtf",
+ "cbrtl",
"ceil",
"ceilf",
"ceill",
@@ -54,6 +77,9 @@
"coshl",
"cosl",
"exp",
+ "exp10",
+ "exp10f",
+ "exp10l",
"exp2",
"exp2f",
"exp2l",
@@ -74,6 +100,7 @@
"fmodl",
"fputc",
"fputs",
+ "free",
"fwrite",
"iprintf",
"log",
@@ -86,8 +113,12 @@
"log2",
"log2f",
"log2l",
+ "logb",
+ "logbf",
+ "logbl",
"logf",
"logl",
+ "malloc",
"memchr",
"memcmp",
"memcpy",
@@ -97,11 +128,14 @@
"nearbyint",
"nearbyintf",
"nearbyintl",
+ "posix_memalign",
"pow",
"powf",
"powl",
"putchar",
"puts",
+ "realloc",
+ "reallocf",
"rint",
"rintf",
"rintl",
@@ -118,14 +152,30 @@
"sqrt",
"sqrtf",
"sqrtl",
+ "stpcpy",
"strcat",
"strchr",
+ "strcmp",
"strcpy",
+ "strcspn",
+ "strdup",
"strlen",
"strncat",
"strncmp",
"strncpy",
+ "strndup",
"strnlen",
+ "strpbrk",
+ "strrchr",
+ "strspn",
+ "strstr",
+ "strtod",
+ "strtof",
+ "strtol",
+ "strtold",
+ "strtoll",
+ "strtoul",
+ "strtoull",
"tan",
"tanf",
"tanh",
@@ -134,7 +184,8 @@
"tanl",
"trunc",
"truncf",
- "truncl"
+ "truncl",
+ "valloc"
};
/// initialize - Initialize the set of available library functions based on the
@@ -205,6 +256,21 @@
TLI.setUnavailable(LibFunc::tanhl);
// Win32 only has C89 math
+ TLI.setUnavailable(LibFunc::acosh);
+ TLI.setUnavailable(LibFunc::acoshf);
+ TLI.setUnavailable(LibFunc::acoshl);
+ TLI.setUnavailable(LibFunc::asinh);
+ TLI.setUnavailable(LibFunc::asinhf);
+ TLI.setUnavailable(LibFunc::asinhl);
+ TLI.setUnavailable(LibFunc::atanh);
+ TLI.setUnavailable(LibFunc::atanhf);
+ TLI.setUnavailable(LibFunc::atanhl);
+ TLI.setUnavailable(LibFunc::cbrt);
+ TLI.setUnavailable(LibFunc::cbrtf);
+ TLI.setUnavailable(LibFunc::cbrtl);
+ TLI.setUnavailable(LibFunc::exp10);
+ TLI.setUnavailable(LibFunc::exp10f);
+ TLI.setUnavailable(LibFunc::exp10l);
TLI.setUnavailable(LibFunc::exp2);
TLI.setUnavailable(LibFunc::exp2f);
TLI.setUnavailable(LibFunc::exp2l);
@@ -217,6 +283,9 @@
TLI.setUnavailable(LibFunc::log1p);
TLI.setUnavailable(LibFunc::log1pf);
TLI.setUnavailable(LibFunc::log1pl);
+ TLI.setUnavailable(LibFunc::logb);
+ TLI.setUnavailable(LibFunc::logbf);
+ TLI.setUnavailable(LibFunc::logbl);
TLI.setUnavailable(LibFunc::nearbyint);
TLI.setUnavailable(LibFunc::nearbyintf);
TLI.setUnavailable(LibFunc::nearbyintl);
@@ -254,6 +323,10 @@
TLI.setUnavailable(LibFunc::tanf);
TLI.setUnavailable(LibFunc::tanhf);
}
+
+ // Win32 does *not* provide stpcpy. It is provided on POSIX systems:
+ // http://pubs.opengroup.org/onlinepubs/9699919799/functions/stpcpy.html
+ TLI.setUnavailable(LibFunc::stpcpy);
}
}
Modified: llvm/branches/AMDILBackend/lib/Target/TargetLoweringObjectFile.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/TargetLoweringObjectFile.cpp?rev=172541&r1=172540&r2=172541&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/TargetLoweringObjectFile.cpp (original)
+++ llvm/branches/AMDILBackend/lib/Target/TargetLoweringObjectFile.cpp Tue Jan 15 11:16:16 2013
@@ -22,7 +22,7 @@
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Target/Mangler.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Support/Dwarf.h"
@@ -184,7 +184,7 @@
// Otherwise, just drop it into a mergable constant section. If we have
// a section for this size, use it, otherwise use the arbitrary sized
// mergable section.
- switch (TM.getTargetData()->getTypeAllocSize(C->getType())) {
+ switch (TM.getDataLayout()->getTypeAllocSize(C->getType())) {
case 4: return SectionKind::getMergeableConst4();
case 8: return SectionKind::getMergeableConst8();
case 16: return SectionKind::getMergeableConst16();
Modified: llvm/branches/AMDILBackend/lib/Target/TargetMachineC.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/TargetMachineC.cpp?rev=172541&r1=172540&r2=172541&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/TargetMachineC.cpp (original)
+++ llvm/branches/AMDILBackend/lib/Target/TargetMachineC.cpp Tue Jan 15 11:16:16 2013
@@ -14,7 +14,7 @@
#include "llvm-c/Core.h"
#include "llvm-c/Target.h"
#include "llvm-c/TargetMachine.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
@@ -146,7 +146,7 @@
}
LLVMTargetDataRef LLVMGetTargetMachineData(LLVMTargetMachineRef T) {
- return wrap(unwrap(T)->getTargetData());
+ return wrap(unwrap(T)->getDataLayout());
}
LLVMBool LLVMTargetMachineEmitToFile(LLVMTargetMachineRef T, LLVMModuleRef M,
@@ -158,14 +158,14 @@
std::string error;
- const TargetData* td = TM->getTargetData();
+ const DataLayout* td = TM->getDataLayout();
if (!td) {
- error = "No TargetData in TargetMachine";
+ error = "No DataLayout in TargetMachine";
*ErrorMessage = strdup(error.c_str());
return true;
}
- pass.add(new TargetData(*td));
+ pass.add(new DataLayout(*td));
TargetMachine::CodeGenFileType ft;
switch (codegen) {
@@ -184,7 +184,7 @@
}
if (TM->addPassesToEmitFile(pass, destf, ft)) {
- error = "No TargetData in TargetMachine";
+ error = "No DataLayout in TargetMachine";
*ErrorMessage = strdup(error.c_str());
return true;
}
Modified: llvm/branches/AMDILBackend/lib/Target/TargetRegisterInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/TargetRegisterInfo.cpp?rev=172541&r1=172540&r2=172541&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/TargetRegisterInfo.cpp (original)
+++ llvm/branches/AMDILBackend/lib/Target/TargetRegisterInfo.cpp Tue Jan 15 11:16:16 2013
@@ -20,8 +20,10 @@
TargetRegisterInfo::TargetRegisterInfo(const TargetRegisterInfoDesc *ID,
regclass_iterator RCB, regclass_iterator RCE,
- const char *const *subregindexnames)
- : InfoDesc(ID), SubRegIndexNames(subregindexnames),
+ const char *const *SRINames,
+ const unsigned *SRILaneMasks)
+ : InfoDesc(ID), SubRegIndexNames(SRINames),
+ SubRegIndexLaneMasks(SRILaneMasks),
RegClassBegin(RCB), RegClassEnd(RCE) {
}
Modified: llvm/branches/AMDILBackend/lib/Target/X86/AsmParser/X86AsmLexer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/X86/AsmParser/X86AsmLexer.cpp?rev=172541&r1=172540&r2=172541&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/X86/AsmParser/X86AsmLexer.cpp (original)
+++ llvm/branches/AMDILBackend/lib/Target/X86/AsmParser/X86AsmLexer.cpp Tue Jan 15 11:16:16 2013
@@ -18,19 +18,19 @@
using namespace llvm;
namespace {
-
+
class X86AsmLexer : public MCTargetAsmLexer {
const MCAsmInfo &AsmInfo;
-
+
bool tentativeIsValid;
AsmToken tentativeToken;
-
+
const AsmToken &lexTentative() {
tentativeToken = getLexer()->Lex();
tentativeIsValid = true;
return tentativeToken;
}
-
+
const AsmToken &lexDefinite() {
if (tentativeIsValid) {
tentativeIsValid = false;
@@ -38,7 +38,7 @@
}
return getLexer()->Lex();
}
-
+
AsmToken LexTokenATT();
AsmToken LexTokenIntel();
protected:
@@ -47,7 +47,7 @@
SetError(SMLoc(), "No MCAsmLexer installed");
return AsmToken(AsmToken::Error, "", 0);
}
-
+
switch (AsmInfo.getAssemblerDialect()) {
default:
SetError(SMLoc(), "Unhandled dialect");
@@ -71,33 +71,32 @@
AsmToken X86AsmLexer::LexTokenATT() {
AsmToken lexedToken = lexDefinite();
-
+
switch (lexedToken.getKind()) {
default:
return lexedToken;
case AsmToken::Error:
SetError(Lexer->getErrLoc(), Lexer->getErr());
return lexedToken;
-
+
case AsmToken::Percent: {
const AsmToken &nextToken = lexTentative();
if (nextToken.getKind() != AsmToken::Identifier)
return lexedToken;
-
if (unsigned regID = MatchRegisterName(nextToken.getString())) {
lexDefinite();
-
+
// FIXME: This is completely wrong when there is a space or other
// punctuation between the % and the register name.
StringRef regStr(lexedToken.getString().data(),
- lexedToken.getString().size() +
+ lexedToken.getString().size() +
nextToken.getString().size());
-
- return AsmToken(AsmToken::Register, regStr,
+
+ return AsmToken(AsmToken::Register, regStr,
static_cast<int64_t>(regID));
}
-
+
// Match register name failed. If this is "db[0-7]", match it as an alias
// for dr[0-7].
if (nextToken.getString().size() == 3 &&
@@ -113,29 +112,29 @@
case '6': RegNo = X86::DR6; break;
case '7': RegNo = X86::DR7; break;
}
-
+
if (RegNo != -1) {
lexDefinite();
// FIXME: This is completely wrong when there is a space or other
// punctuation between the % and the register name.
StringRef regStr(lexedToken.getString().data(),
- lexedToken.getString().size() +
+ lexedToken.getString().size() +
nextToken.getString().size());
- return AsmToken(AsmToken::Register, regStr,
+ return AsmToken(AsmToken::Register, regStr,
static_cast<int64_t>(RegNo));
}
}
-
-
+
+
return lexedToken;
- }
+ }
}
}
AsmToken X86AsmLexer::LexTokenIntel() {
const AsmToken &lexedToken = lexDefinite();
-
+
switch(lexedToken.getKind()) {
default:
return lexedToken;
@@ -144,7 +143,7 @@
return lexedToken;
case AsmToken::Identifier: {
unsigned regID = MatchRegisterName(lexedToken.getString().lower());
-
+
if (regID)
return AsmToken(AsmToken::Register,
lexedToken.getString(),
Modified: llvm/branches/AMDILBackend/lib/Target/X86/AsmParser/X86AsmParser.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/X86/AsmParser/X86AsmParser.cpp?rev=172541&r1=172540&r2=172541&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/X86/AsmParser/X86AsmParser.cpp (original)
+++ llvm/branches/AMDILBackend/lib/Target/X86/AsmParser/X86AsmParser.cpp Tue Jan 15 11:16:16 2013
@@ -11,12 +11,14 @@
#include "llvm/MC/MCTargetAsmParser.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringSwitch.h"
@@ -33,13 +35,16 @@
class X86AsmParser : public MCTargetAsmParser {
MCSubtargetInfo &STI;
MCAsmParser &Parser;
+ ParseInstructionInfo *InstInfo;
private:
MCAsmParser &getParser() const { return Parser; }
MCAsmLexer &getLexer() const { return Parser.getLexer(); }
bool Error(SMLoc L, const Twine &Msg,
- ArrayRef<SMRange> Ranges = ArrayRef<SMRange>()) {
+ ArrayRef<SMRange> Ranges = ArrayRef<SMRange>(),
+ bool MatchingInlineAsm = false) {
+ if (MatchingInlineAsm) return true;
return Parser.Error(L, Msg, Ranges);
}
@@ -51,23 +56,25 @@
X86Operand *ParseOperand();
X86Operand *ParseATTOperand();
X86Operand *ParseIntelOperand();
- X86Operand *ParseIntelMemOperand();
+ X86Operand *ParseIntelOffsetOfOperator(SMLoc StartLoc);
+ X86Operand *ParseIntelTypeOperator(SMLoc StartLoc);
+ X86Operand *ParseIntelMemOperand(unsigned SegReg, SMLoc StartLoc);
X86Operand *ParseIntelBracExpression(unsigned SegReg, unsigned Size);
X86Operand *ParseMemOperand(unsigned SegReg, SMLoc StartLoc);
+ bool ParseIntelDotOperator(const MCExpr *Disp, const MCExpr **NewDisp,
+ SmallString<64> &Err);
+
bool ParseDirectiveWord(unsigned Size, SMLoc L);
bool ParseDirectiveCode(StringRef IDVal, SMLoc L);
bool processInstruction(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &Ops);
- bool MatchAndEmitInstruction(SMLoc IDLoc,
+ bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- MCStreamer &Out);
-
- bool MatchInstruction(SMLoc IDLoc,
- SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- SmallVectorImpl<MCInst> &MCInsts);
+ MCStreamer &Out, unsigned &ErrorInfo,
+ bool MatchingInlineAsm);
/// isSrcOp - Returns true if operand is either (%rsi) or %ds:%(rsi)
/// in 64bit mode or (%esi) or %es:(%esi) in 32bit mode.
@@ -96,14 +103,15 @@
public:
X86AsmParser(MCSubtargetInfo &sti, MCAsmParser &parser)
- : MCTargetAsmParser(), STI(sti), Parser(parser) {
+ : MCTargetAsmParser(), STI(sti), Parser(parser), InstInfo(0) {
// Initialize the set of available features.
setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
}
virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc);
- virtual bool ParseInstruction(StringRef Name, SMLoc NameLoc,
+ virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
+ SMLoc NameLoc,
SmallVectorImpl<MCParsedAsmOperand*> &Operands);
virtual bool ParseDirective(AsmToken DirectiveID);
@@ -159,6 +167,7 @@
} Kind;
SMLoc StartLoc, EndLoc;
+ SMLoc OffsetOfLoc;
union {
struct {
@@ -172,6 +181,7 @@
struct {
const MCExpr *Val;
+ bool NeedAsmRewrite;
} Imm;
struct {
@@ -181,6 +191,7 @@
unsigned IndexReg;
unsigned Scale;
unsigned Size;
+ bool NeedSizeDir;
} Mem;
};
@@ -191,8 +202,11 @@
SMLoc getStartLoc() const { return StartLoc; }
/// getEndLoc - Get the location of the last token of this operand.
SMLoc getEndLoc() const { return EndLoc; }
-
+ /// getLocRange - Get the range between the first and last token of this
+ /// operand.
SMRange getLocRange() const { return SMRange(StartLoc, EndLoc); }
+ /// getOffsetOfLoc - Get the location of the offset operator.
+ SMLoc getOffsetOfLoc() const { return OffsetOfLoc; }
virtual void print(raw_ostream &OS) const {}
@@ -216,6 +230,11 @@
return Imm.Val;
}
+ bool needAsmRewrite() const {
+ assert(Kind == Immediate && "Invalid access!");
+ return Imm.NeedAsmRewrite;
+ }
+
const MCExpr *getMemDisp() const {
assert(Kind == Memory && "Invalid access!");
return Mem.Disp;
@@ -312,6 +331,20 @@
return isImmSExti64i32Value(CE->getValue());
}
+ unsigned getMemSize() const {
+ assert(Kind == Memory && "Invalid access!");
+ return Mem.Size;
+ }
+
+ bool isOffsetOf() const {
+ return OffsetOfLoc.getPointer();
+ }
+
+ bool needSizeDirective() const {
+ assert(Kind == Memory && "Invalid access!");
+ return Mem.NeedSizeDir;
+ }
+
bool isMem() const { return Kind == Memory; }
bool isMem8() const {
return Kind == Memory && (!Mem.Size || Mem.Size == 8);
@@ -437,21 +470,25 @@
return Res;
}
- static X86Operand *CreateReg(unsigned RegNo, SMLoc StartLoc, SMLoc EndLoc) {
+ static X86Operand *CreateReg(unsigned RegNo, SMLoc StartLoc, SMLoc EndLoc,
+ SMLoc OffsetOfLoc = SMLoc()) {
X86Operand *Res = new X86Operand(Register, StartLoc, EndLoc);
Res->Reg.RegNo = RegNo;
+ Res->OffsetOfLoc = OffsetOfLoc;
return Res;
}
- static X86Operand *CreateImm(const MCExpr *Val, SMLoc StartLoc, SMLoc EndLoc){
+ static X86Operand *CreateImm(const MCExpr *Val, SMLoc StartLoc, SMLoc EndLoc,
+ bool NeedRewrite = true){
X86Operand *Res = new X86Operand(Immediate, StartLoc, EndLoc);
Res->Imm.Val = Val;
+ Res->Imm.NeedAsmRewrite = NeedRewrite;
return Res;
}
/// Create an absolute memory operand.
- static X86Operand *CreateMem(const MCExpr *Disp, SMLoc StartLoc,
- SMLoc EndLoc, unsigned Size = 0) {
+ static X86Operand *CreateMem(const MCExpr *Disp, SMLoc StartLoc, SMLoc EndLoc,
+ unsigned Size = 0, bool NeedSizeDir = false){
X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc);
Res->Mem.SegReg = 0;
Res->Mem.Disp = Disp;
@@ -459,6 +496,7 @@
Res->Mem.IndexReg = 0;
Res->Mem.Scale = 1;
Res->Mem.Size = Size;
+ Res->Mem.NeedSizeDir = NeedSizeDir;
return Res;
}
@@ -466,7 +504,7 @@
static X86Operand *CreateMem(unsigned SegReg, const MCExpr *Disp,
unsigned BaseReg, unsigned IndexReg,
unsigned Scale, SMLoc StartLoc, SMLoc EndLoc,
- unsigned Size = 0) {
+ unsigned Size = 0, bool NeedSizeDir = false) {
// We should never just have a displacement, that should be parsed as an
// absolute memory operand.
assert((SegReg || BaseReg || IndexReg) && "Invalid memory operand!");
@@ -481,6 +519,7 @@
Res->Mem.IndexReg = IndexReg;
Res->Mem.Scale = Scale;
Res->Mem.Size = Size;
+ Res->Mem.NeedSizeDir = NeedSizeDir;
return Res;
}
};
@@ -510,12 +549,13 @@
bool X86AsmParser::ParseRegister(unsigned &RegNo,
SMLoc &StartLoc, SMLoc &EndLoc) {
RegNo = 0;
- if (!isParsingIntelSyntax()) {
- const AsmToken &TokPercent = Parser.getTok();
- assert(TokPercent.is(AsmToken::Percent) && "Invalid token kind!");
- StartLoc = TokPercent.getLoc();
+ const AsmToken &PercentTok = Parser.getTok();
+ StartLoc = PercentTok.getLoc();
+
+ // If we encounter a %, ignore it. This code handles registers with and
+ // without the prefix, unprefixed registers can occur in cfi directives.
+ if (!isParsingIntelSyntax() && PercentTok.is(AsmToken::Percent))
Parser.Lex(); // Eat percent token.
- }
const AsmToken &Tok = Parser.getTok();
if (Tok.isNot(AsmToken::Identifier)) {
@@ -621,23 +661,25 @@
/// getIntelMemOperandSize - Return intel memory operand size.
static unsigned getIntelMemOperandSize(StringRef OpStr) {
- unsigned Size = 0;
- if (OpStr == "BYTE") Size = 8;
- if (OpStr == "WORD") Size = 16;
- if (OpStr == "DWORD") Size = 32;
- if (OpStr == "QWORD") Size = 64;
- if (OpStr == "XWORD") Size = 80;
- if (OpStr == "XMMWORD") Size = 128;
- if (OpStr == "YMMWORD") Size = 256;
+ unsigned Size = StringSwitch<unsigned>(OpStr)
+ .Cases("BYTE", "byte", 8)
+ .Cases("WORD", "word", 16)
+ .Cases("DWORD", "dword", 32)
+ .Cases("QWORD", "qword", 64)
+ .Cases("XWORD", "xword", 80)
+ .Cases("XMMWORD", "xmmword", 128)
+ .Cases("YMMWORD", "ymmword", 256)
+ .Default(0);
return Size;
}
-X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg,
+X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg,
unsigned Size) {
unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
- SMLoc Start = Parser.getTok().getLoc(), End;
+ const AsmToken &Tok = Parser.getTok();
+ SMLoc Start = Tok.getLoc(), End;
- const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext());
+ const MCExpr *Disp = MCConstantExpr::Create(0, getContext());
// Parse [ BaseReg + Scale*IndexReg + Disp ] or [ symbol ]
// Eat '['
@@ -653,15 +695,17 @@
if (getLexer().isNot(AsmToken::RBrac))
return ErrorOperand(Start, "Expected ']' token!");
Parser.Lex();
+ End = Tok.getLoc();
return X86Operand::CreateMem(Disp, Start, End, Size);
}
} else if (getLexer().is(AsmToken::Integer)) {
- int64_t Val = Parser.getTok().getIntVal();
+ int64_t Val = Tok.getIntVal();
Parser.Lex();
- SMLoc Loc = Parser.getTok().getLoc();
+ SMLoc Loc = Tok.getLoc();
if (getLexer().is(AsmToken::RBrac)) {
// Handle '[' number ']'
Parser.Lex();
+ End = Tok.getLoc();
const MCExpr *Disp = MCConstantExpr::Create(Val, getContext());
if (SegReg)
return X86Operand::CreateMem(SegReg, Disp, 0, 0, Scale,
@@ -670,7 +714,7 @@
} else if (getLexer().is(AsmToken::Star)) {
// Handle '[' Scale*IndexReg ']'
Parser.Lex();
- SMLoc IdxRegLoc = Parser.getTok().getLoc();
+ SMLoc IdxRegLoc = Tok.getLoc();
if (ParseRegister(IndexReg, IdxRegLoc, End))
return ErrorOperand(IdxRegLoc, "Expected register");
Scale = Val;
@@ -678,16 +722,27 @@
return ErrorOperand(Loc, "Unexpected token");
}
- if (getLexer().is(AsmToken::Plus) || getLexer().is(AsmToken::Minus)) {
- bool isPlus = getLexer().is(AsmToken::Plus);
+ // Parse ][ as a plus.
+ bool ExpectRBrac = true;
+ if (getLexer().is(AsmToken::RBrac)) {
+ ExpectRBrac = false;
Parser.Lex();
- SMLoc PlusLoc = Parser.getTok().getLoc();
+ End = Tok.getLoc();
+ }
+
+ if (getLexer().is(AsmToken::Plus) || getLexer().is(AsmToken::Minus) ||
+ getLexer().is(AsmToken::LBrac)) {
+ ExpectRBrac = true;
+ bool isPlus = getLexer().is(AsmToken::Plus) ||
+ getLexer().is(AsmToken::LBrac);
+ Parser.Lex();
+ SMLoc PlusLoc = Tok.getLoc();
if (getLexer().is(AsmToken::Integer)) {
- int64_t Val = Parser.getTok().getIntVal();
+ int64_t Val = Tok.getIntVal();
Parser.Lex();
if (getLexer().is(AsmToken::Star)) {
Parser.Lex();
- SMLoc IdxRegLoc = Parser.getTok().getLoc();
+ SMLoc IdxRegLoc = Tok.getLoc();
if (ParseRegister(IndexReg, IdxRegLoc, End))
return ErrorOperand(IdxRegLoc, "Expected register");
Scale = Val;
@@ -698,21 +753,48 @@
return ErrorOperand(PlusLoc, "unexpected token after +");
} else if (getLexer().is(AsmToken::Identifier)) {
// This could be an index register or a displacement expression.
- End = Parser.getTok().getLoc();
+ End = Tok.getLoc();
if (!IndexReg)
ParseRegister(IndexReg, Start, End);
else if (getParser().ParseExpression(Disp, End)) return 0;
}
}
+
+ // Parse ][ as a plus.
+ if (getLexer().is(AsmToken::RBrac)) {
+ ExpectRBrac = false;
+ Parser.Lex();
+ End = Tok.getLoc();
+ if (getLexer().is(AsmToken::LBrac)) {
+ ExpectRBrac = true;
+ Parser.Lex();
+ if (getParser().ParseExpression(Disp, End))
+ return 0;
+ }
+ } else if (ExpectRBrac) {
+ if (getParser().ParseExpression(Disp, End))
+ return 0;
+ }
- if (getLexer().isNot(AsmToken::RBrac))
- if (getParser().ParseExpression(Disp, End)) return 0;
+ if (ExpectRBrac) {
+ if (getLexer().isNot(AsmToken::RBrac))
+ return ErrorOperand(End, "expected ']' token!");
+ Parser.Lex();
+ End = Tok.getLoc();
+ }
- End = Parser.getTok().getLoc();
- if (getLexer().isNot(AsmToken::RBrac))
- return ErrorOperand(End, "expected ']' token!");
- Parser.Lex();
- End = Parser.getTok().getLoc();
+ // Parse the dot operator (e.g., [ebx].foo.bar).
+ if (Tok.getString().startswith(".")) {
+ SmallString<64> Err;
+ const MCExpr *NewDisp;
+ if (ParseIntelDotOperator(Disp, &NewDisp, Err))
+ return ErrorOperand(Tok.getLoc(), Err);
+
+ Parser.Lex(); // Eat the field.
+ Disp = NewDisp;
+ }
+
+ End = Tok.getLoc();
// handle [-42]
if (!BaseReg && !IndexReg)
@@ -723,15 +805,15 @@
}
/// ParseIntelMemOperand - Parse intel style memory operand.
-X86Operand *X86AsmParser::ParseIntelMemOperand() {
+X86Operand *X86AsmParser::ParseIntelMemOperand(unsigned SegReg, SMLoc Start) {
const AsmToken &Tok = Parser.getTok();
- SMLoc Start = Parser.getTok().getLoc(), End;
- unsigned SegReg = 0;
+ SMLoc End;
unsigned Size = getIntelMemOperandSize(Tok.getString());
if (Size) {
Parser.Lex();
- assert (Tok.getString() == "PTR" && "Unexpected token!");
+ assert ((Tok.getString() == "PTR" || Tok.getString() == "ptr") &&
+ "Unexpected token!");
Parser.Lex();
}
@@ -750,12 +832,164 @@
const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext());
if (getParser().ParseExpression(Disp, End)) return 0;
- return X86Operand::CreateMem(Disp, Start, End, Size);
+ End = Parser.getTok().getLoc();
+
+ bool NeedSizeDir = false;
+ if (!Size && isParsingInlineAsm()) {
+ if (const MCSymbolRefExpr *SymRef = dyn_cast<MCSymbolRefExpr>(Disp)) {
+ const MCSymbol &Sym = SymRef->getSymbol();
+ // FIXME: The SemaLookup will fail if the name is anything other then an
+ // identifier.
+ // FIXME: Pass a valid SMLoc.
+ SemaCallback->LookupInlineAsmIdentifier(Sym.getName(), NULL, Size);
+ NeedSizeDir = Size > 0;
+ }
+ }
+ if (!isParsingInlineAsm())
+ return X86Operand::CreateMem(Disp, Start, End, Size);
+ else
+ // When parsing inline assembly we set the base register to a non-zero value
+ // as we don't know the actual value at this time. This is necessary to
+ // get the matching correct in some cases.
+ return X86Operand::CreateMem(/*SegReg*/0, Disp, /*BaseReg*/1, /*IndexReg*/0,
+ /*Scale*/1, Start, End, Size, NeedSizeDir);
+}
+
+/// Parse the '.' operator.
+bool X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp,
+ const MCExpr **NewDisp,
+ SmallString<64> &Err) {
+ AsmToken Tok = *&Parser.getTok();
+ uint64_t OrigDispVal, DotDispVal;
+
+ // FIXME: Handle non-constant expressions.
+ if (const MCConstantExpr *OrigDisp = dyn_cast<MCConstantExpr>(Disp)) {
+ OrigDispVal = OrigDisp->getValue();
+ } else {
+ Err = "Non-constant offsets are not supported!";
+ return true;
+ }
+
+ // Drop the '.'.
+ StringRef DotDispStr = Tok.getString().drop_front(1);
+
+ // .Imm gets lexed as a real.
+ if (Tok.is(AsmToken::Real)) {
+ APInt DotDisp;
+ DotDispStr.getAsInteger(10, DotDisp);
+ DotDispVal = DotDisp.getZExtValue();
+ } else if (Tok.is(AsmToken::Identifier)) {
+ // We should only see an identifier when parsing the original inline asm.
+ // The front-end should rewrite this in terms of immediates.
+ assert (isParsingInlineAsm() && "Unexpected field name!");
+
+ unsigned DotDisp;
+ std::pair<StringRef, StringRef> BaseMember = DotDispStr.split('.');
+ if (SemaCallback->LookupInlineAsmField(BaseMember.first, BaseMember.second,
+ DotDisp)) {
+ Err = "Unable to lookup field reference!";
+ return true;
+ }
+ DotDispVal = DotDisp;
+ } else {
+ Err = "Unexpected token type!";
+ return true;
+ }
+
+ if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
+ SMLoc Loc = SMLoc::getFromPointer(DotDispStr.data());
+ unsigned Len = DotDispStr.size();
+ unsigned Val = OrigDispVal + DotDispVal;
+ InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_DotOperator, Loc, Len,
+ Val));
+ }
+
+ *NewDisp = MCConstantExpr::Create(OrigDispVal + DotDispVal, getContext());
+ return false;
+}
+
+/// Parse the 'offset' operator. This operator is used to specify the
+/// location rather then the content of a variable.
+X86Operand *X86AsmParser::ParseIntelOffsetOfOperator(SMLoc Start) {
+ SMLoc OffsetOfLoc = Start;
+ Parser.Lex(); // Eat offset.
+ Start = Parser.getTok().getLoc();
+ assert (Parser.getTok().is(AsmToken::Identifier) && "Expected an identifier");
+
+ SMLoc End;
+ const MCExpr *Val;
+ if (getParser().ParseExpression(Val, End))
+ return ErrorOperand(Start, "Unable to parse expression!");
+
+ End = Parser.getTok().getLoc();
+
+ // Don't emit the offset operator.
+ InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Skip, OffsetOfLoc, 7));
+
+ // The offset operator will have an 'r' constraint, thus we need to create
+ // register operand to ensure proper matching. Just pick a GPR based on
+ // the size of a pointer.
+ unsigned RegNo = is64BitMode() ? X86::RBX : X86::EBX;
+ return X86Operand::CreateReg(RegNo, Start, End, OffsetOfLoc);
+}
+
+/// Parse the 'TYPE' operator. The TYPE operator returns the size of a C or
+/// C++ type or variable. If the variable is an array, TYPE returns the size of
+/// a single element of the array.
+X86Operand *X86AsmParser::ParseIntelTypeOperator(SMLoc Start) {
+ SMLoc TypeLoc = Start;
+ Parser.Lex(); // Eat offset.
+ Start = Parser.getTok().getLoc();
+ assert (Parser.getTok().is(AsmToken::Identifier) && "Expected an identifier");
+
+ SMLoc End;
+ const MCExpr *Val;
+ if (getParser().ParseExpression(Val, End))
+ return 0;
+
+ End = Parser.getTok().getLoc();
+
+ unsigned Size = 0;
+ if (const MCSymbolRefExpr *SymRef = dyn_cast<MCSymbolRefExpr>(Val)) {
+ const MCSymbol &Sym = SymRef->getSymbol();
+ // FIXME: The SemaLookup will fail if the name is anything other then an
+ // identifier.
+ // FIXME: Pass a valid SMLoc.
+ if (!SemaCallback->LookupInlineAsmIdentifier(Sym.getName(), NULL, Size))
+ return ErrorOperand(Start, "Unable to lookup TYPE of expr!");
+
+ Size /= 8; // Size is in terms of bits, but we want bytes in the context.
+ }
+
+ // Rewrite the type operator and the C or C++ type or variable in terms of an
+ // immediate. E.g. TYPE foo -> $$4
+ unsigned Len = End.getPointer() - TypeLoc.getPointer();
+ InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, TypeLoc, Len, Size));
+
+ const MCExpr *Imm = MCConstantExpr::Create(Size, getContext());
+ return X86Operand::CreateImm(Imm, Start, End, /*NeedAsmRewrite*/false);
}
X86Operand *X86AsmParser::ParseIntelOperand() {
SMLoc Start = Parser.getTok().getLoc(), End;
+ // offset operator.
+ StringRef AsmTokStr = Parser.getTok().getString();
+ if ((AsmTokStr == "offset" || AsmTokStr == "OFFSET") &&
+ isParsingInlineAsm())
+ return ParseIntelOffsetOfOperator(Start);
+
+ // Type directive.
+ if ((AsmTokStr == "type" || AsmTokStr == "TYPE") &&
+ isParsingInlineAsm())
+ return ParseIntelTypeOperator(Start);
+
+ // Unsupported directives.
+ if (isParsingIntelSyntax() &&
+ (AsmTokStr == "size" || AsmTokStr == "SIZE" ||
+ AsmTokStr == "length" || AsmTokStr == "LENGTH"))
+ return ErrorOperand(Start, "Unsupported directive!");
+
// immediate.
if (getLexer().is(AsmToken::Integer) || getLexer().is(AsmToken::Real) ||
getLexer().is(AsmToken::Minus)) {
@@ -769,12 +1003,17 @@
// register
unsigned RegNo = 0;
if (!ParseRegister(RegNo, Start, End)) {
- End = Parser.getTok().getLoc();
- return X86Operand::CreateReg(RegNo, Start, End);
+ // If this is a segment register followed by a ':', then this is the start
+ // of a memory reference, otherwise this is a normal register reference.
+ if (getLexer().isNot(AsmToken::Colon))
+ return X86Operand::CreateReg(RegNo, Start, Parser.getTok().getLoc());
+
+ getParser().Lex(); // Eat the colon.
+ return ParseIntelMemOperand(RegNo, Start);
}
// mem operand
- return ParseIntelMemOperand();
+ return ParseIntelMemOperand(0, Start);
}
X86Operand *X86AsmParser::ParseATTOperand() {
@@ -972,8 +1211,9 @@
}
bool X86AsmParser::
-ParseInstruction(StringRef Name, SMLoc NameLoc,
+ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ InstInfo = &Info;
StringRef PatchedName = Name;
// FIXME: Hack to recognize setneb as setne.
@@ -1509,28 +1749,18 @@
}
bool X86AsmParser::
-MatchAndEmitInstruction(SMLoc IDLoc,
+MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- MCStreamer &Out) {
- SmallVector<MCInst, 2> Insts;
- bool Error = MatchInstruction(IDLoc, Operands, Insts);
- if (!Error)
- for (unsigned i = 0, e = Insts.size(); i != e; ++i)
- Out.EmitInstruction(Insts[i]);
- return Error;
-}
-
-bool X86AsmParser::
-MatchInstruction(SMLoc IDLoc,
- SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- SmallVectorImpl<MCInst> &MCInsts) {
+ MCStreamer &Out, unsigned &ErrorInfo,
+ bool MatchingInlineAsm) {
assert(!Operands.empty() && "Unexpect empty operand list!");
X86Operand *Op = static_cast<X86Operand*>(Operands[0]);
assert(Op->isToken() && "Leading operand should always be a mnemonic!");
+ ArrayRef<SMRange> EmptyRanges = ArrayRef<SMRange>();
// First, handle aliases that expand to multiple instructions.
// FIXME: This should be replaced with a real .td file alias mechanism.
- // Also, MatchInstructionImpl should do actually *do* the EmitInstruction
+ // Also, MatchInstructionImpl should actually *do* the EmitInstruction
// call.
if (Op->getToken() == "fstsw" || Op->getToken() == "fstcw" ||
Op->getToken() == "fstsww" || Op->getToken() == "fstcww" ||
@@ -1539,7 +1769,8 @@
MCInst Inst;
Inst.setOpcode(X86::WAIT);
Inst.setLoc(IDLoc);
- MCInsts.push_back(Inst);
+ if (!MatchingInlineAsm)
+ Out.EmitInstruction(Inst);
const char *Repl =
StringSwitch<const char*>(Op->getToken())
@@ -1558,28 +1789,30 @@
}
bool WasOriginallyInvalidOperand = false;
- unsigned OrigErrorInfo;
MCInst Inst;
// First, try a direct match.
- switch (MatchInstructionImpl(Operands, Inst, OrigErrorInfo,
+ switch (MatchInstructionImpl(Operands, Inst,
+ ErrorInfo, MatchingInlineAsm,
isParsingIntelSyntax())) {
default: break;
case Match_Success:
// Some instructions need post-processing to, for example, tweak which
// encoding is selected. Loop on it while changes happen so the
// individual transformations can chain off each other.
- while (processInstruction(Inst, Operands))
- ;
+ if (!MatchingInlineAsm)
+ while (processInstruction(Inst, Operands))
+ ;
Inst.setLoc(IDLoc);
- MCInsts.push_back(Inst);
+ if (!MatchingInlineAsm)
+ Out.EmitInstruction(Inst);
+ Opcode = Inst.getOpcode();
return false;
case Match_MissingFeature:
- Error(IDLoc, "instruction requires a CPU feature not currently enabled");
+ Error(IDLoc, "instruction requires a CPU feature not currently enabled",
+ EmptyRanges, MatchingInlineAsm);
return true;
- case Match_ConversionFail:
- return Error(IDLoc, "unable to convert operands to instruction");
case Match_InvalidOperand:
WasOriginallyInvalidOperand = true;
break;
@@ -1612,13 +1845,17 @@
unsigned ErrorInfoIgnore;
unsigned Match1, Match2, Match3, Match4;
- Match1 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore);
+ Match1 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
+ isParsingIntelSyntax());
Tmp[Base.size()] = Suffixes[1];
- Match2 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore);
+ Match2 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
+ isParsingIntelSyntax());
Tmp[Base.size()] = Suffixes[2];
- Match3 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore);
+ Match3 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
+ isParsingIntelSyntax());
Tmp[Base.size()] = Suffixes[3];
- Match4 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore);
+ Match4 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
+ isParsingIntelSyntax());
// Restore the old token.
Op->setTokenValue(Base);
@@ -1631,7 +1868,9 @@
(Match3 == Match_Success) + (Match4 == Match_Success);
if (NumSuccessfulMatches == 1) {
Inst.setLoc(IDLoc);
- MCInsts.push_back(Inst);
+ if (!MatchingInlineAsm)
+ Out.EmitInstruction(Inst);
+ Opcode = Inst.getOpcode();
return false;
}
@@ -1658,7 +1897,7 @@
OS << "'" << Base << MatchChars[i] << "'";
}
OS << ")";
- Error(IDLoc, OS.str());
+ Error(IDLoc, OS.str(), EmptyRanges, MatchingInlineAsm);
return true;
}
@@ -1669,31 +1908,36 @@
if ((Match1 == Match_MnemonicFail) && (Match2 == Match_MnemonicFail) &&
(Match3 == Match_MnemonicFail) && (Match4 == Match_MnemonicFail)) {
if (!WasOriginallyInvalidOperand) {
+ ArrayRef<SMRange> Ranges = MatchingInlineAsm ? EmptyRanges :
+ Op->getLocRange();
return Error(IDLoc, "invalid instruction mnemonic '" + Base + "'",
- Op->getLocRange());
+ Ranges, MatchingInlineAsm);
}
// Recover location info for the operand if we know which was the problem.
- if (OrigErrorInfo != ~0U) {
- if (OrigErrorInfo >= Operands.size())
- return Error(IDLoc, "too few operands for instruction");
+ if (ErrorInfo != ~0U) {
+ if (ErrorInfo >= Operands.size())
+ return Error(IDLoc, "too few operands for instruction",
+ EmptyRanges, MatchingInlineAsm);
- X86Operand *Operand = (X86Operand*)Operands[OrigErrorInfo];
+ X86Operand *Operand = (X86Operand*)Operands[ErrorInfo];
if (Operand->getStartLoc().isValid()) {
SMRange OperandRange = Operand->getLocRange();
return Error(Operand->getStartLoc(), "invalid operand for instruction",
- OperandRange);
+ OperandRange, MatchingInlineAsm);
}
}
- return Error(IDLoc, "invalid operand for instruction");
+ return Error(IDLoc, "invalid operand for instruction", EmptyRanges,
+ MatchingInlineAsm);
}
// If one instruction matched with a missing feature, report this as a
// missing feature.
if ((Match1 == Match_MissingFeature) + (Match2 == Match_MissingFeature) +
(Match3 == Match_MissingFeature) + (Match4 == Match_MissingFeature) == 1){
- Error(IDLoc, "instruction requires a CPU feature not currently enabled");
+ Error(IDLoc, "instruction requires a CPU feature not currently enabled",
+ EmptyRanges, MatchingInlineAsm);
return true;
}
@@ -1701,12 +1945,14 @@
// operand failure.
if ((Match1 == Match_InvalidOperand) + (Match2 == Match_InvalidOperand) +
(Match3 == Match_InvalidOperand) + (Match4 == Match_InvalidOperand) == 1){
- Error(IDLoc, "invalid operand for instruction");
+ Error(IDLoc, "invalid operand for instruction", EmptyRanges,
+ MatchingInlineAsm);
return true;
}
// If all of these were an outright failure, report it in a useless way.
- Error(IDLoc, "unknown use of instruction mnemonic without a size suffix");
+ Error(IDLoc, "unknown use of instruction mnemonic without a size suffix",
+ EmptyRanges, MatchingInlineAsm);
return true;
}
@@ -1717,7 +1963,10 @@
return ParseDirectiveWord(2, DirectiveID.getLoc());
else if (IDVal.startswith(".code"))
return ParseDirectiveCode(IDVal, DirectiveID.getLoc());
- else if (IDVal.startswith(".intel_syntax")) {
+ else if (IDVal.startswith(".att_syntax")) {
+ getParser().setAssemblerDialect(0);
+ return false;
+ } else if (IDVal.startswith(".intel_syntax")) {
getParser().setAssemblerDialect(1);
if (getLexer().isNot(AsmToken::EndOfStatement)) {
if(Parser.getTok().getString() == "noprefix") {
Modified: llvm/branches/AMDILBackend/lib/Target/X86/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/X86/CMakeLists.txt?rev=172541&r1=172540&r2=172541&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/X86/CMakeLists.txt (original)
+++ llvm/branches/AMDILBackend/lib/Target/X86/CMakeLists.txt Tue Jan 15 11:16:16 2013
@@ -17,7 +17,6 @@
X86AsmPrinter.cpp
X86COFFMachineModuleInfo.cpp
X86CodeEmitter.cpp
- X86ELFWriterInfo.cpp
X86FastISel.cpp
X86FloatingPoint.cpp
X86FrameLowering.cpp
Modified: llvm/branches/AMDILBackend/lib/Target/X86/Disassembler/X86Disassembler.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/X86/Disassembler/X86Disassembler.cpp?rev=172541&r1=172540&r2=172541&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/X86/Disassembler/X86Disassembler.cpp (original)
+++ llvm/branches/AMDILBackend/lib/Target/X86/Disassembler/X86Disassembler.cpp Tue Jan 15 11:16:16 2013
@@ -44,7 +44,7 @@
dbgs() << file << ":" << line << ": " << s;
}
-const char *x86DisassemblerGetInstrName(unsigned Opcode, void *mii) {
+const char *x86DisassemblerGetInstrName(unsigned Opcode, const void *mii) {
const MCInstrInfo *MII = static_cast<const MCInstrInfo *>(mii);
return MII->getName(Opcode);
}
@@ -95,8 +95,8 @@
/// be a pointer to a MemoryObject.
/// @param byte - A pointer to the byte to be read.
/// @param address - The address to be read.
-static int regionReader(void* arg, uint8_t* byte, uint64_t address) {
- MemoryObject* region = static_cast<MemoryObject*>(arg);
+static int regionReader(const void* arg, uint8_t* byte, uint64_t address) {
+ const MemoryObject* region = static_cast<const MemoryObject*>(arg);
return region->readByte(address, byte);
}
@@ -135,10 +135,10 @@
int ret = decodeInstruction(&internalInstr,
regionReader,
- (void*)®ion,
+ (const void*)®ion,
loggerFn,
(void*)&vStream,
- (void*)MII,
+ (const void*)MII,
address,
fMode);
@@ -379,6 +379,8 @@
}
switch (type) {
+ case TYPE_XMM32:
+ case TYPE_XMM64:
case TYPE_XMM128:
mcInst.addOperand(MCOperand::CreateReg(X86::XMM0 + (immediate >> 4)));
return;
Modified: llvm/branches/AMDILBackend/lib/Target/X86/Disassembler/X86Disassembler.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/X86/Disassembler/X86Disassembler.h?rev=172541&r1=172540&r2=172541&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/X86/Disassembler/X86Disassembler.h (original)
+++ llvm/branches/AMDILBackend/lib/Target/X86/Disassembler/X86Disassembler.h Tue Jan 15 11:16:16 2013
@@ -78,7 +78,7 @@
uint16_t operands;
#define INSTRUCTION_IDS \
- unsigned instructionIDs;
+ uint16_t instructionIDs;
#include "X86DisassemblerDecoderCommon.h"
Modified: llvm/branches/AMDILBackend/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c?rev=172541&r1=172540&r2=172541&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c (original)
+++ llvm/branches/AMDILBackend/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c Tue Jan 15 11:16:16 2013
@@ -138,6 +138,10 @@
if (modFromModRM(modRM) == 0x3)
return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)+8];
return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)];
+ case MODRM_SPLITMISC:
+ if (modFromModRM(modRM) == 0x3)
+ return modRMTable[dec->instructionIDs+(modRM & 0x3f)+8];
+ return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)];
case MODRM_FULL:
return modRMTable[dec->instructionIDs+modRM];
}
@@ -200,7 +204,7 @@
insn->readerCursor + offset); \
if (ret) \
return ret; \
- combined = combined | ((type)byte << ((type)offset * 8)); \
+ combined = combined | ((uint64_t)byte << (offset * 8)); \
} \
*ptr = combined; \
insn->readerCursor += sizeof(type); \
@@ -690,7 +694,7 @@
* @param orig - The instruction that is not 16-bit
* @param equiv - The instruction that is 16-bit
*/
-static BOOL is16BitEquvalent(const char* orig, const char* equiv) {
+static BOOL is16BitEquivalent(const char* orig, const char* equiv) {
off_t i;
for (i = 0;; i++) {
@@ -719,7 +723,7 @@
* @return - 0 if the ModR/M could be read when needed or was not needed;
* nonzero otherwise.
*/
-static int getID(struct InternalInstruction* insn, void *miiArg) {
+static int getID(struct InternalInstruction* insn, const void *miiArg) {
uint8_t attrMask;
uint16_t instructionID;
@@ -856,7 +860,7 @@
specWithOpSizeName =
x86DisassemblerGetInstrName(instructionIDWithOpsize, miiArg);
- if (is16BitEquvalent(specName, specWithOpSizeName)) {
+ if (is16BitEquivalent(specName, specWithOpSizeName)) {
insn->instructionID = instructionIDWithOpsize;
insn->spec = specifierForUID(instructionIDWithOpsize);
} else {
@@ -1621,10 +1625,10 @@
*/
int decodeInstruction(struct InternalInstruction* insn,
byteReader_t reader,
- void* readerArg,
+ const void* readerArg,
dlog_t logger,
void* loggerArg,
- void* miiArg,
+ const void* miiArg,
uint64_t startLoc,
DisassemblerMode mode) {
memset(insn, 0, sizeof(struct InternalInstruction));
Modified: llvm/branches/AMDILBackend/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h?rev=172541&r1=172540&r2=172541&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h (original)
+++ llvm/branches/AMDILBackend/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h Tue Jan 15 11:16:16 2013
@@ -24,7 +24,7 @@
uint16_t operands;
#define INSTRUCTION_IDS \
- unsigned instructionIDs;
+ uint16_t instructionIDs;
#include "X86DisassemblerDecoderCommon.h"
@@ -403,7 +403,7 @@
* be read from.
* @return - -1 if the byte cannot be read for any reason; 0 otherwise.
*/
-typedef int (*byteReader_t)(void* arg, uint8_t* byte, uint64_t address);
+typedef int (*byteReader_t)(const void* arg, uint8_t* byte, uint64_t address);
/*
* dlog_t - Type for the logging function that the consumer can provide to
@@ -422,7 +422,7 @@
/* Reader interface (C) */
byteReader_t reader;
/* Opaque value passed to the reader */
- void* readerArg;
+ const void* readerArg;
/* The address of the next byte to read via the reader */
uint64_t readerCursor;
@@ -561,10 +561,10 @@
*/
int decodeInstruction(struct InternalInstruction* insn,
byteReader_t reader,
- void* readerArg,
+ const void* readerArg,
dlog_t logger,
void* loggerArg,
- void* miiArg,
+ const void* miiArg,
uint64_t startLoc,
DisassemblerMode mode);
@@ -579,7 +579,7 @@
unsigned line,
const char *s);
-const char *x86DisassemblerGetInstrName(unsigned Opcode, void *mii);
+const char *x86DisassemblerGetInstrName(unsigned Opcode, const void *mii);
#ifdef __cplusplus
}
Modified: llvm/branches/AMDILBackend/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h?rev=172541&r1=172540&r2=172541&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h (original)
+++ llvm/branches/AMDILBackend/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h Tue Jan 15 11:16:16 2013
@@ -160,6 +160,10 @@
* MODRM_SPLITRM - If the ModR/M byte is between 0x00 and 0xbf, the opcode
* corresponds to one instruction; otherwise, it corresponds to
* a different instruction.
+ * MODRM_SPLITMISC- If the ModR/M byte is between 0x00 and 0xbf, ModR/M byte
+ * divided by 8 is used to select instruction; otherwise, each
+ * value of the ModR/M byte could correspond to a different
+ * instruction.
* MODRM_SPLITREG - ModR/M byte divided by 8 is used to select instruction. This
corresponds to instructions that use reg field as opcode
* MODRM_FULL - Potentially, each value of the ModR/M byte could correspond
@@ -169,6 +173,7 @@
#define MODRMTYPES \
ENUM_ENTRY(MODRM_ONEENTRY) \
ENUM_ENTRY(MODRM_SPLITRM) \
+ ENUM_ENTRY(MODRM_SPLITMISC) \
ENUM_ENTRY(MODRM_SPLITREG) \
ENUM_ENTRY(MODRM_FULL)
Modified: llvm/branches/AMDILBackend/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp?rev=172541&r1=172540&r2=172541&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp (original)
+++ llvm/branches/AMDILBackend/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp Tue Jan 15 11:16:16 2013
@@ -15,6 +15,7 @@
#define DEBUG_TYPE "asm-printer"
#include "X86ATTInstPrinter.h"
#include "X86InstComments.h"
+#include "MCTargetDesc/X86BaseInfo.h"
#include "MCTargetDesc/X86MCTargetDesc.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCAsmInfo.h"
@@ -33,11 +34,19 @@
void X86ATTInstPrinter::printRegName(raw_ostream &OS,
unsigned RegNo) const {
- OS << '%' << getRegisterName(RegNo);
+ OS << markup("<reg:")
+ << '%' << getRegisterName(RegNo)
+ << markup(">");
}
void X86ATTInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
StringRef Annot) {
+ const MCInstrDesc &Desc = MII.get(MI->getOpcode());
+ uint64_t TSFlags = Desc.TSFlags;
+
+ if (TSFlags & X86II::LOCK)
+ OS << "\tlock\n";
+
// Try to print any aliases first.
if (!printAliasInstr(MI, OS))
printInstruction(MI, OS);
@@ -52,7 +61,8 @@
void X86ATTInstPrinter::printSSECC(const MCInst *MI, unsigned Op,
raw_ostream &O) {
- switch (MI->getOperand(Op).getImm()) {
+ int64_t Imm = MI->getOperand(Op).getImm() & 0xf;
+ switch (Imm) {
default: llvm_unreachable("Invalid ssecc argument!");
case 0: O << "eq"; break;
case 1: O << "lt"; break;
@@ -70,6 +80,30 @@
case 0xd: O << "ge"; break;
case 0xe: O << "gt"; break;
case 0xf: O << "true"; break;
+ }
+}
+
+void X86ATTInstPrinter::printAVXCC(const MCInst *MI, unsigned Op,
+ raw_ostream &O) {
+ int64_t Imm = MI->getOperand(Op).getImm() & 0x1f;
+ switch (Imm) {
+ default: llvm_unreachable("Invalid avxcc argument!");
+ case 0: O << "eq"; break;
+ case 1: O << "lt"; break;
+ case 2: O << "le"; break;
+ case 3: O << "unord"; break;
+ case 4: O << "neq"; break;
+ case 5: O << "nlt"; break;
+ case 6: O << "nle"; break;
+ case 7: O << "ord"; break;
+ case 8: O << "eq_uq"; break;
+ case 9: O << "nge"; break;
+ case 0xa: O << "ngt"; break;
+ case 0xb: O << "false"; break;
+ case 0xc: O << "neq_oq"; break;
+ case 0xd: O << "ge"; break;
+ case 0xe: O << "gt"; break;
+ case 0xf: O << "true"; break;
case 0x10: O << "eq_os"; break;
case 0x11: O << "lt_oq"; break;
case 0x12: O << "le_oq"; break;
@@ -89,12 +123,12 @@
}
}
-/// print_pcrel_imm - This is used to print an immediate value that ends up
+/// printPCRelImm - This is used to print an immediate value that ends up
/// being encoded as a pc-relative value (e.g. for jumps and calls). These
/// print slightly differently than normal immediates. For example, a $ is not
/// emitted.
-void X86ATTInstPrinter::print_pcrel_imm(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
+void X86ATTInstPrinter::printPCRelImm(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
const MCOperand &Op = MI->getOperand(OpNo);
if (Op.isImm())
O << Op.getImm();
@@ -119,17 +153,21 @@
raw_ostream &O) {
const MCOperand &Op = MI->getOperand(OpNo);
if (Op.isReg()) {
- O << '%' << getRegisterName(Op.getReg());
+ printRegName(O, Op.getReg());
} else if (Op.isImm()) {
// Print X86 immediates as signed values.
- O << '$' << (int64_t)Op.getImm();
+ O << markup("<imm:")
+ << '$' << (int64_t)Op.getImm()
+ << markup(">");
if (CommentStream && (Op.getImm() > 255 || Op.getImm() < -256))
*CommentStream << format("imm = 0x%" PRIX64 "\n", (uint64_t)Op.getImm());
} else {
assert(Op.isExpr() && "unknown operand kind in printOperand");
- O << '$' << *Op.getExpr();
+ O << markup("<imm:")
+ << '$' << *Op.getExpr()
+ << markup(">");
}
}
@@ -140,6 +178,8 @@
const MCOperand &DispSpec = MI->getOperand(Op+3);
const MCOperand &SegReg = MI->getOperand(Op+4);
+ O << markup("<mem:");
+
// If this has a segment register, print it.
if (SegReg.getReg()) {
printOperand(MI, Op+4, O);
@@ -164,9 +204,15 @@
O << ',';
printOperand(MI, Op+2, O);
unsigned ScaleVal = MI->getOperand(Op+1).getImm();
- if (ScaleVal != 1)
- O << ',' << ScaleVal;
+ if (ScaleVal != 1) {
+ O << ','
+ << markup("<imm:")
+ << ScaleVal
+ << markup(">");
+ }
}
O << ')';
}
+
+ O << markup(">");
}
Modified: llvm/branches/AMDILBackend/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h?rev=172541&r1=172540&r2=172541&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h (original)
+++ llvm/branches/AMDILBackend/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h Tue Jan 15 11:16:16 2013
@@ -40,7 +40,8 @@
void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &OS);
void printMemReference(const MCInst *MI, unsigned Op, raw_ostream &OS);
void printSSECC(const MCInst *MI, unsigned Op, raw_ostream &OS);
- void print_pcrel_imm(const MCInst *MI, unsigned OpNo, raw_ostream &OS);
+ void printAVXCC(const MCInst *MI, unsigned Op, raw_ostream &OS);
+ void printPCRelImm(const MCInst *MI, unsigned OpNo, raw_ostream &OS);
void printopaquemem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
printMemReference(MI, OpNo, O);
Modified: llvm/branches/AMDILBackend/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp?rev=172541&r1=172540&r2=172541&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp (original)
+++ llvm/branches/AMDILBackend/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp Tue Jan 15 11:16:16 2013
@@ -1,4 +1,4 @@
-//===-- X86IntelInstPrinter.cpp - AT&T assembly instruction printing ------===//
+//===-- X86IntelInstPrinter.cpp - Intel assembly instruction printing -----===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
//
-// This file includes code for rendering MCInst instances as AT&T-style
+// This file includes code for rendering MCInst instances as Intel-style
// assembly.
//
//===----------------------------------------------------------------------===//
@@ -15,6 +15,7 @@
#define DEBUG_TYPE "asm-printer"
#include "X86IntelInstPrinter.h"
#include "X86InstComments.h"
+#include "MCTargetDesc/X86BaseInfo.h"
#include "MCTargetDesc/X86MCTargetDesc.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCExpr.h"
@@ -32,6 +33,12 @@
void X86IntelInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
StringRef Annot) {
+ const MCInstrDesc &Desc = MII.get(MI->getOpcode());
+ uint64_t TSFlags = Desc.TSFlags;
+
+ if (TSFlags & X86II::LOCK)
+ OS << "\tlock\n";
+
printInstruction(MI, OS);
// Next always print the annotation.
@@ -44,7 +51,8 @@
void X86IntelInstPrinter::printSSECC(const MCInst *MI, unsigned Op,
raw_ostream &O) {
- switch (MI->getOperand(Op).getImm()) {
+ int64_t Imm = MI->getOperand(Op).getImm() & 0xf;
+ switch (Imm) {
default: llvm_unreachable("Invalid ssecc argument!");
case 0: O << "eq"; break;
case 1: O << "lt"; break;
@@ -62,6 +70,30 @@
case 0xd: O << "ge"; break;
case 0xe: O << "gt"; break;
case 0xf: O << "true"; break;
+ }
+}
+
+void X86IntelInstPrinter::printAVXCC(const MCInst *MI, unsigned Op,
+ raw_ostream &O) {
+ int64_t Imm = MI->getOperand(Op).getImm() & 0x1f;
+ switch (Imm) {
+ default: llvm_unreachable("Invalid avxcc argument!");
+ case 0: O << "eq"; break;
+ case 1: O << "lt"; break;
+ case 2: O << "le"; break;
+ case 3: O << "unord"; break;
+ case 4: O << "neq"; break;
+ case 5: O << "nlt"; break;
+ case 6: O << "nle"; break;
+ case 7: O << "ord"; break;
+ case 8: O << "eq_uq"; break;
+ case 9: O << "nge"; break;
+ case 0xa: O << "ngt"; break;
+ case 0xb: O << "false"; break;
+ case 0xc: O << "neq_oq"; break;
+ case 0xd: O << "ge"; break;
+ case 0xe: O << "gt"; break;
+ case 0xf: O << "true"; break;
case 0x10: O << "eq_os"; break;
case 0x11: O << "lt_oq"; break;
case 0x12: O << "le_oq"; break;
@@ -78,14 +110,13 @@
case 0x1d: O << "ge_oq"; break;
case 0x1e: O << "gt_oq"; break;
case 0x1f: O << "true_us"; break;
-
}
}
-/// print_pcrel_imm - This is used to print an immediate value that ends up
+/// printPCRelImm - This is used to print an immediate value that ends up
/// being encoded as a pc-relative value.
-void X86IntelInstPrinter::print_pcrel_imm(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
+void X86IntelInstPrinter::printPCRelImm(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
const MCOperand &Op = MI->getOperand(OpNo);
if (Op.isImm())
O << Op.getImm();
@@ -153,8 +184,7 @@
printOperand(MI, Op+2, O);
NeedPlus = true;
}
-
-
+
if (!DispSpec.isImm()) {
if (NeedPlus) O << " + ";
assert(DispSpec.isExpr() && "non-immediate displacement for LEA?");
Modified: llvm/branches/AMDILBackend/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h?rev=172541&r1=172540&r2=172541&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h (original)
+++ llvm/branches/AMDILBackend/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h Tue Jan 15 11:16:16 2013
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
//
-// This class prints an X86 MCInst to intel style .s file syntax.
+// This class prints an X86 MCInst to Intel style .s file syntax.
//
//===----------------------------------------------------------------------===//
@@ -37,7 +37,8 @@
void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printMemReference(const MCInst *MI, unsigned Op, raw_ostream &O);
void printSSECC(const MCInst *MI, unsigned Op, raw_ostream &O);
- void print_pcrel_imm(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printAVXCC(const MCInst *MI, unsigned Op, raw_ostream &O);
+ void printPCRelImm(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printopaquemem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
O << "OPAQUE PTR ";
Modified: llvm/branches/AMDILBackend/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp?rev=172541&r1=172540&r2=172541&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp (original)
+++ llvm/branches/AMDILBackend/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp Tue Jan 15 11:16:16 2013
@@ -66,9 +66,10 @@
};
class X86AsmBackend : public MCAsmBackend {
+ StringRef CPU;
public:
- X86AsmBackend(const Target &T)
- : MCAsmBackend() {}
+ X86AsmBackend(const Target &T, StringRef _CPU)
+ : MCAsmBackend(), CPU(_CPU) {}
unsigned getNumFixupKinds() const {
return X86::NumTargetFixupKinds;
@@ -278,9 +279,9 @@
Res.setOpcode(RelaxedOp);
}
-/// writeNopData - Write optimal nops to the output file for the \arg Count
+/// writeNopData - Write optimal nops to the output file for the \p Count
/// bytes. This returns the number of bytes written. It may return 0 if
-/// the \arg Count is more than the maximum optimal nops.
+/// the \p Count is more than the maximum optimal nops.
bool X86AsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
static const uint8_t Nops[10][10] = {
// nop
@@ -305,6 +306,15 @@
{0x66, 0x2e, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
};
+ // This CPU doesnt support long nops. If needed add more.
+ // FIXME: Can we get this from the subtarget somehow?
+ if (CPU == "generic" || CPU == "i386" || CPU == "i486" || CPU == "i586" ||
+ CPU == "pentium" || CPU == "pentium-mmx" || CPU == "geode") {
+ for (uint64_t i = 0; i < Count; ++i)
+ OW->Write8(0x90);
+ return true;
+ }
+
// Write an optimal sequence for the first 15 bytes.
const uint64_t OptimalCount = (Count < 16) ? Count : 15;
const uint64_t Prefixes = OptimalCount <= 10 ? 0 : OptimalCount - 10;
@@ -327,8 +337,8 @@
class ELFX86AsmBackend : public X86AsmBackend {
public:
uint8_t OSABI;
- ELFX86AsmBackend(const Target &T, uint8_t _OSABI)
- : X86AsmBackend(T), OSABI(_OSABI) {
+ ELFX86AsmBackend(const Target &T, uint8_t _OSABI, StringRef CPU)
+ : X86AsmBackend(T, CPU), OSABI(_OSABI) {
HasReliableSymbolDifference = true;
}
@@ -340,21 +350,21 @@
class ELFX86_32AsmBackend : public ELFX86AsmBackend {
public:
- ELFX86_32AsmBackend(const Target &T, uint8_t OSABI)
- : ELFX86AsmBackend(T, OSABI) {}
+ ELFX86_32AsmBackend(const Target &T, uint8_t OSABI, StringRef CPU)
+ : ELFX86AsmBackend(T, OSABI, CPU) {}
MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
- return createX86ELFObjectWriter(OS, /*Is64Bit*/ false, OSABI);
+ return createX86ELFObjectWriter(OS, /*IsELF64*/ false, OSABI, ELF::EM_386);
}
};
class ELFX86_64AsmBackend : public ELFX86AsmBackend {
public:
- ELFX86_64AsmBackend(const Target &T, uint8_t OSABI)
- : ELFX86AsmBackend(T, OSABI) {}
+ ELFX86_64AsmBackend(const Target &T, uint8_t OSABI, StringRef CPU)
+ : ELFX86AsmBackend(T, OSABI, CPU) {}
MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
- return createX86ELFObjectWriter(OS, /*Is64Bit*/ true, OSABI);
+ return createX86ELFObjectWriter(OS, /*IsELF64*/ true, OSABI, ELF::EM_X86_64);
}
};
@@ -362,8 +372,8 @@
bool Is64Bit;
public:
- WindowsX86AsmBackend(const Target &T, bool is64Bit)
- : X86AsmBackend(T)
+ WindowsX86AsmBackend(const Target &T, bool is64Bit, StringRef CPU)
+ : X86AsmBackend(T, CPU)
, Is64Bit(is64Bit) {
}
@@ -374,14 +384,14 @@
class DarwinX86AsmBackend : public X86AsmBackend {
public:
- DarwinX86AsmBackend(const Target &T)
- : X86AsmBackend(T) { }
+ DarwinX86AsmBackend(const Target &T, StringRef CPU)
+ : X86AsmBackend(T, CPU) { }
};
class DarwinX86_32AsmBackend : public DarwinX86AsmBackend {
public:
- DarwinX86_32AsmBackend(const Target &T)
- : DarwinX86AsmBackend(T) {}
+ DarwinX86_32AsmBackend(const Target &T, StringRef CPU)
+ : DarwinX86AsmBackend(T, CPU) {}
MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
return createX86MachObjectWriter(OS, /*Is64Bit=*/false,
@@ -392,8 +402,8 @@
class DarwinX86_64AsmBackend : public DarwinX86AsmBackend {
public:
- DarwinX86_64AsmBackend(const Target &T)
- : DarwinX86AsmBackend(T) {
+ DarwinX86_64AsmBackend(const Target &T, StringRef CPU)
+ : DarwinX86AsmBackend(T, CPU) {
HasReliableSymbolDifference = true;
}
@@ -439,28 +449,28 @@
} // end anonymous namespace
-MCAsmBackend *llvm::createX86_32AsmBackend(const Target &T, StringRef TT) {
+MCAsmBackend *llvm::createX86_32AsmBackend(const Target &T, StringRef TT, StringRef CPU) {
Triple TheTriple(TT);
if (TheTriple.isOSDarwin() || TheTriple.getEnvironment() == Triple::MachO)
- return new DarwinX86_32AsmBackend(T);
+ return new DarwinX86_32AsmBackend(T, CPU);
- if (TheTriple.isOSWindows())
- return new WindowsX86AsmBackend(T, false);
+ if (TheTriple.isOSWindows() && TheTriple.getEnvironment() != Triple::ELF)
+ return new WindowsX86AsmBackend(T, false, CPU);
uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS());
- return new ELFX86_32AsmBackend(T, OSABI);
+ return new ELFX86_32AsmBackend(T, OSABI, CPU);
}
-MCAsmBackend *llvm::createX86_64AsmBackend(const Target &T, StringRef TT) {
+MCAsmBackend *llvm::createX86_64AsmBackend(const Target &T, StringRef TT, StringRef CPU) {
Triple TheTriple(TT);
if (TheTriple.isOSDarwin() || TheTriple.getEnvironment() == Triple::MachO)
- return new DarwinX86_64AsmBackend(T);
+ return new DarwinX86_64AsmBackend(T, CPU);
- if (TheTriple.isOSWindows())
- return new WindowsX86AsmBackend(T, true);
+ if (TheTriple.isOSWindows() && TheTriple.getEnvironment() != Triple::ELF)
+ return new WindowsX86AsmBackend(T, true, CPU);
uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS());
- return new ELFX86_64AsmBackend(T, OSABI);
+ return new ELFX86_64AsmBackend(T, OSABI, CPU);
}
Modified: llvm/branches/AMDILBackend/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/X86/MCTargetDesc/X86BaseInfo.h?rev=172541&r1=172540&r2=172541&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/X86/MCTargetDesc/X86BaseInfo.h (original)
+++ llvm/branches/AMDILBackend/lib/Target/X86/MCTargetDesc/X86BaseInfo.h Tue Jan 15 11:16:16 2013
@@ -276,9 +276,9 @@
MRM_C1 = 33, MRM_C2 = 34, MRM_C3 = 35, MRM_C4 = 36,
MRM_C8 = 37, MRM_C9 = 38, MRM_E8 = 39, MRM_F0 = 40,
MRM_F8 = 41, MRM_F9 = 42, MRM_D0 = 45, MRM_D1 = 46,
- MRM_D4 = 47, MRM_D8 = 48, MRM_D9 = 49, MRM_DA = 50,
- MRM_DB = 51, MRM_DC = 52, MRM_DD = 53, MRM_DE = 54,
- MRM_DF = 55,
+ MRM_D4 = 47, MRM_D5 = 48, MRM_D8 = 49, MRM_D9 = 50,
+ MRM_DA = 51, MRM_DB = 52, MRM_DC = 53, MRM_DD = 54,
+ MRM_DE = 55, MRM_DF = 56,
/// RawFrmImm8 - This is used for the ENTER instruction, which has two
/// immediates, the first of which is a 16-bit immediate (specified by
@@ -580,11 +580,11 @@
case X86II::MRM_E8: case X86II::MRM_F0:
case X86II::MRM_F8: case X86II::MRM_F9:
case X86II::MRM_D0: case X86II::MRM_D1:
- case X86II::MRM_D4: case X86II::MRM_D8:
- case X86II::MRM_D9: case X86II::MRM_DA:
- case X86II::MRM_DB: case X86II::MRM_DC:
- case X86II::MRM_DD: case X86II::MRM_DE:
- case X86II::MRM_DF:
+ case X86II::MRM_D4: case X86II::MRM_D5:
+ case X86II::MRM_D8: case X86II::MRM_D9:
+ case X86II::MRM_DA: case X86II::MRM_DB:
+ case X86II::MRM_DC: case X86II::MRM_DD:
+ case X86II::MRM_DE: case X86II::MRM_DF:
return -1;
}
}
Modified: llvm/branches/AMDILBackend/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp?rev=172541&r1=172540&r2=172541&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp (original)
+++ llvm/branches/AMDILBackend/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp Tue Jan 15 11:16:16 2013
@@ -20,7 +20,7 @@
namespace {
class X86ELFObjectWriter : public MCELFObjectTargetWriter {
public:
- X86ELFObjectWriter(bool is64Bit, uint8_t OSABI);
+ X86ELFObjectWriter(bool IsELF64, uint8_t OSABI, uint16_t EMachine);
virtual ~X86ELFObjectWriter();
protected:
@@ -30,10 +30,11 @@
};
}
-X86ELFObjectWriter::X86ELFObjectWriter(bool Is64Bit, uint8_t OSABI)
- : MCELFObjectTargetWriter(Is64Bit, OSABI,
- Is64Bit ? ELF::EM_X86_64 : ELF::EM_386,
- /*HasRelocationAddend*/ Is64Bit) {}
+X86ELFObjectWriter::X86ELFObjectWriter(bool IsELF64, uint8_t OSABI,
+ uint16_t EMachine)
+ : MCELFObjectTargetWriter(IsELF64, OSABI, EMachine,
+ // Only i386 uses Rel instead of RelA.
+ /*HasRelocationAddend*/ EMachine != ELF::EM_386) {}
X86ELFObjectWriter::~X86ELFObjectWriter()
{}
@@ -48,7 +49,7 @@
MCSymbolRefExpr::VariantKind Modifier = Target.isAbsolute() ?
MCSymbolRefExpr::VK_None : Target.getSymA()->getKind();
unsigned Type;
- if (is64Bit()) {
+ if (getEMachine() == ELF::EM_X86_64) {
if (IsPCRel) {
switch ((unsigned)Fixup.getKind()) {
default: llvm_unreachable("invalid fixup kind!");
@@ -130,7 +131,7 @@
case FK_Data_1: Type = ELF::R_X86_64_8; break;
}
}
- } else {
+ } else if (getEMachine() == ELF::EM_386) {
if (IsPCRel) {
switch ((unsigned)Fixup.getKind()) {
default: llvm_unreachable("invalid fixup kind!");
@@ -210,15 +211,17 @@
case FK_Data_1: Type = ELF::R_386_8; break;
}
}
- }
+ } else
+ llvm_unreachable("Unsupported ELF machine type.");
return Type;
}
MCObjectWriter *llvm::createX86ELFObjectWriter(raw_ostream &OS,
- bool Is64Bit,
- uint8_t OSABI) {
+ bool IsELF64,
+ uint8_t OSABI,
+ uint16_t EMachine) {
MCELFObjectTargetWriter *MOTW =
- new X86ELFObjectWriter(Is64Bit, OSABI);
+ new X86ELFObjectWriter(IsELF64, OSABI, EMachine);
return createELFObjectWriter(MOTW, OS, /*IsLittleEndian=*/true);
}
Modified: llvm/branches/AMDILBackend/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp?rev=172541&r1=172540&r2=172541&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp (original)
+++ llvm/branches/AMDILBackend/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp Tue Jan 15 11:16:16 2013
@@ -34,6 +34,10 @@
clEnumValN(Intel, "intel", "Emit Intel-style assembly"),
clEnumValEnd));
+static cl::opt<bool>
+MarkedJTDataRegions("mark-data-regions", cl::init(false),
+ cl::desc("Mark code section jump table data regions."),
+ cl::Hidden);
void X86MCAsmInfoDarwin::anchor() { }
@@ -59,6 +63,7 @@
SupportsDebugInformation = true;
DwarfUsesInlineInfoSection = true;
+ UseDataRegionDirectives = MarkedJTDataRegions;
// Exceptions handling
ExceptionsType = ExceptionHandling::DwarfCFI;
Modified: llvm/branches/AMDILBackend/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp?rev=172541&r1=172540&r2=172541&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp (original)
+++ llvm/branches/AMDILBackend/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp Tue Jan 15 11:16:16 2013
@@ -16,6 +16,7 @@
#include "MCTargetDesc/X86BaseInfo.h"
#include "MCTargetDesc/X86FixupKinds.h"
#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
@@ -28,8 +29,8 @@
namespace {
class X86MCCodeEmitter : public MCCodeEmitter {
- X86MCCodeEmitter(const X86MCCodeEmitter &); // DO NOT IMPLEMENT
- void operator=(const X86MCCodeEmitter &); // DO NOT IMPLEMENT
+ X86MCCodeEmitter(const X86MCCodeEmitter &) LLVM_DELETED_FUNCTION;
+ void operator=(const X86MCCodeEmitter &) LLVM_DELETED_FUNCTION;
const MCInstrInfo &MCII;
const MCSubtargetInfo &STI;
MCContext &Ctx;
@@ -51,8 +52,8 @@
return (STI.getFeatureBits() & X86::Mode64Bit) == 0;
}
- static unsigned GetX86RegNum(const MCOperand &MO) {
- return X86_MC::getX86RegNum(MO.getReg());
+ unsigned GetX86RegNum(const MCOperand &MO) const {
+ return Ctx.getRegisterInfo().getEncodingValue(MO.getReg()) & 0x7;
}
// On regular x86, both XMM0-XMM7 and XMM8-XMM15 are encoded in the range
@@ -64,8 +65,8 @@
// VEX.VVVV => XMM9 => ~9
//
// See table 4-35 of Intel AVX Programming Reference for details.
- static unsigned char getVEXRegisterEncoding(const MCInst &MI,
- unsigned OpNum) {
+ unsigned char getVEXRegisterEncoding(const MCInst &MI,
+ unsigned OpNum) const {
unsigned SrcReg = MI.getOperand(OpNum).getReg();
unsigned SrcRegNum = GetX86RegNum(MI.getOperand(OpNum));
if (X86II::isX86_64ExtendedReg(SrcReg))
@@ -560,15 +561,6 @@
}
- // Set the vector length to 256-bit if YMM0-YMM15 is used
- for (unsigned i = 0; i != MI.getNumOperands(); ++i) {
- if (!MI.getOperand(i).isReg())
- continue;
- unsigned SrcReg = MI.getOperand(i).getReg();
- if (SrcReg >= X86::YMM0 && SrcReg <= X86::YMM15)
- VEX_L = 1;
- }
-
// Classify VEX_B, VEX_4V, VEX_R, VEX_X
unsigned NumOps = Desc.getNumOperands();
unsigned CurOp = 0;
@@ -1129,13 +1121,13 @@
case X86II::MRM_C3: case X86II::MRM_C4:
case X86II::MRM_C8: case X86II::MRM_C9:
case X86II::MRM_D0: case X86II::MRM_D1:
- case X86II::MRM_D4: case X86II::MRM_D8:
- case X86II::MRM_D9: case X86II::MRM_DA:
- case X86II::MRM_DB: case X86II::MRM_DC:
- case X86II::MRM_DD: case X86II::MRM_DE:
- case X86II::MRM_DF: case X86II::MRM_E8:
- case X86II::MRM_F0: case X86II::MRM_F8:
- case X86II::MRM_F9:
+ case X86II::MRM_D4: case X86II::MRM_D5:
+ case X86II::MRM_D8: case X86II::MRM_D9:
+ case X86II::MRM_DA: case X86II::MRM_DB:
+ case X86II::MRM_DC: case X86II::MRM_DD:
+ case X86II::MRM_DE: case X86II::MRM_DF:
+ case X86II::MRM_E8: case X86II::MRM_F0:
+ case X86II::MRM_F8: case X86II::MRM_F9:
EmitByte(BaseOpcode, CurByte, OS);
unsigned char MRM;
@@ -1150,6 +1142,7 @@
case X86II::MRM_D0: MRM = 0xD0; break;
case X86II::MRM_D1: MRM = 0xD1; break;
case X86II::MRM_D4: MRM = 0xD4; break;
+ case X86II::MRM_D5: MRM = 0xD5; break;
case X86II::MRM_D8: MRM = 0xD8; break;
case X86II::MRM_D9: MRM = 0xD9; break;
case X86II::MRM_DA: MRM = 0xDA; break;
Modified: llvm/branches/AMDILBackend/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp?rev=172541&r1=172540&r2=172541&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp (original)
+++ llvm/branches/AMDILBackend/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp Tue Jan 15 11:16:16 2013
@@ -209,117 +209,10 @@
return DWARFFlavour::X86_32_Generic;
}
-/// getX86RegNum - This function maps LLVM register identifiers to their X86
-/// specific numbering, which is used in various places encoding instructions.
-unsigned X86_MC::getX86RegNum(unsigned RegNo) {
- switch(RegNo) {
- case X86::RAX: case X86::EAX: case X86::AX: case X86::AL: return N86::EAX;
- case X86::RCX: case X86::ECX: case X86::CX: case X86::CL: return N86::ECX;
- case X86::RDX: case X86::EDX: case X86::DX: case X86::DL: return N86::EDX;
- case X86::RBX: case X86::EBX: case X86::BX: case X86::BL: return N86::EBX;
- case X86::RSP: case X86::ESP: case X86::SP: case X86::SPL: case X86::AH:
- return N86::ESP;
- case X86::RBP: case X86::EBP: case X86::BP: case X86::BPL: case X86::CH:
- return N86::EBP;
- case X86::RSI: case X86::ESI: case X86::SI: case X86::SIL: case X86::DH:
- return N86::ESI;
- case X86::RDI: case X86::EDI: case X86::DI: case X86::DIL: case X86::BH:
- return N86::EDI;
-
- case X86::R8: case X86::R8D: case X86::R8W: case X86::R8B:
- return N86::EAX;
- case X86::R9: case X86::R9D: case X86::R9W: case X86::R9B:
- return N86::ECX;
- case X86::R10: case X86::R10D: case X86::R10W: case X86::R10B:
- return N86::EDX;
- case X86::R11: case X86::R11D: case X86::R11W: case X86::R11B:
- return N86::EBX;
- case X86::R12: case X86::R12D: case X86::R12W: case X86::R12B:
- return N86::ESP;
- case X86::R13: case X86::R13D: case X86::R13W: case X86::R13B:
- return N86::EBP;
- case X86::R14: case X86::R14D: case X86::R14W: case X86::R14B:
- return N86::ESI;
- case X86::R15: case X86::R15D: case X86::R15W: case X86::R15B:
- return N86::EDI;
-
- case X86::ST0: case X86::ST1: case X86::ST2: case X86::ST3:
- case X86::ST4: case X86::ST5: case X86::ST6: case X86::ST7:
- return RegNo-X86::ST0;
-
- case X86::XMM0: case X86::XMM8:
- case X86::YMM0: case X86::YMM8: case X86::MM0:
- return 0;
- case X86::XMM1: case X86::XMM9:
- case X86::YMM1: case X86::YMM9: case X86::MM1:
- return 1;
- case X86::XMM2: case X86::XMM10:
- case X86::YMM2: case X86::YMM10: case X86::MM2:
- return 2;
- case X86::XMM3: case X86::XMM11:
- case X86::YMM3: case X86::YMM11: case X86::MM3:
- return 3;
- case X86::XMM4: case X86::XMM12:
- case X86::YMM4: case X86::YMM12: case X86::MM4:
- return 4;
- case X86::XMM5: case X86::XMM13:
- case X86::YMM5: case X86::YMM13: case X86::MM5:
- return 5;
- case X86::XMM6: case X86::XMM14:
- case X86::YMM6: case X86::YMM14: case X86::MM6:
- return 6;
- case X86::XMM7: case X86::XMM15:
- case X86::YMM7: case X86::YMM15: case X86::MM7:
- return 7;
-
- case X86::ES: return 0;
- case X86::CS: return 1;
- case X86::SS: return 2;
- case X86::DS: return 3;
- case X86::FS: return 4;
- case X86::GS: return 5;
-
- case X86::CR0: case X86::CR8 : case X86::DR0: return 0;
- case X86::CR1: case X86::CR9 : case X86::DR1: return 1;
- case X86::CR2: case X86::CR10: case X86::DR2: return 2;
- case X86::CR3: case X86::CR11: case X86::DR3: return 3;
- case X86::CR4: case X86::CR12: case X86::DR4: return 4;
- case X86::CR5: case X86::CR13: case X86::DR5: return 5;
- case X86::CR6: case X86::CR14: case X86::DR6: return 6;
- case X86::CR7: case X86::CR15: case X86::DR7: return 7;
-
- // Pseudo index registers are equivalent to a "none"
- // scaled index (See Intel Manual 2A, table 2-3)
- case X86::EIZ:
- case X86::RIZ:
- return 4;
-
- default:
- assert((int(RegNo) > 0) && "Unknown physical register!");
- return 0;
- }
-}
-
void X86_MC::InitLLVM2SEHRegisterMapping(MCRegisterInfo *MRI) {
// FIXME: TableGen these.
for (unsigned Reg = X86::NoRegister+1; Reg < X86::NUM_TARGET_REGS; ++Reg) {
- int SEH = X86_MC::getX86RegNum(Reg);
- switch (Reg) {
- case X86::R8: case X86::R8D: case X86::R8W: case X86::R8B:
- case X86::R9: case X86::R9D: case X86::R9W: case X86::R9B:
- case X86::R10: case X86::R10D: case X86::R10W: case X86::R10B:
- case X86::R11: case X86::R11D: case X86::R11W: case X86::R11B:
- case X86::R12: case X86::R12D: case X86::R12W: case X86::R12B:
- case X86::R13: case X86::R13D: case X86::R13W: case X86::R13B:
- case X86::R14: case X86::R14D: case X86::R14W: case X86::R14B:
- case X86::R15: case X86::R15D: case X86::R15W: case X86::R15B:
- case X86::XMM8: case X86::XMM9: case X86::XMM10: case X86::XMM11:
- case X86::XMM12: case X86::XMM13: case X86::XMM14: case X86::XMM15:
- case X86::YMM8: case X86::YMM9: case X86::YMM10: case X86::YMM11:
- case X86::YMM12: case X86::YMM13: case X86::YMM14: case X86::YMM15:
- SEH += 8;
- break;
- }
+ unsigned SEH = MRI->getEncodingValue(Reg);
MRI->mapLLVMRegToSEHReg(Reg, SEH);
}
}
@@ -379,11 +272,15 @@
MAI = new X86_64MCAsmInfoDarwin(TheTriple);
else
MAI = new X86MCAsmInfoDarwin(TheTriple);
+ } else if (TheTriple.getEnvironment() == Triple::ELF) {
+ // Force the use of an ELF container.
+ MAI = new X86ELFMCAsmInfo(TheTriple);
} else if (TheTriple.getOS() == Triple::Win32) {
MAI = new X86MCAsmInfoMicrosoft(TheTriple);
} else if (TheTriple.getOS() == Triple::MinGW32 || TheTriple.getOS() == Triple::Cygwin) {
MAI = new X86MCAsmInfoGNUCOFF(TheTriple);
} else {
+ // The default is ELF.
MAI = new X86ELFMCAsmInfo(TheTriple);
}
@@ -465,7 +362,7 @@
if (TheTriple.isOSDarwin() || TheTriple.getEnvironment() == Triple::MachO)
return createMachOStreamer(Ctx, MAB, _OS, _Emitter, RelaxAll);
- if (TheTriple.isOSWindows())
+ if (TheTriple.isOSWindows() && TheTriple.getEnvironment() != Triple::ELF)
return createWinCOFFStreamer(Ctx, MAB, *_Emitter, _OS, RelaxAll);
return createELFStreamer(Ctx, MAB, _OS, _Emitter, RelaxAll, NoExecStack);
Modified: llvm/branches/AMDILBackend/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h?rev=172541&r1=172540&r2=172541&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h (original)
+++ llvm/branches/AMDILBackend/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h Tue Jan 15 11:16:16 2013
@@ -64,8 +64,6 @@
unsigned getDwarfRegFlavour(StringRef TT, bool isEH);
- unsigned getX86RegNum(unsigned RegNo);
-
void InitLLVM2SEHRegisterMapping(MCRegisterInfo *MRI);
/// createX86MCSubtargetInfo - Create a X86 MCSubtargetInfo instance.
@@ -80,8 +78,8 @@
const MCSubtargetInfo &STI,
MCContext &Ctx);
-MCAsmBackend *createX86_32AsmBackend(const Target &T, StringRef TT);
-MCAsmBackend *createX86_64AsmBackend(const Target &T, StringRef TT);
+MCAsmBackend *createX86_32AsmBackend(const Target &T, StringRef TT, StringRef CPU);
+MCAsmBackend *createX86_64AsmBackend(const Target &T, StringRef TT, StringRef CPU);
/// createX86MachObjectWriter - Construct an X86 Mach-O object writer.
MCObjectWriter *createX86MachObjectWriter(raw_ostream &OS,
@@ -91,8 +89,9 @@
/// createX86ELFObjectWriter - Construct an X86 ELF object writer.
MCObjectWriter *createX86ELFObjectWriter(raw_ostream &OS,
- bool Is64Bit,
- uint8_t OSABI);
+ bool IsELF64,
+ uint8_t OSABI,
+ uint16_t EMachine);
/// createX86WinCOFFObjectWriter - Construct an X86 Win COFF object writer.
MCObjectWriter *createX86WinCOFFObjectWriter(raw_ostream &OS, bool Is64Bit);
} // End llvm namespace
Modified: llvm/branches/AMDILBackend/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp?rev=172541&r1=172540&r2=172541&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp (original)
+++ llvm/branches/AMDILBackend/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp Tue Jan 15 11:16:16 2013
@@ -11,11 +11,13 @@
#include "MCTargetDesc/X86MCTargetDesc.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCAsmLayout.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCMachObjectWriter.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCValue.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
#include "llvm/Object/MachOFormat.h"
using namespace llvm;
@@ -23,7 +25,7 @@
namespace {
class X86MachObjectWriter : public MCMachObjectTargetWriter {
- void RecordScatteredRelocation(MachObjectWriter *Writer,
+ bool RecordScatteredRelocation(MachObjectWriter *Writer,
const MCAssembler &Asm,
const MCAsmLayout &Layout,
const MCFragment *Fragment,
@@ -335,7 +337,7 @@
Writer->addRelocation(Fragment->getParent(), MRE);
}
-void X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer,
+bool X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer,
const MCAssembler &Asm,
const MCAsmLayout &Layout,
const MCFragment *Fragment,
@@ -381,6 +383,19 @@
// Relocations are written out in reverse order, so the PAIR comes first.
if (Type == macho::RIT_Difference ||
Type == macho::RIT_Generic_LocalDifference) {
+ // If the offset is too large to fit in a scattered relocation,
+ // we're hosed. It's an unfortunate limitation of the MachO format.
+ if (FixupOffset > 0xffffff) {
+ char Buffer[32];
+ format("0x%x", FixupOffset).print(Buffer, sizeof(Buffer));
+ Asm.getContext().FatalError(Fixup.getLoc(),
+ Twine("Section too large, can't encode "
+ "r_address (") + Buffer +
+ ") into 24 bits of scattered "
+ "relocation entry.");
+ llvm_unreachable("fatal error returned?!");
+ }
+
macho::RelocationEntry MRE;
MRE.Word0 = ((0 << 0) |
(macho::RIT_Pair << 24) |
@@ -389,6 +404,16 @@
macho::RF_Scattered);
MRE.Word1 = Value2;
Writer->addRelocation(Fragment->getParent(), MRE);
+ } else {
+ // If the offset is more than 24-bits, it won't fit in a scattered
+ // relocation offset field, so we fall back to using a non-scattered
+ // relocation. This is a bit risky, as if the offset reaches out of
+ // the block and the linker is doing scattered loading on this
+ // symbol, things can go badly.
+ //
+ // Required for 'as' compatibility.
+ if (FixupOffset > 0xffffff)
+ return false;
}
macho::RelocationEntry MRE;
@@ -399,6 +424,7 @@
macho::RF_Scattered);
MRE.Word1 = Value;
Writer->addRelocation(Fragment->getParent(), MRE);
+ return true;
}
void X86MachObjectWriter::RecordTLVPRelocation(MachObjectWriter *Writer,
@@ -469,9 +495,11 @@
// If this is a difference or a defined symbol plus an offset, then we need a
// scattered relocation entry. Differences always require scattered
// relocations.
- if (Target.getSymB())
- return RecordScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup,
- Target, Log2Size, FixedValue);
+ if (Target.getSymB()) {
+ RecordScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup,
+ Target, Log2Size, FixedValue);
+ return;
+ }
// Get the symbol data, if any.
MCSymbolData *SD = 0;
@@ -483,9 +511,13 @@
uint32_t Offset = Target.getConstant();
if (IsPCRel)
Offset += 1 << Log2Size;
- if (Offset && SD && !Writer->doesSymbolRequireExternRelocation(SD))
- return RecordScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup,
- Target, Log2Size, FixedValue);
+ // Try to record the scattered relocation if needed. Fall back to non
+ // scattered if necessary (see comments in RecordScatteredRelocation()
+ // for details).
+ if (Offset && SD && !Writer->doesSymbolRequireExternRelocation(SD) &&
+ RecordScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup,
+ Target, Log2Size, FixedValue))
+ return;
// See <reloc.h>.
uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
Modified: llvm/branches/AMDILBackend/lib/Target/X86/README-SSE.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/X86/README-SSE.txt?rev=172541&r1=172540&r2=172541&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/X86/README-SSE.txt (original)
+++ llvm/branches/AMDILBackend/lib/Target/X86/README-SSE.txt Tue Jan 15 11:16:16 2013
@@ -941,3 +941,15 @@
cost of reduced accuracy.
//===---------------------------------------------------------------------===//
+
+This function should be matched to haddpd when the appropriate CPU is enabled:
+
+#include <x86intrin.h>
+double f (__m128d p) {
+ return p[0] + p[1];
+}
+
+similarly, v[0]-v[1] should match to hsubpd, and {v[0]-v[1], w[0]-w[1]} should
+turn into hsubpd also.
+
+//===---------------------------------------------------------------------===//
Modified: llvm/branches/AMDILBackend/lib/Target/X86/X86.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/X86/X86.td?rev=172541&r1=172540&r2=172541&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/X86/X86.td (original)
+++ llvm/branches/AMDILBackend/lib/Target/X86/X86.td Tue Jan 15 11:16:16 2013
@@ -17,14 +17,14 @@
include "llvm/Target/Target.td"
//===----------------------------------------------------------------------===//
-// X86 Subtarget state.
+// X86 Subtarget state
//
def Mode64Bit : SubtargetFeature<"64bit-mode", "In64BitMode", "true",
"64-bit mode (x86_64)">;
//===----------------------------------------------------------------------===//
-// X86 Subtarget features.
+// X86 Subtarget features
//===----------------------------------------------------------------------===//
def FeatureCMOV : SubtargetFeature<"cmov","HasCMov", "true",
@@ -97,7 +97,7 @@
[FeatureAVX, FeatureSSE4A]>;
def FeatureXOP : SubtargetFeature<"xop", "HasXOP", "true",
"Enable XOP instructions",
- [FeatureAVX, FeatureSSE4A]>;
+ [FeatureFMA4]>;
def FeatureVectorUAMem : SubtargetFeature<"vector-unaligned-mem",
"HasVectorUAMem", "true",
"Allow unaligned memory operands on vector/SIMD instructions">;
@@ -118,8 +118,13 @@
"Support BMI instructions">;
def FeatureBMI2 : SubtargetFeature<"bmi2", "HasBMI2", "true",
"Support BMI2 instructions">;
+def FeatureRTM : SubtargetFeature<"rtm", "HasRTM", "true",
+ "Support RTM instructions">;
def FeatureLeaForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true",
"Use LEA for adjusting the stack pointer">;
+def FeatureSlowDivide : SubtargetFeature<"idiv-to-divb",
+ "HasSlowDivide", "true",
+ "Use small divide for positive values less than 256">;
//===----------------------------------------------------------------------===//
// X86 processors supported.
@@ -159,8 +164,9 @@
FeatureSlowBTMem]>;
def : Proc<"penryn", [FeatureSSE41, FeatureCMPXCHG16B,
FeatureSlowBTMem]>;
-def : AtomProc<"atom", [ProcIntelAtom, FeatureSSE3, FeatureCMPXCHG16B,
- FeatureMOVBE, FeatureSlowBTMem, FeatureLeaForSP]>;
+def : AtomProc<"atom", [ProcIntelAtom, FeatureSSSE3, FeatureCMPXCHG16B,
+ FeatureMOVBE, FeatureSlowBTMem, FeatureLeaForSP,
+ FeatureSlowDivide]>;
// "Arrandale" along with corei3 and corei5
def : Proc<"corei7", [FeatureSSE42, FeatureCMPXCHG16B,
FeatureSlowBTMem, FeatureFastUAMem,
@@ -188,7 +194,8 @@
FeatureAES, FeaturePCLMUL, FeatureRDRAND,
FeatureF16C, FeatureFSGSBase,
FeatureMOVBE, FeatureLZCNT, FeatureBMI,
- FeatureBMI2, FeatureFMA]>;
+ FeatureBMI2, FeatureFMA,
+ FeatureRTM]>;
def : Proc<"k6", [FeatureMMX]>;
def : Proc<"k6-2", [Feature3DNow]>;
@@ -226,7 +233,8 @@
def : Proc<"bdver2", [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B,
FeatureAES, FeaturePCLMUL,
FeatureF16C, FeatureLZCNT,
- FeaturePOPCNT, FeatureBMI]>;
+ FeaturePOPCNT, FeatureBMI, FeatureFMA]>;
+def : Proc<"geode", [Feature3DNowA]>;
def : Proc<"winchip-c6", [FeatureMMX]>;
def : Proc<"winchip2", [Feature3DNow]>;
Modified: llvm/branches/AMDILBackend/lib/Target/X86/X86AsmPrinter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/X86/X86AsmPrinter.cpp?rev=172541&r1=172540&r2=172541&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/X86/X86AsmPrinter.cpp (original)
+++ llvm/branches/AMDILBackend/lib/Target/X86/X86AsmPrinter.cpp Tue Jan 15 11:16:16 2013
@@ -13,7 +13,6 @@
//===----------------------------------------------------------------------===//
#include "X86AsmPrinter.h"
-#include "X86MCInstLower.h"
#include "X86.h"
#include "X86COFFMachineModuleInfo.h"
#include "X86MachineFunctionInfo.h"
@@ -206,10 +205,10 @@
}
}
-/// print_pcrel_imm - This is used to print an immediate value that ends up
+/// printPCRelImm - This is used to print an immediate value that ends up
/// being encoded as a pc-relative value. These print slightly differently, for
/// example, a $ is not emitted.
-void X86AsmPrinter::print_pcrel_imm(const MachineInstr *MI, unsigned OpNo,
+void X86AsmPrinter::printPCRelImm(const MachineInstr *MI, unsigned OpNo,
raw_ostream &O) {
const MachineOperand &MO = MI->getOperand(OpNo);
switch (MO.getType()) {
@@ -233,15 +232,17 @@
void X86AsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
- raw_ostream &O, const char *Modifier) {
+ raw_ostream &O, const char *Modifier,
+ unsigned AsmVariant) {
const MachineOperand &MO = MI->getOperand(OpNo);
switch (MO.getType()) {
default: llvm_unreachable("unknown operand type!");
case MachineOperand::MO_Register: {
- O << '%';
+ // FIXME: Enumerating AsmVariant, so we can remove magic number.
+ if (AsmVariant == 0) O << '%';
unsigned Reg = MO.getReg();
if (Modifier && strncmp(Modifier, "subreg", strlen("subreg")) == 0) {
- EVT VT = (strcmp(Modifier+6,"64") == 0) ?
+ MVT::SimpleValueType VT = (strcmp(Modifier+6,"64") == 0) ?
MVT::i64 : ((strcmp(Modifier+6, "32") == 0) ? MVT::i32 :
((strcmp(Modifier+6,"16") == 0) ? MVT::i16 : MVT::i8));
Reg = getX86SubSuperRegister(Reg, VT);
@@ -265,46 +266,6 @@
}
}
-void X86AsmPrinter::printSSECC(const MachineInstr *MI, unsigned Op,
- raw_ostream &O) {
- unsigned char value = MI->getOperand(Op).getImm();
- switch (value) {
- default: llvm_unreachable("Invalid ssecc argument!");
- case 0: O << "eq"; break;
- case 1: O << "lt"; break;
- case 2: O << "le"; break;
- case 3: O << "unord"; break;
- case 4: O << "neq"; break;
- case 5: O << "nlt"; break;
- case 6: O << "nle"; break;
- case 7: O << "ord"; break;
- case 8: O << "eq_uq"; break;
- case 9: O << "nge"; break;
- case 0xa: O << "ngt"; break;
- case 0xb: O << "false"; break;
- case 0xc: O << "neq_oq"; break;
- case 0xd: O << "ge"; break;
- case 0xe: O << "gt"; break;
- case 0xf: O << "true"; break;
- case 0x10: O << "eq_os"; break;
- case 0x11: O << "lt_oq"; break;
- case 0x12: O << "le_oq"; break;
- case 0x13: O << "unord_s"; break;
- case 0x14: O << "neq_us"; break;
- case 0x15: O << "nlt_uq"; break;
- case 0x16: O << "nle_uq"; break;
- case 0x17: O << "ord_s"; break;
- case 0x18: O << "eq_us"; break;
- case 0x19: O << "nge_uq"; break;
- case 0x1a: O << "ngt_uq"; break;
- case 0x1b: O << "false_os"; break;
- case 0x1c: O << "neq_os"; break;
- case 0x1d: O << "ge_oq"; break;
- case 0x1e: O << "gt_oq"; break;
- case 0x1f: O << "true_us"; break;
- }
-}
-
void X86AsmPrinter::printLeaMemReference(const MachineInstr *MI, unsigned Op,
raw_ostream &O, const char *Modifier) {
const MachineOperand &BaseReg = MI->getOperand(Op);
@@ -363,10 +324,51 @@
printLeaMemReference(MI, Op, O, Modifier);
}
-void X86AsmPrinter::printPICLabel(const MachineInstr *MI, unsigned Op,
- raw_ostream &O) {
- O << *MF->getPICBaseSymbol() << '\n';
- O << *MF->getPICBaseSymbol() << ':';
+void X86AsmPrinter::printIntelMemReference(const MachineInstr *MI, unsigned Op,
+ raw_ostream &O, const char *Modifier,
+ unsigned AsmVariant){
+ const MachineOperand &BaseReg = MI->getOperand(Op);
+ unsigned ScaleVal = MI->getOperand(Op+1).getImm();
+ const MachineOperand &IndexReg = MI->getOperand(Op+2);
+ const MachineOperand &DispSpec = MI->getOperand(Op+3);
+ const MachineOperand &SegReg = MI->getOperand(Op+4);
+
+ // If this has a segment register, print it.
+ if (SegReg.getReg()) {
+ printOperand(MI, Op+4, O, Modifier, AsmVariant);
+ O << ':';
+ }
+
+ O << '[';
+
+ bool NeedPlus = false;
+ if (BaseReg.getReg()) {
+ printOperand(MI, Op, O, Modifier, AsmVariant);
+ NeedPlus = true;
+ }
+
+ if (IndexReg.getReg()) {
+ if (NeedPlus) O << " + ";
+ if (ScaleVal != 1)
+ O << ScaleVal << '*';
+ printOperand(MI, Op+2, O, Modifier, AsmVariant);
+ NeedPlus = true;
+ }
+
+ assert (DispSpec.isImm() && "Displacement is not an immediate!");
+ int64_t DispVal = DispSpec.getImm();
+ if (DispVal || (!IndexReg.getReg() && !BaseReg.getReg())) {
+ if (NeedPlus) {
+ if (DispVal > 0)
+ O << " + ";
+ else {
+ O << " - ";
+ DispVal = -DispVal;
+ }
+ }
+ O << DispVal;
+ }
+ O << ']';
}
bool X86AsmPrinter::printAsmMRegister(const MachineOperand &MO, char Mode,
@@ -457,7 +459,7 @@
return false;
case 'P': // This is the operand of a call, treat specially.
- print_pcrel_imm(MI, OpNo, O);
+ printPCRelImm(MI, OpNo, O);
return false;
case 'n': // Negate the immediate or print a '-' before the operand.
@@ -471,7 +473,7 @@
}
}
- printOperand(MI, OpNo, O);
+ printOperand(MI, OpNo, O, /*Modifier*/ 0, AsmVariant);
return false;
}
@@ -479,6 +481,11 @@
unsigned OpNo, unsigned AsmVariant,
const char *ExtraCode,
raw_ostream &O) {
+ if (AsmVariant) {
+ printIntelMemReference(MI, OpNo, O);
+ return false;
+ }
+
if (ExtraCode && ExtraCode[0]) {
if (ExtraCode[1] != 0) return true; // Unknown modifier.
@@ -680,7 +687,7 @@
MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList();
if (!Stubs.empty()) {
OutStreamer.SwitchSection(TLOFELF.getDataRelSection());
- const TargetData *TD = TM.getTargetData();
+ const DataLayout *TD = TM.getDataLayout();
for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
OutStreamer.EmitLabel(Stubs[i].first);
Modified: llvm/branches/AMDILBackend/lib/Target/X86/X86AsmPrinter.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/X86/X86AsmPrinter.h?rev=172541&r1=172540&r2=172541&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/X86/X86AsmPrinter.h (original)
+++ llvm/branches/AMDILBackend/lib/Target/X86/X86AsmPrinter.h Tue Jan 15 11:16:16 2013
@@ -34,47 +34,48 @@
Subtarget = &TM.getSubtarget<X86Subtarget>();
}
- virtual const char *getPassName() const {
+ virtual const char *getPassName() const LLVM_OVERRIDE {
return "X86 AT&T-Style Assembly Printer";
}
const X86Subtarget &getSubtarget() const { return *Subtarget; }
- virtual void EmitStartOfAsmFile(Module &M);
+ virtual void EmitStartOfAsmFile(Module &M) LLVM_OVERRIDE;
- virtual void EmitEndOfAsmFile(Module &M);
+ virtual void EmitEndOfAsmFile(Module &M) LLVM_OVERRIDE;
- virtual void EmitInstruction(const MachineInstr *MI);
+ virtual void EmitInstruction(const MachineInstr *MI) LLVM_OVERRIDE;
void printSymbolOperand(const MachineOperand &MO, raw_ostream &O);
// These methods are used by the tablegen'erated instruction printer.
void printOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O,
- const char *Modifier = 0);
- void print_pcrel_imm(const MachineInstr *MI, unsigned OpNo, raw_ostream &O);
+ const char *Modifier = 0, unsigned AsmVariant = 0);
+ void printPCRelImm(const MachineInstr *MI, unsigned OpNo, raw_ostream &O);
bool printAsmMRegister(const MachineOperand &MO, char Mode, raw_ostream &O);
- bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
- unsigned AsmVariant, const char *ExtraCode,
- raw_ostream &OS);
- bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
- unsigned AsmVariant, const char *ExtraCode,
- raw_ostream &OS);
+ virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &OS) LLVM_OVERRIDE;
+ virtual bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &OS) LLVM_OVERRIDE;
- void printMachineInstruction(const MachineInstr *MI);
- void printSSECC(const MachineInstr *MI, unsigned Op, raw_ostream &O);
void printMemReference(const MachineInstr *MI, unsigned Op, raw_ostream &O,
const char *Modifier=NULL);
void printLeaMemReference(const MachineInstr *MI, unsigned Op, raw_ostream &O,
const char *Modifier=NULL);
- void printPICLabel(const MachineInstr *MI, unsigned Op, raw_ostream &O);
+ void printIntelMemReference(const MachineInstr *MI, unsigned Op,
+ raw_ostream &O, const char *Modifier=NULL,
+ unsigned AsmVariant = 1);
- bool runOnMachineFunction(MachineFunction &F);
+ virtual bool runOnMachineFunction(MachineFunction &F) LLVM_OVERRIDE;
void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS);
- MachineLocation getDebugValueLocation(const MachineInstr *MI) const;
+ virtual MachineLocation
+ getDebugValueLocation(const MachineInstr *MI) const LLVM_OVERRIDE;
};
} // end namespace llvm
Modified: llvm/branches/AMDILBackend/lib/Target/X86/X86COFFMachineModuleInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/X86/X86COFFMachineModuleInfo.h?rev=172541&r1=172540&r2=172541&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/X86/X86COFFMachineModuleInfo.h (original)
+++ llvm/branches/AMDILBackend/lib/Target/X86/X86COFFMachineModuleInfo.h Tue Jan 15 11:16:16 2013
@@ -20,7 +20,7 @@
namespace llvm {
class X86MachineFunctionInfo;
- class TargetData;
+ class DataLayout;
/// X86COFFMachineModuleInfo - This is a MachineModuleInfoImpl implementation
/// for X86 COFF targets.
Modified: llvm/branches/AMDILBackend/lib/Target/X86/X86CallingConv.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/X86/X86CallingConv.td?rev=172541&r1=172540&r2=172541&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/X86/X86CallingConv.td (original)
+++ llvm/branches/AMDILBackend/lib/Target/X86/X86CallingConv.td Tue Jan 15 11:16:16 2013
@@ -88,6 +88,21 @@
CCDelegateTo<RetCC_X86Common>
]>;
+// Intel_OCL_BI return-value convention.
+def RetCC_Intel_OCL_BI : CallingConv<[
+ // Vector types are returned in XMM0,XMM1,XMMM2 and XMM3.
+ CCIfType<[f32, f64, v4i32, v2i64, v4f32, v2f64],
+ CCAssignToReg<[XMM0,XMM1,XMM2,XMM3]>>,
+
+ // 256-bit FP vectors
+ // No more than 4 registers
+ CCIfType<[v8f32, v4f64, v8i32, v4i64],
+ CCAssignToReg<[YMM0,YMM1,YMM2,YMM3]>>,
+
+ // i32, i64 in the standard way
+ CCDelegateTo<RetCC_X86Common>
+]>;
+
// X86-64 C return-value convention.
def RetCC_X86_64_C : CallingConv<[
// The X86-64 calling convention always returns FP values in XMM0.
@@ -128,6 +143,10 @@
// This is the return-value convention used for the entire X86 backend.
def RetCC_X86 : CallingConv<[
+
+ // Check if this is the Intel OpenCL built-ins calling convention
+ CCIfCC<"CallingConv::Intel_OCL_BI", CCDelegateTo<RetCC_Intel_OCL_BI>>,
+
CCIfSubtarget<"is64Bit()", CCDelegateTo<RetCC_X86_64>>,
CCDelegateTo<RetCC_X86_32>
]>;
@@ -235,6 +254,29 @@
CCIfType<[f80], CCAssignToStack<0, 0>>
]>;
+// X86-64 Intel OpenCL built-ins calling convention.
+def CC_Intel_OCL_BI : CallingConv<[
+ CCIfType<[i32], CCIfSubtarget<"isTargetWin32()", CCAssignToStack<4, 4>>>,
+
+ CCIfType<[i32], CCIfSubtarget<"isTargetWin64()", CCAssignToReg<[ECX, EDX, R8D, R9D]>>>,
+ CCIfType<[i64], CCIfSubtarget<"isTargetWin64()", CCAssignToReg<[RCX, RDX, R8, R9 ]>>>,
+
+ CCIfType<[i32], CCAssignToReg<[EDI, ESI, EDX, ECX]>>,
+ CCIfType<[i64], CCAssignToReg<[RDI, RSI, RDX, RCX]>>,
+
+ // The SSE vector arguments are passed in XMM registers.
+ CCIfType<[f32, f64, v4i32, v2i64, v4f32, v2f64],
+ CCAssignToReg<[XMM0, XMM1, XMM2, XMM3]>>,
+
+ // The 256-bit vector arguments are passed in YMM registers.
+ CCIfType<[v8f32, v4f64, v8i32, v4i64],
+ CCAssignToReg<[YMM0, YMM1, YMM2, YMM3]>>,
+
+ CCIfSubtarget<"isTargetWin64()", CCDelegateTo<CC_X86_Win64_C>>,
+ CCDelegateTo<CC_X86_64_C>
+]>;
+
+
def CC_X86_64_GHC : CallingConv<[
// Promote i8/i16/i32 arguments to i64.
CCIfType<[i8, i16, i32], CCPromoteToType<i64>>,
@@ -324,7 +366,7 @@
CCIfNest<CCAssignToReg<[EAX]>>,
// The first 2 integer arguments are passed in ECX/EDX
- CCIfType<[i32], CCAssignToReg<[ECX, EDX]>>,
+ CCIfInReg<CCIfType<[i32], CCAssignToReg<[ECX, EDX]>>>,
// Otherwise, same as everything else.
CCDelegateTo<CC_X86_32_Common>
@@ -408,6 +450,7 @@
// This is the argument convention used for the entire X86 backend.
def CC_X86 : CallingConv<[
+ CCIfCC<"CallingConv::Intel_OCL_BI", CCDelegateTo<CC_Intel_OCL_BI>>,
CCIfSubtarget<"is64Bit()", CCDelegateTo<CC_X86_64>>,
CCDelegateTo<CC_X86_32>
]>;
@@ -426,3 +469,17 @@
def CSR_Win64 : CalleeSavedRegs<(add RBX, RBP, RDI, RSI, R12, R13, R14, R15,
(sequence "XMM%u", 6, 15))>;
+
+
+// Standard C + YMM6-15
+def CSR_Win64_Intel_OCL_BI_AVX : CalleeSavedRegs<(add RBX, RBP, RDI, RSI, R12,
+ R13, R14, R15,
+ (sequence "YMM%u", 6, 15))>;
+
+//Standard C + XMM 8-15
+def CSR_64_Intel_OCL_BI : CalleeSavedRegs<(add CSR_64,
+ (sequence "XMM%u", 8, 15))>;
+
+//Standard C + YMM 8-15
+def CSR_64_Intel_OCL_BI_AVX : CalleeSavedRegs<(add CSR_64,
+ (sequence "YMM%u", 8, 15))>;
Modified: llvm/branches/AMDILBackend/lib/Target/X86/X86CodeEmitter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/X86/X86CodeEmitter.cpp?rev=172541&r1=172540&r2=172541&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/X86/X86CodeEmitter.cpp (original)
+++ llvm/branches/AMDILBackend/lib/Target/X86/X86CodeEmitter.cpp Tue Jan 15 11:16:16 2013
@@ -26,7 +26,6 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/Function.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCExpr.h"
@@ -43,7 +42,7 @@
template<class CodeEmitter>
class Emitter : public MachineFunctionPass {
const X86InstrInfo *II;
- const TargetData *TD;
+ const DataLayout *TD;
X86TargetMachine &TM;
CodeEmitter &MCE;
MachineModuleInfo *MMI;
@@ -57,7 +56,7 @@
MCE(mce), PICBaseOffset(0), Is64BitMode(false),
IsPIC(TM.getRelocationModel() == Reloc::PIC_) {}
Emitter(X86TargetMachine &tm, CodeEmitter &mce,
- const X86InstrInfo &ii, const TargetData &td, bool is64)
+ const X86InstrInfo &ii, const DataLayout &td, bool is64)
: MachineFunctionPass(ID), II(&ii), TD(&td), TM(tm),
MCE(mce), PICBaseOffset(0), Is64BitMode(is64),
IsPIC(TM.getRelocationModel() == Reloc::PIC_) {}
@@ -110,6 +109,14 @@
void emitMemModRMByte(const MachineInstr &MI,
unsigned Op, unsigned RegOpcodeField,
intptr_t PCAdj = 0);
+
+ unsigned getX86RegNum(unsigned RegNo) const {
+ const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+ return TRI->getEncodingValue(RegNo) & 0x7;
+ }
+
+ unsigned char getVEXRegisterEncoding(const MachineInstr &MI,
+ unsigned OpNum) const;
};
template<class CodeEmitter>
@@ -129,13 +136,12 @@
MCE.setModuleInfo(MMI);
II = TM.getInstrInfo();
- TD = TM.getTargetData();
+ TD = TM.getDataLayout();
Is64BitMode = TM.getSubtarget<X86Subtarget>().is64Bit();
IsPIC = TM.getRelocationModel() == Reloc::PIC_;
do {
- DEBUG(dbgs() << "JITTing function '"
- << MF.getFunction()->getName() << "'\n");
+ DEBUG(dbgs() << "JITTing function '" << MF.getName() << "'\n");
MCE.startFunction(MF);
for (MachineFunction::iterator MBB = MF.begin(), E = MF.end();
MBB != E; ++MBB) {
@@ -365,7 +371,7 @@
template<class CodeEmitter>
void Emitter<CodeEmitter>::emitRegModRMByte(unsigned ModRMReg,
unsigned RegOpcodeFld){
- MCE.emitByte(ModRMByte(3, RegOpcodeFld, X86_MC::getX86RegNum(ModRMReg)));
+ MCE.emitByte(ModRMByte(3, RegOpcodeFld, getX86RegNum(ModRMReg)));
}
template<class CodeEmitter>
@@ -503,7 +509,7 @@
// 2-7) and absolute references.
unsigned BaseRegNo = -1U;
if (BaseReg != 0 && BaseReg != X86::RIP)
- BaseRegNo = X86_MC::getX86RegNum(BaseReg);
+ BaseRegNo = getX86RegNum(BaseReg);
if (// The SIB byte must be used if there is an index register.
IndexReg.getReg() == 0 &&
@@ -579,15 +585,15 @@
// Manual 2A, table 2-7. The displacement has already been output.
unsigned IndexRegNo;
if (IndexReg.getReg())
- IndexRegNo = X86_MC::getX86RegNum(IndexReg.getReg());
+ IndexRegNo = getX86RegNum(IndexReg.getReg());
else // Examples: [ESP+1*<noreg>+4] or [scaled idx]+disp32 (MOD=0,BASE=5)
IndexRegNo = 4;
emitSIBByte(SS, IndexRegNo, 5);
} else {
- unsigned BaseRegNo = X86_MC::getX86RegNum(BaseReg);
+ unsigned BaseRegNo = getX86RegNum(BaseReg);
unsigned IndexRegNo;
if (IndexReg.getReg())
- IndexRegNo = X86_MC::getX86RegNum(IndexReg.getReg());
+ IndexRegNo = getX86RegNum(IndexReg.getReg());
else
IndexRegNo = 4; // For example [ESP+1*<noreg>+4]
emitSIBByte(SS, IndexRegNo, BaseRegNo);
@@ -758,10 +764,12 @@
// VEX.VVVV => XMM9 => ~9
//
// See table 4-35 of Intel AVX Programming Reference for details.
-static unsigned char getVEXRegisterEncoding(const MachineInstr &MI,
- unsigned OpNum) {
+template<class CodeEmitter>
+unsigned char
+Emitter<CodeEmitter>::getVEXRegisterEncoding(const MachineInstr &MI,
+ unsigned OpNum) const {
unsigned SrcReg = MI.getOperand(OpNum).getReg();
- unsigned SrcRegNum = X86_MC::getX86RegNum(MI.getOperand(OpNum).getReg());
+ unsigned SrcRegNum = getX86RegNum(MI.getOperand(OpNum).getReg());
if (X86II::isX86_64ExtendedReg(SrcReg))
SrcRegNum |= 8;
@@ -923,17 +931,6 @@
}
- // Set the vector length to 256-bit if YMM0-YMM15 is used
- for (unsigned i = 0; i != MI.getNumOperands(); ++i) {
- if (!MI.getOperand(i).isReg())
- continue;
- if (MI.getOperand(i).isImplicit())
- continue;
- unsigned SrcReg = MI.getOperand(i).getReg();
- if (SrcReg >= X86::YMM0 && SrcReg <= X86::YMM15)
- VEX_L = 1;
- }
-
// Classify VEX_B, VEX_4V, VEX_R, VEX_X
unsigned NumOps = Desc->getNumOperands();
unsigned CurOp = 0;
@@ -1248,7 +1245,7 @@
case X86II::AddRegFrm: {
MCE.emitByte(BaseOpcode +
- X86_MC::getX86RegNum(MI.getOperand(CurOp++).getReg()));
+ getX86RegNum(MI.getOperand(CurOp++).getReg()));
if (CurOp == NumOps)
break;
@@ -1283,7 +1280,7 @@
case X86II::MRMDestReg: {
MCE.emitByte(BaseOpcode);
emitRegModRMByte(MI.getOperand(CurOp).getReg(),
- X86_MC::getX86RegNum(MI.getOperand(CurOp+1).getReg()));
+ getX86RegNum(MI.getOperand(CurOp+1).getReg()));
CurOp += 2;
break;
}
@@ -1294,7 +1291,7 @@
if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV)
SrcRegNum++;
emitMemModRMByte(MI, CurOp,
- X86_MC::getX86RegNum(MI.getOperand(SrcRegNum).getReg()));
+ getX86RegNum(MI.getOperand(SrcRegNum).getReg()));
CurOp = SrcRegNum + 1;
break;
}
@@ -1310,7 +1307,7 @@
++SrcRegNum;
emitRegModRMByte(MI.getOperand(SrcRegNum).getReg(),
- X86_MC::getX86RegNum(MI.getOperand(CurOp).getReg()));
+ getX86RegNum(MI.getOperand(CurOp).getReg()));
// 2 operands skipped with HasMemOp4, compensate accordingly
CurOp = HasMemOp4 ? SrcRegNum : SrcRegNum + 1;
if (HasVEX_4VOp3)
@@ -1332,7 +1329,7 @@
intptr_t PCAdj = (CurOp + AddrOperands + 1 != NumOps) ?
X86II::getSizeOfImm(Desc->TSFlags) : 0;
emitMemModRMByte(MI, FirstMemOp,
- X86_MC::getX86RegNum(MI.getOperand(CurOp).getReg()),PCAdj);
+ getX86RegNum(MI.getOperand(CurOp).getReg()),PCAdj);
CurOp += AddrOperands + 1;
if (HasVEX_4VOp3)
++CurOp;
@@ -1422,7 +1419,7 @@
MCE.emitByte(BaseOpcode);
// Duplicate register, used by things like MOV8r0 (aka xor reg,reg).
emitRegModRMByte(MI.getOperand(CurOp).getReg(),
- X86_MC::getX86RegNum(MI.getOperand(CurOp).getReg()));
+ getX86RegNum(MI.getOperand(CurOp).getReg()));
++CurOp;
break;
@@ -1455,7 +1452,7 @@
const MachineOperand &MO = MI.getOperand(HasMemOp4 ? MemOp4_I8IMMOperand
: CurOp);
++CurOp;
- unsigned RegNum = X86_MC::getX86RegNum(MO.getReg()) << 4;
+ unsigned RegNum = getX86RegNum(MO.getReg()) << 4;
if (X86II::isX86_64ExtendedReg(MO.getReg()))
RegNum |= 1 << 7;
// If there is an additional 5th operand it must be an immediate, which
Removed: llvm/branches/AMDILBackend/lib/Target/X86/X86ELFWriterInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/X86/X86ELFWriterInfo.cpp?rev=172540&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/X86/X86ELFWriterInfo.cpp (original)
+++ llvm/branches/AMDILBackend/lib/Target/X86/X86ELFWriterInfo.cpp (removed)
@@ -1,147 +0,0 @@
-//===-- X86ELFWriterInfo.cpp - ELF Writer Info for the X86 backend --------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements ELF writer information for the X86 backend.
-//
-//===----------------------------------------------------------------------===//
-
-#include "X86ELFWriterInfo.h"
-#include "X86Relocations.h"
-#include "llvm/Function.h"
-#include "llvm/Support/ELF.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetMachine.h"
-
-using namespace llvm;
-
-//===----------------------------------------------------------------------===//
-// Implementation of the X86ELFWriterInfo class
-//===----------------------------------------------------------------------===//
-
-X86ELFWriterInfo::X86ELFWriterInfo(bool is64Bit_, bool isLittleEndian_)
- : TargetELFWriterInfo(is64Bit_, isLittleEndian_) {
- EMachine = is64Bit ? EM_X86_64 : EM_386;
- }
-
-X86ELFWriterInfo::~X86ELFWriterInfo() {}
-
-unsigned X86ELFWriterInfo::getRelocationType(unsigned MachineRelTy) const {
- if (is64Bit) {
- switch(MachineRelTy) {
- case X86::reloc_pcrel_word:
- return ELF::R_X86_64_PC32;
- case X86::reloc_absolute_word:
- return ELF::R_X86_64_32;
- case X86::reloc_absolute_word_sext:
- return ELF::R_X86_64_32S;
- case X86::reloc_absolute_dword:
- return ELF::R_X86_64_64;
- case X86::reloc_picrel_word:
- default:
- llvm_unreachable("unknown x86_64 machine relocation type");
- }
- } else {
- switch(MachineRelTy) {
- case X86::reloc_pcrel_word:
- return ELF::R_386_PC32;
- case X86::reloc_absolute_word:
- return ELF::R_386_32;
- case X86::reloc_absolute_word_sext:
- case X86::reloc_absolute_dword:
- case X86::reloc_picrel_word:
- default:
- llvm_unreachable("unknown x86 machine relocation type");
- }
- }
-}
-
-long int X86ELFWriterInfo::getDefaultAddendForRelTy(unsigned RelTy,
- long int Modifier) const {
- if (is64Bit) {
- switch(RelTy) {
- case ELF::R_X86_64_PC32: return Modifier - 4;
- case ELF::R_X86_64_32:
- case ELF::R_X86_64_32S:
- case ELF::R_X86_64_64:
- return Modifier;
- default:
- llvm_unreachable("unknown x86_64 relocation type");
- }
- } else {
- switch(RelTy) {
- case ELF::R_386_PC32: return Modifier - 4;
- case ELF::R_386_32: return Modifier;
- default:
- llvm_unreachable("unknown x86 relocation type");
- }
- }
-}
-
-unsigned X86ELFWriterInfo::getRelocationTySize(unsigned RelTy) const {
- if (is64Bit) {
- switch(RelTy) {
- case ELF::R_X86_64_PC32:
- case ELF::R_X86_64_32:
- case ELF::R_X86_64_32S:
- return 32;
- case ELF::R_X86_64_64:
- return 64;
- default:
- llvm_unreachable("unknown x86_64 relocation type");
- }
- } else {
- switch(RelTy) {
- case ELF::R_386_PC32:
- case ELF::R_386_32:
- return 32;
- default:
- llvm_unreachable("unknown x86 relocation type");
- }
- }
-}
-
-bool X86ELFWriterInfo::isPCRelativeRel(unsigned RelTy) const {
- if (is64Bit) {
- switch(RelTy) {
- case ELF::R_X86_64_PC32:
- return true;
- case ELF::R_X86_64_32:
- case ELF::R_X86_64_32S:
- case ELF::R_X86_64_64:
- return false;
- default:
- llvm_unreachable("unknown x86_64 relocation type");
- }
- } else {
- switch(RelTy) {
- case ELF::R_386_PC32:
- return true;
- case ELF::R_386_32:
- return false;
- default:
- llvm_unreachable("unknown x86 relocation type");
- }
- }
-}
-
-unsigned X86ELFWriterInfo::getAbsoluteLabelMachineRelTy() const {
- return is64Bit ?
- X86::reloc_absolute_dword : X86::reloc_absolute_word;
-}
-
-long int X86ELFWriterInfo::computeRelocation(unsigned SymOffset,
- unsigned RelOffset,
- unsigned RelTy) const {
-
- if (RelTy == ELF::R_X86_64_PC32 || RelTy == ELF::R_386_PC32)
- return SymOffset - (RelOffset + 4);
-
- llvm_unreachable("computeRelocation unknown for this relocation type");
-}
Removed: llvm/branches/AMDILBackend/lib/Target/X86/X86ELFWriterInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/X86/X86ELFWriterInfo.h?rev=172540&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/X86/X86ELFWriterInfo.h (original)
+++ llvm/branches/AMDILBackend/lib/Target/X86/X86ELFWriterInfo.h (removed)
@@ -1,59 +0,0 @@
-//===-- X86ELFWriterInfo.h - ELF Writer Info for X86 ------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements ELF writer information for the X86 backend.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef X86_ELF_WRITER_INFO_H
-#define X86_ELF_WRITER_INFO_H
-
-#include "llvm/Target/TargetELFWriterInfo.h"
-
-namespace llvm {
-
- class X86ELFWriterInfo : public TargetELFWriterInfo {
-
- public:
- X86ELFWriterInfo(bool is64Bit_, bool isLittleEndian_);
- virtual ~X86ELFWriterInfo();
-
- /// getRelocationType - Returns the target specific ELF Relocation type.
- /// 'MachineRelTy' contains the object code independent relocation type
- virtual unsigned getRelocationType(unsigned MachineRelTy) const;
-
- /// hasRelocationAddend - True if the target uses an addend in the
- /// ELF relocation entry.
- virtual bool hasRelocationAddend() const { return is64Bit ? true : false; }
-
- /// getDefaultAddendForRelTy - Gets the default addend value for a
- /// relocation entry based on the target ELF relocation type.
- virtual long int getDefaultAddendForRelTy(unsigned RelTy,
- long int Modifier = 0) const;
-
- /// getRelTySize - Returns the size of relocatable field in bits
- virtual unsigned getRelocationTySize(unsigned RelTy) const;
-
- /// isPCRelativeRel - True if the relocation type is pc relative
- virtual bool isPCRelativeRel(unsigned RelTy) const;
-
- /// getJumpTableRelocationTy - Returns the machine relocation type used
- /// to reference a jumptable.
- virtual unsigned getAbsoluteLabelMachineRelTy() const;
-
- /// computeRelocation - Some relocatable fields could be relocated
- /// directly, avoiding the relocation symbol emission, compute the
- /// final relocation value for this symbol.
- virtual long int computeRelocation(unsigned SymOffset, unsigned RelOffset,
- unsigned RelTy) const;
- };
-
-} // end llvm namespace
-
-#endif // X86_ELF_WRITER_INFO_H
Modified: llvm/branches/AMDILBackend/lib/Target/X86/X86FastISel.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/X86/X86FastISel.cpp?rev=172541&r1=172540&r2=172541&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/X86/X86FastISel.cpp (original)
+++ llvm/branches/AMDILBackend/lib/Target/X86/X86FastISel.cpp Tue Jan 15 11:16:16 2013
@@ -45,9 +45,9 @@
/// make the right decision when generating code for different targets.
const X86Subtarget *Subtarget;
- /// StackPtr - Register used as the stack pointer.
+ /// RegInfo - X86 register info.
///
- unsigned StackPtr;
+ const X86RegisterInfo *RegInfo;
/// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE or x87
/// floating point ops.
@@ -61,9 +61,9 @@
const TargetLibraryInfo *libInfo)
: FastISel(funcInfo, libInfo) {
Subtarget = &TM.getSubtarget<X86Subtarget>();
- StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP;
X86ScalarSSEf64 = Subtarget->hasSSE2();
X86ScalarSSEf32 = Subtarget->hasSSE1();
+ RegInfo = static_cast<const X86RegisterInfo*>(TM.getRegisterInfo());
}
virtual bool TargetSelectInstruction(const Instruction *I);
@@ -157,9 +157,9 @@
// For now, require SSE/SSE2 for performing floating-point operations,
// since x87 requires additional work.
if (VT == MVT::f64 && !X86ScalarSSEf64)
- return false;
+ return false;
if (VT == MVT::f32 && !X86ScalarSSEf32)
- return false;
+ return false;
// Similarly, no f80 support yet.
if (VT == MVT::f80)
return false;
@@ -710,6 +710,8 @@
bool X86FastISel::X86SelectRet(const Instruction *I) {
const ReturnInst *Ret = cast<ReturnInst>(I);
const Function &F = *I->getParent()->getParent();
+ const X86MachineFunctionInfo *X86MFInfo =
+ FuncInfo.MF->getInfo<X86MachineFunctionInfo>();
if (!FuncInfo.CanLowerReturn)
return false;
@@ -724,8 +726,7 @@
return false;
// Don't handle popping bytes on return for now.
- if (FuncInfo.MF->getInfo<X86MachineFunctionInfo>()
- ->getBytesToPopOnReturn() != 0)
+ if (X86MFInfo->getBytesToPopOnReturn() != 0)
return 0;
// fastcc with -tailcallopt is intended to provide a guaranteed
@@ -809,6 +810,19 @@
MRI.addLiveOut(VA.getLocReg());
}
+ // The x86-64 ABI for returning structs by value requires that we copy
+ // the sret argument into %rax for the return. We saved the argument into
+ // a virtual register in the entry block, so now we copy the value out
+ // and into %rax.
+ if (Subtarget->is64Bit() && F.hasStructRetAttr()) {
+ unsigned Reg = X86MFInfo->getSRetReturnReg();
+ assert(Reg &&
+ "SRetReturnReg should have been set in LowerFormalArguments()!");
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ X86::RAX).addReg(Reg);
+ MRI.addLiveOut(X86::RAX);
+ }
+
// Now emit the RET.
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::RET));
return true;
@@ -1527,9 +1541,9 @@
CallingConv::ID CC = CS.getCallingConv();
if (CC == CallingConv::Fast || CC == CallingConv::GHC)
return 0;
- if (!CS.paramHasAttr(1, Attribute::StructRet))
+ if (!CS.paramHasAttr(1, Attributes::StructRet))
return 0;
- if (CS.paramHasAttr(1, Attribute::InReg))
+ if (CS.paramHasAttr(1, Attributes::InReg))
return 0;
return 4;
}
@@ -1608,12 +1622,12 @@
Value *ArgVal = *i;
ISD::ArgFlagsTy Flags;
unsigned AttrInd = i - CS.arg_begin() + 1;
- if (CS.paramHasAttr(AttrInd, Attribute::SExt))
+ if (CS.paramHasAttr(AttrInd, Attributes::SExt))
Flags.setSExt();
- if (CS.paramHasAttr(AttrInd, Attribute::ZExt))
+ if (CS.paramHasAttr(AttrInd, Attributes::ZExt))
Flags.setZExt();
- if (CS.paramHasAttr(AttrInd, Attribute::ByVal)) {
+ if (CS.paramHasAttr(AttrInd, Attributes::ByVal)) {
PointerType *Ty = cast<PointerType>(ArgVal->getType());
Type *ElementTy = Ty->getElementType();
unsigned FrameSize = TD.getTypeAllocSize(ElementTy);
@@ -1627,9 +1641,9 @@
return false;
}
- if (CS.paramHasAttr(AttrInd, Attribute::InReg))
+ if (CS.paramHasAttr(AttrInd, Attributes::InReg))
Flags.setInReg();
- if (CS.paramHasAttr(AttrInd, Attribute::Nest))
+ if (CS.paramHasAttr(AttrInd, Attributes::Nest))
Flags.setNest();
// If this is an i1/i8/i16 argument, promote to i32 to avoid an extra
@@ -1771,7 +1785,7 @@
} else {
unsigned LocMemOffset = VA.getLocMemOffset();
X86AddressMode AM;
- AM.Base.Reg = StackPtr;
+ AM.Base.Reg = RegInfo->getStackRegister();
AM.Disp = LocMemOffset;
const Value *ArgVal = ArgVals[VA.getValNo()];
ISD::ArgFlagsTy Flags = ArgFlags[VA.getValNo()];
@@ -1897,11 +1911,11 @@
ISD::InputArg MyFlags;
MyFlags.VT = RegisterVT.getSimpleVT();
MyFlags.Used = !CS.getInstruction()->use_empty();
- if (CS.paramHasAttr(0, Attribute::SExt))
+ if (CS.paramHasAttr(0, Attributes::SExt))
MyFlags.Flags.setSExt();
- if (CS.paramHasAttr(0, Attribute::ZExt))
+ if (CS.paramHasAttr(0, Attributes::ZExt))
MyFlags.Flags.setZExt();
- if (CS.paramHasAttr(0, Attribute::InReg))
+ if (CS.paramHasAttr(0, Attributes::InReg))
MyFlags.Flags.setInReg();
Ins.push_back(MyFlags);
}
@@ -2014,13 +2028,17 @@
unsigned X86FastISel::TargetMaterializeConstant(const Constant *C) {
MVT VT;
if (!isTypeLegal(C->getType(), VT))
- return false;
+ return 0;
+
+ // Can't handle alternate code models yet.
+ if (TM.getCodeModel() != CodeModel::Small)
+ return 0;
// Get opcode and regclass of the output for the given load instruction.
unsigned Opc = 0;
const TargetRegisterClass *RC = NULL;
switch (VT.SimpleTy) {
- default: return false;
+ default: return 0;
case MVT::i8:
Opc = X86::MOV8rm;
RC = &X86::GR8RegClass;
@@ -2058,7 +2076,7 @@
break;
case MVT::f80:
// No f80 support yet.
- return false;
+ return 0;
}
// Materialize addresses with LEA instructions.
@@ -2142,28 +2160,28 @@
unsigned Opc = 0;
const TargetRegisterClass *RC = NULL;
switch (VT.SimpleTy) {
- default: return false;
- case MVT::f32:
- if (X86ScalarSSEf32) {
- Opc = X86::FsFLD0SS;
- RC = &X86::FR32RegClass;
- } else {
- Opc = X86::LD_Fp032;
- RC = &X86::RFP32RegClass;
- }
- break;
- case MVT::f64:
- if (X86ScalarSSEf64) {
- Opc = X86::FsFLD0SD;
- RC = &X86::FR64RegClass;
- } else {
- Opc = X86::LD_Fp064;
- RC = &X86::RFP64RegClass;
- }
- break;
- case MVT::f80:
- // No f80 support yet.
- return false;
+ default: return false;
+ case MVT::f32:
+ if (X86ScalarSSEf32) {
+ Opc = X86::FsFLD0SS;
+ RC = &X86::FR32RegClass;
+ } else {
+ Opc = X86::LD_Fp032;
+ RC = &X86::RFP32RegClass;
+ }
+ break;
+ case MVT::f64:
+ if (X86ScalarSSEf64) {
+ Opc = X86::FsFLD0SD;
+ RC = &X86::FR64RegClass;
+ } else {
+ Opc = X86::LD_Fp064;
+ RC = &X86::RFP64RegClass;
+ }
+ break;
+ case MVT::f80:
+ // No f80 support yet.
+ return false;
}
unsigned ResultReg = createResultReg(RC);
@@ -2182,7 +2200,7 @@
if (!X86SelectAddress(LI->getOperand(0), AM))
return false;
- X86InstrInfo &XII = (X86InstrInfo&)TII;
+ const X86InstrInfo &XII = (const X86InstrInfo&)TII;
unsigned Size = TD.getTypeAllocSize(LI->getType());
unsigned Alignment = LI->getAlignment();
Modified: llvm/branches/AMDILBackend/lib/Target/X86/X86FloatingPoint.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/X86/X86FloatingPoint.cpp?rev=172541&r1=172540&r2=172541&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/X86/X86FloatingPoint.cpp (original)
+++ llvm/branches/AMDILBackend/lib/Target/X86/X86FloatingPoint.cpp Tue Jan 15 11:16:16 2013
@@ -171,6 +171,7 @@
// Shuffle live registers to match the expectations of successor blocks.
void finishBlockStack();
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void dumpStack() const {
dbgs() << "Stack contents:";
for (unsigned i = 0; i != StackTop; ++i) {
@@ -181,6 +182,7 @@
dbgs() << ", ST" << i << " in FP" << unsigned(PendingST[i]);
dbgs() << "\n";
}
+#endif
/// getSlot - Return the stack slot number a particular register number is
/// in.
@@ -575,8 +577,8 @@
friend bool operator<(const TableEntry &TE, unsigned V) {
return TE.from < V;
}
- friend bool LLVM_ATTRIBUTE_USED operator<(unsigned V,
- const TableEntry &TE) {
+ friend bool LLVM_ATTRIBUTE_UNUSED operator<(unsigned V,
+ const TableEntry &TE) {
return V < TE.from;
}
};
Modified: llvm/branches/AMDILBackend/lib/Target/X86/X86FrameLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/X86/X86FrameLowering.cpp?rev=172541&r1=172540&r2=172541&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/X86/X86FrameLowering.cpp (original)
+++ llvm/branches/AMDILBackend/lib/Target/X86/X86FrameLowering.cpp Tue Jan 15 11:16:16 2013
@@ -25,7 +25,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/ADT/SmallSet.h"
@@ -313,11 +313,11 @@
if (CSI.empty()) return;
std::vector<MachineMove> &Moves = MMI.getFrameMoves();
- const TargetData *TD = TM.getTargetData();
+ const X86RegisterInfo *RegInfo = TM.getRegisterInfo();
bool HasFP = hasFP(MF);
// Calculate amount of bytes used for return address storing.
- int stackGrowth = -TD->getPointerSize();
+ int stackGrowth = -RegInfo->getSlotSize();
// FIXME: This is dirty hack. The code itself is pretty mess right now.
// It should be rewritten from scratch and generalized sometimes.
@@ -674,7 +674,7 @@
// function, and use up to 128 bytes of stack space, don't have a frame
// pointer, calls, or dynamic alloca then we do not need to adjust the
// stack pointer (we fit in the Red Zone).
- if (Is64Bit && !Fn->hasFnAttr(Attribute::NoRedZone) &&
+ if (Is64Bit && !Fn->getFnAttributes().hasAttribute(Attributes::NoRedZone) &&
!RegInfo->needsStackRealignment(MF) &&
!MFI->hasVarSizedObjects() && // No dynamic alloca.
!MFI->adjustsStack() && // No calls.
@@ -715,9 +715,8 @@
// ELSE => DW_CFA_offset_extended
std::vector<MachineMove> &Moves = MMI.getFrameMoves();
- const TargetData *TD = MF.getTarget().getTargetData();
uint64_t NumBytes = 0;
- int stackGrowth = -TD->getPointerSize();
+ int stackGrowth = -SlotSize;
if (HasFP) {
// Calculate required stack adjustment.
@@ -836,8 +835,6 @@
MI->getOperand(3).setIsDead();
}
- DL = MBB.findDebugLoc(MBBI);
-
// If there is an SUB32ri of ESP immediately before this instruction, merge
// the two. This can be the case when tail call elimination is enabled and
// the callee has more arguments then the caller.
Modified: llvm/branches/AMDILBackend/lib/Target/X86/X86ISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/X86/X86ISelDAGToDAG.cpp?rev=172541&r1=172540&r2=172541&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/X86/X86ISelDAGToDAG.cpp (original)
+++ llvm/branches/AMDILBackend/lib/Target/X86/X86ISelDAGToDAG.cpp Tue Jan 15 11:16:16 2013
@@ -100,6 +100,7 @@
Base_Reg = Reg;
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void dump() {
dbgs() << "X86ISelAddressMode " << this << '\n';
dbgs() << "Base_Reg ";
@@ -133,6 +134,7 @@
dbgs() << "nul";
dbgs() << " JT" << JT << " Align" << Align << '\n';
}
+#endif
};
}
@@ -189,7 +191,6 @@
SDNode *Select(SDNode *N);
SDNode *SelectGather(SDNode *N, unsigned Opc);
SDNode *SelectAtomic64(SDNode *Node, unsigned Opc);
- SDNode *SelectAtomicLoadAdd(SDNode *Node, EVT NVT);
SDNode *SelectAtomicLoadArith(SDNode *Node, EVT NVT);
bool FoldOffsetIntoAddress(uint64_t Offset, X86ISelAddressMode &AM);
@@ -244,13 +245,15 @@
else if (AM.CP)
Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i32,
AM.Align, AM.Disp, AM.SymbolFlags);
- else if (AM.ES)
+ else if (AM.ES) {
+ assert(!AM.Disp && "Non-zero displacement is ignored with ES.");
Disp = CurDAG->getTargetExternalSymbol(AM.ES, MVT::i32, AM.SymbolFlags);
- else if (AM.JT != -1)
+ } else if (AM.JT != -1) {
+ assert(!AM.Disp && "Non-zero displacement is ignored with JT.");
Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i32, AM.SymbolFlags);
- else if (AM.BlockAddr)
- Disp = CurDAG->getBlockAddress(AM.BlockAddr, MVT::i32,
- true, AM.SymbolFlags);
+ } else if (AM.BlockAddr)
+ Disp = CurDAG->getTargetBlockAddress(AM.BlockAddr, MVT::i32, AM.Disp,
+ AM.SymbolFlags);
else
Disp = CurDAG->getTargetConstant(AM.Disp, MVT::i32);
@@ -359,7 +362,7 @@
/// MoveBelowCallOrigChain - Replace the original chain operand of the call with
/// load's chain operand and move load below the call's chain operand.
static void MoveBelowOrigChain(SelectionDAG *CurDAG, SDValue Load,
- SDValue Call, SDValue OrigChain) {
+ SDValue Call, SDValue OrigChain) {
SmallVector<SDValue, 8> Ops;
SDValue Chain = OrigChain.getOperand(0);
if (Chain.getNode() == Load.getNode())
@@ -383,11 +386,13 @@
CurDAG->UpdateNodeOperands(OrigChain.getNode(), &Ops[0], Ops.size());
CurDAG->UpdateNodeOperands(Load.getNode(), Call.getOperand(0),
Load.getOperand(1), Load.getOperand(2));
+
+ unsigned NumOps = Call.getNode()->getNumOperands();
Ops.clear();
Ops.push_back(SDValue(Load.getNode(), 1));
- for (unsigned i = 1, e = Call.getNode()->getNumOperands(); i != e; ++i)
+ for (unsigned i = 1, e = NumOps; i != e; ++i)
Ops.push_back(Call.getOperand(i));
- CurDAG->UpdateNodeOperands(Call.getNode(), &Ops[0], Ops.size());
+ CurDAG->UpdateNodeOperands(Call.getNode(), &Ops[0], NumOps);
}
/// isCalleeLoad - Return true if call address is a load and it can be
@@ -396,6 +401,10 @@
/// In the case of a tail call, there isn't a callseq node between the call
/// chain and the load.
static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) {
+ // The transformation is somewhat dangerous if the call's chain was glued to
+ // the call. After MoveBelowOrigChain the load is moved between the call and
+ // the chain, this can create a cycle if the load is not folded. So it is
+ // *really* important that we are sure the load will be folded.
if (Callee.getNode() == Chain.getNode() || !Callee.hasOneUse())
return false;
LoadSDNode *LD = dyn_cast<LoadSDNode>(Callee.getNode());
@@ -425,7 +434,8 @@
void X86DAGToDAGISel::PreprocessISelDAG() {
// OptForSize is used in pattern predicates that isel is matching.
- OptForSize = MF->getFunction()->hasFnAttr(Attribute::OptimizeForSize);
+ OptForSize = MF->getFunction()->getFnAttributes().
+ hasAttribute(Attributes::OptimizeForSize);
for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
E = CurDAG->allnodes_end(); I != E; ) {
@@ -433,7 +443,10 @@
if (OptLevel != CodeGenOpt::None &&
(N->getOpcode() == X86ISD::CALL ||
- N->getOpcode() == X86ISD::TC_RETURN)) {
+ (N->getOpcode() == X86ISD::TC_RETURN &&
+ // Only does this if load can be foled into TC_RETURN.
+ (Subtarget->is64Bit() ||
+ getTargetMachine().getRelocationModel() != Reloc::PIC_)))) {
/// Also try moving call address load from outside callseq_start to just
/// before the call to allow it to be folded.
///
@@ -652,10 +665,16 @@
} else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) {
AM.JT = J->getIndex();
AM.SymbolFlags = J->getTargetFlags();
- } else {
- AM.BlockAddr = cast<BlockAddressSDNode>(N0)->getBlockAddress();
- AM.SymbolFlags = cast<BlockAddressSDNode>(N0)->getTargetFlags();
- }
+ } else if (BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(N0)) {
+ X86ISelAddressMode Backup = AM;
+ AM.BlockAddr = BA->getBlockAddress();
+ AM.SymbolFlags = BA->getTargetFlags();
+ if (FoldOffsetIntoAddress(BA->getOffset(), AM)) {
+ AM = Backup;
+ return true;
+ }
+ } else
+ llvm_unreachable("Unhandled symbol reference node.");
if (N.getOpcode() == X86ISD::WrapperRIP)
AM.setBaseReg(CurDAG->getRegister(X86::RIP, MVT::i64));
@@ -684,10 +703,12 @@
} else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) {
AM.JT = J->getIndex();
AM.SymbolFlags = J->getTargetFlags();
- } else {
- AM.BlockAddr = cast<BlockAddressSDNode>(N0)->getBlockAddress();
- AM.SymbolFlags = cast<BlockAddressSDNode>(N0)->getTargetFlags();
- }
+ } else if (BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(N0)) {
+ AM.BlockAddr = BA->getBlockAddress();
+ AM.Disp += BA->getOffset();
+ AM.SymbolFlags = BA->getTargetFlags();
+ } else
+ llvm_unreachable("Unhandled symbol reference node.");
return false;
}
@@ -1011,7 +1032,7 @@
AM.IndexReg = ShVal.getNode()->getOperand(0);
ConstantSDNode *AddVal =
cast<ConstantSDNode>(ShVal.getNode()->getOperand(1));
- uint64_t Disp = AddVal->getSExtValue() << Val;
+ uint64_t Disp = (uint64_t)AddVal->getSExtValue() << Val;
if (!FoldOffsetIntoAddress(Disp, AM))
return false;
}
@@ -1281,7 +1302,9 @@
// that are not a MemSDNode, and thus don't have proper addrspace info.
Parent->getOpcode() != ISD::INTRINSIC_W_CHAIN && // unaligned loads, fixme
Parent->getOpcode() != ISD::INTRINSIC_VOID && // nontemporal stores
- Parent->getOpcode() != X86ISD::TLSCALL) { // Fixme
+ Parent->getOpcode() != X86ISD::TLSCALL && // Fixme
+ Parent->getOpcode() != X86ISD::EH_SJLJ_SETJMP && // setjmp
+ Parent->getOpcode() != X86ISD::EH_SJLJ_LONGJMP) { // longjmp
unsigned AddrSpace =
cast<MemSDNode>(Parent)->getPointerInfo().getAddrSpace();
// AddrSpace 256 -> GS, 257 -> FS.
@@ -1468,6 +1491,7 @@
SDValue In1 = Node->getOperand(1);
SDValue In2L = Node->getOperand(2);
SDValue In2H = Node->getOperand(3);
+
SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
if (!SelectAddr(Node, In1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4))
return NULL;
@@ -1481,159 +1505,13 @@
return ResNode;
}
-// FIXME: Figure out some way to unify this with the 'or' and other code
-// below.
-SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) {
- if (Node->hasAnyUseOfValue(0))
- return 0;
-
- // Optimize common patterns for __sync_add_and_fetch and
- // __sync_sub_and_fetch where the result is not used. This allows us
- // to use "lock" version of add, sub, inc, dec instructions.
- // FIXME: Do not use special instructions but instead add the "lock"
- // prefix to the target node somehow. The extra information will then be
- // transferred to machine instruction and it denotes the prefix.
- SDValue Chain = Node->getOperand(0);
- SDValue Ptr = Node->getOperand(1);
- SDValue Val = Node->getOperand(2);
- SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
- if (!SelectAddr(Node, Ptr, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4))
- return 0;
-
- bool isInc = false, isDec = false, isSub = false, isCN = false;
- ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val);
- if (CN && CN->getSExtValue() == (int32_t)CN->getSExtValue()) {
- isCN = true;
- int64_t CNVal = CN->getSExtValue();
- if (CNVal == 1)
- isInc = true;
- else if (CNVal == -1)
- isDec = true;
- else if (CNVal >= 0)
- Val = CurDAG->getTargetConstant(CNVal, NVT);
- else {
- isSub = true;
- Val = CurDAG->getTargetConstant(-CNVal, NVT);
- }
- } else if (Val.hasOneUse() &&
- Val.getOpcode() == ISD::SUB &&
- X86::isZeroNode(Val.getOperand(0))) {
- isSub = true;
- Val = Val.getOperand(1);
- }
-
- DebugLoc dl = Node->getDebugLoc();
- unsigned Opc = 0;
- switch (NVT.getSimpleVT().SimpleTy) {
- default: return 0;
- case MVT::i8:
- if (isInc)
- Opc = X86::LOCK_INC8m;
- else if (isDec)
- Opc = X86::LOCK_DEC8m;
- else if (isSub) {
- if (isCN)
- Opc = X86::LOCK_SUB8mi;
- else
- Opc = X86::LOCK_SUB8mr;
- } else {
- if (isCN)
- Opc = X86::LOCK_ADD8mi;
- else
- Opc = X86::LOCK_ADD8mr;
- }
- break;
- case MVT::i16:
- if (isInc)
- Opc = X86::LOCK_INC16m;
- else if (isDec)
- Opc = X86::LOCK_DEC16m;
- else if (isSub) {
- if (isCN) {
- if (immSext8(Val.getNode()))
- Opc = X86::LOCK_SUB16mi8;
- else
- Opc = X86::LOCK_SUB16mi;
- } else
- Opc = X86::LOCK_SUB16mr;
- } else {
- if (isCN) {
- if (immSext8(Val.getNode()))
- Opc = X86::LOCK_ADD16mi8;
- else
- Opc = X86::LOCK_ADD16mi;
- } else
- Opc = X86::LOCK_ADD16mr;
- }
- break;
- case MVT::i32:
- if (isInc)
- Opc = X86::LOCK_INC32m;
- else if (isDec)
- Opc = X86::LOCK_DEC32m;
- else if (isSub) {
- if (isCN) {
- if (immSext8(Val.getNode()))
- Opc = X86::LOCK_SUB32mi8;
- else
- Opc = X86::LOCK_SUB32mi;
- } else
- Opc = X86::LOCK_SUB32mr;
- } else {
- if (isCN) {
- if (immSext8(Val.getNode()))
- Opc = X86::LOCK_ADD32mi8;
- else
- Opc = X86::LOCK_ADD32mi;
- } else
- Opc = X86::LOCK_ADD32mr;
- }
- break;
- case MVT::i64:
- if (isInc)
- Opc = X86::LOCK_INC64m;
- else if (isDec)
- Opc = X86::LOCK_DEC64m;
- else if (isSub) {
- Opc = X86::LOCK_SUB64mr;
- if (isCN) {
- if (immSext8(Val.getNode()))
- Opc = X86::LOCK_SUB64mi8;
- else if (i64immSExt32(Val.getNode()))
- Opc = X86::LOCK_SUB64mi32;
- }
- } else {
- Opc = X86::LOCK_ADD64mr;
- if (isCN) {
- if (immSext8(Val.getNode()))
- Opc = X86::LOCK_ADD64mi8;
- else if (i64immSExt32(Val.getNode()))
- Opc = X86::LOCK_ADD64mi32;
- }
- }
- break;
- }
-
- SDValue Undef = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
- dl, NVT), 0);
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = cast<MemSDNode>(Node)->getMemOperand();
- if (isInc || isDec) {
- SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Chain };
- SDValue Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops, 6), 0);
- cast<MachineSDNode>(Ret)->setMemRefs(MemOp, MemOp + 1);
- SDValue RetVals[] = { Undef, Ret };
- return CurDAG->getMergeValues(RetVals, 2, dl).getNode();
- } else {
- SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Val, Chain };
- SDValue Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops, 7), 0);
- cast<MachineSDNode>(Ret)->setMemRefs(MemOp, MemOp + 1);
- SDValue RetVals[] = { Undef, Ret };
- return CurDAG->getMergeValues(RetVals, 2, dl).getNode();
- }
-}
-
+/// Atomic opcode table
+///
enum AtomicOpc {
+ ADD,
+ SUB,
+ INC,
+ DEC,
OR,
AND,
XOR,
@@ -1657,6 +1535,58 @@
static const uint16_t AtomicOpcTbl[AtomicOpcEnd][AtomicSzEnd] = {
{
+ X86::LOCK_ADD8mi,
+ X86::LOCK_ADD8mr,
+ X86::LOCK_ADD16mi8,
+ X86::LOCK_ADD16mi,
+ X86::LOCK_ADD16mr,
+ X86::LOCK_ADD32mi8,
+ X86::LOCK_ADD32mi,
+ X86::LOCK_ADD32mr,
+ X86::LOCK_ADD64mi8,
+ X86::LOCK_ADD64mi32,
+ X86::LOCK_ADD64mr,
+ },
+ {
+ X86::LOCK_SUB8mi,
+ X86::LOCK_SUB8mr,
+ X86::LOCK_SUB16mi8,
+ X86::LOCK_SUB16mi,
+ X86::LOCK_SUB16mr,
+ X86::LOCK_SUB32mi8,
+ X86::LOCK_SUB32mi,
+ X86::LOCK_SUB32mr,
+ X86::LOCK_SUB64mi8,
+ X86::LOCK_SUB64mi32,
+ X86::LOCK_SUB64mr,
+ },
+ {
+ 0,
+ X86::LOCK_INC8m,
+ 0,
+ 0,
+ X86::LOCK_INC16m,
+ 0,
+ 0,
+ X86::LOCK_INC32m,
+ 0,
+ 0,
+ X86::LOCK_INC64m,
+ },
+ {
+ 0,
+ X86::LOCK_DEC8m,
+ 0,
+ 0,
+ X86::LOCK_DEC16m,
+ 0,
+ 0,
+ X86::LOCK_DEC32m,
+ 0,
+ 0,
+ X86::LOCK_DEC64m,
+ },
+ {
X86::LOCK_OR8mi,
X86::LOCK_OR8mr,
X86::LOCK_OR16mi8,
@@ -1667,7 +1597,7 @@
X86::LOCK_OR32mr,
X86::LOCK_OR64mi8,
X86::LOCK_OR64mi32,
- X86::LOCK_OR64mr
+ X86::LOCK_OR64mr,
},
{
X86::LOCK_AND8mi,
@@ -1680,7 +1610,7 @@
X86::LOCK_AND32mr,
X86::LOCK_AND64mi8,
X86::LOCK_AND64mi32,
- X86::LOCK_AND64mr
+ X86::LOCK_AND64mr,
},
{
X86::LOCK_XOR8mi,
@@ -1693,18 +1623,74 @@
X86::LOCK_XOR32mr,
X86::LOCK_XOR64mi8,
X86::LOCK_XOR64mi32,
- X86::LOCK_XOR64mr
+ X86::LOCK_XOR64mr,
}
};
+// Return the target constant operand for atomic-load-op and do simple
+// translations, such as from atomic-load-add to lock-sub. The return value is
+// one of the following 3 cases:
+// + target-constant, the operand could be supported as a target constant.
+// + empty, the operand is not needed any more with the new op selected.
+// + non-empty, otherwise.
+static SDValue getAtomicLoadArithTargetConstant(SelectionDAG *CurDAG,
+ DebugLoc dl,
+ enum AtomicOpc &Op, EVT NVT,
+ SDValue Val) {
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val)) {
+ int64_t CNVal = CN->getSExtValue();
+ // Quit if not 32-bit imm.
+ if ((int32_t)CNVal != CNVal)
+ return Val;
+ // For atomic-load-add, we could do some optimizations.
+ if (Op == ADD) {
+ // Translate to INC/DEC if ADD by 1 or -1.
+ if ((CNVal == 1) || (CNVal == -1)) {
+ Op = (CNVal == 1) ? INC : DEC;
+ // No more constant operand after being translated into INC/DEC.
+ return SDValue();
+ }
+ // Translate to SUB if ADD by negative value.
+ if (CNVal < 0) {
+ Op = SUB;
+ CNVal = -CNVal;
+ }
+ }
+ return CurDAG->getTargetConstant(CNVal, NVT);
+ }
+
+ // If the value operand is single-used, try to optimize it.
+ if (Op == ADD && Val.hasOneUse()) {
+ // Translate (atomic-load-add ptr (sub 0 x)) back to (lock-sub x).
+ if (Val.getOpcode() == ISD::SUB && X86::isZeroNode(Val.getOperand(0))) {
+ Op = SUB;
+ return Val.getOperand(1);
+ }
+ // A special case for i16, which needs truncating as, in most cases, it's
+ // promoted to i32. We will translate
+ // (atomic-load-add (truncate (sub 0 x))) to (lock-sub (EXTRACT_SUBREG x))
+ if (Val.getOpcode() == ISD::TRUNCATE && NVT == MVT::i16 &&
+ Val.getOperand(0).getOpcode() == ISD::SUB &&
+ X86::isZeroNode(Val.getOperand(0).getOperand(0))) {
+ Op = SUB;
+ Val = Val.getOperand(0);
+ return CurDAG->getTargetExtractSubreg(X86::sub_16bit, dl, NVT,
+ Val.getOperand(1));
+ }
+ }
+
+ return Val;
+}
+
SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, EVT NVT) {
if (Node->hasAnyUseOfValue(0))
return 0;
+ DebugLoc dl = Node->getDebugLoc();
+
// Optimize common patterns for __sync_or_and_fetch and similar arith
// operations where the result is not used. This allows us to use the "lock"
// version of the arithmetic instruction.
- // FIXME: Same as for 'add' and 'sub', try to merge those down here.
SDValue Chain = Node->getOperand(0);
SDValue Ptr = Node->getOperand(1);
SDValue Val = Node->getOperand(2);
@@ -1715,6 +1701,8 @@
// Which index into the table.
enum AtomicOpc Op;
switch (Node->getOpcode()) {
+ default:
+ return 0;
case ISD::ATOMIC_LOAD_OR:
Op = OR;
break;
@@ -1724,16 +1712,14 @@
case ISD::ATOMIC_LOAD_XOR:
Op = XOR;
break;
- default:
- return 0;
- }
-
- bool isCN = false;
- ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val);
- if (CN && (int32_t)CN->getSExtValue() == CN->getSExtValue()) {
- isCN = true;
- Val = CurDAG->getTargetConstant(CN->getSExtValue(), NVT);
+ case ISD::ATOMIC_LOAD_ADD:
+ Op = ADD;
+ break;
}
+
+ Val = getAtomicLoadArithTargetConstant(CurDAG, dl, Op, NVT, Val);
+ bool isUnOp = !Val.getNode();
+ bool isCN = Val.getNode() && (Val.getOpcode() == ISD::TargetConstant);
unsigned Opc = 0;
switch (NVT.getSimpleVT().SimpleTy) {
@@ -1775,13 +1761,20 @@
assert(Opc != 0 && "Invalid arith lock transform!");
- DebugLoc dl = Node->getDebugLoc();
+ SDValue Ret;
SDValue Undef = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
dl, NVT), 0);
MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
MemOp[0] = cast<MemSDNode>(Node)->getMemOperand();
- SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Val, Chain };
- SDValue Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops, 7), 0);
+ if (isUnOp) {
+ SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Chain };
+ Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops,
+ array_lengthof(Ops)), 0);
+ } else {
+ SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Val, Chain };
+ Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops,
+ array_lengthof(Ops)), 0);
+ }
cast<MachineSDNode>(Ret)->setMemRefs(MemOp, MemOp + 1);
SDValue RetVals[] = { Undef, Ret };
return CurDAG->getMergeValues(RetVals, 2, dl).getNode();
@@ -2059,16 +2052,24 @@
case X86ISD::ATOMSUB64_DAG:
case X86ISD::ATOMNAND64_DAG:
case X86ISD::ATOMAND64_DAG:
+ case X86ISD::ATOMMAX64_DAG:
+ case X86ISD::ATOMMIN64_DAG:
+ case X86ISD::ATOMUMAX64_DAG:
+ case X86ISD::ATOMUMIN64_DAG:
case X86ISD::ATOMSWAP64_DAG: {
unsigned Opc;
switch (Opcode) {
- default: llvm_unreachable("Impossible intrinsic");
+ default: llvm_unreachable("Impossible opcode");
case X86ISD::ATOMOR64_DAG: Opc = X86::ATOMOR6432; break;
case X86ISD::ATOMXOR64_DAG: Opc = X86::ATOMXOR6432; break;
case X86ISD::ATOMADD64_DAG: Opc = X86::ATOMADD6432; break;
case X86ISD::ATOMSUB64_DAG: Opc = X86::ATOMSUB6432; break;
case X86ISD::ATOMNAND64_DAG: Opc = X86::ATOMNAND6432; break;
case X86ISD::ATOMAND64_DAG: Opc = X86::ATOMAND6432; break;
+ case X86ISD::ATOMMAX64_DAG: Opc = X86::ATOMMAX6432; break;
+ case X86ISD::ATOMMIN64_DAG: Opc = X86::ATOMMIN6432; break;
+ case X86ISD::ATOMUMAX64_DAG: Opc = X86::ATOMUMAX6432; break;
+ case X86ISD::ATOMUMIN64_DAG: Opc = X86::ATOMUMIN6432; break;
case X86ISD::ATOMSWAP64_DAG: Opc = X86::ATOMSWAP6432; break;
}
SDNode *RetVal = SelectAtomic64(Node, Opc);
@@ -2077,15 +2078,10 @@
break;
}
- case ISD::ATOMIC_LOAD_ADD: {
- SDNode *RetVal = SelectAtomicLoadAdd(Node, NVT);
- if (RetVal)
- return RetVal;
- break;
- }
case ISD::ATOMIC_LOAD_XOR:
case ISD::ATOMIC_LOAD_AND:
- case ISD::ATOMIC_LOAD_OR: {
+ case ISD::ATOMIC_LOAD_OR:
+ case ISD::ATOMIC_LOAD_ADD: {
SDNode *RetVal = SelectAtomicLoadArith(Node, NVT);
if (RetVal)
return RetVal;
@@ -2116,10 +2112,11 @@
// Make sure that we don't change the operation by removing bits.
// This only matters for OR and XOR, AND is unaffected.
- if (Opcode != ISD::AND && ((Val >> ShlVal) << ShlVal) != Val)
+ uint64_t RemovedBitsMask = (1ULL << ShlVal) - 1;
+ if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0)
break;
- unsigned ShlOp, Op = 0;
+ unsigned ShlOp, Op;
EVT CstVT = NVT;
// Check the minimum bitwidth for the new constant.
@@ -2142,6 +2139,7 @@
ShlOp = X86::SHL32ri;
switch (Opcode) {
+ default: llvm_unreachable("Impossible opcode");
case ISD::AND: Op = X86::AND32ri8; break;
case ISD::OR: Op = X86::OR32ri8; break;
case ISD::XOR: Op = X86::XOR32ri8; break;
@@ -2152,6 +2150,7 @@
ShlOp = X86::SHL64ri;
switch (Opcode) {
+ default: llvm_unreachable("Impossible opcode");
case ISD::AND: Op = CstVT==MVT::i8? X86::AND64ri8 : X86::AND64ri32; break;
case ISD::OR: Op = CstVT==MVT::i8? X86::OR64ri8 : X86::OR64ri32; break;
case ISD::XOR: Op = CstVT==MVT::i8? X86::XOR64ri8 : X86::XOR64ri32; break;
@@ -2197,13 +2196,16 @@
SDValue N1 = Node->getOperand(1);
bool isSigned = Opcode == ISD::SMUL_LOHI;
+ bool hasBMI2 = Subtarget->hasBMI2();
if (!isSigned) {
switch (NVT.getSimpleVT().SimpleTy) {
default: llvm_unreachable("Unsupported VT!");
case MVT::i8: Opc = X86::MUL8r; MOpc = X86::MUL8m; break;
case MVT::i16: Opc = X86::MUL16r; MOpc = X86::MUL16m; break;
- case MVT::i32: Opc = X86::MUL32r; MOpc = X86::MUL32m; break;
- case MVT::i64: Opc = X86::MUL64r; MOpc = X86::MUL64m; break;
+ case MVT::i32: Opc = hasBMI2 ? X86::MULX32rr : X86::MUL32r;
+ MOpc = hasBMI2 ? X86::MULX32rm : X86::MUL32m; break;
+ case MVT::i64: Opc = hasBMI2 ? X86::MULX64rr : X86::MUL64r;
+ MOpc = hasBMI2 ? X86::MULX64rm : X86::MUL64m; break;
}
} else {
switch (NVT.getSimpleVT().SimpleTy) {
@@ -2215,13 +2217,31 @@
}
}
- unsigned LoReg, HiReg;
- switch (NVT.getSimpleVT().SimpleTy) {
- default: llvm_unreachable("Unsupported VT!");
- case MVT::i8: LoReg = X86::AL; HiReg = X86::AH; break;
- case MVT::i16: LoReg = X86::AX; HiReg = X86::DX; break;
- case MVT::i32: LoReg = X86::EAX; HiReg = X86::EDX; break;
- case MVT::i64: LoReg = X86::RAX; HiReg = X86::RDX; break;
+ unsigned SrcReg, LoReg, HiReg;
+ switch (Opc) {
+ default: llvm_unreachable("Unknown MUL opcode!");
+ case X86::IMUL8r:
+ case X86::MUL8r:
+ SrcReg = LoReg = X86::AL; HiReg = X86::AH;
+ break;
+ case X86::IMUL16r:
+ case X86::MUL16r:
+ SrcReg = LoReg = X86::AX; HiReg = X86::DX;
+ break;
+ case X86::IMUL32r:
+ case X86::MUL32r:
+ SrcReg = LoReg = X86::EAX; HiReg = X86::EDX;
+ break;
+ case X86::IMUL64r:
+ case X86::MUL64r:
+ SrcReg = LoReg = X86::RAX; HiReg = X86::RDX;
+ break;
+ case X86::MULX32rr:
+ SrcReg = X86::EDX; LoReg = HiReg = 0;
+ break;
+ case X86::MULX64rr:
+ SrcReg = X86::RDX; LoReg = HiReg = 0;
+ break;
}
SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
@@ -2233,22 +2253,47 @@
std::swap(N0, N1);
}
- SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg,
+ SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, SrcReg,
N0, SDValue()).getValue(1);
+ SDValue ResHi, ResLo;
if (foldedLoad) {
+ SDValue Chain;
SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
InFlag };
- SDNode *CNode =
- CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Glue, Ops,
- array_lengthof(Ops));
- InFlag = SDValue(CNode, 1);
+ if (MOpc == X86::MULX32rm || MOpc == X86::MULX64rm) {
+ SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::Other, MVT::Glue);
+ SDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops,
+ array_lengthof(Ops));
+ ResHi = SDValue(CNode, 0);
+ ResLo = SDValue(CNode, 1);
+ Chain = SDValue(CNode, 2);
+ InFlag = SDValue(CNode, 3);
+ } else {
+ SDVTList VTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
+ SDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops,
+ array_lengthof(Ops));
+ Chain = SDValue(CNode, 0);
+ InFlag = SDValue(CNode, 1);
+ }
// Update the chain.
- ReplaceUses(N1.getValue(1), SDValue(CNode, 0));
+ ReplaceUses(N1.getValue(1), Chain);
} else {
- SDNode *CNode = CurDAG->getMachineNode(Opc, dl, MVT::Glue, N1, InFlag);
- InFlag = SDValue(CNode, 0);
+ SDValue Ops[] = { N1, InFlag };
+ if (Opc == X86::MULX32rr || Opc == X86::MULX64rr) {
+ SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::Glue);
+ SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops,
+ array_lengthof(Ops));
+ ResHi = SDValue(CNode, 0);
+ ResLo = SDValue(CNode, 1);
+ InFlag = SDValue(CNode, 2);
+ } else {
+ SDVTList VTs = CurDAG->getVTList(MVT::Glue);
+ SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops,
+ array_lengthof(Ops));
+ InFlag = SDValue(CNode, 0);
+ }
}
// Prevent use of AH in a REX instruction by referencing AX instead.
@@ -2273,19 +2318,25 @@
}
// Copy the low half of the result, if it is needed.
if (!SDValue(Node, 0).use_empty()) {
- SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
- LoReg, NVT, InFlag);
- InFlag = Result.getValue(2);
- ReplaceUses(SDValue(Node, 0), Result);
- DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
+ if (ResLo.getNode() == 0) {
+ assert(LoReg && "Register for low half is not defined!");
+ ResLo = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, LoReg, NVT,
+ InFlag);
+ InFlag = ResLo.getValue(2);
+ }
+ ReplaceUses(SDValue(Node, 0), ResLo);
+ DEBUG(dbgs() << "=> "; ResLo.getNode()->dump(CurDAG); dbgs() << '\n');
}
// Copy the high half of the result, if it is needed.
if (!SDValue(Node, 1).use_empty()) {
- SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
- HiReg, NVT, InFlag);
- InFlag = Result.getValue(2);
- ReplaceUses(SDValue(Node, 1), Result);
- DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
+ if (ResHi.getNode() == 0) {
+ assert(HiReg && "Register for high half is not defined!");
+ ResHi = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, HiReg, NVT,
+ InFlag);
+ InFlag = ResHi.getValue(2);
+ }
+ ReplaceUses(SDValue(Node, 1), ResHi);
+ DEBUG(dbgs() << "=> "; ResHi.getNode()->dump(CurDAG); dbgs() << '\n');
}
return NULL;
@@ -2486,7 +2537,13 @@
MVT::i8, Reg);
// Emit a testb.
- return CurDAG->getMachineNode(X86::TEST8ri, dl, MVT::i32, Subreg, Imm);
+ SDNode *NewNode = CurDAG->getMachineNode(X86::TEST8ri, dl, MVT::i32,
+ Subreg, Imm);
+ // Replace SUB|CMP with TEST, since SUB has two outputs while TEST has
+ // one, do not call ReplaceAllUsesWith.
+ ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)),
+ SDValue(NewNode, 0));
+ return NULL;
}
// For example, "testl %eax, $2048" to "testb %ah, $8".
@@ -2517,8 +2574,13 @@
// Emit a testb. The EXTRACT_SUBREG becomes a COPY that can only
// target GR8_NOREX registers, so make sure the register class is
// forced.
- return CurDAG->getMachineNode(X86::TEST8ri_NOREX, dl, MVT::i32,
- Subreg, ShiftedImm);
+ SDNode *NewNode = CurDAG->getMachineNode(X86::TEST8ri_NOREX, dl,
+ MVT::i32, Subreg, ShiftedImm);
+ // Replace SUB|CMP with TEST, since SUB has two outputs while TEST has
+ // one, do not call ReplaceAllUsesWith.
+ ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)),
+ SDValue(NewNode, 0));
+ return NULL;
}
// For example, "testl %eax, $32776" to "testw %ax, $32776".
@@ -2534,7 +2596,13 @@
MVT::i16, Reg);
// Emit a testw.
- return CurDAG->getMachineNode(X86::TEST16ri, dl, MVT::i32, Subreg, Imm);
+ SDNode *NewNode = CurDAG->getMachineNode(X86::TEST16ri, dl, MVT::i32,
+ Subreg, Imm);
+ // Replace SUB|CMP with TEST, since SUB has two outputs while TEST has
+ // one, do not call ReplaceAllUsesWith.
+ ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)),
+ SDValue(NewNode, 0));
+ return NULL;
}
// For example, "testq %rax, $268468232" to "testl %eax, $268468232".
@@ -2550,7 +2618,13 @@
MVT::i32, Reg);
// Emit a testl.
- return CurDAG->getMachineNode(X86::TEST32ri, dl, MVT::i32, Subreg, Imm);
+ SDNode *NewNode = CurDAG->getMachineNode(X86::TEST32ri, dl, MVT::i32,
+ Subreg, Imm);
+ // Replace SUB|CMP with TEST, since SUB has two outputs while TEST has
+ // one, do not call ReplaceAllUsesWith.
+ ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)),
+ SDValue(NewNode, 0));
+ return NULL;
}
}
break;
@@ -2605,85 +2679,6 @@
return Result;
}
-
- // FIXME: Custom handling because TableGen doesn't support multiple implicit
- // defs in an instruction pattern
- case X86ISD::PCMPESTRI: {
- SDValue N0 = Node->getOperand(0);
- SDValue N1 = Node->getOperand(1);
- SDValue N2 = Node->getOperand(2);
- SDValue N3 = Node->getOperand(3);
- SDValue N4 = Node->getOperand(4);
-
- // Make sure last argument is a constant
- ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N4);
- if (!Cst)
- break;
-
- uint64_t Imm = Cst->getZExtValue();
-
- SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl,
- X86::EAX, N1, SDValue()).getValue(1);
- InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EDX,
- N3, InFlag).getValue(1);
-
- SDValue Ops[] = { N0, N2, getI8Imm(Imm), InFlag };
- unsigned Opc = Subtarget->hasAVX() ? X86::VPCMPESTRIrr :
- X86::PCMPESTRIrr;
- InFlag = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, Ops,
- array_lengthof(Ops)), 0);
-
- if (!SDValue(Node, 0).use_empty()) {
- SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
- X86::ECX, NVT, InFlag);
- InFlag = Result.getValue(2);
- ReplaceUses(SDValue(Node, 0), Result);
- }
- if (!SDValue(Node, 1).use_empty()) {
- SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
- X86::EFLAGS, NVT, InFlag);
- InFlag = Result.getValue(2);
- ReplaceUses(SDValue(Node, 1), Result);
- }
-
- return NULL;
- }
-
- // FIXME: Custom handling because TableGen doesn't support multiple implicit
- // defs in an instruction pattern
- case X86ISD::PCMPISTRI: {
- SDValue N0 = Node->getOperand(0);
- SDValue N1 = Node->getOperand(1);
- SDValue N2 = Node->getOperand(2);
-
- // Make sure last argument is a constant
- ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N2);
- if (!Cst)
- break;
-
- uint64_t Imm = Cst->getZExtValue();
-
- SDValue Ops[] = { N0, N1, getI8Imm(Imm) };
- unsigned Opc = Subtarget->hasAVX() ? X86::VPCMPISTRIrr :
- X86::PCMPISTRIrr;
- SDValue InFlag = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, Ops,
- array_lengthof(Ops)), 0);
-
- if (!SDValue(Node, 0).use_empty()) {
- SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
- X86::ECX, NVT, InFlag);
- InFlag = Result.getValue(2);
- ReplaceUses(SDValue(Node, 0), Result);
- }
- if (!SDValue(Node, 1).use_empty()) {
- SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
- X86::EFLAGS, NVT, InFlag);
- InFlag = Result.getValue(2);
- ReplaceUses(SDValue(Node, 1), Result);
- }
-
- return NULL;
- }
}
SDNode *ResNode = SelectCode(Node);
Modified: llvm/branches/AMDILBackend/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/X86/X86ISelLowering.cpp?rev=172541&r1=172540&r2=172541&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/branches/AMDILBackend/lib/Target/X86/X86ISelLowering.cpp Tue Jan 15 11:16:16 2013
@@ -66,7 +66,7 @@
static SDValue Extract128BitVector(SDValue Vec, unsigned IdxVal,
SelectionDAG &DAG, DebugLoc dl) {
EVT VT = Vec.getValueType();
- assert(VT.getSizeInBits() == 256 && "Unexpected vector size!");
+ assert(VT.is256BitVector() && "Unexpected vector size!");
EVT ElVT = VT.getVectorElementType();
unsigned Factor = VT.getSizeInBits()/128;
EVT ResultVT = EVT::getVectorVT(*DAG.getContext(), ElVT,
@@ -85,7 +85,7 @@
unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits()) / 128)
* ElemsPerChunk);
- SDValue VecIdx = DAG.getConstant(NormalizedIdxVal, MVT::i32);
+ SDValue VecIdx = DAG.getIntPtrConstant(NormalizedIdxVal);
SDValue Result = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResultVT, Vec,
VecIdx);
@@ -105,7 +105,7 @@
return Result;
EVT VT = Vec.getValueType();
- assert(VT.getSizeInBits() == 128 && "Unexpected vector size!");
+ assert(VT.is128BitVector() && "Unexpected vector size!");
EVT ElVT = VT.getVectorElementType();
EVT ResultVT = Result.getValueType();
@@ -118,7 +118,7 @@
unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits())/128)
* ElemsPerChunk);
- SDValue VecIdx = DAG.getConstant(NormalizedIdxVal, MVT::i32);
+ SDValue VecIdx = DAG.getIntPtrConstant(NormalizedIdxVal);
return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Result, Vec,
VecIdx);
}
@@ -158,10 +158,9 @@
Subtarget = &TM.getSubtarget<X86Subtarget>();
X86ScalarSSEf64 = Subtarget->hasSSE2();
X86ScalarSSEf32 = Subtarget->hasSSE1();
- X86StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP;
RegInfo = TM.getRegisterInfo();
- TD = getTargetData();
+ TD = getDataLayout();
// Set up the TargetLowering object.
static const MVT IntVTs[] = { MVT::i8, MVT::i16, MVT::i32, MVT::i64 };
@@ -180,7 +179,11 @@
setSchedulingPreference(Sched::ILP);
else
setSchedulingPreference(Sched::RegPressure);
- setStackPointerRegisterToSaveRestore(X86StackPtr);
+ setStackPointerRegisterToSaveRestore(RegInfo->getStackRegister());
+
+ // Bypass i32 with i8 on Atom when compiling with O2
+ if (Subtarget->hasSlowDivide() && TM.getOptLevel() >= CodeGenOpt::Default)
+ addBypassSlowDiv(32, 8);
if (Subtarget->isTargetWindows() && !Subtarget->isTargetCygMing()) {
// Setup Windows compiler runtime calls.
@@ -453,6 +456,14 @@
setOperationAction(ISD::SETCC , MVT::i64 , Custom);
}
setOperationAction(ISD::EH_RETURN , MVT::Other, Custom);
+ // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intened to support
+ // SjLj exception handling but a light-weight setjmp/longjmp replacement to
+ // support continuation, user-level threading, and etc.. As a result, no
+ // other SjLj exception interfaces are implemented and please don't build
+ // your own exception handling based on them.
+ // LLVM/Clang supports zero-cost DWARF exception handling.
+ setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
+ setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
// Darwin ABI issue.
setOperationAction(ISD::ConstantPool , MVT::i32 , Custom);
@@ -510,6 +521,10 @@
setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, Custom);
setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i64, Custom);
setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i64, Custom);
}
if (Subtarget->hasCmpxchg16b()) {
@@ -541,6 +556,7 @@
setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
setOperationAction(ISD::TRAP, MVT::Other, Legal);
+ setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
// VASTART needs to be custom lowered to use the VarArgsFrameIndex
setOperationAction(ISD::VASTART , MVT::Other, Custom);
@@ -643,7 +659,9 @@
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
if (!TM.Options.UnsafeFPMath) {
+ setOperationAction(ISD::FSIN , MVT::f32 , Expand);
setOperationAction(ISD::FSIN , MVT::f64 , Expand);
+ setOperationAction(ISD::FCOS , MVT::f32 , Expand);
setOperationAction(ISD::FCOS , MVT::f64 , Expand);
}
addLegalFPImmediate(APFloat(+0.0)); // FLD0
@@ -735,6 +753,7 @@
setOperationAction(ISD::FPOWI, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::FSQRT, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::FCOPYSIGN, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FFLOOR, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::SMUL_LOHI, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::UMUL_LOHI, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::SDIVREM, (MVT::SimpleValueType)VT, Expand);
@@ -824,12 +843,12 @@
setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
setOperationAction(ISD::FNEG, MVT::v4f32, Custom);
+ setOperationAction(ISD::FABS, MVT::v4f32, Custom);
setOperationAction(ISD::LOAD, MVT::v4f32, Legal);
setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
setOperationAction(ISD::SELECT, MVT::v4f32, Custom);
- setOperationAction(ISD::SETCC, MVT::v4f32, Custom);
}
if (!TM.Options.UseSoftFloat && Subtarget->hasSSE2()) {
@@ -858,6 +877,7 @@
setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
setOperationAction(ISD::FNEG, MVT::v2f64, Custom);
+ setOperationAction(ISD::FABS, MVT::v2f64, Custom);
setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
setOperationAction(ISD::SETCC, MVT::v16i8, Custom);
@@ -870,27 +890,18 @@
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v2f64, Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i64, Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i8, Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i16, Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i32, Custom);
-
// Custom lower build_vector, vector_shuffle, and extract_vector_elt.
for (int i = MVT::v16i8; i != MVT::v2i64; ++i) {
- EVT VT = (MVT::SimpleValueType)i;
+ MVT VT = (MVT::SimpleValueType)i;
// Do not attempt to custom lower non-power-of-2 vectors
if (!isPowerOf2_32(VT.getVectorNumElements()))
continue;
// Do not attempt to custom lower non-128-bit vectors
if (!VT.is128BitVector())
continue;
- setOperationAction(ISD::BUILD_VECTOR,
- VT.getSimpleVT().SimpleTy, Custom);
- setOperationAction(ISD::VECTOR_SHUFFLE,
- VT.getSimpleVT().SimpleTy, Custom);
- setOperationAction(ISD::EXTRACT_VECTOR_ELT,
- VT.getSimpleVT().SimpleTy, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
}
setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
@@ -907,23 +918,22 @@
// Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
for (int i = MVT::v16i8; i != MVT::v2i64; ++i) {
- MVT::SimpleValueType SVT = (MVT::SimpleValueType)i;
- EVT VT = SVT;
+ MVT VT = (MVT::SimpleValueType)i;
// Do not attempt to promote non-128-bit vectors
if (!VT.is128BitVector())
continue;
- setOperationAction(ISD::AND, SVT, Promote);
- AddPromotedToType (ISD::AND, SVT, MVT::v2i64);
- setOperationAction(ISD::OR, SVT, Promote);
- AddPromotedToType (ISD::OR, SVT, MVT::v2i64);
- setOperationAction(ISD::XOR, SVT, Promote);
- AddPromotedToType (ISD::XOR, SVT, MVT::v2i64);
- setOperationAction(ISD::LOAD, SVT, Promote);
- AddPromotedToType (ISD::LOAD, SVT, MVT::v2i64);
- setOperationAction(ISD::SELECT, SVT, Promote);
- AddPromotedToType (ISD::SELECT, SVT, MVT::v2i64);
+ setOperationAction(ISD::AND, VT, Promote);
+ AddPromotedToType (ISD::AND, VT, MVT::v2i64);
+ setOperationAction(ISD::OR, VT, Promote);
+ AddPromotedToType (ISD::OR, VT, MVT::v2i64);
+ setOperationAction(ISD::XOR, VT, Promote);
+ AddPromotedToType (ISD::XOR, VT, MVT::v2i64);
+ setOperationAction(ISD::LOAD, VT, Promote);
+ AddPromotedToType (ISD::LOAD, VT, MVT::v2i64);
+ setOperationAction(ISD::SELECT, VT, Promote);
+ AddPromotedToType (ISD::SELECT, VT, MVT::v2i64);
}
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
@@ -936,6 +946,18 @@
setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
+
+ setOperationAction(ISD::UINT_TO_FP, MVT::v4i8, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
+ // As there is no 64-bit GPR available, we need build a special custom
+ // sequence to convert from v2i32 to v2f32.
+ if (!Subtarget->is64Bit())
+ setOperationAction(ISD::UINT_TO_FP, MVT::v2f32, Custom);
+
+ setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
+ setOperationAction(ISD::FP_ROUND, MVT::v2f32, Custom);
+
+ setLoadExtAction(ISD::EXTLOAD, MVT::v2f32, Legal);
}
if (Subtarget->hasSSE41()) {
@@ -950,6 +972,9 @@
setOperationAction(ISD::FRINT, MVT::f64, Legal);
setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
+ setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
+ setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
+
// FIXME: Do we need to handle scalar-to-vector here?
setOperationAction(ISD::MUL, MVT::v4i32, Legal);
@@ -1010,9 +1035,6 @@
}
}
- if (Subtarget->hasSSE42())
- setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
-
if (!TM.Options.UseSoftFloat && Subtarget->hasAVX()) {
addRegisterClass(MVT::v32i8, &X86::VR256RegClass);
addRegisterClass(MVT::v16i16, &X86::VR256RegClass);
@@ -1030,25 +1052,32 @@
setOperationAction(ISD::FMUL, MVT::v8f32, Legal);
setOperationAction(ISD::FDIV, MVT::v8f32, Legal);
setOperationAction(ISD::FSQRT, MVT::v8f32, Legal);
+ setOperationAction(ISD::FFLOOR, MVT::v8f32, Legal);
setOperationAction(ISD::FNEG, MVT::v8f32, Custom);
+ setOperationAction(ISD::FABS, MVT::v8f32, Custom);
setOperationAction(ISD::FADD, MVT::v4f64, Legal);
setOperationAction(ISD::FSUB, MVT::v4f64, Legal);
setOperationAction(ISD::FMUL, MVT::v4f64, Legal);
setOperationAction(ISD::FDIV, MVT::v4f64, Legal);
setOperationAction(ISD::FSQRT, MVT::v4f64, Legal);
+ setOperationAction(ISD::FFLOOR, MVT::v4f64, Legal);
setOperationAction(ISD::FNEG, MVT::v4f64, Custom);
+ setOperationAction(ISD::FABS, MVT::v4f64, Custom);
+
+ setOperationAction(ISD::TRUNCATE, MVT::v8i16, Custom);
+
+ setOperationAction(ISD::FP_TO_SINT, MVT::v8i16, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal);
setOperationAction(ISD::FP_ROUND, MVT::v4f32, Legal);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v4f64, Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i64, Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v8f32, Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i32, Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i8, Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i16, Custom);
+ setOperationAction(ISD::ZERO_EXTEND, MVT::v8i32, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v8i8, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Custom);
+
+ setLoadExtAction(ISD::EXTLOAD, MVT::v4f32, Legal);
setOperationAction(ISD::SRL, MVT::v16i16, Custom);
setOperationAction(ISD::SRL, MVT::v32i8, Custom);
@@ -1073,7 +1102,7 @@
setOperationAction(ISD::VSELECT, MVT::v8i32, Legal);
setOperationAction(ISD::VSELECT, MVT::v8f32, Legal);
- if (Subtarget->hasFMA()) {
+ if (Subtarget->hasFMA() || Subtarget->hasFMA4()) {
setOperationAction(ISD::FMA, MVT::v8f32, Custom);
setOperationAction(ISD::FMA, MVT::v4f64, Custom);
setOperationAction(ISD::FMA, MVT::v4f32, Custom);
@@ -1081,6 +1110,7 @@
setOperationAction(ISD::FMA, MVT::f32, Custom);
setOperationAction(ISD::FMA, MVT::f64, Custom);
}
+
if (Subtarget->hasAVX2()) {
setOperationAction(ISD::ADD, MVT::v4i64, Legal);
setOperationAction(ISD::ADD, MVT::v8i32, Legal);
@@ -1134,45 +1164,44 @@
// Custom lower several nodes for 256-bit types.
for (int i = MVT::FIRST_VECTOR_VALUETYPE;
i <= MVT::LAST_VECTOR_VALUETYPE; ++i) {
- MVT::SimpleValueType SVT = (MVT::SimpleValueType)i;
- EVT VT = SVT;
+ MVT VT = (MVT::SimpleValueType)i;
// Extract subvector is special because the value type
// (result) is 128-bit but the source is 256-bit wide.
if (VT.is128BitVector())
- setOperationAction(ISD::EXTRACT_SUBVECTOR, SVT, Custom);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
// Do not attempt to custom lower other non-256-bit vectors
if (!VT.is256BitVector())
continue;
- setOperationAction(ISD::BUILD_VECTOR, SVT, Custom);
- setOperationAction(ISD::VECTOR_SHUFFLE, SVT, Custom);
- setOperationAction(ISD::INSERT_VECTOR_ELT, SVT, Custom);
- setOperationAction(ISD::EXTRACT_VECTOR_ELT, SVT, Custom);
- setOperationAction(ISD::SCALAR_TO_VECTOR, SVT, Custom);
- setOperationAction(ISD::INSERT_SUBVECTOR, SVT, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
+ setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
}
// Promote v32i8, v16i16, v8i32 select, and, or, xor to v4i64.
for (int i = MVT::v32i8; i != MVT::v4i64; ++i) {
- MVT::SimpleValueType SVT = (MVT::SimpleValueType)i;
- EVT VT = SVT;
+ MVT VT = (MVT::SimpleValueType)i;
// Do not attempt to promote non-256-bit vectors
if (!VT.is256BitVector())
continue;
- setOperationAction(ISD::AND, SVT, Promote);
- AddPromotedToType (ISD::AND, SVT, MVT::v4i64);
- setOperationAction(ISD::OR, SVT, Promote);
- AddPromotedToType (ISD::OR, SVT, MVT::v4i64);
- setOperationAction(ISD::XOR, SVT, Promote);
- AddPromotedToType (ISD::XOR, SVT, MVT::v4i64);
- setOperationAction(ISD::LOAD, SVT, Promote);
- AddPromotedToType (ISD::LOAD, SVT, MVT::v4i64);
- setOperationAction(ISD::SELECT, SVT, Promote);
- AddPromotedToType (ISD::SELECT, SVT, MVT::v4i64);
+ setOperationAction(ISD::AND, VT, Promote);
+ AddPromotedToType (ISD::AND, VT, MVT::v4i64);
+ setOperationAction(ISD::OR, VT, Promote);
+ AddPromotedToType (ISD::OR, VT, MVT::v4i64);
+ setOperationAction(ISD::XOR, VT, Promote);
+ AddPromotedToType (ISD::XOR, VT, MVT::v4i64);
+ setOperationAction(ISD::LOAD, VT, Promote);
+ AddPromotedToType (ISD::LOAD, VT, MVT::v4i64);
+ setOperationAction(ISD::SELECT, VT, Promote);
+ AddPromotedToType (ISD::SELECT, VT, MVT::v4i64);
}
}
@@ -1238,10 +1267,8 @@
setTargetDAGCombine(ISD::ANY_EXTEND);
setTargetDAGCombine(ISD::SIGN_EXTEND);
setTargetDAGCombine(ISD::TRUNCATE);
- setTargetDAGCombine(ISD::UINT_TO_FP);
setTargetDAGCombine(ISD::SINT_TO_FP);
setTargetDAGCombine(ISD::SETCC);
- setTargetDAGCombine(ISD::FP_TO_SINT);
if (Subtarget->is64Bit())
setTargetDAGCombine(ISD::MUL);
setTargetDAGCombine(ISD::XOR);
@@ -1339,7 +1366,7 @@
// cases like PR2962. This should be removed when PR2962 is fixed.
const Function *F = MF.getFunction();
if (IsZeroVal &&
- !F->hasFnAttr(Attribute::NoImplicitFloat)) {
+ !F->getFnAttributes().hasAttribute(Attributes::NoImplicitFloat)) {
if (Size >= 16 &&
(Subtarget->isUnalignedMemAccessFast() ||
((DstAlign == 0 || DstAlign >= 16) &&
@@ -1902,9 +1929,9 @@
RC = &X86::FR32RegClass;
else if (RegVT == MVT::f64)
RC = &X86::FR64RegClass;
- else if (RegVT.isVector() && RegVT.getSizeInBits() == 256)
+ else if (RegVT.is256BitVector())
RC = &X86::VR256RegClass;
- else if (RegVT.isVector() && RegVT.getSizeInBits() == 128)
+ else if (RegVT.is128BitVector())
RC = &X86::VR128RegClass;
else if (RegVT == MVT::x86mmx)
RC = &X86::VR64RegClass;
@@ -2007,7 +2034,8 @@
unsigned NumIntRegs = CCInfo.getFirstUnallocated(GPR64ArgRegs,
TotalNumIntRegs);
- bool NoImplicitFloatOps = Fn->hasFnAttr(Attribute::NoImplicitFloat);
+ bool NoImplicitFloatOps = Fn->getFnAttributes().
+ hasAttribute(Attributes::NoImplicitFloat);
assert(!(NumXMMRegs && !Subtarget->hasSSE1()) &&
"SSE register cannot be used when SSE is disabled!");
assert(!(NumXMMRegs && MF.getTarget().Options.UseSoftFloat &&
@@ -2155,16 +2183,14 @@
/// optimization is performed and it is required (FPDiff!=0).
static SDValue
EmitTailCallStoreRetAddr(SelectionDAG & DAG, MachineFunction &MF,
- SDValue Chain, SDValue RetAddrFrIdx,
- bool Is64Bit, int FPDiff, DebugLoc dl) {
+ SDValue Chain, SDValue RetAddrFrIdx, EVT PtrVT,
+ unsigned SlotSize, int FPDiff, DebugLoc dl) {
// Store the return address to the appropriate stack slot.
if (!FPDiff) return Chain;
// Calculate the new stack slot for the return address.
- int SlotSize = Is64Bit ? 8 : 4;
int NewReturnAddrFI =
MF.getFrameInfo()->CreateFixedObject(SlotSize, FPDiff-SlotSize, false);
- EVT VT = Is64Bit ? MVT::i64 : MVT::i32;
- SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, VT);
+ SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, PtrVT);
Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx,
MachinePointerInfo::getFixedStack(NewReturnAddrFI),
false, false, 0);
@@ -2199,7 +2225,7 @@
// Check if it's really possible to do a tail call.
isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
isVarArg, SR != NotStructReturn,
- MF.getFunction()->hasStructRetAttr(),
+ MF.getFunction()->hasStructRetAttr(), CLI.RetTy,
Outs, OutVals, Ins, DAG);
// Sibcalls are automatically detected tailcalls which do not require
@@ -2239,14 +2265,15 @@
int FPDiff = 0;
if (isTailCall && !IsSibcall) {
// Lower arguments at fp - stackoffset + fpdiff.
- unsigned NumBytesCallerPushed =
- MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn();
+ X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>();
+ unsigned NumBytesCallerPushed = X86Info->getBytesToPopOnReturn();
+
FPDiff = NumBytesCallerPushed - NumBytes;
// Set the delta of movement of the returnaddr stackslot.
// But only set if delta is greater than previous delta.
- if (FPDiff < (MF.getInfo<X86MachineFunctionInfo>()->getTCReturnAddrDelta()))
- MF.getInfo<X86MachineFunctionInfo>()->setTCReturnAddrDelta(FPDiff);
+ if (FPDiff < X86Info->getTCReturnAddrDelta())
+ X86Info->setTCReturnAddrDelta(FPDiff);
}
if (!IsSibcall)
@@ -2282,7 +2309,7 @@
Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg);
break;
case CCValAssign::AExt:
- if (RegVT.isVector() && RegVT.getSizeInBits() == 128) {
+ if (RegVT.is128BitVector()) {
// Special case: passing MMX values in XMM registers.
Arg = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg);
@@ -2323,7 +2350,8 @@
} else if (!IsSibcall && (!isTailCall || isByVal)) {
assert(VA.isMemLoc());
if (StackPtr.getNode() == 0)
- StackPtr = DAG.getCopyFromReg(Chain, dl, X86StackPtr, getPointerTy());
+ StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
+ getPointerTy());
MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
dl, DAG, VA, Flags));
}
@@ -2411,7 +2439,8 @@
// Copy relative to framepointer.
SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset());
if (StackPtr.getNode() == 0)
- StackPtr = DAG.getCopyFromReg(Chain, dl, X86StackPtr,
+ StackPtr = DAG.getCopyFromReg(Chain, dl,
+ RegInfo->getStackRegister(),
getPointerTy());
Source = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, Source);
@@ -2433,7 +2462,8 @@
&MemOpChains2[0], MemOpChains2.size());
// Store the return address to the appropriate stack slot.
- Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx, Is64Bit,
+ Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx,
+ getPointerTy(), RegInfo->getSlotSize(),
FPDiff, dl);
}
@@ -2483,7 +2513,8 @@
OpFlags = X86II::MO_DARWIN_STUB;
} else if (Subtarget->isPICStyleRIPRel() &&
isa<Function>(GV) &&
- cast<Function>(GV)->hasFnAttr(Attribute::NonLazyBind)) {
+ cast<Function>(GV)->getFnAttributes().
+ hasAttribute(Attributes::NonLazyBind)) {
// If the function is marked as non-lazy, generate an indirect call
// which loads from the GOT directly. This avoids runtime overhead
// at the cost of eager binding (and one extra byte of encoding).
@@ -2644,7 +2675,7 @@
unsigned StackAlignment = TFI.getStackAlignment();
uint64_t AlignMask = StackAlignment - 1;
int64_t Offset = StackSize;
- uint64_t SlotSize = TD->getPointerSize();
+ unsigned SlotSize = RegInfo->getSlotSize();
if ( (Offset & AlignMask) <= (StackAlignment - SlotSize) ) {
// Number smaller than 12 so just add the difference.
Offset += ((StackAlignment - SlotSize) - (Offset & AlignMask));
@@ -2719,6 +2750,7 @@
bool isVarArg,
bool isCalleeStructRet,
bool isCallerStructRet,
+ Type *RetTy,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
@@ -2730,6 +2762,13 @@
// If -tailcallopt is specified, make fastcc functions tail-callable.
const MachineFunction &MF = DAG.getMachineFunction();
const Function *CallerF = DAG.getMachineFunction().getFunction();
+
+ // If the function return type is x86_fp80 and the callee return type is not,
+ // then the FP_EXTEND of the call result is not a nop. It's not safe to
+ // perform a tailcall optimization here.
+ if (CallerF->getReturnType()->isX86_FP80Ty() && !RetTy->isX86_FP80Ty())
+ return false;
+
CallingConv::ID CallerCC = CallerF->getCallingConv();
bool CCMatch = CallerCC == CalleeCC;
@@ -2853,7 +2892,7 @@
MachineFrameInfo *MFI = MF.getFrameInfo();
const MachineRegisterInfo *MRI = &MF.getRegInfo();
const X86InstrInfo *TII =
- ((X86TargetMachine&)getTargetMachine()).getInstrInfo();
+ ((const X86TargetMachine&)getTargetMachine()).getInstrInfo();
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
SDValue Arg = OutVals[i];
@@ -3004,7 +3043,7 @@
if (ReturnAddrIndex == 0) {
// Set up a frame object for the return address.
- uint64_t SlotSize = TD->getPointerSize();
+ unsigned SlotSize = RegInfo->getSlotSize();
ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(SlotSize, -SlotSize,
false);
FuncInfo->setRAIndex(ReturnAddrIndex);
@@ -3425,11 +3464,11 @@
/// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVHLPS.
static bool isMOVHLPSMask(ArrayRef<int> Mask, EVT VT) {
- unsigned NumElems = VT.getVectorNumElements();
-
- if (VT.getSizeInBits() != 128)
+ if (!VT.is128BitVector())
return false;
+ unsigned NumElems = VT.getVectorNumElements();
+
if (NumElems != 4)
return false;
@@ -3444,11 +3483,11 @@
/// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef,
/// <2, 3, 2, 3>
static bool isMOVHLPS_v_undef_Mask(ArrayRef<int> Mask, EVT VT) {
- unsigned NumElems = VT.getVectorNumElements();
-
- if (VT.getSizeInBits() != 128)
+ if (!VT.is128BitVector())
return false;
+ unsigned NumElems = VT.getVectorNumElements();
+
if (NumElems != 4)
return false;
@@ -3461,7 +3500,7 @@
/// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVLP{S|D}.
static bool isMOVLPMask(ArrayRef<int> Mask, EVT VT) {
- if (VT.getSizeInBits() != 128)
+ if (!VT.is128BitVector())
return false;
unsigned NumElems = VT.getVectorNumElements();
@@ -3483,10 +3522,12 @@
/// isMOVLHPSMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVLHPS.
static bool isMOVLHPSMask(ArrayRef<int> Mask, EVT VT) {
+ if (!VT.is128BitVector())
+ return false;
+
unsigned NumElems = VT.getVectorNumElements();
- if ((NumElems != 2 && NumElems != 4)
- || VT.getSizeInBits() > 128)
+ if (NumElems != 2 && NumElems != 4)
return false;
for (unsigned i = 0, e = NumElems/2; i != e; ++i)
@@ -3525,25 +3566,26 @@
if (!isUndefOrEqual(Mask[i], MaskToOptimizeOdd[i]))
MatchOddMask = false;
}
- static const int CompactionMaskEven[] = {0, 2, -1, -1, 4, 6, -1, -1};
- static const int CompactionMaskOdd [] = {1, 3, -1, -1, 5, 7, -1, -1};
- const int *CompactionMask;
- if (MatchEvenMask)
- CompactionMask = CompactionMaskEven;
- else if (MatchOddMask)
- CompactionMask = CompactionMaskOdd;
- else
+ if (!MatchEvenMask && !MatchOddMask)
return SDValue();
SDValue UndefNode = DAG.getNode(ISD::UNDEF, dl, VT);
- SDValue Op0 = DAG.getVectorShuffle(VT, dl, SVOp->getOperand(0),
- UndefNode, CompactionMask);
- SDValue Op1 = DAG.getVectorShuffle(VT, dl, SVOp->getOperand(1),
- UndefNode, CompactionMask);
- static const int UnpackMask[] = {0, 8, 1, 9, 4, 12, 5, 13};
- return DAG.getVectorShuffle(VT, dl, Op0, Op1, UnpackMask);
+ SDValue Op0 = SVOp->getOperand(0);
+ SDValue Op1 = SVOp->getOperand(1);
+
+ if (MatchEvenMask) {
+ // Shift the second operand right to 32 bits.
+ static const int ShiftRightMask[] = {-1, 0, -1, 2, -1, 4, -1, 6 };
+ Op1 = DAG.getVectorShuffle(VT, dl, Op1, UndefNode, ShiftRightMask);
+ } else {
+ // Shift the first operand left to 32 bits.
+ static const int ShiftLeftMask[] = {1, -1, 3, -1, 5, -1, 7, -1 };
+ Op0 = DAG.getVectorShuffle(VT, dl, Op0, UndefNode, ShiftLeftMask);
+ }
+ static const int BlendMask[] = {0, 9, 2, 11, 4, 13, 6, 15};
+ return DAG.getVectorShuffle(VT, dl, Op0, Op1, BlendMask);
}
/// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand
@@ -3703,7 +3745,7 @@
static bool isMOVLMask(ArrayRef<int> Mask, EVT VT) {
if (VT.getVectorElementType().getSizeInBits() < 32)
return false;
- if (VT.getSizeInBits() == 256)
+ if (!VT.is128BitVector())
return false;
unsigned NumElts = VT.getVectorNumElements();
@@ -3725,7 +3767,7 @@
/// The first half comes from the second half of V1 and the second half from the
/// the second half of V2.
static bool isVPERM2X128Mask(ArrayRef<int> Mask, EVT VT, bool HasAVX) {
- if (!HasAVX || VT.getSizeInBits() != 256)
+ if (!HasAVX || !VT.is256BitVector())
return false;
// The shuffle result is divided into half A and half B. In total the two
@@ -3817,9 +3859,10 @@
/// element of vector 2 and the other elements to come from vector 1 in order.
static bool isCommutedMOVLMask(ArrayRef<int> Mask, EVT VT,
bool V2IsSplat = false, bool V2IsUndef = false) {
- unsigned NumOps = VT.getVectorNumElements();
- if (VT.getSizeInBits() == 256)
+ if (!VT.is128BitVector())
return false;
+
+ unsigned NumOps = VT.getVectorNumElements();
if (NumOps != 2 && NumOps != 4 && NumOps != 8 && NumOps != 16)
return false;
@@ -3885,9 +3928,11 @@
/// specifies a shuffle of elements that is suitable for input to 256-bit
/// version of MOVDDUP.
static bool isMOVDDUPYMask(ArrayRef<int> Mask, EVT VT, bool HasAVX) {
- unsigned NumElts = VT.getVectorNumElements();
+ if (!HasAVX || !VT.is256BitVector())
+ return false;
- if (!HasAVX || VT.getSizeInBits() != 256 || NumElts != 4)
+ unsigned NumElts = VT.getVectorNumElements();
+ if (NumElts != 4)
return false;
for (unsigned i = 0; i != NumElts/2; ++i)
@@ -3903,7 +3948,7 @@
/// specifies a shuffle of elements that is suitable for input to 128-bit
/// version of MOVDDUP.
static bool isMOVDDUPMask(ArrayRef<int> Mask, EVT VT) {
- if (VT.getSizeInBits() != 128)
+ if (!VT.is128BitVector())
return false;
unsigned e = VT.getVectorNumElements() / 2;
@@ -4148,7 +4193,7 @@
/// V1 (and in order), and the upper half elements should come from the upper
/// half of V2 (and in order).
static bool ShouldXformToMOVHLPS(ArrayRef<int> Mask, EVT VT) {
- if (VT.getSizeInBits() != 128)
+ if (!VT.is128BitVector())
return false;
if (VT.getVectorNumElements() != 4)
return false;
@@ -4205,7 +4250,7 @@
/// MOVLP, it must be either a vector load or a scalar load to vector.
static bool ShouldXformToMOVLP(SDNode *V1, SDNode *V2,
ArrayRef<int> Mask, EVT VT) {
- if (VT.getSizeInBits() != 128)
+ if (!VT.is128BitVector())
return false;
if (!ISD::isNON_EXTLoad(V1) && !isScalarLoadToVector(V1))
@@ -4591,7 +4636,6 @@
MVT ShufVT = V.getValueType().getSimpleVT();
unsigned NumElems = ShufVT.getVectorNumElements();
SmallVector<int, 16> ShuffleMask;
- SDValue ImmN;
bool IsUnary;
if (!getTargetShuffleMask(N, ShufVT, ShuffleMask, IsUnary))
@@ -4747,7 +4791,7 @@
bool &isLeft, SDValue &ShVal, unsigned &ShAmt) {
// Although the logic below support any bitwidth size, there are no
// shift instructions which handle more than 128-bit vectors.
- if (SVOp->getValueType(0).getSizeInBits() > 128)
+ if (!SVOp->getValueType(0).is128BitVector())
return false;
if (isVectorShiftLeft(SVOp, DAG, isLeft, ShVal, ShAmt) ||
@@ -4842,7 +4886,7 @@
static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp,
unsigned NumBits, SelectionDAG &DAG,
const TargetLowering &TLI, DebugLoc dl) {
- assert(VT.getSizeInBits() == 128 && "Unknown type for VShift");
+ assert(VT.is128BitVector() && "Unknown type for VShift");
EVT ShVT = MVT::v2i64;
unsigned Opc = isLeft ? X86ISD::VSHLDQ : X86ISD::VSRLDQ;
SrcOp = DAG.getNode(ISD::BITCAST, dl, ShVT, SrcOp);
@@ -4993,6 +5037,18 @@
LDBase->getAlignment(),
false/*isVolatile*/, true/*ReadMem*/,
false/*WriteMem*/);
+
+ // Make sure the newly-created LOAD is in the same position as LDBase in
+ // terms of dependency. We create a TokenFactor for LDBase and ResNode, and
+ // update uses of LDBase's output chain to use the TokenFactor.
+ if (LDBase->hasAnyUseOfValue(1)) {
+ SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
+ SDValue(LDBase, 1), SDValue(ResNode.getNode(), 1));
+ DAG.ReplaceAllUsesOfValueWith(SDValue(LDBase, 1), NewChain);
+ DAG.UpdateNodeOperands(NewChain.getNode(), SDValue(LDBase, 1),
+ SDValue(ResNode.getNode(), 1));
+ }
+
return DAG.getNode(ISD::BITCAST, DL, VT, ResNode);
}
return SDValue();
@@ -5006,7 +5062,7 @@
/// The VBROADCAST node is returned when a pattern is found,
/// or SDValue() otherwise.
SDValue
-X86TargetLowering::LowerVectorBroadcast(SDValue &Op, SelectionDAG &DAG) const {
+X86TargetLowering::LowerVectorBroadcast(SDValue Op, SelectionDAG &DAG) const {
if (!Subtarget->hasAVX())
return SDValue();
@@ -5075,7 +5131,7 @@
}
}
- bool Is256 = VT.getSizeInBits() == 256;
+ bool Is256 = VT.is256BitVector();
// Handle the broadcasting a single constant scalar from the constant pool
// into a vector. On Sandybridge it is still better to load a constant vector
@@ -5131,6 +5187,80 @@
}
SDValue
+X86TargetLowering::buildFromShuffleMostly(SDValue Op, SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+
+ // Skip if insert_vec_elt is not supported.
+ if (!isOperationLegalOrCustom(ISD::INSERT_VECTOR_ELT, VT))
+ return SDValue();
+
+ DebugLoc DL = Op.getDebugLoc();
+ unsigned NumElems = Op.getNumOperands();
+
+ SDValue VecIn1;
+ SDValue VecIn2;
+ SmallVector<unsigned, 4> InsertIndices;
+ SmallVector<int, 8> Mask(NumElems, -1);
+
+ for (unsigned i = 0; i != NumElems; ++i) {
+ unsigned Opc = Op.getOperand(i).getOpcode();
+
+ if (Opc == ISD::UNDEF)
+ continue;
+
+ if (Opc != ISD::EXTRACT_VECTOR_ELT) {
+ // Quit if more than 1 elements need inserting.
+ if (InsertIndices.size() > 1)
+ return SDValue();
+
+ InsertIndices.push_back(i);
+ continue;
+ }
+
+ SDValue ExtractedFromVec = Op.getOperand(i).getOperand(0);
+ SDValue ExtIdx = Op.getOperand(i).getOperand(1);
+
+ // Quit if extracted from vector of different type.
+ if (ExtractedFromVec.getValueType() != VT)
+ return SDValue();
+
+ // Quit if non-constant index.
+ if (!isa<ConstantSDNode>(ExtIdx))
+ return SDValue();
+
+ if (VecIn1.getNode() == 0)
+ VecIn1 = ExtractedFromVec;
+ else if (VecIn1 != ExtractedFromVec) {
+ if (VecIn2.getNode() == 0)
+ VecIn2 = ExtractedFromVec;
+ else if (VecIn2 != ExtractedFromVec)
+ // Quit if more than 2 vectors to shuffle
+ return SDValue();
+ }
+
+ unsigned Idx = cast<ConstantSDNode>(ExtIdx)->getZExtValue();
+
+ if (ExtractedFromVec == VecIn1)
+ Mask[i] = Idx;
+ else if (ExtractedFromVec == VecIn2)
+ Mask[i] = Idx + NumElems;
+ }
+
+ if (VecIn1.getNode() == 0)
+ return SDValue();
+
+ VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT);
+ SDValue NV = DAG.getVectorShuffle(VT, DL, VecIn1, VecIn2, &Mask[0]);
+ for (unsigned i = 0, e = InsertIndices.size(); i != e; ++i) {
+ unsigned Idx = InsertIndices[i];
+ NV = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, NV, Op.getOperand(Idx),
+ DAG.getIntPtrConstant(Idx));
+ }
+
+ return NV;
+}
+
+SDValue
X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
DebugLoc dl = Op.getDebugLoc();
@@ -5237,12 +5367,12 @@
if (ExtVT == MVT::i32 || ExtVT == MVT::f32 || ExtVT == MVT::f64 ||
(ExtVT == MVT::i64 && Subtarget->is64Bit())) {
- if (VT.getSizeInBits() == 256) {
+ if (VT.is256BitVector()) {
SDValue ZeroVec = getZeroVector(VT, Subtarget, DAG, dl);
return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, ZeroVec,
Item, DAG.getIntPtrConstant(0));
}
- assert(VT.getSizeInBits() == 128 && "Expected an SSE value type!");
+ assert(VT.is128BitVector() && "Expected an SSE value type!");
Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);
// Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector.
return getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG);
@@ -5251,11 +5381,11 @@
if (ExtVT == MVT::i16 || ExtVT == MVT::i8) {
Item = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Item);
Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, Item);
- if (VT.getSizeInBits() == 256) {
+ if (VT.is256BitVector()) {
SDValue ZeroVec = getZeroVector(MVT::v8i32, Subtarget, DAG, dl);
Item = Insert128BitVector(ZeroVec, Item, 0, DAG, dl);
} else {
- assert(VT.getSizeInBits() == 128 && "Expected an SSE value type!");
+ assert(VT.is128BitVector() && "Expected an SSE value type!");
Item = getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG);
}
return DAG.getNode(ISD::BITCAST, dl, VT, Item);
@@ -5315,7 +5445,7 @@
// For AVX-length vectors, build the individual 128-bit pieces and use
// shuffles to put them in place.
- if (VT.getSizeInBits() == 256) {
+ if (VT.is256BitVector()) {
SmallVector<SDValue, 32> V;
for (unsigned i = 0; i != NumElems; ++i)
V.push_back(Op.getOperand(i));
@@ -5396,7 +5526,7 @@
return DAG.getVectorShuffle(VT, dl, V[0], V[1], &MaskVec[0]);
}
- if (Values.size() > 1 && VT.getSizeInBits() == 128) {
+ if (Values.size() > 1 && VT.is128BitVector()) {
// Check for a build vector of consecutive loads.
for (unsigned i = 0; i < NumElems; ++i)
V[i] = Op.getOperand(i);
@@ -5406,6 +5536,11 @@
if (LD.getNode())
return LD;
+ // Check for a build vector from mostly shuffle plus few inserting.
+ SDValue Sh = buildFromShuffleMostly(Op, DAG);
+ if (Sh.getNode())
+ return Sh;
+
// For SSE 4.1, use insertps to put the high elements into the low element.
if (getSubtarget()->hasSSE41()) {
SDValue Result;
@@ -5457,39 +5592,13 @@
return SDValue();
}
-// LowerMMXCONCAT_VECTORS - We support concatenate two MMX registers and place
-// them in a MMX register. This is better than doing a stack convert.
-static SDValue LowerMMXCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
- DebugLoc dl = Op.getDebugLoc();
- EVT ResVT = Op.getValueType();
-
- assert(ResVT == MVT::v2i64 || ResVT == MVT::v4i32 ||
- ResVT == MVT::v8i16 || ResVT == MVT::v16i8);
- int Mask[2];
- SDValue InVec = DAG.getNode(ISD::BITCAST,dl, MVT::v1i64, Op.getOperand(0));
- SDValue VecOp = DAG.getNode(X86ISD::MOVQ2DQ, dl, MVT::v2i64, InVec);
- InVec = Op.getOperand(1);
- if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) {
- unsigned NumElts = ResVT.getVectorNumElements();
- VecOp = DAG.getNode(ISD::BITCAST, dl, ResVT, VecOp);
- VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, ResVT, VecOp,
- InVec.getOperand(0), DAG.getIntPtrConstant(NumElts/2+1));
- } else {
- InVec = DAG.getNode(ISD::BITCAST, dl, MVT::v1i64, InVec);
- SDValue VecOp2 = DAG.getNode(X86ISD::MOVQ2DQ, dl, MVT::v2i64, InVec);
- Mask[0] = 0; Mask[1] = 2;
- VecOp = DAG.getVectorShuffle(MVT::v2i64, dl, VecOp, VecOp2, Mask);
- }
- return DAG.getNode(ISD::BITCAST, dl, ResVT, VecOp);
-}
-
// LowerAVXCONCAT_VECTORS - 256-bit AVX can use the vinsertf128 instruction
// to create 256-bit vectors from two other 128-bit ones.
static SDValue LowerAVXCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
DebugLoc dl = Op.getDebugLoc();
EVT ResVT = Op.getValueType();
- assert(ResVT.getSizeInBits() == 256 && "Value type must be 256-bit wide");
+ assert(ResVT.is256BitVector() && "Value type must be 256-bit wide");
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
@@ -5498,18 +5607,8 @@
return Concat128BitVectors(V1, V2, ResVT, NumElems, DAG, dl);
}
-SDValue
-X86TargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const {
- EVT ResVT = Op.getValueType();
-
+static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
assert(Op.getNumOperands() == 2);
- assert((ResVT.getSizeInBits() == 128 || ResVT.getSizeInBits() == 256) &&
- "Unsupported CONCAT_VECTORS for value type");
-
- // We support concatenate two MMX registers and place them in a MMX register.
- // This is better than doing a stack convert.
- if (ResVT.is128BitVector())
- return LowerMMXCONCAT_VECTORS(Op, DAG);
// 256-bit AVX can use the vinsertf128 instruction to create 256-bit vectors
// from two other 128-bit ones.
@@ -5517,9 +5616,9 @@
}
// Try to lower a shuffle node into a simple blend instruction.
-static SDValue LowerVECTOR_SHUFFLEtoBlend(ShuffleVectorSDNode *SVOp,
- const X86Subtarget *Subtarget,
- SelectionDAG &DAG) {
+static SDValue
+LowerVECTOR_SHUFFLEtoBlend(ShuffleVectorSDNode *SVOp,
+ const X86Subtarget *Subtarget, SelectionDAG &DAG) {
SDValue V1 = SVOp->getOperand(0);
SDValue V2 = SVOp->getOperand(1);
DebugLoc dl = SVOp->getDebugLoc();
@@ -5589,9 +5688,9 @@
// 2. [ssse3] 1 x pshufb
// 3. [ssse3] 2 x pshufb + 1 x por
// 4. [all] mov + pshuflw + pshufhw + N x (pextrw + pinsrw)
-SDValue
-X86TargetLowering::LowerVECTOR_SHUFFLEv8i16(SDValue Op,
- SelectionDAG &DAG) const {
+static SDValue
+LowerVECTOR_SHUFFLEv8i16(SDValue Op, const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
SDValue V1 = SVOp->getOperand(0);
SDValue V2 = SVOp->getOperand(1);
@@ -5848,8 +5947,6 @@
DebugLoc dl = SVOp->getDebugLoc();
ArrayRef<int> MaskVals = SVOp->getMask();
- bool V2IsUndef = V2.getOpcode() == ISD::UNDEF;
-
// If we have SSSE3, case 1 is generated when all result bytes come from
// one of the inputs. Otherwise, case 2 is generated. If no SSSE3 is
// present, fall back to case 3.
@@ -5873,7 +5970,11 @@
V1 = DAG.getNode(X86ISD::PSHUFB, dl, MVT::v16i8, V1,
DAG.getNode(ISD::BUILD_VECTOR, dl,
MVT::v16i8, &pshufbMask[0], 16));
- if (V2IsUndef)
+
+ // As PSHUFB will zero elements with negative indices, it's safe to ignore
+ // the 2nd operand if it's undefined or zero.
+ if (V2.getOpcode() == ISD::UNDEF ||
+ ISD::isBuildVectorAllZeros(V2.getNode()))
return V1;
// Calculate the shuffle mask for the second input, shuffle it, and
@@ -5959,6 +6060,51 @@
return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, NewV);
}
+// v32i8 shuffles - Translate to VPSHUFB if possible.
+static
+SDValue LowerVECTOR_SHUFFLEv32i8(ShuffleVectorSDNode *SVOp,
+ const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ EVT VT = SVOp->getValueType(0);
+ SDValue V1 = SVOp->getOperand(0);
+ SDValue V2 = SVOp->getOperand(1);
+ DebugLoc dl = SVOp->getDebugLoc();
+ SmallVector<int, 32> MaskVals(SVOp->getMask().begin(), SVOp->getMask().end());
+
+ bool V2IsUndef = V2.getOpcode() == ISD::UNDEF;
+ bool V1IsAllZero = ISD::isBuildVectorAllZeros(V1.getNode());
+ bool V2IsAllZero = ISD::isBuildVectorAllZeros(V2.getNode());
+
+ // VPSHUFB may be generated if
+ // (1) one of input vector is undefined or zeroinitializer.
+ // The mask value 0x80 puts 0 in the corresponding slot of the vector.
+ // And (2) the mask indexes don't cross the 128-bit lane.
+ if (VT != MVT::v32i8 || !Subtarget->hasAVX2() ||
+ (!V2IsUndef && !V2IsAllZero && !V1IsAllZero))
+ return SDValue();
+
+ if (V1IsAllZero && !V2IsAllZero) {
+ CommuteVectorShuffleMask(MaskVals, 32);
+ V1 = V2;
+ }
+ SmallVector<SDValue, 32> pshufbMask;
+ for (unsigned i = 0; i != 32; i++) {
+ int EltIdx = MaskVals[i];
+ if (EltIdx < 0 || EltIdx >= 32)
+ EltIdx = 0x80;
+ else {
+ if ((EltIdx >= 16 && i < 16) || (EltIdx < 16 && i >= 16))
+ // Cross lane is not allowed.
+ return SDValue();
+ EltIdx &= 0xf;
+ }
+ pshufbMask.push_back(DAG.getConstant(EltIdx, MVT::i8));
+ }
+ return DAG.getNode(X86ISD::PSHUFB, dl, MVT::v32i8, V1,
+ DAG.getNode(ISD::BUILD_VECTOR, dl,
+ MVT::v32i8, &pshufbMask[0], 32));
+}
+
/// RewriteAsNarrowerShuffle - Try rewriting v8i16 and v16i8 shuffles as 4 wide
/// ones, or rewriting v4i32 / v4f32 as 2 wide ones if possible. This can be
/// done when every pair / quad of shuffle mask elements point to elements in
@@ -6159,7 +6305,7 @@
DebugLoc dl = SVOp->getDebugLoc();
EVT VT = SVOp->getValueType(0);
- assert(VT.getSizeInBits() == 128 && "Unsupported vector size");
+ assert(VT.is128BitVector() && "Unsupported vector size");
std::pair<int, int> Locs[4];
int Mask1[] = { -1, -1, -1, -1 };
@@ -6293,17 +6439,17 @@
}
static bool MayFoldVectorLoad(SDValue V) {
- if (V.hasOneUse() && V.getOpcode() == ISD::BITCAST)
+ while (V.hasOneUse() && V.getOpcode() == ISD::BITCAST)
V = V.getOperand(0);
+
if (V.hasOneUse() && V.getOpcode() == ISD::SCALAR_TO_VECTOR)
V = V.getOperand(0);
if (V.hasOneUse() && V.getOpcode() == ISD::BUILD_VECTOR &&
V.getNumOperands() == 2 && V.getOperand(1).getOpcode() == ISD::UNDEF)
// BUILD_VECTOR (load), undef
V = V.getOperand(0);
- if (MayFoldLoad(V))
- return true;
- return false;
+
+ return MayFoldLoad(V);
}
// FIXME: the version above should always be used. Since there's
@@ -6426,6 +6572,81 @@
getShuffleSHUFImmediate(SVOp), DAG);
}
+// Reduce a vector shuffle to zext.
+SDValue
+X86TargetLowering::lowerVectorIntExtend(SDValue Op, SelectionDAG &DAG) const {
+ // PMOVZX is only available from SSE41.
+ if (!Subtarget->hasSSE41())
+ return SDValue();
+
+ EVT VT = Op.getValueType();
+
+ // Only AVX2 support 256-bit vector integer extending.
+ if (!Subtarget->hasAVX2() && VT.is256BitVector())
+ return SDValue();
+
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+ DebugLoc DL = Op.getDebugLoc();
+ SDValue V1 = Op.getOperand(0);
+ SDValue V2 = Op.getOperand(1);
+ unsigned NumElems = VT.getVectorNumElements();
+
+ // Extending is an unary operation and the element type of the source vector
+ // won't be equal to or larger than i64.
+ if (V2.getOpcode() != ISD::UNDEF || !VT.isInteger() ||
+ VT.getVectorElementType() == MVT::i64)
+ return SDValue();
+
+ // Find the expansion ratio, e.g. expanding from i8 to i32 has a ratio of 4.
+ unsigned Shift = 1; // Start from 2, i.e. 1 << 1.
+ while ((1U << Shift) < NumElems) {
+ if (SVOp->getMaskElt(1U << Shift) == 1)
+ break;
+ Shift += 1;
+ // The maximal ratio is 8, i.e. from i8 to i64.
+ if (Shift > 3)
+ return SDValue();
+ }
+
+ // Check the shuffle mask.
+ unsigned Mask = (1U << Shift) - 1;
+ for (unsigned i = 0; i != NumElems; ++i) {
+ int EltIdx = SVOp->getMaskElt(i);
+ if ((i & Mask) != 0 && EltIdx != -1)
+ return SDValue();
+ if ((i & Mask) == 0 && (unsigned)EltIdx != (i >> Shift))
+ return SDValue();
+ }
+
+ unsigned NBits = VT.getVectorElementType().getSizeInBits() << Shift;
+ EVT NeVT = EVT::getIntegerVT(*DAG.getContext(), NBits);
+ EVT NVT = EVT::getVectorVT(*DAG.getContext(), NeVT, NumElems >> Shift);
+
+ if (!isTypeLegal(NVT))
+ return SDValue();
+
+ // Simplify the operand as it's prepared to be fed into shuffle.
+ unsigned SignificantBits = NVT.getSizeInBits() >> Shift;
+ if (V1.getOpcode() == ISD::BITCAST &&
+ V1.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR &&
+ V1.getOperand(0).getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ V1.getOperand(0)
+ .getOperand(0).getValueType().getSizeInBits() == SignificantBits) {
+ // (bitcast (sclr2vec (ext_vec_elt x))) -> (bitcast x)
+ SDValue V = V1.getOperand(0).getOperand(0).getOperand(0);
+ ConstantSDNode *CIdx =
+ dyn_cast<ConstantSDNode>(V1.getOperand(0).getOperand(0).getOperand(1));
+ // If it's foldable, i.e. normal load with single use, we will let code
+ // selection to fold it. Otherwise, we will short the conversion sequence.
+ if (CIdx && CIdx->getZExtValue() == 0 &&
+ (!ISD::isNormalLoad(V.getNode()) || !V.hasOneUse()))
+ V1 = DAG.getNode(ISD::BITCAST, DL, V1.getValueType(), V);
+ }
+
+ return DAG.getNode(ISD::BITCAST, DL, VT,
+ DAG.getNode(X86ISD::VZEXT, DL, NVT, V1));
+}
+
SDValue
X86TargetLowering::NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const {
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
@@ -6456,6 +6677,11 @@
return PromoteSplat(SVOp, DAG);
}
+ // Check integer expanding shuffles.
+ SDValue NewOp = lowerVectorIntExtend(Op, DAG);
+ if (NewOp.getNode())
+ return NewOp;
+
// If the shuffle can be profitably rewritten as a narrower shuffle, then
// do it!
if (VT == MVT::v8i16 || VT == MVT::v16i8 ||
@@ -6505,7 +6731,8 @@
bool HasAVX = Subtarget->hasAVX();
bool HasAVX2 = Subtarget->hasAVX2();
MachineFunction &MF = DAG.getMachineFunction();
- bool OptForSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize);
+ bool OptForSize = MF.getFunction()->getFnAttributes().
+ hasAttribute(Attributes::OptimizeForSize);
assert(VT.getSizeInBits() != 64 && "Can't lower MMX shuffles");
@@ -6774,7 +7001,7 @@
// Handle v8i16 specifically since SSE can do byte extraction and insertion.
if (VT == MVT::v8i16) {
- SDValue NewOp = LowerVECTOR_SHUFFLEv8i16(Op, DAG);
+ SDValue NewOp = LowerVECTOR_SHUFFLEv8i16(Op, Subtarget, DAG);
if (NewOp.getNode())
return NewOp;
}
@@ -6785,9 +7012,15 @@
return NewOp;
}
+ if (VT == MVT::v32i8) {
+ SDValue NewOp = LowerVECTOR_SHUFFLEv32i8(SVOp, Subtarget, DAG);
+ if (NewOp.getNode())
+ return NewOp;
+ }
+
// Handle all 128-bit wide vectors with 4 elements, and match them with
// several different shuffle types.
- if (NumElems == 4 && VT.getSizeInBits() == 128)
+ if (NumElems == 4 && VT.is128BitVector())
return LowerVECTOR_SHUFFLE_128v4(SVOp, DAG);
// Handle general 256-bit shuffles
@@ -6803,14 +7036,14 @@
EVT VT = Op.getValueType();
DebugLoc dl = Op.getDebugLoc();
- if (Op.getOperand(0).getValueType().getSizeInBits() != 128)
+ if (!Op.getOperand(0).getValueType().is128BitVector())
return SDValue();
if (VT.getSizeInBits() == 8) {
SDValue Extract = DAG.getNode(X86ISD::PEXTRB, dl, MVT::i32,
- Op.getOperand(0), Op.getOperand(1));
+ Op.getOperand(0), Op.getOperand(1));
SDValue Assert = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Extract,
- DAG.getValueType(VT));
+ DAG.getValueType(VT));
return DAG.getNode(ISD::TRUNCATE, dl, VT, Assert);
}
@@ -6825,9 +7058,9 @@
Op.getOperand(0)),
Op.getOperand(1)));
SDValue Extract = DAG.getNode(X86ISD::PEXTRW, dl, MVT::i32,
- Op.getOperand(0), Op.getOperand(1));
+ Op.getOperand(0), Op.getOperand(1));
SDValue Assert = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Extract,
- DAG.getValueType(VT));
+ DAG.getValueType(VT));
return DAG.getNode(ISD::TRUNCATE, dl, VT, Assert);
}
@@ -6873,7 +7106,7 @@
// If this is a 256-bit vector result, first extract the 128-bit vector and
// then extract the element from the 128-bit vector.
- if (VecVT.getSizeInBits() == 256) {
+ if (VecVT.is256BitVector()) {
DebugLoc dl = Op.getNode()->getDebugLoc();
unsigned NumElems = VecVT.getVectorNumElements();
SDValue Idx = Op.getOperand(1);
@@ -6888,7 +7121,7 @@
DAG.getConstant(IdxVal, MVT::i32));
}
- assert(Vec.getValueSizeInBits() <= 128 && "Unexpected vector length");
+ assert(VecVT.is128BitVector() && "Unexpected vector length");
if (Subtarget->hasSSE41()) {
SDValue Res = LowerEXTRACT_VECTOR_ELT_SSE4(Op, DAG);
@@ -6911,9 +7144,9 @@
// Transform it so it match pextrw which produces a 32-bit result.
EVT EltVT = MVT::i32;
SDValue Extract = DAG.getNode(X86ISD::PEXTRW, dl, EltVT,
- Op.getOperand(0), Op.getOperand(1));
+ Op.getOperand(0), Op.getOperand(1));
SDValue Assert = DAG.getNode(ISD::AssertZext, dl, EltVT, Extract,
- DAG.getValueType(VT));
+ DAG.getValueType(VT));
return DAG.getNode(ISD::TRUNCATE, dl, VT, Assert);
}
@@ -6964,7 +7197,7 @@
SDValue N1 = Op.getOperand(1);
SDValue N2 = Op.getOperand(2);
- if (VT.getSizeInBits() == 256)
+ if (!VT.is128BitVector())
return SDValue();
if ((EltVT.getSizeInBits() == 8 || EltVT.getSizeInBits() == 16) &&
@@ -7020,7 +7253,7 @@
// If this is a 256-bit vector result, first extract the 128-bit vector,
// insert the element into the extracted half and then place it back.
- if (VT.getSizeInBits() == 256) {
+ if (VT.is256BitVector()) {
if (!isa<ConstantSDNode>(N2))
return SDValue();
@@ -7056,15 +7289,14 @@
return SDValue();
}
-SDValue
-X86TargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const {
+static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
LLVMContext *Context = DAG.getContext();
DebugLoc dl = Op.getDebugLoc();
EVT OpVT = Op.getValueType();
// If this is a 256-bit vector result, first insert into a 128-bit
// vector and then insert into the 256-bit vector.
- if (OpVT.getSizeInBits() > 128) {
+ if (!OpVT.is128BitVector()) {
// Insert into a 128-bit vector.
EVT VT128 = EVT::getVectorVT(*Context,
OpVT.getVectorElementType(),
@@ -7081,7 +7313,7 @@
return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v1i64, Op.getOperand(0));
SDValue AnyExt = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Op.getOperand(0));
- assert(OpVT.getSizeInBits() == 128 && "Expected an SSE type!");
+ assert(OpVT.is128BitVector() && "Expected an SSE type!");
return DAG.getNode(ISD::BITCAST, dl, OpVT,
DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32,AnyExt));
}
@@ -7089,15 +7321,15 @@
// Lower a node with an EXTRACT_SUBVECTOR opcode. This may result in
// a simple subregister reference or explicit instructions to grab
// upper bits of a vector.
-SDValue
-X86TargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const {
+static SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
if (Subtarget->hasAVX()) {
DebugLoc dl = Op.getNode()->getDebugLoc();
SDValue Vec = Op.getNode()->getOperand(0);
SDValue Idx = Op.getNode()->getOperand(1);
- if (Op.getNode()->getValueType(0).getSizeInBits() == 128 &&
- Vec.getNode()->getValueType(0).getSizeInBits() == 256 &&
+ if (Op.getNode()->getValueType(0).is128BitVector() &&
+ Vec.getNode()->getValueType(0).is256BitVector() &&
isa<ConstantSDNode>(Idx)) {
unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
return Extract128BitVector(Vec, IdxVal, DAG, dl);
@@ -7109,16 +7341,16 @@
// Lower a node with an INSERT_SUBVECTOR opcode. This may result in a
// simple superregister reference or explicit instructions to insert
// the upper bits of a vector.
-SDValue
-X86TargetLowering::LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const {
+static SDValue LowerINSERT_SUBVECTOR(SDValue Op, const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
if (Subtarget->hasAVX()) {
DebugLoc dl = Op.getNode()->getDebugLoc();
SDValue Vec = Op.getNode()->getOperand(0);
SDValue SubVec = Op.getNode()->getOperand(1);
SDValue Idx = Op.getNode()->getOperand(2);
- if (Op.getNode()->getValueType(0).getSizeInBits() == 256 &&
- SubVec.getNode()->getValueType(0).getSizeInBits() == 128 &&
+ if (Op.getNode()->getValueType(0).is256BitVector() &&
+ SubVec.getNode()->getValueType(0).is128BitVector() &&
isa<ConstantSDNode>(Idx)) {
unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
return Insert128BitVector(Vec, SubVec, IdxVal, DAG, dl);
@@ -7253,9 +7485,10 @@
Subtarget->ClassifyBlockAddressReference();
CodeModel::Model M = getTargetMachine().getCodeModel();
const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
+ int64_t Offset = cast<BlockAddressSDNode>(Op)->getOffset();
DebugLoc dl = Op.getDebugLoc();
- SDValue Result = DAG.getBlockAddress(BA, getPointerTy(),
- /*isTarget=*/true, OpFlags);
+ SDValue Result = DAG.getTargetBlockAddress(BA, getPointerTy(), Offset,
+ OpFlags);
if (Subtarget->isPICStyleRIPRel() &&
(M == CodeModel::Small || M == CodeModel::Kernel))
@@ -7364,8 +7597,8 @@
SDValue InFlag;
DebugLoc dl = GA->getDebugLoc(); // ? function entry point might be better
SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, X86::EBX,
- DAG.getNode(X86ISD::GlobalBaseReg,
- DebugLoc(), PtrVT), InFlag);
+ DAG.getNode(X86ISD::GlobalBaseReg,
+ DebugLoc(), PtrVT), InFlag);
InFlag = Chain.getValue(1);
return GetTLSADDR(DAG, Chain, GA, &InFlag, PtrVT, X86::EAX, X86II::MO_TLSGD);
@@ -7866,11 +8099,29 @@
return Sub;
}
+SDValue X86TargetLowering::lowerUINT_TO_FP_vec(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDValue N0 = Op.getOperand(0);
+ EVT SVT = N0.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+
+ assert((SVT == MVT::v4i8 || SVT == MVT::v4i16 ||
+ SVT == MVT::v8i8 || SVT == MVT::v8i16) &&
+ "Custom UINT_TO_FP is not supported!");
+
+ EVT NVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, SVT.getVectorNumElements());
+ return DAG.getNode(ISD::SINT_TO_FP, dl, Op.getValueType(),
+ DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, N0));
+}
+
SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
SelectionDAG &DAG) const {
SDValue N0 = Op.getOperand(0);
DebugLoc dl = Op.getDebugLoc();
+ if (Op.getValueType().isVector())
+ return lowerUINT_TO_FP_vec(Op, DAG);
+
// Since UINT_TO_FP is legal (it's marked custom), dag combiner won't
// optimize it to a SINT_TO_FP when the sign bit is known zero. Perform
// the optimization here.
@@ -8044,10 +8295,66 @@
}
}
+SDValue X86TargetLowering::lowerZERO_EXTEND(SDValue Op, SelectionDAG &DAG) const {
+ DebugLoc DL = Op.getDebugLoc();
+ EVT VT = Op.getValueType();
+ SDValue In = Op.getOperand(0);
+ EVT SVT = In.getValueType();
+
+ if (!VT.is256BitVector() || !SVT.is128BitVector() ||
+ VT.getVectorNumElements() != SVT.getVectorNumElements())
+ return SDValue();
+
+ assert(Subtarget->hasAVX() && "256-bit vector is observed without AVX!");
+
+ // AVX2 has better support of integer extending.
+ if (Subtarget->hasAVX2())
+ return DAG.getNode(X86ISD::VZEXT, DL, VT, In);
+
+ SDValue Lo = DAG.getNode(X86ISD::VZEXT, DL, MVT::v4i32, In);
+ static const int Mask[] = {4, 5, 6, 7, -1, -1, -1, -1};
+ SDValue Hi = DAG.getNode(X86ISD::VZEXT, DL, MVT::v4i32,
+ DAG.getVectorShuffle(MVT::v8i16, DL, In, DAG.getUNDEF(MVT::v8i16), &Mask[0]));
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i32, Lo, Hi);
+}
+
+SDValue X86TargetLowering::lowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
+ DebugLoc DL = Op.getDebugLoc();
+ EVT VT = Op.getValueType();
+ EVT SVT = Op.getOperand(0).getValueType();
+
+ if (!VT.is128BitVector() || !SVT.is256BitVector() ||
+ VT.getVectorNumElements() != SVT.getVectorNumElements())
+ return SDValue();
+
+ assert(Subtarget->hasAVX() && "256-bit vector is observed without AVX!");
+
+ unsigned NumElems = VT.getVectorNumElements();
+ EVT NVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
+ NumElems * 2);
+
+ SDValue In = Op.getOperand(0);
+ SmallVector<int, 16> MaskVec(NumElems * 2, -1);
+ // Prepare truncation shuffle mask
+ for (unsigned i = 0; i != NumElems; ++i)
+ MaskVec[i] = i * 2;
+ SDValue V = DAG.getVectorShuffle(NVT, DL,
+ DAG.getNode(ISD::BITCAST, DL, NVT, In),
+ DAG.getUNDEF(NVT), &MaskVec[0]);
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V,
+ DAG.getIntPtrConstant(0));
+}
+
SDValue X86TargetLowering::LowerFP_TO_SINT(SDValue Op,
SelectionDAG &DAG) const {
- if (Op.getValueType().isVector())
+ if (Op.getValueType().isVector()) {
+ if (Op.getValueType() == MVT::v8i16)
+ return DAG.getNode(ISD::TRUNCATE, Op.getDebugLoc(), Op.getValueType(),
+ DAG.getNode(ISD::FP_TO_SINT, Op.getDebugLoc(),
+ MVT::v8i32, Op.getOperand(0)));
return SDValue();
+ }
std::pair<SDValue,SDValue> Vals = FP_TO_INTHelper(Op, DAG,
/*IsSigned=*/ true, /*IsReplace=*/ false);
@@ -8082,26 +8389,49 @@
return FIST;
}
-SDValue X86TargetLowering::LowerFABS(SDValue Op,
- SelectionDAG &DAG) const {
+SDValue X86TargetLowering::lowerFP_EXTEND(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc DL = Op.getDebugLoc();
+ EVT VT = Op.getValueType();
+ SDValue In = Op.getOperand(0);
+ EVT SVT = In.getValueType();
+
+ assert(SVT == MVT::v2f32 && "Only customize MVT::v2f32 type legalization!");
+
+ return DAG.getNode(X86ISD::VFPEXT, DL, VT,
+ DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v4f32,
+ In, DAG.getUNDEF(SVT)));
+}
+
+SDValue X86TargetLowering::LowerFABS(SDValue Op, SelectionDAG &DAG) const {
LLVMContext *Context = DAG.getContext();
DebugLoc dl = Op.getDebugLoc();
EVT VT = Op.getValueType();
EVT EltVT = VT;
- if (VT.isVector())
+ unsigned NumElts = VT == MVT::f64 ? 2 : 4;
+ if (VT.isVector()) {
EltVT = VT.getVectorElementType();
- Constant *C;
- if (EltVT == MVT::f64) {
- C = ConstantVector::getSplat(2,
- ConstantFP::get(*Context, APFloat(APInt(64, ~(1ULL << 63)))));
- } else {
- C = ConstantVector::getSplat(4,
- ConstantFP::get(*Context, APFloat(APInt(32, ~(1U << 31)))));
+ NumElts = VT.getVectorNumElements();
}
- SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
+ Constant *C;
+ if (EltVT == MVT::f64)
+ C = ConstantFP::get(*Context, APFloat(APInt(64, ~(1ULL << 63))));
+ else
+ C = ConstantFP::get(*Context, APFloat(APInt(32, ~(1U << 31))));
+ C = ConstantVector::getSplat(NumElts, C);
+ SDValue CPIdx = DAG.getConstantPool(C, getPointerTy());
+ unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
SDValue Mask = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
MachinePointerInfo::getConstantPool(),
- false, false, false, 16);
+ false, false, false, Alignment);
+ if (VT.isVector()) {
+ MVT ANDVT = VT.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
+ return DAG.getNode(ISD::BITCAST, dl, VT,
+ DAG.getNode(ISD::AND, dl, ANDVT,
+ DAG.getNode(ISD::BITCAST, dl, ANDVT,
+ Op.getOperand(0)),
+ DAG.getNode(ISD::BITCAST, dl, ANDVT, Mask)));
+ }
return DAG.getNode(X86ISD::FAND, dl, VT, Op.getOperand(0), Mask);
}
@@ -8121,12 +8451,13 @@
else
C = ConstantFP::get(*Context, APFloat(APInt(32, 1U << 31)));
C = ConstantVector::getSplat(NumElts, C);
- SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
+ SDValue CPIdx = DAG.getConstantPool(C, getPointerTy());
+ unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
SDValue Mask = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
MachinePointerInfo::getConstantPool(),
- false, false, false, 16);
+ false, false, false, Alignment);
if (VT.isVector()) {
- MVT XORVT = VT.getSizeInBits() == 128 ? MVT::v2i64 : MVT::v4i64;
+ MVT XORVT = VT.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
return DAG.getNode(ISD::BITCAST, dl, VT,
DAG.getNode(ISD::XOR, dl, XORVT,
DAG.getNode(ISD::BITCAST, dl, XORVT,
@@ -8210,7 +8541,7 @@
return DAG.getNode(X86ISD::FOR, dl, VT, Val, SignBit);
}
-SDValue X86TargetLowering::LowerFGETSIGN(SDValue Op, SelectionDAG &DAG) const {
+static SDValue LowerFGETSIGN(SDValue Op, SelectionDAG &DAG) {
SDValue N0 = Op.getOperand(0);
DebugLoc dl = Op.getDebugLoc();
EVT VT = Op.getValueType();
@@ -8221,6 +8552,98 @@
return DAG.getNode(ISD::AND, dl, VT, xFGETSIGN, DAG.getConstant(1, VT));
}
+// LowerVectorAllZeroTest - Check whether an OR'd tree is PTEST-able.
+//
+SDValue X86TargetLowering::LowerVectorAllZeroTest(SDValue Op, SelectionDAG &DAG) const {
+ assert(Op.getOpcode() == ISD::OR && "Only check OR'd tree.");
+
+ if (!Subtarget->hasSSE41())
+ return SDValue();
+
+ if (!Op->hasOneUse())
+ return SDValue();
+
+ SDNode *N = Op.getNode();
+ DebugLoc DL = N->getDebugLoc();
+
+ SmallVector<SDValue, 8> Opnds;
+ DenseMap<SDValue, unsigned> VecInMap;
+ EVT VT = MVT::Other;
+
+ // Recognize a special case where a vector is casted into wide integer to
+ // test all 0s.
+ Opnds.push_back(N->getOperand(0));
+ Opnds.push_back(N->getOperand(1));
+
+ for (unsigned Slot = 0, e = Opnds.size(); Slot < e; ++Slot) {
+ SmallVector<SDValue, 8>::const_iterator I = Opnds.begin() + Slot;
+ // BFS traverse all OR'd operands.
+ if (I->getOpcode() == ISD::OR) {
+ Opnds.push_back(I->getOperand(0));
+ Opnds.push_back(I->getOperand(1));
+ // Re-evaluate the number of nodes to be traversed.
+ e += 2; // 2 more nodes (LHS and RHS) are pushed.
+ continue;
+ }
+
+ // Quit if a non-EXTRACT_VECTOR_ELT
+ if (I->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+ return SDValue();
+
+ // Quit if without a constant index.
+ SDValue Idx = I->getOperand(1);
+ if (!isa<ConstantSDNode>(Idx))
+ return SDValue();
+
+ SDValue ExtractedFromVec = I->getOperand(0);
+ DenseMap<SDValue, unsigned>::iterator M = VecInMap.find(ExtractedFromVec);
+ if (M == VecInMap.end()) {
+ VT = ExtractedFromVec.getValueType();
+ // Quit if not 128/256-bit vector.
+ if (!VT.is128BitVector() && !VT.is256BitVector())
+ return SDValue();
+ // Quit if not the same type.
+ if (VecInMap.begin() != VecInMap.end() &&
+ VT != VecInMap.begin()->first.getValueType())
+ return SDValue();
+ M = VecInMap.insert(std::make_pair(ExtractedFromVec, 0)).first;
+ }
+ M->second |= 1U << cast<ConstantSDNode>(Idx)->getZExtValue();
+ }
+
+ assert((VT.is128BitVector() || VT.is256BitVector()) &&
+ "Not extracted from 128-/256-bit vector.");
+
+ unsigned FullMask = (1U << VT.getVectorNumElements()) - 1U;
+ SmallVector<SDValue, 8> VecIns;
+
+ for (DenseMap<SDValue, unsigned>::const_iterator
+ I = VecInMap.begin(), E = VecInMap.end(); I != E; ++I) {
+ // Quit if not all elements are used.
+ if (I->second != FullMask)
+ return SDValue();
+ VecIns.push_back(I->first);
+ }
+
+ EVT TestVT = VT.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
+
+ // Cast all vectors into TestVT for PTEST.
+ for (unsigned i = 0, e = VecIns.size(); i < e; ++i)
+ VecIns[i] = DAG.getNode(ISD::BITCAST, DL, TestVT, VecIns[i]);
+
+ // If more than one full vectors are evaluated, OR them first before PTEST.
+ for (unsigned Slot = 0, e = VecIns.size(); e - Slot > 1; Slot += 2, e += 1) {
+ // Each iteration will OR 2 nodes and append the result until there is only
+ // 1 node left, i.e. the final OR'd value of all vectors.
+ SDValue LHS = VecIns[Slot];
+ SDValue RHS = VecIns[Slot + 1];
+ VecIns.push_back(DAG.getNode(ISD::OR, DL, TestVT, LHS, RHS));
+ }
+
+ return DAG.getNode(X86ISD::PTEST, DL, MVT::i32,
+ VecIns.back(), VecIns.back());
+}
+
/// Emit nodes that will be selected as "test Op0,Op0", or something
/// equivalent.
SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC,
@@ -8254,7 +8677,33 @@
unsigned Opcode = 0;
unsigned NumOperands = 0;
- switch (Op.getNode()->getOpcode()) {
+
+ // Truncate operations may prevent the merge of the SETCC instruction
+ // and the arithmetic intruction before it. Attempt to truncate the operands
+ // of the arithmetic instruction and use a reduced bit-width instruction.
+ bool NeedTruncation = false;
+ SDValue ArithOp = Op;
+ if (Op->getOpcode() == ISD::TRUNCATE && Op->hasOneUse()) {
+ SDValue Arith = Op->getOperand(0);
+ // Both the trunc and the arithmetic op need to have one user each.
+ if (Arith->hasOneUse())
+ switch (Arith.getOpcode()) {
+ default: break;
+ case ISD::ADD:
+ case ISD::SUB:
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR: {
+ NeedTruncation = true;
+ ArithOp = Arith;
+ }
+ }
+ }
+
+ // NOTICE: In the code below we use ArithOp to hold the arithmetic operation
+ // which may be the result of a CAST. We use the variable 'Op', which is the
+ // non-casted variable when we check for possible users.
+ switch (ArithOp.getOpcode()) {
case ISD::ADD:
// Due to an isel shortcoming, be conservative if this add is likely to be
// selected as part of a load-modify-store instruction. When the root node
@@ -8274,7 +8723,7 @@
goto default_case;
if (ConstantSDNode *C =
- dyn_cast<ConstantSDNode>(Op.getNode()->getOperand(1))) {
+ dyn_cast<ConstantSDNode>(ArithOp.getNode()->getOperand(1))) {
// An add of one will be selected as an INC.
if (C->getAPIntValue() == 1) {
Opcode = X86ISD::INC;
@@ -8310,7 +8759,7 @@
if (User->getOpcode() != ISD::BRCOND &&
User->getOpcode() != ISD::SETCC &&
- (User->getOpcode() != ISD::SELECT || UOpNo != 0)) {
+ !(User->getOpcode() == ISD::SELECT && UOpNo == 0)) {
NonFlagUse = true;
break;
}
@@ -8331,14 +8780,20 @@
goto default_case;
// Otherwise use a regular EFLAGS-setting instruction.
- switch (Op.getNode()->getOpcode()) {
+ switch (ArithOp.getOpcode()) {
default: llvm_unreachable("unexpected operator!");
- case ISD::SUB:
- Opcode = X86ISD::SUB;
- break;
- case ISD::OR: Opcode = X86ISD::OR; break;
+ case ISD::SUB: Opcode = X86ISD::SUB; break;
case ISD::XOR: Opcode = X86ISD::XOR; break;
case ISD::AND: Opcode = X86ISD::AND; break;
+ case ISD::OR: {
+ if (!NeedTruncation && (X86CC == X86::COND_E || X86CC == X86::COND_NE)) {
+ SDValue EFLAGS = LowerVectorAllZeroTest(Op, DAG);
+ if (EFLAGS.getNode())
+ return EFLAGS;
+ }
+ Opcode = X86ISD::OR;
+ break;
+ }
}
NumOperands = 2;
@@ -8356,19 +8811,40 @@
break;
}
+ // If we found that truncation is beneficial, perform the truncation and
+ // update 'Op'.
+ if (NeedTruncation) {
+ EVT VT = Op.getValueType();
+ SDValue WideVal = Op->getOperand(0);
+ EVT WideVT = WideVal.getValueType();
+ unsigned ConvertedOp = 0;
+ // Use a target machine opcode to prevent further DAGCombine
+ // optimizations that may separate the arithmetic operations
+ // from the setcc node.
+ switch (WideVal.getOpcode()) {
+ default: break;
+ case ISD::ADD: ConvertedOp = X86ISD::ADD; break;
+ case ISD::SUB: ConvertedOp = X86ISD::SUB; break;
+ case ISD::AND: ConvertedOp = X86ISD::AND; break;
+ case ISD::OR: ConvertedOp = X86ISD::OR; break;
+ case ISD::XOR: ConvertedOp = X86ISD::XOR; break;
+ }
+
+ if (ConvertedOp) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (TLI.isOperationLegal(WideVal.getOpcode(), WideVT)) {
+ SDValue V0 = DAG.getNode(ISD::TRUNCATE, dl, VT, WideVal.getOperand(0));
+ SDValue V1 = DAG.getNode(ISD::TRUNCATE, dl, VT, WideVal.getOperand(1));
+ Op = DAG.getNode(ConvertedOp, dl, VT, V0, V1);
+ }
+ }
+ }
+
if (Opcode == 0)
// Emit a CMP with 0, which is the TEST pattern.
return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op,
DAG.getConstant(0, Op.getValueType()));
- if (Opcode == X86ISD::CMP) {
- SDValue New = DAG.getNode(Opcode, dl, MVT::i32, Op.getOperand(0),
- Op.getOperand(1));
- // We can't replace usage of SUB with CMP.
- // The SUB node will be removed later because there is no use of it.
- return SDValue(New.getNode(), 0);
- }
-
SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
SmallVector<SDValue, 4> Ops;
for (unsigned i = 0; i != NumOperands; ++i)
@@ -8554,7 +9030,7 @@
static SDValue Lower256IntVSETCC(SDValue Op, SelectionDAG &DAG) {
EVT VT = Op.getValueType();
- assert(VT.getSizeInBits() == 256 && Op.getOpcode() == ISD::SETCC &&
+ assert(VT.is256BitVector() && Op.getOpcode() == ISD::SETCC &&
"Unsupported value type for operation");
unsigned NumElems = VT.getVectorNumElements();
@@ -8591,10 +9067,12 @@
DebugLoc dl = Op.getDebugLoc();
if (isFP) {
- unsigned SSECC = 8;
+#ifndef NDEBUG
EVT EltVT = Op0.getValueType().getVectorElementType();
- assert(EltVT == MVT::f32 || EltVT == MVT::f64); (void)EltVT;
+ assert(EltVT == MVT::f32 || EltVT == MVT::f64);
+#endif
+ unsigned SSECC;
bool Swap = false;
// SSE Condition code mapping:
@@ -8607,7 +9085,7 @@
// 6 - NLE
// 7 - ORD
switch (SetCCOpcode) {
- default: break;
+ default: llvm_unreachable("Unexpected SETCC condition");
case ISD::SETOEQ:
case ISD::SETEQ: SSECC = 0; break;
case ISD::SETOGT:
@@ -8621,34 +9099,33 @@
case ISD::SETUO: SSECC = 3; break;
case ISD::SETUNE:
case ISD::SETNE: SSECC = 4; break;
- case ISD::SETULE: Swap = true;
+ case ISD::SETULE: Swap = true; // Fallthrough
case ISD::SETUGE: SSECC = 5; break;
- case ISD::SETULT: Swap = true;
+ case ISD::SETULT: Swap = true; // Fallthrough
case ISD::SETUGT: SSECC = 6; break;
case ISD::SETO: SSECC = 7; break;
+ case ISD::SETUEQ:
+ case ISD::SETONE: SSECC = 8; break;
}
if (Swap)
std::swap(Op0, Op1);
// In the two special cases we can't handle, emit two comparisons.
if (SSECC == 8) {
+ unsigned CC0, CC1;
+ unsigned CombineOpc;
if (SetCCOpcode == ISD::SETUEQ) {
- SDValue UNORD, EQ;
- UNORD = DAG.getNode(X86ISD::CMPP, dl, VT, Op0, Op1,
- DAG.getConstant(3, MVT::i8));
- EQ = DAG.getNode(X86ISD::CMPP, dl, VT, Op0, Op1,
- DAG.getConstant(0, MVT::i8));
- return DAG.getNode(ISD::OR, dl, VT, UNORD, EQ);
- }
- if (SetCCOpcode == ISD::SETONE) {
- SDValue ORD, NEQ;
- ORD = DAG.getNode(X86ISD::CMPP, dl, VT, Op0, Op1,
- DAG.getConstant(7, MVT::i8));
- NEQ = DAG.getNode(X86ISD::CMPP, dl, VT, Op0, Op1,
- DAG.getConstant(4, MVT::i8));
- return DAG.getNode(ISD::AND, dl, VT, ORD, NEQ);
+ CC0 = 3; CC1 = 0; CombineOpc = ISD::OR;
+ } else {
+ assert(SetCCOpcode == ISD::SETONE);
+ CC0 = 7; CC1 = 4; CombineOpc = ISD::AND;
}
- llvm_unreachable("Illegal FP comparison");
+
+ SDValue Cmp0 = DAG.getNode(X86ISD::CMPP, dl, VT, Op0, Op1,
+ DAG.getConstant(CC0, MVT::i8));
+ SDValue Cmp1 = DAG.getNode(X86ISD::CMPP, dl, VT, Op0, Op1,
+ DAG.getConstant(CC1, MVT::i8));
+ return DAG.getNode(CombineOpc, dl, VT, Cmp0, Cmp1);
}
// Handle all other FP comparisons here.
return DAG.getNode(X86ISD::CMPP, dl, VT, Op0, Op1,
@@ -8656,17 +9133,17 @@
}
// Break 256-bit integer vector compare into smaller ones.
- if (VT.getSizeInBits() == 256 && !Subtarget->hasAVX2())
+ if (VT.is256BitVector() && !Subtarget->hasAVX2())
return Lower256IntVSETCC(Op, DAG);
// We are handling one of the integer comparisons here. Since SSE only has
// GT and EQ comparisons for integer, swapping operands and multiple
// operations may be required for some comparisons.
- unsigned Opc = 0;
+ unsigned Opc;
bool Swap = false, Invert = false, FlipSigns = false;
switch (SetCCOpcode) {
- default: break;
+ default: llvm_unreachable("Unexpected SETCC condition");
case ISD::SETNE: Invert = true;
case ISD::SETEQ: Opc = X86ISD::PCMPEQ; break;
case ISD::SETLT: Swap = true;
@@ -8683,10 +9160,12 @@
// Check that the operation in question is available (most are plain SSE2,
// but PCMPGTQ and PCMPEQQ have different requirements).
- if (Opc == X86ISD::PCMPGT && VT == MVT::v2i64 && !Subtarget->hasSSE42())
- return SDValue();
- if (Opc == X86ISD::PCMPEQ && VT == MVT::v2i64 && !Subtarget->hasSSE41())
- return SDValue();
+ if (VT == MVT::v2i64) {
+ if (Opc == X86ISD::PCMPGT && !Subtarget->hasSSE42())
+ return SDValue();
+ if (Opc == X86ISD::PCMPEQ && !Subtarget->hasSSE41())
+ return SDValue();
+ }
// Since SSE has no unsigned integer comparisons, we need to flip the sign
// bits of the inputs before performing those operations.
@@ -8924,6 +9403,21 @@
}
}
+ // X86 doesn't have an i8 cmov. If both operands are the result of a truncate
+ // widen the cmov and push the truncate through. This avoids introducing a new
+ // branch during isel and doesn't add any extensions.
+ if (Op.getValueType() == MVT::i8 &&
+ Op1.getOpcode() == ISD::TRUNCATE && Op2.getOpcode() == ISD::TRUNCATE) {
+ SDValue T1 = Op1.getOperand(0), T2 = Op2.getOperand(0);
+ if (T1.getValueType() == T2.getValueType() &&
+ // Blacklist CopyFromReg to avoid partial register stalls.
+ T1.getOpcode() != ISD::CopyFromReg && T2.getOpcode()!=ISD::CopyFromReg){
+ SDVTList VTs = DAG.getVTList(T1.getValueType(), MVT::Glue);
+ SDValue Cmov = DAG.getNode(X86ISD::CMOV, DL, VTs, T2, T1, CC, Cond);
+ return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Cmov);
+ }
+ }
+
// X86ISD::CMOV means set the result (which is operand 1) to the RHS if
// condition is true.
SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
@@ -9278,7 +9772,8 @@
Chain = DAG.getNode(X86ISD::WIN_ALLOCA, dl, NodeTys, Chain, Flag);
Flag = Chain.getValue(1);
- Chain = DAG.getCopyFromReg(Chain, dl, X86StackPtr, SPTy).getValue(1);
+ Chain = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
+ SPTy).getValue(1);
SDValue Ops1[2] = { Chain.getValue(0), Chain };
return DAG.getMergeValues(Ops1, 2, dl);
@@ -9361,7 +9856,7 @@
EVT ArgVT = Op.getNode()->getValueType(0);
Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
- uint32_t ArgSize = getTargetData()->getTypeAllocSize(ArgTy);
+ uint32_t ArgSize = getDataLayout()->getTypeAllocSize(ArgTy);
uint8_t ArgMode;
// Decide which area this value should be read from.
@@ -9381,7 +9876,8 @@
// Sanity Check: Make sure using fp_offset makes sense.
assert(!getTargetMachine().Options.UseSoftFloat &&
!(DAG.getMachineFunction()
- .getFunction()->hasFnAttr(Attribute::NoImplicitFloat)) &&
+ .getFunction()->getFnAttributes()
+ .hasAttribute(Attributes::NoImplicitFloat)) &&
Subtarget->hasSSE1());
}
@@ -9412,7 +9908,8 @@
false, false, false, 0);
}
-SDValue X86TargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
+static SDValue LowerVACOPY(SDValue Op, const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
// X86-64 va_list is a struct { i32, i32, i8*, i8* }.
assert(Subtarget->is64Bit() && "This code only handles 64-bit va_copy!");
SDValue Chain = Op.getOperand(0);
@@ -9461,8 +9958,7 @@
SDValue ShOps[4];
ShOps[0] = ShAmt;
ShOps[1] = DAG.getConstant(0, MVT::i32);
- ShOps[2] = DAG.getUNDEF(MVT::i32);
- ShOps[3] = DAG.getUNDEF(MVT::i32);
+ ShOps[2] = ShOps[3] = DAG.getUNDEF(MVT::i32);
ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, &ShOps[0], 4);
// The return type has to be a 128-bit type with the same element
@@ -9474,8 +9970,7 @@
return DAG.getNode(Opc, dl, VT, SrcOp, ShAmt);
}
-SDValue
-X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const {
+static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
DebugLoc dl = Op.getDebugLoc();
unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
switch (IntNo) {
@@ -9505,8 +10000,8 @@
case Intrinsic::x86_sse2_ucomigt_sd:
case Intrinsic::x86_sse2_ucomige_sd:
case Intrinsic::x86_sse2_ucomineq_sd: {
- unsigned Opc = 0;
- ISD::CondCode CC = ISD::SETCC_INVALID;
+ unsigned Opc;
+ ISD::CondCode CC;
switch (IntNo) {
default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
case Intrinsic::x86_sse_comieq_ss:
@@ -9580,55 +10075,102 @@
DAG.getConstant(X86CC, MVT::i8), Cond);
return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
}
+
// Arithmetic intrinsics.
case Intrinsic::x86_sse2_pmulu_dq:
case Intrinsic::x86_avx2_pmulu_dq:
return DAG.getNode(X86ISD::PMULUDQ, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
+
+ // SSE3/AVX horizontal add/sub intrinsics
case Intrinsic::x86_sse3_hadd_ps:
case Intrinsic::x86_sse3_hadd_pd:
case Intrinsic::x86_avx_hadd_ps_256:
case Intrinsic::x86_avx_hadd_pd_256:
- return DAG.getNode(X86ISD::FHADD, dl, Op.getValueType(),
- Op.getOperand(1), Op.getOperand(2));
case Intrinsic::x86_sse3_hsub_ps:
case Intrinsic::x86_sse3_hsub_pd:
case Intrinsic::x86_avx_hsub_ps_256:
case Intrinsic::x86_avx_hsub_pd_256:
- return DAG.getNode(X86ISD::FHSUB, dl, Op.getValueType(),
- Op.getOperand(1), Op.getOperand(2));
case Intrinsic::x86_ssse3_phadd_w_128:
case Intrinsic::x86_ssse3_phadd_d_128:
case Intrinsic::x86_avx2_phadd_w:
case Intrinsic::x86_avx2_phadd_d:
- return DAG.getNode(X86ISD::HADD, dl, Op.getValueType(),
- Op.getOperand(1), Op.getOperand(2));
case Intrinsic::x86_ssse3_phsub_w_128:
case Intrinsic::x86_ssse3_phsub_d_128:
case Intrinsic::x86_avx2_phsub_w:
- case Intrinsic::x86_avx2_phsub_d:
- return DAG.getNode(X86ISD::HSUB, dl, Op.getValueType(),
+ case Intrinsic::x86_avx2_phsub_d: {
+ unsigned Opcode;
+ switch (IntNo) {
+ default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
+ case Intrinsic::x86_sse3_hadd_ps:
+ case Intrinsic::x86_sse3_hadd_pd:
+ case Intrinsic::x86_avx_hadd_ps_256:
+ case Intrinsic::x86_avx_hadd_pd_256:
+ Opcode = X86ISD::FHADD;
+ break;
+ case Intrinsic::x86_sse3_hsub_ps:
+ case Intrinsic::x86_sse3_hsub_pd:
+ case Intrinsic::x86_avx_hsub_ps_256:
+ case Intrinsic::x86_avx_hsub_pd_256:
+ Opcode = X86ISD::FHSUB;
+ break;
+ case Intrinsic::x86_ssse3_phadd_w_128:
+ case Intrinsic::x86_ssse3_phadd_d_128:
+ case Intrinsic::x86_avx2_phadd_w:
+ case Intrinsic::x86_avx2_phadd_d:
+ Opcode = X86ISD::HADD;
+ break;
+ case Intrinsic::x86_ssse3_phsub_w_128:
+ case Intrinsic::x86_ssse3_phsub_d_128:
+ case Intrinsic::x86_avx2_phsub_w:
+ case Intrinsic::x86_avx2_phsub_d:
+ Opcode = X86ISD::HSUB;
+ break;
+ }
+ return DAG.getNode(Opcode, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
+ }
+
+ // AVX2 variable shift intrinsics
case Intrinsic::x86_avx2_psllv_d:
case Intrinsic::x86_avx2_psllv_q:
case Intrinsic::x86_avx2_psllv_d_256:
case Intrinsic::x86_avx2_psllv_q_256:
- return DAG.getNode(ISD::SHL, dl, Op.getValueType(),
- Op.getOperand(1), Op.getOperand(2));
case Intrinsic::x86_avx2_psrlv_d:
case Intrinsic::x86_avx2_psrlv_q:
case Intrinsic::x86_avx2_psrlv_d_256:
case Intrinsic::x86_avx2_psrlv_q_256:
- return DAG.getNode(ISD::SRL, dl, Op.getValueType(),
- Op.getOperand(1), Op.getOperand(2));
case Intrinsic::x86_avx2_psrav_d:
- case Intrinsic::x86_avx2_psrav_d_256:
- return DAG.getNode(ISD::SRA, dl, Op.getValueType(),
- Op.getOperand(1), Op.getOperand(2));
+ case Intrinsic::x86_avx2_psrav_d_256: {
+ unsigned Opcode;
+ switch (IntNo) {
+ default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
+ case Intrinsic::x86_avx2_psllv_d:
+ case Intrinsic::x86_avx2_psllv_q:
+ case Intrinsic::x86_avx2_psllv_d_256:
+ case Intrinsic::x86_avx2_psllv_q_256:
+ Opcode = ISD::SHL;
+ break;
+ case Intrinsic::x86_avx2_psrlv_d:
+ case Intrinsic::x86_avx2_psrlv_q:
+ case Intrinsic::x86_avx2_psrlv_d_256:
+ case Intrinsic::x86_avx2_psrlv_q_256:
+ Opcode = ISD::SRL;
+ break;
+ case Intrinsic::x86_avx2_psrav_d:
+ case Intrinsic::x86_avx2_psrav_d_256:
+ Opcode = ISD::SRA;
+ break;
+ }
+ return DAG.getNode(Opcode, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+ }
+
case Intrinsic::x86_ssse3_pshuf_b_128:
case Intrinsic::x86_avx2_pshuf_b:
return DAG.getNode(X86ISD::PSHUFB, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
+
case Intrinsic::x86_ssse3_psign_b_128:
case Intrinsic::x86_ssse3_psign_w_128:
case Intrinsic::x86_ssse3_psign_d_128:
@@ -9637,15 +10179,18 @@
case Intrinsic::x86_avx2_psign_d:
return DAG.getNode(X86ISD::PSIGN, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
+
case Intrinsic::x86_sse41_insertps:
return DAG.getNode(X86ISD::INSERTPS, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
+
case Intrinsic::x86_avx_vperm2f128_ps_256:
case Intrinsic::x86_avx_vperm2f128_pd_256:
case Intrinsic::x86_avx_vperm2f128_si_256:
case Intrinsic::x86_avx2_vperm2i128:
return DAG.getNode(X86ISD::VPERM2X128, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
+
case Intrinsic::x86_avx2_permd:
case Intrinsic::x86_avx2_permps:
// Operands intentionally swapped. Mask is last operand to intrinsic,
@@ -9675,7 +10220,7 @@
case Intrinsic::x86_avx_vtestc_pd_256:
case Intrinsic::x86_avx_vtestnzc_pd_256: {
bool IsTestPacked = false;
- unsigned X86CC = 0;
+ unsigned X86CC;
switch (IntNo) {
default: llvm_unreachable("Bad fallthrough in Intrinsic lowering.");
case Intrinsic::x86_avx_vtestz_ps:
@@ -9726,100 +10271,93 @@
case Intrinsic::x86_avx2_psll_w:
case Intrinsic::x86_avx2_psll_d:
case Intrinsic::x86_avx2_psll_q:
- return DAG.getNode(X86ISD::VSHL, dl, Op.getValueType(),
- Op.getOperand(1), Op.getOperand(2));
case Intrinsic::x86_sse2_psrl_w:
case Intrinsic::x86_sse2_psrl_d:
case Intrinsic::x86_sse2_psrl_q:
case Intrinsic::x86_avx2_psrl_w:
case Intrinsic::x86_avx2_psrl_d:
case Intrinsic::x86_avx2_psrl_q:
- return DAG.getNode(X86ISD::VSRL, dl, Op.getValueType(),
- Op.getOperand(1), Op.getOperand(2));
case Intrinsic::x86_sse2_psra_w:
case Intrinsic::x86_sse2_psra_d:
case Intrinsic::x86_avx2_psra_w:
- case Intrinsic::x86_avx2_psra_d:
- return DAG.getNode(X86ISD::VSRA, dl, Op.getValueType(),
+ case Intrinsic::x86_avx2_psra_d: {
+ unsigned Opcode;
+ switch (IntNo) {
+ default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
+ case Intrinsic::x86_sse2_psll_w:
+ case Intrinsic::x86_sse2_psll_d:
+ case Intrinsic::x86_sse2_psll_q:
+ case Intrinsic::x86_avx2_psll_w:
+ case Intrinsic::x86_avx2_psll_d:
+ case Intrinsic::x86_avx2_psll_q:
+ Opcode = X86ISD::VSHL;
+ break;
+ case Intrinsic::x86_sse2_psrl_w:
+ case Intrinsic::x86_sse2_psrl_d:
+ case Intrinsic::x86_sse2_psrl_q:
+ case Intrinsic::x86_avx2_psrl_w:
+ case Intrinsic::x86_avx2_psrl_d:
+ case Intrinsic::x86_avx2_psrl_q:
+ Opcode = X86ISD::VSRL;
+ break;
+ case Intrinsic::x86_sse2_psra_w:
+ case Intrinsic::x86_sse2_psra_d:
+ case Intrinsic::x86_avx2_psra_w:
+ case Intrinsic::x86_avx2_psra_d:
+ Opcode = X86ISD::VSRA;
+ break;
+ }
+ return DAG.getNode(Opcode, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
+ }
+
+ // SSE/AVX immediate shift intrinsics
case Intrinsic::x86_sse2_pslli_w:
case Intrinsic::x86_sse2_pslli_d:
case Intrinsic::x86_sse2_pslli_q:
case Intrinsic::x86_avx2_pslli_w:
case Intrinsic::x86_avx2_pslli_d:
case Intrinsic::x86_avx2_pslli_q:
- return getTargetVShiftNode(X86ISD::VSHLI, dl, Op.getValueType(),
- Op.getOperand(1), Op.getOperand(2), DAG);
case Intrinsic::x86_sse2_psrli_w:
case Intrinsic::x86_sse2_psrli_d:
case Intrinsic::x86_sse2_psrli_q:
case Intrinsic::x86_avx2_psrli_w:
case Intrinsic::x86_avx2_psrli_d:
case Intrinsic::x86_avx2_psrli_q:
- return getTargetVShiftNode(X86ISD::VSRLI, dl, Op.getValueType(),
- Op.getOperand(1), Op.getOperand(2), DAG);
case Intrinsic::x86_sse2_psrai_w:
case Intrinsic::x86_sse2_psrai_d:
case Intrinsic::x86_avx2_psrai_w:
- case Intrinsic::x86_avx2_psrai_d:
- return getTargetVShiftNode(X86ISD::VSRAI, dl, Op.getValueType(),
- Op.getOperand(1), Op.getOperand(2), DAG);
- // Fix vector shift instructions where the last operand is a non-immediate
- // i32 value.
- case Intrinsic::x86_mmx_pslli_w:
- case Intrinsic::x86_mmx_pslli_d:
- case Intrinsic::x86_mmx_pslli_q:
- case Intrinsic::x86_mmx_psrli_w:
- case Intrinsic::x86_mmx_psrli_d:
- case Intrinsic::x86_mmx_psrli_q:
- case Intrinsic::x86_mmx_psrai_w:
- case Intrinsic::x86_mmx_psrai_d: {
- SDValue ShAmt = Op.getOperand(2);
- if (isa<ConstantSDNode>(ShAmt))
- return SDValue();
-
- unsigned NewIntNo = 0;
+ case Intrinsic::x86_avx2_psrai_d: {
+ unsigned Opcode;
switch (IntNo) {
- case Intrinsic::x86_mmx_pslli_w:
- NewIntNo = Intrinsic::x86_mmx_psll_w;
- break;
- case Intrinsic::x86_mmx_pslli_d:
- NewIntNo = Intrinsic::x86_mmx_psll_d;
- break;
- case Intrinsic::x86_mmx_pslli_q:
- NewIntNo = Intrinsic::x86_mmx_psll_q;
- break;
- case Intrinsic::x86_mmx_psrli_w:
- NewIntNo = Intrinsic::x86_mmx_psrl_w;
- break;
- case Intrinsic::x86_mmx_psrli_d:
- NewIntNo = Intrinsic::x86_mmx_psrl_d;
- break;
- case Intrinsic::x86_mmx_psrli_q:
- NewIntNo = Intrinsic::x86_mmx_psrl_q;
+ default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
+ case Intrinsic::x86_sse2_pslli_w:
+ case Intrinsic::x86_sse2_pslli_d:
+ case Intrinsic::x86_sse2_pslli_q:
+ case Intrinsic::x86_avx2_pslli_w:
+ case Intrinsic::x86_avx2_pslli_d:
+ case Intrinsic::x86_avx2_pslli_q:
+ Opcode = X86ISD::VSHLI;
break;
- case Intrinsic::x86_mmx_psrai_w:
- NewIntNo = Intrinsic::x86_mmx_psra_w;
+ case Intrinsic::x86_sse2_psrli_w:
+ case Intrinsic::x86_sse2_psrli_d:
+ case Intrinsic::x86_sse2_psrli_q:
+ case Intrinsic::x86_avx2_psrli_w:
+ case Intrinsic::x86_avx2_psrli_d:
+ case Intrinsic::x86_avx2_psrli_q:
+ Opcode = X86ISD::VSRLI;
break;
- case Intrinsic::x86_mmx_psrai_d:
- NewIntNo = Intrinsic::x86_mmx_psra_d;
+ case Intrinsic::x86_sse2_psrai_w:
+ case Intrinsic::x86_sse2_psrai_d:
+ case Intrinsic::x86_avx2_psrai_w:
+ case Intrinsic::x86_avx2_psrai_d:
+ Opcode = X86ISD::VSRAI;
break;
- default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
}
-
- // The vector shift intrinsics with scalars uses 32b shift amounts but
- // the sse2/mmx shift instructions reads 64 bits. Set the upper 32 bits
- // to be zero.
- ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i32, ShAmt,
- DAG.getConstant(0, MVT::i32));
-// FIXME this must be lowered to get rid of the invalid type.
-
- EVT VT = Op.getValueType();
- ShAmt = DAG.getNode(ISD::BITCAST, dl, VT, ShAmt);
- return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
- DAG.getConstant(NewIntNo, MVT::i32),
- Op.getOperand(1), ShAmt);
+ return getTargetVShiftNode(Opcode, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2), DAG);
}
+
case Intrinsic::x86_sse42_pcmpistria128:
case Intrinsic::x86_sse42_pcmpestria128:
case Intrinsic::x86_sse42_pcmpistric128:
@@ -9884,6 +10422,7 @@
SDValue(PCMP.getNode(), 1));
return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
}
+
case Intrinsic::x86_sse42_pcmpistri128:
case Intrinsic::x86_sse42_pcmpestri128: {
unsigned Opcode;
@@ -9897,11 +10436,78 @@
SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
return DAG.getNode(Opcode, dl, VTs, NewOps.data(), NewOps.size());
}
+ case Intrinsic::x86_fma_vfmadd_ps:
+ case Intrinsic::x86_fma_vfmadd_pd:
+ case Intrinsic::x86_fma_vfmsub_ps:
+ case Intrinsic::x86_fma_vfmsub_pd:
+ case Intrinsic::x86_fma_vfnmadd_ps:
+ case Intrinsic::x86_fma_vfnmadd_pd:
+ case Intrinsic::x86_fma_vfnmsub_ps:
+ case Intrinsic::x86_fma_vfnmsub_pd:
+ case Intrinsic::x86_fma_vfmaddsub_ps:
+ case Intrinsic::x86_fma_vfmaddsub_pd:
+ case Intrinsic::x86_fma_vfmsubadd_ps:
+ case Intrinsic::x86_fma_vfmsubadd_pd:
+ case Intrinsic::x86_fma_vfmadd_ps_256:
+ case Intrinsic::x86_fma_vfmadd_pd_256:
+ case Intrinsic::x86_fma_vfmsub_ps_256:
+ case Intrinsic::x86_fma_vfmsub_pd_256:
+ case Intrinsic::x86_fma_vfnmadd_ps_256:
+ case Intrinsic::x86_fma_vfnmadd_pd_256:
+ case Intrinsic::x86_fma_vfnmsub_ps_256:
+ case Intrinsic::x86_fma_vfnmsub_pd_256:
+ case Intrinsic::x86_fma_vfmaddsub_ps_256:
+ case Intrinsic::x86_fma_vfmaddsub_pd_256:
+ case Intrinsic::x86_fma_vfmsubadd_ps_256:
+ case Intrinsic::x86_fma_vfmsubadd_pd_256: {
+ unsigned Opc;
+ switch (IntNo) {
+ default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
+ case Intrinsic::x86_fma_vfmadd_ps:
+ case Intrinsic::x86_fma_vfmadd_pd:
+ case Intrinsic::x86_fma_vfmadd_ps_256:
+ case Intrinsic::x86_fma_vfmadd_pd_256:
+ Opc = X86ISD::FMADD;
+ break;
+ case Intrinsic::x86_fma_vfmsub_ps:
+ case Intrinsic::x86_fma_vfmsub_pd:
+ case Intrinsic::x86_fma_vfmsub_ps_256:
+ case Intrinsic::x86_fma_vfmsub_pd_256:
+ Opc = X86ISD::FMSUB;
+ break;
+ case Intrinsic::x86_fma_vfnmadd_ps:
+ case Intrinsic::x86_fma_vfnmadd_pd:
+ case Intrinsic::x86_fma_vfnmadd_ps_256:
+ case Intrinsic::x86_fma_vfnmadd_pd_256:
+ Opc = X86ISD::FNMADD;
+ break;
+ case Intrinsic::x86_fma_vfnmsub_ps:
+ case Intrinsic::x86_fma_vfnmsub_pd:
+ case Intrinsic::x86_fma_vfnmsub_ps_256:
+ case Intrinsic::x86_fma_vfnmsub_pd_256:
+ Opc = X86ISD::FNMSUB;
+ break;
+ case Intrinsic::x86_fma_vfmaddsub_ps:
+ case Intrinsic::x86_fma_vfmaddsub_pd:
+ case Intrinsic::x86_fma_vfmaddsub_ps_256:
+ case Intrinsic::x86_fma_vfmaddsub_pd_256:
+ Opc = X86ISD::FMADDSUB;
+ break;
+ case Intrinsic::x86_fma_vfmsubadd_ps:
+ case Intrinsic::x86_fma_vfmsubadd_pd:
+ case Intrinsic::x86_fma_vfmsubadd_ps_256:
+ case Intrinsic::x86_fma_vfmsubadd_pd_256:
+ Opc = X86ISD::FMSUBADD;
+ break;
+ }
+
+ return DAG.getNode(Opc, dl, Op.getValueType(), Op.getOperand(1),
+ Op.getOperand(2), Op.getOperand(3));
+ }
}
}
-SDValue
-X86TargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const {
+static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) {
DebugLoc dl = Op.getDebugLoc();
unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
switch (IntNo) {
@@ -9939,21 +10545,21 @@
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
DebugLoc dl = Op.getDebugLoc();
+ EVT PtrVT = getPointerTy();
if (Depth > 0) {
SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
SDValue Offset =
- DAG.getConstant(TD->getPointerSize(),
- Subtarget->is64Bit() ? MVT::i64 : MVT::i32);
- return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
- DAG.getNode(ISD::ADD, dl, getPointerTy(),
+ DAG.getConstant(RegInfo->getSlotSize(), PtrVT);
+ return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
+ DAG.getNode(ISD::ADD, dl, PtrVT,
FrameAddr, Offset),
MachinePointerInfo(), false, false, false, 0);
}
// Just load the return address.
SDValue RetAddrFI = getReturnAddressFrameIndex(DAG);
- return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
+ return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
RetAddrFI, MachinePointerInfo(), false, false, false, 0);
}
@@ -9975,7 +10581,7 @@
SDValue X86TargetLowering::LowerFRAME_TO_ARGS_OFFSET(SDValue Op,
SelectionDAG &DAG) const {
- return DAG.getIntPtrConstant(2*TD->getPointerSize());
+ return DAG.getIntPtrConstant(2 * RegInfo->getSlotSize());
}
SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const {
@@ -9990,7 +10596,7 @@
unsigned StoreAddrReg = (Subtarget->is64Bit() ? X86::RCX : X86::ECX);
SDValue StoreAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), Frame,
- DAG.getIntPtrConstant(TD->getPointerSize()));
+ DAG.getIntPtrConstant(RegInfo->getSlotSize()));
StoreAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), StoreAddr, Offset);
Chain = DAG.getStore(Chain, dl, Handler, StoreAddr, MachinePointerInfo(),
false, false, 0);
@@ -10001,8 +10607,22 @@
Chain, DAG.getRegister(StoreAddrReg, getPointerTy()));
}
-SDValue X86TargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
- SelectionDAG &DAG) const {
+SDValue X86TargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc DL = Op.getDebugLoc();
+ return DAG.getNode(X86ISD::EH_SJLJ_SETJMP, DL,
+ DAG.getVTList(MVT::i32, MVT::Other),
+ Op.getOperand(0), Op.getOperand(1));
+}
+
+SDValue X86TargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc DL = Op.getDebugLoc();
+ return DAG.getNode(X86ISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
+ Op.getOperand(0), Op.getOperand(1));
+}
+
+static SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) {
return Op.getOperand(0);
}
@@ -10015,6 +10635,7 @@
DebugLoc dl = Op.getDebugLoc();
const Value *TrmpAddr = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
+ const TargetRegisterInfo* TRI = getTargetMachine().getRegisterInfo();
if (Subtarget->is64Bit()) {
SDValue OutChains[6];
@@ -10023,8 +10644,8 @@
const unsigned char JMP64r = 0xFF; // 64-bit jmp through register opcode.
const unsigned char MOV64ri = 0xB8; // X86::MOV64ri opcode.
- const unsigned char N86R10 = X86_MC::getX86RegNum(X86::R10);
- const unsigned char N86R11 = X86_MC::getX86RegNum(X86::R11);
+ const unsigned char N86R10 = TRI->getEncodingValue(X86::R10) & 0x7;
+ const unsigned char N86R11 = TRI->getEncodingValue(X86::R11) & 0x7;
const unsigned char REX_WB = 0x40 | 0x08 | 0x01; // REX prefix
@@ -10097,7 +10718,7 @@
for (FunctionType::param_iterator I = FTy->param_begin(),
E = FTy->param_end(); I != E; ++I, ++Idx)
- if (Attrs.paramHasAttr(Idx, Attribute::InReg))
+ if (Attrs.getParamAttributes(Idx).hasAttribute(Attributes::InReg))
// FIXME: should only count parameters that are lowered to integers.
InRegCount += (TD->getTypeSizeInBits(*I) + 31) / 32;
@@ -10126,7 +10747,7 @@
// This is storing the opcode for MOV32ri.
const unsigned char MOV32ri = 0xB8; // X86::MOV32ri's opcode byte.
- const unsigned char N86Reg = X86_MC::getX86RegNum(NestReg);
+ const unsigned char N86Reg = TRI->getEncodingValue(NestReg) & 0x7;
OutChains[0] = DAG.getStore(Root, dl,
DAG.getConstant(MOV32ri|N86Reg, MVT::i8),
Trmp, MachinePointerInfo(TrmpAddr),
@@ -10225,7 +10846,7 @@
ISD::TRUNCATE : ISD::ZERO_EXTEND), DL, VT, RetVal);
}
-SDValue X86TargetLowering::LowerCTLZ(SDValue Op, SelectionDAG &DAG) const {
+static SDValue LowerCTLZ(SDValue Op, SelectionDAG &DAG) {
EVT VT = Op.getValueType();
EVT OpVT = VT;
unsigned NumBits = VT.getSizeInBits();
@@ -10259,8 +10880,7 @@
return Op;
}
-SDValue X86TargetLowering::LowerCTLZ_ZERO_UNDEF(SDValue Op,
- SelectionDAG &DAG) const {
+static SDValue LowerCTLZ_ZERO_UNDEF(SDValue Op, SelectionDAG &DAG) {
EVT VT = Op.getValueType();
EVT OpVT = VT;
unsigned NumBits = VT.getSizeInBits();
@@ -10285,7 +10905,7 @@
return Op;
}
-SDValue X86TargetLowering::LowerCTTZ(SDValue Op, SelectionDAG &DAG) const {
+static SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) {
EVT VT = Op.getValueType();
unsigned NumBits = VT.getSizeInBits();
DebugLoc dl = Op.getDebugLoc();
@@ -10310,7 +10930,7 @@
static SDValue Lower256IntArith(SDValue Op, SelectionDAG &DAG) {
EVT VT = Op.getValueType();
- assert(VT.getSizeInBits() == 256 && VT.isInteger() &&
+ assert(VT.is256BitVector() && VT.isInteger() &&
"Unsupported value type for operation");
unsigned NumElems = VT.getVectorNumElements();
@@ -10334,25 +10954,26 @@
DAG.getNode(Op.getOpcode(), dl, NewVT, LHS2, RHS2));
}
-SDValue X86TargetLowering::LowerADD(SDValue Op, SelectionDAG &DAG) const {
- assert(Op.getValueType().getSizeInBits() == 256 &&
+static SDValue LowerADD(SDValue Op, SelectionDAG &DAG) {
+ assert(Op.getValueType().is256BitVector() &&
Op.getValueType().isInteger() &&
"Only handle AVX 256-bit vector integer operation");
return Lower256IntArith(Op, DAG);
}
-SDValue X86TargetLowering::LowerSUB(SDValue Op, SelectionDAG &DAG) const {
- assert(Op.getValueType().getSizeInBits() == 256 &&
+static SDValue LowerSUB(SDValue Op, SelectionDAG &DAG) {
+ assert(Op.getValueType().is256BitVector() &&
Op.getValueType().isInteger() &&
"Only handle AVX 256-bit vector integer operation");
return Lower256IntArith(Op, DAG);
}
-SDValue X86TargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
+static SDValue LowerMUL(SDValue Op, const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
EVT VT = Op.getValueType();
// Decompose 256-bit ops into smaller 128-bit ops.
- if (VT.getSizeInBits() == 256 && !Subtarget->hasAVX2())
+ if (VT.is256BitVector() && !Subtarget->hasAVX2())
return Lower256IntArith(Op, DAG);
assert((VT == MVT::v2i64 || VT == MVT::v4i64) &&
@@ -10582,7 +11203,7 @@
}
// Decompose 256-bit shifts into smaller 128-bit shifts.
- if (VT.getSizeInBits() == 256) {
+ if (VT.is256BitVector()) {
unsigned NumElems = VT.getVectorNumElements();
MVT EltVT = VT.getVectorElementType().getSimpleVT();
EVT NewVT = MVT::getVectorVT(EltVT, NumElems/2);
@@ -10623,7 +11244,7 @@
return SDValue();
}
-SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const {
+static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) {
// Lower the "add/sub/mul with overflow" instruction into a regular ins plus
// a "setcc" instruction that checks the overflow flag. The "brcond" lowering
// looks for this combo and may remove the "setcc" instruction if the "setcc"
@@ -10738,7 +11359,7 @@
LHS1 = DAG.getNode(Op.getOpcode(), dl, NewVT, LHS1, Extra);
LHS2 = DAG.getNode(Op.getOpcode(), dl, NewVT, LHS2, Extra);
- return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, LHS1, LHS2);;
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, LHS1, LHS2);
}
// fall through
case MVT::v4i32:
@@ -10751,7 +11372,8 @@
}
-SDValue X86TargetLowering::LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG) const{
+static SDValue LowerMEMBARRIER(SDValue Op, const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
DebugLoc dl = Op.getDebugLoc();
// Go ahead and emit the fence on x86-64 even if we asked for no-sse2.
@@ -10796,8 +11418,8 @@
return DAG.getNode(X86ISD::MFENCE, dl, MVT::Other, Op.getOperand(0));
}
-SDValue X86TargetLowering::LowerATOMIC_FENCE(SDValue Op,
- SelectionDAG &DAG) const {
+static SDValue LowerATOMIC_FENCE(SDValue Op, const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
DebugLoc dl = Op.getDebugLoc();
AtomicOrdering FenceOrdering = static_cast<AtomicOrdering>(
cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue());
@@ -10835,7 +11457,8 @@
}
-SDValue X86TargetLowering::LowerCMP_SWAP(SDValue Op, SelectionDAG &DAG) const {
+static SDValue LowerCMP_SWAP(SDValue Op, const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
EVT T = Op.getValueType();
DebugLoc DL = Op.getDebugLoc();
unsigned Reg = 0;
@@ -10866,8 +11489,8 @@
return cpOut;
}
-SDValue X86TargetLowering::LowerREADCYCLECOUNTER(SDValue Op,
- SelectionDAG &DAG) const {
+static SDValue LowerREADCYCLECOUNTER(SDValue Op, const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
assert(Subtarget->is64Bit() && "Result not type legalized?");
SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue TheChain = Op.getOperand(0);
@@ -10885,8 +11508,7 @@
return DAG.getMergeValues(Ops, 2, dl);
}
-SDValue X86TargetLowering::LowerBITCAST(SDValue Op,
- SelectionDAG &DAG) const {
+SDValue X86TargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
EVT SrcVT = Op.getOperand(0).getValueType();
EVT DstVT = Op.getValueType();
assert(Subtarget->is64Bit() && !Subtarget->hasSSE2() &&
@@ -10906,7 +11528,7 @@
return SDValue();
}
-SDValue X86TargetLowering::LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) const {
+static SDValue LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) {
SDNode *Node = Op.getNode();
DebugLoc dl = Node->getDebugLoc();
EVT T = Node->getValueType(0);
@@ -10979,9 +11601,9 @@
switch (Op.getOpcode()) {
default: llvm_unreachable("Should not custom lower this!");
case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op,DAG);
- case ISD::MEMBARRIER: return LowerMEMBARRIER(Op,DAG);
- case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op,DAG);
- case ISD::ATOMIC_CMP_SWAP: return LowerCMP_SWAP(Op,DAG);
+ case ISD::MEMBARRIER: return LowerMEMBARRIER(Op, Subtarget, DAG);
+ case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, Subtarget, DAG);
+ case ISD::ATOMIC_CMP_SWAP: return LowerCMP_SWAP(Op, Subtarget, DAG);
case ISD::ATOMIC_LOAD_SUB: return LowerLOAD_SUB(Op,DAG);
case ISD::ATOMIC_STORE: return LowerATOMIC_STORE(Op,DAG);
case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
@@ -10989,8 +11611,8 @@
case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
- case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op, DAG);
- case ISD::INSERT_SUBVECTOR: return LowerINSERT_SUBVECTOR(Op, DAG);
+ case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op,Subtarget,DAG);
+ case ISD::INSERT_SUBVECTOR: return LowerINSERT_SUBVECTOR(Op, Subtarget,DAG);
case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
@@ -11002,8 +11624,11 @@
case ISD::SRL_PARTS: return LowerShiftParts(Op, DAG);
case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);
case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG);
+ case ISD::TRUNCATE: return lowerTRUNCATE(Op, DAG);
+ case ISD::ZERO_EXTEND: return lowerZERO_EXTEND(Op, DAG);
case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG);
case ISD::FP_TO_UINT: return LowerFP_TO_UINT(Op, DAG);
+ case ISD::FP_EXTEND: return lowerFP_EXTEND(Op, DAG);
case ISD::FABS: return LowerFABS(Op, DAG);
case ISD::FNEG: return LowerFNEG(Op, DAG);
case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
@@ -11014,7 +11639,7 @@
case ISD::JumpTable: return LowerJumpTable(Op, DAG);
case ISD::VASTART: return LowerVASTART(Op, DAG);
case ISD::VAARG: return LowerVAARG(Op, DAG);
- case ISD::VACOPY: return LowerVACOPY(Op, DAG);
+ case ISD::VACOPY: return LowerVACOPY(Op, Subtarget, DAG);
case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
case ISD::INTRINSIC_W_CHAIN: return LowerINTRINSIC_W_CHAIN(Op, DAG);
case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
@@ -11023,13 +11648,15 @@
return LowerFRAME_TO_ARGS_OFFSET(Op, DAG);
case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
case ISD::EH_RETURN: return LowerEH_RETURN(Op, DAG);
+ case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG);
+ case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG);
case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
case ISD::CTLZ: return LowerCTLZ(Op, DAG);
case ISD::CTLZ_ZERO_UNDEF: return LowerCTLZ_ZERO_UNDEF(Op, DAG);
case ISD::CTTZ: return LowerCTTZ(Op, DAG);
- case ISD::MUL: return LowerMUL(Op, DAG);
+ case ISD::MUL: return LowerMUL(Op, Subtarget, DAG);
case ISD::SRA:
case ISD::SRL:
case ISD::SHL: return LowerShift(Op, DAG);
@@ -11039,7 +11666,7 @@
case ISD::USUBO:
case ISD::SMULO:
case ISD::UMULO: return LowerXALUO(Op, DAG);
- case ISD::READCYCLECOUNTER: return LowerREADCYCLECOUNTER(Op, DAG);
+ case ISD::READCYCLECOUNTER: return LowerREADCYCLECOUNTER(Op, Subtarget,DAG);
case ISD::BITCAST: return LowerBITCAST(Op, DAG);
case ISD::ADDC:
case ISD::ADDE:
@@ -11071,9 +11698,9 @@
Results.push_back(Swap.getValue(1));
}
-void X86TargetLowering::
+static void
ReplaceATOMIC_BINARY_64(SDNode *Node, SmallVectorImpl<SDValue>&Results,
- SelectionDAG &DAG, unsigned NewOp) const {
+ SelectionDAG &DAG, unsigned NewOp) {
DebugLoc dl = Node->getDebugLoc();
assert (Node->getValueType(0) == MVT::i64 &&
"Only know how to expand i64 atomics");
@@ -11132,6 +11759,27 @@
}
return;
}
+ case ISD::UINT_TO_FP: {
+ if (N->getOperand(0).getValueType() != MVT::v2i32 &&
+ N->getValueType(0) != MVT::v2f32)
+ return;
+ SDValue ZExtIn = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v2i64,
+ N->getOperand(0));
+ SDValue Bias = DAG.getConstantFP(BitsToDouble(0x4330000000000000ULL),
+ MVT::f64);
+ SDValue VBias = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2f64, Bias, Bias);
+ SDValue Or = DAG.getNode(ISD::OR, dl, MVT::v2i64, ZExtIn,
+ DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, VBias));
+ Or = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Or);
+ SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, Or, VBias);
+ Results.push_back(DAG.getNode(X86ISD::VFPROUND, dl, MVT::v4f32, Sub));
+ return;
+ }
+ case ISD::FP_ROUND: {
+ SDValue V = DAG.getNode(X86ISD::VFPROUND, dl, MVT::v4f32, N->getOperand(0));
+ Results.push_back(V);
+ return;
+ }
case ISD::READCYCLECOUNTER: {
SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue TheChain = N->getOperand(0);
@@ -11194,26 +11842,56 @@
return;
}
case ISD::ATOMIC_LOAD_ADD:
- ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMADD64_DAG);
- return;
case ISD::ATOMIC_LOAD_AND:
- ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMAND64_DAG);
- return;
case ISD::ATOMIC_LOAD_NAND:
- ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMNAND64_DAG);
- return;
case ISD::ATOMIC_LOAD_OR:
- ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMOR64_DAG);
- return;
case ISD::ATOMIC_LOAD_SUB:
- ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMSUB64_DAG);
- return;
case ISD::ATOMIC_LOAD_XOR:
- ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMXOR64_DAG);
- return;
- case ISD::ATOMIC_SWAP:
- ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMSWAP64_DAG);
+ case ISD::ATOMIC_LOAD_MAX:
+ case ISD::ATOMIC_LOAD_MIN:
+ case ISD::ATOMIC_LOAD_UMAX:
+ case ISD::ATOMIC_LOAD_UMIN:
+ case ISD::ATOMIC_SWAP: {
+ unsigned Opc;
+ switch (N->getOpcode()) {
+ default: llvm_unreachable("Unexpected opcode");
+ case ISD::ATOMIC_LOAD_ADD:
+ Opc = X86ISD::ATOMADD64_DAG;
+ break;
+ case ISD::ATOMIC_LOAD_AND:
+ Opc = X86ISD::ATOMAND64_DAG;
+ break;
+ case ISD::ATOMIC_LOAD_NAND:
+ Opc = X86ISD::ATOMNAND64_DAG;
+ break;
+ case ISD::ATOMIC_LOAD_OR:
+ Opc = X86ISD::ATOMOR64_DAG;
+ break;
+ case ISD::ATOMIC_LOAD_SUB:
+ Opc = X86ISD::ATOMSUB64_DAG;
+ break;
+ case ISD::ATOMIC_LOAD_XOR:
+ Opc = X86ISD::ATOMXOR64_DAG;
+ break;
+ case ISD::ATOMIC_LOAD_MAX:
+ Opc = X86ISD::ATOMMAX64_DAG;
+ break;
+ case ISD::ATOMIC_LOAD_MIN:
+ Opc = X86ISD::ATOMMIN64_DAG;
+ break;
+ case ISD::ATOMIC_LOAD_UMAX:
+ Opc = X86ISD::ATOMUMAX64_DAG;
+ break;
+ case ISD::ATOMIC_LOAD_UMIN:
+ Opc = X86ISD::ATOMUMIN64_DAG;
+ break;
+ case ISD::ATOMIC_SWAP:
+ Opc = X86ISD::ATOMSWAP64_DAG;
+ break;
+ }
+ ReplaceATOMIC_BINARY_64(N, Results, DAG, Opc);
return;
+ }
case ISD::ATOMIC_LOAD:
ReplaceATOMIC_LOAD(N, Results, DAG);
}
@@ -11273,11 +11951,15 @@
case X86ISD::FHSUB: return "X86ISD::FHSUB";
case X86ISD::FMAX: return "X86ISD::FMAX";
case X86ISD::FMIN: return "X86ISD::FMIN";
+ case X86ISD::FMAXC: return "X86ISD::FMAXC";
+ case X86ISD::FMINC: return "X86ISD::FMINC";
case X86ISD::FRSQRT: return "X86ISD::FRSQRT";
case X86ISD::FRCP: return "X86ISD::FRCP";
case X86ISD::TLSADDR: return "X86ISD::TLSADDR";
case X86ISD::TLSBASEADDR: return "X86ISD::TLSBASEADDR";
case X86ISD::TLSCALL: return "X86ISD::TLSCALL";
+ case X86ISD::EH_SJLJ_SETJMP: return "X86ISD::EH_SJLJ_SETJMP";
+ case X86ISD::EH_SJLJ_LONGJMP: return "X86ISD::EH_SJLJ_LONGJMP";
case X86ISD::EH_RETURN: return "X86ISD::EH_RETURN";
case X86ISD::TC_RETURN: return "X86ISD::TC_RETURN";
case X86ISD::FNSTCW16m: return "X86ISD::FNSTCW16m";
@@ -11291,7 +11973,12 @@
case X86ISD::ATOMAND64_DAG: return "X86ISD::ATOMAND64_DAG";
case X86ISD::ATOMNAND64_DAG: return "X86ISD::ATOMNAND64_DAG";
case X86ISD::VZEXT_MOVL: return "X86ISD::VZEXT_MOVL";
+ case X86ISD::VSEXT_MOVL: return "X86ISD::VSEXT_MOVL";
case X86ISD::VZEXT_LOAD: return "X86ISD::VZEXT_LOAD";
+ case X86ISD::VZEXT: return "X86ISD::VZEXT";
+ case X86ISD::VSEXT: return "X86ISD::VSEXT";
+ case X86ISD::VFPEXT: return "X86ISD::VFPEXT";
+ case X86ISD::VFPROUND: return "X86ISD::VFPROUND";
case X86ISD::VSHLDQ: return "X86ISD::VSHLDQ";
case X86ISD::VSRLDQ: return "X86ISD::VSRLDQ";
case X86ISD::VSHL: return "X86ISD::VSHL";
@@ -11358,6 +12045,8 @@
case X86ISD::FNMSUB: return "X86ISD::FNMSUB";
case X86ISD::FMADDSUB: return "X86ISD::FMADDSUB";
case X86ISD::FMSUBADD: return "X86ISD::FMSUBADD";
+ case X86ISD::PCMPESTRI: return "X86ISD::PCMPESTRI";
+ case X86ISD::PCMPISTRI: return "X86ISD::PCMPISTRI";
}
}
@@ -11493,7 +12182,7 @@
// FIXME: This collection of masks seems suspect.
if (NumElts == 2)
return true;
- if (NumElts == 4 && VT.getSizeInBits() == 128) {
+ if (NumElts == 4 && VT.is128BitVector()) {
return (isMOVLMask(Mask, VT) ||
isCommutedMOVLMask(Mask, VT, true) ||
isSHUFPMask(Mask, VT, Subtarget->hasAVX()) ||
@@ -11506,430 +12195,724 @@
// X86 Scheduler Hooks
//===----------------------------------------------------------------------===//
-// private utility function
-MachineBasicBlock *
-X86TargetLowering::EmitAtomicBitwiseWithCustomInserter(MachineInstr *bInstr,
- MachineBasicBlock *MBB,
- unsigned regOpc,
- unsigned immOpc,
- unsigned LoadOpc,
- unsigned CXchgOpc,
- unsigned notOpc,
- unsigned EAXreg,
- const TargetRegisterClass *RC,
- bool Invert) const {
- // For the atomic bitwise operator, we generate
- // thisMBB:
- // newMBB:
- // ld t1 = [bitinstr.addr]
- // op t2 = t1, [bitinstr.val]
- // not t3 = t2 (if Invert)
- // mov EAX = t1
- // lcs dest = [bitinstr.addr], t3 [EAX is implicit]
- // bz newMBB
- // fallthrough -->nextMBB
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
- const BasicBlock *LLVM_BB = MBB->getBasicBlock();
- MachineFunction::iterator MBBIter = MBB;
- ++MBBIter;
+/// Utility function to emit xbegin specifying the start of an RTM region.
+static MachineBasicBlock *EmitXBegin(MachineInstr *MI, MachineBasicBlock *MBB,
+ const TargetInstrInfo *TII) {
+ DebugLoc DL = MI->getDebugLoc();
+
+ const BasicBlock *BB = MBB->getBasicBlock();
+ MachineFunction::iterator I = MBB;
+ ++I;
+
+ // For the v = xbegin(), we generate
+ //
+ // thisMBB:
+ // xbegin sinkMBB
+ //
+ // mainMBB:
+ // eax = -1
+ //
+ // sinkMBB:
+ // v = eax
- /// First build the CFG
- MachineFunction *F = MBB->getParent();
MachineBasicBlock *thisMBB = MBB;
- MachineBasicBlock *newMBB = F->CreateMachineBasicBlock(LLVM_BB);
- MachineBasicBlock *nextMBB = F->CreateMachineBasicBlock(LLVM_BB);
- F->insert(MBBIter, newMBB);
- F->insert(MBBIter, nextMBB);
-
- // Transfer the remainder of thisMBB and its successor edges to nextMBB.
- nextMBB->splice(nextMBB->begin(), thisMBB,
- llvm::next(MachineBasicBlock::iterator(bInstr)),
- thisMBB->end());
- nextMBB->transferSuccessorsAndUpdatePHIs(thisMBB);
-
- // Update thisMBB to fall through to newMBB
- thisMBB->addSuccessor(newMBB);
-
- // newMBB jumps to itself and fall through to nextMBB
- newMBB->addSuccessor(nextMBB);
- newMBB->addSuccessor(newMBB);
-
- // Insert instructions into newMBB based on incoming instruction
- assert(bInstr->getNumOperands() < X86::AddrNumOperands + 4 &&
- "unexpected number of operands");
- DebugLoc dl = bInstr->getDebugLoc();
- MachineOperand& destOper = bInstr->getOperand(0);
- MachineOperand* argOpers[2 + X86::AddrNumOperands];
- int numArgs = bInstr->getNumOperands() - 1;
- for (int i=0; i < numArgs; ++i)
- argOpers[i] = &bInstr->getOperand(i+1);
-
- // x86 address has 4 operands: base, index, scale, and displacement
- int lastAddrIndx = X86::AddrNumOperands - 1; // [0,3]
- int valArgIndx = lastAddrIndx + 1;
-
- unsigned t1 = F->getRegInfo().createVirtualRegister(RC);
- MachineInstrBuilder MIB = BuildMI(newMBB, dl, TII->get(LoadOpc), t1);
- for (int i=0; i <= lastAddrIndx; ++i)
- (*MIB).addOperand(*argOpers[i]);
-
- unsigned t2 = F->getRegInfo().createVirtualRegister(RC);
- assert((argOpers[valArgIndx]->isReg() ||
- argOpers[valArgIndx]->isImm()) &&
- "invalid operand");
- if (argOpers[valArgIndx]->isReg())
- MIB = BuildMI(newMBB, dl, TII->get(regOpc), t2);
- else
- MIB = BuildMI(newMBB, dl, TII->get(immOpc), t2);
- MIB.addReg(t1);
- (*MIB).addOperand(*argOpers[valArgIndx]);
+ MachineFunction *MF = MBB->getParent();
+ MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
+ MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
+ MF->insert(I, mainMBB);
+ MF->insert(I, sinkMBB);
- unsigned t3 = F->getRegInfo().createVirtualRegister(RC);
- if (Invert) {
- MIB = BuildMI(newMBB, dl, TII->get(notOpc), t3).addReg(t2);
- }
- else
- t3 = t2;
+ // Transfer the remainder of BB and its successor edges to sinkMBB.
+ sinkMBB->splice(sinkMBB->begin(), MBB,
+ llvm::next(MachineBasicBlock::iterator(MI)), MBB->end());
+ sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
+
+ // thisMBB:
+ // xbegin sinkMBB
+ // # fallthrough to mainMBB
+ // # abortion to sinkMBB
+ BuildMI(thisMBB, DL, TII->get(X86::XBEGIN_4)).addMBB(sinkMBB);
+ thisMBB->addSuccessor(mainMBB);
+ thisMBB->addSuccessor(sinkMBB);
+
+ // mainMBB:
+ // EAX = -1
+ BuildMI(mainMBB, DL, TII->get(X86::MOV32ri), X86::EAX).addImm(-1);
+ mainMBB->addSuccessor(sinkMBB);
+
+ // sinkMBB:
+ // EAX is live into the sinkMBB
+ sinkMBB->addLiveIn(X86::EAX);
+ BuildMI(*sinkMBB, sinkMBB->begin(), DL,
+ TII->get(TargetOpcode::COPY), MI->getOperand(0).getReg())
+ .addReg(X86::EAX);
- MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), EAXreg);
- MIB.addReg(t1);
+ MI->eraseFromParent();
+ return sinkMBB;
+}
- MIB = BuildMI(newMBB, dl, TII->get(CXchgOpc));
- for (int i=0; i <= lastAddrIndx; ++i)
- (*MIB).addOperand(*argOpers[i]);
- MIB.addReg(t3);
- assert(bInstr->hasOneMemOperand() && "Unexpected number of memoperand");
- (*MIB).setMemRefs(bInstr->memoperands_begin(),
- bInstr->memoperands_end());
+// Get CMPXCHG opcode for the specified data type.
+static unsigned getCmpXChgOpcode(EVT VT) {
+ switch (VT.getSimpleVT().SimpleTy) {
+ case MVT::i8: return X86::LCMPXCHG8;
+ case MVT::i16: return X86::LCMPXCHG16;
+ case MVT::i32: return X86::LCMPXCHG32;
+ case MVT::i64: return X86::LCMPXCHG64;
+ default:
+ break;
+ }
+ llvm_unreachable("Invalid operand size!");
+}
- MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), destOper.getReg());
- MIB.addReg(EAXreg);
+// Get LOAD opcode for the specified data type.
+static unsigned getLoadOpcode(EVT VT) {
+ switch (VT.getSimpleVT().SimpleTy) {
+ case MVT::i8: return X86::MOV8rm;
+ case MVT::i16: return X86::MOV16rm;
+ case MVT::i32: return X86::MOV32rm;
+ case MVT::i64: return X86::MOV64rm;
+ default:
+ break;
+ }
+ llvm_unreachable("Invalid operand size!");
+}
- // insert branch
- BuildMI(newMBB, dl, TII->get(X86::JNE_4)).addMBB(newMBB);
+// Get opcode of the non-atomic one from the specified atomic instruction.
+static unsigned getNonAtomicOpcode(unsigned Opc) {
+ switch (Opc) {
+ case X86::ATOMAND8: return X86::AND8rr;
+ case X86::ATOMAND16: return X86::AND16rr;
+ case X86::ATOMAND32: return X86::AND32rr;
+ case X86::ATOMAND64: return X86::AND64rr;
+ case X86::ATOMOR8: return X86::OR8rr;
+ case X86::ATOMOR16: return X86::OR16rr;
+ case X86::ATOMOR32: return X86::OR32rr;
+ case X86::ATOMOR64: return X86::OR64rr;
+ case X86::ATOMXOR8: return X86::XOR8rr;
+ case X86::ATOMXOR16: return X86::XOR16rr;
+ case X86::ATOMXOR32: return X86::XOR32rr;
+ case X86::ATOMXOR64: return X86::XOR64rr;
+ }
+ llvm_unreachable("Unhandled atomic-load-op opcode!");
+}
+
+// Get opcode of the non-atomic one from the specified atomic instruction with
+// extra opcode.
+static unsigned getNonAtomicOpcodeWithExtraOpc(unsigned Opc,
+ unsigned &ExtraOpc) {
+ switch (Opc) {
+ case X86::ATOMNAND8: ExtraOpc = X86::NOT8r; return X86::AND8rr;
+ case X86::ATOMNAND16: ExtraOpc = X86::NOT16r; return X86::AND16rr;
+ case X86::ATOMNAND32: ExtraOpc = X86::NOT32r; return X86::AND32rr;
+ case X86::ATOMNAND64: ExtraOpc = X86::NOT64r; return X86::AND64rr;
+ case X86::ATOMMAX8: ExtraOpc = X86::CMP8rr; return X86::CMOVL32rr;
+ case X86::ATOMMAX16: ExtraOpc = X86::CMP16rr; return X86::CMOVL16rr;
+ case X86::ATOMMAX32: ExtraOpc = X86::CMP32rr; return X86::CMOVL32rr;
+ case X86::ATOMMAX64: ExtraOpc = X86::CMP64rr; return X86::CMOVL64rr;
+ case X86::ATOMMIN8: ExtraOpc = X86::CMP8rr; return X86::CMOVG32rr;
+ case X86::ATOMMIN16: ExtraOpc = X86::CMP16rr; return X86::CMOVG16rr;
+ case X86::ATOMMIN32: ExtraOpc = X86::CMP32rr; return X86::CMOVG32rr;
+ case X86::ATOMMIN64: ExtraOpc = X86::CMP64rr; return X86::CMOVG64rr;
+ case X86::ATOMUMAX8: ExtraOpc = X86::CMP8rr; return X86::CMOVB32rr;
+ case X86::ATOMUMAX16: ExtraOpc = X86::CMP16rr; return X86::CMOVB16rr;
+ case X86::ATOMUMAX32: ExtraOpc = X86::CMP32rr; return X86::CMOVB32rr;
+ case X86::ATOMUMAX64: ExtraOpc = X86::CMP64rr; return X86::CMOVB64rr;
+ case X86::ATOMUMIN8: ExtraOpc = X86::CMP8rr; return X86::CMOVA32rr;
+ case X86::ATOMUMIN16: ExtraOpc = X86::CMP16rr; return X86::CMOVA16rr;
+ case X86::ATOMUMIN32: ExtraOpc = X86::CMP32rr; return X86::CMOVA32rr;
+ case X86::ATOMUMIN64: ExtraOpc = X86::CMP64rr; return X86::CMOVA64rr;
+ }
+ llvm_unreachable("Unhandled atomic-load-op opcode!");
+}
+
+// Get opcode of the non-atomic one from the specified atomic instruction for
+// 64-bit data type on 32-bit target.
+static unsigned getNonAtomic6432Opcode(unsigned Opc, unsigned &HiOpc) {
+ switch (Opc) {
+ case X86::ATOMAND6432: HiOpc = X86::AND32rr; return X86::AND32rr;
+ case X86::ATOMOR6432: HiOpc = X86::OR32rr; return X86::OR32rr;
+ case X86::ATOMXOR6432: HiOpc = X86::XOR32rr; return X86::XOR32rr;
+ case X86::ATOMADD6432: HiOpc = X86::ADC32rr; return X86::ADD32rr;
+ case X86::ATOMSUB6432: HiOpc = X86::SBB32rr; return X86::SUB32rr;
+ case X86::ATOMSWAP6432: HiOpc = X86::MOV32rr; return X86::MOV32rr;
+ case X86::ATOMMAX6432: HiOpc = X86::SETLr; return X86::SETLr;
+ case X86::ATOMMIN6432: HiOpc = X86::SETGr; return X86::SETGr;
+ case X86::ATOMUMAX6432: HiOpc = X86::SETBr; return X86::SETBr;
+ case X86::ATOMUMIN6432: HiOpc = X86::SETAr; return X86::SETAr;
+ }
+ llvm_unreachable("Unhandled atomic-load-op opcode!");
+}
+
+// Get opcode of the non-atomic one from the specified atomic instruction for
+// 64-bit data type on 32-bit target with extra opcode.
+static unsigned getNonAtomic6432OpcodeWithExtraOpc(unsigned Opc,
+ unsigned &HiOpc,
+ unsigned &ExtraOpc) {
+ switch (Opc) {
+ case X86::ATOMNAND6432:
+ ExtraOpc = X86::NOT32r;
+ HiOpc = X86::AND32rr;
+ return X86::AND32rr;
+ }
+ llvm_unreachable("Unhandled atomic-load-op opcode!");
+}
- bInstr->eraseFromParent(); // The pseudo instruction is gone now.
- return nextMBB;
+// Get pseudo CMOV opcode from the specified data type.
+static unsigned getPseudoCMOVOpc(EVT VT) {
+ switch (VT.getSimpleVT().SimpleTy) {
+ case MVT::i8: return X86::CMOV_GR8;
+ case MVT::i16: return X86::CMOV_GR16;
+ case MVT::i32: return X86::CMOV_GR32;
+ default:
+ break;
+ }
+ llvm_unreachable("Unknown CMOV opcode!");
}
-// private utility function: 64 bit atomics on 32 bit host.
+// EmitAtomicLoadArith - emit the code sequence for pseudo atomic instructions.
+// They will be translated into a spin-loop or compare-exchange loop from
+//
+// ...
+// dst = atomic-fetch-op MI.addr, MI.val
+// ...
+//
+// to
+//
+// ...
+// EAX = LOAD MI.addr
+// loop:
+// t1 = OP MI.val, EAX
+// LCMPXCHG [MI.addr], t1, [EAX is implicitly used & defined]
+// JNE loop
+// sink:
+// dst = EAX
+// ...
MachineBasicBlock *
-X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr,
- MachineBasicBlock *MBB,
- unsigned regOpcL,
- unsigned regOpcH,
- unsigned immOpcL,
- unsigned immOpcH,
- bool Invert) const {
- // For the atomic bitwise operator, we generate
- // thisMBB (instructions are in pairs, except cmpxchg8b)
- // ld t1,t2 = [bitinstr.addr]
- // newMBB:
- // out1, out2 = phi (thisMBB, t1/t2) (newMBB, t3/t4)
- // op t5, t6 <- out1, out2, [bitinstr.val]
- // (for SWAP, substitute: mov t5, t6 <- [bitinstr.val])
- // neg t7, t8 < t5, t6 (if Invert)
- // mov ECX, EBX <- t5, t6
- // mov EAX, EDX <- t1, t2
- // cmpxchg8b [bitinstr.addr] [EAX, EDX, EBX, ECX implicit]
- // mov t3, t4 <- EAX, EDX
- // bz newMBB
- // result in out1, out2
- // fallthrough -->nextMBB
-
- const TargetRegisterClass *RC = &X86::GR32RegClass;
- const unsigned LoadOpc = X86::MOV32rm;
- const unsigned NotOpc = X86::NOT32r;
+X86TargetLowering::EmitAtomicLoadArith(MachineInstr *MI,
+ MachineBasicBlock *MBB) const {
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
- const BasicBlock *LLVM_BB = MBB->getBasicBlock();
- MachineFunction::iterator MBBIter = MBB;
- ++MBBIter;
+ DebugLoc DL = MI->getDebugLoc();
+
+ MachineFunction *MF = MBB->getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+
+ const BasicBlock *BB = MBB->getBasicBlock();
+ MachineFunction::iterator I = MBB;
+ ++I;
+
+ assert(MI->getNumOperands() <= X86::AddrNumOperands + 2 &&
+ "Unexpected number of operands");
+
+ assert(MI->hasOneMemOperand() &&
+ "Expected atomic-load-op to have one memoperand");
+
+ // Memory Reference
+ MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
+ MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();
+
+ unsigned DstReg, SrcReg;
+ unsigned MemOpndSlot;
+
+ unsigned CurOp = 0;
+
+ DstReg = MI->getOperand(CurOp++).getReg();
+ MemOpndSlot = CurOp;
+ CurOp += X86::AddrNumOperands;
+ SrcReg = MI->getOperand(CurOp++).getReg();
+
+ const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
+ MVT::SimpleValueType VT = *RC->vt_begin();
+ unsigned AccPhyReg = getX86SubSuperRegister(X86::EAX, VT);
+
+ unsigned LCMPXCHGOpc = getCmpXChgOpcode(VT);
+ unsigned LOADOpc = getLoadOpcode(VT);
+
+ // For the atomic load-arith operator, we generate
+ //
+ // thisMBB:
+ // EAX = LOAD [MI.addr]
+ // mainMBB:
+ // t1 = OP MI.val, EAX
+ // LCMPXCHG [MI.addr], t1, [EAX is implicitly used & defined]
+ // JNE mainMBB
+ // sinkMBB:
- /// First build the CFG
- MachineFunction *F = MBB->getParent();
MachineBasicBlock *thisMBB = MBB;
- MachineBasicBlock *newMBB = F->CreateMachineBasicBlock(LLVM_BB);
- MachineBasicBlock *nextMBB = F->CreateMachineBasicBlock(LLVM_BB);
- F->insert(MBBIter, newMBB);
- F->insert(MBBIter, nextMBB);
-
- // Transfer the remainder of thisMBB and its successor edges to nextMBB.
- nextMBB->splice(nextMBB->begin(), thisMBB,
- llvm::next(MachineBasicBlock::iterator(bInstr)),
- thisMBB->end());
- nextMBB->transferSuccessorsAndUpdatePHIs(thisMBB);
-
- // Update thisMBB to fall through to newMBB
- thisMBB->addSuccessor(newMBB);
-
- // newMBB jumps to itself and fall through to nextMBB
- newMBB->addSuccessor(nextMBB);
- newMBB->addSuccessor(newMBB);
-
- DebugLoc dl = bInstr->getDebugLoc();
- // Insert instructions into newMBB based on incoming instruction
- // There are 8 "real" operands plus 9 implicit def/uses, ignored here.
- assert(bInstr->getNumOperands() < X86::AddrNumOperands + 14 &&
- "unexpected number of operands");
- MachineOperand& dest1Oper = bInstr->getOperand(0);
- MachineOperand& dest2Oper = bInstr->getOperand(1);
- MachineOperand* argOpers[2 + X86::AddrNumOperands];
- for (int i=0; i < 2 + X86::AddrNumOperands; ++i) {
- argOpers[i] = &bInstr->getOperand(i+2);
-
- // We use some of the operands multiple times, so conservatively just
- // clear any kill flags that might be present.
- if (argOpers[i]->isReg() && argOpers[i]->isUse())
- argOpers[i]->setIsKill(false);
- }
-
- // x86 address has 5 operands: base, index, scale, displacement, and segment.
- int lastAddrIndx = X86::AddrNumOperands - 1; // [0,3]
-
- unsigned t1 = F->getRegInfo().createVirtualRegister(RC);
- MachineInstrBuilder MIB = BuildMI(thisMBB, dl, TII->get(LoadOpc), t1);
- for (int i=0; i <= lastAddrIndx; ++i)
- (*MIB).addOperand(*argOpers[i]);
- unsigned t2 = F->getRegInfo().createVirtualRegister(RC);
- MIB = BuildMI(thisMBB, dl, TII->get(LoadOpc), t2);
- // add 4 to displacement.
- for (int i=0; i <= lastAddrIndx-2; ++i)
- (*MIB).addOperand(*argOpers[i]);
- MachineOperand newOp3 = *(argOpers[3]);
- if (newOp3.isImm())
- newOp3.setImm(newOp3.getImm()+4);
- else
- newOp3.setOffset(newOp3.getOffset()+4);
- (*MIB).addOperand(newOp3);
- (*MIB).addOperand(*argOpers[lastAddrIndx]);
-
- // t3/4 are defined later, at the bottom of the loop
- unsigned t3 = F->getRegInfo().createVirtualRegister(RC);
- unsigned t4 = F->getRegInfo().createVirtualRegister(RC);
- BuildMI(newMBB, dl, TII->get(X86::PHI), dest1Oper.getReg())
- .addReg(t1).addMBB(thisMBB).addReg(t3).addMBB(newMBB);
- BuildMI(newMBB, dl, TII->get(X86::PHI), dest2Oper.getReg())
- .addReg(t2).addMBB(thisMBB).addReg(t4).addMBB(newMBB);
-
- // The subsequent operations should be using the destination registers of
- // the PHI instructions.
- t1 = dest1Oper.getReg();
- t2 = dest2Oper.getReg();
-
- int valArgIndx = lastAddrIndx + 1;
- assert((argOpers[valArgIndx]->isReg() ||
- argOpers[valArgIndx]->isImm()) &&
- "invalid operand");
- unsigned t5 = F->getRegInfo().createVirtualRegister(RC);
- unsigned t6 = F->getRegInfo().createVirtualRegister(RC);
- if (argOpers[valArgIndx]->isReg())
- MIB = BuildMI(newMBB, dl, TII->get(regOpcL), t5);
- else
- MIB = BuildMI(newMBB, dl, TII->get(immOpcL), t5);
- if (regOpcL != X86::MOV32rr)
- MIB.addReg(t1);
- (*MIB).addOperand(*argOpers[valArgIndx]);
- assert(argOpers[valArgIndx + 1]->isReg() ==
- argOpers[valArgIndx]->isReg());
- assert(argOpers[valArgIndx + 1]->isImm() ==
- argOpers[valArgIndx]->isImm());
- if (argOpers[valArgIndx + 1]->isReg())
- MIB = BuildMI(newMBB, dl, TII->get(regOpcH), t6);
- else
- MIB = BuildMI(newMBB, dl, TII->get(immOpcH), t6);
- if (regOpcH != X86::MOV32rr)
- MIB.addReg(t2);
- (*MIB).addOperand(*argOpers[valArgIndx + 1]);
-
- unsigned t7, t8;
- if (Invert) {
- t7 = F->getRegInfo().createVirtualRegister(RC);
- t8 = F->getRegInfo().createVirtualRegister(RC);
- MIB = BuildMI(newMBB, dl, TII->get(NotOpc), t7).addReg(t5);
- MIB = BuildMI(newMBB, dl, TII->get(NotOpc), t8).addReg(t6);
- } else {
- t7 = t5;
- t8 = t6;
+ MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
+ MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
+ MF->insert(I, mainMBB);
+ MF->insert(I, sinkMBB);
+
+ MachineInstrBuilder MIB;
+
+ // Transfer the remainder of BB and its successor edges to sinkMBB.
+ sinkMBB->splice(sinkMBB->begin(), MBB,
+ llvm::next(MachineBasicBlock::iterator(MI)), MBB->end());
+ sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
+
+ // thisMBB:
+ MIB = BuildMI(thisMBB, DL, TII->get(LOADOpc), AccPhyReg);
+ for (unsigned i = 0; i < X86::AddrNumOperands; ++i)
+ MIB.addOperand(MI->getOperand(MemOpndSlot + i));
+ MIB.setMemRefs(MMOBegin, MMOEnd);
+
+ thisMBB->addSuccessor(mainMBB);
+
+ // mainMBB:
+ MachineBasicBlock *origMainMBB = mainMBB;
+ mainMBB->addLiveIn(AccPhyReg);
+
+ // Copy AccPhyReg as it is used more than once.
+ unsigned AccReg = MRI.createVirtualRegister(RC);
+ BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), AccReg)
+ .addReg(AccPhyReg);
+
+ unsigned t1 = MRI.createVirtualRegister(RC);
+ unsigned Opc = MI->getOpcode();
+ switch (Opc) {
+ default:
+ llvm_unreachable("Unhandled atomic-load-op opcode!");
+ case X86::ATOMAND8:
+ case X86::ATOMAND16:
+ case X86::ATOMAND32:
+ case X86::ATOMAND64:
+ case X86::ATOMOR8:
+ case X86::ATOMOR16:
+ case X86::ATOMOR32:
+ case X86::ATOMOR64:
+ case X86::ATOMXOR8:
+ case X86::ATOMXOR16:
+ case X86::ATOMXOR32:
+ case X86::ATOMXOR64: {
+ unsigned ARITHOpc = getNonAtomicOpcode(Opc);
+ BuildMI(mainMBB, DL, TII->get(ARITHOpc), t1).addReg(SrcReg)
+ .addReg(AccReg);
+ break;
}
+ case X86::ATOMNAND8:
+ case X86::ATOMNAND16:
+ case X86::ATOMNAND32:
+ case X86::ATOMNAND64: {
+ unsigned t2 = MRI.createVirtualRegister(RC);
+ unsigned NOTOpc;
+ unsigned ANDOpc = getNonAtomicOpcodeWithExtraOpc(Opc, NOTOpc);
+ BuildMI(mainMBB, DL, TII->get(ANDOpc), t2).addReg(SrcReg)
+ .addReg(AccReg);
+ BuildMI(mainMBB, DL, TII->get(NOTOpc), t1).addReg(t2);
+ break;
+ }
+ case X86::ATOMMAX8:
+ case X86::ATOMMAX16:
+ case X86::ATOMMAX32:
+ case X86::ATOMMAX64:
+ case X86::ATOMMIN8:
+ case X86::ATOMMIN16:
+ case X86::ATOMMIN32:
+ case X86::ATOMMIN64:
+ case X86::ATOMUMAX8:
+ case X86::ATOMUMAX16:
+ case X86::ATOMUMAX32:
+ case X86::ATOMUMAX64:
+ case X86::ATOMUMIN8:
+ case X86::ATOMUMIN16:
+ case X86::ATOMUMIN32:
+ case X86::ATOMUMIN64: {
+ unsigned CMPOpc;
+ unsigned CMOVOpc = getNonAtomicOpcodeWithExtraOpc(Opc, CMPOpc);
+
+ BuildMI(mainMBB, DL, TII->get(CMPOpc))
+ .addReg(SrcReg)
+ .addReg(AccReg);
+
+ if (Subtarget->hasCMov()) {
+ if (VT != MVT::i8) {
+ // Native support
+ BuildMI(mainMBB, DL, TII->get(CMOVOpc), t1)
+ .addReg(SrcReg)
+ .addReg(AccReg);
+ } else {
+ // Promote i8 to i32 to use CMOV32
+ const TargetRegisterClass *RC32 = getRegClassFor(MVT::i32);
+ unsigned SrcReg32 = MRI.createVirtualRegister(RC32);
+ unsigned AccReg32 = MRI.createVirtualRegister(RC32);
+ unsigned t2 = MRI.createVirtualRegister(RC32);
+
+ unsigned Undef = MRI.createVirtualRegister(RC32);
+ BuildMI(mainMBB, DL, TII->get(TargetOpcode::IMPLICIT_DEF), Undef);
+
+ BuildMI(mainMBB, DL, TII->get(TargetOpcode::INSERT_SUBREG), SrcReg32)
+ .addReg(Undef)
+ .addReg(SrcReg)
+ .addImm(X86::sub_8bit);
+ BuildMI(mainMBB, DL, TII->get(TargetOpcode::INSERT_SUBREG), AccReg32)
+ .addReg(Undef)
+ .addReg(AccReg)
+ .addImm(X86::sub_8bit);
+
+ BuildMI(mainMBB, DL, TII->get(CMOVOpc), t2)
+ .addReg(SrcReg32)
+ .addReg(AccReg32);
- MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::EAX);
- MIB.addReg(t1);
- MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::EDX);
- MIB.addReg(t2);
+ BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), t1)
+ .addReg(t2, 0, X86::sub_8bit);
+ }
+ } else {
+ // Use pseudo select and lower them.
+ assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) &&
+ "Invalid atomic-load-op transformation!");
+ unsigned SelOpc = getPseudoCMOVOpc(VT);
+ X86::CondCode CC = X86::getCondFromCMovOpc(CMOVOpc);
+ assert(CC != X86::COND_INVALID && "Invalid atomic-load-op transformation!");
+ MIB = BuildMI(mainMBB, DL, TII->get(SelOpc), t1)
+ .addReg(SrcReg).addReg(AccReg)
+ .addImm(CC);
+ mainMBB = EmitLoweredSelect(MIB, mainMBB);
+ }
+ break;
+ }
+ }
- MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::EBX);
- MIB.addReg(t7);
- MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::ECX);
- MIB.addReg(t8);
+ // Copy AccPhyReg back from virtual register.
+ BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), AccPhyReg)
+ .addReg(AccReg);
+
+ MIB = BuildMI(mainMBB, DL, TII->get(LCMPXCHGOpc));
+ for (unsigned i = 0; i < X86::AddrNumOperands; ++i)
+ MIB.addOperand(MI->getOperand(MemOpndSlot + i));
+ MIB.addReg(t1);
+ MIB.setMemRefs(MMOBegin, MMOEnd);
- MIB = BuildMI(newMBB, dl, TII->get(X86::LCMPXCHG8B));
- for (int i=0; i <= lastAddrIndx; ++i)
- (*MIB).addOperand(*argOpers[i]);
+ BuildMI(mainMBB, DL, TII->get(X86::JNE_4)).addMBB(origMainMBB);
- assert(bInstr->hasOneMemOperand() && "Unexpected number of memoperand");
- (*MIB).setMemRefs(bInstr->memoperands_begin(),
- bInstr->memoperands_end());
+ mainMBB->addSuccessor(origMainMBB);
+ mainMBB->addSuccessor(sinkMBB);
- MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), t3);
- MIB.addReg(X86::EAX);
- MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), t4);
- MIB.addReg(X86::EDX);
+ // sinkMBB:
+ sinkMBB->addLiveIn(AccPhyReg);
- // insert branch
- BuildMI(newMBB, dl, TII->get(X86::JNE_4)).addMBB(newMBB);
+ BuildMI(*sinkMBB, sinkMBB->begin(), DL,
+ TII->get(TargetOpcode::COPY), DstReg)
+ .addReg(AccPhyReg);
- bInstr->eraseFromParent(); // The pseudo instruction is gone now.
- return nextMBB;
+ MI->eraseFromParent();
+ return sinkMBB;
}
-// private utility function
+// EmitAtomicLoadArith6432 - emit the code sequence for pseudo atomic
+// instructions. They will be translated into a spin-loop or compare-exchange
+// loop from
+//
+// ...
+// dst = atomic-fetch-op MI.addr, MI.val
+// ...
+//
+// to
+//
+// ...
+// EAX = LOAD [MI.addr + 0]
+// EDX = LOAD [MI.addr + 4]
+// loop:
+// EBX = OP MI.val.lo, EAX
+// ECX = OP MI.val.hi, EDX
+// LCMPXCHG8B [MI.addr], [ECX:EBX & EDX:EAX are implicitly used and EDX:EAX is implicitly defined]
+// JNE loop
+// sink:
+// dst = EDX:EAX
+// ...
MachineBasicBlock *
-X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr,
- MachineBasicBlock *MBB,
- unsigned cmovOpc) const {
- // For the atomic min/max operator, we generate
- // thisMBB:
- // newMBB:
- // ld t1 = [min/max.addr]
- // mov t2 = [min/max.val]
- // cmp t1, t2
- // cmov[cond] t2 = t1
- // mov EAX = t1
- // lcs dest = [bitinstr.addr], t2 [EAX is implicit]
- // bz newMBB
- // fallthrough -->nextMBB
- //
+X86TargetLowering::EmitAtomicLoadArith6432(MachineInstr *MI,
+ MachineBasicBlock *MBB) const {
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
- const BasicBlock *LLVM_BB = MBB->getBasicBlock();
- MachineFunction::iterator MBBIter = MBB;
- ++MBBIter;
+ DebugLoc DL = MI->getDebugLoc();
+
+ MachineFunction *MF = MBB->getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+
+ const BasicBlock *BB = MBB->getBasicBlock();
+ MachineFunction::iterator I = MBB;
+ ++I;
+
+ assert(MI->getNumOperands() <= X86::AddrNumOperands + 4 &&
+ "Unexpected number of operands");
+
+ assert(MI->hasOneMemOperand() &&
+ "Expected atomic-load-op32 to have one memoperand");
+
+ // Memory Reference
+ MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
+ MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();
+
+ unsigned DstLoReg, DstHiReg;
+ unsigned SrcLoReg, SrcHiReg;
+ unsigned MemOpndSlot;
+
+ unsigned CurOp = 0;
+
+ DstLoReg = MI->getOperand(CurOp++).getReg();
+ DstHiReg = MI->getOperand(CurOp++).getReg();
+ MemOpndSlot = CurOp;
+ CurOp += X86::AddrNumOperands;
+ SrcLoReg = MI->getOperand(CurOp++).getReg();
+ SrcHiReg = MI->getOperand(CurOp++).getReg();
+
+ const TargetRegisterClass *RC = &X86::GR32RegClass;
+ const TargetRegisterClass *RC8 = &X86::GR8RegClass;
+
+ unsigned LCMPXCHGOpc = X86::LCMPXCHG8B;
+ unsigned LOADOpc = X86::MOV32rm;
+
+ // For the atomic load-arith operator, we generate
+ //
+ // thisMBB:
+ // EAX = LOAD [MI.addr + 0]
+ // EDX = LOAD [MI.addr + 4]
+ // mainMBB:
+ // EBX = OP MI.vallo, EAX
+ // ECX = OP MI.valhi, EDX
+ // LCMPXCHG8B [MI.addr], [ECX:EBX & EDX:EAX are implicitly used and EDX:EAX is implicitly defined]
+ // JNE mainMBB
+ // sinkMBB:
- /// First build the CFG
- MachineFunction *F = MBB->getParent();
MachineBasicBlock *thisMBB = MBB;
- MachineBasicBlock *newMBB = F->CreateMachineBasicBlock(LLVM_BB);
- MachineBasicBlock *nextMBB = F->CreateMachineBasicBlock(LLVM_BB);
- F->insert(MBBIter, newMBB);
- F->insert(MBBIter, nextMBB);
-
- // Transfer the remainder of thisMBB and its successor edges to nextMBB.
- nextMBB->splice(nextMBB->begin(), thisMBB,
- llvm::next(MachineBasicBlock::iterator(mInstr)),
- thisMBB->end());
- nextMBB->transferSuccessorsAndUpdatePHIs(thisMBB);
-
- // Update thisMBB to fall through to newMBB
- thisMBB->addSuccessor(newMBB);
-
- // newMBB jumps to newMBB and fall through to nextMBB
- newMBB->addSuccessor(nextMBB);
- newMBB->addSuccessor(newMBB);
-
- DebugLoc dl = mInstr->getDebugLoc();
- // Insert instructions into newMBB based on incoming instruction
- assert(mInstr->getNumOperands() < X86::AddrNumOperands + 4 &&
- "unexpected number of operands");
- MachineOperand& destOper = mInstr->getOperand(0);
- MachineOperand* argOpers[2 + X86::AddrNumOperands];
- int numArgs = mInstr->getNumOperands() - 1;
- for (int i=0; i < numArgs; ++i)
- argOpers[i] = &mInstr->getOperand(i+1);
-
- // x86 address has 4 operands: base, index, scale, and displacement
- int lastAddrIndx = X86::AddrNumOperands - 1; // [0,3]
- int valArgIndx = lastAddrIndx + 1;
-
- unsigned t1 = F->getRegInfo().createVirtualRegister(&X86::GR32RegClass);
- MachineInstrBuilder MIB = BuildMI(newMBB, dl, TII->get(X86::MOV32rm), t1);
- for (int i=0; i <= lastAddrIndx; ++i)
- (*MIB).addOperand(*argOpers[i]);
-
- // We only support register and immediate values
- assert((argOpers[valArgIndx]->isReg() ||
- argOpers[valArgIndx]->isImm()) &&
- "invalid operand");
-
- unsigned t2 = F->getRegInfo().createVirtualRegister(&X86::GR32RegClass);
- if (argOpers[valArgIndx]->isReg())
- MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), t2);
- else
- MIB = BuildMI(newMBB, dl, TII->get(X86::MOV32rr), t2);
- (*MIB).addOperand(*argOpers[valArgIndx]);
+ MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
+ MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
+ MF->insert(I, mainMBB);
+ MF->insert(I, sinkMBB);
- MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::EAX);
- MIB.addReg(t1);
+ MachineInstrBuilder MIB;
- MIB = BuildMI(newMBB, dl, TII->get(X86::CMP32rr));
- MIB.addReg(t1);
- MIB.addReg(t2);
+ // Transfer the remainder of BB and its successor edges to sinkMBB.
+ sinkMBB->splice(sinkMBB->begin(), MBB,
+ llvm::next(MachineBasicBlock::iterator(MI)), MBB->end());
+ sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
+
+ // thisMBB:
+ // Lo
+ MIB = BuildMI(thisMBB, DL, TII->get(LOADOpc), X86::EAX);
+ for (unsigned i = 0; i < X86::AddrNumOperands; ++i)
+ MIB.addOperand(MI->getOperand(MemOpndSlot + i));
+ MIB.setMemRefs(MMOBegin, MMOEnd);
+ // Hi
+ MIB = BuildMI(thisMBB, DL, TII->get(LOADOpc), X86::EDX);
+ for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
+ if (i == X86::AddrDisp)
+ MIB.addDisp(MI->getOperand(MemOpndSlot + i), 4); // 4 == sizeof(i32)
+ else
+ MIB.addOperand(MI->getOperand(MemOpndSlot + i));
+ }
+ MIB.setMemRefs(MMOBegin, MMOEnd);
- // Generate movc
- unsigned t3 = F->getRegInfo().createVirtualRegister(&X86::GR32RegClass);
- MIB = BuildMI(newMBB, dl, TII->get(cmovOpc),t3);
- MIB.addReg(t2);
- MIB.addReg(t1);
+ thisMBB->addSuccessor(mainMBB);
+
+ // mainMBB:
+ MachineBasicBlock *origMainMBB = mainMBB;
+ mainMBB->addLiveIn(X86::EAX);
+ mainMBB->addLiveIn(X86::EDX);
+
+ // Copy EDX:EAX as they are used more than once.
+ unsigned LoReg = MRI.createVirtualRegister(RC);
+ unsigned HiReg = MRI.createVirtualRegister(RC);
+ BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), LoReg).addReg(X86::EAX);
+ BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), HiReg).addReg(X86::EDX);
- // Cmp and exchange if none has modified the memory location
- MIB = BuildMI(newMBB, dl, TII->get(X86::LCMPXCHG32));
- for (int i=0; i <= lastAddrIndx; ++i)
- (*MIB).addOperand(*argOpers[i]);
- MIB.addReg(t3);
- assert(mInstr->hasOneMemOperand() && "Unexpected number of memoperand");
- (*MIB).setMemRefs(mInstr->memoperands_begin(),
- mInstr->memoperands_end());
+ unsigned t1L = MRI.createVirtualRegister(RC);
+ unsigned t1H = MRI.createVirtualRegister(RC);
- MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), destOper.getReg());
- MIB.addReg(X86::EAX);
+ unsigned Opc = MI->getOpcode();
+ switch (Opc) {
+ default:
+ llvm_unreachable("Unhandled atomic-load-op6432 opcode!");
+ case X86::ATOMAND6432:
+ case X86::ATOMOR6432:
+ case X86::ATOMXOR6432:
+ case X86::ATOMADD6432:
+ case X86::ATOMSUB6432: {
+ unsigned HiOpc;
+ unsigned LoOpc = getNonAtomic6432Opcode(Opc, HiOpc);
+ BuildMI(mainMBB, DL, TII->get(LoOpc), t1L).addReg(LoReg).addReg(SrcLoReg);
+ BuildMI(mainMBB, DL, TII->get(HiOpc), t1H).addReg(HiReg).addReg(SrcHiReg);
+ break;
+ }
+ case X86::ATOMNAND6432: {
+ unsigned HiOpc, NOTOpc;
+ unsigned LoOpc = getNonAtomic6432OpcodeWithExtraOpc(Opc, HiOpc, NOTOpc);
+ unsigned t2L = MRI.createVirtualRegister(RC);
+ unsigned t2H = MRI.createVirtualRegister(RC);
+ BuildMI(mainMBB, DL, TII->get(LoOpc), t2L).addReg(SrcLoReg).addReg(LoReg);
+ BuildMI(mainMBB, DL, TII->get(HiOpc), t2H).addReg(SrcHiReg).addReg(HiReg);
+ BuildMI(mainMBB, DL, TII->get(NOTOpc), t1L).addReg(t2L);
+ BuildMI(mainMBB, DL, TII->get(NOTOpc), t1H).addReg(t2H);
+ break;
+ }
+ case X86::ATOMMAX6432:
+ case X86::ATOMMIN6432:
+ case X86::ATOMUMAX6432:
+ case X86::ATOMUMIN6432: {
+ unsigned HiOpc;
+ unsigned LoOpc = getNonAtomic6432Opcode(Opc, HiOpc);
+ unsigned cL = MRI.createVirtualRegister(RC8);
+ unsigned cH = MRI.createVirtualRegister(RC8);
+ unsigned cL32 = MRI.createVirtualRegister(RC);
+ unsigned cH32 = MRI.createVirtualRegister(RC);
+ unsigned cc = MRI.createVirtualRegister(RC);
+ // cl := cmp src_lo, lo
+ BuildMI(mainMBB, DL, TII->get(X86::CMP32rr))
+ .addReg(SrcLoReg).addReg(LoReg);
+ BuildMI(mainMBB, DL, TII->get(LoOpc), cL);
+ BuildMI(mainMBB, DL, TII->get(X86::MOVZX32rr8), cL32).addReg(cL);
+ // ch := cmp src_hi, hi
+ BuildMI(mainMBB, DL, TII->get(X86::CMP32rr))
+ .addReg(SrcHiReg).addReg(HiReg);
+ BuildMI(mainMBB, DL, TII->get(HiOpc), cH);
+ BuildMI(mainMBB, DL, TII->get(X86::MOVZX32rr8), cH32).addReg(cH);
+ // cc := if (src_hi == hi) ? cl : ch;
+ if (Subtarget->hasCMov()) {
+ BuildMI(mainMBB, DL, TII->get(X86::CMOVE32rr), cc)
+ .addReg(cH32).addReg(cL32);
+ } else {
+ MIB = BuildMI(mainMBB, DL, TII->get(X86::CMOV_GR32), cc)
+ .addReg(cH32).addReg(cL32)
+ .addImm(X86::COND_E);
+ mainMBB = EmitLoweredSelect(MIB, mainMBB);
+ }
+ BuildMI(mainMBB, DL, TII->get(X86::TEST32rr)).addReg(cc).addReg(cc);
+ if (Subtarget->hasCMov()) {
+ BuildMI(mainMBB, DL, TII->get(X86::CMOVNE32rr), t1L)
+ .addReg(SrcLoReg).addReg(LoReg);
+ BuildMI(mainMBB, DL, TII->get(X86::CMOVNE32rr), t1H)
+ .addReg(SrcHiReg).addReg(HiReg);
+ } else {
+ MIB = BuildMI(mainMBB, DL, TII->get(X86::CMOV_GR32), t1L)
+ .addReg(SrcLoReg).addReg(LoReg)
+ .addImm(X86::COND_NE);
+ mainMBB = EmitLoweredSelect(MIB, mainMBB);
+ MIB = BuildMI(mainMBB, DL, TII->get(X86::CMOV_GR32), t1H)
+ .addReg(SrcHiReg).addReg(HiReg)
+ .addImm(X86::COND_NE);
+ mainMBB = EmitLoweredSelect(MIB, mainMBB);
+ }
+ break;
+ }
+ case X86::ATOMSWAP6432: {
+ unsigned HiOpc;
+ unsigned LoOpc = getNonAtomic6432Opcode(Opc, HiOpc);
+ BuildMI(mainMBB, DL, TII->get(LoOpc), t1L).addReg(SrcLoReg);
+ BuildMI(mainMBB, DL, TII->get(HiOpc), t1H).addReg(SrcHiReg);
+ break;
+ }
+ }
+
+ // Copy EDX:EAX back from HiReg:LoReg
+ BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::EAX).addReg(LoReg);
+ BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::EDX).addReg(HiReg);
+ // Copy ECX:EBX from t1H:t1L
+ BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::EBX).addReg(t1L);
+ BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::ECX).addReg(t1H);
+
+ MIB = BuildMI(mainMBB, DL, TII->get(LCMPXCHGOpc));
+ for (unsigned i = 0; i < X86::AddrNumOperands; ++i)
+ MIB.addOperand(MI->getOperand(MemOpndSlot + i));
+ MIB.setMemRefs(MMOBegin, MMOEnd);
+
+ BuildMI(mainMBB, DL, TII->get(X86::JNE_4)).addMBB(origMainMBB);
+
+ mainMBB->addSuccessor(origMainMBB);
+ mainMBB->addSuccessor(sinkMBB);
+
+ // sinkMBB:
+ sinkMBB->addLiveIn(X86::EAX);
+ sinkMBB->addLiveIn(X86::EDX);
- // insert branch
- BuildMI(newMBB, dl, TII->get(X86::JNE_4)).addMBB(newMBB);
+ BuildMI(*sinkMBB, sinkMBB->begin(), DL,
+ TII->get(TargetOpcode::COPY), DstLoReg)
+ .addReg(X86::EAX);
+ BuildMI(*sinkMBB, sinkMBB->begin(), DL,
+ TII->get(TargetOpcode::COPY), DstHiReg)
+ .addReg(X86::EDX);
- mInstr->eraseFromParent(); // The pseudo instruction is gone now.
- return nextMBB;
+ MI->eraseFromParent();
+ return sinkMBB;
}
// FIXME: When we get size specific XMM0 registers, i.e. XMM0_V16I8
// or XMM0_V32I8 in AVX all of this code can be replaced with that
// in the .td file.
-MachineBasicBlock *
-X86TargetLowering::EmitPCMP(MachineInstr *MI, MachineBasicBlock *BB,
- unsigned numArgs, bool memArg) const {
- assert(Subtarget->hasSSE42() &&
- "Target must have SSE4.2 or AVX features enabled");
+static MachineBasicBlock *EmitPCMPSTRM(MachineInstr *MI, MachineBasicBlock *BB,
+ const TargetInstrInfo *TII) {
+ unsigned Opc;
+ switch (MI->getOpcode()) {
+ default: llvm_unreachable("illegal opcode!");
+ case X86::PCMPISTRM128REG: Opc = X86::PCMPISTRM128rr; break;
+ case X86::VPCMPISTRM128REG: Opc = X86::VPCMPISTRM128rr; break;
+ case X86::PCMPISTRM128MEM: Opc = X86::PCMPISTRM128rm; break;
+ case X86::VPCMPISTRM128MEM: Opc = X86::VPCMPISTRM128rm; break;
+ case X86::PCMPESTRM128REG: Opc = X86::PCMPESTRM128rr; break;
+ case X86::VPCMPESTRM128REG: Opc = X86::VPCMPESTRM128rr; break;
+ case X86::PCMPESTRM128MEM: Opc = X86::PCMPESTRM128rm; break;
+ case X86::VPCMPESTRM128MEM: Opc = X86::VPCMPESTRM128rm; break;
+ }
DebugLoc dl = MI->getDebugLoc();
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(Opc));
+
+ unsigned NumArgs = MI->getNumOperands();
+ for (unsigned i = 1; i < NumArgs; ++i) {
+ MachineOperand &Op = MI->getOperand(i);
+ if (!(Op.isReg() && Op.isImplicit()))
+ MIB.addOperand(Op);
+ }
+ if (MI->hasOneMemOperand())
+ MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+
+ BuildMI(*BB, MI, dl,
+ TII->get(TargetOpcode::COPY), MI->getOperand(0).getReg())
+ .addReg(X86::XMM0);
+
+ MI->eraseFromParent();
+ return BB;
+}
+
+// FIXME: Custom handling because TableGen doesn't support multiple implicit
+// defs in an instruction pattern
+static MachineBasicBlock *EmitPCMPSTRI(MachineInstr *MI, MachineBasicBlock *BB,
+ const TargetInstrInfo *TII) {
unsigned Opc;
- if (!Subtarget->hasAVX()) {
- if (memArg)
- Opc = numArgs == 3 ? X86::PCMPISTRM128rm : X86::PCMPESTRM128rm;
- else
- Opc = numArgs == 3 ? X86::PCMPISTRM128rr : X86::PCMPESTRM128rr;
- } else {
- if (memArg)
- Opc = numArgs == 3 ? X86::VPCMPISTRM128rm : X86::VPCMPESTRM128rm;
- else
- Opc = numArgs == 3 ? X86::VPCMPISTRM128rr : X86::VPCMPESTRM128rr;
+ switch (MI->getOpcode()) {
+ default: llvm_unreachable("illegal opcode!");
+ case X86::PCMPISTRIREG: Opc = X86::PCMPISTRIrr; break;
+ case X86::VPCMPISTRIREG: Opc = X86::VPCMPISTRIrr; break;
+ case X86::PCMPISTRIMEM: Opc = X86::PCMPISTRIrm; break;
+ case X86::VPCMPISTRIMEM: Opc = X86::VPCMPISTRIrm; break;
+ case X86::PCMPESTRIREG: Opc = X86::PCMPESTRIrr; break;
+ case X86::VPCMPESTRIREG: Opc = X86::VPCMPESTRIrr; break;
+ case X86::PCMPESTRIMEM: Opc = X86::PCMPESTRIrm; break;
+ case X86::VPCMPESTRIMEM: Opc = X86::VPCMPESTRIrm; break;
}
+ DebugLoc dl = MI->getDebugLoc();
MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(Opc));
- for (unsigned i = 0; i < numArgs; ++i) {
- MachineOperand &Op = MI->getOperand(i+1);
+
+ unsigned NumArgs = MI->getNumOperands(); // remove the results
+ for (unsigned i = 1; i < NumArgs; ++i) {
+ MachineOperand &Op = MI->getOperand(i);
if (!(Op.isReg() && Op.isImplicit()))
MIB.addOperand(Op);
}
+ if (MI->hasOneMemOperand())
+ MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+
BuildMI(*BB, MI, dl,
TII->get(TargetOpcode::COPY), MI->getOperand(0).getReg())
- .addReg(X86::XMM0);
+ .addReg(X86::ECX);
MI->eraseFromParent();
return BB;
}
-MachineBasicBlock *
-X86TargetLowering::EmitMonitor(MachineInstr *MI, MachineBasicBlock *BB) const {
+static MachineBasicBlock * EmitMonitor(MachineInstr *MI, MachineBasicBlock *BB,
+ const TargetInstrInfo *TII,
+ const X86Subtarget* Subtarget) {
DebugLoc dl = MI->getDebugLoc();
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
// Address into RAX/EAX, other two args into ECX, EDX.
unsigned MemOpc = Subtarget->is64Bit() ? X86::LEA64r : X86::LEA32r;
@@ -12618,6 +13601,203 @@
}
MachineBasicBlock *
+X86TargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
+ MachineBasicBlock *MBB) const {
+ DebugLoc DL = MI->getDebugLoc();
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+
+ MachineFunction *MF = MBB->getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+
+ const BasicBlock *BB = MBB->getBasicBlock();
+ MachineFunction::iterator I = MBB;
+ ++I;
+
+ // Memory Reference
+ MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
+ MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();
+
+ unsigned DstReg;
+ unsigned MemOpndSlot = 0;
+
+ unsigned CurOp = 0;
+
+ DstReg = MI->getOperand(CurOp++).getReg();
+ const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
+ assert(RC->hasType(MVT::i32) && "Invalid destination!");
+ unsigned mainDstReg = MRI.createVirtualRegister(RC);
+ unsigned restoreDstReg = MRI.createVirtualRegister(RC);
+
+ MemOpndSlot = CurOp;
+
+ MVT PVT = getPointerTy();
+ assert((PVT == MVT::i64 || PVT == MVT::i32) &&
+ "Invalid Pointer Size!");
+
+ // For v = setjmp(buf), we generate
+ //
+ // thisMBB:
+ // buf[LabelOffset] = restoreMBB
+ // SjLjSetup restoreMBB
+ //
+ // mainMBB:
+ // v_main = 0
+ //
+ // sinkMBB:
+ // v = phi(main, restore)
+ //
+ // restoreMBB:
+ // v_restore = 1
+
+ MachineBasicBlock *thisMBB = MBB;
+ MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
+ MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
+ MachineBasicBlock *restoreMBB = MF->CreateMachineBasicBlock(BB);
+ MF->insert(I, mainMBB);
+ MF->insert(I, sinkMBB);
+ MF->push_back(restoreMBB);
+
+ MachineInstrBuilder MIB;
+
+ // Transfer the remainder of BB and its successor edges to sinkMBB.
+ sinkMBB->splice(sinkMBB->begin(), MBB,
+ llvm::next(MachineBasicBlock::iterator(MI)), MBB->end());
+ sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
+
+ // thisMBB:
+ unsigned PtrStoreOpc = 0;
+ unsigned LabelReg = 0;
+ const int64_t LabelOffset = 1 * PVT.getStoreSize();
+ Reloc::Model RM = getTargetMachine().getRelocationModel();
+ bool UseImmLabel = (getTargetMachine().getCodeModel() == CodeModel::Small) &&
+ (RM == Reloc::Static || RM == Reloc::DynamicNoPIC);
+
+ // Prepare IP either in reg or imm.
+ if (!UseImmLabel) {
+ PtrStoreOpc = (PVT == MVT::i64) ? X86::MOV64mr : X86::MOV32mr;
+ const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
+ LabelReg = MRI.createVirtualRegister(PtrRC);
+ if (Subtarget->is64Bit()) {
+ MIB = BuildMI(*thisMBB, MI, DL, TII->get(X86::LEA64r), LabelReg)
+ .addReg(X86::RIP)
+ .addImm(0)
+ .addReg(0)
+ .addMBB(restoreMBB)
+ .addReg(0);
+ } else {
+ const X86InstrInfo *XII = static_cast<const X86InstrInfo*>(TII);
+ MIB = BuildMI(*thisMBB, MI, DL, TII->get(X86::LEA32r), LabelReg)
+ .addReg(XII->getGlobalBaseReg(MF))
+ .addImm(0)
+ .addReg(0)
+ .addMBB(restoreMBB, Subtarget->ClassifyBlockAddressReference())
+ .addReg(0);
+ }
+ } else
+ PtrStoreOpc = (PVT == MVT::i64) ? X86::MOV64mi32 : X86::MOV32mi;
+ // Store IP
+ MIB = BuildMI(*thisMBB, MI, DL, TII->get(PtrStoreOpc));
+ for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
+ if (i == X86::AddrDisp)
+ MIB.addDisp(MI->getOperand(MemOpndSlot + i), LabelOffset);
+ else
+ MIB.addOperand(MI->getOperand(MemOpndSlot + i));
+ }
+ if (!UseImmLabel)
+ MIB.addReg(LabelReg);
+ else
+ MIB.addMBB(restoreMBB);
+ MIB.setMemRefs(MMOBegin, MMOEnd);
+ // Setup
+ MIB = BuildMI(*thisMBB, MI, DL, TII->get(X86::EH_SjLj_Setup))
+ .addMBB(restoreMBB);
+ MIB.addRegMask(RegInfo->getNoPreservedMask());
+ thisMBB->addSuccessor(mainMBB);
+ thisMBB->addSuccessor(restoreMBB);
+
+ // mainMBB:
+ // EAX = 0
+ BuildMI(mainMBB, DL, TII->get(X86::MOV32r0), mainDstReg);
+ mainMBB->addSuccessor(sinkMBB);
+
+ // sinkMBB:
+ BuildMI(*sinkMBB, sinkMBB->begin(), DL,
+ TII->get(X86::PHI), DstReg)
+ .addReg(mainDstReg).addMBB(mainMBB)
+ .addReg(restoreDstReg).addMBB(restoreMBB);
+
+ // restoreMBB:
+ BuildMI(restoreMBB, DL, TII->get(X86::MOV32ri), restoreDstReg).addImm(1);
+ BuildMI(restoreMBB, DL, TII->get(X86::JMP_4)).addMBB(sinkMBB);
+ restoreMBB->addSuccessor(sinkMBB);
+
+ MI->eraseFromParent();
+ return sinkMBB;
+}
+
+MachineBasicBlock *
+X86TargetLowering::emitEHSjLjLongJmp(MachineInstr *MI,
+ MachineBasicBlock *MBB) const {
+ DebugLoc DL = MI->getDebugLoc();
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+
+ MachineFunction *MF = MBB->getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+
+ // Memory Reference
+ MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
+ MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();
+
+ MVT PVT = getPointerTy();
+ assert((PVT == MVT::i64 || PVT == MVT::i32) &&
+ "Invalid Pointer Size!");
+
+ const TargetRegisterClass *RC =
+ (PVT == MVT::i64) ? &X86::GR64RegClass : &X86::GR32RegClass;
+ unsigned Tmp = MRI.createVirtualRegister(RC);
+ // Since FP is only updated here but NOT referenced, it's treated as GPR.
+ unsigned FP = (PVT == MVT::i64) ? X86::RBP : X86::EBP;
+ unsigned SP = RegInfo->getStackRegister();
+
+ MachineInstrBuilder MIB;
+
+ const int64_t LabelOffset = 1 * PVT.getStoreSize();
+ const int64_t SPOffset = 2 * PVT.getStoreSize();
+
+ unsigned PtrLoadOpc = (PVT == MVT::i64) ? X86::MOV64rm : X86::MOV32rm;
+ unsigned IJmpOpc = (PVT == MVT::i64) ? X86::JMP64r : X86::JMP32r;
+
+ // Reload FP
+ MIB = BuildMI(*MBB, MI, DL, TII->get(PtrLoadOpc), FP);
+ for (unsigned i = 0; i < X86::AddrNumOperands; ++i)
+ MIB.addOperand(MI->getOperand(i));
+ MIB.setMemRefs(MMOBegin, MMOEnd);
+ // Reload IP
+ MIB = BuildMI(*MBB, MI, DL, TII->get(PtrLoadOpc), Tmp);
+ for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
+ if (i == X86::AddrDisp)
+ MIB.addDisp(MI->getOperand(i), LabelOffset);
+ else
+ MIB.addOperand(MI->getOperand(i));
+ }
+ MIB.setMemRefs(MMOBegin, MMOEnd);
+ // Reload SP
+ MIB = BuildMI(*MBB, MI, DL, TII->get(PtrLoadOpc), SP);
+ for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
+ if (i == X86::AddrDisp)
+ MIB.addDisp(MI->getOperand(i), SPOffset);
+ else
+ MIB.addOperand(MI->getOperand(i));
+ }
+ MIB.setMemRefs(MMOBegin, MMOEnd);
+ // Jump
+ BuildMI(*MBB, MI, DL, TII->get(IJmpOpc)).addReg(Tmp);
+
+ MI->eraseFromParent();
+ return MBB;
+}
+
+MachineBasicBlock *
X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *BB) const {
switch (MI->getOpcode()) {
@@ -12741,188 +13921,106 @@
// String/text processing lowering.
case X86::PCMPISTRM128REG:
case X86::VPCMPISTRM128REG:
- return EmitPCMP(MI, BB, 3, false /* in-mem */);
case X86::PCMPISTRM128MEM:
case X86::VPCMPISTRM128MEM:
- return EmitPCMP(MI, BB, 3, true /* in-mem */);
case X86::PCMPESTRM128REG:
case X86::VPCMPESTRM128REG:
- return EmitPCMP(MI, BB, 5, false /* in mem */);
case X86::PCMPESTRM128MEM:
case X86::VPCMPESTRM128MEM:
- return EmitPCMP(MI, BB, 5, true /* in mem */);
+ assert(Subtarget->hasSSE42() &&
+ "Target must have SSE4.2 or AVX features enabled");
+ return EmitPCMPSTRM(MI, BB, getTargetMachine().getInstrInfo());
+
+ // String/text processing lowering.
+ case X86::PCMPISTRIREG:
+ case X86::VPCMPISTRIREG:
+ case X86::PCMPISTRIMEM:
+ case X86::VPCMPISTRIMEM:
+ case X86::PCMPESTRIREG:
+ case X86::VPCMPESTRIREG:
+ case X86::PCMPESTRIMEM:
+ case X86::VPCMPESTRIMEM:
+ assert(Subtarget->hasSSE42() &&
+ "Target must have SSE4.2 or AVX features enabled");
+ return EmitPCMPSTRI(MI, BB, getTargetMachine().getInstrInfo());
- // Thread synchronization.
+ // Thread synchronization.
case X86::MONITOR:
- return EmitMonitor(MI, BB);
+ return EmitMonitor(MI, BB, getTargetMachine().getInstrInfo(), Subtarget);
- // Atomic Lowering.
- case X86::ATOMAND32:
- return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND32rr,
- X86::AND32ri, X86::MOV32rm,
- X86::LCMPXCHG32,
- X86::NOT32r, X86::EAX,
- &X86::GR32RegClass);
- case X86::ATOMOR32:
- return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR32rr,
- X86::OR32ri, X86::MOV32rm,
- X86::LCMPXCHG32,
- X86::NOT32r, X86::EAX,
- &X86::GR32RegClass);
- case X86::ATOMXOR32:
- return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR32rr,
- X86::XOR32ri, X86::MOV32rm,
- X86::LCMPXCHG32,
- X86::NOT32r, X86::EAX,
- &X86::GR32RegClass);
- case X86::ATOMNAND32:
- return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND32rr,
- X86::AND32ri, X86::MOV32rm,
- X86::LCMPXCHG32,
- X86::NOT32r, X86::EAX,
- &X86::GR32RegClass, true);
- case X86::ATOMMIN32:
- return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVL32rr);
- case X86::ATOMMAX32:
- return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVG32rr);
- case X86::ATOMUMIN32:
- return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVB32rr);
- case X86::ATOMUMAX32:
- return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVA32rr);
+ // xbegin
+ case X86::XBEGIN:
+ return EmitXBegin(MI, BB, getTargetMachine().getInstrInfo());
+ // Atomic Lowering.
+ case X86::ATOMAND8:
case X86::ATOMAND16:
- return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND16rr,
- X86::AND16ri, X86::MOV16rm,
- X86::LCMPXCHG16,
- X86::NOT16r, X86::AX,
- &X86::GR16RegClass);
+ case X86::ATOMAND32:
+ case X86::ATOMAND64:
+ // Fall through
+ case X86::ATOMOR8:
case X86::ATOMOR16:
- return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR16rr,
- X86::OR16ri, X86::MOV16rm,
- X86::LCMPXCHG16,
- X86::NOT16r, X86::AX,
- &X86::GR16RegClass);
+ case X86::ATOMOR32:
+ case X86::ATOMOR64:
+ // Fall through
case X86::ATOMXOR16:
- return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR16rr,
- X86::XOR16ri, X86::MOV16rm,
- X86::LCMPXCHG16,
- X86::NOT16r, X86::AX,
- &X86::GR16RegClass);
- case X86::ATOMNAND16:
- return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND16rr,
- X86::AND16ri, X86::MOV16rm,
- X86::LCMPXCHG16,
- X86::NOT16r, X86::AX,
- &X86::GR16RegClass, true);
- case X86::ATOMMIN16:
- return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVL16rr);
- case X86::ATOMMAX16:
- return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVG16rr);
- case X86::ATOMUMIN16:
- return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVB16rr);
- case X86::ATOMUMAX16:
- return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVA16rr);
-
- case X86::ATOMAND8:
- return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND8rr,
- X86::AND8ri, X86::MOV8rm,
- X86::LCMPXCHG8,
- X86::NOT8r, X86::AL,
- &X86::GR8RegClass);
- case X86::ATOMOR8:
- return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR8rr,
- X86::OR8ri, X86::MOV8rm,
- X86::LCMPXCHG8,
- X86::NOT8r, X86::AL,
- &X86::GR8RegClass);
case X86::ATOMXOR8:
- return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR8rr,
- X86::XOR8ri, X86::MOV8rm,
- X86::LCMPXCHG8,
- X86::NOT8r, X86::AL,
- &X86::GR8RegClass);
- case X86::ATOMNAND8:
- return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND8rr,
- X86::AND8ri, X86::MOV8rm,
- X86::LCMPXCHG8,
- X86::NOT8r, X86::AL,
- &X86::GR8RegClass, true);
- // FIXME: There are no CMOV8 instructions; MIN/MAX need some other way.
- // This group is for 64-bit host.
- case X86::ATOMAND64:
- return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND64rr,
- X86::AND64ri32, X86::MOV64rm,
- X86::LCMPXCHG64,
- X86::NOT64r, X86::RAX,
- &X86::GR64RegClass);
- case X86::ATOMOR64:
- return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR64rr,
- X86::OR64ri32, X86::MOV64rm,
- X86::LCMPXCHG64,
- X86::NOT64r, X86::RAX,
- &X86::GR64RegClass);
+ case X86::ATOMXOR32:
case X86::ATOMXOR64:
- return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR64rr,
- X86::XOR64ri32, X86::MOV64rm,
- X86::LCMPXCHG64,
- X86::NOT64r, X86::RAX,
- &X86::GR64RegClass);
+ // Fall through
+ case X86::ATOMNAND8:
+ case X86::ATOMNAND16:
+ case X86::ATOMNAND32:
case X86::ATOMNAND64:
- return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND64rr,
- X86::AND64ri32, X86::MOV64rm,
- X86::LCMPXCHG64,
- X86::NOT64r, X86::RAX,
- &X86::GR64RegClass, true);
- case X86::ATOMMIN64:
- return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVL64rr);
+ // Fall through
+ case X86::ATOMMAX8:
+ case X86::ATOMMAX16:
+ case X86::ATOMMAX32:
case X86::ATOMMAX64:
- return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVG64rr);
- case X86::ATOMUMIN64:
- return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVB64rr);
+ // Fall through
+ case X86::ATOMMIN8:
+ case X86::ATOMMIN16:
+ case X86::ATOMMIN32:
+ case X86::ATOMMIN64:
+ // Fall through
+ case X86::ATOMUMAX8:
+ case X86::ATOMUMAX16:
+ case X86::ATOMUMAX32:
case X86::ATOMUMAX64:
- return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVA64rr);
+ // Fall through
+ case X86::ATOMUMIN8:
+ case X86::ATOMUMIN16:
+ case X86::ATOMUMIN32:
+ case X86::ATOMUMIN64:
+ return EmitAtomicLoadArith(MI, BB);
// This group does 64-bit operations on a 32-bit host.
case X86::ATOMAND6432:
- return EmitAtomicBit6432WithCustomInserter(MI, BB,
- X86::AND32rr, X86::AND32rr,
- X86::AND32ri, X86::AND32ri,
- false);
case X86::ATOMOR6432:
- return EmitAtomicBit6432WithCustomInserter(MI, BB,
- X86::OR32rr, X86::OR32rr,
- X86::OR32ri, X86::OR32ri,
- false);
case X86::ATOMXOR6432:
- return EmitAtomicBit6432WithCustomInserter(MI, BB,
- X86::XOR32rr, X86::XOR32rr,
- X86::XOR32ri, X86::XOR32ri,
- false);
case X86::ATOMNAND6432:
- return EmitAtomicBit6432WithCustomInserter(MI, BB,
- X86::AND32rr, X86::AND32rr,
- X86::AND32ri, X86::AND32ri,
- true);
case X86::ATOMADD6432:
- return EmitAtomicBit6432WithCustomInserter(MI, BB,
- X86::ADD32rr, X86::ADC32rr,
- X86::ADD32ri, X86::ADC32ri,
- false);
case X86::ATOMSUB6432:
- return EmitAtomicBit6432WithCustomInserter(MI, BB,
- X86::SUB32rr, X86::SBB32rr,
- X86::SUB32ri, X86::SBB32ri,
- false);
+ case X86::ATOMMAX6432:
+ case X86::ATOMMIN6432:
+ case X86::ATOMUMAX6432:
+ case X86::ATOMUMIN6432:
case X86::ATOMSWAP6432:
- return EmitAtomicBit6432WithCustomInserter(MI, BB,
- X86::MOV32rr, X86::MOV32rr,
- X86::MOV32ri, X86::MOV32ri,
- false);
+ return EmitAtomicLoadArith6432(MI, BB);
+
case X86::VASTART_SAVE_XMM_REGS:
return EmitVAStartSaveXMMRegsWithCustomInserter(MI, BB);
case X86::VAARG_64:
return EmitVAARG64WithCustomInserter(MI, BB);
+
+ case X86::EH_SjLj_SetJmp32:
+ case X86::EH_SjLj_SetJmp64:
+ return emitEHSjLjSetJmp(MI, BB);
+
+ case X86::EH_SjLj_LongJmp32:
+ case X86::EH_SjLj_LongJmp64:
+ return emitEHSjLjLongJmp(MI, BB);
}
}
@@ -13150,12 +14248,12 @@
return SDValue();
// Combine 256-bit vector shuffles. This is only profitable when in AVX mode
- if (Subtarget->hasAVX() && VT.getSizeInBits() == 256 &&
+ if (Subtarget->hasAVX() && VT.is256BitVector() &&
N->getOpcode() == ISD::VECTOR_SHUFFLE)
return PerformShuffleCombine256(N, DAG, DCI, Subtarget);
// Only handle 128 wide vector from here on.
- if (VT.getSizeInBits() != 128)
+ if (!VT.is128BitVector())
return SDValue();
// Combine a vector_shuffle that is equal to build_vector load1, load2, load3,
@@ -13169,12 +14267,12 @@
}
-/// DCI, PerformTruncateCombine - Converts truncate operation to
+/// PerformTruncateCombine - Converts truncate operation to
/// a sequence of vector shuffle operations.
/// It is possible when we truncate 256-bit vector to 128-bit vector
-
-SDValue X86TargetLowering::PerformTruncateCombine(SDNode *N, SelectionDAG &DAG,
- DAGCombinerInfo &DCI) const {
+static SDValue PerformTruncateCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget *Subtarget) {
if (!DCI.isBeforeLegalizeOps())
return SDValue();
@@ -13215,8 +14313,9 @@
// PSHUFD
static const int ShufMask1[] = {0, 2, 0, 0};
- OpLo = DAG.getVectorShuffle(VT, dl, OpLo, DAG.getUNDEF(VT), ShufMask1);
- OpHi = DAG.getVectorShuffle(VT, dl, OpHi, DAG.getUNDEF(VT), ShufMask1);
+ SDValue Undef = DAG.getUNDEF(VT);
+ OpLo = DAG.getVectorShuffle(VT, dl, OpLo, Undef, ShufMask1);
+ OpHi = DAG.getVectorShuffle(VT, dl, OpHi, Undef, ShufMask1);
// MOVLHPS
static const int ShufMask2[] = {0, 1, 4, 5};
@@ -13274,10 +14373,9 @@
static const int ShufMask1[] = {0, 1, 4, 5, 8, 9, 12, 13,
-1, -1, -1, -1, -1, -1, -1, -1};
- OpLo = DAG.getVectorShuffle(MVT::v16i8, dl, OpLo, DAG.getUNDEF(MVT::v16i8),
- ShufMask1);
- OpHi = DAG.getVectorShuffle(MVT::v16i8, dl, OpHi, DAG.getUNDEF(MVT::v16i8),
- ShufMask1);
+ SDValue Undef = DAG.getUNDEF(MVT::v16i8);
+ OpLo = DAG.getVectorShuffle(MVT::v16i8, dl, OpLo, Undef, ShufMask1);
+ OpHi = DAG.getVectorShuffle(MVT::v16i8, dl, OpHi, Undef, ShufMask1);
OpLo = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, OpLo);
OpHi = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, OpHi);
@@ -13366,7 +14464,7 @@
// alignment is valid.
unsigned Align = LN0->getAlignment();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- unsigned NewAlign = TLI.getTargetData()->
+ unsigned NewAlign = TLI.getDataLayout()->
getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext()));
if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VT))
@@ -13397,6 +14495,14 @@
return NewOp;
SDValue InputVector = N->getOperand(0);
+ // Detect whether we are trying to convert from mmx to i32 and the bitcast
+ // from mmx to v2i32 has a single usage.
+ if (InputVector.getNode()->getOpcode() == llvm::ISD::BITCAST &&
+ InputVector.getNode()->getOperand(0).getValueType() == MVT::x86mmx &&
+ InputVector.hasOneUse() && N->getValueType(0) == MVT::i32)
+ return DAG.getNode(X86ISD::MMX_MOVD2W, InputVector.getDebugLoc(),
+ N->getValueType(0),
+ InputVector.getNode()->getOperand(0));
// Only operate on vectors of 4 elements, where the alternative shuffling
// gets to be more expensive.
@@ -13782,9 +14888,106 @@
return SDValue();
}
+// Check whether a boolean test is testing a boolean value generated by
+// X86ISD::SETCC. If so, return the operand of that SETCC and proper condition
+// code.
+//
+// Simplify the following patterns:
+// (Op (CMP (SETCC Cond EFLAGS) 1) EQ) or
+// (Op (CMP (SETCC Cond EFLAGS) 0) NEQ)
+// to (Op EFLAGS Cond)
+//
+// (Op (CMP (SETCC Cond EFLAGS) 0) EQ) or
+// (Op (CMP (SETCC Cond EFLAGS) 1) NEQ)
+// to (Op EFLAGS !Cond)
+//
+// where Op could be BRCOND or CMOV.
+//
+static SDValue checkBoolTestSetCCCombine(SDValue Cmp, X86::CondCode &CC) {
+ // Quit if not CMP and SUB with its value result used.
+ if (Cmp.getOpcode() != X86ISD::CMP &&
+ (Cmp.getOpcode() != X86ISD::SUB || Cmp.getNode()->hasAnyUseOfValue(0)))
+ return SDValue();
+
+ // Quit if not used as a boolean value.
+ if (CC != X86::COND_E && CC != X86::COND_NE)
+ return SDValue();
+
+ // Check CMP operands. One of them should be 0 or 1 and the other should be
+ // an SetCC or extended from it.
+ SDValue Op1 = Cmp.getOperand(0);
+ SDValue Op2 = Cmp.getOperand(1);
+
+ SDValue SetCC;
+ const ConstantSDNode* C = 0;
+ bool needOppositeCond = (CC == X86::COND_E);
+
+ if ((C = dyn_cast<ConstantSDNode>(Op1)))
+ SetCC = Op2;
+ else if ((C = dyn_cast<ConstantSDNode>(Op2)))
+ SetCC = Op1;
+ else // Quit if all operands are not constants.
+ return SDValue();
+
+ if (C->getZExtValue() == 1)
+ needOppositeCond = !needOppositeCond;
+ else if (C->getZExtValue() != 0)
+ // Quit if the constant is neither 0 or 1.
+ return SDValue();
+
+ // Skip 'zext' node.
+ if (SetCC.getOpcode() == ISD::ZERO_EXTEND)
+ SetCC = SetCC.getOperand(0);
+
+ switch (SetCC.getOpcode()) {
+ case X86ISD::SETCC:
+ // Set the condition code or opposite one if necessary.
+ CC = X86::CondCode(SetCC.getConstantOperandVal(0));
+ if (needOppositeCond)
+ CC = X86::GetOppositeBranchCondition(CC);
+ return SetCC.getOperand(1);
+ case X86ISD::CMOV: {
+ // Check whether false/true value has canonical one, i.e. 0 or 1.
+ ConstantSDNode *FVal = dyn_cast<ConstantSDNode>(SetCC.getOperand(0));
+ ConstantSDNode *TVal = dyn_cast<ConstantSDNode>(SetCC.getOperand(1));
+ // Quit if true value is not a constant.
+ if (!TVal)
+ return SDValue();
+ // Quit if false value is not a constant.
+ if (!FVal) {
+ // A special case for rdrand, where 0 is set if false cond is found.
+ SDValue Op = SetCC.getOperand(0);
+ if (Op.getOpcode() != X86ISD::RDRAND)
+ return SDValue();
+ }
+ // Quit if false value is not the constant 0 or 1.
+ bool FValIsFalse = true;
+ if (FVal && FVal->getZExtValue() != 0) {
+ if (FVal->getZExtValue() != 1)
+ return SDValue();
+ // If FVal is 1, opposite cond is needed.
+ needOppositeCond = !needOppositeCond;
+ FValIsFalse = false;
+ }
+ // Quit if TVal is not the constant opposite of FVal.
+ if (FValIsFalse && TVal->getZExtValue() != 1)
+ return SDValue();
+ if (!FValIsFalse && TVal->getZExtValue() != 0)
+ return SDValue();
+ CC = X86::CondCode(SetCC.getConstantOperandVal(2));
+ if (needOppositeCond)
+ CC = X86::GetOppositeBranchCondition(CC);
+ return SetCC.getOperand(3);
+ }
+ }
+
+ return SDValue();
+}
+
/// Optimize X86ISD::CMOV [LHS, RHS, CONDCODE (e.g. X86::COND_NE), CONDVAL]
static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG,
- TargetLowering::DAGCombinerInfo &DCI) {
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget *Subtarget) {
DebugLoc DL = N->getDebugLoc();
// If the flag operand isn't dead, don't touch this CMOV.
@@ -13795,6 +14998,7 @@
SDValue TrueOp = N->getOperand(1);
X86::CondCode CC = (X86::CondCode)N->getConstantOperandVal(2);
SDValue Cond = N->getOperand(3);
+
if (CC == X86::COND_E || CC == X86::COND_NE) {
switch (Cond.getOpcode()) {
default: break;
@@ -13806,6 +15010,18 @@
}
}
+ SDValue Flags;
+
+ Flags = checkBoolTestSetCCCombine(Cond, CC);
+ if (Flags.getNode() &&
+ // Extra check as FCMOV only supports a subset of X86 cond.
+ (FalseOp.getValueType() != MVT::f80 || hasFPCMov(CC))) {
+ SDValue Ops[] = { FalseOp, TrueOp,
+ DAG.getConstant(CC, MVT::i8), Flags };
+ return DAG.getNode(X86ISD::CMOV, DL, N->getVTList(),
+ Ops, array_lengthof(Ops));
+ }
+
// If this is a select between two integer constants, try to do some
// optimizations. Note that the operands are ordered the opposite of SELECT
// operands.
@@ -13816,6 +15032,7 @@
if (TrueC->getAPIntValue().ult(FalseC->getAPIntValue())) {
CC = X86::GetOppositeBranchCondition(CC);
std::swap(TrueC, FalseC);
+ std::swap(TrueOp, FalseOp);
}
// Optimize C ? 8 : 0 -> zext(setcc(C)) << 3. Likewise for any pow2/0.
@@ -13898,6 +15115,46 @@
}
}
}
+
+ // Handle these cases:
+ // (select (x != c), e, c) -> select (x != c), e, x),
+ // (select (x == c), c, e) -> select (x == c), x, e)
+ // where the c is an integer constant, and the "select" is the combination
+ // of CMOV and CMP.
+ //
+ // The rationale for this change is that the conditional-move from a constant
+ // needs two instructions, however, conditional-move from a register needs
+ // only one instruction.
+ //
+ // CAVEAT: By replacing a constant with a symbolic value, it may obscure
+ // some instruction-combining opportunities. This opt needs to be
+ // postponed as late as possible.
+ //
+ if (!DCI.isBeforeLegalize() && !DCI.isBeforeLegalizeOps()) {
+ // the DCI.xxxx conditions are provided to postpone the optimization as
+ // late as possible.
+
+ ConstantSDNode *CmpAgainst = 0;
+ if ((Cond.getOpcode() == X86ISD::CMP || Cond.getOpcode() == X86ISD::SUB) &&
+ (CmpAgainst = dyn_cast<ConstantSDNode>(Cond.getOperand(1))) &&
+ dyn_cast<ConstantSDNode>(Cond.getOperand(0)) == 0) {
+
+ if (CC == X86::COND_NE &&
+ CmpAgainst == dyn_cast<ConstantSDNode>(FalseOp)) {
+ CC = X86::GetOppositeBranchCondition(CC);
+ std::swap(TrueOp, FalseOp);
+ }
+
+ if (CC == X86::COND_E &&
+ CmpAgainst == dyn_cast<ConstantSDNode>(TrueOp)) {
+ SDValue Ops[] = { FalseOp, Cond.getOperand(0),
+ DAG.getConstant(CC, MVT::i8), Cond };
+ return DAG.getNode(X86ISD::CMOV, DL, N->getVTList (), Ops,
+ array_lengthof(Ops));
+ }
+ }
+ }
+
return SDValue();
}
@@ -14228,7 +15485,7 @@
// Sometimes the operand may come from a insert_subvector building a 256-bit
// allones vector
- if (VT.getSizeInBits() == 256 &&
+ if (VT.is256BitVector() &&
N->getOpcode() == ISD::INSERT_SUBVECTOR) {
SDValue V1 = N->getOperand(0);
SDValue V2 = N->getOperand(1);
@@ -14554,11 +15811,11 @@
ISD::LoadExtType Ext = Ld->getExtensionType();
// If this is a vector EXT Load then attempt to optimize it using a
- // shuffle. We need SSE4 for the shuffles.
+ // shuffle. We need SSSE3 shuffles.
// TODO: It is possible to support ZExt by zeroing the undef values
// during the shuffle phase or after the shuffle.
if (RegVT.isVector() && RegVT.isInteger() &&
- Ext == ISD::EXTLOAD && Subtarget->hasSSE41()) {
+ Ext == ISD::EXTLOAD && Subtarget->hasSSSE3()) {
assert(MemVT != RegVT && "Cannot extend to the same type");
assert(MemVT.isVector() && "Must load a vector from memory");
@@ -14673,7 +15930,7 @@
// On Sandy Bridge, 256-bit memory operations are executed by two
// 128-bit ports. However, on Haswell it is better to issue a single 256-bit
// memory operation.
- if (VT.getSizeInBits() == 256 && !Subtarget->hasAVX2() &&
+ if (VT.is256BitVector() && !Subtarget->hasAVX2() &&
StoredVal.getNode()->getOpcode() == ISD::CONCAT_VECTORS &&
StoredVal.getNumOperands() == 2) {
SDValue Value0 = StoredVal.getOperand(0);
@@ -14784,7 +16041,8 @@
return SDValue();
const Function *F = DAG.getMachineFunction().getFunction();
- bool NoImplicitFloatOps = F->hasFnAttr(Attribute::NoImplicitFloat);
+ bool NoImplicitFloatOps = F->getFnAttributes().
+ hasAttribute(Attributes::NoImplicitFloat);
bool F64IsLegal = !DAG.getTarget().Options.UseSoftFloat && !NoImplicitFloatOps
&& Subtarget->hasSSE2();
if ((VT.isVector() ||
@@ -15056,6 +16314,29 @@
return SDValue();
}
+/// PerformFMinFMaxCombine - Do target-specific dag combines on X86ISD::FMIN and
+/// X86ISD::FMAX nodes.
+static SDValue PerformFMinFMaxCombine(SDNode *N, SelectionDAG &DAG) {
+ assert(N->getOpcode() == X86ISD::FMIN || N->getOpcode() == X86ISD::FMAX);
+
+ // Only perform optimizations if UnsafeMath is used.
+ if (!DAG.getTarget().Options.UnsafeFPMath)
+ return SDValue();
+
+ // If we run in unsafe-math mode, then convert the FMAX and FMIN nodes
+ // into FMINC and FMAXC, which are Commutative operations.
+ unsigned NewOp = 0;
+ switch (N->getOpcode()) {
+ default: llvm_unreachable("unknown opcode");
+ case X86ISD::FMIN: NewOp = X86ISD::FMINC; break;
+ case X86ISD::FMAX: NewOp = X86ISD::FMAXC; break;
+ }
+
+ return DAG.getNode(NewOp, N->getDebugLoc(), N->getValueType(0),
+ N->getOperand(0), N->getOperand(1));
+}
+
+
/// PerformFANDCombine - Do target-specific dag combines on X86ISD::FAND nodes.
static SDValue PerformFANDCombine(SDNode *N, SelectionDAG &DAG) {
// FAND(0.0, x) -> 0.0
@@ -15131,19 +16412,19 @@
// concat the vectors to original VT
unsigned NumElems = OpVT.getVectorNumElements();
+ SDValue Undef = DAG.getUNDEF(OpVT);
+
SmallVector<int,8> ShufMask1(NumElems, -1);
for (unsigned i = 0; i != NumElems/2; ++i)
ShufMask1[i] = i;
- SDValue OpLo = DAG.getVectorShuffle(OpVT, dl, Op, DAG.getUNDEF(OpVT),
- &ShufMask1[0]);
+ SDValue OpLo = DAG.getVectorShuffle(OpVT, dl, Op, Undef, &ShufMask1[0]);
SmallVector<int,8> ShufMask2(NumElems, -1);
for (unsigned i = 0; i != NumElems/2; ++i)
ShufMask2[i] = i + NumElems/2;
- SDValue OpHi = DAG.getVectorShuffle(OpVT, dl, Op, DAG.getUNDEF(OpVT),
- &ShufMask2[0]);
+ SDValue OpHi = DAG.getVectorShuffle(OpVT, dl, Op, Undef, &ShufMask2[0]);
EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(),
VT.getVectorNumElements()/2);
@@ -15161,8 +16442,13 @@
DebugLoc dl = N->getDebugLoc();
EVT VT = N->getValueType(0);
+ // Let legalize expand this if it isn't a legal type yet.
+ if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
+ return SDValue();
+
EVT ScalarVT = VT.getScalarType();
- if ((ScalarVT != MVT::f32 && ScalarVT != MVT::f64) || !Subtarget->hasFMA())
+ if ((ScalarVT != MVT::f32 && ScalarVT != MVT::f64) ||
+ (!Subtarget->hasFMA() && !Subtarget->hasFMA4()))
return SDValue();
SDValue A = N->getOperand(0);
@@ -15184,9 +16470,10 @@
unsigned Opcode;
if (!NegMul)
- Opcode = (!NegC)? X86ISD::FMADD : X86ISD::FMSUB;
+ Opcode = (!NegC) ? X86ISD::FMADD : X86ISD::FMSUB;
else
- Opcode = (!NegC)? X86ISD::FNMADD : X86ISD::FNMSUB;
+ Opcode = (!NegC) ? X86ISD::FNMADD : X86ISD::FNMSUB;
+
return DAG.getNode(Opcode, dl, VT, A, B, C);
}
@@ -15283,36 +16570,77 @@
return SDValue();
}
+// Helper function of PerformSETCCCombine. It is to materialize "setb reg"
+// as "sbb reg,reg", since it can be extended without zext and produces
+// an all-ones bit which is more useful than 0/1 in some cases.
+static SDValue MaterializeSETB(DebugLoc DL, SDValue EFLAGS, SelectionDAG &DAG) {
+ return DAG.getNode(ISD::AND, DL, MVT::i8,
+ DAG.getNode(X86ISD::SETCC_CARRY, DL, MVT::i8,
+ DAG.getConstant(X86::COND_B, MVT::i8), EFLAGS),
+ DAG.getConstant(1, MVT::i8));
+}
+
// Optimize RES = X86ISD::SETCC CONDCODE, EFLAG_INPUT
-static SDValue PerformSETCCCombine(SDNode *N, SelectionDAG &DAG) {
- unsigned X86CC = N->getConstantOperandVal(0);
- SDValue EFLAG = N->getOperand(1);
+static SDValue PerformSETCCCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget *Subtarget) {
DebugLoc DL = N->getDebugLoc();
+ X86::CondCode CC = X86::CondCode(N->getConstantOperandVal(0));
+ SDValue EFLAGS = N->getOperand(1);
+
+ if (CC == X86::COND_A) {
+ // Try to convert COND_A into COND_B in an attempt to facilitate
+ // materializing "setb reg".
+ //
+ // Do not flip "e > c", where "c" is a constant, because Cmp instruction
+ // cannot take an immediate as its first operand.
+ //
+ if (EFLAGS.getOpcode() == X86ISD::SUB && EFLAGS.hasOneUse() &&
+ EFLAGS.getValueType().isInteger() &&
+ !isa<ConstantSDNode>(EFLAGS.getOperand(1))) {
+ SDValue NewSub = DAG.getNode(X86ISD::SUB, EFLAGS.getDebugLoc(),
+ EFLAGS.getNode()->getVTList(),
+ EFLAGS.getOperand(1), EFLAGS.getOperand(0));
+ SDValue NewEFLAGS = SDValue(NewSub.getNode(), EFLAGS.getResNo());
+ return MaterializeSETB(DL, NewEFLAGS, DAG);
+ }
+ }
// Materialize "setb reg" as "sbb reg,reg", since it can be extended without
// a zext and produces an all-ones bit which is more useful than 0/1 in some
// cases.
- if (X86CC == X86::COND_B)
- return DAG.getNode(ISD::AND, DL, MVT::i8,
- DAG.getNode(X86ISD::SETCC_CARRY, DL, MVT::i8,
- DAG.getConstant(X86CC, MVT::i8), EFLAG),
- DAG.getConstant(1, MVT::i8));
+ if (CC == X86::COND_B)
+ return MaterializeSETB(DL, EFLAGS, DAG);
+
+ SDValue Flags;
+
+ Flags = checkBoolTestSetCCCombine(EFLAGS, CC);
+ if (Flags.getNode()) {
+ SDValue Cond = DAG.getConstant(CC, MVT::i8);
+ return DAG.getNode(X86ISD::SETCC, DL, N->getVTList(), Cond, Flags);
+ }
return SDValue();
}
-static SDValue PerformUINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG) {
- SDValue Op0 = N->getOperand(0);
- EVT InVT = Op0->getValueType(0);
-
- // UINT_TO_FP(v4i8) -> SINT_TO_FP(ZEXT(v4i8 to v4i32))
- if (InVT == MVT::v8i8 || InVT == MVT::v4i8) {
- DebugLoc dl = N->getDebugLoc();
- MVT DstVT = InVT == MVT::v4i8 ? MVT::v4i32 : MVT::v8i32;
- SDValue P = DAG.getNode(ISD::ZERO_EXTEND, dl, DstVT, Op0);
- // Notice that we use SINT_TO_FP because we know that the high bits
- // are zero and SINT_TO_FP is better supported by the hardware.
- return DAG.getNode(ISD::SINT_TO_FP, dl, N->getValueType(0), P);
+// Optimize branch condition evaluation.
+//
+static SDValue PerformBrCondCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget *Subtarget) {
+ DebugLoc DL = N->getDebugLoc();
+ SDValue Chain = N->getOperand(0);
+ SDValue Dest = N->getOperand(1);
+ SDValue EFLAGS = N->getOperand(3);
+ X86::CondCode CC = X86::CondCode(N->getConstantOperandVal(2));
+
+ SDValue Flags;
+
+ Flags = checkBoolTestSetCCCombine(EFLAGS, CC);
+ if (Flags.getNode()) {
+ SDValue Cond = DAG.getConstant(CC, MVT::i8);
+ return DAG.getNode(X86ISD::BRCOND, DL, N->getVTList(), Chain, Dest, Cond,
+ Flags);
}
return SDValue();
@@ -15349,20 +16677,6 @@
return SDValue();
}
-static SDValue PerformFP_TO_SINTCombine(SDNode *N, SelectionDAG &DAG) {
- EVT VT = N->getValueType(0);
-
- // v4i8 = FP_TO_SINT() -> v4i8 = TRUNCATE (V4i32 = FP_TO_SINT()
- if (VT == MVT::v8i8 || VT == MVT::v4i8) {
- DebugLoc dl = N->getDebugLoc();
- MVT DstVT = VT == MVT::v4i8 ? MVT::v4i32 : MVT::v8i32;
- SDValue I = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, N->getOperand(0));
- return DAG.getNode(ISD::TRUNCATE, dl, VT, I);
- }
-
- return SDValue();
-}
-
// Optimize RES, EFLAGS = X86ISD::ADC LHS, RHS, EFLAGS
static SDValue PerformADCCombine(SDNode *N, SelectionDAG &DAG,
X86TargetLowering::DAGCombinerInfo &DCI) {
@@ -15477,6 +16791,21 @@
return OptimizeConditionalInDecrement(N, DAG);
}
+/// performVZEXTCombine - Performs build vector combines
+static SDValue performVZEXTCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget *Subtarget) {
+ // (vzext (bitcast (vzext (x)) -> (vzext x)
+ SDValue In = N->getOperand(0);
+ while (In.getOpcode() == ISD::BITCAST)
+ In = In.getOperand(0);
+
+ if (In.getOpcode() != X86ISD::VZEXT)
+ return SDValue();
+
+ return DAG.getNode(X86ISD::VZEXT, N->getDebugLoc(), N->getValueType(0), In.getOperand(0));
+}
+
SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
@@ -15486,7 +16815,7 @@
return PerformEXTRACT_VECTOR_ELTCombine(N, DAG, DCI);
case ISD::VSELECT:
case ISD::SELECT: return PerformSELECTCombine(N, DAG, DCI, Subtarget);
- case X86ISD::CMOV: return PerformCMOVCombine(N, DAG, DCI);
+ case X86ISD::CMOV: return PerformCMOVCombine(N, DAG, DCI, Subtarget);
case ISD::ADD: return PerformAddCombine(N, DAG, Subtarget);
case ISD::SUB: return PerformSubCombine(N, DAG, Subtarget);
case X86ISD::ADC: return PerformADCCombine(N, DAG, DCI);
@@ -15499,22 +16828,24 @@
case ISD::XOR: return PerformXorCombine(N, DAG, DCI, Subtarget);
case ISD::LOAD: return PerformLOADCombine(N, DAG, DCI, Subtarget);
case ISD::STORE: return PerformSTORECombine(N, DAG, Subtarget);
- case ISD::UINT_TO_FP: return PerformUINT_TO_FPCombine(N, DAG);
case ISD::SINT_TO_FP: return PerformSINT_TO_FPCombine(N, DAG, this);
- case ISD::FP_TO_SINT: return PerformFP_TO_SINTCombine(N, DAG);
case ISD::FADD: return PerformFADDCombine(N, DAG, Subtarget);
case ISD::FSUB: return PerformFSUBCombine(N, DAG, Subtarget);
case X86ISD::FXOR:
case X86ISD::FOR: return PerformFORCombine(N, DAG);
+ case X86ISD::FMIN:
+ case X86ISD::FMAX: return PerformFMinFMaxCombine(N, DAG);
case X86ISD::FAND: return PerformFANDCombine(N, DAG);
case X86ISD::BT: return PerformBTCombine(N, DAG, DCI);
case X86ISD::VZEXT_MOVL: return PerformVZEXT_MOVLCombine(N, DAG);
case ISD::ANY_EXTEND:
case ISD::ZERO_EXTEND: return PerformZExtCombine(N, DAG, DCI, Subtarget);
case ISD::SIGN_EXTEND: return PerformSExtCombine(N, DAG, DCI, Subtarget);
- case ISD::TRUNCATE: return PerformTruncateCombine(N, DAG, DCI);
+ case ISD::TRUNCATE: return PerformTruncateCombine(N, DAG,DCI,Subtarget);
case ISD::SETCC: return PerformISDSETCCCombine(N, DAG);
- case X86ISD::SETCC: return PerformSETCCCombine(N, DAG);
+ case X86ISD::SETCC: return PerformSETCCCombine(N, DAG, DCI, Subtarget);
+ case X86ISD::BRCOND: return PerformBrCondCombine(N, DAG, DCI, Subtarget);
+ case X86ISD::VZEXT: return performVZEXTCombine(N, DAG, DCI, Subtarget);
case X86ISD::SHUFP: // Handle all target specific shuffles
case X86ISD::PALIGN:
case X86ISD::UNPCKH:
@@ -15942,7 +17273,7 @@
return;
case 'K':
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
- if ((int8_t)C->getSExtValue() == C->getSExtValue()) {
+ if (isInt<8>(C->getSExtValue())) {
Result = DAG.getTargetConstant(C->getZExtValue(), Op.getValueType());
break;
}
@@ -16267,3 +17598,207 @@
return Res;
}
+
+//===----------------------------------------------------------------------===//
+//
+// X86 cost model.
+//
+//===----------------------------------------------------------------------===//
+
+struct X86CostTblEntry {
+ int ISD;
+ MVT Type;
+ unsigned Cost;
+};
+
+static int
+FindInTable(const X86CostTblEntry *Tbl, unsigned len, int ISD, MVT Ty) {
+ for (unsigned int i = 0; i < len; ++i)
+ if (Tbl[i].ISD == ISD && Tbl[i].Type == Ty)
+ return i;
+
+ // Could not find an entry.
+ return -1;
+}
+
+struct X86TypeConversionCostTblEntry {
+ int ISD;
+ MVT Dst;
+ MVT Src;
+ unsigned Cost;
+};
+
+static int
+FindInConvertTable(const X86TypeConversionCostTblEntry *Tbl, unsigned len,
+ int ISD, MVT Dst, MVT Src) {
+ for (unsigned int i = 0; i < len; ++i)
+ if (Tbl[i].ISD == ISD && Tbl[i].Src == Src && Tbl[i].Dst == Dst)
+ return i;
+
+ // Could not find an entry.
+ return -1;
+}
+
+unsigned
+X86VectorTargetTransformInfo::getArithmeticInstrCost(unsigned Opcode,
+ Type *Ty) const {
+ // Legalize the type.
+ std::pair<unsigned, MVT> LT = getTypeLegalizationCost(Ty);
+
+ int ISD = InstructionOpcodeToISD(Opcode);
+ assert(ISD && "Invalid opcode");
+
+ const X86Subtarget &ST = TLI->getTargetMachine().getSubtarget<X86Subtarget>();
+
+ static const X86CostTblEntry AVX1CostTable[] = {
+ // We don't have to scalarize unsupported ops. We can issue two half-sized
+ // operations and we only need to extract the upper YMM half.
+ // Two ops + 1 extract + 1 insert = 4.
+ { ISD::MUL, MVT::v8i32, 4 },
+ { ISD::SUB, MVT::v8i32, 4 },
+ { ISD::ADD, MVT::v8i32, 4 },
+ { ISD::MUL, MVT::v4i64, 4 },
+ { ISD::SUB, MVT::v4i64, 4 },
+ { ISD::ADD, MVT::v4i64, 4 },
+ };
+
+ // Look for AVX1 lowering tricks.
+ if (ST.hasAVX()) {
+ int Idx = FindInTable(AVX1CostTable, array_lengthof(AVX1CostTable), ISD,
+ LT.second);
+ if (Idx != -1)
+ return LT.first * AVX1CostTable[Idx].Cost;
+ }
+ // Fallback to the default implementation.
+ return VectorTargetTransformImpl::getArithmeticInstrCost(Opcode, Ty);
+}
+
+unsigned
+X86VectorTargetTransformInfo::getVectorInstrCost(unsigned Opcode, Type *Val,
+ unsigned Index) const {
+ assert(Val->isVectorTy() && "This must be a vector type");
+
+ if (Index != -1U) {
+ // Legalize the type.
+ std::pair<unsigned, MVT> LT = getTypeLegalizationCost(Val);
+
+ // This type is legalized to a scalar type.
+ if (!LT.second.isVector())
+ return 0;
+
+ // The type may be split. Normalize the index to the new type.
+ unsigned Width = LT.second.getVectorNumElements();
+ Index = Index % Width;
+
+ // Floating point scalars are already located in index #0.
+ if (Val->getScalarType()->isFloatingPointTy() && Index == 0)
+ return 0;
+ }
+
+ return VectorTargetTransformImpl::getVectorInstrCost(Opcode, Val, Index);
+}
+
+unsigned X86VectorTargetTransformInfo::getCmpSelInstrCost(unsigned Opcode,
+ Type *ValTy,
+ Type *CondTy) const {
+ // Legalize the type.
+ std::pair<unsigned, MVT> LT = getTypeLegalizationCost(ValTy);
+
+ MVT MTy = LT.second;
+
+ int ISD = InstructionOpcodeToISD(Opcode);
+ assert(ISD && "Invalid opcode");
+
+ const X86Subtarget &ST =
+ TLI->getTargetMachine().getSubtarget<X86Subtarget>();
+
+ static const X86CostTblEntry SSE42CostTbl[] = {
+ { ISD::SETCC, MVT::v2f64, 1 },
+ { ISD::SETCC, MVT::v4f32, 1 },
+ { ISD::SETCC, MVT::v2i64, 1 },
+ { ISD::SETCC, MVT::v4i32, 1 },
+ { ISD::SETCC, MVT::v8i16, 1 },
+ { ISD::SETCC, MVT::v16i8, 1 },
+ };
+
+ static const X86CostTblEntry AVX1CostTbl[] = {
+ { ISD::SETCC, MVT::v4f64, 1 },
+ { ISD::SETCC, MVT::v8f32, 1 },
+ // AVX1 does not support 8-wide integer compare.
+ { ISD::SETCC, MVT::v4i64, 4 },
+ { ISD::SETCC, MVT::v8i32, 4 },
+ { ISD::SETCC, MVT::v16i16, 4 },
+ { ISD::SETCC, MVT::v32i8, 4 },
+ };
+
+ static const X86CostTblEntry AVX2CostTbl[] = {
+ { ISD::SETCC, MVT::v4i64, 1 },
+ { ISD::SETCC, MVT::v8i32, 1 },
+ { ISD::SETCC, MVT::v16i16, 1 },
+ { ISD::SETCC, MVT::v32i8, 1 },
+ };
+
+ if (ST.hasSSE42()) {
+ int Idx = FindInTable(SSE42CostTbl, array_lengthof(SSE42CostTbl), ISD, MTy);
+ if (Idx != -1)
+ return LT.first * SSE42CostTbl[Idx].Cost;
+ }
+
+ if (ST.hasAVX()) {
+ int Idx = FindInTable(AVX1CostTbl, array_lengthof(AVX1CostTbl), ISD, MTy);
+ if (Idx != -1)
+ return LT.first * AVX1CostTbl[Idx].Cost;
+ }
+
+ if (ST.hasAVX2()) {
+ int Idx = FindInTable(AVX2CostTbl, array_lengthof(AVX2CostTbl), ISD, MTy);
+ if (Idx != -1)
+ return LT.first * AVX2CostTbl[Idx].Cost;
+ }
+
+ return VectorTargetTransformImpl::getCmpSelInstrCost(Opcode, ValTy, CondTy);
+}
+
+unsigned X86VectorTargetTransformInfo::getCastInstrCost(unsigned Opcode,
+ Type *Dst,
+ Type *Src) const {
+ int ISD = InstructionOpcodeToISD(Opcode);
+ assert(ISD && "Invalid opcode");
+
+ EVT SrcTy = TLI->getValueType(Src);
+ EVT DstTy = TLI->getValueType(Dst);
+
+ if (!SrcTy.isSimple() || !DstTy.isSimple())
+ return VectorTargetTransformImpl::getCastInstrCost(Opcode, Dst, Src);
+
+ const X86Subtarget &ST = TLI->getTargetMachine().getSubtarget<X86Subtarget>();
+
+ static const X86TypeConversionCostTblEntry AVXConversionTbl[] = {
+ { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 1 },
+ { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 1 },
+ { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 1 },
+ { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 1 },
+ { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 1 },
+ { ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 1 },
+ { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i8, 1 },
+ { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 1 },
+ { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i8, 1 },
+ { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 1 },
+ { ISD::FP_TO_SINT, MVT::v8i8, MVT::v8f32, 1 },
+ { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 1 },
+ { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1, 6 },
+ { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 9 },
+ { ISD::TRUNCATE, MVT::v8i32, MVT::v8i64, 3 },
+ };
+
+ if (ST.hasAVX()) {
+ int Idx = FindInConvertTable(AVXConversionTbl,
+ array_lengthof(AVXConversionTbl),
+ ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT());
+ if (Idx != -1)
+ return AVXConversionTbl[Idx].Cost;
+ }
+
+ return VectorTargetTransformImpl::getCastInstrCost(Opcode, Dst, Src);
+}
+
Modified: llvm/branches/AMDILBackend/lib/Target/X86/X86ISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/X86/X86ISelLowering.h?rev=172541&r1=172540&r2=172541&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/X86/X86ISelLowering.h (original)
+++ llvm/branches/AMDILBackend/lib/Target/X86/X86ISelLowering.h Tue Jan 15 11:16:16 2013
@@ -19,6 +19,7 @@
#include "X86RegisterInfo.h"
#include "X86MachineFunctionInfo.h"
#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetTransformImpl.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/SelectionDAG.h"
@@ -137,15 +138,15 @@
/// relative displacements.
WrapperRIP,
- /// MOVQ2DQ - Copies a 64-bit value from an MMX vector to the low word
- /// of an XMM vector, with the high word zero filled.
- MOVQ2DQ,
-
/// MOVDQ2Q - Copies a 64-bit value from the low word of an XMM vector
/// to an MMX vector. If you think this is too close to the previous
/// mnemonic, so do I; blame Intel.
MOVDQ2Q,
+ /// MMX_MOVD2W - Copies a 32-bit value from the low word of a MMX
+ /// vector to a GPR.
+ MMX_MOVD2W,
+
/// PEXTRB - Extract an 8-bit value from a vector and zero extend it to
/// i32, corresponds to X86::PEXTRB.
PEXTRB,
@@ -199,6 +200,9 @@
///
FMAX, FMIN,
+ /// FMAXC, FMINC - Commutative FMIN and FMAX.
+ FMAXC, FMINC,
+
/// FRSQRT, FRCP - Floating point reciprocal-sqrt and reciprocal
/// approximation. Note that these typically require refinement
/// in order to obtain suitable precision.
@@ -218,6 +222,12 @@
// EH_RETURN - Exception Handling helpers.
EH_RETURN,
+ // EH_SJLJ_SETJMP - SjLj exception handling setjmp.
+ EH_SJLJ_SETJMP,
+
+ // EH_SJLJ_LONGJMP - SjLj exception handling longjmp.
+ EH_SJLJ_LONGJMP,
+
/// TC_RETURN - Tail call return.
/// operand #0 chain
/// operand #1 callee (register or absolute)
@@ -231,6 +241,18 @@
// VSEXT_MOVL - Vector move low and sign extend.
VSEXT_MOVL,
+ // VZEXT - Vector integer zero-extend.
+ VZEXT,
+
+ // VSEXT - Vector integer signed-extend.
+ VSEXT,
+
+ // VFPEXT - Vector FP extend.
+ VFPEXT,
+
+ // VFPROUND - Vector FP round.
+ VFPROUND,
+
// VSHL, VSRL - 128-bit vector logical left / right shift
VSHLDQ, VSRLDQ,
@@ -346,6 +368,10 @@
ATOMXOR64_DAG,
ATOMAND64_DAG,
ATOMNAND64_DAG,
+ ATOMMAX64_DAG,
+ ATOMMIN64_DAG,
+ ATOMUMAX64_DAG,
+ ATOMUMIN64_DAG,
ATOMSWAP64_DAG,
// LCMPXCHG_DAG, LCMPXCHG8_DAG, LCMPXCHG16_DAG - Compare and swap.
@@ -459,10 +485,6 @@
getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
unsigned JTI, MCContext &Ctx) const;
- /// getStackPtrReg - Return the stack pointer register we are using: either
- /// ESP or RSP.
- unsigned getStackPtrReg() const { return X86StackPtr; }
-
/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area. For X86, aggregates
/// that contains are placed at 16-byte boundaries while the rest are at
@@ -695,10 +717,7 @@
/// make the right decision when generating code for different targets.
const X86Subtarget *Subtarget;
const X86RegisterInfo *RegInfo;
- const TargetData *TD;
-
- /// X86StackPtr - X86 physical register used as stack ptr.
- unsigned X86StackPtr;
+ const DataLayout *TD;
/// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE or x87
/// floating point ops.
@@ -742,6 +761,7 @@
bool isVarArg,
bool isCalleeStructRet,
bool isCallerStructRet,
+ Type *RetTy,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
@@ -761,15 +781,11 @@
SDValue LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, DebugLoc dl,
SelectionDAG &DAG) const;
SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerGlobalAddress(const GlobalValue *GV, DebugLoc dl,
@@ -783,12 +799,15 @@
SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerUINT_TO_FP_vec(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerZERO_EXTEND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFABS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFNEG(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerFGETSIGN(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerToBT(SDValue And, ISD::CondCode CC,
DebugLoc dl, SelectionDAG &DAG) const;
SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
@@ -800,37 +819,26 @@
SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerCTLZ(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerCTLZ_ZERO_UNDEF(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerADD(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerSUB(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerShift(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerCMP_SWAP(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerREADCYCLECOUNTER(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
- SDValue PerformTruncateCombine(SDNode* N, SelectionDAG &DAG, DAGCombinerInfo &DCI) const;
- // Utility functions to help LowerVECTOR_SHUFFLE
- SDValue LowerVECTOR_SHUFFLEv8i16(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerVectorBroadcast(SDValue &Op, SelectionDAG &DAG) const;
+ // Utility functions to help LowerVECTOR_SHUFFLE & LowerBUILD_VECTOR
+ SDValue LowerVectorBroadcast(SDValue Op, SelectionDAG &DAG) const;
SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const;
+ SDValue buildFromShuffleMostly(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue LowerVectorAllZeroTest(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue lowerVectorIntExtend(SDValue Op, SelectionDAG &DAG) const;
virtual SDValue
LowerFormalArguments(SDValue Chain,
@@ -863,54 +871,17 @@
const SmallVectorImpl<ISD::OutputArg> &Outs,
LLVMContext &Context) const;
- void ReplaceATOMIC_BINARY_64(SDNode *N, SmallVectorImpl<SDValue> &Results,
- SelectionDAG &DAG, unsigned NewOp) const;
-
- /// Utility function to emit string processing sse4.2 instructions
- /// that return in xmm0.
- /// This takes the instruction to expand, the associated machine basic
- /// block, the number of args, and whether or not the second arg is
- /// in memory or not.
- MachineBasicBlock *EmitPCMP(MachineInstr *BInstr, MachineBasicBlock *BB,
- unsigned argNum, bool inMem) const;
-
- /// Utility functions to emit monitor and mwait instructions. These
- /// need to make sure that the arguments to the intrinsic are in the
- /// correct registers.
- MachineBasicBlock *EmitMonitor(MachineInstr *MI,
- MachineBasicBlock *BB) const;
- MachineBasicBlock *EmitMwait(MachineInstr *MI, MachineBasicBlock *BB) const;
-
- /// Utility function to emit atomic bitwise operations (and, or, xor).
- /// It takes the bitwise instruction to expand, the associated machine basic
- /// block, and the associated X86 opcodes for reg/reg and reg/imm.
- MachineBasicBlock *EmitAtomicBitwiseWithCustomInserter(
- MachineInstr *BInstr,
- MachineBasicBlock *BB,
- unsigned regOpc,
- unsigned immOpc,
- unsigned loadOpc,
- unsigned cxchgOpc,
- unsigned notOpc,
- unsigned EAXreg,
- const TargetRegisterClass *RC,
- bool Invert = false) const;
-
- MachineBasicBlock *EmitAtomicBit6432WithCustomInserter(
- MachineInstr *BInstr,
- MachineBasicBlock *BB,
- unsigned regOpcL,
- unsigned regOpcH,
- unsigned immOpcL,
- unsigned immOpcH,
- bool Invert = false) const;
-
- /// Utility function to emit atomic min and max. It takes the min/max
- /// instruction to expand, the associated basic block, and the associated
- /// cmov opcode for moving the min or max value.
- MachineBasicBlock *EmitAtomicMinMaxWithCustomInserter(MachineInstr *BInstr,
- MachineBasicBlock *BB,
- unsigned cmovOpc) const;
+ /// Utility function to emit atomic-load-arith operations (and, or, xor,
+ /// nand, max, min, umax, umin). It takes the corresponding instruction to
+ /// expand, the associated machine basic block, and the associated X86
+ /// opcodes for reg/reg.
+ MachineBasicBlock *EmitAtomicLoadArith(MachineInstr *MI,
+ MachineBasicBlock *MBB) const;
+
+ /// Utility function to emit atomic-load-arith operations (and, or, xor,
+ /// nand, add, sub, swap) for 64-bit operands on 32-bit target.
+ MachineBasicBlock *EmitAtomicLoadArith6432(MachineInstr *MI,
+ MachineBasicBlock *MBB) const;
// Utility function to emit the low-level va_arg code for X86-64.
MachineBasicBlock *EmitVAARG64WithCustomInserter(
@@ -938,6 +909,12 @@
MachineBasicBlock *emitLoweredTLSAddr(MachineInstr *MI,
MachineBasicBlock *BB) const;
+ MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr *MI,
+ MachineBasicBlock *MBB) const;
+
+ MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr *MI,
+ MachineBasicBlock *MBB) const;
+
/// Emit nodes that will be selected as "test Op0,Op0", or something
/// equivalent, for use with the given x86 condition code.
SDValue EmitTest(SDValue Op0, unsigned X86CC, SelectionDAG &DAG) const;
@@ -955,6 +932,23 @@
FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
const TargetLibraryInfo *libInfo);
}
+
+ class X86VectorTargetTransformInfo : public VectorTargetTransformImpl {
+ public:
+ explicit X86VectorTargetTransformInfo(const TargetLowering *TL) :
+ VectorTargetTransformImpl(TL) {}
+
+ virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty) const;
+
+ virtual unsigned getVectorInstrCost(unsigned Opcode, Type *Val,
+ unsigned Index) const;
+
+ unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
+ Type *CondTy) const;
+
+ virtual unsigned getCastInstrCost(unsigned Opcode, Type *Dst,
+ Type *Src) const;
+ };
}
#endif // X86ISELLOWERING_H
Modified: llvm/branches/AMDILBackend/lib/Target/X86/X86InstrCompiler.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/X86/X86InstrCompiler.td?rev=172541&r1=172540&r2=172541&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/X86/X86InstrCompiler.td (original)
+++ llvm/branches/AMDILBackend/lib/Target/X86/X86InstrCompiler.td Tue Jan 15 11:16:16 2013
@@ -165,6 +165,33 @@
}
+let hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1,
+ usesCustomInserter = 1 in {
+ def EH_SjLj_SetJmp32 : I<0, Pseudo, (outs GR32:$dst), (ins i32mem:$buf),
+ "#EH_SJLJ_SETJMP32",
+ [(set GR32:$dst, (X86eh_sjlj_setjmp addr:$buf))]>,
+ Requires<[In32BitMode]>;
+ def EH_SjLj_SetJmp64 : I<0, Pseudo, (outs GR32:$dst), (ins i64mem:$buf),
+ "#EH_SJLJ_SETJMP64",
+ [(set GR32:$dst, (X86eh_sjlj_setjmp addr:$buf))]>,
+ Requires<[In64BitMode]>;
+ let isTerminator = 1 in {
+ def EH_SjLj_LongJmp32 : I<0, Pseudo, (outs), (ins i32mem:$buf),
+ "#EH_SJLJ_LONGJMP32",
+ [(X86eh_sjlj_longjmp addr:$buf)]>,
+ Requires<[In32BitMode]>;
+ def EH_SjLj_LongJmp64 : I<0, Pseudo, (outs), (ins i64mem:$buf),
+ "#EH_SJLJ_LONGJMP64",
+ [(X86eh_sjlj_longjmp addr:$buf)]>,
+ Requires<[In64BitMode]>;
+ }
+}
+
+let isBranch = 1, isTerminator = 1, isCodeGenOnly = 1 in {
+ def EH_SjLj_Setup : I<0, Pseudo, (outs), (ins brtarget:$dst),
+ "#EH_SjLj_Setup\t$dst", []>;
+}
+
//===----------------------------------------------------------------------===//
// Pseudo instructions used by segmented stacks.
//
@@ -230,25 +257,18 @@
IIC_ALU_NONMEM>;
// Use sbb to materialize carry bit.
-let Uses = [EFLAGS], Defs = [EFLAGS], isCodeGenOnly = 1 in {
+let Uses = [EFLAGS], Defs = [EFLAGS], isPseudo = 1 in {
// FIXME: These are pseudo ops that should be replaced with Pat<> patterns.
// However, Pat<> can't replicate the destination reg into the inputs of the
// result.
-// FIXME: Change these to have encoding Pseudo when X86MCCodeEmitter replaces
-// X86CodeEmitter.
-def SETB_C8r : I<0x18, MRMInitReg, (outs GR8:$dst), (ins), "",
- [(set GR8:$dst, (X86setcc_c X86_COND_B, EFLAGS))],
- IIC_ALU_NONMEM>;
-def SETB_C16r : I<0x19, MRMInitReg, (outs GR16:$dst), (ins), "",
- [(set GR16:$dst, (X86setcc_c X86_COND_B, EFLAGS))],
- IIC_ALU_NONMEM>,
- OpSize;
-def SETB_C32r : I<0x19, MRMInitReg, (outs GR32:$dst), (ins), "",
- [(set GR32:$dst, (X86setcc_c X86_COND_B, EFLAGS))],
- IIC_ALU_NONMEM>;
-def SETB_C64r : RI<0x19, MRMInitReg, (outs GR64:$dst), (ins), "",
- [(set GR64:$dst, (X86setcc_c X86_COND_B, EFLAGS))],
- IIC_ALU_NONMEM>;
+def SETB_C8r : I<0, Pseudo, (outs GR8:$dst), (ins), "",
+ [(set GR8:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;
+def SETB_C16r : I<0, Pseudo, (outs GR16:$dst), (ins), "",
+ [(set GR16:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;
+def SETB_C32r : I<0, Pseudo, (outs GR32:$dst), (ins), "",
+ [(set GR32:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;
+def SETB_C64r : I<0, Pseudo, (outs GR64:$dst), (ins), "",
+ [(set GR64:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;
} // isCodeGenOnly
@@ -453,6 +473,11 @@
"#CMOV_GR16* PSEUDO!",
[(set GR16:$dst,
(X86cmov GR16:$src1, GR16:$src2, imm:$cond, EFLAGS))]>;
+} // Predicates = [NoCMov]
+
+// fcmov doesn't handle all possible EFLAGS, provide a fallback if there is no
+// SSE1.
+let Predicates = [FPStackf32] in
def CMOV_RFP32 : I<0, Pseudo,
(outs RFP32:$dst),
(ins RFP32:$src1, RFP32:$src2, i8imm:$cond),
@@ -460,6 +485,9 @@
[(set RFP32:$dst,
(X86cmov RFP32:$src1, RFP32:$src2, imm:$cond,
EFLAGS))]>;
+// fcmov doesn't handle all possible EFLAGS, provide a fallback if there is no
+// SSE2.
+let Predicates = [FPStackf64] in
def CMOV_RFP64 : I<0, Pseudo,
(outs RFP64:$dst),
(ins RFP64:$src1, RFP64:$src2, i8imm:$cond),
@@ -474,7 +502,6 @@
[(set RFP80:$dst,
(X86cmov RFP80:$src1, RFP80:$src2, imm:$cond,
EFLAGS))]>;
-} // Predicates = [NoCMov]
} // UsesCustomInserter = 1, Uses = [EFLAGS]
@@ -482,130 +509,74 @@
// Atomic Instruction Pseudo Instructions
//===----------------------------------------------------------------------===//
-// Atomic exchange, and, or, xor
-let Constraints = "$val = $dst", Defs = [EFLAGS],
- usesCustomInserter = 1 in {
+// Pseudo atomic instructions
-def ATOMAND8 : I<0, Pseudo, (outs GR8:$dst),(ins i8mem:$ptr, GR8:$val),
- "#ATOMAND8 PSEUDO!",
- [(set GR8:$dst, (atomic_load_and_8 addr:$ptr, GR8:$val))]>;
-def ATOMOR8 : I<0, Pseudo, (outs GR8:$dst),(ins i8mem:$ptr, GR8:$val),
- "#ATOMOR8 PSEUDO!",
- [(set GR8:$dst, (atomic_load_or_8 addr:$ptr, GR8:$val))]>;
-def ATOMXOR8 : I<0, Pseudo,(outs GR8:$dst),(ins i8mem:$ptr, GR8:$val),
- "#ATOMXOR8 PSEUDO!",
- [(set GR8:$dst, (atomic_load_xor_8 addr:$ptr, GR8:$val))]>;
-def ATOMNAND8 : I<0, Pseudo,(outs GR8:$dst),(ins i8mem:$ptr, GR8:$val),
- "#ATOMNAND8 PSEUDO!",
- [(set GR8:$dst, (atomic_load_nand_8 addr:$ptr, GR8:$val))]>;
-
-def ATOMAND16 : I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
- "#ATOMAND16 PSEUDO!",
- [(set GR16:$dst, (atomic_load_and_16 addr:$ptr, GR16:$val))]>;
-def ATOMOR16 : I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
- "#ATOMOR16 PSEUDO!",
- [(set GR16:$dst, (atomic_load_or_16 addr:$ptr, GR16:$val))]>;
-def ATOMXOR16 : I<0, Pseudo,(outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
- "#ATOMXOR16 PSEUDO!",
- [(set GR16:$dst, (atomic_load_xor_16 addr:$ptr, GR16:$val))]>;
-def ATOMNAND16 : I<0, Pseudo,(outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
- "#ATOMNAND16 PSEUDO!",
- [(set GR16:$dst, (atomic_load_nand_16 addr:$ptr, GR16:$val))]>;
-def ATOMMIN16: I<0, Pseudo, (outs GR16:$dst), (ins i16mem:$ptr, GR16:$val),
- "#ATOMMIN16 PSEUDO!",
- [(set GR16:$dst, (atomic_load_min_16 addr:$ptr, GR16:$val))]>;
-def ATOMMAX16: I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
- "#ATOMMAX16 PSEUDO!",
- [(set GR16:$dst, (atomic_load_max_16 addr:$ptr, GR16:$val))]>;
-def ATOMUMIN16: I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
- "#ATOMUMIN16 PSEUDO!",
- [(set GR16:$dst, (atomic_load_umin_16 addr:$ptr, GR16:$val))]>;
-def ATOMUMAX16: I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
- "#ATOMUMAX16 PSEUDO!",
- [(set GR16:$dst, (atomic_load_umax_16 addr:$ptr, GR16:$val))]>;
-
-
-def ATOMAND32 : I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
- "#ATOMAND32 PSEUDO!",
- [(set GR32:$dst, (atomic_load_and_32 addr:$ptr, GR32:$val))]>;
-def ATOMOR32 : I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
- "#ATOMOR32 PSEUDO!",
- [(set GR32:$dst, (atomic_load_or_32 addr:$ptr, GR32:$val))]>;
-def ATOMXOR32 : I<0, Pseudo,(outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
- "#ATOMXOR32 PSEUDO!",
- [(set GR32:$dst, (atomic_load_xor_32 addr:$ptr, GR32:$val))]>;
-def ATOMNAND32 : I<0, Pseudo,(outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
- "#ATOMNAND32 PSEUDO!",
- [(set GR32:$dst, (atomic_load_nand_32 addr:$ptr, GR32:$val))]>;
-def ATOMMIN32: I<0, Pseudo, (outs GR32:$dst), (ins i32mem:$ptr, GR32:$val),
- "#ATOMMIN32 PSEUDO!",
- [(set GR32:$dst, (atomic_load_min_32 addr:$ptr, GR32:$val))]>;
-def ATOMMAX32: I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
- "#ATOMMAX32 PSEUDO!",
- [(set GR32:$dst, (atomic_load_max_32 addr:$ptr, GR32:$val))]>;
-def ATOMUMIN32: I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
- "#ATOMUMIN32 PSEUDO!",
- [(set GR32:$dst, (atomic_load_umin_32 addr:$ptr, GR32:$val))]>;
-def ATOMUMAX32: I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
- "#ATOMUMAX32 PSEUDO!",
- [(set GR32:$dst, (atomic_load_umax_32 addr:$ptr, GR32:$val))]>;
-
-
-
-def ATOMAND64 : I<0, Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val),
- "#ATOMAND64 PSEUDO!",
- [(set GR64:$dst, (atomic_load_and_64 addr:$ptr, GR64:$val))]>;
-def ATOMOR64 : I<0, Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val),
- "#ATOMOR64 PSEUDO!",
- [(set GR64:$dst, (atomic_load_or_64 addr:$ptr, GR64:$val))]>;
-def ATOMXOR64 : I<0, Pseudo,(outs GR64:$dst),(ins i64mem:$ptr, GR64:$val),
- "#ATOMXOR64 PSEUDO!",
- [(set GR64:$dst, (atomic_load_xor_64 addr:$ptr, GR64:$val))]>;
-def ATOMNAND64 : I<0, Pseudo,(outs GR64:$dst),(ins i64mem:$ptr, GR64:$val),
- "#ATOMNAND64 PSEUDO!",
- [(set GR64:$dst, (atomic_load_nand_64 addr:$ptr, GR64:$val))]>;
-def ATOMMIN64: I<0, Pseudo, (outs GR64:$dst), (ins i64mem:$ptr, GR64:$val),
- "#ATOMMIN64 PSEUDO!",
- [(set GR64:$dst, (atomic_load_min_64 addr:$ptr, GR64:$val))]>;
-def ATOMMAX64: I<0, Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val),
- "#ATOMMAX64 PSEUDO!",
- [(set GR64:$dst, (atomic_load_max_64 addr:$ptr, GR64:$val))]>;
-def ATOMUMIN64: I<0, Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val),
- "#ATOMUMIN64 PSEUDO!",
- [(set GR64:$dst, (atomic_load_umin_64 addr:$ptr, GR64:$val))]>;
-def ATOMUMAX64: I<0, Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val),
- "#ATOMUMAX64 PSEUDO!",
- [(set GR64:$dst, (atomic_load_umax_64 addr:$ptr, GR64:$val))]>;
-}
-
-let Constraints = "$val1 = $dst1, $val2 = $dst2",
- Defs = [EFLAGS, EAX, EBX, ECX, EDX],
- Uses = [EAX, EBX, ECX, EDX],
- mayLoad = 1, mayStore = 1,
- usesCustomInserter = 1 in {
-def ATOMAND6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
- (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
- "#ATOMAND6432 PSEUDO!", []>;
-def ATOMOR6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
- (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
- "#ATOMOR6432 PSEUDO!", []>;
-def ATOMXOR6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
- (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
- "#ATOMXOR6432 PSEUDO!", []>;
-def ATOMNAND6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
- (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
- "#ATOMNAND6432 PSEUDO!", []>;
-def ATOMADD6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
- (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
- "#ATOMADD6432 PSEUDO!", []>;
-def ATOMSUB6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
- (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
- "#ATOMSUB6432 PSEUDO!", []>;
-def ATOMSWAP6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
- (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
- "#ATOMSWAP6432 PSEUDO!", []>;
+multiclass PSEUDO_ATOMIC_LOAD_BINOP<string mnemonic> {
+ let usesCustomInserter = 1, mayLoad = 1, mayStore = 1 in {
+ def #NAME#8 : I<0, Pseudo, (outs GR8:$dst),
+ (ins i8mem:$ptr, GR8:$val),
+ !strconcat(mnemonic, "8 PSEUDO!"), []>;
+ def #NAME#16 : I<0, Pseudo,(outs GR16:$dst),
+ (ins i16mem:$ptr, GR16:$val),
+ !strconcat(mnemonic, "16 PSEUDO!"), []>;
+ def #NAME#32 : I<0, Pseudo, (outs GR32:$dst),
+ (ins i32mem:$ptr, GR32:$val),
+ !strconcat(mnemonic, "32 PSEUDO!"), []>;
+ def #NAME#64 : I<0, Pseudo, (outs GR64:$dst),
+ (ins i64mem:$ptr, GR64:$val),
+ !strconcat(mnemonic, "64 PSEUDO!"), []>;
+ }
+}
+
+multiclass PSEUDO_ATOMIC_LOAD_BINOP_PATS<string name, string frag> {
+ def : Pat<(!cast<PatFrag>(frag # "_8") addr:$ptr, GR8:$val),
+ (!cast<Instruction>(name # "8") addr:$ptr, GR8:$val)>;
+ def : Pat<(!cast<PatFrag>(frag # "_16") addr:$ptr, GR16:$val),
+ (!cast<Instruction>(name # "16") addr:$ptr, GR16:$val)>;
+ def : Pat<(!cast<PatFrag>(frag # "_32") addr:$ptr, GR32:$val),
+ (!cast<Instruction>(name # "32") addr:$ptr, GR32:$val)>;
+ def : Pat<(!cast<PatFrag>(frag # "_64") addr:$ptr, GR64:$val),
+ (!cast<Instruction>(name # "64") addr:$ptr, GR64:$val)>;
}
+// Atomic exchange, and, or, xor
+defm ATOMAND : PSEUDO_ATOMIC_LOAD_BINOP<"#ATOMAND">;
+defm ATOMOR : PSEUDO_ATOMIC_LOAD_BINOP<"#ATOMOR">;
+defm ATOMXOR : PSEUDO_ATOMIC_LOAD_BINOP<"#ATOMXOR">;
+defm ATOMNAND : PSEUDO_ATOMIC_LOAD_BINOP<"#ATOMNAND">;
+defm ATOMMAX : PSEUDO_ATOMIC_LOAD_BINOP<"#ATOMMAX">;
+defm ATOMMIN : PSEUDO_ATOMIC_LOAD_BINOP<"#ATOMMIN">;
+defm ATOMUMAX : PSEUDO_ATOMIC_LOAD_BINOP<"#ATOMUMAX">;
+defm ATOMUMIN : PSEUDO_ATOMIC_LOAD_BINOP<"#ATOMUMIN">;
+
+defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMAND", "atomic_load_and">;
+defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMOR", "atomic_load_or">;
+defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMXOR", "atomic_load_xor">;
+defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMNAND", "atomic_load_nand">;
+defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMMAX", "atomic_load_max">;
+defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMMIN", "atomic_load_min">;
+defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMUMAX", "atomic_load_umax">;
+defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMUMIN", "atomic_load_umin">;
+
+multiclass PSEUDO_ATOMIC_LOAD_BINOP6432<string mnemonic> {
+ let usesCustomInserter = 1, mayLoad = 1, mayStore = 1 in
+ def #NAME#6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
+ (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
+ !strconcat(mnemonic, "6432 PSEUDO!"), []>;
+}
+
+defm ATOMAND : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMAND">;
+defm ATOMOR : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMOR">;
+defm ATOMXOR : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMXOR">;
+defm ATOMNAND : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMNAND">;
+defm ATOMADD : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMADD">;
+defm ATOMSUB : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMSUB">;
+defm ATOMMAX : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMMAX">;
+defm ATOMMIN : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMMIN">;
+defm ATOMUMAX : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMUMAX">;
+defm ATOMUMIN : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMUMIN">;
+defm ATOMSWAP : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMSWAP">;
+
//===----------------------------------------------------------------------===//
// Normal-Instructions-With-Lock-Prefix Pseudo Instructions
//===----------------------------------------------------------------------===//
@@ -617,7 +588,6 @@
// TODO: Get this to fold the constant into the instruction.
let isCodeGenOnly = 1, Defs = [EFLAGS] in
def OR32mrLocked : I<0x09, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$zero),
- "lock\n\t"
"or{l}\t{$zero, $dst|$dst, $zero}",
[], IIC_ALU_MEM>, Requires<[In32BitMode]>, LOCK;
@@ -637,72 +607,72 @@
def #NAME#8mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
RegOpc{3}, RegOpc{2}, RegOpc{1}, 0 },
MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2),
- !strconcat("lock\n\t", mnemonic, "{b}\t",
+ !strconcat(mnemonic, "{b}\t",
"{$src2, $dst|$dst, $src2}"),
[], IIC_ALU_NONMEM>, LOCK;
def #NAME#16mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 },
MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
- !strconcat("lock\n\t", mnemonic, "{w}\t",
+ !strconcat(mnemonic, "{w}\t",
"{$src2, $dst|$dst, $src2}"),
[], IIC_ALU_NONMEM>, OpSize, LOCK;
def #NAME#32mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 },
MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
- !strconcat("lock\n\t", mnemonic, "{l}\t",
+ !strconcat(mnemonic, "{l}\t",
"{$src2, $dst|$dst, $src2}"),
[], IIC_ALU_NONMEM>, LOCK;
def #NAME#64mr : RI<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 },
MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
- !strconcat("lock\n\t", mnemonic, "{q}\t",
+ !strconcat(mnemonic, "{q}\t",
"{$src2, $dst|$dst, $src2}"),
[], IIC_ALU_NONMEM>, LOCK;
def #NAME#8mi : Ii8<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 0 },
ImmMod, (outs), (ins i8mem :$dst, i8imm :$src2),
- !strconcat("lock\n\t", mnemonic, "{b}\t",
+ !strconcat(mnemonic, "{b}\t",
"{$src2, $dst|$dst, $src2}"),
[], IIC_ALU_MEM>, LOCK;
def #NAME#16mi : Ii16<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 },
ImmMod, (outs), (ins i16mem :$dst, i16imm :$src2),
- !strconcat("lock\n\t", mnemonic, "{w}\t",
+ !strconcat(mnemonic, "{w}\t",
"{$src2, $dst|$dst, $src2}"),
- [], IIC_ALU_MEM>, LOCK;
+ [], IIC_ALU_MEM>, OpSize, LOCK;
def #NAME#32mi : Ii32<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 },
ImmMod, (outs), (ins i32mem :$dst, i32imm :$src2),
- !strconcat("lock\n\t", mnemonic, "{l}\t",
+ !strconcat(mnemonic, "{l}\t",
"{$src2, $dst|$dst, $src2}"),
[], IIC_ALU_MEM>, LOCK;
def #NAME#64mi32 : RIi32<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 },
ImmMod, (outs), (ins i64mem :$dst, i64i32imm :$src2),
- !strconcat("lock\n\t", mnemonic, "{q}\t",
+ !strconcat(mnemonic, "{q}\t",
"{$src2, $dst|$dst, $src2}"),
[], IIC_ALU_MEM>, LOCK;
def #NAME#16mi8 : Ii8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},
ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 },
ImmMod, (outs), (ins i16mem :$dst, i16i8imm :$src2),
- !strconcat("lock\n\t", mnemonic, "{w}\t",
+ !strconcat(mnemonic, "{w}\t",
"{$src2, $dst|$dst, $src2}"),
- [], IIC_ALU_MEM>, LOCK;
+ [], IIC_ALU_MEM>, OpSize, LOCK;
def #NAME#32mi8 : Ii8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},
ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 },
ImmMod, (outs), (ins i32mem :$dst, i32i8imm :$src2),
- !strconcat("lock\n\t", mnemonic, "{l}\t",
+ !strconcat(mnemonic, "{l}\t",
"{$src2, $dst|$dst, $src2}"),
[], IIC_ALU_MEM>, LOCK;
def #NAME#64mi8 : RIi8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},
ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 },
ImmMod, (outs), (ins i64mem :$dst, i64i8imm :$src2),
- !strconcat("lock\n\t", mnemonic, "{q}\t",
+ !strconcat(mnemonic, "{q}\t",
"{$src2, $dst|$dst, $src2}"),
[], IIC_ALU_MEM>, LOCK;
@@ -717,106 +687,116 @@
defm LOCK_XOR : LOCK_ArithBinOp<0x30, 0x80, 0x83, MRM6m, "xor">;
// Optimized codegen when the non-memory output is not used.
+multiclass LOCK_ArithUnOp<bits<8> Opc8, bits<8> Opc, Format Form,
+ string mnemonic> {
let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1 in {
-def LOCK_INC8m : I<0xFE, MRM0m, (outs), (ins i8mem :$dst),
- "lock\n\t"
- "inc{b}\t$dst", [], IIC_UNARY_MEM>, LOCK;
-def LOCK_INC16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst),
- "lock\n\t"
- "inc{w}\t$dst", [], IIC_UNARY_MEM>, OpSize, LOCK;
-def LOCK_INC32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst),
- "lock\n\t"
- "inc{l}\t$dst", [], IIC_UNARY_MEM>, LOCK;
-def LOCK_INC64m : RI<0xFF, MRM0m, (outs), (ins i64mem:$dst),
- "lock\n\t"
- "inc{q}\t$dst", [], IIC_UNARY_MEM>, LOCK;
-
-def LOCK_DEC8m : I<0xFE, MRM1m, (outs), (ins i8mem :$dst),
- "lock\n\t"
- "dec{b}\t$dst", [], IIC_UNARY_MEM>, LOCK;
-def LOCK_DEC16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst),
- "lock\n\t"
- "dec{w}\t$dst", [], IIC_UNARY_MEM>, OpSize, LOCK;
-def LOCK_DEC32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst),
- "lock\n\t"
- "dec{l}\t$dst", [], IIC_UNARY_MEM>, LOCK;
-def LOCK_DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst),
- "lock\n\t"
- "dec{q}\t$dst", [], IIC_UNARY_MEM>, LOCK;
+def #NAME#8m : I<Opc8, Form, (outs), (ins i8mem :$dst),
+ !strconcat(mnemonic, "{b}\t$dst"),
+ [], IIC_UNARY_MEM>, LOCK;
+def #NAME#16m : I<Opc, Form, (outs), (ins i16mem:$dst),
+ !strconcat(mnemonic, "{w}\t$dst"),
+ [], IIC_UNARY_MEM>, OpSize, LOCK;
+def #NAME#32m : I<Opc, Form, (outs), (ins i32mem:$dst),
+ !strconcat(mnemonic, "{l}\t$dst"),
+ [], IIC_UNARY_MEM>, LOCK;
+def #NAME#64m : RI<Opc, Form, (outs), (ins i64mem:$dst),
+ !strconcat(mnemonic, "{q}\t$dst"),
+ [], IIC_UNARY_MEM>, LOCK;
+}
}
+defm LOCK_INC : LOCK_ArithUnOp<0xFE, 0xFF, MRM0m, "inc">;
+defm LOCK_DEC : LOCK_ArithUnOp<0xFE, 0xFF, MRM1m, "dec">;
+
// Atomic compare and swap.
-let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX],
- isCodeGenOnly = 1 in
-def LCMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i64mem:$ptr),
- "lock\n\t"
- "cmpxchg8b\t$ptr",
- [(X86cas8 addr:$ptr)], IIC_CMPX_LOCK_8B>, TB, LOCK;
+multiclass LCMPXCHG_UnOp<bits<8> Opc, Format Form, string mnemonic,
+ SDPatternOperator frag, X86MemOperand x86memop,
+ InstrItinClass itin> {
+let isCodeGenOnly = 1 in {
+ def #NAME# : I<Opc, Form, (outs), (ins x86memop:$ptr),
+ !strconcat(mnemonic, "\t$ptr"),
+ [(frag addr:$ptr)], itin>, TB, LOCK;
+}
+}
+
+multiclass LCMPXCHG_BinOp<bits<8> Opc8, bits<8> Opc, Format Form,
+ string mnemonic, SDPatternOperator frag,
+ InstrItinClass itin8, InstrItinClass itin> {
+let isCodeGenOnly = 1 in {
+ let Defs = [AL, EFLAGS], Uses = [AL] in
+ def #NAME#8 : I<Opc8, Form, (outs), (ins i8mem:$ptr, GR8:$swap),
+ !strconcat(mnemonic, "{b}\t{$swap, $ptr|$ptr, $swap}"),
+ [(frag addr:$ptr, GR8:$swap, 1)], itin8>, TB, LOCK;
+ let Defs = [AX, EFLAGS], Uses = [AX] in
+ def #NAME#16 : I<Opc, Form, (outs), (ins i16mem:$ptr, GR16:$swap),
+ !strconcat(mnemonic, "{w}\t{$swap, $ptr|$ptr, $swap}"),
+ [(frag addr:$ptr, GR16:$swap, 2)], itin>, TB, OpSize, LOCK;
+ let Defs = [EAX, EFLAGS], Uses = [EAX] in
+ def #NAME#32 : I<Opc, Form, (outs), (ins i32mem:$ptr, GR32:$swap),
+ !strconcat(mnemonic, "{l}\t{$swap, $ptr|$ptr, $swap}"),
+ [(frag addr:$ptr, GR32:$swap, 4)], itin>, TB, LOCK;
+ let Defs = [RAX, EFLAGS], Uses = [RAX] in
+ def #NAME#64 : RI<Opc, Form, (outs), (ins i64mem:$ptr, GR64:$swap),
+ !strconcat(mnemonic, "{q}\t{$swap, $ptr|$ptr, $swap}"),
+ [(frag addr:$ptr, GR64:$swap, 8)], itin>, TB, LOCK;
+}
+}
+
+let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX] in {
+defm LCMPXCHG8B : LCMPXCHG_UnOp<0xC7, MRM1m, "cmpxchg8b",
+ X86cas8, i64mem,
+ IIC_CMPX_LOCK_8B>;
+}
let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RBX, RCX, RDX],
- isCodeGenOnly = 1 in
-def LCMPXCHG16B : RI<0xC7, MRM1m, (outs), (ins i128mem:$ptr),
- "lock\n\t"
- "cmpxchg16b\t$ptr",
- [(X86cas16 addr:$ptr)], IIC_CMPX_LOCK_16B>, TB, LOCK,
- Requires<[HasCmpxchg16b]>;
-
-let Defs = [AL, EFLAGS], Uses = [AL], isCodeGenOnly = 1 in {
-def LCMPXCHG8 : I<0xB0, MRMDestMem, (outs), (ins i8mem:$ptr, GR8:$swap),
- "lock\n\t"
- "cmpxchg{b}\t{$swap, $ptr|$ptr, $swap}",
- [(X86cas addr:$ptr, GR8:$swap, 1)], IIC_CMPX_LOCK_8>, TB, LOCK;
-}
-
-let Defs = [AX, EFLAGS], Uses = [AX], isCodeGenOnly = 1 in {
-def LCMPXCHG16 : I<0xB1, MRMDestMem, (outs), (ins i16mem:$ptr, GR16:$swap),
- "lock\n\t"
- "cmpxchg{w}\t{$swap, $ptr|$ptr, $swap}",
- [(X86cas addr:$ptr, GR16:$swap, 2)], IIC_CMPX_LOCK>, TB, OpSize, LOCK;
-}
-
-let Defs = [EAX, EFLAGS], Uses = [EAX], isCodeGenOnly = 1 in {
-def LCMPXCHG32 : I<0xB1, MRMDestMem, (outs), (ins i32mem:$ptr, GR32:$swap),
- "lock\n\t"
- "cmpxchg{l}\t{$swap, $ptr|$ptr, $swap}",
- [(X86cas addr:$ptr, GR32:$swap, 4)], IIC_CMPX_LOCK>, TB, LOCK;
-}
-
-let Defs = [RAX, EFLAGS], Uses = [RAX], isCodeGenOnly = 1 in {
-def LCMPXCHG64 : RI<0xB1, MRMDestMem, (outs), (ins i64mem:$ptr, GR64:$swap),
- "lock\n\t"
- "cmpxchg{q}\t{$swap, $ptr|$ptr, $swap}",
- [(X86cas addr:$ptr, GR64:$swap, 8)], IIC_CMPX_LOCK>, TB, LOCK;
+ Predicates = [HasCmpxchg16b] in {
+defm LCMPXCHG16B : LCMPXCHG_UnOp<0xC7, MRM1m, "cmpxchg16b",
+ X86cas16, i128mem,
+ IIC_CMPX_LOCK_16B>, REX_W;
}
+defm LCMPXCHG : LCMPXCHG_BinOp<0xB0, 0xB1, MRMDestMem, "cmpxchg",
+ X86cas, IIC_CMPX_LOCK_8, IIC_CMPX_LOCK>;
+
// Atomic exchange and add
-let Constraints = "$val = $dst", Defs = [EFLAGS], isCodeGenOnly = 1 in {
-def LXADD8 : I<0xC0, MRMSrcMem, (outs GR8:$dst), (ins GR8:$val, i8mem:$ptr),
- "lock\n\t"
- "xadd{b}\t{$val, $ptr|$ptr, $val}",
- [(set GR8:$dst, (atomic_load_add_8 addr:$ptr, GR8:$val))],
- IIC_XADD_LOCK_MEM8>,
- TB, LOCK;
-def LXADD16 : I<0xC1, MRMSrcMem, (outs GR16:$dst), (ins GR16:$val, i16mem:$ptr),
- "lock\n\t"
- "xadd{w}\t{$val, $ptr|$ptr, $val}",
- [(set GR16:$dst, (atomic_load_add_16 addr:$ptr, GR16:$val))],
- IIC_XADD_LOCK_MEM>,
- TB, OpSize, LOCK;
-def LXADD32 : I<0xC1, MRMSrcMem, (outs GR32:$dst), (ins GR32:$val, i32mem:$ptr),
- "lock\n\t"
- "xadd{l}\t{$val, $ptr|$ptr, $val}",
- [(set GR32:$dst, (atomic_load_add_32 addr:$ptr, GR32:$val))],
- IIC_XADD_LOCK_MEM>,
- TB, LOCK;
-def LXADD64 : RI<0xC1, MRMSrcMem, (outs GR64:$dst), (ins GR64:$val,i64mem:$ptr),
- "lock\n\t"
- "xadd{q}\t{$val, $ptr|$ptr, $val}",
- [(set GR64:$dst, (atomic_load_add_64 addr:$ptr, GR64:$val))],
- IIC_XADD_LOCK_MEM>,
- TB, LOCK;
-}
+multiclass ATOMIC_LOAD_BINOP<bits<8> opc8, bits<8> opc, string mnemonic,
+ string frag,
+ InstrItinClass itin8, InstrItinClass itin> {
+ let Constraints = "$val = $dst", Defs = [EFLAGS], isCodeGenOnly = 1 in {
+ def #NAME#8 : I<opc8, MRMSrcMem, (outs GR8:$dst),
+ (ins GR8:$val, i8mem:$ptr),
+ !strconcat(mnemonic, "{b}\t{$val, $ptr|$ptr, $val}"),
+ [(set GR8:$dst,
+ (!cast<PatFrag>(frag # "_8") addr:$ptr, GR8:$val))],
+ itin8>;
+ def #NAME#16 : I<opc, MRMSrcMem, (outs GR16:$dst),
+ (ins GR16:$val, i16mem:$ptr),
+ !strconcat(mnemonic, "{w}\t{$val, $ptr|$ptr, $val}"),
+ [(set
+ GR16:$dst,
+ (!cast<PatFrag>(frag # "_16") addr:$ptr, GR16:$val))],
+ itin>, OpSize;
+ def #NAME#32 : I<opc, MRMSrcMem, (outs GR32:$dst),
+ (ins GR32:$val, i32mem:$ptr),
+ !strconcat(mnemonic, "{l}\t{$val, $ptr|$ptr, $val}"),
+ [(set
+ GR32:$dst,
+ (!cast<PatFrag>(frag # "_32") addr:$ptr, GR32:$val))],
+ itin>;
+ def #NAME#64 : RI<opc, MRMSrcMem, (outs GR64:$dst),
+ (ins GR64:$val, i64mem:$ptr),
+ !strconcat(mnemonic, "{q}\t{$val, $ptr|$ptr, $val}"),
+ [(set
+ GR64:$dst,
+ (!cast<PatFrag>(frag # "_64") addr:$ptr, GR64:$val))],
+ itin>;
+ }
+}
+
+defm LXADD : ATOMIC_LOAD_BINOP<0xc0, 0xc1, "xadd", "atomic_load_add",
+ IIC_XADD_LOCK_MEM8, IIC_XADD_LOCK_MEM>,
+ TB, LOCK;
def ACQUIRE_MOV8rm : I<0, Pseudo, (outs GR8 :$dst), (ins i8mem :$src),
"#ACQUIRE_MOV PSEUDO!",
@@ -1017,7 +997,24 @@
def : Pat<(X86call (i64 texternalsym:$dst)),
(CALL64pcrel32 texternalsym:$dst)>;
-// tailcall stuff
+// Tailcall stuff. The TCRETURN instructions execute after the epilog, so they
+// can never use callee-saved registers. That is the purpose of the GR64_TC
+// register classes.
+//
+// The only volatile register that is never used by the calling convention is
+// %r11. This happens when calling a vararg function with 6 arguments.
+//
+// Match an X86tcret that uses less than 7 volatile registers.
+def X86tcret_6regs : PatFrag<(ops node:$ptr, node:$off),
+ (X86tcret node:$ptr, node:$off), [{
+ // X86tcret args: (*chain, ptr, imm, regs..., glue)
+ unsigned NumRegs = 0;
+ for (unsigned i = 3, e = N->getNumOperands(); i != e; ++i)
+ if (isa<RegisterSDNode>(N->getOperand(i)) && ++NumRegs > 6)
+ return false;
+ return true;
+}]>;
+
def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off),
(TCRETURNri ptr_rc_tailcall:$dst, imm:$off)>,
Requires<[In32BitMode]>;
@@ -1041,7 +1038,9 @@
(TCRETURNri64 ptr_rc_tailcall:$dst, imm:$off)>,
Requires<[In64BitMode]>;
-def : Pat<(X86tcret (load addr:$dst), imm:$off),
+// Don't fold loads into X86tcret requiring more than 6 regs.
+// There wouldn't be enough scratch registers for base+index.
+def : Pat<(X86tcret_6regs (load addr:$dst), imm:$off),
(TCRETURNmi64 addr:$dst, imm:$off)>,
Requires<[In64BitMode]>;
Modified: llvm/branches/AMDILBackend/lib/Target/X86/X86InstrControl.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/X86/X86InstrControl.td?rev=172541&r1=172540&r2=172541&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/X86/X86InstrControl.td (original)
+++ llvm/branches/AMDILBackend/lib/Target/X86/X86InstrControl.td Tue Jan 15 11:16:16 2013
@@ -16,15 +16,18 @@
//
// Return instructions.
+//
+// The X86retflag return instructions are variadic because we may add ST0 and
+// ST1 arguments when returning values on the x87 stack.
let isTerminator = 1, isReturn = 1, isBarrier = 1,
hasCtrlDep = 1, FPForm = SpecialFP in {
- def RET : I <0xC3, RawFrm, (outs), (ins),
+ def RET : I <0xC3, RawFrm, (outs), (ins variable_ops),
"ret",
[(X86retflag 0)], IIC_RET>;
def RETW : I <0xC3, RawFrm, (outs), (ins),
"ret{w}",
[], IIC_RET>, OpSize;
- def RETI : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt),
+ def RETI : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt, variable_ops),
"ret\t$amt",
[(X86retflag timm:$amt)], IIC_RET_IMM>;
def RETIW : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt),
Modified: llvm/branches/AMDILBackend/lib/Target/X86/X86InstrFMA.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/X86/X86InstrFMA.td?rev=172541&r1=172540&r2=172541&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/X86/X86InstrFMA.td (original)
+++ llvm/branches/AMDILBackend/lib/Target/X86/X86InstrFMA.td Tue Jan 15 11:16:16 2013
@@ -16,243 +16,180 @@
//===----------------------------------------------------------------------===//
let Constraints = "$src1 = $dst" in {
-multiclass fma3p_rm<bits<8> opc, string OpcodeStr> {
-let neverHasSideEffects = 1 in {
- def r : FMA3<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, VR128:$src3),
- !strconcat(OpcodeStr,
- "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), []>;
- let mayLoad = 1 in
- def m : FMA3<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, f128mem:$src3),
- !strconcat(OpcodeStr,
- "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), []>;
- def rY : FMA3<opc, MRMSrcReg, (outs VR256:$dst),
- (ins VR256:$src1, VR256:$src2, VR256:$src3),
- !strconcat(OpcodeStr,
- "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), []>;
- let mayLoad = 1 in
- def mY : FMA3<opc, MRMSrcMem, (outs VR256:$dst),
- (ins VR256:$src1, VR256:$src2, f256mem:$src3),
- !strconcat(OpcodeStr,
- "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), []>;
-} // neverHasSideEffects = 1
-}
-
-// Intrinsic for 213 pattern
-multiclass fma3p_rm_int<bits<8> opc, string OpcodeStr,
- PatFrag MemFrag128, PatFrag MemFrag256,
- Intrinsic Int128, Intrinsic Int256, SDNode Op213,
- ValueType OpVT128, ValueType OpVT256> {
- def r_Int : FMA3<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, VR128:$src3),
- !strconcat(OpcodeStr,
- "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [(set VR128:$dst, (Int128 VR128:$src2, VR128:$src1,
- VR128:$src3))]>;
-
+multiclass fma3p_rm<bits<8> opc, string OpcodeStr,
+ PatFrag MemFrag128, PatFrag MemFrag256,
+ ValueType OpVT128, ValueType OpVT256,
+ SDPatternOperator Op = null_frag> {
+ let isCommutable = 1 in
def r : FMA3<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, VR128:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [(set VR128:$dst, (OpVT128 (Op213 VR128:$src2,
+ [(set VR128:$dst, (OpVT128 (Op VR128:$src2,
VR128:$src1, VR128:$src3)))]>;
- def m_Int : FMA3<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, f128mem:$src3),
- !strconcat(OpcodeStr,
- "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [(set VR128:$dst, (Int128 VR128:$src2, VR128:$src1,
- (MemFrag128 addr:$src3)))]>;
-
+ let mayLoad = 1 in
def m : FMA3<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, f128mem:$src3),
- !strconcat(OpcodeStr,
+ !strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [(set VR128:$dst, (OpVT128 (Op213 VR128:$src2, VR128:$src1,
+ [(set VR128:$dst, (OpVT128 (Op VR128:$src2, VR128:$src1,
(MemFrag128 addr:$src3))))]>;
-
- def rY_Int : FMA3<opc, MRMSrcReg, (outs VR256:$dst),
- (ins VR256:$src1, VR256:$src2, VR256:$src3),
- !strconcat(OpcodeStr,
- "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [(set VR256:$dst, (Int256 VR256:$src2, VR256:$src1,
- VR256:$src3))]>;
-
+ let isCommutable = 1 in
def rY : FMA3<opc, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2, VR256:$src3),
- !strconcat(OpcodeStr,
+ !strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [(set VR256:$dst, (OpVT256 (Op213 VR256:$src2, VR256:$src1,
- VR256:$src3)))]>;
-
- def mY_Int : FMA3<opc, MRMSrcMem, (outs VR256:$dst),
- (ins VR256:$src1, VR256:$src2, f256mem:$src3),
- !strconcat(OpcodeStr,
- "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [(set VR256:$dst, (Int256 VR256:$src2, VR256:$src1,
- (MemFrag256 addr:$src3)))]>;
+ [(set VR256:$dst, (OpVT256 (Op VR256:$src2, VR256:$src1,
+ VR256:$src3)))]>, VEX_L;
+ let mayLoad = 1 in
def mY : FMA3<opc, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2, f256mem:$src3),
- !strconcat(OpcodeStr,
+ !strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set VR256:$dst,
- (OpVT256 (Op213 VR256:$src2, VR256:$src1,
- (MemFrag256 addr:$src3))))]>;
+ (OpVT256 (Op VR256:$src2, VR256:$src1,
+ (MemFrag256 addr:$src3))))]>, VEX_L;
}
} // Constraints = "$src1 = $dst"
multiclass fma3p_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231,
string OpcodeStr, string PackTy,
PatFrag MemFrag128, PatFrag MemFrag256,
- Intrinsic Int128, Intrinsic Int256, SDNode Op,
- ValueType OpTy128, ValueType OpTy256> {
- defm r213 : fma3p_rm_int <opc213, !strconcat(OpcodeStr,
- !strconcat("213", PackTy)), MemFrag128, MemFrag256,
- Int128, Int256, Op, OpTy128, OpTy256>;
- defm r132 : fma3p_rm <opc132,
- !strconcat(OpcodeStr, !strconcat("132", PackTy))>;
- defm r231 : fma3p_rm <opc231,
- !strconcat(OpcodeStr, !strconcat("231", PackTy))>;
+ SDNode Op, ValueType OpTy128, ValueType OpTy256> {
+ defm r213 : fma3p_rm<opc213,
+ !strconcat(OpcodeStr, !strconcat("213", PackTy)),
+ MemFrag128, MemFrag256, OpTy128, OpTy256, Op>;
+let neverHasSideEffects = 1 in {
+ defm r132 : fma3p_rm<opc132,
+ !strconcat(OpcodeStr, !strconcat("132", PackTy)),
+ MemFrag128, MemFrag256, OpTy128, OpTy256>;
+ defm r231 : fma3p_rm<opc231,
+ !strconcat(OpcodeStr, !strconcat("231", PackTy)),
+ MemFrag128, MemFrag256, OpTy128, OpTy256>;
+} // neverHasSideEffects = 1
}
// Fused Multiply-Add
let ExeDomain = SSEPackedSingle in {
defm VFMADDPS : fma3p_forms<0x98, 0xA8, 0xB8, "vfmadd", "ps", memopv4f32,
- memopv8f32, int_x86_fma_vfmadd_ps,
- int_x86_fma_vfmadd_ps_256, X86Fmadd,
- v4f32, v8f32>;
+ memopv8f32, X86Fmadd, v4f32, v8f32>;
defm VFMSUBPS : fma3p_forms<0x9A, 0xAA, 0xBA, "vfmsub", "ps", memopv4f32,
- memopv8f32, int_x86_fma_vfmsub_ps,
- int_x86_fma_vfmsub_ps_256, X86Fmsub,
- v4f32, v8f32>;
+ memopv8f32, X86Fmsub, v4f32, v8f32>;
defm VFMADDSUBPS : fma3p_forms<0x96, 0xA6, 0xB6, "vfmaddsub", "ps",
- memopv4f32, memopv8f32,
- int_x86_fma_vfmaddsub_ps,
- int_x86_fma_vfmaddsub_ps_256, X86Fmaddsub,
+ memopv4f32, memopv8f32, X86Fmaddsub,
v4f32, v8f32>;
defm VFMSUBADDPS : fma3p_forms<0x97, 0xA7, 0xB7, "vfmsubadd", "ps",
- memopv4f32, memopv8f32,
- int_x86_fma_vfmsubadd_ps,
- int_x86_fma_vfmaddsub_ps_256, X86Fmsubadd,
+ memopv4f32, memopv8f32, X86Fmsubadd,
v4f32, v8f32>;
}
let ExeDomain = SSEPackedDouble in {
defm VFMADDPD : fma3p_forms<0x98, 0xA8, 0xB8, "vfmadd", "pd", memopv2f64,
- memopv4f64, int_x86_fma_vfmadd_pd,
- int_x86_fma_vfmadd_pd_256, X86Fmadd, v2f64,
- v4f64>, VEX_W;
+ memopv4f64, X86Fmadd, v2f64, v4f64>, VEX_W;
defm VFMSUBPD : fma3p_forms<0x9A, 0xAA, 0xBA, "vfmsub", "pd", memopv2f64,
- memopv4f64, int_x86_fma_vfmsub_pd,
- int_x86_fma_vfmsub_pd_256, X86Fmsub, v2f64,
- v4f64>, VEX_W;
+ memopv4f64, X86Fmsub, v2f64, v4f64>, VEX_W;
defm VFMADDSUBPD : fma3p_forms<0x96, 0xA6, 0xB6, "vfmaddsub", "pd",
- memopv2f64, memopv4f64,
- int_x86_fma_vfmaddsub_pd,
- int_x86_fma_vfmaddsub_pd_256, X86Fmaddsub,
+ memopv2f64, memopv4f64, X86Fmaddsub,
v2f64, v4f64>, VEX_W;
defm VFMSUBADDPD : fma3p_forms<0x97, 0xA7, 0xB7, "vfmsubadd", "pd",
- memopv2f64, memopv4f64,
- int_x86_fma_vfmsubadd_pd,
- int_x86_fma_vfmsubadd_pd_256, X86Fmsubadd,
+ memopv2f64, memopv4f64, X86Fmsubadd,
v2f64, v4f64>, VEX_W;
}
// Fused Negative Multiply-Add
let ExeDomain = SSEPackedSingle in {
defm VFNMADDPS : fma3p_forms<0x9C, 0xAC, 0xBC, "vfnmadd", "ps", memopv4f32,
- memopv8f32, int_x86_fma_vfnmadd_ps,
- int_x86_fma_vfnmadd_ps_256, X86Fnmadd, v4f32,
- v8f32>;
+ memopv8f32, X86Fnmadd, v4f32, v8f32>;
defm VFNMSUBPS : fma3p_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "ps", memopv4f32,
- memopv8f32, int_x86_fma_vfnmsub_ps,
- int_x86_fma_vfnmsub_ps_256, X86Fnmsub, v4f32,
- v8f32>;
+ memopv8f32, X86Fnmsub, v4f32, v8f32>;
}
let ExeDomain = SSEPackedDouble in {
defm VFNMADDPD : fma3p_forms<0x9C, 0xAC, 0xBC, "vfnmadd", "pd", memopv2f64,
- memopv4f64, int_x86_fma_vfnmadd_pd,
- int_x86_fma_vfnmadd_pd_256, X86Fnmadd, v2f64,
- v4f64>, VEX_W;
+ memopv4f64, X86Fnmadd, v2f64, v4f64>, VEX_W;
defm VFNMSUBPD : fma3p_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "pd",
- memopv2f64,
- memopv4f64, int_x86_fma_vfnmsub_pd,
- int_x86_fma_vfnmsub_pd_256, X86Fnmsub, v2f64,
+ memopv2f64, memopv4f64, X86Fnmsub, v2f64,
v4f64>, VEX_W;
}
let Constraints = "$src1 = $dst" in {
multiclass fma3s_rm<bits<8> opc, string OpcodeStr, X86MemOperand x86memop,
- RegisterClass RC> {
-let neverHasSideEffects = 1 in {
- def r : FMA3<opc, MRMSrcReg, (outs RC:$dst),
- (ins RC:$src1, RC:$src2, RC:$src3),
- !strconcat(OpcodeStr,
- "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), []>;
+ RegisterClass RC, ValueType OpVT, PatFrag mem_frag,
+ SDPatternOperator OpNode = null_frag> {
+ let isCommutable = 1 in
+ def r : FMA3<opc, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2, RC:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set RC:$dst,
+ (OpVT (OpNode RC:$src2, RC:$src1, RC:$src3)))]>;
let mayLoad = 1 in
- def m : FMA3<opc, MRMSrcMem, (outs RC:$dst),
- (ins RC:$src1, RC:$src2, x86memop:$src3),
- !strconcat(OpcodeStr,
- "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), []>;
-} // neverHasSideEffects = 1
+ def m : FMA3<opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2, x86memop:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set RC:$dst,
+ (OpVT (OpNode RC:$src2, RC:$src1,
+ (mem_frag addr:$src3))))]>;
}
multiclass fma3s_rm_int<bits<8> opc, string OpcodeStr, Operand memop,
- ComplexPattern mem_cpat, Intrinsic IntId,
- RegisterClass RC, SDNode OpNode, ValueType OpVT> {
+ ComplexPattern mem_cpat, Intrinsic IntId,
+ RegisterClass RC> {
+ let isCommutable = 1 in
def r_Int : FMA3<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, VR128:$src3),
- !strconcat(OpcodeStr,
- "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [(set VR128:$dst, (IntId VR128:$src2, VR128:$src1,
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set VR128:$dst, (IntId VR128:$src2, VR128:$src1,
VR128:$src3))]>;
def m_Int : FMA3<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, memop:$src3),
- !strconcat(OpcodeStr,
+ !strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set VR128:$dst,
(IntId VR128:$src2, VR128:$src1, mem_cpat:$src3))]>;
- def r : FMA3<opc, MRMSrcReg, (outs RC:$dst),
- (ins RC:$src1, RC:$src2, RC:$src3),
- !strconcat(OpcodeStr,
- "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [(set RC:$dst,
- (OpVT (OpNode RC:$src2, RC:$src1, RC:$src3)))]>;
- let mayLoad = 1 in
- def m : FMA3<opc, MRMSrcMem, (outs RC:$dst),
- (ins RC:$src1, RC:$src2, memop:$src3),
- !strconcat(OpcodeStr,
- "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), []>;
}
} // Constraints = "$src1 = $dst"
multiclass fma3s_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231,
- string OpStr, Intrinsic IntF32, Intrinsic IntF64,
- SDNode OpNode> {
- defm SSr132 : fma3s_rm<opc132, !strconcat(OpStr, "132ss"), f32mem, FR32>;
- defm SSr231 : fma3s_rm<opc231, !strconcat(OpStr, "231ss"), f32mem, FR32>;
- defm SDr132 : fma3s_rm<opc132, !strconcat(OpStr, "132sd"), f64mem, FR64>,
- VEX_W;
- defm SDr231 : fma3s_rm<opc231, !strconcat(OpStr, "231sd"), f64mem, FR64>,
- VEX_W;
- defm SSr213 : fma3s_rm_int <opc213, !strconcat(OpStr, "213ss"), ssmem,
- sse_load_f32, IntF32, FR32, OpNode, f32>;
- defm SDr213 : fma3s_rm_int <opc213, !strconcat(OpStr, "213sd"), sdmem,
- sse_load_f64, IntF64, FR64, OpNode, f64>, VEX_W;
-}
-
-defm VFMADD : fma3s_forms<0x99, 0xA9, 0xB9, "vfmadd", int_x86_fma_vfmadd_ss,
- int_x86_fma_vfmadd_sd, X86Fmadd>, VEX_LIG;
-defm VFMSUB : fma3s_forms<0x9B, 0xAB, 0xBB, "vfmsub", int_x86_fma_vfmsub_ss,
- int_x86_fma_vfmsub_sd, X86Fmsub>, VEX_LIG;
-
-defm VFNMADD : fma3s_forms<0x9D, 0xAD, 0xBD, "vfnmadd", int_x86_fma_vfnmadd_ss,
- int_x86_fma_vfnmadd_sd, X86Fnmadd>, VEX_LIG;
-defm VFNMSUB : fma3s_forms<0x9F, 0xAF, 0xBF, "vfnmsub", int_x86_fma_vfnmsub_ss,
- int_x86_fma_vfnmsub_sd, X86Fnmsub>, VEX_LIG;
+ string OpStr, string PackTy, Intrinsic Int,
+ SDNode OpNode, RegisterClass RC, ValueType OpVT,
+ X86MemOperand x86memop, Operand memop, PatFrag mem_frag,
+ ComplexPattern mem_cpat> {
+let neverHasSideEffects = 1 in {
+ defm r132 : fma3s_rm<opc132, !strconcat(OpStr, !strconcat("132", PackTy)),
+ x86memop, RC, OpVT, mem_frag>;
+ defm r231 : fma3s_rm<opc231, !strconcat(OpStr, !strconcat("231", PackTy)),
+ x86memop, RC, OpVT, mem_frag>;
+}
+
+defm r213 : fma3s_rm<opc213, !strconcat(OpStr, !strconcat("213", PackTy)),
+ x86memop, RC, OpVT, mem_frag, OpNode>,
+ fma3s_rm_int<opc213, !strconcat(OpStr, !strconcat("213", PackTy)),
+ memop, mem_cpat, Int, RC>;
+}
+
+multiclass fma3s<bits<8> opc132, bits<8> opc213, bits<8> opc231,
+ string OpStr, Intrinsic IntF32, Intrinsic IntF64,
+ SDNode OpNode> {
+ defm SS : fma3s_forms<opc132, opc213, opc231, OpStr, "ss", IntF32, OpNode,
+ FR32, f32, f32mem, ssmem, loadf32, sse_load_f32>;
+ defm SD : fma3s_forms<opc132, opc213, opc231, OpStr, "sd", IntF64, OpNode,
+ FR64, f64, f64mem, sdmem, loadf64, sse_load_f64>, VEX_W;
+}
+
+defm VFMADD : fma3s<0x99, 0xA9, 0xB9, "vfmadd", int_x86_fma_vfmadd_ss,
+ int_x86_fma_vfmadd_sd, X86Fmadd>, VEX_LIG;
+defm VFMSUB : fma3s<0x9B, 0xAB, 0xBB, "vfmsub", int_x86_fma_vfmsub_ss,
+ int_x86_fma_vfmsub_sd, X86Fmsub>, VEX_LIG;
+
+defm VFNMADD : fma3s<0x9D, 0xAD, 0xBD, "vfnmadd", int_x86_fma_vfnmadd_ss,
+ int_x86_fma_vfnmadd_sd, X86Fnmadd>, VEX_LIG;
+defm VFNMSUB : fma3s<0x9F, 0xAF, 0xBF, "vfnmsub", int_x86_fma_vfnmsub_ss,
+ int_x86_fma_vfnmsub_sd, X86Fnmsub>, VEX_LIG;
//===----------------------------------------------------------------------===//
@@ -260,73 +197,102 @@
//===----------------------------------------------------------------------===//
-multiclass fma4s<bits<8> opc, string OpcodeStr, Operand memop,
- ComplexPattern mem_cpat, Intrinsic Int> {
- def rr : FMA4<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, VR128:$src3),
+multiclass fma4s<bits<8> opc, string OpcodeStr, RegisterClass RC,
+ X86MemOperand x86memop, ValueType OpVT, SDNode OpNode,
+ PatFrag mem_frag> {
+ let isCommutable = 1 in
+ def rr : FMA4<opc, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- [(set VR128:$dst,
- (Int VR128:$src1, VR128:$src2, VR128:$src3))]>, VEX_W, MemOp4;
- def rm : FMA4<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, memop:$src3),
+ [(set RC:$dst,
+ (OpVT (OpNode RC:$src1, RC:$src2, RC:$src3)))]>, VEX_W, MemOp4;
+ def rm : FMA4<opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2, x86memop:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- [(set VR128:$dst,
- (Int VR128:$src1, VR128:$src2, mem_cpat:$src3))]>, VEX_W, MemOp4;
- def mr : FMA4<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, memop:$src2, VR128:$src3),
+ [(set RC:$dst, (OpNode RC:$src1, RC:$src2,
+ (mem_frag addr:$src3)))]>, VEX_W, MemOp4;
+ def mr : FMA4<opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, x86memop:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- [(set VR128:$dst,
- (Int VR128:$src1, mem_cpat:$src2, VR128:$src3))]>;
+ [(set RC:$dst,
+ (OpNode RC:$src1, (mem_frag addr:$src2), RC:$src3))]>;
// For disassembler
let isCodeGenOnly = 1 in
- def rr_REV : FMA4<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, VR128:$src3),
+ def rr_REV : FMA4<opc, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), []>;
}
-multiclass fma4p<bits<8> opc, string OpcodeStr,
- Intrinsic Int128, Intrinsic Int256,
+multiclass fma4s_int<bits<8> opc, string OpcodeStr, Operand memop,
+ ComplexPattern mem_cpat, Intrinsic Int> {
+ let isCommutable = 1 in
+ def rr_Int : FMA4<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, VR128:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set VR128:$dst,
+ (Int VR128:$src1, VR128:$src2, VR128:$src3))]>, VEX_W, MemOp4;
+ def rm_Int : FMA4<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, memop:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set VR128:$dst, (Int VR128:$src1, VR128:$src2,
+ mem_cpat:$src3))]>, VEX_W, MemOp4;
+ def mr_Int : FMA4<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, memop:$src2, VR128:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set VR128:$dst,
+ (Int VR128:$src1, mem_cpat:$src2, VR128:$src3))]>;
+}
+
+multiclass fma4p<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ ValueType OpVT128, ValueType OpVT256,
PatFrag ld_frag128, PatFrag ld_frag256> {
+ let isCommutable = 1 in
def rr : FMA4<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, VR128:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst,
- (Int128 VR128:$src1, VR128:$src2, VR128:$src3))]>, VEX_W, MemOp4;
+ (OpVT128 (OpNode VR128:$src1, VR128:$src2, VR128:$src3)))]>,
+ VEX_W, MemOp4;
def rm : FMA4<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, f128mem:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- [(set VR128:$dst, (Int128 VR128:$src1, VR128:$src2,
+ [(set VR128:$dst, (OpNode VR128:$src1, VR128:$src2,
(ld_frag128 addr:$src3)))]>, VEX_W, MemOp4;
def mr : FMA4<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, f128mem:$src2, VR128:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst,
- (Int128 VR128:$src1, (ld_frag128 addr:$src2), VR128:$src3))]>;
+ (OpNode VR128:$src1, (ld_frag128 addr:$src2), VR128:$src3))]>;
+ let isCommutable = 1 in
def rrY : FMA4<opc, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2, VR256:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR256:$dst,
- (Int256 VR256:$src1, VR256:$src2, VR256:$src3))]>, VEX_W, MemOp4;
+ (OpVT256 (OpNode VR256:$src1, VR256:$src2, VR256:$src3)))]>,
+ VEX_W, MemOp4, VEX_L;
def rmY : FMA4<opc, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2, f256mem:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- [(set VR256:$dst, (Int256 VR256:$src1, VR256:$src2,
- (ld_frag256 addr:$src3)))]>, VEX_W, MemOp4;
+ [(set VR256:$dst, (OpNode VR256:$src1, VR256:$src2,
+ (ld_frag256 addr:$src3)))]>, VEX_W, MemOp4, VEX_L;
def mrY : FMA4<opc, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, f256mem:$src2, VR256:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- [(set VR256:$dst,
- (Int256 VR256:$src1, (ld_frag256 addr:$src2), VR256:$src3))]>;
+ [(set VR256:$dst, (OpNode VR256:$src1,
+ (ld_frag256 addr:$src2), VR256:$src3))]>, VEX_L;
// For disassembler
let isCodeGenOnly = 1 in {
def rr_REV : FMA4<opc, MRMSrcReg, (outs VR128:$dst),
@@ -336,51 +302,65 @@
def rrY_REV : FMA4<opc, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2, VR256:$src3),
!strconcat(OpcodeStr,
- "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), []>;
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), []>,
+ VEX_L;
} // isCodeGenOnly = 1
}
let Predicates = [HasFMA4] in {
-defm VFMADDSS4 : fma4s<0x6A, "vfmaddss", ssmem, sse_load_f32,
- int_x86_fma_vfmadd_ss>;
-defm VFMADDSD4 : fma4s<0x6B, "vfmaddsd", sdmem, sse_load_f64,
- int_x86_fma_vfmadd_sd>;
-defm VFMADDPS4 : fma4p<0x68, "vfmaddps", int_x86_fma_vfmadd_ps,
- int_x86_fma_vfmadd_ps_256, memopv4f32, memopv8f32>;
-defm VFMADDPD4 : fma4p<0x69, "vfmaddpd", int_x86_fma_vfmadd_pd,
- int_x86_fma_vfmadd_pd_256, memopv2f64, memopv4f64>;
-defm VFMSUBSS4 : fma4s<0x6E, "vfmsubss", ssmem, sse_load_f32,
- int_x86_fma_vfmsub_ss>;
-defm VFMSUBSD4 : fma4s<0x6F, "vfmsubsd", sdmem, sse_load_f64,
- int_x86_fma_vfmsub_sd>;
-defm VFMSUBPS4 : fma4p<0x6C, "vfmsubps", int_x86_fma_vfmsub_ps,
- int_x86_fma_vfmsub_ps_256, memopv4f32, memopv8f32>;
-defm VFMSUBPD4 : fma4p<0x6D, "vfmsubpd", int_x86_fma_vfmsub_pd,
- int_x86_fma_vfmsub_pd_256, memopv2f64, memopv4f64>;
-defm VFNMADDSS4 : fma4s<0x7A, "vfnmaddss", ssmem, sse_load_f32,
- int_x86_fma_vfnmadd_ss>;
-defm VFNMADDSD4 : fma4s<0x7B, "vfnmaddsd", sdmem, sse_load_f64,
- int_x86_fma_vfnmadd_sd>;
-defm VFNMADDPS4 : fma4p<0x78, "vfnmaddps", int_x86_fma_vfnmadd_ps,
- int_x86_fma_vfnmadd_ps_256, memopv4f32, memopv8f32>;
-defm VFNMADDPD4 : fma4p<0x79, "vfnmaddpd", int_x86_fma_vfnmadd_pd,
- int_x86_fma_vfnmadd_pd_256, memopv2f64, memopv4f64>;
-defm VFNMSUBSS4 : fma4s<0x7E, "vfnmsubss", ssmem, sse_load_f32,
- int_x86_fma_vfnmsub_ss>;
-defm VFNMSUBSD4 : fma4s<0x7F, "vfnmsubsd", sdmem, sse_load_f64,
- int_x86_fma_vfnmsub_sd>;
-defm VFNMSUBPS4 : fma4p<0x7C, "vfnmsubps", int_x86_fma_vfnmsub_ps,
- int_x86_fma_vfnmsub_ps_256, memopv4f32, memopv8f32>;
-defm VFNMSUBPD4 : fma4p<0x7D, "vfnmsubpd", int_x86_fma_vfnmsub_pd,
- int_x86_fma_vfnmsub_pd_256, memopv2f64, memopv4f64>;
-defm VFMADDSUBPS4 : fma4p<0x5C, "vfmaddsubps", int_x86_fma_vfmaddsub_ps,
- int_x86_fma_vfmaddsub_ps_256, memopv4f32, memopv8f32>;
-defm VFMADDSUBPD4 : fma4p<0x5D, "vfmaddsubpd", int_x86_fma_vfmaddsub_pd,
- int_x86_fma_vfmaddsub_pd_256, memopv2f64, memopv4f64>;
-defm VFMSUBADDPS4 : fma4p<0x5E, "vfmsubaddps", int_x86_fma_vfmsubadd_ps,
- int_x86_fma_vfmsubadd_ps_256, memopv4f32, memopv8f32>;
-defm VFMSUBADDPD4 : fma4p<0x5F, "vfmsubaddpd", int_x86_fma_vfmsubadd_pd,
- int_x86_fma_vfmsubadd_pd_256, memopv2f64, memopv4f64>;
+defm VFMADDSS4 : fma4s<0x6A, "vfmaddss", FR32, f32mem, f32, X86Fmadd, loadf32>,
+ fma4s_int<0x6A, "vfmaddss", ssmem, sse_load_f32,
+ int_x86_fma_vfmadd_ss>;
+defm VFMADDSD4 : fma4s<0x6B, "vfmaddsd", FR64, f64mem, f64, X86Fmadd, loadf64>,
+ fma4s_int<0x6B, "vfmaddsd", sdmem, sse_load_f64,
+ int_x86_fma_vfmadd_sd>;
+defm VFMSUBSS4 : fma4s<0x6E, "vfmsubss", FR32, f32mem, f32, X86Fmsub, loadf32>,
+ fma4s_int<0x6E, "vfmsubss", ssmem, sse_load_f32,
+ int_x86_fma_vfmsub_ss>;
+defm VFMSUBSD4 : fma4s<0x6F, "vfmsubsd", FR64, f64mem, f64, X86Fmsub, loadf64>,
+ fma4s_int<0x6F, "vfmsubsd", sdmem, sse_load_f64,
+ int_x86_fma_vfmsub_sd>;
+defm VFNMADDSS4 : fma4s<0x7A, "vfnmaddss", FR32, f32mem, f32,
+ X86Fnmadd, loadf32>,
+ fma4s_int<0x7A, "vfnmaddss", ssmem, sse_load_f32,
+ int_x86_fma_vfnmadd_ss>;
+defm VFNMADDSD4 : fma4s<0x7B, "vfnmaddsd", FR64, f64mem, f64,
+ X86Fnmadd, loadf64>,
+ fma4s_int<0x7B, "vfnmaddsd", sdmem, sse_load_f64,
+ int_x86_fma_vfnmadd_sd>;
+defm VFNMSUBSS4 : fma4s<0x7E, "vfnmsubss", FR32, f32mem, f32,
+ X86Fnmsub, loadf32>,
+ fma4s_int<0x7E, "vfnmsubss", ssmem, sse_load_f32,
+ int_x86_fma_vfnmsub_ss>;
+defm VFNMSUBSD4 : fma4s<0x7F, "vfnmsubsd", FR64, f64mem, f64,
+ X86Fnmsub, loadf64>,
+ fma4s_int<0x7F, "vfnmsubsd", sdmem, sse_load_f64,
+ int_x86_fma_vfnmsub_sd>;
+
+defm VFMADDPS4 : fma4p<0x68, "vfmaddps", X86Fmadd, v4f32, v8f32,
+ memopv4f32, memopv8f32>;
+defm VFMADDPD4 : fma4p<0x69, "vfmaddpd", X86Fmadd, v2f64, v4f64,
+ memopv2f64, memopv4f64>;
+defm VFMSUBPS4 : fma4p<0x6C, "vfmsubps", X86Fmsub, v4f32, v8f32,
+ memopv4f32, memopv8f32>;
+defm VFMSUBPD4 : fma4p<0x6D, "vfmsubpd", X86Fmsub, v2f64, v4f64,
+ memopv2f64, memopv4f64>;
+defm VFNMADDPS4 : fma4p<0x78, "vfnmaddps", X86Fnmadd, v4f32, v8f32,
+ memopv4f32, memopv8f32>;
+defm VFNMADDPD4 : fma4p<0x79, "vfnmaddpd", X86Fnmadd, v2f64, v4f64,
+ memopv2f64, memopv4f64>;
+defm VFNMSUBPS4 : fma4p<0x7C, "vfnmsubps", X86Fnmsub, v4f32, v8f32,
+ memopv4f32, memopv8f32>;
+defm VFNMSUBPD4 : fma4p<0x7D, "vfnmsubpd", X86Fnmsub, v2f64, v4f64,
+ memopv2f64, memopv4f64>;
+defm VFMADDSUBPS4 : fma4p<0x5C, "vfmaddsubps", X86Fmaddsub, v4f32, v8f32,
+ memopv4f32, memopv8f32>;
+defm VFMADDSUBPD4 : fma4p<0x5D, "vfmaddsubpd", X86Fmaddsub, v2f64, v4f64,
+ memopv2f64, memopv4f64>;
+defm VFMSUBADDPS4 : fma4p<0x5E, "vfmsubaddps", X86Fmsubadd, v4f32, v8f32,
+ memopv4f32, memopv8f32>;
+defm VFMSUBADDPD4 : fma4p<0x5F, "vfmsubaddpd", X86Fmsubadd, v2f64, v4f64,
+ memopv2f64, memopv4f64>;
} // HasFMA4
Modified: llvm/branches/AMDILBackend/lib/Target/X86/X86InstrFormats.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/X86/X86InstrFormats.td?rev=172541&r1=172540&r2=172541&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/X86/X86InstrFormats.td (original)
+++ llvm/branches/AMDILBackend/lib/Target/X86/X86InstrFormats.td Tue Jan 15 11:16:16 2013
@@ -44,14 +44,15 @@
def MRM_D0 : Format<45>;
def MRM_D1 : Format<46>;
def MRM_D4 : Format<47>;
-def MRM_D8 : Format<48>;
-def MRM_D9 : Format<49>;
-def MRM_DA : Format<50>;
-def MRM_DB : Format<51>;
-def MRM_DC : Format<52>;
-def MRM_DD : Format<53>;
-def MRM_DE : Format<54>;
-def MRM_DF : Format<55>;
+def MRM_D5 : Format<48>;
+def MRM_D8 : Format<49>;
+def MRM_D9 : Format<50>;
+def MRM_DA : Format<51>;
+def MRM_DB : Format<52>;
+def MRM_DC : Format<53>;
+def MRM_DD : Format<54>;
+def MRM_DE : Format<55>;
+def MRM_DF : Format<56>;
// ImmType - This specifies the immediate type used by an instruction. This is
// part of the ad-hoc solution used to emit machine instruction encodings by our
@@ -287,12 +288,14 @@
let CodeSize = 3;
}
+def __xs : XS;
+
// SI - SSE 1 & 2 scalar instructions
class SI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
: I<o, F, outs, ins, asm, pattern, itin> {
let Predicates = !if(hasVEXPrefix /* VEX */, [HasAVX],
- !if(!eq(Prefix, 12 /* XS */), [HasSSE1], [HasSSE2]));
+ !if(!eq(Prefix, __xs.Prefix), [UseSSE1], [UseSSE2]));
// AVX instructions have a 'v' prefix in the mnemonic
let AsmString = !if(hasVEXPrefix, !strconcat("v", asm), asm);
@@ -303,7 +306,7 @@
list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
: Ii8<o, F, outs, ins, asm, pattern, itin> {
let Predicates = !if(hasVEXPrefix /* VEX */, [HasAVX],
- !if(!eq(Prefix, 12 /* XS */), [HasSSE1], [HasSSE2]));
+ !if(!eq(Prefix, __xs.Prefix), [UseSSE1], [UseSSE2]));
// AVX instructions have a 'v' prefix in the mnemonic
let AsmString = !if(hasVEXPrefix, !strconcat("v", asm), asm);
@@ -314,18 +317,25 @@
InstrItinClass itin, Domain d>
: I<o, F, outs, ins, asm, pattern, itin, d> {
let Predicates = !if(hasVEXPrefix /* VEX */, [HasAVX],
- !if(hasOpSizePrefix /* OpSize */, [HasSSE2], [HasSSE1]));
+ !if(hasOpSizePrefix /* OpSize */, [UseSSE2], [UseSSE1]));
// AVX instructions have a 'v' prefix in the mnemonic
let AsmString = !if(hasVEXPrefix, !strconcat("v", asm), asm);
}
+// MMXPI - SSE 1 & 2 packed instructions with MMX operands
+class MMXPI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern,
+ InstrItinClass itin, Domain d>
+ : I<o, F, outs, ins, asm, pattern, itin, d> {
+ let Predicates = !if(hasOpSizePrefix /* OpSize */, [HasSSE2], [HasSSE1]);
+}
+
// PIi8 - SSE 1 & 2 packed instructions with immediate
class PIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin, Domain d>
: Ii8<o, F, outs, ins, asm, pattern, itin, d> {
let Predicates = !if(hasVEX_4VPrefix /* VEX */, [HasAVX],
- !if(hasOpSizePrefix /* OpSize */, [HasSSE2], [HasSSE1]));
+ !if(hasOpSizePrefix /* OpSize */, [UseSSE2], [UseSSE1]));
// AVX instructions have a 'v' prefix in the mnemonic
let AsmString = !if(hasVEX_4VPrefix, !strconcat("v", asm), asm);
@@ -341,18 +351,18 @@
class SSI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
- : I<o, F, outs, ins, asm, pattern, itin>, XS, Requires<[HasSSE1]>;
+ : I<o, F, outs, ins, asm, pattern, itin>, XS, Requires<[UseSSE1]>;
class SSIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
- : Ii8<o, F, outs, ins, asm, pattern, itin>, XS, Requires<[HasSSE1]>;
+ : Ii8<o, F, outs, ins, asm, pattern, itin>, XS, Requires<[UseSSE1]>;
class PSI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
: I<o, F, outs, ins, asm, pattern, itin, SSEPackedSingle>, TB,
- Requires<[HasSSE1]>;
+ Requires<[UseSSE1]>;
class PSIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
: Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedSingle>, TB,
- Requires<[HasSSE1]>;
+ Requires<[UseSSE1]>;
class VSSI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
: I<o, F, outs, ins, !strconcat("v", asm), pattern, itin>, XS,
@@ -372,27 +382,31 @@
// PDIi8 - SSE2 instructions with ImmT == Imm8 and TB and OpSize prefixes.
// VSDI - SSE2 instructions with XD prefix in AVX form.
// VPDI - SSE2 instructions with TB and OpSize prefixes in AVX form.
+// MMXSDIi8 - SSE2 instructions with ImmT == Imm8 and XD prefix as well as
+// MMX operands.
+// MMXSSDIi8 - SSE2 instructions with ImmT == Imm8 and XS prefix as well as
+// MMX operands.
class SDI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
- : I<o, F, outs, ins, asm, pattern, itin>, XD, Requires<[HasSSE2]>;
+ : I<o, F, outs, ins, asm, pattern, itin>, XD, Requires<[UseSSE2]>;
class SDIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
- : Ii8<o, F, outs, ins, asm, pattern, itin>, XD, Requires<[HasSSE2]>;
+ : Ii8<o, F, outs, ins, asm, pattern, itin>, XD, Requires<[UseSSE2]>;
class S2SI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
- : I<o, F, outs, ins, asm, pattern, itin>, XS, Requires<[HasSSE2]>;
+ : I<o, F, outs, ins, asm, pattern, itin>, XS, Requires<[UseSSE2]>;
class S2SIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
- : Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[HasSSE2]>;
+ : Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[UseSSE2]>;
class PDI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
: I<o, F, outs, ins, asm, pattern, itin, SSEPackedDouble>, TB, OpSize,
- Requires<[HasSSE2]>;
+ Requires<[UseSSE2]>;
class PDIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
: Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedDouble>, TB, OpSize,
- Requires<[HasSSE2]>;
+ Requires<[UseSSE2]>;
class VSDI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
: I<o, F, outs, ins, !strconcat("v", asm), pattern, itin>, XD,
@@ -405,6 +419,12 @@
list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
: I<o, F, outs, ins, !strconcat("v", asm), pattern, itin, SSEPackedDouble>, TB,
OpSize, Requires<[HasAVX]>;
+class MMXSDIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : Ii8<o, F, outs, ins, asm, pattern, itin>, XD, Requires<[HasSSE2]>;
+class MMXS2SIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[HasSSE2]>;
// SSE3 Instruction Templates:
//
@@ -415,21 +435,23 @@
class S3SI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
: I<o, F, outs, ins, asm, pattern, itin, SSEPackedSingle>, XS,
- Requires<[HasSSE3]>;
+ Requires<[UseSSE3]>;
class S3DI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
: I<o, F, outs, ins, asm, pattern, itin, SSEPackedDouble>, XD,
- Requires<[HasSSE3]>;
+ Requires<[UseSSE3]>;
class S3I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
: I<o, F, outs, ins, asm, pattern, itin, SSEPackedDouble>, TB, OpSize,
- Requires<[HasSSE3]>;
+ Requires<[UseSSE3]>;
// SSSE3 Instruction Templates:
//
// SS38I - SSSE3 instructions with T8 prefix.
// SS3AI - SSSE3 instructions with TA prefix.
+// MMXSS38I - SSSE3 instructions with T8 prefix and MMX operands.
+// MMXSS3AI - SSSE3 instructions with TA prefix and MMX operands.
//
// Note: SSSE3 instructions have 64-bit and 128-bit versions. The 64-bit version
// uses the MMX registers. The 64-bit versions are grouped with the MMX
@@ -438,10 +460,18 @@
class SS38I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
: I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, T8,
- Requires<[HasSSSE3]>;
+ Requires<[UseSSSE3]>;
class SS3AI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
: Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA,
+ Requires<[UseSSSE3]>;
+class MMXSS38I<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, T8,
+ Requires<[HasSSSE3]>;
+class MMXSS3AI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA,
Requires<[HasSSSE3]>;
// SSE4.1 Instruction Templates:
@@ -452,11 +482,11 @@
class SS48I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
: I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, T8,
- Requires<[HasSSE41]>;
+ Requires<[UseSSE41]>;
class SS4AIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
: Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA,
- Requires<[HasSSE41]>;
+ Requires<[UseSSE41]>;
// SSE4.2 Instruction Templates:
//
@@ -464,9 +494,10 @@
class SS428I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
: I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, T8,
- Requires<[HasSSE42]>;
+ Requires<[UseSSE42]>;
// SS42FI - SSE 4.2 instructions with T8XD prefix.
+// NOTE: 'HasSSE42' is used as SS42FI is only used for CRC32 insns.
class SS42FI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
: I<o, F, outs, ins, asm, pattern, itin>, T8XD, Requires<[HasSSE42]>;
@@ -475,7 +506,7 @@
class SS42AI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
: Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA,
- Requires<[HasSSE42]>;
+ Requires<[UseSSE42]>;
// AVX Instruction Templates:
// Instructions introduced in AVX (no SSE equivalent forms)
Modified: llvm/branches/AMDILBackend/lib/Target/X86/X86InstrFragmentsSIMD.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/X86/X86InstrFragmentsSIMD.td?rev=172541&r1=172540&r2=172541&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/X86/X86InstrFragmentsSIMD.td (original)
+++ llvm/branches/AMDILBackend/lib/Target/X86/X86InstrFragmentsSIMD.td Tue Jan 15 11:16:16 2013
@@ -29,6 +29,13 @@
def X86fmin : SDNode<"X86ISD::FMIN", SDTFPBinOp>;
def X86fmax : SDNode<"X86ISD::FMAX", SDTFPBinOp>;
+
+// Commutative and Associative FMIN and FMAX.
+def X86fminc : SDNode<"X86ISD::FMINC", SDTFPBinOp,
+ [SDNPCommutative, SDNPAssociative]>;
+def X86fmaxc : SDNode<"X86ISD::FMAXC", SDTFPBinOp,
+ [SDNPCommutative, SDNPAssociative]>;
+
def X86fand : SDNode<"X86ISD::FAND", SDTFPBinOp,
[SDNPCommutative, SDNPAssociative]>;
def X86for : SDNode<"X86ISD::FOR", SDTFPBinOp,
@@ -73,14 +80,31 @@
SDTypeProfile<1, 1, [SDTCisSameAs<0,1>]>>;
def X86vzmovly : SDNode<"X86ISD::VZEXT_MOVL",
- SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
+ SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
SDTCisOpSmallerThanOp<1, 0> ]>>;
def X86vsmovl : SDNode<"X86ISD::VSEXT_MOVL",
- SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisInt<1>, SDTCisInt<0>]>>;
+ SDTypeProfile<1, 1,
+ [SDTCisVec<0>, SDTCisInt<1>, SDTCisInt<0>]>>;
def X86vzload : SDNode<"X86ISD::VZEXT_LOAD", SDTLoad,
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+
+def X86vzext : SDNode<"X86ISD::VZEXT",
+ SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
+ SDTCisInt<0>, SDTCisInt<1>]>>;
+
+def X86vsext : SDNode<"X86ISD::VSEXT",
+ SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
+ SDTCisInt<0>, SDTCisInt<1>]>>;
+
+def X86vfpext : SDNode<"X86ISD::VFPEXT",
+ SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
+ SDTCisFP<0>, SDTCisFP<1>]>>;
+def X86vfpround: SDNode<"X86ISD::VFPROUND",
+ SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
+ SDTCisFP<0>, SDTCisFP<1>]>>;
+
def X86vshldq : SDNode<"X86ISD::VSHLDQ", SDTIntShiftOp>;
def X86vshrdq : SDNode<"X86ISD::VSRLDQ", SDTIntShiftOp>;
def X86cmpp : SDNode<"X86ISD::CMPP", SDTX86VFCMP>;
@@ -170,8 +194,8 @@
def X86Fnmadd : SDNode<"X86ISD::FNMADD", SDTFma>;
def X86Fmsub : SDNode<"X86ISD::FMSUB", SDTFma>;
def X86Fnmsub : SDNode<"X86ISD::FNMSUB", SDTFma>;
-def X86Fmaddsub : SDNode<"X86ISD::FMSUBADD", SDTFma>;
-def X86Fmsubadd : SDNode<"X86ISD::FMADDSUB", SDTFma>;
+def X86Fmaddsub : SDNode<"X86ISD::FMADDSUB", SDTFma>;
+def X86Fmsubadd : SDNode<"X86ISD::FMSUBADD", SDTFma>;
def SDT_PCMPISTRI : SDTypeProfile<2, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
SDTCisVT<2, v16i8>, SDTCisVT<3, v16i8>,
@@ -227,6 +251,10 @@
def loadv4f64 : PatFrag<(ops node:$ptr), (v4f64 (load node:$ptr))>;
def loadv4i64 : PatFrag<(ops node:$ptr), (v4i64 (load node:$ptr))>;
+// 128-/256-bit extload pattern fragments
+def extloadv2f32 : PatFrag<(ops node:$ptr), (v2f64 (extloadvf32 node:$ptr))>;
+def extloadv4f32 : PatFrag<(ops node:$ptr), (v4f64 (extloadvf32 node:$ptr))>;
+
// Like 'store', but always requires 128-bit vector alignment.
def alignedstore : PatFrag<(ops node:$val, node:$ptr),
(store node:$val, node:$ptr), [{
Modified: llvm/branches/AMDILBackend/lib/Target/X86/X86InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/X86/X86InstrInfo.cpp?rev=172541&r1=172540&r2=172541&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/X86/X86InstrInfo.cpp (original)
+++ llvm/branches/AMDILBackend/lib/Target/X86/X86InstrInfo.cpp Tue Jan 15 11:16:16 2013
@@ -414,12 +414,6 @@
{ X86::CVTSD2SIrr, X86::CVTSD2SIrm, 0 },
{ X86::CVTSS2SI64rr, X86::CVTSS2SI64rm, 0 },
{ X86::CVTSS2SIrr, X86::CVTSS2SIrm, 0 },
- { X86::Int_CVTSD2SSrr, X86::Int_CVTSD2SSrm, 0 },
- { X86::Int_CVTSI2SD64rr,X86::Int_CVTSI2SD64rm, 0 },
- { X86::Int_CVTSI2SDrr, X86::Int_CVTSI2SDrm, 0 },
- { X86::Int_CVTSI2SS64rr,X86::Int_CVTSI2SS64rm, 0 },
- { X86::Int_CVTSI2SSrr, X86::Int_CVTSI2SSrm, 0 },
- { X86::Int_CVTSS2SDrr, X86::Int_CVTSS2SDrm, 0 },
{ X86::CVTTPD2DQrr, X86::CVTTPD2DQrm, TB_ALIGN_16 },
{ X86::CVTTPS2DQrr, X86::CVTTPS2DQrm, TB_ALIGN_16 },
{ X86::Int_CVTTSD2SI64rr,X86::Int_CVTTSD2SI64rm, 0 },
@@ -567,6 +561,16 @@
{ X86::VSQRTPSYr_Int, X86::VSQRTPSYm_Int, TB_ALIGN_32 },
{ X86::VBROADCASTSSYrr, X86::VBROADCASTSSYrm, TB_NO_REVERSE },
{ X86::VBROADCASTSDYrr, X86::VBROADCASTSDYrm, TB_NO_REVERSE },
+
+ // BMI/BMI2 foldable instructions
+ { X86::RORX32ri, X86::RORX32mi, 0 },
+ { X86::RORX64ri, X86::RORX64mi, 0 },
+ { X86::SARX32rr, X86::SARX32rm, 0 },
+ { X86::SARX64rr, X86::SARX64rm, 0 },
+ { X86::SHRX32rr, X86::SHRX32rm, 0 },
+ { X86::SHRX64rr, X86::SHRX64rm, 0 },
+ { X86::SHLX32rr, X86::SHLX32rm, 0 },
+ { X86::SHLX64rr, X86::SHLX64rm, 0 },
};
for (unsigned i = 0, e = array_lengthof(OpTbl1); i != e; ++i) {
@@ -680,6 +684,12 @@
{ X86::IMUL64rr, X86::IMUL64rm, 0 },
{ X86::Int_CMPSDrr, X86::Int_CMPSDrm, 0 },
{ X86::Int_CMPSSrr, X86::Int_CMPSSrm, 0 },
+ { X86::Int_CVTSD2SSrr, X86::Int_CVTSD2SSrm, 0 },
+ { X86::Int_CVTSI2SD64rr,X86::Int_CVTSI2SD64rm, 0 },
+ { X86::Int_CVTSI2SDrr, X86::Int_CVTSI2SDrm, 0 },
+ { X86::Int_CVTSI2SS64rr,X86::Int_CVTSI2SS64rm, 0 },
+ { X86::Int_CVTSI2SSrr, X86::Int_CVTSI2SSrm, 0 },
+ { X86::Int_CVTSS2SDrr, X86::Int_CVTSS2SDrm, 0 },
{ X86::MAXPDrr, X86::MAXPDrm, TB_ALIGN_16 },
{ X86::MAXPDrr_Int, X86::MAXPDrm_Int, TB_ALIGN_16 },
{ X86::MAXPSrr, X86::MAXPSrm, TB_ALIGN_16 },
@@ -1110,6 +1120,44 @@
{ X86::VPUNPCKLWDYrr, X86::VPUNPCKLWDYrm, TB_ALIGN_32 },
{ X86::VPXORYrr, X86::VPXORYrm, TB_ALIGN_32 },
// FIXME: add AVX 256-bit foldable instructions
+
+ // FMA4 foldable patterns
+ { X86::VFMADDSS4rr, X86::VFMADDSS4mr, 0 },
+ { X86::VFMADDSD4rr, X86::VFMADDSD4mr, 0 },
+ { X86::VFMADDPS4rr, X86::VFMADDPS4mr, TB_ALIGN_16 },
+ { X86::VFMADDPD4rr, X86::VFMADDPD4mr, TB_ALIGN_16 },
+ { X86::VFMADDPS4rrY, X86::VFMADDPS4mrY, TB_ALIGN_32 },
+ { X86::VFMADDPD4rrY, X86::VFMADDPD4mrY, TB_ALIGN_32 },
+ { X86::VFNMADDSS4rr, X86::VFNMADDSS4mr, 0 },
+ { X86::VFNMADDSD4rr, X86::VFNMADDSD4mr, 0 },
+ { X86::VFNMADDPS4rr, X86::VFNMADDPS4mr, TB_ALIGN_16 },
+ { X86::VFNMADDPD4rr, X86::VFNMADDPD4mr, TB_ALIGN_16 },
+ { X86::VFNMADDPS4rrY, X86::VFNMADDPS4mrY, TB_ALIGN_32 },
+ { X86::VFNMADDPD4rrY, X86::VFNMADDPD4mrY, TB_ALIGN_32 },
+ { X86::VFMSUBSS4rr, X86::VFMSUBSS4mr, 0 },
+ { X86::VFMSUBSD4rr, X86::VFMSUBSD4mr, 0 },
+ { X86::VFMSUBPS4rr, X86::VFMSUBPS4mr, TB_ALIGN_16 },
+ { X86::VFMSUBPD4rr, X86::VFMSUBPD4mr, TB_ALIGN_16 },
+ { X86::VFMSUBPS4rrY, X86::VFMSUBPS4mrY, TB_ALIGN_32 },
+ { X86::VFMSUBPD4rrY, X86::VFMSUBPD4mrY, TB_ALIGN_32 },
+ { X86::VFNMSUBSS4rr, X86::VFNMSUBSS4mr, 0 },
+ { X86::VFNMSUBSD4rr, X86::VFNMSUBSD4mr, 0 },
+ { X86::VFNMSUBPS4rr, X86::VFNMSUBPS4mr, TB_ALIGN_16 },
+ { X86::VFNMSUBPD4rr, X86::VFNMSUBPD4mr, TB_ALIGN_16 },
+ { X86::VFNMSUBPS4rrY, X86::VFNMSUBPS4mrY, TB_ALIGN_32 },
+ { X86::VFNMSUBPD4rrY, X86::VFNMSUBPD4mrY, TB_ALIGN_32 },
+ { X86::VFMADDSUBPS4rr, X86::VFMADDSUBPS4mr, TB_ALIGN_16 },
+ { X86::VFMADDSUBPD4rr, X86::VFMADDSUBPD4mr, TB_ALIGN_16 },
+ { X86::VFMADDSUBPS4rrY, X86::VFMADDSUBPS4mrY, TB_ALIGN_32 },
+ { X86::VFMADDSUBPD4rrY, X86::VFMADDSUBPD4mrY, TB_ALIGN_32 },
+ { X86::VFMSUBADDPS4rr, X86::VFMSUBADDPS4mr, TB_ALIGN_16 },
+ { X86::VFMSUBADDPD4rr, X86::VFMSUBADDPD4mr, TB_ALIGN_16 },
+ { X86::VFMSUBADDPS4rrY, X86::VFMSUBADDPS4mrY, TB_ALIGN_32 },
+ { X86::VFMSUBADDPD4rrY, X86::VFMSUBADDPD4mrY, TB_ALIGN_32 },
+
+ // BMI/BMI2 foldable instructions
+ { X86::MULX32rr, X86::MULX32rm, 0 },
+ { X86::MULX64rr, X86::MULX64rm, 0 },
};
for (unsigned i = 0, e = array_lengthof(OpTbl2); i != e; ++i) {
@@ -1145,10 +1193,6 @@
{ X86::VFMADDPDr132rY, X86::VFMADDPDr132mY, TB_ALIGN_32 },
{ X86::VFMADDPSr213rY, X86::VFMADDPSr213mY, TB_ALIGN_32 },
{ X86::VFMADDPDr213rY, X86::VFMADDPDr213mY, TB_ALIGN_32 },
- { X86::VFMADDPSr213r_Int, X86::VFMADDPSr213m_Int, TB_ALIGN_16 },
- { X86::VFMADDPDr213r_Int, X86::VFMADDPDr213m_Int, TB_ALIGN_16 },
- { X86::VFMADDPSr213rY_Int, X86::VFMADDPSr213mY_Int, TB_ALIGN_32 },
- { X86::VFMADDPDr213rY_Int, X86::VFMADDPDr213mY_Int, TB_ALIGN_32 },
{ X86::VFNMADDSSr231r, X86::VFNMADDSSr231m, 0 },
{ X86::VFNMADDSDr231r, X86::VFNMADDSDr231m, 0 },
@@ -1171,10 +1215,6 @@
{ X86::VFNMADDPDr132rY, X86::VFNMADDPDr132mY, TB_ALIGN_32 },
{ X86::VFNMADDPSr213rY, X86::VFNMADDPSr213mY, TB_ALIGN_32 },
{ X86::VFNMADDPDr213rY, X86::VFNMADDPDr213mY, TB_ALIGN_32 },
- { X86::VFNMADDPSr213r_Int, X86::VFNMADDPSr213m_Int, TB_ALIGN_16 },
- { X86::VFNMADDPDr213r_Int, X86::VFNMADDPDr213m_Int, TB_ALIGN_16 },
- { X86::VFNMADDPSr213rY_Int, X86::VFNMADDPSr213mY_Int, TB_ALIGN_32 },
- { X86::VFNMADDPDr213rY_Int, X86::VFNMADDPDr213mY_Int, TB_ALIGN_32 },
{ X86::VFMSUBSSr231r, X86::VFMSUBSSr231m, 0 },
{ X86::VFMSUBSDr231r, X86::VFMSUBSDr231m, 0 },
@@ -1197,10 +1237,6 @@
{ X86::VFMSUBPDr132rY, X86::VFMSUBPDr132mY, TB_ALIGN_32 },
{ X86::VFMSUBPSr213rY, X86::VFMSUBPSr213mY, TB_ALIGN_32 },
{ X86::VFMSUBPDr213rY, X86::VFMSUBPDr213mY, TB_ALIGN_32 },
- { X86::VFMSUBPSr213r_Int, X86::VFMSUBPSr213m_Int, TB_ALIGN_16 },
- { X86::VFMSUBPDr213r_Int, X86::VFMSUBPDr213m_Int, TB_ALIGN_16 },
- { X86::VFMSUBPSr213rY_Int, X86::VFMSUBPSr213mY_Int, TB_ALIGN_32 },
- { X86::VFMSUBPDr213rY_Int, X86::VFMSUBPDr213mY_Int, TB_ALIGN_32 },
{ X86::VFNMSUBSSr231r, X86::VFNMSUBSSr231m, 0 },
{ X86::VFNMSUBSDr231r, X86::VFNMSUBSDr231m, 0 },
@@ -1223,10 +1259,6 @@
{ X86::VFNMSUBPDr132rY, X86::VFNMSUBPDr132mY, TB_ALIGN_32 },
{ X86::VFNMSUBPSr213rY, X86::VFNMSUBPSr213mY, TB_ALIGN_32 },
{ X86::VFNMSUBPDr213rY, X86::VFNMSUBPDr213mY, TB_ALIGN_32 },
- { X86::VFNMSUBPSr213r_Int, X86::VFNMSUBPSr213m_Int, TB_ALIGN_16 },
- { X86::VFNMSUBPDr213r_Int, X86::VFNMSUBPDr213m_Int, TB_ALIGN_16 },
- { X86::VFNMSUBPSr213rY_Int, X86::VFNMSUBPSr213mY_Int, TB_ALIGN_32 },
- { X86::VFNMSUBPDr213rY_Int, X86::VFNMSUBPDr213mY_Int, TB_ALIGN_32 },
{ X86::VFMADDSUBPSr231r, X86::VFMADDSUBPSr231m, TB_ALIGN_16 },
{ X86::VFMADDSUBPDr231r, X86::VFMADDSUBPDr231m, TB_ALIGN_16 },
@@ -1240,10 +1272,6 @@
{ X86::VFMADDSUBPDr132rY, X86::VFMADDSUBPDr132mY, TB_ALIGN_32 },
{ X86::VFMADDSUBPSr213rY, X86::VFMADDSUBPSr213mY, TB_ALIGN_32 },
{ X86::VFMADDSUBPDr213rY, X86::VFMADDSUBPDr213mY, TB_ALIGN_32 },
- { X86::VFMADDSUBPSr213r_Int, X86::VFMADDSUBPSr213m_Int, TB_ALIGN_16 },
- { X86::VFMADDSUBPDr213r_Int, X86::VFMADDSUBPDr213m_Int, TB_ALIGN_16 },
- { X86::VFMADDSUBPSr213rY_Int, X86::VFMADDSUBPSr213mY_Int, TB_ALIGN_32 },
- { X86::VFMADDSUBPDr213rY_Int, X86::VFMADDSUBPDr213mY_Int, TB_ALIGN_32 },
{ X86::VFMSUBADDPSr231r, X86::VFMSUBADDPSr231m, TB_ALIGN_16 },
{ X86::VFMSUBADDPDr231r, X86::VFMSUBADDPDr231m, TB_ALIGN_16 },
@@ -1257,10 +1285,40 @@
{ X86::VFMSUBADDPDr132rY, X86::VFMSUBADDPDr132mY, TB_ALIGN_32 },
{ X86::VFMSUBADDPSr213rY, X86::VFMSUBADDPSr213mY, TB_ALIGN_32 },
{ X86::VFMSUBADDPDr213rY, X86::VFMSUBADDPDr213mY, TB_ALIGN_32 },
- { X86::VFMSUBADDPSr213r_Int, X86::VFMSUBADDPSr213m_Int, TB_ALIGN_16 },
- { X86::VFMSUBADDPDr213r_Int, X86::VFMSUBADDPDr213m_Int, TB_ALIGN_16 },
- { X86::VFMSUBADDPSr213rY_Int, X86::VFMSUBADDPSr213mY_Int, TB_ALIGN_32 },
- { X86::VFMSUBADDPDr213rY_Int, X86::VFMSUBADDPDr213mY_Int, TB_ALIGN_32 },
+
+ // FMA4 foldable patterns
+ { X86::VFMADDSS4rr, X86::VFMADDSS4rm, 0 },
+ { X86::VFMADDSD4rr, X86::VFMADDSD4rm, 0 },
+ { X86::VFMADDPS4rr, X86::VFMADDPS4rm, TB_ALIGN_16 },
+ { X86::VFMADDPD4rr, X86::VFMADDPD4rm, TB_ALIGN_16 },
+ { X86::VFMADDPS4rrY, X86::VFMADDPS4rmY, TB_ALIGN_32 },
+ { X86::VFMADDPD4rrY, X86::VFMADDPD4rmY, TB_ALIGN_32 },
+ { X86::VFNMADDSS4rr, X86::VFNMADDSS4rm, 0 },
+ { X86::VFNMADDSD4rr, X86::VFNMADDSD4rm, 0 },
+ { X86::VFNMADDPS4rr, X86::VFNMADDPS4rm, TB_ALIGN_16 },
+ { X86::VFNMADDPD4rr, X86::VFNMADDPD4rm, TB_ALIGN_16 },
+ { X86::VFNMADDPS4rrY, X86::VFNMADDPS4rmY, TB_ALIGN_32 },
+ { X86::VFNMADDPD4rrY, X86::VFNMADDPD4rmY, TB_ALIGN_32 },
+ { X86::VFMSUBSS4rr, X86::VFMSUBSS4rm, 0 },
+ { X86::VFMSUBSD4rr, X86::VFMSUBSD4rm, 0 },
+ { X86::VFMSUBPS4rr, X86::VFMSUBPS4rm, TB_ALIGN_16 },
+ { X86::VFMSUBPD4rr, X86::VFMSUBPD4rm, TB_ALIGN_16 },
+ { X86::VFMSUBPS4rrY, X86::VFMSUBPS4rmY, TB_ALIGN_32 },
+ { X86::VFMSUBPD4rrY, X86::VFMSUBPD4rmY, TB_ALIGN_32 },
+ { X86::VFNMSUBSS4rr, X86::VFNMSUBSS4rm, 0 },
+ { X86::VFNMSUBSD4rr, X86::VFNMSUBSD4rm, 0 },
+ { X86::VFNMSUBPS4rr, X86::VFNMSUBPS4rm, TB_ALIGN_16 },
+ { X86::VFNMSUBPD4rr, X86::VFNMSUBPD4rm, TB_ALIGN_16 },
+ { X86::VFNMSUBPS4rrY, X86::VFNMSUBPS4rmY, TB_ALIGN_32 },
+ { X86::VFNMSUBPD4rrY, X86::VFNMSUBPD4rmY, TB_ALIGN_32 },
+ { X86::VFMADDSUBPS4rr, X86::VFMADDSUBPS4rm, TB_ALIGN_16 },
+ { X86::VFMADDSUBPD4rr, X86::VFMADDSUBPD4rm, TB_ALIGN_16 },
+ { X86::VFMADDSUBPS4rrY, X86::VFMADDSUBPS4rmY, TB_ALIGN_32 },
+ { X86::VFMADDSUBPD4rrY, X86::VFMADDSUBPD4rmY, TB_ALIGN_32 },
+ { X86::VFMSUBADDPS4rr, X86::VFMSUBADDPS4rm, TB_ALIGN_16 },
+ { X86::VFMSUBADDPD4rr, X86::VFMSUBADDPD4rm, TB_ALIGN_16 },
+ { X86::VFMSUBADDPS4rrY, X86::VFMSUBADDPS4rmY, TB_ALIGN_32 },
+ { X86::VFMSUBADDPD4rrY, X86::VFMSUBADDPD4rmY, TB_ALIGN_32 },
};
for (unsigned i = 0, e = array_lengthof(OpTbl3); i != e; ++i) {
@@ -1318,8 +1376,7 @@
SrcReg = MI.getOperand(1).getReg();
DstReg = MI.getOperand(0).getReg();
switch (MI.getOpcode()) {
- default:
- llvm_unreachable(0);
+ default: llvm_unreachable("Unreachable!");
case X86::MOVSX16rr8:
case X86::MOVZX16rr8:
case X86::MOVSX32rr8:
@@ -1483,69 +1540,69 @@
AliasAnalysis *AA) const {
switch (MI->getOpcode()) {
default: break;
- case X86::MOV8rm:
- case X86::MOV16rm:
- case X86::MOV32rm:
- case X86::MOV64rm:
- case X86::LD_Fp64m:
- case X86::MOVSSrm:
- case X86::MOVSDrm:
- case X86::MOVAPSrm:
- case X86::MOVUPSrm:
- case X86::MOVAPDrm:
- case X86::MOVDQArm:
- case X86::VMOVSSrm:
- case X86::VMOVSDrm:
- case X86::VMOVAPSrm:
- case X86::VMOVUPSrm:
- case X86::VMOVAPDrm:
- case X86::VMOVDQArm:
- case X86::VMOVAPSYrm:
- case X86::VMOVUPSYrm:
- case X86::VMOVAPDYrm:
- case X86::VMOVDQAYrm:
- case X86::MMX_MOVD64rm:
- case X86::MMX_MOVQ64rm:
- case X86::FsVMOVAPSrm:
- case X86::FsVMOVAPDrm:
- case X86::FsMOVAPSrm:
- case X86::FsMOVAPDrm: {
- // Loads from constant pools are trivially rematerializable.
- if (MI->getOperand(1).isReg() &&
- MI->getOperand(2).isImm() &&
- MI->getOperand(3).isReg() && MI->getOperand(3).getReg() == 0 &&
- MI->isInvariantLoad(AA)) {
- unsigned BaseReg = MI->getOperand(1).getReg();
- if (BaseReg == 0 || BaseReg == X86::RIP)
- return true;
- // Allow re-materialization of PIC load.
- if (!ReMatPICStubLoad && MI->getOperand(4).isGlobal())
- return false;
- const MachineFunction &MF = *MI->getParent()->getParent();
- const MachineRegisterInfo &MRI = MF.getRegInfo();
- return regIsPICBase(BaseReg, MRI);
- }
- return false;
+ case X86::MOV8rm:
+ case X86::MOV16rm:
+ case X86::MOV32rm:
+ case X86::MOV64rm:
+ case X86::LD_Fp64m:
+ case X86::MOVSSrm:
+ case X86::MOVSDrm:
+ case X86::MOVAPSrm:
+ case X86::MOVUPSrm:
+ case X86::MOVAPDrm:
+ case X86::MOVDQArm:
+ case X86::VMOVSSrm:
+ case X86::VMOVSDrm:
+ case X86::VMOVAPSrm:
+ case X86::VMOVUPSrm:
+ case X86::VMOVAPDrm:
+ case X86::VMOVDQArm:
+ case X86::VMOVAPSYrm:
+ case X86::VMOVUPSYrm:
+ case X86::VMOVAPDYrm:
+ case X86::VMOVDQAYrm:
+ case X86::MMX_MOVD64rm:
+ case X86::MMX_MOVQ64rm:
+ case X86::FsVMOVAPSrm:
+ case X86::FsVMOVAPDrm:
+ case X86::FsMOVAPSrm:
+ case X86::FsMOVAPDrm: {
+ // Loads from constant pools are trivially rematerializable.
+ if (MI->getOperand(1).isReg() &&
+ MI->getOperand(2).isImm() &&
+ MI->getOperand(3).isReg() && MI->getOperand(3).getReg() == 0 &&
+ MI->isInvariantLoad(AA)) {
+ unsigned BaseReg = MI->getOperand(1).getReg();
+ if (BaseReg == 0 || BaseReg == X86::RIP)
+ return true;
+ // Allow re-materialization of PIC load.
+ if (!ReMatPICStubLoad && MI->getOperand(4).isGlobal())
+ return false;
+ const MachineFunction &MF = *MI->getParent()->getParent();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ return regIsPICBase(BaseReg, MRI);
}
+ return false;
+ }
- case X86::LEA32r:
- case X86::LEA64r: {
- if (MI->getOperand(2).isImm() &&
- MI->getOperand(3).isReg() && MI->getOperand(3).getReg() == 0 &&
- !MI->getOperand(4).isReg()) {
- // lea fi#, lea GV, etc. are all rematerializable.
- if (!MI->getOperand(1).isReg())
- return true;
- unsigned BaseReg = MI->getOperand(1).getReg();
- if (BaseReg == 0)
- return true;
- // Allow re-materialization of lea PICBase + x.
- const MachineFunction &MF = *MI->getParent()->getParent();
- const MachineRegisterInfo &MRI = MF.getRegInfo();
- return regIsPICBase(BaseReg, MRI);
- }
- return false;
- }
+ case X86::LEA32r:
+ case X86::LEA64r: {
+ if (MI->getOperand(2).isImm() &&
+ MI->getOperand(3).isReg() && MI->getOperand(3).getReg() == 0 &&
+ !MI->getOperand(4).isReg()) {
+ // lea fi#, lea GV, etc. are all rematerializable.
+ if (!MI->getOperand(1).isReg())
+ return true;
+ unsigned BaseReg = MI->getOperand(1).getReg();
+ if (BaseReg == 0)
+ return true;
+ // Allow re-materialization of lea PICBase + x.
+ const MachineFunction &MF = *MI->getParent()->getParent();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ return regIsPICBase(BaseReg, MRI);
+ }
+ return false;
+ }
}
// All other instructions marked M_REMATERIALIZABLE are always trivially
@@ -1654,7 +1711,7 @@
case X86::MOV64r0: {
if (!isSafeToClobberEFLAGS(MBB, I)) {
switch (Opc) {
- default: break;
+ default: llvm_unreachable("Unreachable!");
case X86::MOV8r0: Opc = X86::MOV8ri; break;
case X86::MOV16r0: Opc = X86::MOV16ri; break;
case X86::MOV32r0: Opc = X86::MOV32ri; break;
@@ -1727,8 +1784,7 @@
MachineInstrBuilder MIB = BuildMI(*MFI, MBBI, MI->getDebugLoc(),
get(Opc), leaOutReg);
switch (MIOpc) {
- default:
- llvm_unreachable(0);
+ default: llvm_unreachable("Unreachable!");
case X86::SHL16ri: {
unsigned ShAmt = MI->getOperand(2).getImm();
MIB.addReg(0).addImm(1 << ShAmt)
@@ -1812,10 +1868,8 @@
MachineInstr *MI = MBBI;
MachineFunction &MF = *MI->getParent()->getParent();
// All instructions input are two-addr instructions. Get the known operands.
- unsigned Dest = MI->getOperand(0).getReg();
- unsigned Src = MI->getOperand(1).getReg();
- bool isDead = MI->getOperand(0).isDead();
- bool isKill = MI->getOperand(1).isKill();
+ const MachineOperand &Dest = MI->getOperand(0);
+ const MachineOperand &Src = MI->getOperand(1);
MachineInstr *NewMI = NULL;
// FIXME: 16-bit LEA's are really slow on Athlons, but not bad on P4's. When
@@ -1833,11 +1887,9 @@
unsigned B = MI->getOperand(1).getReg();
unsigned C = MI->getOperand(2).getReg();
if (B != C) return 0;
- unsigned A = MI->getOperand(0).getReg();
unsigned M = MI->getOperand(3).getImm();
NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::PSHUFDri))
- .addReg(A, RegState::Define | getDeadRegState(isDead))
- .addReg(B, getKillRegState(isKill)).addImm(M);
+ .addOperand(Dest).addOperand(Src).addImm(M);
break;
}
case X86::SHUFPDrri: {
@@ -1847,15 +1899,13 @@
unsigned B = MI->getOperand(1).getReg();
unsigned C = MI->getOperand(2).getReg();
if (B != C) return 0;
- unsigned A = MI->getOperand(0).getReg();
unsigned M = MI->getOperand(3).getImm();
// Convert to PSHUFD mask.
M = ((M & 1) << 1) | ((M & 1) << 3) | ((M & 2) << 4) | ((M & 2) << 6)| 0x44;
NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::PSHUFDri))
- .addReg(A, RegState::Define | getDeadRegState(isDead))
- .addReg(B, getKillRegState(isKill)).addImm(M);
+ .addOperand(Dest).addOperand(Src).addImm(M);
break;
}
case X86::SHL64ri: {
@@ -1866,15 +1916,14 @@
if (ShAmt == 0 || ShAmt >= 4) return 0;
// LEA can't handle RSP.
- if (TargetRegisterInfo::isVirtualRegister(Src) &&
- !MF.getRegInfo().constrainRegClass(Src, &X86::GR64_NOSPRegClass))
+ if (TargetRegisterInfo::isVirtualRegister(Src.getReg()) &&
+ !MF.getRegInfo().constrainRegClass(Src.getReg(),
+ &X86::GR64_NOSPRegClass))
return 0;
NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::LEA64r))
- .addReg(Dest, RegState::Define | getDeadRegState(isDead))
- .addReg(0).addImm(1 << ShAmt)
- .addReg(Src, getKillRegState(isKill))
- .addImm(0).addReg(0);
+ .addOperand(Dest)
+ .addReg(0).addImm(1 << ShAmt).addOperand(Src).addImm(0).addReg(0);
break;
}
case X86::SHL32ri: {
@@ -1885,15 +1934,15 @@
if (ShAmt == 0 || ShAmt >= 4) return 0;
// LEA can't handle ESP.
- if (TargetRegisterInfo::isVirtualRegister(Src) &&
- !MF.getRegInfo().constrainRegClass(Src, &X86::GR32_NOSPRegClass))
+ if (TargetRegisterInfo::isVirtualRegister(Src.getReg()) &&
+ !MF.getRegInfo().constrainRegClass(Src.getReg(),
+ &X86::GR32_NOSPRegClass))
return 0;
unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r;
NewMI = BuildMI(MF, MI->getDebugLoc(), get(Opc))
- .addReg(Dest, RegState::Define | getDeadRegState(isDead))
- .addReg(0).addImm(1 << ShAmt)
- .addReg(Src, getKillRegState(isKill)).addImm(0).addReg(0);
+ .addOperand(Dest)
+ .addReg(0).addImm(1 << ShAmt).addOperand(Src).addImm(0).addReg(0);
break;
}
case X86::SHL16ri: {
@@ -1906,10 +1955,8 @@
if (DisableLEA16)
return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
- .addReg(Dest, RegState::Define | getDeadRegState(isDead))
- .addReg(0).addImm(1 << ShAmt)
- .addReg(Src, getKillRegState(isKill))
- .addImm(0).addReg(0);
+ .addOperand(Dest)
+ .addReg(0).addImm(1 << ShAmt).addOperand(Src).addImm(0).addReg(0);
break;
}
default: {
@@ -1932,14 +1979,12 @@
(const TargetRegisterClass*)&X86::GR32_NOSPRegClass;
// LEA can't handle RSP.
- if (TargetRegisterInfo::isVirtualRegister(Src) &&
- !MF.getRegInfo().constrainRegClass(Src, RC))
+ if (TargetRegisterInfo::isVirtualRegister(Src.getReg()) &&
+ !MF.getRegInfo().constrainRegClass(Src.getReg(), RC))
return 0;
- NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc))
- .addReg(Dest, RegState::Define |
- getDeadRegState(isDead)),
- Src, isKill, 1);
+ NewMI = addOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc))
+ .addOperand(Dest).addOperand(Src), 1);
break;
}
case X86::INC16r:
@@ -1947,10 +1992,8 @@
if (DisableLEA16)
return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
assert(MI->getNumOperands() >= 2 && "Unknown inc instruction!");
- NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
- .addReg(Dest, RegState::Define |
- getDeadRegState(isDead)),
- Src, isKill, 1);
+ NewMI = addOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
+ .addOperand(Dest).addOperand(Src), 1);
break;
case X86::DEC64r:
case X86::DEC32r:
@@ -1962,14 +2005,12 @@
(const TargetRegisterClass*)&X86::GR64_NOSPRegClass :
(const TargetRegisterClass*)&X86::GR32_NOSPRegClass;
// LEA can't handle RSP.
- if (TargetRegisterInfo::isVirtualRegister(Src) &&
- !MF.getRegInfo().constrainRegClass(Src, RC))
+ if (TargetRegisterInfo::isVirtualRegister(Src.getReg()) &&
+ !MF.getRegInfo().constrainRegClass(Src.getReg(), RC))
return 0;
- NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc))
- .addReg(Dest, RegState::Define |
- getDeadRegState(isDead)),
- Src, isKill, -1);
+ NewMI = addOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc))
+ .addOperand(Dest).addOperand(Src), -1);
break;
}
case X86::DEC16r:
@@ -1977,10 +2018,8 @@
if (DisableLEA16)
return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
assert(MI->getNumOperands() >= 2 && "Unknown dec instruction!");
- NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
- .addReg(Dest, RegState::Define |
- getDeadRegState(isDead)),
- Src, isKill, -1);
+ NewMI = addOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
+ .addOperand(Dest).addOperand(Src), -1);
break;
case X86::ADD64rr:
case X86::ADD64rr_DB:
@@ -2007,9 +2046,8 @@
return 0;
NewMI = addRegReg(BuildMI(MF, MI->getDebugLoc(), get(Opc))
- .addReg(Dest, RegState::Define |
- getDeadRegState(isDead)),
- Src, isKill, Src2, isKill2);
+ .addOperand(Dest),
+ Src.getReg(), Src.isKill(), Src2, isKill2);
// Preserve undefness of the operands.
bool isUndef = MI->getOperand(1).isUndef();
@@ -2029,9 +2067,15 @@
unsigned Src2 = MI->getOperand(2).getReg();
bool isKill2 = MI->getOperand(2).isKill();
NewMI = addRegReg(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
- .addReg(Dest, RegState::Define |
- getDeadRegState(isDead)),
- Src, isKill, Src2, isKill2);
+ .addOperand(Dest),
+ Src.getReg(), Src.isKill(), Src2, isKill2);
+
+ // Preserve undefness of the operands.
+ bool isUndef = MI->getOperand(1).isUndef();
+ bool isUndef2 = MI->getOperand(2).isUndef();
+ NewMI->getOperand(1).setIsUndef(isUndef);
+ NewMI->getOperand(3).setIsUndef(isUndef2);
+
if (LV && isKill2)
LV->replaceKillInstruction(Src2, MI, NewMI);
break;
@@ -2041,10 +2085,9 @@
case X86::ADD64ri32_DB:
case X86::ADD64ri8_DB:
assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
- NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA64r))
- .addReg(Dest, RegState::Define |
- getDeadRegState(isDead)),
- Src, isKill, MI->getOperand(2).getImm());
+ NewMI = addOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA64r))
+ .addOperand(Dest).addOperand(Src),
+ MI->getOperand(2).getImm());
break;
case X86::ADD32ri:
case X86::ADD32ri8:
@@ -2052,10 +2095,9 @@
case X86::ADD32ri8_DB: {
assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r;
- NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc))
- .addReg(Dest, RegState::Define |
- getDeadRegState(isDead)),
- Src, isKill, MI->getOperand(2).getImm());
+ NewMI = addOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc))
+ .addOperand(Dest).addOperand(Src),
+ MI->getOperand(2).getImm());
break;
}
case X86::ADD16ri:
@@ -2065,10 +2107,9 @@
if (DisableLEA16)
return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
- NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
- .addReg(Dest, RegState::Define |
- getDeadRegState(isDead)),
- Src, isKill, MI->getOperand(2).getImm());
+ NewMI = addOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
+ .addOperand(Dest).addOperand(Src),
+ MI->getOperand(2).getImm());
break;
}
}
@@ -2077,10 +2118,10 @@
if (!NewMI) return 0;
if (LV) { // Update live variables
- if (isKill)
- LV->replaceKillInstruction(Src, MI, NewMI);
- if (isDead)
- LV->replaceKillInstruction(Dest, MI, NewMI);
+ if (Src.isKill())
+ LV->replaceKillInstruction(Src.getReg(), MI, NewMI);
+ if (Dest.isDead())
+ LV->replaceKillInstruction(Dest.getReg(), MI, NewMI);
}
MFI->insert(MBBI, NewMI); // Insert the new inst
@@ -2120,57 +2161,25 @@
MI->getOperand(3).setImm(Size-Amt);
return TargetInstrInfoImpl::commuteInstruction(MI, NewMI);
}
- case X86::CMOVB16rr:
- case X86::CMOVB32rr:
- case X86::CMOVB64rr:
- case X86::CMOVAE16rr:
- case X86::CMOVAE32rr:
- case X86::CMOVAE64rr:
- case X86::CMOVE16rr:
- case X86::CMOVE32rr:
- case X86::CMOVE64rr:
- case X86::CMOVNE16rr:
- case X86::CMOVNE32rr:
- case X86::CMOVNE64rr:
- case X86::CMOVBE16rr:
- case X86::CMOVBE32rr:
- case X86::CMOVBE64rr:
- case X86::CMOVA16rr:
- case X86::CMOVA32rr:
- case X86::CMOVA64rr:
- case X86::CMOVL16rr:
- case X86::CMOVL32rr:
- case X86::CMOVL64rr:
- case X86::CMOVGE16rr:
- case X86::CMOVGE32rr:
- case X86::CMOVGE64rr:
- case X86::CMOVLE16rr:
- case X86::CMOVLE32rr:
- case X86::CMOVLE64rr:
- case X86::CMOVG16rr:
- case X86::CMOVG32rr:
- case X86::CMOVG64rr:
- case X86::CMOVS16rr:
- case X86::CMOVS32rr:
- case X86::CMOVS64rr:
- case X86::CMOVNS16rr:
- case X86::CMOVNS32rr:
- case X86::CMOVNS64rr:
- case X86::CMOVP16rr:
- case X86::CMOVP32rr:
- case X86::CMOVP64rr:
- case X86::CMOVNP16rr:
- case X86::CMOVNP32rr:
- case X86::CMOVNP64rr:
- case X86::CMOVO16rr:
- case X86::CMOVO32rr:
- case X86::CMOVO64rr:
- case X86::CMOVNO16rr:
- case X86::CMOVNO32rr:
- case X86::CMOVNO64rr: {
- unsigned Opc = 0;
+ case X86::CMOVB16rr: case X86::CMOVB32rr: case X86::CMOVB64rr:
+ case X86::CMOVAE16rr: case X86::CMOVAE32rr: case X86::CMOVAE64rr:
+ case X86::CMOVE16rr: case X86::CMOVE32rr: case X86::CMOVE64rr:
+ case X86::CMOVNE16rr: case X86::CMOVNE32rr: case X86::CMOVNE64rr:
+ case X86::CMOVBE16rr: case X86::CMOVBE32rr: case X86::CMOVBE64rr:
+ case X86::CMOVA16rr: case X86::CMOVA32rr: case X86::CMOVA64rr:
+ case X86::CMOVL16rr: case X86::CMOVL32rr: case X86::CMOVL64rr:
+ case X86::CMOVGE16rr: case X86::CMOVGE32rr: case X86::CMOVGE64rr:
+ case X86::CMOVLE16rr: case X86::CMOVLE32rr: case X86::CMOVLE64rr:
+ case X86::CMOVG16rr: case X86::CMOVG32rr: case X86::CMOVG64rr:
+ case X86::CMOVS16rr: case X86::CMOVS32rr: case X86::CMOVS64rr:
+ case X86::CMOVNS16rr: case X86::CMOVNS32rr: case X86::CMOVNS64rr:
+ case X86::CMOVP16rr: case X86::CMOVP32rr: case X86::CMOVP64rr:
+ case X86::CMOVNP16rr: case X86::CMOVNP32rr: case X86::CMOVNP64rr:
+ case X86::CMOVO16rr: case X86::CMOVO32rr: case X86::CMOVO64rr:
+ case X86::CMOVNO16rr: case X86::CMOVNO32rr: case X86::CMOVNO64rr: {
+ unsigned Opc;
switch (MI->getOpcode()) {
- default: break;
+ default: llvm_unreachable("Unreachable!");
case X86::CMOVB16rr: Opc = X86::CMOVAE16rr; break;
case X86::CMOVB32rr: Opc = X86::CMOVAE32rr; break;
case X86::CMOVB64rr: Opc = X86::CMOVAE64rr; break;
@@ -2279,7 +2288,7 @@
}
/// getCondFromCmovOpc - return condition code of a CMov opcode.
-static X86::CondCode getCondFromCMovOpc(unsigned Opc) {
+X86::CondCode X86::getCondFromCMovOpc(unsigned Opc) {
switch (Opc) {
default: return X86::COND_INVALID;
case X86::CMOVA16rm: case X86::CMOVA16rr: case X86::CMOVA32rm:
@@ -2402,7 +2411,7 @@
/// whether it has memory operand.
static unsigned getSETFromCond(X86::CondCode CC,
bool HasMemoryOperand) {
- static const unsigned Opc[16][2] = {
+ static const uint16_t Opc[16][2] = {
{ X86::SETAr, X86::SETAm },
{ X86::SETAEr, X86::SETAEm },
{ X86::SETBr, X86::SETBm },
@@ -2429,7 +2438,7 @@
/// register size in bytes, and operand type.
static unsigned getCMovFromCond(X86::CondCode CC, unsigned RegBytes,
bool HasMemoryOperand) {
- static const unsigned Opc[32][3] = {
+ static const uint16_t Opc[32][3] = {
{ X86::CMOVA16rr, X86::CMOVA32rr, X86::CMOVA64rr },
{ X86::CMOVAE16rr, X86::CMOVAE32rr, X86::CMOVAE64rr },
{ X86::CMOVB16rr, X86::CMOVB32rr, X86::CMOVB64rr },
@@ -2762,19 +2771,18 @@
// SrcReg(GR64) -> DestReg(VR64)
if (X86::GR64RegClass.contains(DestReg)) {
- if (X86::VR128RegClass.contains(SrcReg)) {
+ if (X86::VR128RegClass.contains(SrcReg))
// Copy from a VR128 register to a GR64 register.
return HasAVX ? X86::VMOVPQIto64rr : X86::MOVPQIto64rr;
- } else if (X86::VR64RegClass.contains(SrcReg)) {
+ if (X86::VR64RegClass.contains(SrcReg))
// Copy from a VR64 register to a GR64 register.
return X86::MOVSDto64rr;
- }
} else if (X86::GR64RegClass.contains(SrcReg)) {
// Copy from a GR64 register to a VR128 register.
if (X86::VR128RegClass.contains(DestReg))
return HasAVX ? X86::VMOV64toPQIrr : X86::MOV64toPQIrr;
// Copy from a GR64 register to a VR64 register.
- else if (X86::VR64RegClass.contains(DestReg))
+ if (X86::VR64RegClass.contains(DestReg))
return X86::MOV64toSDrr;
}
@@ -2782,12 +2790,12 @@
// SrcReg(GR32) -> DestReg(FR32)
if (X86::GR32RegClass.contains(DestReg) && X86::FR32RegClass.contains(SrcReg))
- // Copy from a FR32 register to a GR32 register.
- return HasAVX ? X86::VMOVSS2DIrr : X86::MOVSS2DIrr;
+ // Copy from a FR32 register to a GR32 register.
+ return HasAVX ? X86::VMOVSS2DIrr : X86::MOVSS2DIrr;
if (X86::FR32RegClass.contains(DestReg) && X86::GR32RegClass.contains(SrcReg))
- // Copy from a GR32 register to a FR32 register.
- return HasAVX ? X86::VMOVDI2SSrr : X86::MOVDI2SSrr;
+ // Copy from a GR32 register to a FR32 register.
+ return HasAVX ? X86::VMOVDI2SSrr : X86::MOVDI2SSrr;
return 0;
}
@@ -2798,7 +2806,7 @@
bool KillSrc) const {
// First deal with the normal symmetric copies.
bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX();
- unsigned Opc = 0;
+ unsigned Opc;
if (X86::GR64RegClass.contains(DestReg, SrcReg))
Opc = X86::MOV64rr;
else if (X86::GR32RegClass.contains(DestReg, SrcReg))
@@ -2837,7 +2845,8 @@
BuildMI(MBB, MI, DL, get(X86::PUSHF64));
BuildMI(MBB, MI, DL, get(X86::POP64r), DestReg);
return;
- } else if (X86::GR32RegClass.contains(DestReg)) {
+ }
+ if (X86::GR32RegClass.contains(DestReg)) {
BuildMI(MBB, MI, DL, get(X86::PUSHF32));
BuildMI(MBB, MI, DL, get(X86::POP32r), DestReg);
return;
@@ -2849,7 +2858,8 @@
.addReg(SrcReg, getKillRegState(KillSrc));
BuildMI(MBB, MI, DL, get(X86::POPF64));
return;
- } else if (X86::GR32RegClass.contains(SrcReg)) {
+ }
+ if (X86::GR32RegClass.contains(SrcReg)) {
BuildMI(MBB, MI, DL, get(X86::PUSH32r))
.addReg(SrcReg, getKillRegState(KillSrc));
BuildMI(MBB, MI, DL, get(X86::POPF32));
@@ -3139,11 +3149,19 @@
case X86::SUB8ri: case X86::SUB64rr: case X86::SUB32rr:
case X86::SUB16rr: case X86::SUB8rr: case X86::SUB64rm:
case X86::SUB32rm: case X86::SUB16rm: case X86::SUB8rm:
+ case X86::DEC64r: case X86::DEC32r: case X86::DEC16r: case X86::DEC8r:
+ case X86::DEC64m: case X86::DEC32m: case X86::DEC16m: case X86::DEC8m:
+ case X86::DEC64_32r: case X86::DEC64_16r:
+ case X86::DEC64_32m: case X86::DEC64_16m:
case X86::ADD64ri32: case X86::ADD64ri8: case X86::ADD32ri:
case X86::ADD32ri8: case X86::ADD16ri: case X86::ADD16ri8:
case X86::ADD8ri: case X86::ADD64rr: case X86::ADD32rr:
case X86::ADD16rr: case X86::ADD8rr: case X86::ADD64rm:
case X86::ADD32rm: case X86::ADD16rm: case X86::ADD8rm:
+ case X86::INC64r: case X86::INC32r: case X86::INC16r: case X86::INC8r:
+ case X86::INC64m: case X86::INC32m: case X86::INC16m: case X86::INC8m:
+ case X86::INC64_32r: case X86::INC64_16r:
+ case X86::INC64_32m: case X86::INC64_16m:
case X86::AND64ri32: case X86::AND64ri8: case X86::AND32ri:
case X86::AND32ri8: case X86::AND16ri: case X86::AND16ri8:
case X86::AND8ri: case X86::AND64rr: case X86::AND32rr:
@@ -3193,7 +3211,7 @@
return false;
// There is no use of the destination register, we can replace SUB with CMP.
switch (CmpInstr->getOpcode()) {
- default: llvm_unreachable(0);
+ default: llvm_unreachable("Unreachable!");
case X86::SUB64rm: NewOpcode = X86::CMP64rm; break;
case X86::SUB32rm: NewOpcode = X86::CMP32rm; break;
case X86::SUB16rm: NewOpcode = X86::CMP16rm; break;
@@ -3318,7 +3336,7 @@
if (OldCC != X86::COND_INVALID)
OpcIsSET = true;
else
- OldCC = getCondFromCMovOpc(Instr.getOpcode());
+ OldCC = X86::getCondFromCMovOpc(Instr.getOpcode());
}
if (OldCC == X86::COND_INVALID) return false;
}
@@ -3383,12 +3401,14 @@
Sub->getParent()->insert(MachineBasicBlock::iterator(Sub), Movr0Inst);
}
- // Make sure Sub instruction defines EFLAGS.
+ // Make sure Sub instruction defines EFLAGS and mark the def live.
+ unsigned LastOperand = Sub->getNumOperands() - 1;
assert(Sub->getNumOperands() >= 2 &&
- Sub->getOperand(Sub->getNumOperands()-1).isReg() &&
- Sub->getOperand(Sub->getNumOperands()-1).getReg() == X86::EFLAGS &&
+ Sub->getOperand(LastOperand).isReg() &&
+ Sub->getOperand(LastOperand).getReg() == X86::EFLAGS &&
"EFLAGS should be the last operand of SUB, ADD, OR, XOR, AND");
- Sub->getOperand(Sub->getNumOperands()-1).setIsDef(true);
+ Sub->getOperand(LastOperand).setIsDef(true);
+ Sub->getOperand(LastOperand).setIsDead(false);
CmpInstr->eraseFromParent();
// Modify the condition code of instructions in OpsToUpdate.
@@ -3497,10 +3517,25 @@
bool X86InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX();
switch (MI->getOpcode()) {
+ case X86::SETB_C8r:
+ return Expand2AddrUndef(MI, get(X86::SBB8rr));
+ case X86::SETB_C16r:
+ return Expand2AddrUndef(MI, get(X86::SBB16rr));
+ case X86::SETB_C32r:
+ return Expand2AddrUndef(MI, get(X86::SBB32rr));
+ case X86::SETB_C64r:
+ return Expand2AddrUndef(MI, get(X86::SBB64rr));
case X86::V_SET0:
case X86::FsFLD0SS:
case X86::FsFLD0SD:
return Expand2AddrUndef(MI, get(HasAVX ? X86::VXORPSrr : X86::XORPSrr));
+ case X86::AVX_SET0:
+ assert(HasAVX && "AVX not supported");
+ return Expand2AddrUndef(MI, get(X86::VXORPSYrr));
+ case X86::V_SETALLONES:
+ return Expand2AddrUndef(MI, get(HasAVX ? X86::VPCMPEQDrr : X86::PCMPEQDrr));
+ case X86::AVX2_SETALLONES:
+ return Expand2AddrUndef(MI, get(X86::VPCMPEQDYrr));
case X86::TEST8ri_NOREX:
MI->setDesc(get(X86::TEST8ri));
return true;
@@ -3614,14 +3649,16 @@
OpcodeTablePtr = &RegOp2MemOpTable2Addr;
isTwoAddrFold = true;
} else if (i == 0) { // If operand 0
- if (MI->getOpcode() == X86::MOV64r0)
- NewMI = MakeM0Inst(*this, X86::MOV64mi32, MOs, MI);
- else if (MI->getOpcode() == X86::MOV32r0)
- NewMI = MakeM0Inst(*this, X86::MOV32mi, MOs, MI);
- else if (MI->getOpcode() == X86::MOV16r0)
- NewMI = MakeM0Inst(*this, X86::MOV16mi, MOs, MI);
- else if (MI->getOpcode() == X86::MOV8r0)
- NewMI = MakeM0Inst(*this, X86::MOV8mi, MOs, MI);
+ unsigned Opc = 0;
+ switch (MI->getOpcode()) {
+ default: break;
+ case X86::MOV64r0: Opc = X86::MOV64mi32; break;
+ case X86::MOV32r0: Opc = X86::MOV32mi; break;
+ case X86::MOV16r0: Opc = X86::MOV16mi; break;
+ case X86::MOV8r0: Opc = X86::MOV8mi; break;
+ }
+ if (Opc)
+ NewMI = MakeM0Inst(*this, Opc, MOs, MI);
if (NewMI)
return NewMI;
@@ -3799,7 +3836,8 @@
// Unless optimizing for size, don't fold to avoid partial
// register update stalls
- if (!MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize) &&
+ if (!MF.getFunction()->getFnAttributes().
+ hasAttribute(Attributes::OptimizeForSize) &&
hasPartialRegUpdate(MI->getOpcode()))
return 0;
@@ -3840,7 +3878,8 @@
// Unless optimizing for size, don't fold to avoid partial
// register update stalls
- if (!MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize) &&
+ if (!MF.getFunction()->getFnAttributes().
+ hasAttribute(Attributes::OptimizeForSize) &&
hasPartialRegUpdate(MI->getOpcode()))
return 0;
@@ -3850,15 +3889,12 @@
Alignment = (*LoadMI->memoperands_begin())->getAlignment();
else
switch (LoadMI->getOpcode()) {
- case X86::AVX_SET0PSY:
- case X86::AVX_SET0PDY:
case X86::AVX2_SETALLONES:
- case X86::AVX2_SET0:
+ case X86::AVX_SET0:
Alignment = 32;
break;
case X86::V_SET0:
case X86::V_SETALLONES:
- case X86::AVX_SETALLONES:
Alignment = 16;
break;
case X86::FsFLD0SD:
@@ -3894,11 +3930,8 @@
switch (LoadMI->getOpcode()) {
case X86::V_SET0:
case X86::V_SETALLONES:
- case X86::AVX_SET0PSY:
- case X86::AVX_SET0PDY:
- case X86::AVX_SETALLONES:
case X86::AVX2_SETALLONES:
- case X86::AVX2_SET0:
+ case X86::AVX_SET0:
case X86::FsFLD0SD:
case X86::FsFLD0SS: {
// Folding a V_SET0 or V_SETALLONES as a load, to ease register pressure.
@@ -3930,15 +3963,12 @@
Ty = Type::getFloatTy(MF.getFunction()->getContext());
else if (Opc == X86::FsFLD0SD)
Ty = Type::getDoubleTy(MF.getFunction()->getContext());
- else if (Opc == X86::AVX_SET0PSY || Opc == X86::AVX_SET0PDY)
- Ty = VectorType::get(Type::getFloatTy(MF.getFunction()->getContext()), 8);
- else if (Opc == X86::AVX2_SETALLONES || Opc == X86::AVX2_SET0)
+ else if (Opc == X86::AVX2_SETALLONES || Opc == X86::AVX_SET0)
Ty = VectorType::get(Type::getInt32Ty(MF.getFunction()->getContext()), 8);
else
Ty = VectorType::get(Type::getInt32Ty(MF.getFunction()->getContext()), 4);
- bool IsAllOnes = (Opc == X86::V_SETALLONES || Opc == X86::AVX_SETALLONES ||
- Opc == X86::AVX2_SETALLONES);
+ bool IsAllOnes = (Opc == X86::V_SETALLONES || Opc == X86::AVX2_SETALLONES);
const Constant *C = IsAllOnes ? Constant::getAllOnesValue(Ty) :
Constant::getNullValue(Ty);
unsigned CPI = MCP.getConstantPoolIndex(C, Alignment);
@@ -4013,6 +4043,8 @@
OpcodeTablePtr = &RegOp2MemOpTable1;
} else if (OpNum == 2) {
OpcodeTablePtr = &RegOp2MemOpTable2;
+ } else if (OpNum == 3) {
+ OpcodeTablePtr = &RegOp2MemOpTable3;
}
if (OpcodeTablePtr && OpcodeTablePtr->count(Opc))
@@ -4102,7 +4134,6 @@
getUndefRegState(MO.isUndef()));
}
// Change CMP32ri r, 0 back to TEST32rr r, r, etc.
- unsigned NewOpc = 0;
switch (DataMI->getOpcode()) {
default: break;
case X86::CMP64ri32:
@@ -4115,8 +4146,9 @@
MachineOperand &MO0 = DataMI->getOperand(0);
MachineOperand &MO1 = DataMI->getOperand(1);
if (MO1.getImm() == 0) {
+ unsigned NewOpc;
switch (DataMI->getOpcode()) {
- default: break;
+ default: llvm_unreachable("Unreachable!");
case X86::CMP64ri8:
case X86::CMP64ri32: NewOpc = X86::TEST64rr; break;
case X86::CMP32ri8:
Modified: llvm/branches/AMDILBackend/lib/Target/X86/X86InstrInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/X86/X86InstrInfo.h?rev=172541&r1=172540&r2=172541&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/X86/X86InstrInfo.h (original)
+++ llvm/branches/AMDILBackend/lib/Target/X86/X86InstrInfo.h Tue Jan 15 11:16:16 2013
@@ -61,6 +61,9 @@
// Turn condition code into conditional branch opcode.
unsigned GetCondBranchFromCond(CondCode CC);
+ // Turn CMov opcode into condition code.
+ CondCode getCondFromCMovOpc(unsigned Opc);
+
/// GetOppositeBranchCondition - Return the inverse of the specified cond,
/// e.g. turning COND_E to COND_NE.
CondCode GetOppositeBranchCondition(X86::CondCode CC);
Modified: llvm/branches/AMDILBackend/lib/Target/X86/X86InstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/X86/X86InstrInfo.td?rev=172541&r1=172540&r2=172541&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/X86/X86InstrInfo.td (original)
+++ llvm/branches/AMDILBackend/lib/Target/X86/X86InstrInfo.td Tue Jan 15 11:16:16 2013
@@ -114,7 +114,7 @@
def SDT_X86MEMBARRIER : SDTypeProfile<0, 0, []>;
def X86MemBarrier : SDNode<"X86ISD::MEMBARRIER", SDT_X86MEMBARRIER,
- [SDNPHasChain]>;
+ [SDNPHasChain,SDNPSideEffect]>;
def X86MFence : SDNode<"X86ISD::MFENCE", SDT_X86MEMBARRIER,
[SDNPHasChain]>;
def X86SFence : SDNode<"X86ISD::SFENCE", SDT_X86MEMBARRIER,
@@ -216,6 +216,14 @@
def X86ehret : SDNode<"X86ISD::EH_RETURN", SDT_X86EHRET,
[SDNPHasChain]>;
+def X86eh_sjlj_setjmp : SDNode<"X86ISD::EH_SJLJ_SETJMP",
+ SDTypeProfile<1, 1, [SDTCisInt<0>,
+ SDTCisPtrTy<1>]>,
+ [SDNPHasChain, SDNPSideEffect]>;
+def X86eh_sjlj_longjmp : SDNode<"X86ISD::EH_SJLJ_LONGJMP",
+ SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>,
+ [SDNPHasChain, SDNPSideEffect]>;
+
def X86tcret : SDNode<"X86ISD::TC_RETURN", SDT_X86TCRET,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
@@ -397,7 +405,7 @@
let OperandType = "OPERAND_PCREL",
ParserMatchClass = X86AbsMemAsmOperand,
- PrintMethod = "print_pcrel_imm" in {
+ PrintMethod = "printPCRelImm" in {
def i32imm_pcrel : Operand<i32>;
def i16imm_pcrel : Operand<i16>;
@@ -418,7 +426,7 @@
}
def AVXCC : Operand<i8> {
- let PrintMethod = "printSSECC";
+ let PrintMethod = "printAVXCC";
let OperandType = "OPERAND_IMMEDIATE";
}
@@ -499,7 +507,7 @@
// 64-bits but only 32 bits are significant, and those bits are treated as being
// pc relative.
def i64i32imm_pcrel : Operand<i64> {
- let PrintMethod = "print_pcrel_imm";
+ let PrintMethod = "printPCRelImm";
let ParserMatchClass = X86AbsMemAsmOperand;
let OperandType = "OPERAND_PCREL";
}
@@ -552,14 +560,21 @@
def Has3DNow : Predicate<"Subtarget->has3DNow()">;
def Has3DNowA : Predicate<"Subtarget->has3DNowA()">;
def HasSSE1 : Predicate<"Subtarget->hasSSE1()">;
+def UseSSE1 : Predicate<"Subtarget->hasSSE1() && !Subtarget->hasAVX()">;
def HasSSE2 : Predicate<"Subtarget->hasSSE2()">;
+def UseSSE2 : Predicate<"Subtarget->hasSSE2() && !Subtarget->hasAVX()">;
def HasSSE3 : Predicate<"Subtarget->hasSSE3()">;
+def UseSSE3 : Predicate<"Subtarget->hasSSE3() && !Subtarget->hasAVX()">;
def HasSSSE3 : Predicate<"Subtarget->hasSSSE3()">;
+def UseSSSE3 : Predicate<"Subtarget->hasSSSE3() && !Subtarget->hasAVX()">;
def HasSSE41 : Predicate<"Subtarget->hasSSE41()">;
+def UseSSE41 : Predicate<"Subtarget->hasSSE41() && !Subtarget->hasAVX()">;
def HasSSE42 : Predicate<"Subtarget->hasSSE42()">;
+def UseSSE42 : Predicate<"Subtarget->hasSSE42() && !Subtarget->hasAVX()">;
def HasSSE4A : Predicate<"Subtarget->hasSSE4A()">;
def HasAVX : Predicate<"Subtarget->hasAVX()">;
def HasAVX2 : Predicate<"Subtarget->hasAVX2()">;
+def HasAVX1Only : Predicate<"Subtarget->hasAVX() && !Subtarget->hasAVX2()">;
def HasPOPCNT : Predicate<"Subtarget->hasPOPCNT()">;
def HasAES : Predicate<"Subtarget->hasAES()">;
@@ -574,6 +589,7 @@
def HasLZCNT : Predicate<"Subtarget->hasLZCNT()">;
def HasBMI : Predicate<"Subtarget->hasBMI()">;
def HasBMI2 : Predicate<"Subtarget->hasBMI2()">;
+def HasRTM : Predicate<"Subtarget->hasRTM()">;
def FPStackf32 : Predicate<"!Subtarget->hasSSE1()">;
def FPStackf64 : Predicate<"!Subtarget->hasSSE2()">;
def HasCmpxchg16b: Predicate<"Subtarget->hasCmpxchg16b()">;
@@ -1259,28 +1275,46 @@
// Atomic support
//
-
// Atomic swap. These are just normal xchg instructions. But since a memory
// operand is referenced, the atomicity is ensured.
-let Constraints = "$val = $dst" in {
-def XCHG8rm : I<0x86, MRMSrcMem, (outs GR8:$dst), (ins GR8:$val, i8mem:$ptr),
- "xchg{b}\t{$val, $ptr|$ptr, $val}",
- [(set GR8:$dst, (atomic_swap_8 addr:$ptr, GR8:$val))],
- IIC_XCHG_MEM>;
-def XCHG16rm : I<0x87, MRMSrcMem, (outs GR16:$dst),(ins GR16:$val, i16mem:$ptr),
- "xchg{w}\t{$val, $ptr|$ptr, $val}",
- [(set GR16:$dst, (atomic_swap_16 addr:$ptr, GR16:$val))],
- IIC_XCHG_MEM>,
- OpSize;
-def XCHG32rm : I<0x87, MRMSrcMem, (outs GR32:$dst),(ins GR32:$val, i32mem:$ptr),
- "xchg{l}\t{$val, $ptr|$ptr, $val}",
- [(set GR32:$dst, (atomic_swap_32 addr:$ptr, GR32:$val))],
- IIC_XCHG_MEM>;
-def XCHG64rm : RI<0x87, MRMSrcMem, (outs GR64:$dst),(ins GR64:$val,i64mem:$ptr),
- "xchg{q}\t{$val, $ptr|$ptr, $val}",
- [(set GR64:$dst, (atomic_swap_64 addr:$ptr, GR64:$val))],
- IIC_XCHG_MEM>;
+multiclass ATOMIC_SWAP<bits<8> opc8, bits<8> opc, string mnemonic, string frag,
+ InstrItinClass itin> {
+ let Constraints = "$val = $dst" in {
+ def #NAME#8rm : I<opc8, MRMSrcMem, (outs GR8:$dst),
+ (ins GR8:$val, i8mem:$ptr),
+ !strconcat(mnemonic, "{b}\t{$val, $ptr|$ptr, $val}"),
+ [(set
+ GR8:$dst,
+ (!cast<PatFrag>(frag # "_8") addr:$ptr, GR8:$val))],
+ itin>;
+ def #NAME#16rm : I<opc, MRMSrcMem, (outs GR16:$dst),
+ (ins GR16:$val, i16mem:$ptr),
+ !strconcat(mnemonic, "{w}\t{$val, $ptr|$ptr, $val}"),
+ [(set
+ GR16:$dst,
+ (!cast<PatFrag>(frag # "_16") addr:$ptr, GR16:$val))],
+ itin>, OpSize;
+ def #NAME#32rm : I<opc, MRMSrcMem, (outs GR32:$dst),
+ (ins GR32:$val, i32mem:$ptr),
+ !strconcat(mnemonic, "{l}\t{$val, $ptr|$ptr, $val}"),
+ [(set
+ GR32:$dst,
+ (!cast<PatFrag>(frag # "_32") addr:$ptr, GR32:$val))],
+ itin>;
+ def #NAME#64rm : RI<opc, MRMSrcMem, (outs GR64:$dst),
+ (ins GR64:$val, i64mem:$ptr),
+ !strconcat(mnemonic, "{q}\t{$val, $ptr|$ptr, $val}"),
+ [(set
+ GR64:$dst,
+ (!cast<PatFrag>(frag # "_64") addr:$ptr, GR64:$val))],
+ itin>;
+ }
+}
+
+defm XCHG : ATOMIC_SWAP<0x86, 0x87, "xchg", "atomic_swap", IIC_XCHG_MEM>;
+// Swap between registers.
+let Constraints = "$val = $dst" in {
def XCHG8rr : I<0x86, MRMSrcReg, (outs GR8:$dst), (ins GR8:$val, GR8:$src),
"xchg{b}\t{$val, $src|$src, $val}", [], IIC_XCHG_REG>;
def XCHG16rr : I<0x87, MRMSrcReg, (outs GR16:$dst), (ins GR16:$val, GR16:$src),
@@ -1291,6 +1325,7 @@
"xchg{q}\t{$val, $src|$src, $val}", [], IIC_XCHG_REG>;
}
+// Swap between EAX and other registers.
def XCHG16ar : I<0x90, AddRegFrm, (outs), (ins GR16:$src),
"xchg{w}\t{$src, %ax|AX, $src}", [], IIC_XCHG_REG>, OpSize;
def XCHG32ar : I<0x90, AddRegFrm, (outs), (ins GR32:$src),
@@ -1672,6 +1707,8 @@
include "X86InstrVMX.td"
include "X86InstrSVM.td"
+include "X86InstrTSX.td"
+
// System instructions.
include "X86InstrSystem.td"
Modified: llvm/branches/AMDILBackend/lib/Target/X86/X86InstrMMX.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/X86/X86InstrMMX.td?rev=172541&r1=172540&r2=172541&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/X86/X86InstrMMX.td (original)
+++ llvm/branches/AMDILBackend/lib/Target/X86/X86InstrMMX.td Tue Jan 15 11:16:16 2013
@@ -118,11 +118,11 @@
/// Unary MMX instructions requiring SSSE3.
multiclass SS3I_unop_rm_int_mm<bits<8> opc, string OpcodeStr,
Intrinsic IntId64, OpndItins itins> {
- def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src),
+ def rr64 : MMXSS38I<opc, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR64:$dst, (IntId64 VR64:$src))], itins.rr>;
- def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src),
+ def rm64 : MMXSS38I<opc, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR64:$dst,
(IntId64 (bitconvert (memopmmx addr:$src))))],
@@ -134,11 +134,11 @@
multiclass SS3I_binop_rm_int_mm<bits<8> opc, string OpcodeStr,
Intrinsic IntId64, OpndItins itins> {
let isCommutable = 0 in
- def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst),
+ def rr64 : MMXSS38I<opc, MRMSrcReg, (outs VR64:$dst),
(ins VR64:$src1, VR64:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR64:$dst, (IntId64 VR64:$src1, VR64:$src2))], itins.rr>;
- def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst),
+ def rm64 : MMXSS38I<opc, MRMSrcMem, (outs VR64:$dst),
(ins VR64:$src1, i64mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR64:$dst,
@@ -149,11 +149,11 @@
/// PALIGN MMX instructions (require SSSE3).
multiclass ssse3_palign_mm<string asm, Intrinsic IntId> {
- def R64irr : SS3AI<0x0F, MRMSrcReg, (outs VR64:$dst),
+ def R64irr : MMXSS3AI<0x0F, MRMSrcReg, (outs VR64:$dst),
(ins VR64:$src1, VR64:$src2, i8imm:$src3),
!strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set VR64:$dst, (IntId VR64:$src1, VR64:$src2, (i8 imm:$src3)))]>;
- def R64irm : SS3AI<0x0F, MRMSrcMem, (outs VR64:$dst),
+ def R64irm : MMXSS3AI<0x0F, MRMSrcMem, (outs VR64:$dst),
(ins VR64:$src1, i64mem:$src2, i8imm:$src3),
!strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set VR64:$dst, (IntId VR64:$src1,
@@ -163,12 +163,10 @@
multiclass sse12_cvt_pint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
Intrinsic Int, X86MemOperand x86memop, PatFrag ld_frag,
string asm, OpndItins itins, Domain d> {
- def irr : PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
- [(set DstRC:$dst, (Int SrcRC:$src))],
- itins.rr, d>;
- def irm : PI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
- [(set DstRC:$dst, (Int (ld_frag addr:$src)))],
- itins.rm, d>;
+ def irr : MMXPI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
+ [(set DstRC:$dst, (Int SrcRC:$src))], itins.rr, d>;
+ def irm : MMXPI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
+ [(set DstRC:$dst, (Int (ld_frag addr:$src)))], itins.rm, d>;
}
multiclass sse12_cvt_pint_3addr<bits<8> opc, RegisterClass SrcRC,
@@ -209,8 +207,14 @@
let mayStore = 1 in
def MMX_MOVD64mr : MMXI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR64:$src),
"movd\t{$src, $dst|$dst, $src}", [], IIC_MMX_MOV_MM_RM>;
-def MMX_MOVD64grr : MMXI<0x7E, MRMDestReg, (outs), (ins GR32:$dst, VR64:$src),
- "movd\t{$src, $dst|$dst, $src}", [], IIC_MMX_MOV_REG_MM>;
+
+// Low word of MMX to GPR.
+def MMX_X86movd2w : SDNode<"X86ISD::MMX_MOVD2W", SDTypeProfile<1, 1,
+ [SDTCisVT<0, i32>, SDTCisVT<1, x86mmx>]>>;
+def MMX_MOVD64grr : MMXI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR64:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst,
+ (MMX_X86movd2w (x86mmx VR64:$src)))], IIC_MMX_MOV_REG_MM>;
let neverHasSideEffects = 1 in
def MMX_MOVD64to64rr : MMXRI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src),
@@ -243,29 +247,30 @@
[(store (x86mmx VR64:$src), addr:$dst)],
IIC_MMX_MOVQ_RM>;
-def MMX_MOVDQ2Qrr : SDIi8<0xD6, MRMSrcReg, (outs VR64:$dst),
- (ins VR128:$src), "movdq2q\t{$src, $dst|$dst, $src}",
- [(set VR64:$dst,
- (x86mmx (bitconvert
- (i64 (vector_extract (v2i64 VR128:$src),
- (iPTR 0))))))],
- IIC_MMX_MOVQ_RR>;
-
-def MMX_MOVQ2DQrr : S2SIi8<0xD6, MRMSrcReg, (outs VR128:$dst),
- (ins VR64:$src), "movq2dq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst,
- (v2i64 (scalar_to_vector
- (i64 (bitconvert (x86mmx VR64:$src))))))],
- IIC_MMX_MOVQ_RR>;
+def MMX_MOVDQ2Qrr : MMXSDIi8<0xD6, MRMSrcReg, (outs VR64:$dst),
+ (ins VR128:$src), "movdq2q\t{$src, $dst|$dst, $src}",
+ [(set VR64:$dst,
+ (x86mmx (bitconvert
+ (i64 (vector_extract (v2i64 VR128:$src),
+ (iPTR 0))))))],
+ IIC_MMX_MOVQ_RR>;
+
+def MMX_MOVQ2DQrr : MMXS2SIi8<0xD6, MRMSrcReg, (outs VR128:$dst),
+ (ins VR64:$src), "movq2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v2i64
+ (scalar_to_vector
+ (i64 (bitconvert (x86mmx VR64:$src))))))],
+ IIC_MMX_MOVQ_RR>;
let neverHasSideEffects = 1 in
-def MMX_MOVQ2FR64rr: S2SIi8<0xD6, MRMSrcReg, (outs FR64:$dst),
- (ins VR64:$src), "movq2dq\t{$src, $dst|$dst, $src}", [],
- IIC_MMX_MOVQ_RR>;
-
-def MMX_MOVFR642Qrr: SDIi8<0xD6, MRMSrcReg, (outs VR64:$dst),
- (ins FR64:$src), "movdq2q\t{$src, $dst|$dst, $src}", [],
- IIC_MMX_MOVQ_RR>;
+def MMX_MOVQ2FR64rr: MMXS2SIi8<0xD6, MRMSrcReg, (outs FR64:$dst),
+ (ins VR64:$src), "movq2dq\t{$src, $dst|$dst, $src}",
+ [], IIC_MMX_MOVQ_RR>;
+
+def MMX_MOVFR642Qrr: MMXSDIi8<0xD6, MRMSrcReg, (outs VR64:$dst),
+ (ins FR64:$src), "movdq2q\t{$src, $dst|$dst, $src}",
+ [], IIC_MMX_MOVQ_RR>;
def MMX_MOVNTQmr : MMXI<0xE7, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src),
"movntq\t{$src, $dst|$dst, $src}",
@@ -554,20 +559,6 @@
(int_x86_mmx_pmovmskb VR64:$src))]>;
-// MMX to XMM for vector types
-def MMX_X86movq2dq : SDNode<"X86ISD::MOVQ2DQ", SDTypeProfile<1, 1,
- [SDTCisVT<0, v2i64>, SDTCisVT<1, x86mmx>]>>;
-
-def : Pat<(v2i64 (MMX_X86movq2dq VR64:$src)),
- (v2i64 (MMX_MOVQ2DQrr VR64:$src))>;
-
-def : Pat<(v2i64 (MMX_X86movq2dq (load_mmx addr:$src))),
- (v2i64 (MOVQI2PQIrm addr:$src))>;
-
-def : Pat<(v2i64 (MMX_X86movq2dq
- (x86mmx (scalar_to_vector (loadi32 addr:$src))))),
- (v2i64 (MOVDI2PDIrm addr:$src))>;
-
// Low word of XMM to MMX.
def MMX_X86movdq2q : SDNode<"X86ISD::MOVDQ2Q", SDTypeProfile<1, 1,
[SDTCisVT<0, x86mmx>, SDTCisVT<1, v2i64>]>>;
@@ -591,6 +582,7 @@
IIC_MMX_MASKMOV>;
// 64-bit bit convert.
+let Predicates = [HasSSE2] in {
def : Pat<(x86mmx (bitconvert (i64 GR64:$src))),
(MMX_MOVD64to64rr GR64:$src)>;
def : Pat<(i64 (bitconvert (x86mmx VR64:$src))),
@@ -599,5 +591,6 @@
(MMX_MOVQ2FR64rr VR64:$src)>;
def : Pat<(x86mmx (bitconvert (f64 FR64:$src))),
(MMX_MOVFR642Qrr FR64:$src)>;
+}
Modified: llvm/branches/AMDILBackend/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/X86/X86InstrSSE.td?rev=172541&r1=172540&r2=172541&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/branches/AMDILBackend/lib/Target/X86/X86InstrSSE.td Tue Jan 15 11:16:16 2013
@@ -251,35 +251,37 @@
// A 128-bit subvector extract from the first 256-bit vector position
// is a subregister copy that needs no instruction.
-def : Pat<(v4i32 (extract_subvector (v8i32 VR256:$src), (i32 0))),
+def : Pat<(v4i32 (extract_subvector (v8i32 VR256:$src), (iPTR 0))),
(v4i32 (EXTRACT_SUBREG (v8i32 VR256:$src), sub_xmm))>;
-def : Pat<(v4f32 (extract_subvector (v8f32 VR256:$src), (i32 0))),
+def : Pat<(v4f32 (extract_subvector (v8f32 VR256:$src), (iPTR 0))),
(v4f32 (EXTRACT_SUBREG (v8f32 VR256:$src), sub_xmm))>;
-def : Pat<(v2i64 (extract_subvector (v4i64 VR256:$src), (i32 0))),
+def : Pat<(v2i64 (extract_subvector (v4i64 VR256:$src), (iPTR 0))),
(v2i64 (EXTRACT_SUBREG (v4i64 VR256:$src), sub_xmm))>;
-def : Pat<(v2f64 (extract_subvector (v4f64 VR256:$src), (i32 0))),
+def : Pat<(v2f64 (extract_subvector (v4f64 VR256:$src), (iPTR 0))),
(v2f64 (EXTRACT_SUBREG (v4f64 VR256:$src), sub_xmm))>;
-def : Pat<(v8i16 (extract_subvector (v16i16 VR256:$src), (i32 0))),
+def : Pat<(v8i16 (extract_subvector (v16i16 VR256:$src), (iPTR 0))),
(v8i16 (EXTRACT_SUBREG (v16i16 VR256:$src), sub_xmm))>;
-def : Pat<(v16i8 (extract_subvector (v32i8 VR256:$src), (i32 0))),
+def : Pat<(v16i8 (extract_subvector (v32i8 VR256:$src), (iPTR 0))),
(v16i8 (EXTRACT_SUBREG (v32i8 VR256:$src), sub_xmm))>;
// A 128-bit subvector insert to the first 256-bit vector position
// is a subregister copy that needs no instruction.
-def : Pat<(insert_subvector undef, (v2i64 VR128:$src), (i32 0)),
+let AddedComplexity = 25 in { // to give priority over vinsertf128rm
+def : Pat<(insert_subvector undef, (v2i64 VR128:$src), (iPTR 0)),
(INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>;
-def : Pat<(insert_subvector undef, (v2f64 VR128:$src), (i32 0)),
+def : Pat<(insert_subvector undef, (v2f64 VR128:$src), (iPTR 0)),
(INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>;
-def : Pat<(insert_subvector undef, (v4i32 VR128:$src), (i32 0)),
+def : Pat<(insert_subvector undef, (v4i32 VR128:$src), (iPTR 0)),
(INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>;
-def : Pat<(insert_subvector undef, (v4f32 VR128:$src), (i32 0)),
+def : Pat<(insert_subvector undef, (v4f32 VR128:$src), (iPTR 0)),
(INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>;
-def : Pat<(insert_subvector undef, (v8i16 VR128:$src), (i32 0)),
+def : Pat<(insert_subvector undef, (v8i16 VR128:$src), (iPTR 0)),
(INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>;
-def : Pat<(insert_subvector undef, (v16i8 VR128:$src), (i32 0)),
+def : Pat<(insert_subvector undef, (v16i8 VR128:$src), (iPTR 0)),
(INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>;
+}
// Implicitly promote a 32-bit scalar to a vector.
def : Pat<(v4f32 (scalar_to_vector FR32:$src)),
@@ -362,7 +364,7 @@
def : Pat<(v16i16 (bitconvert (v32i8 VR256:$src))), (v16i16 VR256:$src)>;
}
-// Alias instructions that map fld0 to pxor for sse.
+// Alias instructions that map fld0 to xorps for sse or vxorps for avx.
// This is expanded by ExpandPostRAPseudos.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
isPseudo = 1 in {
@@ -382,11 +384,11 @@
// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-zeros value if folding it would be beneficial.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
- isPseudo = 1, neverHasSideEffects = 1 in {
-def V_SET0 : I<0, Pseudo, (outs VR128:$dst), (ins), "", []>;
+ isPseudo = 1 in {
+def V_SET0 : I<0, Pseudo, (outs VR128:$dst), (ins), "",
+ [(set VR128:$dst, (v4f32 immAllZerosV))]>;
}
-def : Pat<(v4f32 immAllZerosV), (V_SET0)>;
def : Pat<(v2f64 immAllZerosV), (V_SET0)>;
def : Pat<(v4i32 immAllZerosV), (V_SET0)>;
def : Pat<(v2i64 immAllZerosV), (V_SET0)>;
@@ -394,35 +396,29 @@
def : Pat<(v16i8 immAllZerosV), (V_SET0)>;
-// The same as done above but for AVX. The 256-bit ISA does not support PI,
+// The same as done above but for AVX. The 256-bit AVX1 ISA doesn't support PI,
// and doesn't need it because on sandy bridge the register is set to zero
// at the rename stage without using any execution unit, so SET0PSY
// and SET0PDY can be used for vector int instructions without penalty
-// FIXME: Change encoding to pseudo! This is blocked right now by the x86
-// JIT implementatioan, it does not expand the instructions below like
-// X86MCInstLower does.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
- isCodeGenOnly = 1 in {
-let Predicates = [HasAVX] in {
-def AVX_SET0PSY : PSI<0x57, MRMInitReg, (outs VR256:$dst), (ins), "",
- [(set VR256:$dst, (v8f32 immAllZerosV))]>, VEX_4V;
-def AVX_SET0PDY : PDI<0x57, MRMInitReg, (outs VR256:$dst), (ins), "",
- [(set VR256:$dst, (v4f64 immAllZerosV))]>, VEX_4V;
-}
-let Predicates = [HasAVX2], neverHasSideEffects = 1 in
-def AVX2_SET0 : PDI<0xef, MRMInitReg, (outs VR256:$dst), (ins), "",
- []>, VEX_4V;
+ isPseudo = 1, Predicates = [HasAVX] in {
+def AVX_SET0 : I<0, Pseudo, (outs VR256:$dst), (ins), "",
+ [(set VR256:$dst, (v8f32 immAllZerosV))]>;
}
-let Predicates = [HasAVX2], AddedComplexity = 5 in {
- def : Pat<(v4i64 immAllZerosV), (AVX2_SET0)>;
- def : Pat<(v8i32 immAllZerosV), (AVX2_SET0)>;
- def : Pat<(v16i16 immAllZerosV), (AVX2_SET0)>;
- def : Pat<(v32i8 immAllZerosV), (AVX2_SET0)>;
+let Predicates = [HasAVX] in
+ def : Pat<(v4f64 immAllZerosV), (AVX_SET0)>;
+
+let Predicates = [HasAVX2] in {
+ def : Pat<(v4i64 immAllZerosV), (AVX_SET0)>;
+ def : Pat<(v8i32 immAllZerosV), (AVX_SET0)>;
+ def : Pat<(v16i16 immAllZerosV), (AVX_SET0)>;
+ def : Pat<(v32i8 immAllZerosV), (AVX_SET0)>;
}
-// AVX has no support for 256-bit integer instructions, but since the 128-bit
+// AVX1 has no support for 256-bit integer instructions, but since the 128-bit
// VPXOR instruction writes zero to its upper part, it's safe build zeros.
+let Predicates = [HasAVX1Only] in {
def : Pat<(v32i8 immAllZerosV), (SUBREG_TO_REG (i8 0), (V_SET0), sub_xmm)>;
def : Pat<(bc_v32i8 (v8f32 immAllZerosV)),
(SUBREG_TO_REG (i8 0), (V_SET0), sub_xmm)>;
@@ -438,22 +434,17 @@
def : Pat<(v4i64 immAllZerosV), (SUBREG_TO_REG (i64 0), (V_SET0), sub_xmm)>;
def : Pat<(bc_v4i64 (v8f32 immAllZerosV)),
(SUBREG_TO_REG (i64 0), (V_SET0), sub_xmm)>;
+}
// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-ones value if folding it would be beneficial.
-// FIXME: Change encoding to pseudo! This is blocked right now by the x86
-// JIT implementation, it does not expand the instructions below like
-// X86MCInstLower does.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
- isCodeGenOnly = 1, ExeDomain = SSEPackedInt in {
- let Predicates = [HasAVX] in
- def AVX_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins), "",
- [(set VR128:$dst, (v4i32 immAllOnesV))]>, VEX_4V;
- def V_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins), "",
- [(set VR128:$dst, (v4i32 immAllOnesV))]>;
+ isPseudo = 1 in {
+ def V_SETALLONES : I<0, Pseudo, (outs VR128:$dst), (ins), "",
+ [(set VR128:$dst, (v4i32 immAllOnesV))]>;
let Predicates = [HasAVX2] in
- def AVX2_SETALLONES : PDI<0x76, MRMInitReg, (outs VR256:$dst), (ins), "",
- [(set VR256:$dst, (v8i32 immAllOnesV))]>, VEX_4V;
+ def AVX2_SETALLONES : I<0, Pseudo, (outs VR256:$dst), (ins), "",
+ [(set VR256:$dst, (v8i32 immAllOnesV))]>;
}
@@ -605,27 +596,27 @@
// Represent the same patterns above but in the form they appear for
// 256-bit types
def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
- (v4i32 (scalar_to_vector (loadi32 addr:$src))), (i32 0)))),
+ (v4i32 (scalar_to_vector (loadi32 addr:$src))), (iPTR 0)))),
(SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_xmm)>;
def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
- (v4f32 (scalar_to_vector (loadf32 addr:$src))), (i32 0)))),
+ (v4f32 (scalar_to_vector (loadf32 addr:$src))), (iPTR 0)))),
(SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_xmm)>;
def : Pat<(v4f64 (X86vzmovl (insert_subvector undef,
- (v2f64 (scalar_to_vector (loadf64 addr:$src))), (i32 0)))),
+ (v2f64 (scalar_to_vector (loadf64 addr:$src))), (iPTR 0)))),
(SUBREG_TO_REG (i32 0), (VMOVSDrm addr:$src), sub_xmm)>;
}
def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
- (v4f32 (scalar_to_vector FR32:$src)), (i32 0)))),
+ (v4f32 (scalar_to_vector FR32:$src)), (iPTR 0)))),
(SUBREG_TO_REG (i32 0),
(v4f32 (VMOVSSrr (v4f32 (V_SET0)), FR32:$src)),
sub_xmm)>;
def : Pat<(v4f64 (X86vzmovl (insert_subvector undef,
- (v2f64 (scalar_to_vector FR64:$src)), (i32 0)))),
+ (v2f64 (scalar_to_vector FR64:$src)), (iPTR 0)))),
(SUBREG_TO_REG (i64 0),
(v2f64 (VMOVSDrr (v2f64 (V_SET0)), FR64:$src)),
sub_xmm)>;
def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
- (v2i64 (scalar_to_vector (loadi64 addr:$src))), (i32 0)))),
+ (v2i64 (scalar_to_vector (loadi64 addr:$src))), (iPTR 0)))),
(SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_xmm)>;
// Move low f64 and clear high bits.
@@ -704,7 +695,7 @@
(VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
}
-let Predicates = [HasSSE1] in {
+let Predicates = [UseSSE1] in {
let AddedComplexity = 15 in {
// Move scalar to XMM zero-extended, zeroing a VR128 then do a
// MOVSS to the lower bits.
@@ -738,7 +729,7 @@
(MOVSSrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR32))>;
}
-let Predicates = [HasSSE2] in {
+let Predicates = [UseSSE2] in {
let AddedComplexity = 15 in {
// Move scalar to XMM zero-extended, zeroing a VR128 then do a
// MOVSD to the lower bits.
@@ -822,16 +813,16 @@
defm VMOVAPSY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv8f32,
"movaps", SSEPackedSingle, SSE_MOVA_ITINS>,
- TB, VEX;
+ TB, VEX, VEX_L;
defm VMOVAPDY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv4f64,
"movapd", SSEPackedDouble, SSE_MOVA_ITINS>,
- TB, OpSize, VEX;
+ TB, OpSize, VEX, VEX_L;
defm VMOVUPSY : sse12_mov_packed<0x10, VR256, f256mem, loadv8f32,
"movups", SSEPackedSingle, SSE_MOVU_ITINS>,
- TB, VEX;
+ TB, VEX, VEX_L;
defm VMOVUPDY : sse12_mov_packed<0x10, VR256, f256mem, loadv4f64,
"movupd", SSEPackedDouble, SSE_MOVU_ITINS, 0>,
- TB, OpSize, VEX;
+ TB, OpSize, VEX, VEX_L;
defm MOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32,
"movaps", SSEPackedSingle, SSE_MOVA_ITINS>,
TB;
@@ -864,19 +855,19 @@
def VMOVAPSYmr : VPSI<0x29, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
"movaps\t{$src, $dst|$dst, $src}",
[(alignedstore256 (v8f32 VR256:$src), addr:$dst)],
- IIC_SSE_MOVA_P_MR>, VEX;
+ IIC_SSE_MOVA_P_MR>, VEX, VEX_L;
def VMOVAPDYmr : VPDI<0x29, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
"movapd\t{$src, $dst|$dst, $src}",
[(alignedstore256 (v4f64 VR256:$src), addr:$dst)],
- IIC_SSE_MOVA_P_MR>, VEX;
+ IIC_SSE_MOVA_P_MR>, VEX, VEX_L;
def VMOVUPSYmr : VPSI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
"movups\t{$src, $dst|$dst, $src}",
[(store (v8f32 VR256:$src), addr:$dst)],
- IIC_SSE_MOVU_P_MR>, VEX;
+ IIC_SSE_MOVU_P_MR>, VEX, VEX_L;
def VMOVUPDYmr : VPDI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
"movupd\t{$src, $dst|$dst, $src}",
[(store (v4f64 VR256:$src), addr:$dst)],
- IIC_SSE_MOVU_P_MR>, VEX;
+ IIC_SSE_MOVU_P_MR>, VEX, VEX_L;
// For disassembler
let isCodeGenOnly = 1 in {
@@ -899,33 +890,33 @@
def VMOVAPSYrr_REV : VPSI<0x29, MRMDestReg, (outs VR256:$dst),
(ins VR256:$src),
"movaps\t{$src, $dst|$dst, $src}", [],
- IIC_SSE_MOVA_P_RR>, VEX;
+ IIC_SSE_MOVA_P_RR>, VEX, VEX_L;
def VMOVAPDYrr_REV : VPDI<0x29, MRMDestReg, (outs VR256:$dst),
(ins VR256:$src),
"movapd\t{$src, $dst|$dst, $src}", [],
- IIC_SSE_MOVA_P_RR>, VEX;
+ IIC_SSE_MOVA_P_RR>, VEX, VEX_L;
def VMOVUPSYrr_REV : VPSI<0x11, MRMDestReg, (outs VR256:$dst),
(ins VR256:$src),
"movups\t{$src, $dst|$dst, $src}", [],
- IIC_SSE_MOVU_P_RR>, VEX;
+ IIC_SSE_MOVU_P_RR>, VEX, VEX_L;
def VMOVUPDYrr_REV : VPDI<0x11, MRMDestReg, (outs VR256:$dst),
(ins VR256:$src),
"movupd\t{$src, $dst|$dst, $src}", [],
- IIC_SSE_MOVU_P_RR>, VEX;
+ IIC_SSE_MOVU_P_RR>, VEX, VEX_L;
}
let Predicates = [HasAVX] in {
def : Pat<(v8i32 (X86vzmovl
- (insert_subvector undef, (v4i32 VR128:$src), (i32 0)))),
+ (insert_subvector undef, (v4i32 VR128:$src), (iPTR 0)))),
(SUBREG_TO_REG (i32 0), (VMOVAPSrr VR128:$src), sub_xmm)>;
def : Pat<(v4i64 (X86vzmovl
- (insert_subvector undef, (v2i64 VR128:$src), (i32 0)))),
+ (insert_subvector undef, (v2i64 VR128:$src), (iPTR 0)))),
(SUBREG_TO_REG (i32 0), (VMOVAPSrr VR128:$src), sub_xmm)>;
def : Pat<(v8f32 (X86vzmovl
- (insert_subvector undef, (v4f32 VR128:$src), (i32 0)))),
+ (insert_subvector undef, (v4f32 VR128:$src), (iPTR 0)))),
(SUBREG_TO_REG (i32 0), (VMOVAPSrr VR128:$src), sub_xmm)>;
def : Pat<(v4f64 (X86vzmovl
- (insert_subvector undef, (v2f64 VR128:$src), (i32 0)))),
+ (insert_subvector undef, (v2f64 VR128:$src), (iPTR 0)))),
(SUBREG_TO_REG (i32 0), (VMOVAPSrr VR128:$src), sub_xmm)>;
}
@@ -975,10 +966,10 @@
(VMOVUPDmr addr:$dst, VR128:$src)>;
}
-let Predicates = [HasSSE1] in
+let Predicates = [UseSSE1] in
def : Pat<(int_x86_sse_storeu_ps addr:$dst, VR128:$src),
(MOVUPSmr addr:$dst, VR128:$src)>;
-let Predicates = [HasSSE2] in
+let Predicates = [UseSSE2] in
def : Pat<(int_x86_sse2_storeu_pd addr:$dst, VR128:$src),
(MOVUPDmr addr:$dst, VR128:$src)>;
@@ -1028,12 +1019,52 @@
(VMOVUPSYmr addr:$dst, VR256:$src)>;
def : Pat<(store (v32i8 VR256:$src), addr:$dst),
(VMOVUPSYmr addr:$dst, VR256:$src)>;
+
+ // Special patterns for storing subvector extracts of lower 128-bits
+ // Its cheaper to just use VMOVAPS/VMOVUPS instead of VEXTRACTF128mr
+ def : Pat<(alignedstore (v2f64 (extract_subvector
+ (v4f64 VR256:$src), (iPTR 0))), addr:$dst),
+ (VMOVAPDmr addr:$dst, (v2f64 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
+ def : Pat<(alignedstore (v4f32 (extract_subvector
+ (v8f32 VR256:$src), (iPTR 0))), addr:$dst),
+ (VMOVAPSmr addr:$dst, (v4f32 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
+ def : Pat<(alignedstore (v2i64 (extract_subvector
+ (v4i64 VR256:$src), (iPTR 0))), addr:$dst),
+ (VMOVAPDmr addr:$dst, (v2i64 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
+ def : Pat<(alignedstore (v4i32 (extract_subvector
+ (v8i32 VR256:$src), (iPTR 0))), addr:$dst),
+ (VMOVAPSmr addr:$dst, (v4i32 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
+ def : Pat<(alignedstore (v8i16 (extract_subvector
+ (v16i16 VR256:$src), (iPTR 0))), addr:$dst),
+ (VMOVAPSmr addr:$dst, (v8i16 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
+ def : Pat<(alignedstore (v16i8 (extract_subvector
+ (v32i8 VR256:$src), (iPTR 0))), addr:$dst),
+ (VMOVAPSmr addr:$dst, (v16i8 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
+
+ def : Pat<(store (v2f64 (extract_subvector
+ (v4f64 VR256:$src), (iPTR 0))), addr:$dst),
+ (VMOVUPDmr addr:$dst, (v2f64 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
+ def : Pat<(store (v4f32 (extract_subvector
+ (v8f32 VR256:$src), (iPTR 0))), addr:$dst),
+ (VMOVUPSmr addr:$dst, (v4f32 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
+ def : Pat<(store (v2i64 (extract_subvector
+ (v4i64 VR256:$src), (iPTR 0))), addr:$dst),
+ (VMOVUPDmr addr:$dst, (v2i64 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
+ def : Pat<(store (v4i32 (extract_subvector
+ (v8i32 VR256:$src), (iPTR 0))), addr:$dst),
+ (VMOVUPSmr addr:$dst, (v4i32 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
+ def : Pat<(store (v8i16 (extract_subvector
+ (v16i16 VR256:$src), (iPTR 0))), addr:$dst),
+ (VMOVAPSmr addr:$dst, (v8i16 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
+ def : Pat<(store (v16i8 (extract_subvector
+ (v32i8 VR256:$src), (iPTR 0))), addr:$dst),
+ (VMOVUPSmr addr:$dst, (v16i8 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
}
// Use movaps / movups for SSE integer load / store (one byte shorter).
// The instructions selected below are then converted to MOVDQA/MOVDQU
// during the SSE domain pass.
-let Predicates = [HasSSE1] in {
+let Predicates = [UseSSE1] in {
def : Pat<(alignedloadv2i64 addr:$src),
(MOVAPSrm addr:$src)>;
def : Pat<(loadv2i64 addr:$src),
@@ -1180,7 +1211,7 @@
(VMOVLPDmr addr:$src1, VR128:$src2)>;
}
-let Predicates = [HasSSE1] in {
+let Predicates = [UseSSE1] in {
// (store (vector_shuffle (load addr), v2, <4, 5, 2, 3>), addr) using MOVLPS
def : Pat<(store (i64 (vector_extract (bc_v2i64 (v4f32 VR128:$src2)),
(iPTR 0))), addr:$src1),
@@ -1205,7 +1236,7 @@
(MOVLPSmr addr:$src1, VR128:$src2)>;
}
-let Predicates = [HasSSE2] in {
+let Predicates = [UseSSE2] in {
// Shuffle with MOVLPD
def : Pat<(v2f64 (X86Movlpd VR128:$src1, (load addr:$src2))),
(MOVLPDrm VR128:$src1, addr:$src2)>;
@@ -1271,7 +1302,7 @@
(VMOVHPSrm VR128:$src1, addr:$src2)>;
// FIXME: Instead of X86Unpckl, there should be a X86Movlhpd here, the problem
- // is during lowering, where it's not possible to recognize the load fold
+ // is during lowering, where it's not possible to recognize the load fold
// cause it has two uses through a bitcast. One use disappears at isel time
// and the fold opportunity reappears.
def : Pat<(v2f64 (X86Unpckl VR128:$src1,
@@ -1279,7 +1310,7 @@
(VMOVHPDrm VR128:$src1, addr:$src2)>;
}
-let Predicates = [HasSSE1] in {
+let Predicates = [UseSSE1] in {
// MOVHPS patterns
def : Pat<(X86Movlhps VR128:$src1,
(bc_v4f32 (v2i64 (scalar_to_vector (loadi64 addr:$src2))))),
@@ -1289,9 +1320,9 @@
(MOVHPSrm VR128:$src1, addr:$src2)>;
}
-let Predicates = [HasSSE2] in {
+let Predicates = [UseSSE2] in {
// FIXME: Instead of X86Unpckl, there should be a X86Movlhpd here, the problem
- // is during lowering, where it's not possible to recognize the load fold
+ // is during lowering, where it's not possible to recognize the load fold
// cause it has two uses through a bitcast. One use disappears at isel time
// and the fold opportunity reappears.
def : Pat<(v2f64 (X86Unpckl VR128:$src1,
@@ -1346,7 +1377,7 @@
(VMOVHLPSrr VR128:$src1, VR128:$src2)>;
}
-let Predicates = [HasSSE1] in {
+let Predicates = [UseSSE1] in {
// MOVLHPS patterns
def : Pat<(v4i32 (X86Movlhps VR128:$src1, VR128:$src2)),
(MOVLHPSrr VR128:$src1, VR128:$src2)>;
@@ -1456,7 +1487,7 @@
def : InstAlias<"vcvtsi2sd{l}\t{$src, $src1, $dst|$dst, $src1, $src}",
(VCVTSI2SDrm FR64:$dst, FR64:$src1, i32mem:$src)>;
-let Predicates = [HasAVX], AddedComplexity = 1 in {
+let Predicates = [HasAVX] in {
def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))),
(VCVTSI2SSrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f32 (sint_to_fp (loadi64 addr:$src))),
@@ -1628,12 +1659,12 @@
defm VCVTDQ2PSY : sse12_cvt_p<0x5B, VR256, VR256, i256mem,
"vcvtdq2ps\t{$src, $dst|$dst, $src}",
SSEPackedSingle, SSE_CVT_PS>,
- TB, VEX, Requires<[HasAVX]>;
+ TB, VEX, VEX_L, Requires<[HasAVX]>;
defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, i128mem,
"cvtdq2ps\t{$src, $dst|$dst, $src}",
SSEPackedSingle, SSE_CVT_PS>,
- TB, Requires<[HasSSE2]>;
+ TB, Requires<[UseSSE2]>;
/// SSE 2 Only
@@ -1663,7 +1694,7 @@
[(set FR32:$dst, (fround (loadf64 addr:$src)))],
IIC_SSE_CVT_Scalar_RM>,
XD,
- Requires<[HasSSE2, OptForSize]>;
+ Requires<[UseSSE2, OptForSize]>;
def Int_VCVTSD2SSrr: I<0x5A, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
@@ -1684,13 +1715,13 @@
"cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
(int_x86_sse2_cvtsd2ss VR128:$src1, VR128:$src2))],
- IIC_SSE_CVT_Scalar_RR>, XD, Requires<[HasSSE2]>;
+ IIC_SSE_CVT_Scalar_RR>, XD, Requires<[UseSSE2]>;
def Int_CVTSD2SSrm: I<0x5A, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, sdmem:$src2),
"cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst, (int_x86_sse2_cvtsd2ss
VR128:$src1, sse_load_f64:$src2))],
- IIC_SSE_CVT_Scalar_RM>, XD, Requires<[HasSSE2]>;
+ IIC_SSE_CVT_Scalar_RM>, XD, Requires<[UseSSE2]>;
}
// Convert scalar single to scalar double
@@ -1709,30 +1740,28 @@
XS, VEX_4V, VEX_LIG, Requires<[HasAVX, OptForSize]>;
}
-let AddedComplexity = 1 in { // give AVX priority
- def : Pat<(f64 (fextend FR32:$src)),
- (VCVTSS2SDrr FR32:$src, FR32:$src)>, Requires<[HasAVX]>;
- def : Pat<(fextend (loadf32 addr:$src)),
- (VCVTSS2SDrm (f32 (IMPLICIT_DEF)), addr:$src)>, Requires<[HasAVX]>;
-
- def : Pat<(extloadf32 addr:$src),
- (VCVTSS2SDrm (f32 (IMPLICIT_DEF)), addr:$src)>,
- Requires<[HasAVX, OptForSize]>;
- def : Pat<(extloadf32 addr:$src),
- (VCVTSS2SDrr (f32 (IMPLICIT_DEF)), (VMOVSSrm addr:$src))>,
- Requires<[HasAVX, OptForSpeed]>;
-} // AddedComplexity = 1
+def : Pat<(f64 (fextend FR32:$src)),
+ (VCVTSS2SDrr FR32:$src, FR32:$src)>, Requires<[HasAVX]>;
+def : Pat<(fextend (loadf32 addr:$src)),
+ (VCVTSS2SDrm (f32 (IMPLICIT_DEF)), addr:$src)>, Requires<[HasAVX]>;
+
+def : Pat<(extloadf32 addr:$src),
+ (VCVTSS2SDrm (f32 (IMPLICIT_DEF)), addr:$src)>,
+ Requires<[HasAVX, OptForSize]>;
+def : Pat<(extloadf32 addr:$src),
+ (VCVTSS2SDrr (f32 (IMPLICIT_DEF)), (VMOVSSrm addr:$src))>,
+ Requires<[HasAVX, OptForSpeed]>;
def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src),
"cvtss2sd\t{$src, $dst|$dst, $src}",
[(set FR64:$dst, (fextend FR32:$src))],
IIC_SSE_CVT_Scalar_RR>, XS,
- Requires<[HasSSE2]>;
+ Requires<[UseSSE2]>;
def CVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins f32mem:$src),
"cvtss2sd\t{$src, $dst|$dst, $src}",
[(set FR64:$dst, (extloadf32 addr:$src))],
IIC_SSE_CVT_Scalar_RM>, XS,
- Requires<[HasSSE2, OptForSize]>;
+ Requires<[UseSSE2, OptForSize]>;
// extload f32 -> f64. This matches load+fextend because we have a hack in
// the isel (PreprocessForFPConvert) that can introduce loads after dag
@@ -1740,9 +1769,9 @@
// Since these loads aren't folded into the fextend, we have to match it
// explicitly here.
def : Pat<(fextend (loadf32 addr:$src)),
- (CVTSS2SDrm addr:$src)>, Requires<[HasSSE2]>;
+ (CVTSS2SDrm addr:$src)>, Requires<[UseSSE2]>;
def : Pat<(extloadf32 addr:$src),
- (CVTSS2SDrr (MOVSSrm addr:$src))>, Requires<[HasSSE2, OptForSpeed]>;
+ (CVTSS2SDrr (MOVSSrm addr:$src))>, Requires<[UseSSE2, OptForSpeed]>;
def Int_VCVTSS2SDrr: I<0x5A, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
@@ -1762,13 +1791,13 @@
"cvtss2sd\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(int_x86_sse2_cvtss2sd VR128:$src1, VR128:$src2))],
- IIC_SSE_CVT_Scalar_RR>, XS, Requires<[HasSSE2]>;
+ IIC_SSE_CVT_Scalar_RR>, XS, Requires<[UseSSE2]>;
def Int_CVTSS2SDrm: I<0x5A, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, ssmem:$src2),
"cvtss2sd\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(int_x86_sse2_cvtss2sd VR128:$src1, sse_load_f32:$src2))],
- IIC_SSE_CVT_Scalar_RM>, XS, Requires<[HasSSE2]>;
+ IIC_SSE_CVT_Scalar_RM>, XS, Requires<[UseSSE2]>;
}
// Convert packed single/double fp to doubleword
@@ -1785,12 +1814,12 @@
"cvtps2dq\t{$src, $dst|$dst, $src}",
[(set VR256:$dst,
(int_x86_avx_cvt_ps2dq_256 VR256:$src))],
- IIC_SSE_CVT_PS_RR>, VEX;
+ IIC_SSE_CVT_PS_RR>, VEX, VEX_L;
def VCVTPS2DQYrm : VPDI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
"cvtps2dq\t{$src, $dst|$dst, $src}",
[(set VR256:$dst,
(int_x86_avx_cvt_ps2dq_256 (memopv8f32 addr:$src)))],
- IIC_SSE_CVT_PS_RM>, VEX;
+ IIC_SSE_CVT_PS_RM>, VEX, VEX_L;
def CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))],
@@ -1824,7 +1853,7 @@
def VCVTPD2DQYrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
"vcvtpd2dq{y}\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
- (int_x86_avx_cvt_pd2dq_256 VR256:$src))]>, VEX;
+ (int_x86_avx_cvt_pd2dq_256 VR256:$src))]>, VEX, VEX_L;
def VCVTPD2DQYrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
"vcvtpd2dq{y}\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
@@ -1860,12 +1889,12 @@
"cvttps2dq\t{$src, $dst|$dst, $src}",
[(set VR256:$dst,
(int_x86_avx_cvtt_ps2dq_256 VR256:$src))],
- IIC_SSE_CVT_PS_RR>, VEX;
+ IIC_SSE_CVT_PS_RR>, VEX, VEX_L;
def VCVTTPS2DQYrm : VS2SI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
[(set VR256:$dst, (int_x86_avx_cvtt_ps2dq_256
(memopv8f32 addr:$src)))],
- IIC_SSE_CVT_PS_RM>, VEX;
+ IIC_SSE_CVT_PS_RM>, VEX, VEX_L;
def CVTTPS2DQrr : S2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
@@ -1904,7 +1933,7 @@
(VCVTTPS2DQYrm addr:$src)>;
}
-let Predicates = [HasSSE2] in {
+let Predicates = [UseSSE2] in {
def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))),
(CVTDQ2PSrr VR128:$src)>;
def : Pat<(v4f32 (sint_to_fp (bc_v4i32 (memopv2i64 addr:$src)))),
@@ -1945,7 +1974,7 @@
"cvttpd2dq{y}\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(int_x86_avx_cvtt_pd2dq_256 VR256:$src))],
- IIC_SSE_CVT_PD_RR>, VEX;
+ IIC_SSE_CVT_PD_RR>, VEX, VEX_L;
def VCVTTPD2DQYrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
"cvttpd2dq{y}\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
@@ -1978,31 +2007,31 @@
"vcvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))],
IIC_SSE_CVT_PD_RR>, TB, VEX;
-let neverHasSideEffects = 1, mayLoad = 1 in
def VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
- "vcvtps2pd\t{$src, $dst|$dst, $src}", [],
- IIC_SSE_CVT_PD_RM>, TB, VEX;
+ "vcvtps2pd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (v2f64 (extloadv2f32 addr:$src)))],
+ IIC_SSE_CVT_PD_RM>, TB, VEX;
def VCVTPS2PDYrr : I<0x5A, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
"vcvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR256:$dst,
(int_x86_avx_cvt_ps2_pd_256 VR128:$src))],
- IIC_SSE_CVT_PD_RR>, TB, VEX;
+ IIC_SSE_CVT_PD_RR>, TB, VEX, VEX_L;
def VCVTPS2PDYrm : I<0x5A, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src),
"vcvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR256:$dst,
(int_x86_avx_cvt_ps2_pd_256 (memopv4f32 addr:$src)))],
- IIC_SSE_CVT_PD_RM>, TB, VEX;
+ IIC_SSE_CVT_PD_RM>, TB, VEX, VEX_L;
}
-let Predicates = [HasSSE2] in {
+let Predicates = [UseSSE2] in {
def CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))],
IIC_SSE_CVT_PD_RR>, TB;
-let neverHasSideEffects = 1, mayLoad = 1 in
def CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
- "cvtps2pd\t{$src, $dst|$dst, $src}", [],
- IIC_SSE_CVT_PD_RM>, TB;
+ "cvtps2pd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (v2f64 (extloadv2f32 addr:$src)))],
+ IIC_SSE_CVT_PD_RM>, TB;
}
// Convert Packed DW Integers to Packed Double FP
@@ -2019,11 +2048,11 @@
"vcvtdq2pd\t{$src, $dst|$dst, $src}",
[(set VR256:$dst,
(int_x86_avx_cvtdq2_pd_256
- (bitconvert (memopv2i64 addr:$src))))]>, VEX;
+ (bitconvert (memopv2i64 addr:$src))))]>, VEX, VEX_L;
def VCVTDQ2PDYrr : S2SI<0xE6, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
"vcvtdq2pd\t{$src, $dst|$dst, $src}",
[(set VR256:$dst,
- (int_x86_avx_cvtdq2_pd_256 VR128:$src))]>, VEX;
+ (int_x86_avx_cvtdq2_pd_256 VR128:$src))]>, VEX, VEX_L;
}
let neverHasSideEffects = 1, mayLoad = 1 in
@@ -2066,7 +2095,7 @@
"cvtpd2ps{y}\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(int_x86_avx_cvt_pd2_ps_256 VR256:$src))],
- IIC_SSE_CVT_PD_RR>, VEX;
+ IIC_SSE_CVT_PD_RR>, VEX, VEX_L;
def VCVTPD2PSYrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
"cvtpd2ps{y}\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
@@ -2096,24 +2125,41 @@
(VCVTDQ2PSYrm addr:$src)>;
// Match fround and fextend for 128/256-bit conversions
+ def : Pat<(v4f32 (X86vfpround (v2f64 VR128:$src))),
+ (VCVTPD2PSrr VR128:$src)>;
+ def : Pat<(v4f32 (X86vfpround (memopv2f64 addr:$src))),
+ (VCVTPD2PSXrm addr:$src)>;
def : Pat<(v4f32 (fround (v4f64 VR256:$src))),
(VCVTPD2PSYrr VR256:$src)>;
def : Pat<(v4f32 (fround (loadv4f64 addr:$src))),
(VCVTPD2PSYrm addr:$src)>;
+ def : Pat<(v2f64 (X86vfpext (v4f32 VR128:$src))),
+ (VCVTPS2PDrr VR128:$src)>;
def : Pat<(v4f64 (fextend (v4f32 VR128:$src))),
(VCVTPS2PDYrr VR128:$src)>;
- def : Pat<(v4f64 (fextend (loadv4f32 addr:$src))),
+ def : Pat<(v4f64 (extloadv4f32 addr:$src)),
(VCVTPS2PDYrm addr:$src)>;
}
+let Predicates = [UseSSE2] in {
+ // Match fround and fextend for 128 conversions
+ def : Pat<(v4f32 (X86vfpround (v2f64 VR128:$src))),
+ (CVTPD2PSrr VR128:$src)>;
+ def : Pat<(v4f32 (X86vfpround (memopv2f64 addr:$src))),
+ (CVTPD2PSrm addr:$src)>;
+
+ def : Pat<(v2f64 (X86vfpext (v4f32 VR128:$src))),
+ (CVTPS2PDrr VR128:$src)>;
+}
+
//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Compare Instructions
//===----------------------------------------------------------------------===//
// sse12_cmp_scalar - sse 1 & 2 compare scalar instructions
multiclass sse12_cmp_scalar<RegisterClass RC, X86MemOperand x86memop,
- Operand CC, SDNode OpNode, ValueType VT,
+ Operand CC, SDNode OpNode, ValueType VT,
PatFrag ld_frag, string asm, string asm_alt,
OpndItins itins> {
def rr : SIi8<0xC2, MRMSrcReg,
@@ -2259,7 +2305,7 @@
// sse12_cmp_packed - sse 1 & 2 compare packed instructions
multiclass sse12_cmp_packed<RegisterClass RC, X86MemOperand x86memop,
- Operand CC, Intrinsic Int, string asm,
+ Operand CC, Intrinsic Int, string asm,
string asm_alt, Domain d> {
def rri : PIi8<0xC2, MRMSrcReg,
(outs RC:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm,
@@ -2292,11 +2338,11 @@
defm VCMPPSY : sse12_cmp_packed<VR256, f256mem, AVXCC, int_x86_avx_cmp_ps_256,
"cmp${cc}ps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
"cmpps\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
- SSEPackedSingle>, TB, VEX_4V;
+ SSEPackedSingle>, TB, VEX_4V, VEX_L;
defm VCMPPDY : sse12_cmp_packed<VR256, f256mem, AVXCC, int_x86_avx_cmp_pd_256,
"cmp${cc}pd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
"cmppd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
- SSEPackedDouble>, TB, OpSize, VEX_4V;
+ SSEPackedDouble>, TB, OpSize, VEX_4V, VEX_L;
let Constraints = "$src1 = $dst" in {
defm CMPPS : sse12_cmp_packed<VR128, f128mem, SSECC, int_x86_sse_cmp_ps,
"cmp${cc}ps\t{$src2, $dst|$dst, $src2}",
@@ -2328,14 +2374,14 @@
(VCMPPDYrmi VR256:$src1, addr:$src2, imm:$cc)>;
}
-let Predicates = [HasSSE1] in {
+let Predicates = [UseSSE1] in {
def : Pat<(v4i32 (X86cmpp (v4f32 VR128:$src1), VR128:$src2, imm:$cc)),
(CMPPSrri (v4f32 VR128:$src1), (v4f32 VR128:$src2), imm:$cc)>;
def : Pat<(v4i32 (X86cmpp (v4f32 VR128:$src1), (memop addr:$src2), imm:$cc)),
(CMPPSrmi (v4f32 VR128:$src1), addr:$src2, imm:$cc)>;
}
-let Predicates = [HasSSE2] in {
+let Predicates = [UseSSE2] in {
def : Pat<(v2i64 (X86cmpp (v2f64 VR128:$src1), VR128:$src2, imm:$cc)),
(CMPPDrri VR128:$src1, VR128:$src2, imm:$cc)>;
def : Pat<(v2i64 (X86cmpp (v2f64 VR128:$src1), (memop addr:$src2), imm:$cc)),
@@ -2366,13 +2412,13 @@
memopv4f32, SSEPackedSingle>, TB, VEX_4V;
defm VSHUFPSY : sse12_shuffle<VR256, f256mem, v8f32,
"shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
- memopv8f32, SSEPackedSingle>, TB, VEX_4V;
+ memopv8f32, SSEPackedSingle>, TB, VEX_4V, VEX_L;
defm VSHUFPD : sse12_shuffle<VR128, f128mem, v2f64,
"shufpd\t{$src3, $src2, $src1, $dst|$dst, $src2, $src2, $src3}",
memopv2f64, SSEPackedDouble>, TB, OpSize, VEX_4V;
defm VSHUFPDY : sse12_shuffle<VR256, f256mem, v4f64,
"shufpd\t{$src3, $src2, $src1, $dst|$dst, $src2, $src2, $src3}",
- memopv4f64, SSEPackedDouble>, TB, OpSize, VEX_4V;
+ memopv4f64, SSEPackedDouble>, TB, OpSize, VEX_4V, VEX_L;
let Constraints = "$src1 = $dst" in {
defm SHUFPS : sse12_shuffle<VR128, f128mem, v4f32,
@@ -2412,7 +2458,7 @@
(VSHUFPDYrmi VR256:$src1, addr:$src2, imm:$imm)>;
}
-let Predicates = [HasSSE1] in {
+let Predicates = [UseSSE1] in {
def : Pat<(v4i32 (X86Shufp VR128:$src1,
(bc_v4i32 (memopv2i64 addr:$src2)), (i8 imm:$imm))),
(SHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>;
@@ -2420,7 +2466,7 @@
(SHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>;
}
-let Predicates = [HasSSE2] in {
+let Predicates = [UseSSE2] in {
// Generic SHUFPD patterns
def : Pat<(v2i64 (X86Shufp VR128:$src1,
(memopv2i64 addr:$src2), (i8 imm:$imm))),
@@ -2466,16 +2512,16 @@
defm VUNPCKHPSY: sse12_unpack_interleave<0x15, X86Unpckh, v8f32, memopv8f32,
VR256, f256mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SSEPackedSingle>, TB, VEX_4V;
+ SSEPackedSingle>, TB, VEX_4V, VEX_L;
defm VUNPCKHPDY: sse12_unpack_interleave<0x15, X86Unpckh, v4f64, memopv4f64,
VR256, f256mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SSEPackedDouble>, TB, OpSize, VEX_4V;
+ SSEPackedDouble>, TB, OpSize, VEX_4V, VEX_L;
defm VUNPCKLPSY: sse12_unpack_interleave<0x14, X86Unpckl, v8f32, memopv8f32,
VR256, f256mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SSEPackedSingle>, TB, VEX_4V;
+ SSEPackedSingle>, TB, VEX_4V, VEX_L;
defm VUNPCKLPDY: sse12_unpack_interleave<0x14, X86Unpckl, v4f64, memopv4f64,
VR256, f256mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SSEPackedDouble>, TB, OpSize, VEX_4V;
+ SSEPackedDouble>, TB, OpSize, VEX_4V, VEX_L;
let Constraints = "$src1 = $dst" in {
defm UNPCKHPS: sse12_unpack_interleave<0x15, X86Unpckh, v4f32, memopv4f32,
@@ -2492,7 +2538,27 @@
SSEPackedDouble>, TB, OpSize;
} // Constraints = "$src1 = $dst"
-let Predicates = [HasAVX], AddedComplexity = 1 in {
+let Predicates = [HasAVX1Only] in {
+ def : Pat<(v8i32 (X86Unpckl VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)))),
+ (VUNPCKLPSYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(v8i32 (X86Unpckl VR256:$src1, VR256:$src2)),
+ (VUNPCKLPSYrr VR256:$src1, VR256:$src2)>;
+ def : Pat<(v8i32 (X86Unpckh VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)))),
+ (VUNPCKHPSYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(v8i32 (X86Unpckh VR256:$src1, VR256:$src2)),
+ (VUNPCKHPSYrr VR256:$src1, VR256:$src2)>;
+
+ def : Pat<(v4i64 (X86Unpckl VR256:$src1, (memopv4i64 addr:$src2))),
+ (VUNPCKLPDYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(v4i64 (X86Unpckl VR256:$src1, VR256:$src2)),
+ (VUNPCKLPDYrr VR256:$src1, VR256:$src2)>;
+ def : Pat<(v4i64 (X86Unpckh VR256:$src1, (memopv4i64 addr:$src2))),
+ (VUNPCKHPDYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(v4i64 (X86Unpckh VR256:$src1, VR256:$src2)),
+ (VUNPCKHPDYrr VR256:$src1, VR256:$src2)>;
+}
+
+let Predicates = [HasAVX] in {
// FIXME: Instead of X86Movddup, there should be a X86Unpckl here, the
// problem is during lowering, where it's not possible to recognize the load
// fold cause it has two uses through a bitcast. One use disappears at isel
@@ -2501,7 +2567,7 @@
(VUNPCKLPDrr VR128:$src, VR128:$src)>;
}
-let Predicates = [HasSSE2] in {
+let Predicates = [UseSSE2] in {
// FIXME: Instead of X86Movddup, there should be a X86Unpckl here, the
// problem is during lowering, where it's not possible to recognize the load
// fold cause it has two uses through a bitcast. One use disappears at isel
@@ -2532,10 +2598,11 @@
"movmskpd", SSEPackedDouble>, TB,
OpSize, VEX;
defm VMOVMSKPSY : sse12_extr_sign_mask<VR256, int_x86_avx_movmsk_ps_256,
- "movmskps", SSEPackedSingle>, TB, VEX;
+ "movmskps", SSEPackedSingle>, TB,
+ VEX, VEX_L;
defm VMOVMSKPDY : sse12_extr_sign_mask<VR256, int_x86_avx_movmsk_pd_256,
"movmskpd", SSEPackedDouble>, TB,
- OpSize, VEX;
+ OpSize, VEX, VEX_L;
def : Pat<(i32 (X86fgetsign FR32:$src)),
(VMOVMSKPSrr32 (COPY_TO_REGCLASS FR32:$src, VR128))>;
@@ -2556,11 +2623,11 @@
OpSize, VEX;
def VMOVMSKPSYr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src),
"movmskps\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVMSK,
- SSEPackedSingle>, TB, VEX;
+ SSEPackedSingle>, TB, VEX, VEX_L;
def VMOVMSKPDYr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src),
"movmskpd\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVMSK,
SSEPackedDouble>, TB,
- OpSize, VEX;
+ OpSize, VEX, VEX_L;
}
defm MOVMSKPS : sse12_extr_sign_mask<VR128, int_x86_sse_movmsk_ps, "movmskps",
@@ -2570,16 +2637,16 @@
def : Pat<(i32 (X86fgetsign FR32:$src)),
(MOVMSKPSrr32 (COPY_TO_REGCLASS FR32:$src, VR128))>,
- Requires<[HasSSE1]>;
+ Requires<[UseSSE1]>;
def : Pat<(i64 (X86fgetsign FR32:$src)),
(MOVMSKPSrr64 (COPY_TO_REGCLASS FR32:$src, VR128))>,
- Requires<[HasSSE1]>;
+ Requires<[UseSSE1]>;
def : Pat<(i32 (X86fgetsign FR64:$src)),
(MOVMSKPDrr32 (COPY_TO_REGCLASS FR64:$src, VR128))>,
- Requires<[HasSSE2]>;
+ Requires<[UseSSE2]>;
def : Pat<(i64 (X86fgetsign FR64:$src)),
(MOVMSKPDrr64 (COPY_TO_REGCLASS FR64:$src, VR128))>,
- Requires<[HasSSE2]>;
+ Requires<[UseSSE2]>;
//===---------------------------------------------------------------------===//
// SSE2 - Packed Integer Logical Instructions
@@ -2638,13 +2705,13 @@
let Predicates = [HasAVX2] in {
defm VPANDY : PDI_binop_rm<0xDB, "vpand", and, v4i64, VR256, memopv4i64,
- i256mem, SSE_BIT_ITINS_P, 1, 0>, VEX_4V;
+ i256mem, SSE_BIT_ITINS_P, 1, 0>, VEX_4V, VEX_L;
defm VPORY : PDI_binop_rm<0xEB, "vpor", or, v4i64, VR256, memopv4i64,
- i256mem, SSE_BIT_ITINS_P, 1, 0>, VEX_4V;
+ i256mem, SSE_BIT_ITINS_P, 1, 0>, VEX_4V, VEX_L;
defm VPXORY : PDI_binop_rm<0xEF, "vpxor", xor, v4i64, VR256, memopv4i64,
- i256mem, SSE_BIT_ITINS_P, 1, 0>, VEX_4V;
+ i256mem, SSE_BIT_ITINS_P, 1, 0>, VEX_4V, VEX_L;
defm VPANDNY : PDI_binop_rm<0xDF, "vpandn", X86andnp, v4i64, VR256, memopv4i64,
- i256mem, SSE_BIT_ITINS_P, 0, 0>, VEX_4V;
+ i256mem, SSE_BIT_ITINS_P, 0, 0>, VEX_4V, VEX_L;
}
//===----------------------------------------------------------------------===//
@@ -2675,14 +2742,12 @@
}
// Alias bitwise logical operations using SSE logical ops on packed FP values.
-let mayLoad = 0 in {
- defm FsAND : sse12_fp_alias_pack_logical<0x54, "and", X86fand,
- SSE_BIT_ITINS_P>;
- defm FsOR : sse12_fp_alias_pack_logical<0x56, "or", X86for,
- SSE_BIT_ITINS_P>;
- defm FsXOR : sse12_fp_alias_pack_logical<0x57, "xor", X86fxor,
- SSE_BIT_ITINS_P>;
-}
+defm FsAND : sse12_fp_alias_pack_logical<0x54, "and", X86fand,
+ SSE_BIT_ITINS_P>;
+defm FsOR : sse12_fp_alias_pack_logical<0x56, "or", X86for,
+ SSE_BIT_ITINS_P>;
+defm FsXOR : sse12_fp_alias_pack_logical<0x57, "xor", X86fxor,
+ SSE_BIT_ITINS_P>;
let neverHasSideEffects = 1, Pattern = []<dag>, isCommutable = 0 in
defm FsANDN : sse12_fp_alias_pack_logical<0x55, "andn", undef,
@@ -2732,7 +2797,7 @@
!strconcat(OpcodeStr, "ps"), f256mem,
[(set VR256:$dst, (v4i64 (OpNode VR256:$src1, VR256:$src2)))],
[(set VR256:$dst, (OpNode (bc_v4i64 (v8f32 VR256:$src1)),
- (memopv4i64 addr:$src2)))], 0>, TB, VEX_4V;
+ (memopv4i64 addr:$src2)))], 0>, TB, VEX_4V, VEX_L;
defm PDY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedDouble,
!strconcat(OpcodeStr, "pd"), f256mem,
@@ -2740,7 +2805,7 @@
(bc_v4i64 (v4f64 VR256:$src2))))],
[(set VR256:$dst, (OpNode (bc_v4i64 (v4f64 VR256:$src1)),
(memopv4i64 addr:$src2)))], 0>,
- TB, OpSize, VEX_4V;
+ TB, OpSize, VEX_4V, VEX_L;
}
// AVX 256-bit packed logical ops forms
@@ -2786,27 +2851,23 @@
multiclass basic_sse12_fp_binop_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
SizeItins itins,
bit Is2Addr = 1> {
- let mayLoad = 0 in {
defm PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, VR128,
v4f32, f128mem, memopv4f32, SSEPackedSingle, itins.s, Is2Addr>,
TB;
defm PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, VR128,
v2f64, f128mem, memopv2f64, SSEPackedDouble, itins.d, Is2Addr>,
TB, OpSize;
- }
}
multiclass basic_sse12_fp_binop_p_y<bits<8> opc, string OpcodeStr,
SDNode OpNode,
SizeItins itins> {
- let mayLoad = 0 in {
- defm PSY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, VR256,
+ defm PSY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, VR256,
v8f32, f256mem, memopv8f32, SSEPackedSingle, itins.s, 0>,
- TB;
- defm PDY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, VR256,
+ TB, VEX_L;
+ defm PDY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, VR256,
v4f64, f256mem, memopv4f64, SSEPackedDouble, itins.d, 0>,
- TB, OpSize;
- }
+ TB, OpSize, VEX_L;
}
multiclass basic_sse12_fp_binop_s_int<bits<8> opc, string OpcodeStr,
@@ -2838,11 +2899,11 @@
SizeItins itins> {
defm PSY : sse12_fp_packed_int<opc, OpcodeStr, VR256,
!strconcat(OpcodeStr, "ps"), "avx", "_ps_256", f256mem, memopv8f32,
- SSEPackedSingle, itins.s, 0>, TB;
+ SSEPackedSingle, itins.s, 0>, TB, VEX_L;
defm PDY : sse12_fp_packed_int<opc, OpcodeStr, VR256,
!strconcat(OpcodeStr, "pd"), "avx", "_pd_256", f256mem, memopv4f64,
- SSEPackedDouble, itins.d, 0>, TB, OpSize;
+ SSEPackedDouble, itins.d, 0>, TB, OpSize, VEX_L;
}
// Binary Arithmetic instructions
@@ -2864,7 +2925,8 @@
basic_sse12_fp_binop_s_int<0x5C, "sub", SSE_ALU_ITINS_S, 0>,
VEX_4V, VEX_LIG;
defm VSUB : basic_sse12_fp_binop_p<0x5C, "sub", fsub, SSE_ALU_ITINS_P, 0>,
- basic_sse12_fp_binop_p_y<0x5C, "sub", fsub, SSE_ALU_ITINS_P>, VEX_4V;
+ basic_sse12_fp_binop_p_y<0x5C, "sub", fsub, SSE_ALU_ITINS_P>,
+ VEX_4V;
defm VDIV : basic_sse12_fp_binop_s<0x5E, "div", fdiv, SSE_DIV_ITINS_S, 0>,
basic_sse12_fp_binop_s_int<0x5E, "div", SSE_DIV_ITINS_S, 0>,
VEX_4V, VEX_LIG;
@@ -2915,6 +2977,23 @@
}
}
+let isCodeGenOnly = 1 in {
+ defm VMAXC: basic_sse12_fp_binop_s<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_S, 0>,
+ VEX_4V, VEX_LIG;
+ defm VMAXC: basic_sse12_fp_binop_p<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_P, 0>,
+ basic_sse12_fp_binop_p_y<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_P>, VEX_4V;
+ defm VMINC: basic_sse12_fp_binop_s<0x5D, "min", X86fminc, SSE_ALU_ITINS_S, 0>,
+ VEX_4V, VEX_LIG;
+ defm VMINC: basic_sse12_fp_binop_p<0x5D, "min", X86fminc, SSE_ALU_ITINS_P, 0>,
+ basic_sse12_fp_binop_p_y<0x5D, "min", X86fminc, SSE_ALU_ITINS_P>, VEX_4V;
+ let Constraints = "$src1 = $dst" in {
+ defm MAXC: basic_sse12_fp_binop_s<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_S>,
+ basic_sse12_fp_binop_p<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_P>;
+ defm MINC: basic_sse12_fp_binop_s<0x5D, "min", X86fminc, SSE_ALU_ITINS_S>,
+ basic_sse12_fp_binop_p<0x5D, "min", X86fminc, SSE_ALU_ITINS_P>;
+ }
+}
+
/// Unop Arithmetic
/// In addition, we also have a special variant of the scalar form here to
/// represent the associated intrinsic operation. This form is unlike the
@@ -2952,7 +3031,7 @@
def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src),
!strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
[(set FR32:$dst, (OpNode (load addr:$src)))], itins.rm>, XS,
- Requires<[HasSSE1, OptForSize]>;
+ Requires<[UseSSE1, OptForSize]>;
def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (F32Int VR128:$src))], itins.rr>;
@@ -2966,7 +3045,7 @@
def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src1, FR32:$src2),
!strconcat(OpcodeStr,
"ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
- let mayLoad = 1 in
+ let mayLoad = 1 in {
def SSm : SSI<opc, MRMSrcMem, (outs FR32:$dst), (ins FR32:$src1,f32mem:$src2),
!strconcat(OpcodeStr,
"ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
@@ -2974,6 +3053,7 @@
(ins VR128:$src1, ssmem:$src2),
!strconcat(OpcodeStr,
"ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
+ }
}
/// sse1_fp_unop_p - SSE1 unops in packed form.
@@ -2993,11 +3073,11 @@
def PSYr : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
!strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
[(set VR256:$dst, (v8f32 (OpNode VR256:$src)))],
- itins.rr>;
+ itins.rr>, VEX_L;
def PSYm : PSI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
!strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
[(set VR256:$dst, (OpNode (memopv8f32 addr:$src)))],
- itins.rm>;
+ itins.rm>, VEX_L;
}
/// sse1_fp_unop_p_int - SSE1 intrinsics unops in packed forms.
@@ -3019,11 +3099,11 @@
def PSYr_Int : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
!strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
[(set VR256:$dst, (V4F32Int VR256:$src))],
- itins.rr>;
+ itins.rr>, VEX_L;
def PSYm_Int : PSI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
!strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
[(set VR256:$dst, (V4F32Int (memopv8f32 addr:$src)))],
- itins.rm>;
+ itins.rm>, VEX_L;
}
/// sse2_fp_unop_s - SSE2 unops in scalar form.
@@ -3036,7 +3116,7 @@
def SDm : I<opc, MRMSrcMem, (outs FR64:$dst), (ins f64mem:$src),
!strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
[(set FR64:$dst, (OpNode (load addr:$src)))], itins.rm>, XD,
- Requires<[HasSSE2, OptForSize]>;
+ Requires<[UseSSE2, OptForSize]>;
def SDr_Int : SDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (F64Int VR128:$src))], itins.rr>;
@@ -3046,20 +3126,20 @@
}
/// sse2_fp_unop_s_avx - AVX SSE2 unops in scalar form.
+let hasSideEffects = 0 in
multiclass sse2_fp_unop_s_avx<bits<8> opc, string OpcodeStr> {
- let neverHasSideEffects = 1 in {
def SDr : SDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src1, FR64:$src2),
!strconcat(OpcodeStr,
"sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
- let mayLoad = 1 in
+ let mayLoad = 1 in {
def SDm : SDI<opc, MRMSrcMem, (outs FR64:$dst), (ins FR64:$src1,f64mem:$src2),
!strconcat(OpcodeStr,
"sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
- }
def SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, sdmem:$src2),
!strconcat(OpcodeStr,
"sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
+ }
}
/// sse2_fp_unop_p - SSE2 unops in vector forms.
@@ -3079,11 +3159,11 @@
def PDYr : PDI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
!strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
[(set VR256:$dst, (v4f64 (OpNode VR256:$src)))],
- itins.rr>;
+ itins.rr>, VEX_L;
def PDYm : PDI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
!strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
[(set VR256:$dst, (OpNode (memopv4f64 addr:$src)))],
- itins.rm>;
+ itins.rm>, VEX_L;
}
/// sse2_fp_unop_p_int - SSE2 intrinsic unops in vector forms.
@@ -3105,11 +3185,11 @@
def PDYr_Int : PDI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
!strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
[(set VR256:$dst, (V2F64Int VR256:$src))],
- itins.rr>;
+ itins.rr>, VEX_L;
def PDYm_Int : PDI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
!strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
[(set VR256:$dst, (V2F64Int (memopv4f64 addr:$src)))],
- itins.rm>;
+ itins.rm>, VEX_L;
}
let Predicates = [HasAVX] in {
@@ -3150,7 +3230,6 @@
SSE_RCPP>, VEX;
}
-let AddedComplexity = 1 in {
def : Pat<(f32 (fsqrt FR32:$src)),
(VSQRTSSr (f32 (IMPLICIT_DEF)), FR32:$src)>, Requires<[HasAVX]>;
def : Pat<(f32 (fsqrt (load addr:$src))),
@@ -3173,9 +3252,8 @@
def : Pat<(f32 (X86frcp (load addr:$src))),
(VRCPSSm (f32 (IMPLICIT_DEF)), addr:$src)>,
Requires<[HasAVX, OptForSize]>;
-}
-let Predicates = [HasAVX], AddedComplexity = 1 in {
+let Predicates = [HasAVX] in {
def : Pat<(int_x86_sse_sqrt_ss VR128:$src),
(COPY_TO_REGCLASS (VSQRTSSr (f32 (IMPLICIT_DEF)),
(COPY_TO_REGCLASS VR128:$src, FR32)),
@@ -3215,17 +3293,52 @@
sse2_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTS>,
sse2_fp_unop_p_int<0x51, "sqrt", int_x86_sse2_sqrt_pd, SSE_SQRTS>;
+/// sse1_fp_unop_s_rw - SSE1 unops where vector form has a read-write operand.
+multiclass sse1_fp_unop_rw<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ Intrinsic F32Int, OpndItins itins> {
+ def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
+ !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
+ [(set FR32:$dst, (OpNode FR32:$src))]>;
+ // For scalar unary operations, fold a load into the operation
+ // only in OptForSize mode. It eliminates an instruction, but it also
+ // eliminates a whole-register clobber (the load), so it introduces a
+ // partial register update condition.
+ def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src),
+ !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
+ [(set FR32:$dst, (OpNode (load addr:$src)))], itins.rm>, XS,
+ Requires<[UseSSE1, OptForSize]>;
+ let Constraints = "$src1 = $dst" in {
+ def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
+ [], itins.rr>;
+ def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, ssmem:$src2),
+ !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
+ [], itins.rm>;
+ }
+}
+
// Reciprocal approximations. Note that these typically require refinement
// in order to obtain suitable precision.
-defm RSQRT : sse1_fp_unop_s<0x52, "rsqrt", X86frsqrt, int_x86_sse_rsqrt_ss,
- SSE_SQRTS>,
+defm RSQRT : sse1_fp_unop_rw<0x52, "rsqrt", X86frsqrt, int_x86_sse_rsqrt_ss,
+ SSE_SQRTS>,
sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt, SSE_SQRTS>,
sse1_fp_unop_p_int<0x52, "rsqrt", int_x86_sse_rsqrt_ps,
SSE_SQRTS>;
-defm RCP : sse1_fp_unop_s<0x53, "rcp", X86frcp, int_x86_sse_rcp_ss,
- SSE_RCPS>,
+let Predicates = [UseSSE1] in {
+ def : Pat<(int_x86_sse_rsqrt_ss VR128:$src),
+ (RSQRTSSr_Int VR128:$src, VR128:$src)>;
+}
+
+defm RCP : sse1_fp_unop_rw<0x53, "rcp", X86frcp, int_x86_sse_rcp_ss,
+ SSE_RCPS>,
sse1_fp_unop_p<0x53, "rcp", X86frcp, SSE_RCPS>,
sse1_fp_unop_p_int<0x53, "rcp", int_x86_sse_rcp_ps, SSE_RCPS>;
+let Predicates = [UseSSE1] in {
+ def : Pat<(int_x86_sse_rcp_ss VR128:$src),
+ (RCPSSr_Int VR128:$src, VR128:$src)>;
+}
// There is no f64 version of the reciprocal approximation instructions.
@@ -3263,20 +3376,20 @@
"movntps\t{$src, $dst|$dst, $src}",
[(alignednontemporalstore (v8f32 VR256:$src),
addr:$dst)],
- IIC_SSE_MOVNT>, VEX;
+ IIC_SSE_MOVNT>, VEX, VEX_L;
def VMOVNTPDYmr : VPDI<0x2B, MRMDestMem, (outs),
(ins f256mem:$dst, VR256:$src),
"movntpd\t{$src, $dst|$dst, $src}",
[(alignednontemporalstore (v4f64 VR256:$src),
addr:$dst)],
- IIC_SSE_MOVNT>, VEX;
+ IIC_SSE_MOVNT>, VEX, VEX_L;
let ExeDomain = SSEPackedInt in
def VMOVNTDQYmr : VPDI<0xE7, MRMDestMem, (outs),
(ins f256mem:$dst, VR256:$src),
"movntdq\t{$src, $dst|$dst, $src}",
[(alignednontemporalstore (v4i64 VR256:$src),
addr:$dst)],
- IIC_SSE_MOVNT>, VEX;
+ IIC_SSE_MOVNT>, VEX, VEX_L;
}
let AddedComplexity = 400 in { // Prefer non-temporal versions
@@ -3296,7 +3409,7 @@
IIC_SSE_MOVNT>;
def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst),
- (MOVNTDQmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
+ (MOVNTDQmr addr:$dst, VR128:$src)>, Requires<[UseSSE2]>;
// There is no AVX form for instructions below this point
def MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
@@ -3385,14 +3498,14 @@
VEX;
def VMOVDQAYrr : VPDI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
"movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>,
- VEX;
+ VEX, VEX_L;
}
def VMOVDQUrr : VSSI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"movdqu\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVU_P_RR>,
VEX;
def VMOVDQUYrr : VSSI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
"movdqu\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVU_P_RR>,
- VEX;
+ VEX, VEX_L;
// For Disassembler
let isCodeGenOnly = 1 in {
@@ -3402,16 +3515,14 @@
VEX;
def VMOVDQAYrr_REV : VPDI<0x7F, MRMDestReg, (outs VR256:$dst), (ins VR256:$src),
"movdqa\t{$src, $dst|$dst, $src}", [],
- IIC_SSE_MOVA_P_RR>,
- VEX;
+ IIC_SSE_MOVA_P_RR>, VEX, VEX_L;
def VMOVDQUrr_REV : VSSI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
"movdqu\t{$src, $dst|$dst, $src}", [],
IIC_SSE_MOVU_P_RR>,
VEX;
def VMOVDQUYrr_REV : VSSI<0x7F, MRMDestReg, (outs VR256:$dst), (ins VR256:$src),
"movdqu\t{$src, $dst|$dst, $src}", [],
- IIC_SSE_MOVU_P_RR>,
- VEX;
+ IIC_SSE_MOVU_P_RR>, VEX, VEX_L;
}
let canFoldAsLoad = 1, mayLoad = 1 in {
@@ -3420,14 +3531,14 @@
VEX;
def VMOVDQAYrm : VPDI<0x6F, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
"movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RM>,
- VEX;
+ VEX, VEX_L;
let Predicates = [HasAVX] in {
def VMOVDQUrm : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"vmovdqu\t{$src, $dst|$dst, $src}",[], IIC_SSE_MOVU_P_RM>,
XS, VEX;
def VMOVDQUYrm : I<0x6F, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
"vmovdqu\t{$src, $dst|$dst, $src}",[], IIC_SSE_MOVU_P_RM>,
- XS, VEX;
+ XS, VEX, VEX_L;
}
}
@@ -3439,14 +3550,14 @@
def VMOVDQAYmr : VPDI<0x7F, MRMDestMem, (outs),
(ins i256mem:$dst, VR256:$src),
"movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_MR>,
- VEX;
+ VEX, VEX_L;
let Predicates = [HasAVX] in {
def VMOVDQUmr : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
"vmovdqu\t{$src, $dst|$dst, $src}",[], IIC_SSE_MOVU_P_MR>,
XS, VEX;
def VMOVDQUYmr : I<0x7F, MRMDestMem, (outs), (ins i256mem:$dst, VR256:$src),
"vmovdqu\t{$src, $dst|$dst, $src}",[], IIC_SSE_MOVU_P_MR>,
- XS, VEX;
+ XS, VEX, VEX_L;
}
}
@@ -3456,7 +3567,7 @@
def MOVDQUrr : I<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"movdqu\t{$src, $dst|$dst, $src}",
- [], IIC_SSE_MOVU_P_RR>, XS, Requires<[HasSSE2]>;
+ [], IIC_SSE_MOVU_P_RR>, XS, Requires<[UseSSE2]>;
// For Disassembler
let isCodeGenOnly = 1 in {
@@ -3466,7 +3577,7 @@
def MOVDQUrr_REV : I<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
"movdqu\t{$src, $dst|$dst, $src}",
- [], IIC_SSE_MOVU_P_RR>, XS, Requires<[HasSSE2]>;
+ [], IIC_SSE_MOVU_P_RR>, XS, Requires<[UseSSE2]>;
}
let canFoldAsLoad = 1, mayLoad = 1 in {
@@ -3478,7 +3589,7 @@
"movdqu\t{$src, $dst|$dst, $src}",
[/*(set VR128:$dst, (loadv2i64 addr:$src))*/],
IIC_SSE_MOVU_P_RM>,
- XS, Requires<[HasSSE2]>;
+ XS, Requires<[UseSSE2]>;
}
let mayStore = 1 in {
@@ -3490,7 +3601,7 @@
"movdqu\t{$src, $dst|$dst, $src}",
[/*(store (v2i64 VR128:$src), addr:$dst)*/],
IIC_SSE_MOVU_P_MR>,
- XS, Requires<[HasSSE2]>;
+ XS, Requires<[UseSSE2]>;
}
// Intrinsic forms of MOVDQU load and store
@@ -3504,7 +3615,7 @@
"movdqu\t{$src, $dst|$dst, $src}",
[(int_x86_sse2_storeu_dq addr:$dst, VR128:$src)],
IIC_SSE_MOVU_P_MR>,
- XS, Requires<[HasSSE2]>;
+ XS, Requires<[UseSSE2]>;
} // ExeDomain = SSEPackedInt
@@ -3682,82 +3793,82 @@
let Predicates = [HasAVX2] in {
defm VPADDBY : PDI_binop_rm<0xFC, "vpaddb", add, v32i8, VR256, memopv4i64,
- i256mem, SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
+ i256mem, SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
defm VPADDWY : PDI_binop_rm<0xFD, "vpaddw", add, v16i16, VR256, memopv4i64,
- i256mem, SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
+ i256mem, SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
defm VPADDDY : PDI_binop_rm<0xFE, "vpaddd", add, v8i32, VR256, memopv4i64,
- i256mem, SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
+ i256mem, SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
defm VPADDQY : PDI_binop_rm<0xD4, "vpaddq", add, v4i64, VR256, memopv4i64,
- i256mem, SSE_INTALUQ_ITINS_P, 1, 0>, VEX_4V;
+ i256mem, SSE_INTALUQ_ITINS_P, 1, 0>, VEX_4V, VEX_L;
defm VPMULLWY : PDI_binop_rm<0xD5, "vpmullw", mul, v16i16, VR256, memopv4i64,
- i256mem, SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V;
+ i256mem, SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V, VEX_L;
defm VPSUBBY : PDI_binop_rm<0xF8, "vpsubb", sub, v32i8, VR256, memopv4i64,
- i256mem, SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
+ i256mem, SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L;
defm VPSUBWY : PDI_binop_rm<0xF9, "vpsubw", sub, v16i16,VR256, memopv4i64,
- i256mem, SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
+ i256mem, SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L;
defm VPSUBDY : PDI_binop_rm<0xFA, "vpsubd", sub, v8i32, VR256, memopv4i64,
- i256mem, SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
+ i256mem, SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L;
defm VPSUBQY : PDI_binop_rm<0xFB, "vpsubq", sub, v4i64, VR256, memopv4i64,
- i256mem, SSE_INTALUQ_ITINS_P, 0, 0>, VEX_4V;
+ i256mem, SSE_INTALUQ_ITINS_P, 0, 0>, VEX_4V, VEX_L;
defm VPMULUDQY : PDI_binop_rm2<0xF4, "vpmuludq", X86pmuludq, v4i64, v8i32,
VR256, memopv4i64, i256mem,
- SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V;
+ SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V, VEX_L;
// Intrinsic forms
defm VPSUBSBY : PDI_binop_rm_int<0xE8, "vpsubsb" , int_x86_avx2_psubs_b,
VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
+ SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L;
defm VPSUBSWY : PDI_binop_rm_int<0xE9, "vpsubsw" , int_x86_avx2_psubs_w,
VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
+ SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L;
defm VPSUBUSBY : PDI_binop_rm_int<0xD8, "vpsubusb", int_x86_avx2_psubus_b,
VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
+ SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L;
defm VPSUBUSWY : PDI_binop_rm_int<0xD9, "vpsubusw", int_x86_avx2_psubus_w,
VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
+ SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L;
defm VPADDSBY : PDI_binop_rm_int<0xEC, "vpaddsb" , int_x86_avx2_padds_b,
VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
+ SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
defm VPADDSWY : PDI_binop_rm_int<0xED, "vpaddsw" , int_x86_avx2_padds_w,
VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
+ SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
defm VPADDUSBY : PDI_binop_rm_int<0xDC, "vpaddusb", int_x86_avx2_paddus_b,
VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
+ SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
defm VPADDUSWY : PDI_binop_rm_int<0xDD, "vpaddusw", int_x86_avx2_paddus_w,
VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
+ SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
defm VPMULHUWY : PDI_binop_rm_int<0xE4, "vpmulhuw", int_x86_avx2_pmulhu_w,
VR256, memopv4i64, i256mem,
- SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V;
+ SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V, VEX_L;
defm VPMULHWY : PDI_binop_rm_int<0xE5, "vpmulhw" , int_x86_avx2_pmulh_w,
VR256, memopv4i64, i256mem,
- SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V;
+ SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V, VEX_L;
defm VPMADDWDY : PDI_binop_rm_int<0xF5, "vpmaddwd", int_x86_avx2_pmadd_wd,
VR256, memopv4i64, i256mem,
- SSE_PMADD, 1, 0>, VEX_4V;
+ SSE_PMADD, 1, 0>, VEX_4V, VEX_L;
defm VPAVGBY : PDI_binop_rm_int<0xE0, "vpavgb", int_x86_avx2_pavg_b,
VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
+ SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
defm VPAVGWY : PDI_binop_rm_int<0xE3, "vpavgw", int_x86_avx2_pavg_w,
VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
+ SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
defm VPMINUBY : PDI_binop_rm_int<0xDA, "vpminub", int_x86_avx2_pminu_b,
VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
+ SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
defm VPMINSWY : PDI_binop_rm_int<0xEA, "vpminsw", int_x86_avx2_pmins_w,
VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
+ SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
defm VPMAXUBY : PDI_binop_rm_int<0xDE, "vpmaxub", int_x86_avx2_pmaxu_b,
VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
+ SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
defm VPMAXSWY : PDI_binop_rm_int<0xEE, "vpmaxsw", int_x86_avx2_pmaxs_w,
VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
+ SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
defm VPSADBWY : PDI_binop_rm_int<0xF6, "vpsadbw", int_x86_avx2_psad_bw,
VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
+ SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
}
let Constraints = "$src1 = $dst" in {
@@ -3893,30 +4004,30 @@
let Predicates = [HasAVX2] in {
defm VPSLLWY : PDI_binop_rmi<0xF1, 0x71, MRM6r, "vpsllw", X86vshl, X86vshli,
VR256, v16i16, v8i16, bc_v8i16,
- SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
+ SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L;
defm VPSLLDY : PDI_binop_rmi<0xF2, 0x72, MRM6r, "vpslld", X86vshl, X86vshli,
VR256, v8i32, v4i32, bc_v4i32,
- SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
+ SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L;
defm VPSLLQY : PDI_binop_rmi<0xF3, 0x73, MRM6r, "vpsllq", X86vshl, X86vshli,
VR256, v4i64, v2i64, bc_v2i64,
- SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
+ SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L;
defm VPSRLWY : PDI_binop_rmi<0xD1, 0x71, MRM2r, "vpsrlw", X86vsrl, X86vsrli,
VR256, v16i16, v8i16, bc_v8i16,
- SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
+ SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L;
defm VPSRLDY : PDI_binop_rmi<0xD2, 0x72, MRM2r, "vpsrld", X86vsrl, X86vsrli,
VR256, v8i32, v4i32, bc_v4i32,
- SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
+ SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L;
defm VPSRLQY : PDI_binop_rmi<0xD3, 0x73, MRM2r, "vpsrlq", X86vsrl, X86vsrli,
VR256, v4i64, v2i64, bc_v2i64,
- SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
+ SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L;
defm VPSRAWY : PDI_binop_rmi<0xE1, 0x71, MRM4r, "vpsraw", X86vsra, X86vsrai,
VR256, v16i16, v8i16, bc_v8i16,
- SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
+ SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L;
defm VPSRADY : PDI_binop_rmi<0xE2, 0x72, MRM4r, "vpsrad", X86vsra, X86vsrai,
VR256, v8i32, v4i32, bc_v4i32,
- SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
+ SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L;
let ExeDomain = SSEPackedInt in {
// 256-bit logical shifts.
@@ -3925,13 +4036,13 @@
"vpslldq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR256:$dst,
(int_x86_avx2_psll_dq_bs VR256:$src1, imm:$src2))]>,
- VEX_4V;
+ VEX_4V, VEX_L;
def VPSRLDQYri : PDIi8<0x73, MRM3r,
(outs VR256:$dst), (ins VR256:$src1, i32i8imm:$src2),
"vpsrldq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR256:$dst,
(int_x86_avx2_psrl_dq_bs VR256:$src1, imm:$src2))]>,
- VEX_4V;
+ VEX_4V, VEX_L;
// PSRADQYri doesn't exist in SSE[1-3].
}
} // Predicates = [HasAVX2]
@@ -4002,7 +4113,7 @@
(VPSRLDQYri VR256:$src1, (BYTE_imm imm:$src2))>;
}
-let Predicates = [HasSSE2] in {
+let Predicates = [UseSSE2] in {
def : Pat<(int_x86_sse2_psll_dq VR128:$src1, imm:$src2),
(PSLLDQri VR128:$src1, (BYTE_imm imm:$src2))>;
def : Pat<(int_x86_sse2_psrl_dq VR128:$src1, imm:$src2),
@@ -4045,22 +4156,22 @@
let Predicates = [HasAVX2] in {
defm VPCMPEQBY : PDI_binop_rm<0x74, "vpcmpeqb", X86pcmpeq, v32i8,
VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
+ SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
defm VPCMPEQWY : PDI_binop_rm<0x75, "vpcmpeqw", X86pcmpeq, v16i16,
VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
+ SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
defm VPCMPEQDY : PDI_binop_rm<0x76, "vpcmpeqd", X86pcmpeq, v8i32,
VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
+ SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
defm VPCMPGTBY : PDI_binop_rm<0x64, "vpcmpgtb", X86pcmpgt, v32i8,
VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
+ SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L;
defm VPCMPGTWY : PDI_binop_rm<0x65, "vpcmpgtw", X86pcmpgt, v16i16,
VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
+ SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L;
defm VPCMPGTDY : PDI_binop_rm<0x66, "vpcmpgtd", X86pcmpgt, v8i32,
VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
+ SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L;
}
let Constraints = "$src1 = $dst" in {
@@ -4103,13 +4214,13 @@
let Predicates = [HasAVX2] in {
defm VPACKSSWBY : PDI_binop_rm_int<0x63, "vpacksswb", int_x86_avx2_packsswb,
VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
+ SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L;
defm VPACKSSDWY : PDI_binop_rm_int<0x6B, "vpackssdw", int_x86_avx2_packssdw,
VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
+ SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L;
defm VPACKUSWBY : PDI_binop_rm_int<0x67, "vpackuswb", int_x86_avx2_packuswb,
VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
+ SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L;
}
let Constraints = "$src1 = $dst" in {
@@ -4179,12 +4290,15 @@
}
let Predicates = [HasAVX2] in {
- defm VPSHUFD : sse2_pshuffle_y<"vpshufd", v8i32, X86PShufd>, TB, OpSize, VEX;
- defm VPSHUFHW : sse2_pshuffle_y<"vpshufhw", v16i16, X86PShufhw>, XS, VEX;
- defm VPSHUFLW : sse2_pshuffle_y<"vpshuflw", v16i16, X86PShuflw>, XD, VEX;
+ defm VPSHUFD : sse2_pshuffle_y<"vpshufd", v8i32, X86PShufd>,
+ TB, OpSize, VEX,VEX_L;
+ defm VPSHUFHW : sse2_pshuffle_y<"vpshufhw", v16i16, X86PShufhw>,
+ XS, VEX, VEX_L;
+ defm VPSHUFLW : sse2_pshuffle_y<"vpshuflw", v16i16, X86PShuflw>,
+ XD, VEX, VEX_L;
}
-let Predicates = [HasSSE2] in {
+let Predicates = [UseSSE2] in {
let AddedComplexity = 5 in
defm PSHUFD : sse2_pshuffle<"pshufd", v4i32, X86PShufd>, TB, OpSize;
@@ -4260,22 +4374,22 @@
let Predicates = [HasAVX2] in {
defm VPUNPCKLBW : sse2_unpack_y<0x60, "vpunpcklbw", v32i8, X86Unpckl,
- bc_v32i8>, VEX_4V;
+ bc_v32i8>, VEX_4V, VEX_L;
defm VPUNPCKLWD : sse2_unpack_y<0x61, "vpunpcklwd", v16i16, X86Unpckl,
- bc_v16i16>, VEX_4V;
+ bc_v16i16>, VEX_4V, VEX_L;
defm VPUNPCKLDQ : sse2_unpack_y<0x62, "vpunpckldq", v8i32, X86Unpckl,
- bc_v8i32>, VEX_4V;
+ bc_v8i32>, VEX_4V, VEX_L;
defm VPUNPCKLQDQ : sse2_unpack_y<0x6C, "vpunpcklqdq", v4i64, X86Unpckl,
- bc_v4i64>, VEX_4V;
+ bc_v4i64>, VEX_4V, VEX_L;
defm VPUNPCKHBW : sse2_unpack_y<0x68, "vpunpckhbw", v32i8, X86Unpckh,
- bc_v32i8>, VEX_4V;
+ bc_v32i8>, VEX_4V, VEX_L;
defm VPUNPCKHWD : sse2_unpack_y<0x69, "vpunpckhwd", v16i16, X86Unpckh,
- bc_v16i16>, VEX_4V;
+ bc_v16i16>, VEX_4V, VEX_L;
defm VPUNPCKHDQ : sse2_unpack_y<0x6A, "vpunpckhdq", v8i32, X86Unpckh,
- bc_v8i32>, VEX_4V;
+ bc_v8i32>, VEX_4V, VEX_L;
defm VPUNPCKHQDQ : sse2_unpack_y<0x6D, "vpunpckhqdq", v4i64, X86Unpckh,
- bc_v4i64>, VEX_4V;
+ bc_v4i64>, VEX_4V, VEX_L;
}
let Constraints = "$src1 = $dst" in {
@@ -4299,28 +4413,6 @@
}
} // ExeDomain = SSEPackedInt
-// Patterns for using AVX1 instructions with integer vectors
-// Here to give AVX2 priority
-let Predicates = [HasAVX] in {
- def : Pat<(v8i32 (X86Unpckl VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)))),
- (VUNPCKLPSYrm VR256:$src1, addr:$src2)>;
- def : Pat<(v8i32 (X86Unpckl VR256:$src1, VR256:$src2)),
- (VUNPCKLPSYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v8i32 (X86Unpckh VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)))),
- (VUNPCKHPSYrm VR256:$src1, addr:$src2)>;
- def : Pat<(v8i32 (X86Unpckh VR256:$src1, VR256:$src2)),
- (VUNPCKHPSYrr VR256:$src1, VR256:$src2)>;
-
- def : Pat<(v4i64 (X86Unpckl VR256:$src1, (memopv4i64 addr:$src2))),
- (VUNPCKLPDYrm VR256:$src1, addr:$src2)>;
- def : Pat<(v4i64 (X86Unpckl VR256:$src1, VR256:$src2)),
- (VUNPCKLPDYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v4i64 (X86Unpckh VR256:$src1, (memopv4i64 addr:$src2))),
- (VUNPCKHPDYrm VR256:$src1, addr:$src2)>;
- def : Pat<(v4i64 (X86Unpckh VR256:$src1, VR256:$src2)),
- (VUNPCKHPDYrr VR256:$src1, VR256:$src2)>;
-}
-
//===---------------------------------------------------------------------===//
// SSE2 - Packed Integer Extract and Insert
//===---------------------------------------------------------------------===//
@@ -4369,7 +4461,7 @@
}
let Constraints = "$src1 = $dst" in
- defm PINSRW : sse2_pinsrw, TB, OpSize, Requires<[HasSSE2]>;
+ defm PINSRW : sse2_pinsrw, TB, OpSize, Requires<[UseSSE2]>;
} // ExeDomain = SSEPackedInt
@@ -4389,9 +4481,9 @@
let Predicates = [HasAVX2] in {
def VPMOVMSKBYrr : VPDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR256:$src),
"pmovmskb\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (int_x86_avx2_pmovmskb VR256:$src))]>, VEX;
+ [(set GR32:$dst, (int_x86_avx2_pmovmskb VR256:$src))]>, VEX, VEX_L;
def VPMOVMSKBYr64r : VPDI<0xD7, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src),
- "pmovmskb\t{$src, $dst|$dst, $src}", []>, VEX;
+ "pmovmskb\t{$src, $dst|$dst, $src}", []>, VEX, VEX_L;
}
def PMOVMSKBrr : PDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
@@ -4530,7 +4622,7 @@
// Move Packed Doubleword Int first element to Doubleword Int
//
def VMOVPQIto64rr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
- "mov{d|q}\t{$src, $dst|$dst, $src}",
+ "vmov{d|q}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (vector_extract (v2i64 VR128:$src),
(iPTR 0)))],
IIC_SSE_MOVD_ToGP>,
@@ -4646,14 +4738,14 @@
}
// Use regular 128-bit instructions to match 256-bit scalar_to_vec+zext.
def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
- (v4i32 (scalar_to_vector GR32:$src)),(i32 0)))),
+ (v4i32 (scalar_to_vector GR32:$src)),(iPTR 0)))),
(SUBREG_TO_REG (i32 0), (VMOVZDI2PDIrr GR32:$src), sub_xmm)>;
def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
- (v2i64 (scalar_to_vector GR64:$src)),(i32 0)))),
+ (v2i64 (scalar_to_vector GR64:$src)),(iPTR 0)))),
(SUBREG_TO_REG (i64 0), (VMOVZQI2PQIrr GR64:$src), sub_xmm)>;
}
-let Predicates = [HasSSE2], AddedComplexity = 20 in {
+let Predicates = [UseSSE2], AddedComplexity = 20 in {
def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))),
(MOVZDI2PDIrm addr:$src)>;
def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
@@ -4693,7 +4785,7 @@
[(set VR128:$dst,
(v2i64 (scalar_to_vector (loadi64 addr:$src))))],
IIC_SSE_MOVDQ>, XS,
- Requires<[HasSSE2]>; // SSE2 instruction with XS Prefix
+ Requires<[UseSSE2]>; // SSE2 instruction with XS Prefix
//===---------------------------------------------------------------------===//
// Move Packed Quadword Int to Quadword Int
@@ -4736,7 +4828,7 @@
(v2i64 (X86vzmovl (v2i64 (scalar_to_vector
(loadi64 addr:$src))))))],
IIC_SSE_MOVDQ>,
- XS, Requires<[HasSSE2]>;
+ XS, Requires<[UseSSE2]>;
let Predicates = [HasAVX], AddedComplexity = 20 in {
def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
@@ -4747,7 +4839,7 @@
(VMOVZQI2PQIrm addr:$src)>;
}
-let Predicates = [HasSSE2], AddedComplexity = 20 in {
+let Predicates = [UseSSE2], AddedComplexity = 20 in {
def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
(MOVZQI2PQIrm addr:$src)>;
def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4f32 addr:$src)))),
@@ -4777,7 +4869,7 @@
"movq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))],
IIC_SSE_MOVQ_RR>,
- XS, Requires<[HasSSE2]>;
+ XS, Requires<[UseSSE2]>;
let AddedComplexity = 20 in
def VMOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
@@ -4792,7 +4884,7 @@
[(set VR128:$dst, (v2i64 (X86vzmovl
(loadv2i64 addr:$src))))],
IIC_SSE_MOVDQ>,
- XS, Requires<[HasSSE2]>;
+ XS, Requires<[UseSSE2]>;
}
let AddedComplexity = 20 in {
@@ -4802,7 +4894,7 @@
def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))),
(VMOVZPQILo2PQIrr VR128:$src)>;
}
- let Predicates = [HasSSE2] in {
+ let Predicates = [UseSSE2] in {
def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
(MOVZPQILo2PQIrm addr:$src)>;
def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))),
@@ -4854,9 +4946,9 @@
defm VMOVSLDUP : sse3_replicate_sfp<0x12, X86Movsldup, "vmovsldup",
v4f32, VR128, memopv4f32, f128mem>, VEX;
defm VMOVSHDUPY : sse3_replicate_sfp<0x16, X86Movshdup, "vmovshdup",
- v8f32, VR256, memopv8f32, f256mem>, VEX;
+ v8f32, VR256, memopv8f32, f256mem>, VEX, VEX_L;
defm VMOVSLDUPY : sse3_replicate_sfp<0x12, X86Movsldup, "vmovsldup",
- v8f32, VR256, memopv8f32, f256mem>, VEX;
+ v8f32, VR256, memopv8f32, f256mem>, VEX, VEX_L;
}
defm MOVSHDUP : sse3_replicate_sfp<0x16, X86Movshdup, "movshdup", v4f32, VR128,
memopv4f32, f128mem>;
@@ -4882,7 +4974,7 @@
(VMOVSLDUPYrm addr:$src)>;
}
-let Predicates = [HasSSE3] in {
+let Predicates = [UseSSE3] in {
def : Pat<(v4i32 (X86Movshdup VR128:$src)),
(MOVSHDUPrr VR128:$src)>;
def : Pat<(v4i32 (X86Movshdup (bc_v4i32 (memopv2i64 addr:$src)))),
@@ -4924,7 +5016,7 @@
let Predicates = [HasAVX] in {
defm VMOVDDUP : sse3_replicate_dfp<"vmovddup">, VEX;
- defm VMOVDDUPY : sse3_replicate_dfp_y<"vmovddup">, VEX;
+ defm VMOVDDUPY : sse3_replicate_dfp_y<"vmovddup">, VEX, VEX_L;
}
defm MOVDDUP : sse3_replicate_dfp<"movddup">;
@@ -4951,7 +5043,7 @@
(VMOVDDUPYrr VR256:$src)>;
}
-let Predicates = [HasSSE3] in {
+let Predicates = [UseSSE3] in {
def : Pat<(X86Movddup (memopv2f64 addr:$src)),
(MOVDDUPrm addr:$src)>;
def : Pat<(X86Movddup (bc_v2f64 (memopv4f32 addr:$src))),
@@ -4973,7 +5065,8 @@
[(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>, VEX;
def VLDDQUYrm : S3DI<0xF0, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
"vlddqu\t{$src, $dst|$dst, $src}",
- [(set VR256:$dst, (int_x86_avx_ldu_dq_256 addr:$src))]>, VEX;
+ [(set VR256:$dst, (int_x86_avx_ldu_dq_256 addr:$src))]>,
+ VEX, VEX_L;
}
def LDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"lddqu\t{$src, $dst|$dst, $src}",
@@ -5006,16 +5099,16 @@
defm VADDSUBPS : sse3_addsub<int_x86_sse3_addsub_ps, "vaddsubps", VR128,
f128mem, SSE_ALU_F32P, 0>, TB, XD, VEX_4V;
defm VADDSUBPSY : sse3_addsub<int_x86_avx_addsub_ps_256, "vaddsubps", VR256,
- f256mem, SSE_ALU_F32P, 0>, TB, XD, VEX_4V;
+ f256mem, SSE_ALU_F32P, 0>, TB, XD, VEX_4V, VEX_L;
}
let ExeDomain = SSEPackedDouble in {
defm VADDSUBPD : sse3_addsub<int_x86_sse3_addsub_pd, "vaddsubpd", VR128,
f128mem, SSE_ALU_F64P, 0>, TB, OpSize, VEX_4V;
defm VADDSUBPDY : sse3_addsub<int_x86_avx_addsub_pd_256, "vaddsubpd", VR256,
- f256mem, SSE_ALU_F64P, 0>, TB, OpSize, VEX_4V;
+ f256mem, SSE_ALU_F64P, 0>, TB, OpSize, VEX_4V, VEX_L;
}
}
-let Constraints = "$src1 = $dst", Predicates = [HasSSE3] in {
+let Constraints = "$src1 = $dst", Predicates = [UseSSE3] in {
let ExeDomain = SSEPackedSingle in
defm ADDSUBPS : sse3_addsub<int_x86_sse3_addsub_ps, "addsubps", VR128,
f128mem, SSE_ALU_F32P>, TB, XD;
@@ -5067,9 +5160,9 @@
defm VHSUBPS : S3D_Int<0x7D, "vhsubps", v4f32, VR128, f128mem,
X86fhsub, 0>, VEX_4V;
defm VHADDPSY : S3D_Int<0x7C, "vhaddps", v8f32, VR256, f256mem,
- X86fhadd, 0>, VEX_4V;
+ X86fhadd, 0>, VEX_4V, VEX_L;
defm VHSUBPSY : S3D_Int<0x7D, "vhsubps", v8f32, VR256, f256mem,
- X86fhsub, 0>, VEX_4V;
+ X86fhsub, 0>, VEX_4V, VEX_L;
}
let ExeDomain = SSEPackedDouble in {
defm VHADDPD : S3_Int <0x7C, "vhaddpd", v2f64, VR128, f128mem,
@@ -5077,9 +5170,9 @@
defm VHSUBPD : S3_Int <0x7D, "vhsubpd", v2f64, VR128, f128mem,
X86fhsub, 0>, VEX_4V;
defm VHADDPDY : S3_Int <0x7C, "vhaddpd", v4f64, VR256, f256mem,
- X86fhadd, 0>, VEX_4V;
+ X86fhadd, 0>, VEX_4V, VEX_L;
defm VHSUBPDY : S3_Int <0x7D, "vhsubpd", v4f64, VR256, f256mem,
- X86fhsub, 0>, VEX_4V;
+ X86fhsub, 0>, VEX_4V, VEX_L;
}
}
@@ -5145,11 +5238,11 @@
let Predicates = [HasAVX2] in {
defm VPABSB : SS3I_unop_rm_int_y<0x1C, "vpabsb",
- int_x86_avx2_pabs_b>, VEX;
+ int_x86_avx2_pabs_b>, VEX, VEX_L;
defm VPABSW : SS3I_unop_rm_int_y<0x1D, "vpabsw",
- int_x86_avx2_pabs_w>, VEX;
+ int_x86_avx2_pabs_w>, VEX, VEX_L;
defm VPABSD : SS3I_unop_rm_int_y<0x1E, "vpabsd",
- int_x86_avx2_pabs_d>, VEX;
+ int_x86_avx2_pabs_d>, VEX, VEX_L;
}
defm PABSB : SS3I_unop_rm_int<0x1C, "pabsb",
@@ -5288,37 +5381,37 @@
let isCommutable = 0 in {
defm VPHADDWY : SS3I_binop_rm<0x01, "vphaddw", X86hadd, v16i16, VR256,
memopv4i64, i256mem,
- SSE_PHADDSUBW, 0>, VEX_4V;
+ SSE_PHADDSUBW, 0>, VEX_4V, VEX_L;
defm VPHADDDY : SS3I_binop_rm<0x02, "vphaddd", X86hadd, v8i32, VR256,
memopv4i64, i256mem,
- SSE_PHADDSUBW, 0>, VEX_4V;
+ SSE_PHADDSUBW, 0>, VEX_4V, VEX_L;
defm VPHSUBWY : SS3I_binop_rm<0x05, "vphsubw", X86hsub, v16i16, VR256,
memopv4i64, i256mem,
- SSE_PHADDSUBW, 0>, VEX_4V;
+ SSE_PHADDSUBW, 0>, VEX_4V, VEX_L;
defm VPHSUBDY : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v8i32, VR256,
memopv4i64, i256mem,
- SSE_PHADDSUBW, 0>, VEX_4V;
+ SSE_PHADDSUBW, 0>, VEX_4V, VEX_L;
defm VPSIGNBY : SS3I_binop_rm<0x08, "vpsignb", X86psign, v32i8, VR256,
memopv4i64, i256mem,
- SSE_PHADDSUBW, 0>, VEX_4V;
+ SSE_PHADDSUBW, 0>, VEX_4V, VEX_L;
defm VPSIGNWY : SS3I_binop_rm<0x09, "vpsignw", X86psign, v16i16, VR256,
memopv4i64, i256mem,
- SSE_PHADDSUBW, 0>, VEX_4V;
+ SSE_PHADDSUBW, 0>, VEX_4V, VEX_L;
defm VPSIGNDY : SS3I_binop_rm<0x0A, "vpsignd", X86psign, v8i32, VR256,
memopv4i64, i256mem,
- SSE_PHADDSUBW, 0>, VEX_4V;
+ SSE_PHADDSUBW, 0>, VEX_4V, VEX_L;
defm VPSHUFBY : SS3I_binop_rm<0x00, "vpshufb", X86pshufb, v32i8, VR256,
memopv4i64, i256mem,
- SSE_PHADDSUBW, 0>, VEX_4V;
+ SSE_PHADDSUBW, 0>, VEX_4V, VEX_L;
defm VPHADDSW : SS3I_binop_rm_int_y<0x03, "vphaddsw",
- int_x86_avx2_phadd_sw>, VEX_4V;
+ int_x86_avx2_phadd_sw>, VEX_4V, VEX_L;
defm VPHSUBSW : SS3I_binop_rm_int_y<0x07, "vphsubsw",
- int_x86_avx2_phsub_sw>, VEX_4V;
+ int_x86_avx2_phsub_sw>, VEX_4V, VEX_L;
defm VPMADDUBSW : SS3I_binop_rm_int_y<0x04, "vpmaddubsw",
- int_x86_avx2_pmadd_ub_sw>, VEX_4V;
+ int_x86_avx2_pmadd_ub_sw>, VEX_4V, VEX_L;
}
defm VPMULHRSW : SS3I_binop_rm_int_y<0x0B, "vpmulhrsw",
- int_x86_avx2_pmul_hr_sw>, VEX_4V;
+ int_x86_avx2_pmul_hr_sw>, VEX_4V, VEX_L;
}
// None of these have i8 immediate fields.
@@ -5397,8 +5490,8 @@
let Predicates = [HasAVX] in
defm VPALIGN : ssse3_palign<"vpalignr", 0>, VEX_4V;
let Predicates = [HasAVX2] in
- defm VPALIGN : ssse3_palign_y<"vpalignr", 0>, VEX_4V;
-let Constraints = "$src1 = $dst", Predicates = [HasSSSE3] in
+ defm VPALIGN : ssse3_palign_y<"vpalignr", 0>, VEX_4V, VEX_L;
+let Constraints = "$src1 = $dst", Predicates = [UseSSSE3] in
defm PALIGN : ssse3_palign<"palignr">;
let Predicates = [HasAVX2] in {
@@ -5423,7 +5516,7 @@
(VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
}
-let Predicates = [HasSSSE3] in {
+let Predicates = [UseSSSE3] in {
def : Pat<(v4i32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
(PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
def : Pat<(v4f32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
@@ -5504,17 +5597,17 @@
let Predicates = [HasAVX2] in {
defm VPMOVSXBW : SS41I_binop_rm_int16_y<0x20, "vpmovsxbw",
- int_x86_avx2_pmovsxbw>, VEX;
+ int_x86_avx2_pmovsxbw>, VEX, VEX_L;
defm VPMOVSXWD : SS41I_binop_rm_int16_y<0x23, "vpmovsxwd",
- int_x86_avx2_pmovsxwd>, VEX;
+ int_x86_avx2_pmovsxwd>, VEX, VEX_L;
defm VPMOVSXDQ : SS41I_binop_rm_int16_y<0x25, "vpmovsxdq",
- int_x86_avx2_pmovsxdq>, VEX;
+ int_x86_avx2_pmovsxdq>, VEX, VEX_L;
defm VPMOVZXBW : SS41I_binop_rm_int16_y<0x30, "vpmovzxbw",
- int_x86_avx2_pmovzxbw>, VEX;
+ int_x86_avx2_pmovzxbw>, VEX, VEX_L;
defm VPMOVZXWD : SS41I_binop_rm_int16_y<0x33, "vpmovzxwd",
- int_x86_avx2_pmovzxwd>, VEX;
+ int_x86_avx2_pmovzxwd>, VEX, VEX_L;
defm VPMOVZXDQ : SS41I_binop_rm_int16_y<0x35, "vpmovzxdq",
- int_x86_avx2_pmovzxdq>, VEX;
+ int_x86_avx2_pmovzxdq>, VEX, VEX_L;
}
defm PMOVSXBW : SS41I_binop_rm_int8<0x20, "pmovsxbw", int_x86_sse41_pmovsxbw>;
@@ -5530,64 +5623,88 @@
(VPMOVSXBWrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovsxbw (vzload_v2i64 addr:$src)),
(VPMOVSXBWrm addr:$src)>;
+ def : Pat<(int_x86_sse41_pmovsxbw (bc_v16i8 (loadv2i64 addr:$src))),
+ (VPMOVSXBWrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovsxwd (vzmovl_v2i64 addr:$src)),
(VPMOVSXWDrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovsxwd (vzload_v2i64 addr:$src)),
(VPMOVSXWDrm addr:$src)>;
+ def : Pat<(int_x86_sse41_pmovsxwd (bc_v8i16 (loadv2i64 addr:$src))),
+ (VPMOVSXWDrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovsxdq (vzmovl_v2i64 addr:$src)),
(VPMOVSXDQrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovsxdq (vzload_v2i64 addr:$src)),
(VPMOVSXDQrm addr:$src)>;
+ def : Pat<(int_x86_sse41_pmovsxdq (bc_v4i32 (loadv2i64 addr:$src))),
+ (VPMOVSXDQrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovzxbw (vzmovl_v2i64 addr:$src)),
(VPMOVZXBWrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovzxbw (vzload_v2i64 addr:$src)),
(VPMOVZXBWrm addr:$src)>;
+ def : Pat<(int_x86_sse41_pmovzxbw (bc_v16i8 (loadv2i64 addr:$src))),
+ (VPMOVZXBWrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovzxwd (vzmovl_v2i64 addr:$src)),
(VPMOVZXWDrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovzxwd (vzload_v2i64 addr:$src)),
(VPMOVZXWDrm addr:$src)>;
+ def : Pat<(int_x86_sse41_pmovzxwd (bc_v8i16 (loadv2i64 addr:$src))),
+ (VPMOVZXWDrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovzxdq (vzmovl_v2i64 addr:$src)),
(VPMOVZXDQrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovzxdq (vzload_v2i64 addr:$src)),
(VPMOVZXDQrm addr:$src)>;
+ def : Pat<(int_x86_sse41_pmovzxdq (bc_v4i32 (loadv2i64 addr:$src))),
+ (VPMOVZXDQrm addr:$src)>;
}
-let Predicates = [HasSSE41] in {
+let Predicates = [UseSSE41] in {
// Common patterns involving scalar load.
def : Pat<(int_x86_sse41_pmovsxbw (vzmovl_v2i64 addr:$src)),
(PMOVSXBWrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovsxbw (vzload_v2i64 addr:$src)),
(PMOVSXBWrm addr:$src)>;
+ def : Pat<(int_x86_sse41_pmovsxbw (bc_v16i8 (loadv2i64 addr:$src))),
+ (PMOVSXBWrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovsxwd (vzmovl_v2i64 addr:$src)),
(PMOVSXWDrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovsxwd (vzload_v2i64 addr:$src)),
(PMOVSXWDrm addr:$src)>;
+ def : Pat<(int_x86_sse41_pmovsxwd (bc_v8i16 (loadv2i64 addr:$src))),
+ (PMOVSXWDrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovsxdq (vzmovl_v2i64 addr:$src)),
(PMOVSXDQrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovsxdq (vzload_v2i64 addr:$src)),
(PMOVSXDQrm addr:$src)>;
+ def : Pat<(int_x86_sse41_pmovsxdq (bc_v4i32 (loadv2i64 addr:$src))),
+ (PMOVSXDQrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovzxbw (vzmovl_v2i64 addr:$src)),
(PMOVZXBWrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovzxbw (vzload_v2i64 addr:$src)),
(PMOVZXBWrm addr:$src)>;
+ def : Pat<(int_x86_sse41_pmovzxbw (bc_v16i8 (loadv2i64 addr:$src))),
+ (PMOVZXBWrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovzxwd (vzmovl_v2i64 addr:$src)),
(PMOVZXWDrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovzxwd (vzload_v2i64 addr:$src)),
(PMOVZXWDrm addr:$src)>;
+ def : Pat<(int_x86_sse41_pmovzxwd (bc_v8i16 (loadv2i64 addr:$src))),
+ (PMOVZXWDrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovzxdq (vzmovl_v2i64 addr:$src)),
(PMOVZXDQrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovzxdq (vzload_v2i64 addr:$src)),
(PMOVZXDQrm addr:$src)>;
+ def : Pat<(int_x86_sse41_pmovzxdq (bc_v4i32 (loadv2i64 addr:$src))),
+ (PMOVZXDQrm addr:$src)>;
}
let Predicates = [HasAVX2] in {
@@ -5607,7 +5724,7 @@
def : Pat<(v4i32 (X86vsmovl (v8i16 VR128:$src))), (VPMOVSXWDrr VR128:$src)>;
}
-let Predicates = [HasSSE41] in {
+let Predicates = [UseSSE41] in {
def : Pat<(v2i64 (X86vsmovl (v4i32 VR128:$src))), (PMOVSXDQrr VR128:$src)>;
def : Pat<(v4i32 (X86vsmovl (v8i16 VR128:$src))), (PMOVSXWDrr VR128:$src)>;
}
@@ -5651,13 +5768,13 @@
let Predicates = [HasAVX2] in {
defm VPMOVSXBD : SS41I_binop_rm_int8_y<0x21, "vpmovsxbd",
- int_x86_avx2_pmovsxbd>, VEX;
+ int_x86_avx2_pmovsxbd>, VEX, VEX_L;
defm VPMOVSXWQ : SS41I_binop_rm_int8_y<0x24, "vpmovsxwq",
- int_x86_avx2_pmovsxwq>, VEX;
+ int_x86_avx2_pmovsxwq>, VEX, VEX_L;
defm VPMOVZXBD : SS41I_binop_rm_int8_y<0x31, "vpmovzxbd",
- int_x86_avx2_pmovzxbd>, VEX;
+ int_x86_avx2_pmovzxbd>, VEX, VEX_L;
defm VPMOVZXWQ : SS41I_binop_rm_int8_y<0x34, "vpmovzxwq",
- int_x86_avx2_pmovzxwq>, VEX;
+ int_x86_avx2_pmovzxwq>, VEX, VEX_L;
}
defm PMOVSXBD : SS41I_binop_rm_int4<0x21, "pmovsxbd", int_x86_sse41_pmovsxbd>;
@@ -5678,7 +5795,7 @@
(VPMOVZXWQrm addr:$src)>;
}
-let Predicates = [HasSSE41] in {
+let Predicates = [UseSSE41] in {
// Common patterns involving scalar load
def : Pat<(int_x86_sse41_pmovsxbd (vzmovl_v4i32 addr:$src)),
(PMOVSXBDrm addr:$src)>;
@@ -5726,9 +5843,9 @@
}
let Predicates = [HasAVX2] in {
defm VPMOVSXBQ : SS41I_binop_rm_int4_y<0x22, "vpmovsxbq",
- int_x86_avx2_pmovsxbq>, VEX;
+ int_x86_avx2_pmovsxbq>, VEX, VEX_L;
defm VPMOVZXBQ : SS41I_binop_rm_int4_y<0x32, "vpmovzxbq",
- int_x86_avx2_pmovzxbq>, VEX;
+ int_x86_avx2_pmovzxbq>, VEX, VEX_L;
}
defm PMOVSXBQ : SS41I_binop_rm_int2<0x22, "pmovsxbq", int_x86_sse41_pmovsxbq>;
defm PMOVZXBQ : SS41I_binop_rm_int2<0x32, "pmovzxbq", int_x86_sse41_pmovzxbq>;
@@ -5746,7 +5863,7 @@
(VPMOVZXBQrm addr:$src)>;
}
-let Predicates = [HasSSE41] in {
+let Predicates = [UseSSE41] in {
// Common patterns involving scalar load
def : Pat<(int_x86_sse41_pmovsxbq
(bitconvert (v4i32 (X86vzmovl
@@ -5759,6 +5876,100 @@
(PMOVZXBQrm addr:$src)>;
}
+let Predicates = [HasAVX2] in {
+ def : Pat<(v16i16 (X86vzext (v16i8 VR128:$src))), (VPMOVZXBWYrr VR128:$src)>;
+ def : Pat<(v8i32 (X86vzext (v16i8 VR128:$src))), (VPMOVZXBDYrr VR128:$src)>;
+ def : Pat<(v4i64 (X86vzext (v16i8 VR128:$src))), (VPMOVZXBQYrr VR128:$src)>;
+
+ def : Pat<(v8i32 (X86vzext (v8i16 VR128:$src))), (VPMOVZXWDYrr VR128:$src)>;
+ def : Pat<(v4i64 (X86vzext (v8i16 VR128:$src))), (VPMOVZXWQYrr VR128:$src)>;
+
+ def : Pat<(v4i64 (X86vzext (v4i32 VR128:$src))), (VPMOVZXDQYrr VR128:$src)>;
+
+ def : Pat<(v16i16 (X86vzext (v32i8 VR256:$src))),
+ (VPMOVZXBWYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
+ def : Pat<(v8i32 (X86vzext (v32i8 VR256:$src))),
+ (VPMOVZXBDYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
+ def : Pat<(v4i64 (X86vzext (v32i8 VR256:$src))),
+ (VPMOVZXBQYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
+
+ def : Pat<(v8i32 (X86vzext (v16i16 VR256:$src))),
+ (VPMOVZXWDYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
+ def : Pat<(v4i64 (X86vzext (v16i16 VR256:$src))),
+ (VPMOVZXWQYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
+
+ def : Pat<(v4i64 (X86vzext (v8i32 VR256:$src))),
+ (VPMOVZXDQYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
+}
+
+let Predicates = [HasAVX] in {
+ def : Pat<(v8i16 (X86vzext (v16i8 VR128:$src))), (VPMOVZXBWrr VR128:$src)>;
+ def : Pat<(v4i32 (X86vzext (v16i8 VR128:$src))), (VPMOVZXBDrr VR128:$src)>;
+ def : Pat<(v2i64 (X86vzext (v16i8 VR128:$src))), (VPMOVZXBQrr VR128:$src)>;
+
+ def : Pat<(v4i32 (X86vzext (v8i16 VR128:$src))), (VPMOVZXWDrr VR128:$src)>;
+ def : Pat<(v2i64 (X86vzext (v8i16 VR128:$src))), (VPMOVZXWQrr VR128:$src)>;
+
+ def : Pat<(v2i64 (X86vzext (v4i32 VR128:$src))), (VPMOVZXDQrr VR128:$src)>;
+
+ def : Pat<(v8i16 (X86vzext (v16i8 (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
+ (VPMOVZXBWrm addr:$src)>;
+ def : Pat<(v8i16 (X86vzext (v16i8 (bitconvert (v2f64 (scalar_to_vector (loadf64 addr:$src))))))),
+ (VPMOVZXBWrm addr:$src)>;
+ def : Pat<(v4i32 (X86vzext (v16i8 (bitconvert (v4i32 (scalar_to_vector (loadi32 addr:$src))))))),
+ (VPMOVZXBDrm addr:$src)>;
+ def : Pat<(v2i64 (X86vzext (v16i8 (bitconvert (v4i32 (scalar_to_vector (loadi16_anyext addr:$src))))))),
+ (VPMOVZXBQrm addr:$src)>;
+
+ def : Pat<(v4i32 (X86vzext (v8i16 (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
+ (VPMOVZXWDrm addr:$src)>;
+ def : Pat<(v4i32 (X86vzext (v8i16 (bitconvert (v2f64 (scalar_to_vector (loadf64 addr:$src))))))),
+ (VPMOVZXWDrm addr:$src)>;
+ def : Pat<(v2i64 (X86vzext (v8i16 (bitconvert (v4i32 (scalar_to_vector (loadi32 addr:$src))))))),
+ (VPMOVZXWQrm addr:$src)>;
+
+ def : Pat<(v2i64 (X86vzext (v4i32 (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
+ (VPMOVZXDQrm addr:$src)>;
+ def : Pat<(v2i64 (X86vzext (v4i32 (bitconvert (v2f64 (scalar_to_vector (loadf64 addr:$src))))))),
+ (VPMOVZXDQrm addr:$src)>;
+ def : Pat<(v2i64 (X86vzext (v4i32 (bitconvert (v2i64 (X86vzload addr:$src)))))),
+ (VPMOVZXDQrm addr:$src)>;
+}
+
+let Predicates = [UseSSE41] in {
+ def : Pat<(v8i16 (X86vzext (v16i8 VR128:$src))), (PMOVZXBWrr VR128:$src)>;
+ def : Pat<(v4i32 (X86vzext (v16i8 VR128:$src))), (PMOVZXBDrr VR128:$src)>;
+ def : Pat<(v2i64 (X86vzext (v16i8 VR128:$src))), (PMOVZXBQrr VR128:$src)>;
+
+ def : Pat<(v4i32 (X86vzext (v8i16 VR128:$src))), (PMOVZXWDrr VR128:$src)>;
+ def : Pat<(v2i64 (X86vzext (v8i16 VR128:$src))), (PMOVZXWQrr VR128:$src)>;
+
+ def : Pat<(v2i64 (X86vzext (v4i32 VR128:$src))), (PMOVZXDQrr VR128:$src)>;
+
+ def : Pat<(v8i16 (X86vzext (v16i8 (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
+ (PMOVZXBWrm addr:$src)>;
+ def : Pat<(v8i16 (X86vzext (v16i8 (bitconvert (v2f64 (scalar_to_vector (loadf64 addr:$src))))))),
+ (PMOVZXBWrm addr:$src)>;
+ def : Pat<(v4i32 (X86vzext (v16i8 (bitconvert (v4i32 (scalar_to_vector (loadi32 addr:$src))))))),
+ (PMOVZXBDrm addr:$src)>;
+ def : Pat<(v2i64 (X86vzext (v16i8 (bitconvert (v4i32 (scalar_to_vector (loadi16_anyext addr:$src))))))),
+ (PMOVZXBQrm addr:$src)>;
+
+ def : Pat<(v4i32 (X86vzext (v8i16 (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
+ (PMOVZXWDrm addr:$src)>;
+ def : Pat<(v4i32 (X86vzext (v8i16 (bitconvert (v2f64 (scalar_to_vector (loadf64 addr:$src))))))),
+ (PMOVZXWDrm addr:$src)>;
+ def : Pat<(v2i64 (X86vzext (v8i16 (bitconvert (v4i32 (scalar_to_vector (loadi32 addr:$src))))))),
+ (PMOVZXWQrm addr:$src)>;
+
+ def : Pat<(v2i64 (X86vzext (v4i32 (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
+ (PMOVZXDQrm addr:$src)>;
+ def : Pat<(v2i64 (X86vzext (v4i32 (bitconvert (v2f64 (scalar_to_vector (loadf64 addr:$src))))))),
+ (PMOVZXDQrm addr:$src)>;
+ def : Pat<(v2i64 (X86vzext (v4i32 (bitconvert (v2i64 (X86vzload addr:$src)))))),
+ (PMOVZXDQrm addr:$src)>;
+}
+
//===----------------------------------------------------------------------===//
// SSE4.1 - Extract Instructions
//===----------------------------------------------------------------------===//
@@ -5892,7 +6103,7 @@
imm:$src2))),
addr:$dst),
(EXTRACTPSmr addr:$dst, VR128:$src1, imm:$src2)>,
- Requires<[HasSSE41]>;
+ Requires<[UseSSE41]>;
//===----------------------------------------------------------------------===//
// SSE4.1 - Insert Instructions
@@ -6139,7 +6350,7 @@
defm VROUNDY : sse41_fp_unop_rm<0x08, 0x09, "vround", f256mem, VR256,
memopv8f32, memopv4f64,
int_x86_avx_round_ps_256,
- int_x86_avx_round_pd_256>, VEX;
+ int_x86_avx_round_pd_256>, VEX, VEX_L;
defm VROUND : sse41_fp_binop_rm<0x0A, 0x0B, "vround",
int_x86_sse41_round_ss,
int_x86_sse41_round_sd, 0>, VEX_4V, VEX_LIG;
@@ -6164,6 +6375,15 @@
(VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x3))>;
def : Pat<(f64 (ftrunc FR64:$src)),
(VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x3))>;
+
+ def : Pat<(v4f32 (ffloor VR128:$src)),
+ (VROUNDPSr VR128:$src, (i32 0x1))>;
+ def : Pat<(v2f64 (ffloor VR128:$src)),
+ (VROUNDPDr VR128:$src, (i32 0x1))>;
+ def : Pat<(v8f32 (ffloor VR256:$src)),
+ (VROUNDYPSr VR256:$src, (i32 0x1))>;
+ def : Pat<(v4f64 (ffloor VR256:$src)),
+ (VROUNDYPDr VR256:$src, (i32 0x1))>;
}
defm ROUND : sse41_fp_unop_rm<0x08, 0x09, "round", f128mem, VR128,
@@ -6173,26 +6393,33 @@
defm ROUND : sse41_fp_binop_rm<0x0A, 0x0B, "round",
int_x86_sse41_round_ss, int_x86_sse41_round_sd>;
-def : Pat<(ffloor FR32:$src),
- (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x1))>;
-def : Pat<(f64 (ffloor FR64:$src)),
- (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x1))>;
-def : Pat<(f32 (fnearbyint FR32:$src)),
- (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0xC))>;
-def : Pat<(f64 (fnearbyint FR64:$src)),
- (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0xC))>;
-def : Pat<(f32 (fceil FR32:$src)),
- (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x2))>;
-def : Pat<(f64 (fceil FR64:$src)),
- (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x2))>;
-def : Pat<(f32 (frint FR32:$src)),
- (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x4))>;
-def : Pat<(f64 (frint FR64:$src)),
- (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x4))>;
-def : Pat<(f32 (ftrunc FR32:$src)),
- (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x3))>;
-def : Pat<(f64 (ftrunc FR64:$src)),
- (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x3))>;
+let Predicates = [UseSSE41] in {
+ def : Pat<(ffloor FR32:$src),
+ (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x1))>;
+ def : Pat<(f64 (ffloor FR64:$src)),
+ (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x1))>;
+ def : Pat<(f32 (fnearbyint FR32:$src)),
+ (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0xC))>;
+ def : Pat<(f64 (fnearbyint FR64:$src)),
+ (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0xC))>;
+ def : Pat<(f32 (fceil FR32:$src)),
+ (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x2))>;
+ def : Pat<(f64 (fceil FR64:$src)),
+ (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x2))>;
+ def : Pat<(f32 (frint FR32:$src)),
+ (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x4))>;
+ def : Pat<(f64 (frint FR64:$src)),
+ (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x4))>;
+ def : Pat<(f32 (ftrunc FR32:$src)),
+ (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x3))>;
+ def : Pat<(f64 (ftrunc FR64:$src)),
+ (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x3))>;
+
+ def : Pat<(v4f32 (ffloor VR128:$src)),
+ (ROUNDPSr VR128:$src, (i32 0x1))>;
+ def : Pat<(v2f64 (ffloor VR128:$src)),
+ (ROUNDPDr VR128:$src, (i32 0x1))>;
+}
//===----------------------------------------------------------------------===//
// SSE4.1 - Packed Bit Test
@@ -6213,11 +6440,11 @@
def VPTESTYrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR256:$src1, VR256:$src2),
"vptest\t{$src2, $src1|$src1, $src2}",
[(set EFLAGS, (X86ptest VR256:$src1, (v4i64 VR256:$src2)))]>,
- OpSize, VEX;
+ OpSize, VEX, VEX_L;
def VPTESTYrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR256:$src1, i256mem:$src2),
"vptest\t{$src2, $src1|$src1, $src2}",
[(set EFLAGS,(X86ptest VR256:$src1, (memopv4i64 addr:$src2)))]>,
- OpSize, VEX;
+ OpSize, VEX, VEX_L;
}
let Defs = [EFLAGS] in {
@@ -6246,11 +6473,13 @@
let Defs = [EFLAGS], Predicates = [HasAVX] in {
let ExeDomain = SSEPackedSingle in {
defm VTESTPS : avx_bittest<0x0E, "vtestps", VR128, f128mem, memopv4f32, v4f32>;
-defm VTESTPSY : avx_bittest<0x0E, "vtestps", VR256, f256mem, memopv8f32, v8f32>;
+defm VTESTPSY : avx_bittest<0x0E, "vtestps", VR256, f256mem, memopv8f32, v8f32>,
+ VEX_L;
}
let ExeDomain = SSEPackedDouble in {
defm VTESTPD : avx_bittest<0x0F, "vtestpd", VR128, f128mem, memopv2f64, v2f64>;
-defm VTESTPDY : avx_bittest<0x0F, "vtestpd", VR256, f256mem, memopv4f64, v4f64>;
+defm VTESTPDY : avx_bittest<0x0F, "vtestpd", VR256, f256mem, memopv4f64, v4f64>,
+ VEX_L;
}
}
@@ -6330,7 +6559,7 @@
(bitconvert (memopv2i64 addr:$src2))))]>, OpSize;
}
-/// SS41I_binop_rm_int - Simple SSE 4.1 binary operator
+/// SS41I_binop_rm_int_y - Simple SSE 4.1 binary operator
multiclass SS41I_binop_rm_int_y<bits<8> opc, string OpcodeStr,
Intrinsic IntId256> {
let isCommutable = 1 in
@@ -6373,25 +6602,25 @@
let Predicates = [HasAVX2] in {
let isCommutable = 0 in
defm VPACKUSDW : SS41I_binop_rm_int_y<0x2B, "vpackusdw",
- int_x86_avx2_packusdw>, VEX_4V;
+ int_x86_avx2_packusdw>, VEX_4V, VEX_L;
defm VPMINSB : SS41I_binop_rm_int_y<0x38, "vpminsb",
- int_x86_avx2_pmins_b>, VEX_4V;
+ int_x86_avx2_pmins_b>, VEX_4V, VEX_L;
defm VPMINSD : SS41I_binop_rm_int_y<0x39, "vpminsd",
- int_x86_avx2_pmins_d>, VEX_4V;
+ int_x86_avx2_pmins_d>, VEX_4V, VEX_L;
defm VPMINUD : SS41I_binop_rm_int_y<0x3B, "vpminud",
- int_x86_avx2_pminu_d>, VEX_4V;
+ int_x86_avx2_pminu_d>, VEX_4V, VEX_L;
defm VPMINUW : SS41I_binop_rm_int_y<0x3A, "vpminuw",
- int_x86_avx2_pminu_w>, VEX_4V;
+ int_x86_avx2_pminu_w>, VEX_4V, VEX_L;
defm VPMAXSB : SS41I_binop_rm_int_y<0x3C, "vpmaxsb",
- int_x86_avx2_pmaxs_b>, VEX_4V;
+ int_x86_avx2_pmaxs_b>, VEX_4V, VEX_L;
defm VPMAXSD : SS41I_binop_rm_int_y<0x3D, "vpmaxsd",
- int_x86_avx2_pmaxs_d>, VEX_4V;
+ int_x86_avx2_pmaxs_d>, VEX_4V, VEX_L;
defm VPMAXUD : SS41I_binop_rm_int_y<0x3F, "vpmaxud",
- int_x86_avx2_pmaxu_d>, VEX_4V;
+ int_x86_avx2_pmaxu_d>, VEX_4V, VEX_L;
defm VPMAXUW : SS41I_binop_rm_int_y<0x3E, "vpmaxuw",
- int_x86_avx2_pmaxu_w>, VEX_4V;
+ int_x86_avx2_pmaxu_w>, VEX_4V, VEX_L;
defm VPMULDQ : SS41I_binop_rm_int_y<0x28, "vpmuldq",
- int_x86_avx2_pmul_dq>, VEX_4V;
+ int_x86_avx2_pmul_dq>, VEX_4V, VEX_L;
}
let Constraints = "$src1 = $dst" in {
@@ -6437,9 +6666,9 @@
}
let Predicates = [HasAVX2] in {
defm VPMULLDY : SS48I_binop_rm<0x40, "vpmulld", mul, v8i32, VR256,
- memopv4i64, i256mem, 0>, VEX_4V;
+ memopv4i64, i256mem, 0>, VEX_4V, VEX_L;
defm VPCMPEQQY : SS48I_binop_rm<0x29, "vpcmpeqq", X86pcmpeq, v4i64, VR256,
- memopv4i64, i256mem, 0>, VEX_4V;
+ memopv4i64, i256mem, 0>, VEX_4V, VEX_L;
}
let Constraints = "$src1 = $dst" in {
@@ -6482,13 +6711,15 @@
defm VBLENDPS : SS41I_binop_rmi_int<0x0C, "vblendps", int_x86_sse41_blendps,
VR128, memopv4f32, f128mem, 0>, VEX_4V;
defm VBLENDPSY : SS41I_binop_rmi_int<0x0C, "vblendps",
- int_x86_avx_blend_ps_256, VR256, memopv8f32, f256mem, 0>, VEX_4V;
+ int_x86_avx_blend_ps_256, VR256, memopv8f32,
+ f256mem, 0>, VEX_4V, VEX_L;
}
let ExeDomain = SSEPackedDouble in {
defm VBLENDPD : SS41I_binop_rmi_int<0x0D, "vblendpd", int_x86_sse41_blendpd,
VR128, memopv2f64, f128mem, 0>, VEX_4V;
defm VBLENDPDY : SS41I_binop_rmi_int<0x0D, "vblendpd",
- int_x86_avx_blend_pd_256, VR256, memopv4f64, f256mem, 0>, VEX_4V;
+ int_x86_avx_blend_pd_256,VR256, memopv4f64,
+ f256mem, 0>, VEX_4V, VEX_L;
}
defm VPBLENDW : SS41I_binop_rmi_int<0x0E, "vpblendw", int_x86_sse41_pblendw,
VR128, memopv2i64, i128mem, 0>, VEX_4V;
@@ -6503,15 +6734,15 @@
VR128, memopv2f64, f128mem, 0>, VEX_4V;
let ExeDomain = SSEPackedSingle in
defm VDPPSY : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_avx_dp_ps_256,
- VR256, memopv8f32, i256mem, 0>, VEX_4V;
+ VR256, memopv8f32, i256mem, 0>, VEX_4V, VEX_L;
}
let Predicates = [HasAVX2] in {
let isCommutable = 0 in {
defm VPBLENDWY : SS41I_binop_rmi_int<0x0E, "vpblendw", int_x86_avx2_pblendw,
- VR256, memopv4i64, i256mem, 0>, VEX_4V;
+ VR256, memopv4i64, i256mem, 0>, VEX_4V, VEX_L;
defm VMPSADBWY : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_avx2_mpsadbw,
- VR256, memopv4i64, i256mem, 0>, VEX_4V;
+ VR256, memopv4i64, i256mem, 0>, VEX_4V, VEX_L;
}
}
@@ -6562,13 +6793,13 @@
defm VBLENDVPD : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR128, f128mem,
memopv2f64, int_x86_sse41_blendvpd>;
defm VBLENDVPDY : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR256, f256mem,
- memopv4f64, int_x86_avx_blendv_pd_256>;
+ memopv4f64, int_x86_avx_blendv_pd_256>, VEX_L;
} // ExeDomain = SSEPackedDouble
let ExeDomain = SSEPackedSingle in {
defm VBLENDVPS : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR128, f128mem,
memopv4f32, int_x86_sse41_blendvps>;
defm VBLENDVPSY : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR256, f256mem,
- memopv8f32, int_x86_avx_blendv_ps_256>;
+ memopv8f32, int_x86_avx_blendv_ps_256>, VEX_L;
} // ExeDomain = SSEPackedSingle
defm VPBLENDVB : SS41I_quaternary_int_avx<0x4C, "vpblendvb", VR128, i128mem,
memopv2i64, int_x86_sse41_pblendvb>;
@@ -6576,7 +6807,7 @@
let Predicates = [HasAVX2] in {
defm VPBLENDVBY : SS41I_quaternary_int_avx<0x4C, "vpblendvb", VR256, i256mem,
- memopv4i64, int_x86_avx2_pblendvb>;
+ memopv4i64, int_x86_avx2_pblendvb>, VEX_L;
}
let Predicates = [HasAVX] in {
@@ -6679,7 +6910,7 @@
def : InstAlias<"pblendvb\t{%xmm0, $src2, $dst|$dst, $src2, %xmm0}",
(PBLENDVBrm0 VR128:$dst, i128mem:$src2)>;
-let Predicates = [HasSSE41] in {
+let Predicates = [UseSSE41] in {
def : Pat<(v16i8 (vselect (v16i8 XMM0), (v16i8 VR128:$src1),
(v16i8 VR128:$src2))),
(PBLENDVBrr0 VR128:$src2, VR128:$src1)>;
@@ -6717,7 +6948,7 @@
def VMOVNTDQAYrm : SS48I<0x2A, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
"vmovntdqa\t{$src, $dst|$dst, $src}",
[(set VR256:$dst, (int_x86_avx2_movntdqa addr:$src))]>,
- OpSize, VEX;
+ OpSize, VEX, VEX_L;
def MOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"movntdqa\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse41_movntdqa addr:$src))]>,
@@ -6753,7 +6984,7 @@
let Predicates = [HasAVX2] in
defm VPCMPGTQY : SS42I_binop_rm<0x37, "vpcmpgtq", X86pcmpgt, v4i64, VR256,
- memopv4i64, i256mem, 0>, VEX_4V;
+ memopv4i64, i256mem, 0>, VEX_4V, VEX_L;
let Constraints = "$src1 = $dst" in
defm PCMPGTQ : SS42I_binop_rm<0x37, "pcmpgtq", X86pcmpgt, v2i64, VR128,
@@ -6771,34 +7002,31 @@
imm:$src3))]>;
def MEM : PseudoI<(outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2, i8imm:$src3),
- [(set VR128:$dst, (int_x86_sse42_pcmpistrm128
- VR128:$src1, (load addr:$src2), imm:$src3))]>;
+ [(set VR128:$dst, (int_x86_sse42_pcmpistrm128 VR128:$src1,
+ (bc_v16i8 (memopv2i64 addr:$src2)), imm:$src3))]>;
}
let Defs = [EFLAGS], usesCustomInserter = 1 in {
- let AddedComplexity = 1 in
- defm VPCMPISTRM128 : pseudo_pcmpistrm<"#VPCMPISTRM128">, Requires<[HasAVX]>;
- defm PCMPISTRM128 : pseudo_pcmpistrm<"#PCMPISTRM128">, Requires<[HasSSE42]>;
+ defm VPCMPISTRM128 : pseudo_pcmpistrm<"#VPCMPISTRM128">, Requires<[HasAVX]>;
+ defm PCMPISTRM128 : pseudo_pcmpistrm<"#PCMPISTRM128">, Requires<[UseSSE42]>;
}
-let Defs = [XMM0, EFLAGS], neverHasSideEffects = 1, Predicates = [HasAVX] in {
- def VPCMPISTRM128rr : SS42AI<0x62, MRMSrcReg, (outs),
- (ins VR128:$src1, VR128:$src2, i8imm:$src3),
- "vpcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize, VEX;
+multiclass pcmpistrm_SS42AI<string asm> {
+ def rr : SS42AI<0x62, MRMSrcReg, (outs),
+ (ins VR128:$src1, VR128:$src2, i8imm:$src3),
+ !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"),
+ []>, OpSize;
let mayLoad = 1 in
- def VPCMPISTRM128rm : SS42AI<0x62, MRMSrcMem, (outs),
- (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
- "vpcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize, VEX;
+ def rm :SS42AI<0x62, MRMSrcMem, (outs),
+ (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
+ !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"),
+ []>, OpSize;
}
let Defs = [XMM0, EFLAGS], neverHasSideEffects = 1 in {
- def PCMPISTRM128rr : SS42AI<0x62, MRMSrcReg, (outs),
- (ins VR128:$src1, VR128:$src2, i8imm:$src3),
- "pcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize;
- let mayLoad = 1 in
- def PCMPISTRM128rm : SS42AI<0x62, MRMSrcMem, (outs),
- (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
- "pcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize;
+ let Predicates = [HasAVX] in
+ defm VPCMPISTRM128 : pcmpistrm_SS42AI<"vpcmpistrm">, VEX;
+ defm PCMPISTRM128 : pcmpistrm_SS42AI<"pcmpistrm"> ;
}
// Packed Compare Explicit Length Strings, Return Mask
@@ -6809,74 +7037,103 @@
VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5))]>;
def MEM : PseudoI<(outs VR128:$dst),
(ins VR128:$src1, i128mem:$src3, i8imm:$src5),
- [(set VR128:$dst, (int_x86_sse42_pcmpestrm128
- VR128:$src1, EAX, (load addr:$src3), EDX, imm:$src5))]>;
+ [(set VR128:$dst, (int_x86_sse42_pcmpestrm128 VR128:$src1, EAX,
+ (bc_v16i8 (memopv2i64 addr:$src3)), EDX, imm:$src5))]>;
}
let Defs = [EFLAGS], Uses = [EAX, EDX], usesCustomInserter = 1 in {
- let AddedComplexity = 1 in
- defm VPCMPESTRM128 : pseudo_pcmpestrm<"#VPCMPESTRM128">, Requires<[HasAVX]>;
- defm PCMPESTRM128 : pseudo_pcmpestrm<"#PCMPESTRM128">, Requires<[HasSSE42]>;
+ defm VPCMPESTRM128 : pseudo_pcmpestrm<"#VPCMPESTRM128">, Requires<[HasAVX]>;
+ defm PCMPESTRM128 : pseudo_pcmpestrm<"#PCMPESTRM128">, Requires<[UseSSE42]>;
}
-let Predicates = [HasAVX],
- Defs = [XMM0, EFLAGS], Uses = [EAX, EDX], neverHasSideEffects = 1 in {
- def VPCMPESTRM128rr : SS42AI<0x60, MRMSrcReg, (outs),
- (ins VR128:$src1, VR128:$src3, i8imm:$src5),
- "vpcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize, VEX;
+multiclass SS42AI_pcmpestrm<string asm> {
+ def rr : SS42AI<0x60, MRMSrcReg, (outs),
+ (ins VR128:$src1, VR128:$src3, i8imm:$src5),
+ !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"),
+ []>, OpSize;
let mayLoad = 1 in
- def VPCMPESTRM128rm : SS42AI<0x60, MRMSrcMem, (outs),
- (ins VR128:$src1, i128mem:$src3, i8imm:$src5),
- "vpcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize, VEX;
+ def rm : SS42AI<0x60, MRMSrcMem, (outs),
+ (ins VR128:$src1, i128mem:$src3, i8imm:$src5),
+ !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"),
+ []>, OpSize;
}
let Defs = [XMM0, EFLAGS], Uses = [EAX, EDX], neverHasSideEffects = 1 in {
- def PCMPESTRM128rr : SS42AI<0x60, MRMSrcReg, (outs),
- (ins VR128:$src1, VR128:$src3, i8imm:$src5),
- "pcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize;
- let mayLoad = 1 in
- def PCMPESTRM128rm : SS42AI<0x60, MRMSrcMem, (outs),
- (ins VR128:$src1, i128mem:$src3, i8imm:$src5),
- "pcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize;
+ let Predicates = [HasAVX] in
+ defm VPCMPESTRM128 : SS42AI_pcmpestrm<"vpcmpestrm">, VEX;
+ defm PCMPESTRM128 : SS42AI_pcmpestrm<"pcmpestrm">;
}
// Packed Compare Implicit Length Strings, Return Index
-let Defs = [ECX, EFLAGS], neverHasSideEffects = 1 in {
- multiclass SS42AI_pcmpistri<string asm> {
- def rr : SS42AI<0x63, MRMSrcReg, (outs),
- (ins VR128:$src1, VR128:$src2, i8imm:$src3),
- !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"),
- []>, OpSize;
- let mayLoad = 1 in
- def rm : SS42AI<0x63, MRMSrcMem, (outs),
- (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
- !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"),
- []>, OpSize;
- }
+multiclass pseudo_pcmpistri<string asm> {
+ def REG : PseudoI<(outs GR32:$dst),
+ (ins VR128:$src1, VR128:$src2, i8imm:$src3),
+ [(set GR32:$dst, EFLAGS,
+ (X86pcmpistri VR128:$src1, VR128:$src2, imm:$src3))]>;
+ def MEM : PseudoI<(outs GR32:$dst),
+ (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
+ [(set GR32:$dst, EFLAGS, (X86pcmpistri VR128:$src1,
+ (bc_v16i8 (memopv2i64 addr:$src2)), imm:$src3))]>;
}
-let Predicates = [HasAVX] in
-defm VPCMPISTRI : SS42AI_pcmpistri<"vpcmpistri">, VEX;
-defm PCMPISTRI : SS42AI_pcmpistri<"pcmpistri">;
+let Defs = [EFLAGS], usesCustomInserter = 1 in {
+ defm VPCMPISTRI : pseudo_pcmpistri<"#VPCMPISTRI">, Requires<[HasAVX]>;
+ defm PCMPISTRI : pseudo_pcmpistri<"#PCMPISTRI">, Requires<[UseSSE42]>;
+}
+
+multiclass SS42AI_pcmpistri<string asm> {
+ def rr : SS42AI<0x63, MRMSrcReg, (outs),
+ (ins VR128:$src1, VR128:$src2, i8imm:$src3),
+ !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"),
+ []>, OpSize;
+ let mayLoad = 1 in
+ def rm : SS42AI<0x63, MRMSrcMem, (outs),
+ (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
+ !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"),
+ []>, OpSize;
+}
+
+let Defs = [ECX, EFLAGS], neverHasSideEffects = 1 in {
+ let Predicates = [HasAVX] in
+ defm VPCMPISTRI : SS42AI_pcmpistri<"vpcmpistri">, VEX;
+ defm PCMPISTRI : SS42AI_pcmpistri<"pcmpistri">;
+}
// Packed Compare Explicit Length Strings, Return Index
-let Defs = [ECX, EFLAGS], Uses = [EAX, EDX], neverHasSideEffects = 1 in {
- multiclass SS42AI_pcmpestri<string asm> {
- def rr : SS42AI<0x61, MRMSrcReg, (outs),
- (ins VR128:$src1, VR128:$src3, i8imm:$src5),
- !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"),
- []>, OpSize;
- let mayLoad = 1 in
- def rm : SS42AI<0x61, MRMSrcMem, (outs),
- (ins VR128:$src1, i128mem:$src3, i8imm:$src5),
- !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"),
- []>, OpSize;
- }
+multiclass pseudo_pcmpestri<string asm> {
+ def REG : PseudoI<(outs GR32:$dst),
+ (ins VR128:$src1, VR128:$src3, i8imm:$src5),
+ [(set GR32:$dst, EFLAGS,
+ (X86pcmpestri VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5))]>;
+ def MEM : PseudoI<(outs GR32:$dst),
+ (ins VR128:$src1, i128mem:$src3, i8imm:$src5),
+ [(set GR32:$dst, EFLAGS,
+ (X86pcmpestri VR128:$src1, EAX, (bc_v16i8 (memopv2i64 addr:$src3)), EDX,
+ imm:$src5))]>;
}
-let Predicates = [HasAVX] in
-defm VPCMPESTRI : SS42AI_pcmpestri<"vpcmpestri">, VEX;
-defm PCMPESTRI : SS42AI_pcmpestri<"pcmpestri">;
+let Defs = [EFLAGS], Uses = [EAX, EDX], usesCustomInserter = 1 in {
+ defm VPCMPESTRI : pseudo_pcmpestri<"#VPCMPESTRI">, Requires<[HasAVX]>;
+ defm PCMPESTRI : pseudo_pcmpestri<"#PCMPESTRI">, Requires<[UseSSE42]>;
+}
+
+multiclass SS42AI_pcmpestri<string asm> {
+ def rr : SS42AI<0x61, MRMSrcReg, (outs),
+ (ins VR128:$src1, VR128:$src3, i8imm:$src5),
+ !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"),
+ []>, OpSize;
+ let mayLoad = 1 in
+ def rm : SS42AI<0x61, MRMSrcMem, (outs),
+ (ins VR128:$src1, i128mem:$src3, i8imm:$src5),
+ !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"),
+ []>, OpSize;
+}
+
+let Defs = [ECX, EFLAGS], Uses = [EAX, EDX], neverHasSideEffects = 1 in {
+ let Predicates = [HasAVX] in
+ defm VPCMPESTRI : SS42AI_pcmpestri<"vpcmpestri">, VEX;
+ defm PCMPESTRI : SS42AI_pcmpestri<"pcmpestri">;
+}
//===----------------------------------------------------------------------===//
// SSE4.2 - CRC Instructions
@@ -7167,27 +7424,27 @@
def VBROADCASTSSrm : avx_broadcast<0x18, "vbroadcastss", VR128, f32mem,
int_x86_avx_vbroadcast_ss>;
def VBROADCASTSSYrm : avx_broadcast<0x18, "vbroadcastss", VR256, f32mem,
- int_x86_avx_vbroadcast_ss_256>;
+ int_x86_avx_vbroadcast_ss_256>, VEX_L;
}
let ExeDomain = SSEPackedDouble in
def VBROADCASTSDYrm : avx_broadcast<0x19, "vbroadcastsd", VR256, f64mem,
- int_x86_avx_vbroadcast_sd_256>;
+ int_x86_avx_vbroadcast_sd_256>, VEX_L;
def VBROADCASTF128 : avx_broadcast<0x1A, "vbroadcastf128", VR256, f128mem,
- int_x86_avx_vbroadcastf128_pd_256>;
+ int_x86_avx_vbroadcastf128_pd_256>, VEX_L;
let ExeDomain = SSEPackedSingle in {
def VBROADCASTSSrr : avx2_broadcast_reg<0x18, "vbroadcastss", VR128,
int_x86_avx2_vbroadcast_ss_ps>;
def VBROADCASTSSYrr : avx2_broadcast_reg<0x18, "vbroadcastss", VR256,
- int_x86_avx2_vbroadcast_ss_ps_256>;
+ int_x86_avx2_vbroadcast_ss_ps_256>, VEX_L;
}
let ExeDomain = SSEPackedDouble in
def VBROADCASTSDYrr : avx2_broadcast_reg<0x19, "vbroadcastsd", VR256,
- int_x86_avx2_vbroadcast_sd_pd_256>;
+ int_x86_avx2_vbroadcast_sd_pd_256>, VEX_L;
let Predicates = [HasAVX2] in
def VBROADCASTI128 : avx_broadcast<0x5A, "vbroadcasti128", VR256, i128mem,
- int_x86_avx2_vbroadcasti128>;
+ int_x86_avx2_vbroadcasti128>, VEX_L;
let Predicates = [HasAVX] in
def : Pat<(int_x86_avx_vbroadcastf128_ps_256 addr:$src),
@@ -7201,50 +7458,69 @@
def VINSERTF128rr : AVXAIi8<0x18, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, VR128:$src2, i8imm:$src3),
"vinsertf128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
- []>, VEX_4V;
+ []>, VEX_4V, VEX_L;
let mayLoad = 1 in
def VINSERTF128rm : AVXAIi8<0x18, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, f128mem:$src2, i8imm:$src3),
"vinsertf128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
- []>, VEX_4V;
+ []>, VEX_4V, VEX_L;
}
let Predicates = [HasAVX] in {
def : Pat<(vinsertf128_insert:$ins (v8f32 VR256:$src1), (v4f32 VR128:$src2),
- (i32 imm)),
+ (iPTR imm)),
(VINSERTF128rr VR256:$src1, VR128:$src2,
(INSERT_get_vinsertf128_imm VR256:$ins))>;
def : Pat<(vinsertf128_insert:$ins (v4f64 VR256:$src1), (v2f64 VR128:$src2),
- (i32 imm)),
+ (iPTR imm)),
(VINSERTF128rr VR256:$src1, VR128:$src2,
(INSERT_get_vinsertf128_imm VR256:$ins))>;
+
+def : Pat<(vinsertf128_insert:$ins (v8f32 VR256:$src1), (memopv4f32 addr:$src2),
+ (iPTR imm)),
+ (VINSERTF128rm VR256:$src1, addr:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v4f64 VR256:$src1), (memopv2f64 addr:$src2),
+ (iPTR imm)),
+ (VINSERTF128rm VR256:$src1, addr:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+}
+
+let Predicates = [HasAVX1Only] in {
def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (v2i64 VR128:$src2),
- (i32 imm)),
+ (iPTR imm)),
(VINSERTF128rr VR256:$src1, VR128:$src2,
(INSERT_get_vinsertf128_imm VR256:$ins))>;
def : Pat<(vinsertf128_insert:$ins (v8i32 VR256:$src1), (v4i32 VR128:$src2),
- (i32 imm)),
+ (iPTR imm)),
(VINSERTF128rr VR256:$src1, VR128:$src2,
(INSERT_get_vinsertf128_imm VR256:$ins))>;
def : Pat<(vinsertf128_insert:$ins (v32i8 VR256:$src1), (v16i8 VR128:$src2),
- (i32 imm)),
+ (iPTR imm)),
(VINSERTF128rr VR256:$src1, VR128:$src2,
(INSERT_get_vinsertf128_imm VR256:$ins))>;
def : Pat<(vinsertf128_insert:$ins (v16i16 VR256:$src1), (v8i16 VR128:$src2),
- (i32 imm)),
+ (iPTR imm)),
(VINSERTF128rr VR256:$src1, VR128:$src2,
(INSERT_get_vinsertf128_imm VR256:$ins))>;
-def : Pat<(vinsertf128_insert:$ins (v8f32 VR256:$src1), (loadv4f32 addr:$src2),
- (i32 imm)),
+def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (memopv2i64 addr:$src2),
+ (iPTR imm)),
+ (VINSERTF128rm VR256:$src1, addr:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v8i32 VR256:$src1),
+ (bc_v4i32 (memopv2i64 addr:$src2)),
+ (iPTR imm)),
(VINSERTF128rm VR256:$src1, addr:$src2,
(INSERT_get_vinsertf128_imm VR256:$ins))>;
-def : Pat<(vinsertf128_insert:$ins (v4f64 VR256:$src1), (loadv2f64 addr:$src2),
- (i32 imm)),
+def : Pat<(vinsertf128_insert:$ins (v32i8 VR256:$src1),
+ (bc_v16i8 (memopv2i64 addr:$src2)),
+ (iPTR imm)),
(VINSERTF128rm VR256:$src1, addr:$src2,
(INSERT_get_vinsertf128_imm VR256:$ins))>;
-def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (loadv2i64 addr:$src2),
- (i32 imm)),
+def : Pat<(vinsertf128_insert:$ins (v16i16 VR256:$src1),
+ (bc_v8i16 (memopv2i64 addr:$src2)),
+ (iPTR imm)),
(VINSERTF128rm VR256:$src1, addr:$src2,
(INSERT_get_vinsertf128_imm VR256:$ins))>;
}
@@ -7256,64 +7532,69 @@
def VEXTRACTF128rr : AVXAIi8<0x19, MRMDestReg, (outs VR128:$dst),
(ins VR256:$src1, i8imm:$src2),
"vextractf128\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- []>, VEX;
+ []>, VEX, VEX_L;
let mayStore = 1 in
def VEXTRACTF128mr : AVXAIi8<0x19, MRMDestMem, (outs),
(ins f128mem:$dst, VR256:$src1, i8imm:$src2),
"vextractf128\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- []>, VEX;
-}
-
-// Extract and store.
-let Predicates = [HasAVX] in {
- def : Pat<(alignedstore (int_x86_avx_vextractf128_ps_256 VR256:$src1, imm:$src2), addr:$dst),
- (VEXTRACTF128mr addr:$dst, VR256:$src1, imm:$src2)>;
- def : Pat<(alignedstore (int_x86_avx_vextractf128_pd_256 VR256:$src1, imm:$src2), addr:$dst),
- (VEXTRACTF128mr addr:$dst, VR256:$src1, imm:$src2)>;
- def : Pat<(alignedstore (int_x86_avx_vextractf128_si_256 VR256:$src1, imm:$src2), addr:$dst),
- (VEXTRACTF128mr addr:$dst, VR256:$src1, imm:$src2)>;
-
- def : Pat<(int_x86_sse_storeu_ps addr:$dst, (int_x86_avx_vextractf128_ps_256 VR256:$src1, imm:$src2)),
- (VEXTRACTF128mr addr:$dst, VR256:$src1, imm:$src2)>;
- def : Pat<(int_x86_sse2_storeu_pd addr:$dst, (int_x86_avx_vextractf128_pd_256 VR256:$src1, imm:$src2)),
- (VEXTRACTF128mr addr:$dst, VR256:$src1, imm:$src2)>;
- def : Pat<(int_x86_sse2_storeu_dq addr:$dst, (bc_v16i8 (int_x86_avx_vextractf128_si_256 VR256:$src1, imm:$src2))),
- (VEXTRACTF128mr addr:$dst, VR256:$src1, imm:$src2)>;
+ []>, VEX, VEX_L;
}
// AVX1 patterns
let Predicates = [HasAVX] in {
-def : Pat<(int_x86_avx_vextractf128_pd_256 VR256:$src1, imm:$src2),
- (VEXTRACTF128rr VR256:$src1, imm:$src2)>;
-def : Pat<(int_x86_avx_vextractf128_ps_256 VR256:$src1, imm:$src2),
- (VEXTRACTF128rr VR256:$src1, imm:$src2)>;
-def : Pat<(int_x86_avx_vextractf128_si_256 VR256:$src1, imm:$src2),
- (VEXTRACTF128rr VR256:$src1, imm:$src2)>;
-
-def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (iPTR imm)),
(v4f32 (VEXTRACTF128rr
(v8f32 VR256:$src1),
(EXTRACT_get_vextractf128_imm VR128:$ext)))>;
-def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (iPTR imm)),
(v2f64 (VEXTRACTF128rr
(v4f64 VR256:$src1),
(EXTRACT_get_vextractf128_imm VR128:$ext)))>;
-def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+
+def : Pat<(alignedstore (v4f32 (vextractf128_extract:$ext (v8f32 VR256:$src1),
+ (iPTR imm))), addr:$dst),
+ (VEXTRACTF128mr addr:$dst, VR256:$src1,
+ (EXTRACT_get_vextractf128_imm VR128:$ext))>;
+def : Pat<(alignedstore (v2f64 (vextractf128_extract:$ext (v4f64 VR256:$src1),
+ (iPTR imm))), addr:$dst),
+ (VEXTRACTF128mr addr:$dst, VR256:$src1,
+ (EXTRACT_get_vextractf128_imm VR128:$ext))>;
+}
+
+let Predicates = [HasAVX1Only] in {
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (iPTR imm)),
(v2i64 (VEXTRACTF128rr
- (v4i64 VR256:$src1),
- (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
-def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+ (v4i64 VR256:$src1),
+ (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (iPTR imm)),
(v4i32 (VEXTRACTF128rr
- (v8i32 VR256:$src1),
- (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
-def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+ (v8i32 VR256:$src1),
+ (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (iPTR imm)),
(v8i16 (VEXTRACTF128rr
- (v16i16 VR256:$src1),
- (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
-def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+ (v16i16 VR256:$src1),
+ (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (iPTR imm)),
(v16i8 (VEXTRACTF128rr
- (v32i8 VR256:$src1),
- (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+ (v32i8 VR256:$src1),
+ (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+
+def : Pat<(alignedstore (v2i64 (vextractf128_extract:$ext (v4i64 VR256:$src1),
+ (iPTR imm))), addr:$dst),
+ (VEXTRACTF128mr addr:$dst, VR256:$src1,
+ (EXTRACT_get_vextractf128_imm VR128:$ext))>;
+def : Pat<(alignedstore (v4i32 (vextractf128_extract:$ext (v8i32 VR256:$src1),
+ (iPTR imm))), addr:$dst),
+ (VEXTRACTF128mr addr:$dst, VR256:$src1,
+ (EXTRACT_get_vextractf128_imm VR128:$ext))>;
+def : Pat<(alignedstore (v8i16 (vextractf128_extract:$ext (v16i16 VR256:$src1),
+ (iPTR imm))), addr:$dst),
+ (VEXTRACTF128mr addr:$dst, VR256:$src1,
+ (EXTRACT_get_vextractf128_imm VR128:$ext))>;
+def : Pat<(alignedstore (v16i8 (vextractf128_extract:$ext (v32i8 VR256:$src1),
+ (iPTR imm))), addr:$dst),
+ (VEXTRACTF128mr addr:$dst, VR256:$src1,
+ (EXTRACT_get_vextractf128_imm VR128:$ext))>;
}
//===----------------------------------------------------------------------===//
@@ -7331,7 +7612,7 @@
(ins VR256:$src1, f256mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst, (IntLd256 addr:$src2, VR256:$src1))]>,
- VEX_4V;
+ VEX_4V, VEX_L;
def mr : AVX8I<opc_mr, MRMDestMem, (outs),
(ins f128mem:$dst, VR128:$src1, VR128:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
@@ -7339,7 +7620,7 @@
def Ymr : AVX8I<opc_mr, MRMDestMem, (outs),
(ins f256mem:$dst, VR256:$src1, VR256:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(IntSt256 addr:$dst, VR256:$src1, VR256:$src2)]>, VEX_4V;
+ [(IntSt256 addr:$dst, VR256:$src1, VR256:$src2)]>, VEX_4V, VEX_L;
}
let ExeDomain = SSEPackedSingle in
@@ -7387,13 +7668,13 @@
defm VPERMILPS : avx_permil<0x0C, 0x04, "vpermilps", VR128, f128mem, i128mem,
memopv2i64, int_x86_avx_vpermilvar_ps, v4f32>;
defm VPERMILPSY : avx_permil<0x0C, 0x04, "vpermilps", VR256, f256mem, i256mem,
- memopv4i64, int_x86_avx_vpermilvar_ps_256, v8f32>;
+ memopv4i64, int_x86_avx_vpermilvar_ps_256, v8f32>, VEX_L;
}
let ExeDomain = SSEPackedDouble in {
defm VPERMILPD : avx_permil<0x0D, 0x05, "vpermilpd", VR128, f128mem, i128mem,
memopv2i64, int_x86_avx_vpermilvar_pd, v2f64>;
defm VPERMILPDY : avx_permil<0x0D, 0x05, "vpermilpd", VR256, f256mem, i256mem,
- memopv4i64, int_x86_avx_vpermilvar_pd_256, v4f64>;
+ memopv4i64, int_x86_avx_vpermilvar_pd_256, v4f64>, VEX_L;
}
let Predicates = [HasAVX] in {
@@ -7421,38 +7702,38 @@
(ins VR256:$src1, VR256:$src2, i8imm:$src3),
"vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[(set VR256:$dst, (v8f32 (X86VPerm2x128 VR256:$src1, VR256:$src2,
- (i8 imm:$src3))))]>, VEX_4V;
+ (i8 imm:$src3))))]>, VEX_4V, VEX_L;
def VPERM2F128rm : AVXAIi8<0x06, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, f256mem:$src2, i8imm:$src3),
"vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[(set VR256:$dst, (X86VPerm2x128 VR256:$src1, (memopv8f32 addr:$src2),
- (i8 imm:$src3)))]>, VEX_4V;
+ (i8 imm:$src3)))]>, VEX_4V, VEX_L;
}
let Predicates = [HasAVX] in {
+def : Pat<(v4f64 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+ (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
+def : Pat<(v4f64 (X86VPerm2x128 VR256:$src1,
+ (memopv4f64 addr:$src2), (i8 imm:$imm))),
+ (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>;
+}
+
+let Predicates = [HasAVX1Only] in {
def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
(VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
def : Pat<(v4i64 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
(VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
-def : Pat<(v4f64 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
- (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
def : Pat<(v32i8 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
(VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
def : Pat<(v16i16 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
(VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
-def : Pat<(v8f32 (X86VPerm2x128 VR256:$src1,
- (memopv8f32 addr:$src2), (i8 imm:$imm))),
- (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>;
def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1,
(bc_v8i32 (memopv4i64 addr:$src2)), (i8 imm:$imm))),
(VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>;
def : Pat<(v4i64 (X86VPerm2x128 VR256:$src1,
(memopv4i64 addr:$src2), (i8 imm:$imm))),
(VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>;
-def : Pat<(v4f64 (X86VPerm2x128 VR256:$src1,
- (memopv4f64 addr:$src2), (i8 imm:$imm))),
- (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>;
def : Pat<(v32i8 (X86VPerm2x128 VR256:$src1,
(bc_v32i8 (memopv4i64 addr:$src2)), (i8 imm:$imm))),
(VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>;
@@ -7503,9 +7784,9 @@
let Predicates = [HasAVX, HasF16C] in {
defm VCVTPH2PS : f16c_ph2ps<VR128, f64mem, int_x86_vcvtph2ps_128>;
- defm VCVTPH2PSY : f16c_ph2ps<VR256, f128mem, int_x86_vcvtph2ps_256>;
+ defm VCVTPH2PSY : f16c_ph2ps<VR256, f128mem, int_x86_vcvtph2ps_256>, VEX_L;
defm VCVTPS2PH : f16c_ps2ph<VR128, f64mem, int_x86_vcvtps2ph_128>;
- defm VCVTPS2PHY : f16c_ps2ph<VR256, f128mem, int_x86_vcvtps2ph_256>;
+ defm VCVTPS2PHY : f16c_ps2ph<VR256, f128mem, int_x86_vcvtps2ph_256>, VEX_L;
}
//===----------------------------------------------------------------------===//
@@ -7537,7 +7818,7 @@
defm VPBLENDD : AVX2_binop_rmi_int<0x02, "vpblendd", int_x86_avx2_pblendd_128,
VR128, memopv2i64, i128mem>;
defm VPBLENDDY : AVX2_binop_rmi_int<0x02, "vpblendd", int_x86_avx2_pblendd_256,
- VR256, memopv4i64, i256mem>;
+ VR256, memopv4i64, i256mem>, VEX_L;
}
//===----------------------------------------------------------------------===//
@@ -7556,11 +7837,12 @@
(Int128 (scalar_to_vector (ld_frag addr:$src))))]>, VEX;
def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR256:$dst, (Int256 VR128:$src))]>, VEX;
+ [(set VR256:$dst, (Int256 VR128:$src))]>, VEX, VEX_L;
def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst), (ins x86memop:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR256:$dst,
- (Int256 (scalar_to_vector (ld_frag addr:$src))))]>, VEX;
+ (Int256 (scalar_to_vector (ld_frag addr:$src))))]>,
+ VEX, VEX_L;
}
defm VPBROADCASTB : avx2_broadcast<0x78, "vpbroadcastb", i8mem, loadi8,
@@ -7639,19 +7921,22 @@
}
// AVX1 broadcast patterns
-let Predicates = [HasAVX] in {
+let Predicates = [HasAVX1Only] in {
def : Pat<(v8i32 (X86VBroadcast (loadi32 addr:$src))),
(VBROADCASTSSYrm addr:$src)>;
def : Pat<(v4i64 (X86VBroadcast (loadi64 addr:$src))),
(VBROADCASTSDYrm addr:$src)>;
+def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))),
+ (VBROADCASTSSrm addr:$src)>;
+}
+
+let Predicates = [HasAVX] in {
def : Pat<(v8f32 (X86VBroadcast (loadf32 addr:$src))),
(VBROADCASTSSYrm addr:$src)>;
def : Pat<(v4f64 (X86VBroadcast (loadf64 addr:$src))),
(VBROADCASTSDYrm addr:$src)>;
def : Pat<(v4f32 (X86VBroadcast (loadf32 addr:$src))),
(VBROADCASTSSrm addr:$src)>;
-def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))),
- (VBROADCASTSSrm addr:$src)>;
// Provide fallback in case the load node that is used in the patterns above
// is used by additional users, which prevents the pattern selection.
@@ -7692,7 +7977,8 @@
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst,
- (OpVT (X86VPermv VR256:$src1, VR256:$src2)))]>, VEX_4V;
+ (OpVT (X86VPermv VR256:$src1, VR256:$src2)))]>,
+ VEX_4V, VEX_L;
def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, i256mem:$src2),
!strconcat(OpcodeStr,
@@ -7700,7 +7986,7 @@
[(set VR256:$dst,
(OpVT (X86VPermv VR256:$src1,
(bitconvert (mem_frag addr:$src2)))))]>,
- VEX_4V;
+ VEX_4V, VEX_L;
}
defm VPERMD : avx2_perm<0x36, "vpermd", memopv4i64, v8i32>;
@@ -7714,14 +8000,15 @@
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst,
- (OpVT (X86VPermi VR256:$src1, (i8 imm:$src2))))]>, VEX;
+ (OpVT (X86VPermi VR256:$src1, (i8 imm:$src2))))]>,
+ VEX, VEX_L;
def Ymi : AVX2AIi8<opc, MRMSrcMem, (outs VR256:$dst),
(ins i256mem:$src1, i8imm:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst,
(OpVT (X86VPermi (mem_frag addr:$src1),
- (i8 imm:$src2))))]>, VEX;
+ (i8 imm:$src2))))]>, VEX, VEX_L;
}
defm VPERMQ : avx2_perm_imm<0x00, "vpermq", memopv4i64, v4i64>, VEX_W;
@@ -7731,20 +8018,18 @@
//===----------------------------------------------------------------------===//
// VPERM2I128 - Permute Floating-Point Values in 128-bit chunks
//
-let AddedComplexity = 1 in {
def VPERM2I128rr : AVX2AIi8<0x46, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2, i8imm:$src3),
"vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[(set VR256:$dst, (v4i64 (X86VPerm2x128 VR256:$src1, VR256:$src2,
- (i8 imm:$src3))))]>, VEX_4V;
+ (i8 imm:$src3))))]>, VEX_4V, VEX_L;
def VPERM2I128rm : AVX2AIi8<0x46, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, f256mem:$src2, i8imm:$src3),
"vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[(set VR256:$dst, (X86VPerm2x128 VR256:$src1, (memopv4i64 addr:$src2),
- (i8 imm:$src3)))]>, VEX_4V;
-}
+ (i8 imm:$src3)))]>, VEX_4V, VEX_L;
-let Predicates = [HasAVX2], AddedComplexity = 1 in {
+let Predicates = [HasAVX2] in {
def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
(VPERM2I128rr VR256:$src1, VR256:$src2, imm:$imm)>;
def : Pat<(v32i8 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
@@ -7771,31 +8056,51 @@
def VINSERTI128rr : AVX2AIi8<0x38, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, VR128:$src2, i8imm:$src3),
"vinserti128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
- []>, VEX_4V;
+ []>, VEX_4V, VEX_L;
let mayLoad = 1 in
def VINSERTI128rm : AVX2AIi8<0x38, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, i128mem:$src2, i8imm:$src3),
"vinserti128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
- []>, VEX_4V;
+ []>, VEX_4V, VEX_L;
}
-let Predicates = [HasAVX2], AddedComplexity = 1 in {
+let Predicates = [HasAVX2] in {
def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (v2i64 VR128:$src2),
- (i32 imm)),
+ (iPTR imm)),
(VINSERTI128rr VR256:$src1, VR128:$src2,
(INSERT_get_vinsertf128_imm VR256:$ins))>;
def : Pat<(vinsertf128_insert:$ins (v8i32 VR256:$src1), (v4i32 VR128:$src2),
- (i32 imm)),
+ (iPTR imm)),
(VINSERTI128rr VR256:$src1, VR128:$src2,
(INSERT_get_vinsertf128_imm VR256:$ins))>;
def : Pat<(vinsertf128_insert:$ins (v32i8 VR256:$src1), (v16i8 VR128:$src2),
- (i32 imm)),
+ (iPTR imm)),
(VINSERTI128rr VR256:$src1, VR128:$src2,
(INSERT_get_vinsertf128_imm VR256:$ins))>;
def : Pat<(vinsertf128_insert:$ins (v16i16 VR256:$src1), (v8i16 VR128:$src2),
- (i32 imm)),
+ (iPTR imm)),
(VINSERTI128rr VR256:$src1, VR128:$src2,
(INSERT_get_vinsertf128_imm VR256:$ins))>;
+
+def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (memopv2i64 addr:$src2),
+ (iPTR imm)),
+ (VINSERTI128rm VR256:$src1, addr:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v8i32 VR256:$src1),
+ (bc_v4i32 (memopv2i64 addr:$src2)),
+ (iPTR imm)),
+ (VINSERTI128rm VR256:$src1, addr:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v32i8 VR256:$src1),
+ (bc_v16i8 (memopv2i64 addr:$src2)),
+ (iPTR imm)),
+ (VINSERTI128rm VR256:$src1, addr:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v16i16 VR256:$src1),
+ (bc_v8i16 (memopv2i64 addr:$src2)),
+ (iPTR imm)),
+ (VINSERTI128rm VR256:$src1, addr:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
}
//===----------------------------------------------------------------------===//
@@ -7806,29 +8111,47 @@
"vextracti128\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
(int_x86_avx2_vextracti128 VR256:$src1, imm:$src2))]>,
- VEX;
+ VEX, VEX_L;
let neverHasSideEffects = 1, mayStore = 1 in
def VEXTRACTI128mr : AVX2AIi8<0x39, MRMDestMem, (outs),
(ins i128mem:$dst, VR256:$src1, i8imm:$src2),
- "vextracti128\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, VEX;
+ "vextracti128\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
+ VEX, VEX_L;
-let Predicates = [HasAVX2], AddedComplexity = 1 in {
-def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+let Predicates = [HasAVX2] in {
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (iPTR imm)),
(v2i64 (VEXTRACTI128rr
(v4i64 VR256:$src1),
(EXTRACT_get_vextractf128_imm VR128:$ext)))>;
-def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (iPTR imm)),
(v4i32 (VEXTRACTI128rr
(v8i32 VR256:$src1),
(EXTRACT_get_vextractf128_imm VR128:$ext)))>;
-def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (iPTR imm)),
(v8i16 (VEXTRACTI128rr
(v16i16 VR256:$src1),
(EXTRACT_get_vextractf128_imm VR128:$ext)))>;
-def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (iPTR imm)),
(v16i8 (VEXTRACTI128rr
(v32i8 VR256:$src1),
(EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+
+def : Pat<(alignedstore (v2i64 (vextractf128_extract:$ext (v4i64 VR256:$src1),
+ (iPTR imm))), addr:$dst),
+ (VEXTRACTI128mr addr:$dst, VR256:$src1,
+ (EXTRACT_get_vextractf128_imm VR128:$ext))>;
+def : Pat<(alignedstore (v4i32 (vextractf128_extract:$ext (v8i32 VR256:$src1),
+ (iPTR imm))), addr:$dst),
+ (VEXTRACTI128mr addr:$dst, VR256:$src1,
+ (EXTRACT_get_vextractf128_imm VR128:$ext))>;
+def : Pat<(alignedstore (v8i16 (vextractf128_extract:$ext (v16i16 VR256:$src1),
+ (iPTR imm))), addr:$dst),
+ (VEXTRACTI128mr addr:$dst, VR256:$src1,
+ (EXTRACT_get_vextractf128_imm VR128:$ext))>;
+def : Pat<(alignedstore (v16i8 (vextractf128_extract:$ext (v32i8 VR256:$src1),
+ (iPTR imm))), addr:$dst),
+ (VEXTRACTI128mr addr:$dst, VR256:$src1,
+ (EXTRACT_get_vextractf128_imm VR128:$ext))>;
}
//===----------------------------------------------------------------------===//
@@ -7844,7 +8167,8 @@
def Yrm : AVX28I<0x8c, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, i256mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VR256:$dst, (IntLd256 addr:$src2, VR256:$src1))]>, VEX_4V;
+ [(set VR256:$dst, (IntLd256 addr:$src2, VR256:$src1))]>,
+ VEX_4V, VEX_L;
def mr : AVX28I<0x8e, MRMDestMem, (outs),
(ins i128mem:$dst, VR128:$src1, VR128:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
@@ -7852,7 +8176,7 @@
def Ymr : AVX28I<0x8e, MRMDestMem, (outs),
(ins i256mem:$dst, VR256:$src1, VR256:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(IntSt256 addr:$dst, VR256:$src1, VR256:$src2)]>, VEX_4V;
+ [(IntSt256 addr:$dst, VR256:$src1, VR256:$src2)]>, VEX_4V, VEX_L;
}
defm VPMASKMOVD : avx2_pmovmask<"vpmaskmovd",
@@ -7890,14 +8214,14 @@
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst,
(vt256 (OpNode VR256:$src1, (vt256 VR256:$src2))))]>,
- VEX_4V;
+ VEX_4V, VEX_L;
def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, i256mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst,
(vt256 (OpNode VR256:$src1,
(vt256 (bitconvert (memopv4i64 addr:$src2))))))]>,
- VEX_4V;
+ VEX_4V, VEX_L;
}
defm VPSLLVD : avx2_var_shift<0x47, "vpsllvd", shl, v4i32, v8i32>;
Modified: llvm/branches/AMDILBackend/lib/Target/X86/X86InstrShiftRotate.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/X86/X86InstrShiftRotate.td?rev=172541&r1=172540&r2=172541&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/X86/X86InstrShiftRotate.td (original)
+++ llvm/branches/AMDILBackend/lib/Target/X86/X86InstrShiftRotate.td Tue Jan 15 11:16:16 2013
@@ -839,6 +839,16 @@
} // Defs = [EFLAGS]
+def ROT32L2R_imm8 : SDNodeXForm<imm, [{
+ // Convert a ROTL shamt to a ROTR shamt on 32-bit integer.
+ return getI8Imm(32 - N->getZExtValue());
+}]>;
+
+def ROT64L2R_imm8 : SDNodeXForm<imm, [{
+ // Convert a ROTL shamt to a ROTR shamt on 64-bit integer.
+ return getI8Imm(64 - N->getZExtValue());
+}]>;
+
multiclass bmi_rotate<string asm, RegisterClass RC, X86MemOperand x86memop> {
let neverHasSideEffects = 1 in {
def ri : Ii8<0xF0, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, i8imm:$src2),
@@ -873,4 +883,72 @@
defm SHRX64 : bmi_shift<"shrx{q}", GR64, i64mem>, T8XD, VEX_W;
defm SHLX32 : bmi_shift<"shlx{l}", GR32, i32mem>, T8, OpSize;
defm SHLX64 : bmi_shift<"shlx{q}", GR64, i64mem>, T8, OpSize, VEX_W;
+
+ // Prefer RORX which is non-destructive and doesn't update EFLAGS.
+ let AddedComplexity = 10 in {
+ def : Pat<(rotl GR32:$src, (i8 imm:$shamt)),
+ (RORX32ri GR32:$src, (ROT32L2R_imm8 imm:$shamt))>;
+ def : Pat<(rotl GR64:$src, (i8 imm:$shamt)),
+ (RORX64ri GR64:$src, (ROT64L2R_imm8 imm:$shamt))>;
+ }
+
+ def : Pat<(rotl (loadi32 addr:$src), (i8 imm:$shamt)),
+ (RORX32mi addr:$src, (ROT32L2R_imm8 imm:$shamt))>;
+ def : Pat<(rotl (loadi64 addr:$src), (i8 imm:$shamt)),
+ (RORX64mi addr:$src, (ROT64L2R_imm8 imm:$shamt))>;
+
+ // Prefer SARX/SHRX/SHLX over SAR/SHR/SHL with variable shift BUT not
+ // immedidate shift, i.e. the following code is considered better
+ //
+ // mov %edi, %esi
+ // shl $imm, %esi
+ // ... %edi, ...
+ //
+ // than
+ //
+ // movb $imm, %sil
+ // shlx %sil, %edi, %esi
+ // ... %edi, ...
+ //
+ let AddedComplexity = 1 in {
+ def : Pat<(sra GR32:$src1, GR8:$src2),
+ (SARX32rr GR32:$src1,
+ (INSERT_SUBREG
+ (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+ def : Pat<(sra GR64:$src1, GR8:$src2),
+ (SARX64rr GR64:$src1,
+ (INSERT_SUBREG
+ (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+
+ def : Pat<(srl GR32:$src1, GR8:$src2),
+ (SHRX32rr GR32:$src1,
+ (INSERT_SUBREG
+ (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+ def : Pat<(srl GR64:$src1, GR8:$src2),
+ (SHRX64rr GR64:$src1,
+ (INSERT_SUBREG
+ (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+
+ def : Pat<(shl GR32:$src1, GR8:$src2),
+ (SHLX32rr GR32:$src1,
+ (INSERT_SUBREG
+ (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+ def : Pat<(shl GR64:$src1, GR8:$src2),
+ (SHLX64rr GR64:$src1,
+ (INSERT_SUBREG
+ (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+ }
+
+ // Patterns on SARXrm/SHRXrm/SHLXrm are explicitly omitted to favor
+ //
+ // mov (%ecx), %esi
+ // shl $imm, $esi
+ //
+ // over
+ //
+ // movb $imm %al
+ // shlx %al, (%ecx), %esi
+ //
+ // As SARXrr/SHRXrr/SHLXrr is favored on variable shift, the peephole
+ // optimization will fold them into SARXrm/SHRXrm/SHLXrm if possible.
}
Modified: llvm/branches/AMDILBackend/lib/Target/X86/X86InstrXOP.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/X86/X86InstrXOP.td?rev=172541&r1=172540&r2=172541&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/X86/X86InstrXOP.td (original)
+++ llvm/branches/AMDILBackend/lib/Target/X86/X86InstrXOP.td Tue Jan 15 11:16:16 2013
@@ -75,10 +75,10 @@
PatFrag memop> {
def rrY : IXOP<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR256:$dst, (Int VR256:$src))]>, VEX;
+ [(set VR256:$dst, (Int VR256:$src))]>, VEX, VEX_L;
def rmY : IXOP<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR256:$dst, (Int (bitconvert (memop addr:$src))))]>, VEX;
+ [(set VR256:$dst, (Int (bitconvert (memop addr:$src))))]>, VEX, VEX_L;
}
let isAsmParserOnly = 1 in {
@@ -238,7 +238,7 @@
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR256:$dst, (Int VR256:$src1, VR256:$src2, VR256:$src3))]>,
- VEX_4V, VEX_I8IMM;
+ VEX_4V, VEX_I8IMM, VEX_L;
def rmY : IXOPi8<opc, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2, i256mem:$src3),
!strconcat(OpcodeStr,
@@ -246,7 +246,7 @@
[(set VR256:$dst,
(Int VR256:$src1, VR256:$src2,
(bitconvert (memopv4i64 addr:$src3))))]>,
- VEX_4V, VEX_I8IMM, VEX_W, MemOp4;
+ VEX_4V, VEX_I8IMM, VEX_W, MemOp4, VEX_L;
def mrY : IXOPi8<opc, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, f256mem:$src2, VR256:$src3),
!strconcat(OpcodeStr,
@@ -254,7 +254,7 @@
[(set VR256:$dst,
(Int VR256:$src1, (bitconvert (memopv4i64 addr:$src2)),
VR256:$src3))]>,
- VEX_4V, VEX_I8IMM;
+ VEX_4V, VEX_I8IMM, VEX_L;
}
let isAsmParserOnly = 1 in {
@@ -287,20 +287,21 @@
!strconcat(OpcodeStr,
"\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
[(set VR256:$dst,
- (Int256 VR256:$src1, VR256:$src2, VR256:$src3, imm:$src4))]>;
+ (Int256 VR256:$src1, VR256:$src2, VR256:$src3, imm:$src4))]>, VEX_L;
def rmY : IXOP5<opc, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2, f256mem:$src3, i8imm:$src4),
!strconcat(OpcodeStr,
"\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
[(set VR256:$dst,
(Int256 VR256:$src1, VR256:$src2, (ld_256 addr:$src3), imm:$src4))]>,
- VEX_W, MemOp4;
+ VEX_W, MemOp4, VEX_L;
def mrY : IXOP5<opc, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, f256mem:$src2, VR256:$src3, i8imm:$src4),
!strconcat(OpcodeStr,
"\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
[(set VR256:$dst,
- (Int256 VR256:$src1, (ld_256 addr:$src2), VR256:$src3, imm:$src4))]>;
+ (Int256 VR256:$src1, (ld_256 addr:$src2), VR256:$src3, imm:$src4))]>,
+ VEX_L;
}
defm VPERMIL2PD : xop5op<0x49, "vpermil2pd", int_x86_xop_vpermil2pd,
Modified: llvm/branches/AMDILBackend/lib/Target/X86/X86JITInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/X86/X86JITInfo.cpp?rev=172541&r1=172540&r2=172541&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/X86/X86JITInfo.cpp (original)
+++ llvm/branches/AMDILBackend/lib/Target/X86/X86JITInfo.cpp Tue Jan 15 11:16:16 2013
@@ -532,6 +532,15 @@
#endif
}
+template<typename T> static void addUnaligned(void *Pos, T Delta) {
+ T Value;
+ std::memcpy(reinterpret_cast<char*>(&Value), reinterpret_cast<char*>(Pos),
+ sizeof(T));
+ Value += Delta;
+ std::memcpy(reinterpret_cast<char*>(Pos), reinterpret_cast<char*>(&Value),
+ sizeof(T));
+}
+
/// relocate - Before the JIT can run a block of code that has been emitted,
/// it must rewrite the code to contain the actual addresses of any
/// referenced global symbols.
@@ -545,24 +554,24 @@
// PC relative relocation, add the relocated value to the value already in
// memory, after we adjust it for where the PC is.
ResultPtr = ResultPtr -(intptr_t)RelocPos - 4 - MR->getConstantVal();
- *((unsigned*)RelocPos) += (unsigned)ResultPtr;
+ addUnaligned<unsigned>(RelocPos, ResultPtr);
break;
}
case X86::reloc_picrel_word: {
// PIC base relative relocation, add the relocated value to the value
// already in memory, after we adjust it for where the PIC base is.
ResultPtr = ResultPtr - ((intptr_t)Function + MR->getConstantVal());
- *((unsigned*)RelocPos) += (unsigned)ResultPtr;
+ addUnaligned<unsigned>(RelocPos, ResultPtr);
break;
}
case X86::reloc_absolute_word:
case X86::reloc_absolute_word_sext:
// Absolute relocation, just add the relocated value to the value already
// in memory.
- *((unsigned*)RelocPos) += (unsigned)ResultPtr;
+ addUnaligned<unsigned>(RelocPos, ResultPtr);
break;
case X86::reloc_absolute_dword:
- *((intptr_t*)RelocPos) += ResultPtr;
+ addUnaligned<intptr_t>(RelocPos, ResultPtr);
break;
}
}
Modified: llvm/branches/AMDILBackend/lib/Target/X86/X86MCInstLower.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/X86/X86MCInstLower.cpp?rev=172541&r1=172540&r2=172541&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/X86/X86MCInstLower.cpp (original)
+++ llvm/branches/AMDILBackend/lib/Target/X86/X86MCInstLower.cpp Tue Jan 15 11:16:16 2013
@@ -12,7 +12,6 @@
//
//===----------------------------------------------------------------------===//
-#include "X86MCInstLower.h"
#include "X86AsmPrinter.h"
#include "X86COFFMachineModuleInfo.h"
#include "InstPrinter/X86ATTInstPrinter.h"
@@ -29,6 +28,31 @@
#include "llvm/ADT/SmallString.h"
using namespace llvm;
+namespace {
+
+/// X86MCInstLower - This class is used to lower an MachineInstr into an MCInst.
+class X86MCInstLower {
+ MCContext &Ctx;
+ Mangler *Mang;
+ const MachineFunction &MF;
+ const TargetMachine &TM;
+ const MCAsmInfo &MAI;
+ X86AsmPrinter &AsmPrinter;
+public:
+ X86MCInstLower(Mangler *mang, const MachineFunction &MF,
+ X86AsmPrinter &asmprinter);
+
+ void Lower(const MachineInstr *MI, MCInst &OutMI) const;
+
+ MCSymbol *GetSymbolFromOperand(const MachineOperand &MO) const;
+ MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const;
+
+private:
+ MachineModuleInfoMachO &getMachOMMI() const;
+};
+
+} // end anonymous namespace
+
X86MCInstLower::X86MCInstLower(Mangler *mang, const MachineFunction &mf,
X86AsmPrinter &asmprinter)
: Ctx(mf.getContext()), Mang(mang), MF(mf), TM(mf.getTarget()),
@@ -43,15 +67,11 @@
/// operand to an MCSymbol.
MCSymbol *X86MCInstLower::
GetSymbolFromOperand(const MachineOperand &MO) const {
- assert((MO.isGlobal() || MO.isSymbol()) && "Isn't a symbol reference");
+ assert((MO.isGlobal() || MO.isSymbol() || MO.isMBB()) && "Isn't a symbol reference");
SmallString<128> Name;
- if (!MO.isGlobal()) {
- assert(MO.isSymbol());
- Name += MAI.getGlobalPrefix();
- Name += MO.getSymbolName();
- } else {
+ if (MO.isGlobal()) {
const GlobalValue *GV = MO.getGlobal();
bool isImplicitlyPrivate = false;
if (MO.getTargetFlags() == X86II::MO_DARWIN_STUB ||
@@ -61,6 +81,11 @@
isImplicitlyPrivate = true;
Mang->getNameWithPrefix(Name, GV, isImplicitlyPrivate);
+ } else if (MO.isSymbol()) {
+ Name += MAI.getGlobalPrefix();
+ Name += MO.getSymbolName();
+ } else if (MO.isMBB()) {
+ Name += MO.getMBB()->getSymbol()->getName();
}
// If the target flags on the operand changes the name of the symbol, do that
@@ -191,7 +216,7 @@
if (Expr == 0)
Expr = MCSymbolRefExpr::Create(Sym, RefKind, Ctx);
- if (!MO.isJTI() && MO.getOffset())
+ if (!MO.isJTI() && !MO.isMBB() && MO.getOffset())
Expr = MCBinaryExpr::CreateAdd(Expr,
MCConstantExpr::Create(MO.getOffset(), Ctx),
Ctx);
@@ -324,9 +349,6 @@
MCOp = MCOperand::CreateImm(MO.getImm());
break;
case MachineOperand::MO_MachineBasicBlock:
- MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create(
- MO.getMBB()->getSymbol(), Ctx));
- break;
case MachineOperand::MO_GlobalAddress:
case MachineOperand::MO_ExternalSymbol:
MCOp = LowerSymbolOperand(MO, GetSymbolFromOperand(MO));
@@ -371,18 +393,8 @@
case X86::MOVZX64rm8: LowerSubReg32_Op0(OutMI, X86::MOVZX32rm8); break;
case X86::MOVZX64rr16: LowerSubReg32_Op0(OutMI, X86::MOVZX32rr16); break;
case X86::MOVZX64rm16: LowerSubReg32_Op0(OutMI, X86::MOVZX32rm16); break;
- case X86::SETB_C8r: LowerUnaryToTwoAddr(OutMI, X86::SBB8rr); break;
- case X86::SETB_C16r: LowerUnaryToTwoAddr(OutMI, X86::SBB16rr); break;
- case X86::SETB_C32r: LowerUnaryToTwoAddr(OutMI, X86::SBB32rr); break;
- case X86::SETB_C64r: LowerUnaryToTwoAddr(OutMI, X86::SBB64rr); break;
case X86::MOV8r0: LowerUnaryToTwoAddr(OutMI, X86::XOR8rr); break;
case X86::MOV32r0: LowerUnaryToTwoAddr(OutMI, X86::XOR32rr); break;
- case X86::V_SETALLONES: LowerUnaryToTwoAddr(OutMI, X86::PCMPEQDrr); break;
- case X86::AVX_SET0PSY: LowerUnaryToTwoAddr(OutMI, X86::VXORPSYrr); break;
- case X86::AVX_SET0PDY: LowerUnaryToTwoAddr(OutMI, X86::VXORPDYrr); break;
- case X86::AVX_SETALLONES: LowerUnaryToTwoAddr(OutMI, X86::VPCMPEQDrr); break;
- case X86::AVX2_SETALLONES: LowerUnaryToTwoAddr(OutMI, X86::VPCMPEQDYrr);break;
- case X86::AVX2_SET0: LowerUnaryToTwoAddr(OutMI, X86::VPXORYrr); break;
case X86::MOV16r0:
LowerSubReg32_Op0(OutMI, X86::MOV32r0); // MOV16r0 -> MOV32r0
Removed: llvm/branches/AMDILBackend/lib/Target/X86/X86MCInstLower.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/X86/X86MCInstLower.h?rev=172540&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/X86/X86MCInstLower.h (original)
+++ llvm/branches/AMDILBackend/lib/Target/X86/X86MCInstLower.h (removed)
@@ -1,52 +0,0 @@
-//===-- X86MCInstLower.h - Lower MachineInstr to MCInst ---------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef X86_MCINSTLOWER_H
-#define X86_MCINSTLOWER_H
-
-#include "llvm/Support/Compiler.h"
-
-namespace llvm {
- class MCAsmInfo;
- class MCContext;
- class MCInst;
- class MCOperand;
- class MCSymbol;
- class MachineInstr;
- class MachineFunction;
- class MachineModuleInfoMachO;
- class MachineOperand;
- class Mangler;
- class TargetMachine;
- class X86AsmPrinter;
-
-/// X86MCInstLower - This class is used to lower an MachineInstr into an MCInst.
-class LLVM_LIBRARY_VISIBILITY X86MCInstLower {
- MCContext &Ctx;
- Mangler *Mang;
- const MachineFunction &MF;
- const TargetMachine &TM;
- const MCAsmInfo &MAI;
- X86AsmPrinter &AsmPrinter;
-public:
- X86MCInstLower(Mangler *mang, const MachineFunction &MF,
- X86AsmPrinter &asmprinter);
-
- void Lower(const MachineInstr *MI, MCInst &OutMI) const;
-
- MCSymbol *GetSymbolFromOperand(const MachineOperand &MO) const;
- MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const;
-
-private:
- MachineModuleInfoMachO &getMachOMMI() const;
-};
-
-}
-
-#endif
Modified: llvm/branches/AMDILBackend/lib/Target/X86/X86RegisterInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/X86/X86RegisterInfo.cpp?rev=172541&r1=172540&r2=172541&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/X86/X86RegisterInfo.cpp (original)
+++ llvm/branches/AMDILBackend/lib/Target/X86/X86RegisterInfo.cpp Tue Jan 15 11:16:16 2013
@@ -106,23 +106,7 @@
int
X86RegisterInfo::getSEHRegNum(unsigned i) const {
- int reg = X86_MC::getX86RegNum(i);
- switch (i) {
- case X86::R8: case X86::R8D: case X86::R8W: case X86::R8B:
- case X86::R9: case X86::R9D: case X86::R9W: case X86::R9B:
- case X86::R10: case X86::R10D: case X86::R10W: case X86::R10B:
- case X86::R11: case X86::R11D: case X86::R11W: case X86::R11B:
- case X86::R12: case X86::R12D: case X86::R12W: case X86::R12B:
- case X86::R13: case X86::R13D: case X86::R13W: case X86::R13B:
- case X86::R14: case X86::R14D: case X86::R14W: case X86::R14B:
- case X86::R15: case X86::R15D: case X86::R15W: case X86::R15B:
- case X86::XMM8: case X86::XMM9: case X86::XMM10: case X86::XMM11:
- case X86::XMM12: case X86::XMM13: case X86::XMM14: case X86::XMM15:
- case X86::YMM8: case X86::YMM9: case X86::YMM10: case X86::YMM11:
- case X86::YMM12: case X86::YMM13: case X86::YMM14: case X86::YMM15:
- reg += 8;
- }
- return reg;
+ return getEncodingValue(i);
}
const TargetRegisterClass *
@@ -245,15 +229,26 @@
X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
bool callsEHReturn = false;
bool ghcCall = false;
+ bool oclBiCall = false;
+ bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX();
if (MF) {
callsEHReturn = MF->getMMI().callsEHReturn();
const Function *F = MF->getFunction();
ghcCall = (F ? F->getCallingConv() == CallingConv::GHC : false);
+ oclBiCall = (F ? F->getCallingConv() == CallingConv::Intel_OCL_BI : false);
}
if (ghcCall)
return CSR_NoRegs_SaveList;
+ if (oclBiCall) {
+ if (HasAVX && IsWin64)
+ return CSR_Win64_Intel_OCL_BI_AVX_SaveList;
+ if (HasAVX && Is64Bit)
+ return CSR_64_Intel_OCL_BI_AVX_SaveList;
+ if (!HasAVX && !IsWin64 && Is64Bit)
+ return CSR_64_Intel_OCL_BI_SaveList;
+ }
if (Is64Bit) {
if (IsWin64)
return CSR_Win64_SaveList;
@@ -268,6 +263,16 @@
const uint32_t*
X86RegisterInfo::getCallPreservedMask(CallingConv::ID CC) const {
+ bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX();
+
+ if (CC == CallingConv::Intel_OCL_BI) {
+ if (IsWin64 && HasAVX)
+ return CSR_Win64_Intel_OCL_BI_AVX_RegMask;
+ if (Is64Bit && HasAVX)
+ return CSR_64_Intel_OCL_BI_AVX_RegMask;
+ if (!HasAVX && !IsWin64 && Is64Bit)
+ return CSR_64_Intel_OCL_BI_RegMask;
+ }
if (CC == CallingConv::GHC)
return CSR_NoRegs_RegMask;
if (!Is64Bit)
@@ -277,6 +282,11 @@
return CSR_64_RegMask;
}
+const uint32_t*
+X86RegisterInfo::getNoPreservedMask() const {
+ return CSR_NoRegs_RegMask;
+}
+
BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
@@ -398,8 +408,9 @@
const MachineFrameInfo *MFI = MF.getFrameInfo();
const Function *F = MF.getFunction();
unsigned StackAlign = TM.getFrameLowering()->getStackAlignment();
- bool requiresRealignment = ((MFI->getMaxAlignment() > StackAlign) ||
- F->hasFnAttr(Attribute::StackAlignment));
+ bool requiresRealignment =
+ ((MFI->getMaxAlignment() > StackAlign) ||
+ F->getFnAttributes().hasAttribute(Attributes::StackAlignment));
// If we've requested that we force align the stack do so now.
if (ForceStackAlign)
@@ -522,7 +533,7 @@
void
X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, RegScavenger *RS) const{
+ int SPAdj, RegScavenger *RS) const {
assert(SPAdj == 0 && "Unexpected");
unsigned i = 0;
@@ -590,9 +601,10 @@
}
namespace llvm {
-unsigned getX86SubSuperRegister(unsigned Reg, EVT VT, bool High) {
- switch (VT.getSimpleVT().SimpleTy) {
- default: return Reg;
+unsigned getX86SubSuperRegister(unsigned Reg, MVT::SimpleValueType VT,
+ bool High) {
+ switch (VT) {
+ default: llvm_unreachable("Unexpected VT");
case MVT::i8:
if (High) {
switch (Reg) {
@@ -608,7 +620,7 @@
}
} else {
switch (Reg) {
- default: return 0;
+ default: llvm_unreachable("Unexpected register");
case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
return X86::AL;
case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
@@ -645,7 +657,7 @@
}
case MVT::i16:
switch (Reg) {
- default: return Reg;
+ default: llvm_unreachable("Unexpected register");
case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
return X86::AX;
case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
@@ -681,7 +693,7 @@
}
case MVT::i32:
switch (Reg) {
- default: return Reg;
+ default: llvm_unreachable("Unexpected register");
case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
return X86::EAX;
case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
@@ -733,7 +745,7 @@
}
}
switch (Reg) {
- default: return Reg;
+ default: llvm_unreachable("Unexpected register");
case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
return X86::RAX;
case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
Modified: llvm/branches/AMDILBackend/lib/Target/X86/X86RegisterInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/X86/X86RegisterInfo.h?rev=172541&r1=172540&r2=172541&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/X86/X86RegisterInfo.h (original)
+++ llvm/branches/AMDILBackend/lib/Target/X86/X86RegisterInfo.h Tue Jan 15 11:16:16 2013
@@ -58,10 +58,6 @@
public:
X86RegisterInfo(X86TargetMachine &tm, const TargetInstrInfo &tii);
- /// getX86RegNum - Returns the native X86 register number for the given LLVM
- /// register identifier.
- static unsigned getX86RegNum(unsigned RegNo);
-
// FIXME: This should be tablegen'd like getDwarfRegNum is
int getSEHRegNum(unsigned i) const;
@@ -104,6 +100,7 @@
/// callee-save registers on this target.
const uint16_t *getCalleeSavedRegs(const MachineFunction* MF = 0) const;
const uint32_t *getCallPreservedMask(CallingConv::ID) const;
+ const uint32_t *getNoPreservedMask() const;
/// getReservedRegs - Returns a bitset indexed by physical register number
/// indicating if a register is a special register that has particular uses and
@@ -141,8 +138,8 @@
// getX86SubSuperRegister - X86 utility function. It returns the sub or super
// register of a specific X86 register.
-// e.g. getX86SubSuperRegister(X86::EAX, EVT::i16) return X86:AX
-unsigned getX86SubSuperRegister(unsigned, EVT, bool High=false);
+// e.g. getX86SubSuperRegister(X86::EAX, MVT::i16) return X86:AX
+unsigned getX86SubSuperRegister(unsigned, MVT::SimpleValueType, bool High=false);
} // End llvm namespace
Modified: llvm/branches/AMDILBackend/lib/Target/X86/X86RegisterInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/X86/X86RegisterInfo.td?rev=172541&r1=172540&r2=172541&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/X86/X86RegisterInfo.td (original)
+++ llvm/branches/AMDILBackend/lib/Target/X86/X86RegisterInfo.td Tue Jan 15 11:16:16 2013
@@ -13,258 +13,264 @@
//
//===----------------------------------------------------------------------===//
-//===----------------------------------------------------------------------===//
-// Register definitions...
-//
-let Namespace = "X86" in {
+class X86Reg<string n, bits<16> Enc, list<Register> subregs = []> : Register<n> {
+ let Namespace = "X86";
+ let HWEncoding = Enc;
+ let SubRegs = subregs;
+}
- // Subregister indices.
+// Subregister indices.
+let Namespace = "X86" in {
def sub_8bit : SubRegIndex;
def sub_8bit_hi : SubRegIndex;
def sub_16bit : SubRegIndex;
def sub_32bit : SubRegIndex;
- def sub_xmm : SubRegIndex;
+ def sub_xmm : SubRegIndex;
+}
+
+//===----------------------------------------------------------------------===//
+// Register definitions...
+//
+
+// In the register alias definitions below, we define which registers alias
+// which others. We only specify which registers the small registers alias,
+// because the register file generator is smart enough to figure out that
+// AL aliases AX if we tell it that AX aliased AL (for example).
+
+// Dwarf numbering is different for 32-bit and 64-bit, and there are
+// variations by target as well. Currently the first entry is for X86-64,
+// second - for EH on X86-32/Darwin and third is 'generic' one (X86-32/Linux
+// and debug information on X86-32/Darwin)
+
+// 8-bit registers
+// Low registers
+def AL : X86Reg<"al", 0>;
+def DL : X86Reg<"dl", 2>;
+def CL : X86Reg<"cl", 1>;
+def BL : X86Reg<"bl", 3>;
+
+// High registers. On x86-64, these cannot be used in any instruction
+// with a REX prefix.
+def AH : X86Reg<"ah", 4>;
+def DH : X86Reg<"dh", 6>;
+def CH : X86Reg<"ch", 5>;
+def BH : X86Reg<"bh", 7>;
+
+// X86-64 only, requires REX.
+let CostPerUse = 1 in {
+def SIL : X86Reg<"sil", 6>;
+def DIL : X86Reg<"dil", 7>;
+def BPL : X86Reg<"bpl", 5>;
+def SPL : X86Reg<"spl", 4>;
+def R8B : X86Reg<"r8b", 8>;
+def R9B : X86Reg<"r9b", 9>;
+def R10B : X86Reg<"r10b", 10>;
+def R11B : X86Reg<"r11b", 11>;
+def R12B : X86Reg<"r12b", 12>;
+def R13B : X86Reg<"r13b", 13>;
+def R14B : X86Reg<"r14b", 14>;
+def R15B : X86Reg<"r15b", 15>;
+}
+
+// 16-bit registers
+let SubRegIndices = [sub_8bit, sub_8bit_hi], CoveredBySubRegs = 1 in {
+def AX : X86Reg<"ax", 0, [AL,AH]>;
+def DX : X86Reg<"dx", 2, [DL,DH]>;
+def CX : X86Reg<"cx", 1, [CL,CH]>;
+def BX : X86Reg<"bx", 3, [BL,BH]>;
+}
+let SubRegIndices = [sub_8bit] in {
+def SI : X86Reg<"si", 6, [SIL]>;
+def DI : X86Reg<"di", 7, [DIL]>;
+def BP : X86Reg<"bp", 5, [BPL]>;
+def SP : X86Reg<"sp", 4, [SPL]>;
+}
+def IP : X86Reg<"ip", 0>;
+// X86-64 only, requires REX.
+let SubRegIndices = [sub_8bit], CostPerUse = 1 in {
+def R8W : X86Reg<"r8w", 8, [R8B]>;
+def R9W : X86Reg<"r9w", 9, [R9B]>;
+def R10W : X86Reg<"r10w", 10, [R10B]>;
+def R11W : X86Reg<"r11w", 11, [R11B]>;
+def R12W : X86Reg<"r12w", 12, [R12B]>;
+def R13W : X86Reg<"r13w", 13, [R13B]>;
+def R14W : X86Reg<"r14w", 14, [R14B]>;
+def R15W : X86Reg<"r15w", 15, [R15B]>;
+}
- // In the register alias definitions below, we define which registers alias
- // which others. We only specify which registers the small registers alias,
- // because the register file generator is smart enough to figure out that
- // AL aliases AX if we tell it that AX aliased AL (for example).
-
- // Dwarf numbering is different for 32-bit and 64-bit, and there are
- // variations by target as well. Currently the first entry is for X86-64,
- // second - for EH on X86-32/Darwin and third is 'generic' one (X86-32/Linux
- // and debug information on X86-32/Darwin)
-
- // 8-bit registers
- // Low registers
- def AL : Register<"al">;
- def DL : Register<"dl">;
- def CL : Register<"cl">;
- def BL : Register<"bl">;
-
- // X86-64 only, requires REX.
- let CostPerUse = 1 in {
- def SIL : Register<"sil">;
- def DIL : Register<"dil">;
- def BPL : Register<"bpl">;
- def SPL : Register<"spl">;
- def R8B : Register<"r8b">;
- def R9B : Register<"r9b">;
- def R10B : Register<"r10b">;
- def R11B : Register<"r11b">;
- def R12B : Register<"r12b">;
- def R13B : Register<"r13b">;
- def R14B : Register<"r14b">;
- def R15B : Register<"r15b">;
- }
-
- // High registers. On x86-64, these cannot be used in any instruction
- // with a REX prefix.
- def AH : Register<"ah">;
- def DH : Register<"dh">;
- def CH : Register<"ch">;
- def BH : Register<"bh">;
-
- // 16-bit registers
- let SubRegIndices = [sub_8bit, sub_8bit_hi], CoveredBySubRegs = 1 in {
- def AX : RegisterWithSubRegs<"ax", [AL,AH]>;
- def DX : RegisterWithSubRegs<"dx", [DL,DH]>;
- def CX : RegisterWithSubRegs<"cx", [CL,CH]>;
- def BX : RegisterWithSubRegs<"bx", [BL,BH]>;
- }
- let SubRegIndices = [sub_8bit] in {
- def SI : RegisterWithSubRegs<"si", [SIL]>;
- def DI : RegisterWithSubRegs<"di", [DIL]>;
- def BP : RegisterWithSubRegs<"bp", [BPL]>;
- def SP : RegisterWithSubRegs<"sp", [SPL]>;
- }
- def IP : Register<"ip">;
-
- // X86-64 only, requires REX.
- let SubRegIndices = [sub_8bit], CostPerUse = 1 in {
- def R8W : RegisterWithSubRegs<"r8w", [R8B]>;
- def R9W : RegisterWithSubRegs<"r9w", [R9B]>;
- def R10W : RegisterWithSubRegs<"r10w", [R10B]>;
- def R11W : RegisterWithSubRegs<"r11w", [R11B]>;
- def R12W : RegisterWithSubRegs<"r12w", [R12B]>;
- def R13W : RegisterWithSubRegs<"r13w", [R13B]>;
- def R14W : RegisterWithSubRegs<"r14w", [R14B]>;
- def R15W : RegisterWithSubRegs<"r15w", [R15B]>;
- }
- // 32-bit registers
- let SubRegIndices = [sub_16bit] in {
- def EAX : RegisterWithSubRegs<"eax", [AX]>, DwarfRegNum<[-2, 0, 0]>;
- def EDX : RegisterWithSubRegs<"edx", [DX]>, DwarfRegNum<[-2, 2, 2]>;
- def ECX : RegisterWithSubRegs<"ecx", [CX]>, DwarfRegNum<[-2, 1, 1]>;
- def EBX : RegisterWithSubRegs<"ebx", [BX]>, DwarfRegNum<[-2, 3, 3]>;
- def ESI : RegisterWithSubRegs<"esi", [SI]>, DwarfRegNum<[-2, 6, 6]>;
- def EDI : RegisterWithSubRegs<"edi", [DI]>, DwarfRegNum<[-2, 7, 7]>;
- def EBP : RegisterWithSubRegs<"ebp", [BP]>, DwarfRegNum<[-2, 4, 5]>;
- def ESP : RegisterWithSubRegs<"esp", [SP]>, DwarfRegNum<[-2, 5, 4]>;
- def EIP : RegisterWithSubRegs<"eip", [IP]>, DwarfRegNum<[-2, 8, 8]>;
-
- // X86-64 only, requires REX
- let CostPerUse = 1 in {
- def R8D : RegisterWithSubRegs<"r8d", [R8W]>;
- def R9D : RegisterWithSubRegs<"r9d", [R9W]>;
- def R10D : RegisterWithSubRegs<"r10d", [R10W]>;
- def R11D : RegisterWithSubRegs<"r11d", [R11W]>;
- def R12D : RegisterWithSubRegs<"r12d", [R12W]>;
- def R13D : RegisterWithSubRegs<"r13d", [R13W]>;
- def R14D : RegisterWithSubRegs<"r14d", [R14W]>;
- def R15D : RegisterWithSubRegs<"r15d", [R15W]>;
- }}
-
- // 64-bit registers, X86-64 only
- let SubRegIndices = [sub_32bit] in {
- def RAX : RegisterWithSubRegs<"rax", [EAX]>, DwarfRegNum<[0, -2, -2]>;
- def RDX : RegisterWithSubRegs<"rdx", [EDX]>, DwarfRegNum<[1, -2, -2]>;
- def RCX : RegisterWithSubRegs<"rcx", [ECX]>, DwarfRegNum<[2, -2, -2]>;
- def RBX : RegisterWithSubRegs<"rbx", [EBX]>, DwarfRegNum<[3, -2, -2]>;
- def RSI : RegisterWithSubRegs<"rsi", [ESI]>, DwarfRegNum<[4, -2, -2]>;
- def RDI : RegisterWithSubRegs<"rdi", [EDI]>, DwarfRegNum<[5, -2, -2]>;
- def RBP : RegisterWithSubRegs<"rbp", [EBP]>, DwarfRegNum<[6, -2, -2]>;
- def RSP : RegisterWithSubRegs<"rsp", [ESP]>, DwarfRegNum<[7, -2, -2]>;
-
- // These also require REX.
- let CostPerUse = 1 in {
- def R8 : RegisterWithSubRegs<"r8", [R8D]>, DwarfRegNum<[8, -2, -2]>;
- def R9 : RegisterWithSubRegs<"r9", [R9D]>, DwarfRegNum<[9, -2, -2]>;
- def R10 : RegisterWithSubRegs<"r10", [R10D]>, DwarfRegNum<[10, -2, -2]>;
- def R11 : RegisterWithSubRegs<"r11", [R11D]>, DwarfRegNum<[11, -2, -2]>;
- def R12 : RegisterWithSubRegs<"r12", [R12D]>, DwarfRegNum<[12, -2, -2]>;
- def R13 : RegisterWithSubRegs<"r13", [R13D]>, DwarfRegNum<[13, -2, -2]>;
- def R14 : RegisterWithSubRegs<"r14", [R14D]>, DwarfRegNum<[14, -2, -2]>;
- def R15 : RegisterWithSubRegs<"r15", [R15D]>, DwarfRegNum<[15, -2, -2]>;
- def RIP : RegisterWithSubRegs<"rip", [EIP]>, DwarfRegNum<[16, -2, -2]>;
- }}
-
- // MMX Registers. These are actually aliased to ST0 .. ST7
- def MM0 : Register<"mm0">, DwarfRegNum<[41, 29, 29]>;
- def MM1 : Register<"mm1">, DwarfRegNum<[42, 30, 30]>;
- def MM2 : Register<"mm2">, DwarfRegNum<[43, 31, 31]>;
- def MM3 : Register<"mm3">, DwarfRegNum<[44, 32, 32]>;
- def MM4 : Register<"mm4">, DwarfRegNum<[45, 33, 33]>;
- def MM5 : Register<"mm5">, DwarfRegNum<[46, 34, 34]>;
- def MM6 : Register<"mm6">, DwarfRegNum<[47, 35, 35]>;
- def MM7 : Register<"mm7">, DwarfRegNum<[48, 36, 36]>;
-
- // Pseudo Floating Point registers
- def FP0 : Register<"fp0">;
- def FP1 : Register<"fp1">;
- def FP2 : Register<"fp2">;
- def FP3 : Register<"fp3">;
- def FP4 : Register<"fp4">;
- def FP5 : Register<"fp5">;
- def FP6 : Register<"fp6">;
-
- // XMM Registers, used by the various SSE instruction set extensions.
- def XMM0: Register<"xmm0">, DwarfRegNum<[17, 21, 21]>;
- def XMM1: Register<"xmm1">, DwarfRegNum<[18, 22, 22]>;
- def XMM2: Register<"xmm2">, DwarfRegNum<[19, 23, 23]>;
- def XMM3: Register<"xmm3">, DwarfRegNum<[20, 24, 24]>;
- def XMM4: Register<"xmm4">, DwarfRegNum<[21, 25, 25]>;
- def XMM5: Register<"xmm5">, DwarfRegNum<[22, 26, 26]>;
- def XMM6: Register<"xmm6">, DwarfRegNum<[23, 27, 27]>;
- def XMM7: Register<"xmm7">, DwarfRegNum<[24, 28, 28]>;
-
- // X86-64 only
- let CostPerUse = 1 in {
- def XMM8: Register<"xmm8">, DwarfRegNum<[25, -2, -2]>;
- def XMM9: Register<"xmm9">, DwarfRegNum<[26, -2, -2]>;
- def XMM10: Register<"xmm10">, DwarfRegNum<[27, -2, -2]>;
- def XMM11: Register<"xmm11">, DwarfRegNum<[28, -2, -2]>;
- def XMM12: Register<"xmm12">, DwarfRegNum<[29, -2, -2]>;
- def XMM13: Register<"xmm13">, DwarfRegNum<[30, -2, -2]>;
- def XMM14: Register<"xmm14">, DwarfRegNum<[31, -2, -2]>;
- def XMM15: Register<"xmm15">, DwarfRegNum<[32, -2, -2]>;
- } // CostPerUse
-
- // YMM Registers, used by AVX instructions
- let SubRegIndices = [sub_xmm] in {
- def YMM0: RegisterWithSubRegs<"ymm0", [XMM0]>, DwarfRegAlias<XMM0>;
- def YMM1: RegisterWithSubRegs<"ymm1", [XMM1]>, DwarfRegAlias<XMM1>;
- def YMM2: RegisterWithSubRegs<"ymm2", [XMM2]>, DwarfRegAlias<XMM2>;
- def YMM3: RegisterWithSubRegs<"ymm3", [XMM3]>, DwarfRegAlias<XMM3>;
- def YMM4: RegisterWithSubRegs<"ymm4", [XMM4]>, DwarfRegAlias<XMM4>;
- def YMM5: RegisterWithSubRegs<"ymm5", [XMM5]>, DwarfRegAlias<XMM5>;
- def YMM6: RegisterWithSubRegs<"ymm6", [XMM6]>, DwarfRegAlias<XMM6>;
- def YMM7: RegisterWithSubRegs<"ymm7", [XMM7]>, DwarfRegAlias<XMM7>;
- def YMM8: RegisterWithSubRegs<"ymm8", [XMM8]>, DwarfRegAlias<XMM8>;
- def YMM9: RegisterWithSubRegs<"ymm9", [XMM9]>, DwarfRegAlias<XMM9>;
- def YMM10: RegisterWithSubRegs<"ymm10", [XMM10]>, DwarfRegAlias<XMM10>;
- def YMM11: RegisterWithSubRegs<"ymm11", [XMM11]>, DwarfRegAlias<XMM11>;
- def YMM12: RegisterWithSubRegs<"ymm12", [XMM12]>, DwarfRegAlias<XMM12>;
- def YMM13: RegisterWithSubRegs<"ymm13", [XMM13]>, DwarfRegAlias<XMM13>;
- def YMM14: RegisterWithSubRegs<"ymm14", [XMM14]>, DwarfRegAlias<XMM14>;
- def YMM15: RegisterWithSubRegs<"ymm15", [XMM15]>, DwarfRegAlias<XMM15>;
- }
-
- class STRegister<string Name, list<Register> A> : Register<Name> {
- let Aliases = A;
- }
-
- // Floating point stack registers. These don't map one-to-one to the FP
- // pseudo registers, but we still mark them as aliasing FP registers. That
- // way both kinds can be live without exceeding the stack depth. ST registers
- // are only live around inline assembly.
- def ST0 : STRegister<"st(0)", []>, DwarfRegNum<[33, 12, 11]>;
- def ST1 : STRegister<"st(1)", [FP6]>, DwarfRegNum<[34, 13, 12]>;
- def ST2 : STRegister<"st(2)", [FP5]>, DwarfRegNum<[35, 14, 13]>;
- def ST3 : STRegister<"st(3)", [FP4]>, DwarfRegNum<[36, 15, 14]>;
- def ST4 : STRegister<"st(4)", [FP3]>, DwarfRegNum<[37, 16, 15]>;
- def ST5 : STRegister<"st(5)", [FP2]>, DwarfRegNum<[38, 17, 16]>;
- def ST6 : STRegister<"st(6)", [FP1]>, DwarfRegNum<[39, 18, 17]>;
- def ST7 : STRegister<"st(7)", [FP0]>, DwarfRegNum<[40, 19, 18]>;
-
- // Floating-point status word
- def FPSW : Register<"fpsw">;
-
- // Status flags register
- def EFLAGS : Register<"flags">;
-
- // Segment registers
- def CS : Register<"cs">;
- def DS : Register<"ds">;
- def SS : Register<"ss">;
- def ES : Register<"es">;
- def FS : Register<"fs">;
- def GS : Register<"gs">;
-
- // Debug registers
- def DR0 : Register<"dr0">;
- def DR1 : Register<"dr1">;
- def DR2 : Register<"dr2">;
- def DR3 : Register<"dr3">;
- def DR4 : Register<"dr4">;
- def DR5 : Register<"dr5">;
- def DR6 : Register<"dr6">;
- def DR7 : Register<"dr7">;
-
- // Control registers
- def CR0 : Register<"cr0">;
- def CR1 : Register<"cr1">;
- def CR2 : Register<"cr2">;
- def CR3 : Register<"cr3">;
- def CR4 : Register<"cr4">;
- def CR5 : Register<"cr5">;
- def CR6 : Register<"cr6">;
- def CR7 : Register<"cr7">;
- def CR8 : Register<"cr8">;
- def CR9 : Register<"cr9">;
- def CR10 : Register<"cr10">;
- def CR11 : Register<"cr11">;
- def CR12 : Register<"cr12">;
- def CR13 : Register<"cr13">;
- def CR14 : Register<"cr14">;
- def CR15 : Register<"cr15">;
-
- // Pseudo index registers
- def EIZ : Register<"eiz">;
- def RIZ : Register<"riz">;
+// 32-bit registers
+let SubRegIndices = [sub_16bit] in {
+def EAX : X86Reg<"eax", 0, [AX]>, DwarfRegNum<[-2, 0, 0]>;
+def EDX : X86Reg<"edx", 2, [DX]>, DwarfRegNum<[-2, 2, 2]>;
+def ECX : X86Reg<"ecx", 1, [CX]>, DwarfRegNum<[-2, 1, 1]>;
+def EBX : X86Reg<"ebx", 3, [BX]>, DwarfRegNum<[-2, 3, 3]>;
+def ESI : X86Reg<"esi", 6, [SI]>, DwarfRegNum<[-2, 6, 6]>;
+def EDI : X86Reg<"edi", 7, [DI]>, DwarfRegNum<[-2, 7, 7]>;
+def EBP : X86Reg<"ebp", 5, [BP]>, DwarfRegNum<[-2, 4, 5]>;
+def ESP : X86Reg<"esp", 4, [SP]>, DwarfRegNum<[-2, 5, 4]>;
+def EIP : X86Reg<"eip", 0, [IP]>, DwarfRegNum<[-2, 8, 8]>;
+
+// X86-64 only, requires REX
+let CostPerUse = 1 in {
+def R8D : X86Reg<"r8d", 8, [R8W]>;
+def R9D : X86Reg<"r9d", 9, [R9W]>;
+def R10D : X86Reg<"r10d", 10, [R10W]>;
+def R11D : X86Reg<"r11d", 11, [R11W]>;
+def R12D : X86Reg<"r12d", 12, [R12W]>;
+def R13D : X86Reg<"r13d", 13, [R13W]>;
+def R14D : X86Reg<"r14d", 14, [R14W]>;
+def R15D : X86Reg<"r15d", 15, [R15W]>;
+}}
+
+// 64-bit registers, X86-64 only
+let SubRegIndices = [sub_32bit] in {
+def RAX : X86Reg<"rax", 0, [EAX]>, DwarfRegNum<[0, -2, -2]>;
+def RDX : X86Reg<"rdx", 2, [EDX]>, DwarfRegNum<[1, -2, -2]>;
+def RCX : X86Reg<"rcx", 1, [ECX]>, DwarfRegNum<[2, -2, -2]>;
+def RBX : X86Reg<"rbx", 3, [EBX]>, DwarfRegNum<[3, -2, -2]>;
+def RSI : X86Reg<"rsi", 6, [ESI]>, DwarfRegNum<[4, -2, -2]>;
+def RDI : X86Reg<"rdi", 7, [EDI]>, DwarfRegNum<[5, -2, -2]>;
+def RBP : X86Reg<"rbp", 5, [EBP]>, DwarfRegNum<[6, -2, -2]>;
+def RSP : X86Reg<"rsp", 4, [ESP]>, DwarfRegNum<[7, -2, -2]>;
+
+// These also require REX.
+let CostPerUse = 1 in {
+def R8 : X86Reg<"r8", 8, [R8D]>, DwarfRegNum<[ 8, -2, -2]>;
+def R9 : X86Reg<"r9", 9, [R9D]>, DwarfRegNum<[ 9, -2, -2]>;
+def R10 : X86Reg<"r10", 10, [R10D]>, DwarfRegNum<[10, -2, -2]>;
+def R11 : X86Reg<"r11", 11, [R11D]>, DwarfRegNum<[11, -2, -2]>;
+def R12 : X86Reg<"r12", 12, [R12D]>, DwarfRegNum<[12, -2, -2]>;
+def R13 : X86Reg<"r13", 13, [R13D]>, DwarfRegNum<[13, -2, -2]>;
+def R14 : X86Reg<"r14", 14, [R14D]>, DwarfRegNum<[14, -2, -2]>;
+def R15 : X86Reg<"r15", 15, [R15D]>, DwarfRegNum<[15, -2, -2]>;
+def RIP : X86Reg<"rip", 0, [EIP]>, DwarfRegNum<[16, -2, -2]>;
+}}
+
+// MMX Registers. These are actually aliased to ST0 .. ST7
+def MM0 : X86Reg<"mm0", 0>, DwarfRegNum<[41, 29, 29]>;
+def MM1 : X86Reg<"mm1", 1>, DwarfRegNum<[42, 30, 30]>;
+def MM2 : X86Reg<"mm2", 2>, DwarfRegNum<[43, 31, 31]>;
+def MM3 : X86Reg<"mm3", 3>, DwarfRegNum<[44, 32, 32]>;
+def MM4 : X86Reg<"mm4", 4>, DwarfRegNum<[45, 33, 33]>;
+def MM5 : X86Reg<"mm5", 5>, DwarfRegNum<[46, 34, 34]>;
+def MM6 : X86Reg<"mm6", 6>, DwarfRegNum<[47, 35, 35]>;
+def MM7 : X86Reg<"mm7", 7>, DwarfRegNum<[48, 36, 36]>;
+
+// Pseudo Floating Point registers
+def FP0 : X86Reg<"fp0", 0>;
+def FP1 : X86Reg<"fp1", 0>;
+def FP2 : X86Reg<"fp2", 0>;
+def FP3 : X86Reg<"fp3", 0>;
+def FP4 : X86Reg<"fp4", 0>;
+def FP5 : X86Reg<"fp5", 0>;
+def FP6 : X86Reg<"fp6", 0>;
+
+// XMM Registers, used by the various SSE instruction set extensions.
+def XMM0: X86Reg<"xmm0", 0>, DwarfRegNum<[17, 21, 21]>;
+def XMM1: X86Reg<"xmm1", 1>, DwarfRegNum<[18, 22, 22]>;
+def XMM2: X86Reg<"xmm2", 2>, DwarfRegNum<[19, 23, 23]>;
+def XMM3: X86Reg<"xmm3", 3>, DwarfRegNum<[20, 24, 24]>;
+def XMM4: X86Reg<"xmm4", 4>, DwarfRegNum<[21, 25, 25]>;
+def XMM5: X86Reg<"xmm5", 5>, DwarfRegNum<[22, 26, 26]>;
+def XMM6: X86Reg<"xmm6", 6>, DwarfRegNum<[23, 27, 27]>;
+def XMM7: X86Reg<"xmm7", 7>, DwarfRegNum<[24, 28, 28]>;
+
+// X86-64 only
+let CostPerUse = 1 in {
+def XMM8: X86Reg<"xmm8", 8>, DwarfRegNum<[25, -2, -2]>;
+def XMM9: X86Reg<"xmm9", 9>, DwarfRegNum<[26, -2, -2]>;
+def XMM10: X86Reg<"xmm10", 10>, DwarfRegNum<[27, -2, -2]>;
+def XMM11: X86Reg<"xmm11", 11>, DwarfRegNum<[28, -2, -2]>;
+def XMM12: X86Reg<"xmm12", 12>, DwarfRegNum<[29, -2, -2]>;
+def XMM13: X86Reg<"xmm13", 13>, DwarfRegNum<[30, -2, -2]>;
+def XMM14: X86Reg<"xmm14", 14>, DwarfRegNum<[31, -2, -2]>;
+def XMM15: X86Reg<"xmm15", 15>, DwarfRegNum<[32, -2, -2]>;
+} // CostPerUse
+
+// YMM Registers, used by AVX instructions
+let SubRegIndices = [sub_xmm] in {
+def YMM0: X86Reg<"ymm0", 0, [XMM0]>, DwarfRegAlias<XMM0>;
+def YMM1: X86Reg<"ymm1", 1, [XMM1]>, DwarfRegAlias<XMM1>;
+def YMM2: X86Reg<"ymm2", 2, [XMM2]>, DwarfRegAlias<XMM2>;
+def YMM3: X86Reg<"ymm3", 3, [XMM3]>, DwarfRegAlias<XMM3>;
+def YMM4: X86Reg<"ymm4", 4, [XMM4]>, DwarfRegAlias<XMM4>;
+def YMM5: X86Reg<"ymm5", 5, [XMM5]>, DwarfRegAlias<XMM5>;
+def YMM6: X86Reg<"ymm6", 6, [XMM6]>, DwarfRegAlias<XMM6>;
+def YMM7: X86Reg<"ymm7", 7, [XMM7]>, DwarfRegAlias<XMM7>;
+def YMM8: X86Reg<"ymm8", 8, [XMM8]>, DwarfRegAlias<XMM8>;
+def YMM9: X86Reg<"ymm9", 9, [XMM9]>, DwarfRegAlias<XMM9>;
+def YMM10: X86Reg<"ymm10", 10, [XMM10]>, DwarfRegAlias<XMM10>;
+def YMM11: X86Reg<"ymm11", 11, [XMM11]>, DwarfRegAlias<XMM11>;
+def YMM12: X86Reg<"ymm12", 12, [XMM12]>, DwarfRegAlias<XMM12>;
+def YMM13: X86Reg<"ymm13", 13, [XMM13]>, DwarfRegAlias<XMM13>;
+def YMM14: X86Reg<"ymm14", 14, [XMM14]>, DwarfRegAlias<XMM14>;
+def YMM15: X86Reg<"ymm15", 15, [XMM15]>, DwarfRegAlias<XMM15>;
}
+class STRegister<string n, bits<16> Enc, list<Register> A> : X86Reg<n, Enc> {
+ let Aliases = A;
+}
+
+// Floating point stack registers. These don't map one-to-one to the FP
+// pseudo registers, but we still mark them as aliasing FP registers. That
+// way both kinds can be live without exceeding the stack depth. ST registers
+// are only live around inline assembly.
+def ST0 : STRegister<"st(0)", 0, []>, DwarfRegNum<[33, 12, 11]>;
+def ST1 : STRegister<"st(1)", 1, [FP6]>, DwarfRegNum<[34, 13, 12]>;
+def ST2 : STRegister<"st(2)", 2, [FP5]>, DwarfRegNum<[35, 14, 13]>;
+def ST3 : STRegister<"st(3)", 3, [FP4]>, DwarfRegNum<[36, 15, 14]>;
+def ST4 : STRegister<"st(4)", 4, [FP3]>, DwarfRegNum<[37, 16, 15]>;
+def ST5 : STRegister<"st(5)", 5, [FP2]>, DwarfRegNum<[38, 17, 16]>;
+def ST6 : STRegister<"st(6)", 6, [FP1]>, DwarfRegNum<[39, 18, 17]>;
+def ST7 : STRegister<"st(7)", 7, [FP0]>, DwarfRegNum<[40, 19, 18]>;
+
+// Floating-point status word
+def FPSW : X86Reg<"fpsw", 0>;
+
+// Status flags register
+def EFLAGS : X86Reg<"flags", 0>;
+
+// Segment registers
+def CS : X86Reg<"cs", 1>;
+def DS : X86Reg<"ds", 3>;
+def SS : X86Reg<"ss", 2>;
+def ES : X86Reg<"es", 0>;
+def FS : X86Reg<"fs", 4>;
+def GS : X86Reg<"gs", 5>;
+
+// Debug registers
+def DR0 : X86Reg<"dr0", 0>;
+def DR1 : X86Reg<"dr1", 1>;
+def DR2 : X86Reg<"dr2", 2>;
+def DR3 : X86Reg<"dr3", 3>;
+def DR4 : X86Reg<"dr4", 4>;
+def DR5 : X86Reg<"dr5", 5>;
+def DR6 : X86Reg<"dr6", 6>;
+def DR7 : X86Reg<"dr7", 7>;
+
+// Control registers
+def CR0 : X86Reg<"cr0", 0>;
+def CR1 : X86Reg<"cr1", 1>;
+def CR2 : X86Reg<"cr2", 2>;
+def CR3 : X86Reg<"cr3", 3>;
+def CR4 : X86Reg<"cr4", 4>;
+def CR5 : X86Reg<"cr5", 5>;
+def CR6 : X86Reg<"cr6", 6>;
+def CR7 : X86Reg<"cr7", 7>;
+def CR8 : X86Reg<"cr8", 8>;
+def CR9 : X86Reg<"cr9", 9>;
+def CR10 : X86Reg<"cr10", 10>;
+def CR11 : X86Reg<"cr11", 11>;
+def CR12 : X86Reg<"cr12", 12>;
+def CR13 : X86Reg<"cr13", 13>;
+def CR14 : X86Reg<"cr14", 14>;
+def CR15 : X86Reg<"cr15", 15>;
+
+// Pseudo index registers
+def EIZ : X86Reg<"eiz", 4>;
+def RIZ : X86Reg<"riz", 4>;
+
//===----------------------------------------------------------------------===//
// Register Class Definitions... now that we have all of the pieces, define the
Modified: llvm/branches/AMDILBackend/lib/Target/X86/X86SelectionDAGInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/X86/X86SelectionDAGInfo.cpp?rev=172541&r1=172540&r2=172541&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/X86/X86SelectionDAGInfo.cpp (original)
+++ llvm/branches/AMDILBackend/lib/Target/X86/X86SelectionDAGInfo.cpp Tue Jan 15 11:16:16 2013
@@ -54,7 +54,7 @@
if (const char *bzeroEntry = V &&
V->isNullValue() ? Subtarget->getBZeroEntry() : 0) {
EVT IntPtr = TLI.getPointerTy();
- Type *IntPtrTy = getTargetData()->getIntPtrType(*DAG.getContext());
+ Type *IntPtrTy = getDataLayout()->getIntPtrType(*DAG.getContext());
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
Entry.Node = Dst;
Modified: llvm/branches/AMDILBackend/lib/Target/X86/X86Subtarget.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/X86/X86Subtarget.cpp?rev=172541&r1=172540&r2=172541&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/X86/X86Subtarget.cpp (original)
+++ llvm/branches/AMDILBackend/lib/Target/X86/X86Subtarget.cpp Tue Jan 15 11:16:16 2013
@@ -163,17 +163,6 @@
return isTargetELF() || TM.getRelocationModel() == Reloc::Static;
}
-/// getSpecialAddressLatency - For targets where it is beneficial to
-/// backschedule instructions that compute addresses, return a value
-/// indicating the number of scheduling cycles of backscheduling that
-/// should be attempted.
-unsigned X86Subtarget::getSpecialAddressLatency() const {
- // For x86 out-of-order targets, back-schedule address computations so
- // that loads and stores aren't blocked.
- // This value was chosen arbitrarily.
- return 200;
-}
-
void X86Subtarget::AutoDetectSubtargetFeatures() {
unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
unsigned MaxLevel;
@@ -246,8 +235,11 @@
}
// If it's Nehalem, unaligned memory access is fast.
- // FIXME: Nehalem is family 6. Also include Westmere and later processors?
- if (Family == 15 && Model == 26) {
+ // Include Westmere and Sandy Bridge as well.
+ // FIXME: add later processors.
+ if (IsIntel && ((Family == 6 && Model == 26) ||
+ (Family == 6 && Model == 44) ||
+ (Family == 6 && Model == 42))) {
IsUAMemFast = true;
ToggleFeature(X86::FeatureFastUAMem);
}
@@ -310,6 +302,10 @@
HasBMI2 = true;
ToggleFeature(X86::FeatureBMI2);
}
+ if (IsIntel && ((EBX >> 11) & 0x1)) {
+ HasRTM = true;
+ ToggleFeature(X86::FeatureRTM);
+ }
}
}
}
@@ -338,11 +334,13 @@
, HasLZCNT(false)
, HasBMI(false)
, HasBMI2(false)
+ , HasRTM(false)
, IsBTMemSlow(false)
, IsUAMemFast(false)
, HasVectorUAMem(false)
, HasCmpxchg16b(false)
, UseLeaForSP(false)
+ , HasSlowDivide(false)
, PostRAScheduler(false)
, stackAlignment(4)
// FIXME: this is a known good value for Yonah. How about others?
@@ -397,6 +395,10 @@
}
}
+ // CPUName may have been set by the CPU detection code. Make sure the
+ // new MCSchedModel is used.
+ InitMCProcessorInfo(CPUName, FS);
+
if (X86ProcFamily == IntelAtom)
PostRAScheduler = true;
@@ -413,12 +415,12 @@
assert((!In64BitMode || HasX86_64) &&
"64-bit code requested on a subtarget that doesn't support it!");
- // Stack alignment is 16 bytes on Darwin, FreeBSD, Linux and Solaris (both
+ // Stack alignment is 16 bytes on Darwin, Linux and Solaris (both
// 32 and 64 bit) and for all 64-bit targets.
if (StackAlignOverride)
stackAlignment = StackAlignOverride;
- else if (isTargetDarwin() || isTargetFreeBSD() || isTargetLinux() ||
- isTargetSolaris() || In64BitMode)
+ else if (isTargetDarwin() || isTargetLinux() || isTargetSolaris() ||
+ In64BitMode)
stackAlignment = 16;
}
Modified: llvm/branches/AMDILBackend/lib/Target/X86/X86Subtarget.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/X86/X86Subtarget.h?rev=172541&r1=172540&r2=172541&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/X86/X86Subtarget.h (original)
+++ llvm/branches/AMDILBackend/lib/Target/X86/X86Subtarget.h Tue Jan 15 11:16:16 2013
@@ -118,6 +118,9 @@
/// HasBMI2 - Processor has BMI2 instructions.
bool HasBMI2;
+ /// HasRTM - Processor has RTM instructions.
+ bool HasRTM;
+
/// IsBTMemSlow - True if BT (bit test) of memory instructions are slow.
bool IsBTMemSlow;
@@ -136,6 +139,10 @@
/// the stack pointer. This is an optimization for Intel Atom processors.
bool UseLeaForSP;
+ /// HasSlowDivide - True if smaller divides are significantly faster than
+ /// full divides and should be used when possible.
+ bool HasSlowDivide;
+
/// PostRAScheduler - True if using post-register-allocation scheduler.
bool PostRAScheduler;
@@ -205,7 +212,8 @@
bool hasAES() const { return HasAES; }
bool hasPCLMUL() const { return HasPCLMUL; }
bool hasFMA() const { return HasFMA; }
- bool hasFMA4() const { return HasFMA4; }
+ // FIXME: Favor FMA when both are enabled. Is this the right thing to do?
+ bool hasFMA4() const { return HasFMA4 && !HasFMA; }
bool hasXOP() const { return HasXOP; }
bool hasMOVBE() const { return HasMOVBE; }
bool hasRDRAND() const { return HasRDRAND; }
@@ -214,11 +222,13 @@
bool hasLZCNT() const { return HasLZCNT; }
bool hasBMI() const { return HasBMI; }
bool hasBMI2() const { return HasBMI2; }
+ bool hasRTM() const { return HasRTM; }
bool isBTMemSlow() const { return IsBTMemSlow; }
bool isUnalignedMemAccessFast() const { return IsUAMemFast; }
bool hasVectorUAMem() const { return HasVectorUAMem; }
bool hasCmpxchg16b() const { return HasCmpxchg16b; }
bool useLeaForSP() const { return UseLeaForSP; }
+ bool hasSlowDivide() const { return HasSlowDivide; }
bool isAtom() const { return X86ProcFamily == IntelAtom; }
@@ -231,10 +241,10 @@
bool isTargetSolaris() const {
return TargetTriple.getOS() == Triple::Solaris;
}
-
- // ELF is a reasonably sane default and the only other X86 targets we
- // support are Darwin and Windows. Just use "not those".
- bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); }
+ bool isTargetELF() const {
+ return (TargetTriple.getEnvironment() == Triple::ELF ||
+ TargetTriple.isOSBinFormatELF());
+ }
bool isTargetLinux() const { return TargetTriple.getOS() == Triple::Linux; }
bool isTargetNaCl() const {
return TargetTriple.getOS() == Triple::NativeClient;
@@ -245,7 +255,10 @@
bool isTargetMingw() const { return TargetTriple.getOS() == Triple::MinGW32; }
bool isTargetCygwin() const { return TargetTriple.getOS() == Triple::Cygwin; }
bool isTargetCygMing() const { return TargetTriple.isOSCygMing(); }
- bool isTargetCOFF() const { return TargetTriple.isOSBinFormatCOFF(); }
+ bool isTargetCOFF() const {
+ return (TargetTriple.getEnvironment() != Triple::ELF &&
+ TargetTriple.isOSBinFormatCOFF());
+ }
bool isTargetEnvMacho() const { return TargetTriple.isEnvironmentMachO(); }
bool isTargetWin64() const {
@@ -296,12 +309,6 @@
/// returns null.
const char *getBZeroEntry() const;
- /// getSpecialAddressLatency - For targets where it is beneficial to
- /// backschedule instructions that compute addresses, return a value
- /// indicating the number of scheduling cycles of backscheduling that
- /// should be attempted.
- unsigned getSpecialAddressLatency() const;
-
/// enablePostRAScheduler - run for Atom optimization.
bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
TargetSubtargetInfo::AntiDepBreakMode& Mode,
Modified: llvm/branches/AMDILBackend/lib/Target/X86/X86TargetMachine.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/X86/X86TargetMachine.cpp?rev=172541&r1=172540&r2=172541&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/X86/X86TargetMachine.cpp (original)
+++ llvm/branches/AMDILBackend/lib/Target/X86/X86TargetMachine.cpp Tue Jan 15 11:16:16 2013
@@ -36,7 +36,7 @@
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
: X86TargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false),
- DataLayout(getSubtargetImpl()->isTargetDarwin() ?
+ DL(getSubtargetImpl()->isTargetDarwin() ?
"e-p:32:32-f64:32:64-i64:32:64-f80:128:128-f128:128:128-"
"n8:16:32-S128" :
(getSubtargetImpl()->isTargetCygMing() ||
@@ -48,7 +48,8 @@
InstrInfo(*this),
TSInfo(*this),
TLInfo(*this),
- JITInfo(*this) {
+ JITInfo(*this),
+ STTI(&TLInfo), VTTI(&TLInfo) {
}
void X86_64TargetMachine::anchor() { }
@@ -59,12 +60,13 @@
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
: X86TargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true),
- DataLayout("e-p:64:64-s:64-f64:64:64-i64:64:64-f80:128:128-f128:128:128-"
+ DL("e-p:64:64-s:64-f64:64:64-i64:64:64-f80:128:128-f128:128:128-"
"n8:16:32:64-S128"),
InstrInfo(*this),
TSInfo(*this),
TLInfo(*this),
- JITInfo(*this) {
+ JITInfo(*this),
+ STTI(&TLInfo), VTTI(&TLInfo){
}
/// X86TargetMachine ctor - Create an X86 target.
@@ -78,7 +80,6 @@
: LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
Subtarget(TT, CPU, FS, Options.StackAlignmentOverride, is64Bit),
FrameLowering(*this, Subtarget),
- ELFWriterInfo(is64Bit, true),
InstrItins(Subtarget.getInstrItineraryData()){
// Determine the PICStyle based on the target selected.
if (getRelocationModel() == Reloc::Static) {
@@ -113,6 +114,12 @@
cl::desc("Minimize AVX to SSE transition penalty"),
cl::init(true));
+// Temporary option to control early if-conversion for x86 while adding machine
+// models.
+static cl::opt<bool>
+X86EarlyIfConv("x86-early-ifcvt",
+ cl::desc("Enable early if-conversion on X86"));
+
//===----------------------------------------------------------------------===//
// Pass Pipeline Configuration
//===----------------------------------------------------------------------===//
@@ -142,7 +149,7 @@
TargetPassConfig *X86TargetMachine::createPassConfig(PassManagerBase &PM) {
X86PassConfig *PC = new X86PassConfig(this, PM);
- if (Subtarget.hasCMov())
+ if (X86EarlyIfConv && Subtarget.hasCMov())
PC->enablePass(&EarlyIfConverterID);
return PC;
Modified: llvm/branches/AMDILBackend/lib/Target/X86/X86TargetMachine.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/X86/X86TargetMachine.h?rev=172541&r1=172540&r2=172541&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/X86/X86TargetMachine.h (original)
+++ llvm/branches/AMDILBackend/lib/Target/X86/X86TargetMachine.h Tue Jan 15 11:16:16 2013
@@ -15,7 +15,6 @@
#define X86TARGETMACHINE_H
#include "X86.h"
-#include "X86ELFWriterInfo.h"
#include "X86InstrInfo.h"
#include "X86ISelLowering.h"
#include "X86FrameLowering.h"
@@ -23,8 +22,9 @@
#include "X86SelectionDAGInfo.h"
#include "X86Subtarget.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetTransformImpl.h"
namespace llvm {
@@ -33,7 +33,6 @@
class X86TargetMachine : public LLVMTargetMachine {
X86Subtarget Subtarget;
X86FrameLowering FrameLowering;
- X86ELFWriterInfo ELFWriterInfo;
InstrItineraryData InstrItins;
public:
@@ -62,9 +61,6 @@
virtual const X86RegisterInfo *getRegisterInfo() const {
return &getInstrInfo()->getRegisterInfo();
}
- virtual const X86ELFWriterInfo *getELFWriterInfo() const {
- return Subtarget.isTargetELF() ? &ELFWriterInfo : 0;
- }
virtual const InstrItineraryData *getInstrItineraryData() const {
return &InstrItins;
}
@@ -80,17 +76,19 @@
///
class X86_32TargetMachine : public X86TargetMachine {
virtual void anchor();
- const TargetData DataLayout; // Calculates type size & alignment
+ const DataLayout DL; // Calculates type size & alignment
X86InstrInfo InstrInfo;
X86SelectionDAGInfo TSInfo;
X86TargetLowering TLInfo;
X86JITInfo JITInfo;
+ ScalarTargetTransformImpl STTI;
+ X86VectorTargetTransformInfo VTTI;
public:
X86_32TargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
- virtual const TargetData *getTargetData() const { return &DataLayout; }
+ virtual const DataLayout *getDataLayout() const { return &DL; }
virtual const X86TargetLowering *getTargetLowering() const {
return &TLInfo;
}
@@ -103,23 +101,31 @@
virtual X86JITInfo *getJITInfo() {
return &JITInfo;
}
+ virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const {
+ return &STTI;
+ }
+ virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const {
+ return &VTTI;
+ }
};
/// X86_64TargetMachine - X86 64-bit target machine.
///
class X86_64TargetMachine : public X86TargetMachine {
virtual void anchor();
- const TargetData DataLayout; // Calculates type size & alignment
+ const DataLayout DL; // Calculates type size & alignment
X86InstrInfo InstrInfo;
X86SelectionDAGInfo TSInfo;
X86TargetLowering TLInfo;
X86JITInfo JITInfo;
+ ScalarTargetTransformImpl STTI;
+ X86VectorTargetTransformInfo VTTI;
public:
X86_64TargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
- virtual const TargetData *getTargetData() const { return &DataLayout; }
+ virtual const DataLayout *getDataLayout() const { return &DL; }
virtual const X86TargetLowering *getTargetLowering() const {
return &TLInfo;
}
@@ -132,6 +138,12 @@
virtual X86JITInfo *getJITInfo() {
return &JITInfo;
}
+ virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const {
+ return &STTI;
+ }
+ virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const {
+ return &VTTI;
+ }
};
} // End llvm namespace
Modified: llvm/branches/AMDILBackend/lib/Target/X86/X86VZeroUpper.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/X86/X86VZeroUpper.cpp?rev=172541&r1=172540&r2=172541&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/X86/X86VZeroUpper.cpp (original)
+++ llvm/branches/AMDILBackend/lib/Target/X86/X86VZeroUpper.cpp Tue Jan 15 11:16:16 2013
@@ -42,7 +42,6 @@
private:
const TargetInstrInfo *TII; // Machine instruction info.
- MachineBasicBlock *MBB; // Current basic block
// Any YMM register live-in to this function?
bool FnHasLiveInYmm;
@@ -84,7 +83,7 @@
// 2) All states must be clean for the result to be clean
// 3) If none above and one unknown, the result state is also unknown
//
- unsigned computeState(unsigned PrevState, unsigned CurState) {
+ static unsigned computeState(unsigned PrevState, unsigned CurState) {
if (PrevState == ST_INIT)
return CurState;
@@ -122,7 +121,7 @@
}
static bool hasYmmReg(MachineInstr *MI) {
- for (int i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg())
continue;
@@ -148,7 +147,7 @@
const TargetRegisterClass *RC = &X86::VR256RegClass;
for (TargetRegisterClass::iterator i = RC->begin(), e = RC->end();
i != e; i++) {
- if (MRI.isPhysRegUsed(*i)) {
+ if (!MRI.reg_nodbg_empty(*i)) {
YMMUsed = true;
break;
}
@@ -189,7 +188,6 @@
MachineBasicBlock &BB) {
bool Changed = false;
unsigned BBNum = BB.getNumber();
- MBB = &BB;
// Don't process already solved BBs
if (BBSolved[BBNum])
@@ -207,7 +205,7 @@
// The entry MBB for the function may set the initial state to dirty if
// the function receives any YMM incoming arguments
- if (MBB == MF.begin()) {
+ if (&BB == MF.begin()) {
EntryState = ST_CLEAN;
if (FnHasLiveInYmm)
EntryState = ST_DIRTY;
@@ -253,7 +251,7 @@
// When unknown, only compute the information within the block to have
// it available in the exit if possible, but don't change the block.
if (EntryState != ST_UNKNOWN) {
- BuildMI(*MBB, I, dl, TII->get(X86::VZEROUPPER));
+ BuildMI(BB, I, dl, TII->get(X86::VZEROUPPER));
++NumVZU;
}
Modified: llvm/branches/AMDILBackend/lib/Target/XCore/XCoreAsmPrinter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/XCore/XCoreAsmPrinter.cpp?rev=172541&r1=172540&r2=172541&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/XCore/XCoreAsmPrinter.cpp (original)
+++ llvm/branches/AMDILBackend/lib/Target/XCore/XCoreAsmPrinter.cpp Tue Jan 15 11:16:16 2013
@@ -31,7 +31,7 @@
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Target/Mangler.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
@@ -112,7 +112,7 @@
EmitSpecialLLVMGlobal(GV))
return;
- const TargetData *TD = TM.getTargetData();
+ const DataLayout *TD = TM.getDataLayout();
OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(GV, Mang,TM));
Modified: llvm/branches/AMDILBackend/lib/Target/XCore/XCoreFrameLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/XCore/XCoreFrameLowering.cpp?rev=172541&r1=172540&r2=172541&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/XCore/XCoreFrameLowering.cpp (original)
+++ llvm/branches/AMDILBackend/lib/Target/XCore/XCoreFrameLowering.cpp Tue Jan 15 11:16:16 2013
@@ -23,7 +23,7 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Support/ErrorHandling.h"
@@ -98,12 +98,13 @@
DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
bool FP = hasFP(MF);
- bool Nested = MF.getFunction()->
- getAttributes().hasAttrSomewhere(Attribute::Nest);
+ const AttrListPtr &PAL = MF.getFunction()->getAttributes();
- if (Nested) {
- loadFromStack(MBB, MBBI, XCore::R11, 0, dl, TII);
- }
+ for (unsigned I = 0, E = PAL.getNumAttrs(); I != E; ++I)
+ if (PAL.getAttributesAtIndex(I).hasAttribute(Attributes::Nest)) {
+ loadFromStack(MBB, MBBI, XCore::R11, 0, dl, TII);
+ break;
+ }
// Work out frame sizes.
int FrameSize = MFI->getStackSize();
Modified: llvm/branches/AMDILBackend/lib/Target/XCore/XCoreISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/XCore/XCoreISelLowering.cpp?rev=172541&r1=172540&r2=172541&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/XCore/XCoreISelLowering.cpp (original)
+++ llvm/branches/AMDILBackend/lib/Target/XCore/XCoreISelLowering.cpp Tue Jan 15 11:16:16 2013
@@ -285,7 +285,7 @@
llvm_unreachable(0);
}
SDValue base = getGlobalAddressWrapper(GA, GV, DAG);
- const TargetData *TD = TM.getTargetData();
+ const DataLayout *TD = TM.getDataLayout();
unsigned Size = TD->getTypeAllocSize(Ty);
SDValue offset = DAG.getNode(ISD::MUL, dl, MVT::i32, BuildGetId(DAG, dl),
DAG.getConstant(Size, MVT::i32));
@@ -298,7 +298,7 @@
DebugLoc DL = Op.getDebugLoc();
const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
- SDValue Result = DAG.getBlockAddress(BA, getPointerTy(), /*isTarget=*/true);
+ SDValue Result = DAG.getTargetBlockAddress(BA, getPointerTy());
return DAG.getNode(XCoreISD::PCRelativeWrapper, DL, getPointerTy(), Result);
}
@@ -405,7 +405,7 @@
if (allowsUnalignedMemoryAccesses(LD->getMemoryVT()))
return SDValue();
- unsigned ABIAlignment = getTargetData()->
+ unsigned ABIAlignment = getDataLayout()->
getABITypeAlignment(LD->getMemoryVT().getTypeForEVT(*DAG.getContext()));
// Leave aligned load alone.
if (LD->getAlignment() >= ABIAlignment)
@@ -477,7 +477,7 @@
}
// Lower to a call to __misaligned_load(BasePtr).
- Type *IntPtrTy = getTargetData()->getIntPtrType(*DAG.getContext());
+ Type *IntPtrTy = getDataLayout()->getIntPtrType(*DAG.getContext());
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
@@ -507,7 +507,7 @@
if (allowsUnalignedMemoryAccesses(ST->getMemoryVT())) {
return SDValue();
}
- unsigned ABIAlignment = getTargetData()->
+ unsigned ABIAlignment = getDataLayout()->
getABITypeAlignment(ST->getMemoryVT().getTypeForEVT(*DAG.getContext()));
// Leave aligned store alone.
if (ST->getAlignment() >= ABIAlignment) {
@@ -536,7 +536,7 @@
}
// Lower to a call to __misaligned_store(BasePtr, Value).
- Type *IntPtrTy = getTargetData()->getIntPtrType(*DAG.getContext());
+ Type *IntPtrTy = getDataLayout()->getIntPtrType(*DAG.getContext());
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
@@ -1499,7 +1499,7 @@
if (StoreBits % 8) {
break;
}
- unsigned ABIAlignment = getTargetData()->getABITypeAlignment(
+ unsigned ABIAlignment = getDataLayout()->getABITypeAlignment(
ST->getMemoryVT().getTypeForEVT(*DCI.DAG.getContext()));
unsigned Alignment = ST->getAlignment();
if (Alignment >= ABIAlignment) {
@@ -1570,7 +1570,7 @@
if (Ty->getTypeID() == Type::VoidTyID)
return AM.Scale == 0 && isImmUs(AM.BaseOffs) && isImmUs4(AM.BaseOffs);
- const TargetData *TD = TM.getTargetData();
+ const DataLayout *TD = TM.getDataLayout();
unsigned Size = TD->getTypeAllocSize(Ty);
if (AM.BaseGV) {
return Size >= 4 && !AM.HasBaseReg && AM.Scale == 0 &&
Modified: llvm/branches/AMDILBackend/lib/Target/XCore/XCoreInstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/XCore/XCoreInstrInfo.td?rev=172541&r1=172540&r2=172541&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/XCore/XCoreInstrInfo.td (original)
+++ llvm/branches/AMDILBackend/lib/Target/XCore/XCoreInstrInfo.td Tue Jan 15 11:16:16 2013
@@ -33,7 +33,7 @@
SDNPVariadic]>;
def XCoreRetsp : SDNode<"XCoreISD::RETSP", SDTBrind,
- [SDNPHasChain, SDNPOptInGlue]>;
+ [SDNPHasChain, SDNPOptInGlue, SDNPMayLoad]>;
def SDT_XCoreBR_JT : SDTypeProfile<0, 2,
[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
@@ -58,7 +58,7 @@
def SDT_XCoreStwsp : SDTypeProfile<0, 2, [SDTCisInt<1>]>;
def XCoreStwsp : SDNode<"XCoreISD::STWSP", SDT_XCoreStwsp,
- [SDNPHasChain]>;
+ [SDNPHasChain, SDNPMayStore]>;
// These are target-independent nodes, but have target-specific formats.
def SDT_XCoreCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>;
Modified: llvm/branches/AMDILBackend/lib/Target/XCore/XCoreRegisterInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/XCore/XCoreRegisterInfo.cpp?rev=172541&r1=172540&r2=172541&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/XCore/XCoreRegisterInfo.cpp (original)
+++ llvm/branches/AMDILBackend/lib/Target/XCore/XCoreRegisterInfo.cpp Tue Jan 15 11:16:16 2013
@@ -176,7 +176,7 @@
#ifndef NDEBUG
DEBUG(errs() << "\nFunction : "
- << MF.getFunction()->getName() << "\n");
+ << MF.getName() << "\n");
DEBUG(errs() << "<--------->\n");
DEBUG(MI.print(errs()));
DEBUG(errs() << "FrameIndex : " << FrameIndex << "\n");
Modified: llvm/branches/AMDILBackend/lib/Target/XCore/XCoreTargetMachine.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/XCore/XCoreTargetMachine.cpp?rev=172541&r1=172540&r2=172541&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/XCore/XCoreTargetMachine.cpp (original)
+++ llvm/branches/AMDILBackend/lib/Target/XCore/XCoreTargetMachine.cpp Tue Jan 15 11:16:16 2013
@@ -27,12 +27,12 @@
CodeGenOpt::Level OL)
: LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
Subtarget(TT, CPU, FS),
- DataLayout("e-p:32:32:32-a0:0:32-f32:32:32-f64:32:32-i1:8:32-i8:8:32-"
+ DL("e-p:32:32:32-a0:0:32-f32:32:32-f64:32:32-i1:8:32-i8:8:32-"
"i16:16:32-i32:32:32-i64:32:32-n32"),
InstrInfo(),
FrameLowering(Subtarget),
TLInfo(*this),
- TSInfo(*this) {
+ TSInfo(*this), STTI(&TLInfo), VTTI(&TLInfo) {
}
namespace {
Modified: llvm/branches/AMDILBackend/lib/Target/XCore/XCoreTargetMachine.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/XCore/XCoreTargetMachine.h?rev=172541&r1=172540&r2=172541&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/XCore/XCoreTargetMachine.h (original)
+++ llvm/branches/AMDILBackend/lib/Target/XCore/XCoreTargetMachine.h Tue Jan 15 11:16:16 2013
@@ -20,17 +20,20 @@
#include "XCoreISelLowering.h"
#include "XCoreSelectionDAGInfo.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetTransformImpl.h"
+#include "llvm/DataLayout.h"
namespace llvm {
class XCoreTargetMachine : public LLVMTargetMachine {
XCoreSubtarget Subtarget;
- const TargetData DataLayout; // Calculates type size & alignment
+ const DataLayout DL; // Calculates type size & alignment
XCoreInstrInfo InstrInfo;
XCoreFrameLowering FrameLowering;
XCoreTargetLowering TLInfo;
XCoreSelectionDAGInfo TSInfo;
+ ScalarTargetTransformImpl STTI;
+ VectorTargetTransformImpl VTTI;
public:
XCoreTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS, const TargetOptions &Options,
@@ -53,7 +56,13 @@
virtual const TargetRegisterInfo *getRegisterInfo() const {
return &InstrInfo.getRegisterInfo();
}
- virtual const TargetData *getTargetData() const { return &DataLayout; }
+ virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const {
+ return &STTI;
+ }
+ virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const {
+ return &VTTI;
+ }
+ virtual const DataLayout *getDataLayout() const { return &DL; }
// Pass Pipeline Configuration
virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
Modified: llvm/branches/AMDILBackend/lib/Transforms/IPO/ArgumentPromotion.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Transforms/IPO/ArgumentPromotion.cpp?rev=172541&r1=172540&r2=172541&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Transforms/IPO/ArgumentPromotion.cpp (original)
+++ llvm/branches/AMDILBackend/lib/Transforms/IPO/ArgumentPromotion.cpp Tue Jan 15 11:16:16 2013
@@ -153,7 +153,8 @@
SmallPtrSet<Argument*, 8> ArgsToPromote;
SmallPtrSet<Argument*, 8> ByValArgsToTransform;
for (unsigned i = 0; i != PointerArgs.size(); ++i) {
- bool isByVal = F->paramHasAttr(PointerArgs[i].second+1, Attribute::ByVal);
+ bool isByVal=F->getParamAttributes(PointerArgs[i].second+1).
+ hasAttribute(Attributes::ByVal);
Argument *PtrArg = PointerArgs[i].first;
Type *AgTy = cast<PointerType>(PtrArg->getType())->getElementType();
@@ -517,8 +518,10 @@
const AttrListPtr &PAL = F->getAttributes();
// Add any return attributes.
- if (Attributes attrs = PAL.getRetAttributes())
- AttributesVec.push_back(AttributeWithIndex::get(0, attrs));
+ Attributes attrs = PAL.getRetAttributes();
+ if (attrs.hasAttributes())
+ AttributesVec.push_back(AttributeWithIndex::get(AttrListPtr::ReturnIndex,
+ attrs));
// First, determine the new argument list
unsigned ArgIndex = 1;
@@ -534,7 +537,8 @@
} else if (!ArgsToPromote.count(I)) {
// Unchanged argument
Params.push_back(I->getType());
- if (Attributes attrs = PAL.getParamAttributes(ArgIndex))
+ Attributes attrs = PAL.getParamAttributes(ArgIndex);
+ if (attrs.hasAttributes())
AttributesVec.push_back(AttributeWithIndex::get(Params.size(), attrs));
} else if (I->use_empty()) {
// Dead argument (which are always marked as promotable)
@@ -587,19 +591,13 @@
}
// Add any function attributes.
- if (Attributes attrs = PAL.getFnAttributes())
- AttributesVec.push_back(AttributeWithIndex::get(~0, attrs));
+ attrs = PAL.getFnAttributes();
+ if (attrs.hasAttributes())
+ AttributesVec.push_back(AttributeWithIndex::get(AttrListPtr::FunctionIndex,
+ attrs));
Type *RetTy = FTy->getReturnType();
- // Work around LLVM bug PR56: the CWriter cannot emit varargs functions which
- // have zero fixed arguments.
- bool ExtraArgHack = false;
- if (Params.empty() && FTy->isVarArg()) {
- ExtraArgHack = true;
- Params.push_back(Type::getInt32Ty(F->getContext()));
- }
-
// Construct the new function type using the new arguments.
FunctionType *NFTy = FunctionType::get(RetTy, Params, FTy->isVarArg());
@@ -613,7 +611,7 @@
// Recompute the parameter attributes list based on the new arguments for
// the function.
- NF->setAttributes(AttrListPtr::get(AttributesVec));
+ NF->setAttributes(AttrListPtr::get(F->getContext(), AttributesVec));
AttributesVec.clear();
F->getParent()->getFunctionList().insert(F, NF);
@@ -641,8 +639,10 @@
const AttrListPtr &CallPAL = CS.getAttributes();
// Add any return attributes.
- if (Attributes attrs = CallPAL.getRetAttributes())
- AttributesVec.push_back(AttributeWithIndex::get(0, attrs));
+ Attributes attrs = CallPAL.getRetAttributes();
+ if (attrs.hasAttributes())
+ AttributesVec.push_back(AttributeWithIndex::get(AttrListPtr::ReturnIndex,
+ attrs));
// Loop over the operands, inserting GEP and loads in the caller as
// appropriate.
@@ -653,7 +653,8 @@
if (!ArgsToPromote.count(I) && !ByValArgsToTransform.count(I)) {
Args.push_back(*AI); // Unmodified argument
- if (Attributes Attrs = CallPAL.getParamAttributes(ArgIndex))
+ Attributes Attrs = CallPAL.getParamAttributes(ArgIndex);
+ if (Attrs.hasAttributes())
AttributesVec.push_back(AttributeWithIndex::get(Args.size(), Attrs));
} else if (ByValArgsToTransform.count(I)) {
@@ -711,30 +712,32 @@
}
}
- if (ExtraArgHack)
- Args.push_back(Constant::getNullValue(Type::getInt32Ty(F->getContext())));
-
// Push any varargs arguments on the list.
for (; AI != CS.arg_end(); ++AI, ++ArgIndex) {
Args.push_back(*AI);
- if (Attributes Attrs = CallPAL.getParamAttributes(ArgIndex))
+ Attributes Attrs = CallPAL.getParamAttributes(ArgIndex);
+ if (Attrs.hasAttributes())
AttributesVec.push_back(AttributeWithIndex::get(Args.size(), Attrs));
}
// Add any function attributes.
- if (Attributes attrs = CallPAL.getFnAttributes())
- AttributesVec.push_back(AttributeWithIndex::get(~0, attrs));
+ attrs = CallPAL.getFnAttributes();
+ if (attrs.hasAttributes())
+ AttributesVec.push_back(AttributeWithIndex::get(AttrListPtr::FunctionIndex,
+ attrs));
Instruction *New;
if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(),
Args, "", Call);
cast<InvokeInst>(New)->setCallingConv(CS.getCallingConv());
- cast<InvokeInst>(New)->setAttributes(AttrListPtr::get(AttributesVec));
+ cast<InvokeInst>(New)->setAttributes(AttrListPtr::get(II->getContext(),
+ AttributesVec));
} else {
New = CallInst::Create(NF, Args, "", Call);
cast<CallInst>(New)->setCallingConv(CS.getCallingConv());
- cast<CallInst>(New)->setAttributes(AttrListPtr::get(AttributesVec));
+ cast<CallInst>(New)->setAttributes(AttrListPtr::get(New->getContext(),
+ AttributesVec));
if (cast<CallInst>(Call)->isTailCall())
cast<CallInst>(New)->setTailCall();
}
@@ -870,16 +873,9 @@
}
// Increment I2 past all of the arguments added for this promoted pointer.
- for (unsigned i = 0, e = ArgIndices.size(); i != e; ++i)
- ++I2;
+ std::advance(I2, ArgIndices.size());
}
- // Notify the alias analysis implementation that we inserted a new argument.
- if (ExtraArgHack)
- AA.copyValue(Constant::getNullValue(Type::getInt32Ty(F->getContext())),
- NF->arg_begin());
-
-
// Tell the alias analysis that the old function is about to disappear.
AA.replaceWithNewValue(F, NF);
Modified: llvm/branches/AMDILBackend/lib/Transforms/IPO/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Transforms/IPO/CMakeLists.txt?rev=172541&r1=172540&r2=172541&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Transforms/IPO/CMakeLists.txt (original)
+++ llvm/branches/AMDILBackend/lib/Transforms/IPO/CMakeLists.txt Tue Jan 15 11:16:16 2013
@@ -1,5 +1,6 @@
add_llvm_library(LLVMipo
ArgumentPromotion.cpp
+ BarrierNoopPass.cpp
ConstantMerge.cpp
DeadArgumentElimination.cpp
ExtractGV.cpp
Modified: llvm/branches/AMDILBackend/lib/Transforms/IPO/ConstantMerge.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Transforms/IPO/ConstantMerge.cpp?rev=172541&r1=172540&r2=172541&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Transforms/IPO/ConstantMerge.cpp (original)
+++ llvm/branches/AMDILBackend/lib/Transforms/IPO/ConstantMerge.cpp Tue Jan 15 11:16:16 2013
@@ -23,7 +23,7 @@
#include "llvm/DerivedTypes.h"
#include "llvm/Module.h"
#include "llvm/Pass.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -50,7 +50,7 @@
// alignment to a concrete value.
unsigned getAlignment(GlobalVariable *GV) const;
- const TargetData *TD;
+ const DataLayout *TD;
};
}
@@ -98,7 +98,7 @@
}
bool ConstantMerge::runOnModule(Module &M) {
- TD = getAnalysisIfAvailable<TargetData>();
+ TD = getAnalysisIfAvailable<DataLayout>();
// Find all the globals that are marked "used". These cannot be merged.
SmallPtrSet<const GlobalValue*, 8> UsedGlobals;
@@ -107,7 +107,7 @@
// Map unique <constants, has-unknown-alignment> pairs to globals. We don't
// want to merge globals of unknown alignment with those of explicit
- // alignment. If we have TargetData, we always know the alignment.
+ // alignment. If we have DataLayout, we always know the alignment.
DenseMap<PointerIntPair<Constant*, 1, bool>, GlobalVariable*> CMap;
// Replacements - This vector contains a list of replacements to perform.
More information about the llvm-branch-commits
mailing list