[llvm] r262244 - [X86] Move the ATOMIC_LOAD_OP ISel from DAGToDAG to ISelLowering. NFCI.
Ahmed Bougacha via llvm-commits
llvm-commits at lists.llvm.org
Mon Feb 29 11:28:08 PST 2016
Author: ab
Date: Mon Feb 29 13:28:07 2016
New Revision: 262244
URL: http://llvm.org/viewvc/llvm-project?rev=262244&view=rev
Log:
[X86] Move the ATOMIC_LOAD_OP ISel from DAGToDAG to ISelLowering. NFCI.
This is long-standing dirtiness, as acknowledged by r77582:

    The current trick is to select it into a merge_values with
    the first definition being an implicit_def. The proper solution is
    to add new ISD opcodes for the no-output variant.

Doing this before selection will let us combine away some constructs.
Differential Revision: http://reviews.llvm.org/D17659
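
Since this is NFCI, the generated code is expected to be unchanged; what
moves is where the LOCK-prefixed forms get picked. For a concrete sense of
the inputs involved, here is a minimal, hypothetical C++ example (not part
of the commit; the x86-64 output in the comments is the expected result for
unused-result atomics):

    #include <atomic>

    // Results are discarded, so each op maps to a single LOCK-prefixed
    // RMW instruction; after this patch the match happens via the new
    // X86ISD::L* nodes and TableGen patterns rather than in DAGToDAG.
    void set_bits(std::atomic<int> &a)   { a.fetch_or(0x10);  } // lock orl  $16, (%rdi)
    void clear_bits(std::atomic<int> &a) { a.fetch_and(~0x4); } // lock andl $-5, (%rdi)
    void flip_bits(std::atomic<int> &a)  { a.fetch_xor(0x1);  } // lock xorl $1,  (%rdi)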
Modified:
llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/lib/Target/X86/X86ISelLowering.h
llvm/trunk/lib/Target/X86/X86InstrCompiler.td
llvm/trunk/lib/Target/X86/X86InstrInfo.td
Modified: llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp?rev=262244&r1=262243&r2=262244&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp Mon Feb 29 13:28:07 2016
@@ -194,7 +194,6 @@ namespace {
private:
SDNode *Select(SDNode *N) override;
SDNode *selectGather(SDNode *N, unsigned Opc);
- SDNode *selectAtomicLoadArith(SDNode *Node, MVT NVT);
bool foldOffsetIntoAddress(uint64_t Offset, X86ISelAddressMode &AM);
bool matchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM);
@@ -1713,295 +1712,6 @@ SDNode *X86DAGToDAGISel::getGlobalBaseRe
return CurDAG->getRegister(GlobalBaseReg, TLI->getPointerTy(DL)).getNode();
}
-/// Atomic opcode table
-///
-enum AtomicOpc {
- ADD,
- SUB,
- INC,
- DEC,
- OR,
- AND,
- XOR,
- AtomicOpcEnd
-};
-
-enum AtomicSz {
- ConstantI8,
- I8,
- SextConstantI16,
- ConstantI16,
- I16,
- SextConstantI32,
- ConstantI32,
- I32,
- SextConstantI64,
- ConstantI64,
- I64,
- AtomicSzEnd
-};
-
-static const uint16_t AtomicOpcTbl[AtomicOpcEnd][AtomicSzEnd] = {
- {
- X86::LOCK_ADD8mi,
- X86::LOCK_ADD8mr,
- X86::LOCK_ADD16mi8,
- X86::LOCK_ADD16mi,
- X86::LOCK_ADD16mr,
- X86::LOCK_ADD32mi8,
- X86::LOCK_ADD32mi,
- X86::LOCK_ADD32mr,
- X86::LOCK_ADD64mi8,
- X86::LOCK_ADD64mi32,
- X86::LOCK_ADD64mr,
- },
- {
- X86::LOCK_SUB8mi,
- X86::LOCK_SUB8mr,
- X86::LOCK_SUB16mi8,
- X86::LOCK_SUB16mi,
- X86::LOCK_SUB16mr,
- X86::LOCK_SUB32mi8,
- X86::LOCK_SUB32mi,
- X86::LOCK_SUB32mr,
- X86::LOCK_SUB64mi8,
- X86::LOCK_SUB64mi32,
- X86::LOCK_SUB64mr,
- },
- {
- 0,
- X86::LOCK_INC8m,
- 0,
- 0,
- X86::LOCK_INC16m,
- 0,
- 0,
- X86::LOCK_INC32m,
- 0,
- 0,
- X86::LOCK_INC64m,
- },
- {
- 0,
- X86::LOCK_DEC8m,
- 0,
- 0,
- X86::LOCK_DEC16m,
- 0,
- 0,
- X86::LOCK_DEC32m,
- 0,
- 0,
- X86::LOCK_DEC64m,
- },
- {
- X86::LOCK_OR8mi,
- X86::LOCK_OR8mr,
- X86::LOCK_OR16mi8,
- X86::LOCK_OR16mi,
- X86::LOCK_OR16mr,
- X86::LOCK_OR32mi8,
- X86::LOCK_OR32mi,
- X86::LOCK_OR32mr,
- X86::LOCK_OR64mi8,
- X86::LOCK_OR64mi32,
- X86::LOCK_OR64mr,
- },
- {
- X86::LOCK_AND8mi,
- X86::LOCK_AND8mr,
- X86::LOCK_AND16mi8,
- X86::LOCK_AND16mi,
- X86::LOCK_AND16mr,
- X86::LOCK_AND32mi8,
- X86::LOCK_AND32mi,
- X86::LOCK_AND32mr,
- X86::LOCK_AND64mi8,
- X86::LOCK_AND64mi32,
- X86::LOCK_AND64mr,
- },
- {
- X86::LOCK_XOR8mi,
- X86::LOCK_XOR8mr,
- X86::LOCK_XOR16mi8,
- X86::LOCK_XOR16mi,
- X86::LOCK_XOR16mr,
- X86::LOCK_XOR32mi8,
- X86::LOCK_XOR32mi,
- X86::LOCK_XOR32mr,
- X86::LOCK_XOR64mi8,
- X86::LOCK_XOR64mi32,
- X86::LOCK_XOR64mr,
- }
-};
-
-// Return the target constant operand for atomic-load-op and do simple
-// translations, such as from atomic-load-add to lock-sub. The return value is
-// one of the following 3 cases:
-// + target-constant, the operand could be supported as a target constant.
-// + empty, the operand is not needed any more with the new op selected.
-// + non-empty, otherwise.
-static SDValue getAtomicLoadArithTargetConstant(SelectionDAG *CurDAG,
- SDLoc dl,
- enum AtomicOpc &Op, MVT NVT,
- SDValue Val,
- const X86Subtarget *Subtarget) {
- if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val)) {
- int64_t CNVal = CN->getSExtValue();
- // Quit if not 32-bit imm.
- if ((int32_t)CNVal != CNVal)
- return Val;
- // Quit if INT32_MIN: it would be negated as it is negative and overflow,
- // producing an immediate that does not fit in the 32 bits available for
- // an immediate operand to sub. However, it still fits in 32 bits for the
- // add (since it is not negated) so we can return target-constant.
- if (CNVal == INT32_MIN)
- return CurDAG->getTargetConstant(CNVal, dl, NVT);
- // For atomic-load-add, we could do some optimizations.
- if (Op == ADD) {
- // Translate to INC/DEC if ADD by 1 or -1.
- if (((CNVal == 1) || (CNVal == -1)) && !Subtarget->slowIncDec()) {
- Op = (CNVal == 1) ? INC : DEC;
- // No more constant operand after being translated into INC/DEC.
- return SDValue();
- }
- // Translate to SUB if ADD by negative value.
- if (CNVal < 0) {
- Op = SUB;
- CNVal = -CNVal;
- }
- }
- return CurDAG->getTargetConstant(CNVal, dl, NVT);
- }
-
- // If the value operand is single-used, try to optimize it.
- if (Op == ADD && Val.hasOneUse()) {
- // Translate (atomic-load-add ptr (sub 0 x)) back to (lock-sub x).
- if (Val.getOpcode() == ISD::SUB && X86::isZeroNode(Val.getOperand(0))) {
- Op = SUB;
- return Val.getOperand(1);
- }
- // A special case for i16, which needs truncating as, in most cases, it's
- // promoted to i32. We will translate
- // (atomic-load-add (truncate (sub 0 x))) to (lock-sub (EXTRACT_SUBREG x))
- if (Val.getOpcode() == ISD::TRUNCATE && NVT == MVT::i16 &&
- Val.getOperand(0).getOpcode() == ISD::SUB &&
- X86::isZeroNode(Val.getOperand(0).getOperand(0))) {
- Op = SUB;
- Val = Val.getOperand(0);
- return CurDAG->getTargetExtractSubreg(X86::sub_16bit, dl, NVT,
- Val.getOperand(1));
- }
- }
-
- return Val;
-}
-
-SDNode *X86DAGToDAGISel::selectAtomicLoadArith(SDNode *Node, MVT NVT) {
- if (Node->hasAnyUseOfValue(0))
- return nullptr;
-
- SDLoc dl(Node);
-
- // Optimize common patterns for __sync_or_and_fetch and similar arith
- // operations where the result is not used. This allows us to use the "lock"
- // version of the arithmetic instruction.
- SDValue Chain = Node->getOperand(0);
- SDValue Ptr = Node->getOperand(1);
- SDValue Val = Node->getOperand(2);
- SDValue Base, Scale, Index, Disp, Segment;
- if (!selectAddr(Node, Ptr, Base, Scale, Index, Disp, Segment))
- return nullptr;
-
- // Which index into the table.
- enum AtomicOpc Op;
- switch (Node->getOpcode()) {
- default:
- return nullptr;
- case ISD::ATOMIC_LOAD_OR:
- Op = OR;
- break;
- case ISD::ATOMIC_LOAD_AND:
- Op = AND;
- break;
- case ISD::ATOMIC_LOAD_XOR:
- Op = XOR;
- break;
- case ISD::ATOMIC_LOAD_ADD:
- Op = ADD;
- break;
- }
-
- Val = getAtomicLoadArithTargetConstant(CurDAG, dl, Op, NVT, Val, Subtarget);
- bool isUnOp = !Val.getNode();
- bool isCN = Val.getNode() && (Val.getOpcode() == ISD::TargetConstant);
-
- unsigned Opc = 0;
- switch (NVT.SimpleTy) {
- default: return nullptr;
- case MVT::i8:
- if (isCN)
- Opc = AtomicOpcTbl[Op][ConstantI8];
- else
- Opc = AtomicOpcTbl[Op][I8];
- break;
- case MVT::i16:
- if (isCN) {
- if (immSext8(Val.getNode()))
- Opc = AtomicOpcTbl[Op][SextConstantI16];
- else
- Opc = AtomicOpcTbl[Op][ConstantI16];
- } else
- Opc = AtomicOpcTbl[Op][I16];
- break;
- case MVT::i32:
- if (isCN) {
- if (immSext8(Val.getNode()))
- Opc = AtomicOpcTbl[Op][SextConstantI32];
- else
- Opc = AtomicOpcTbl[Op][ConstantI32];
- } else
- Opc = AtomicOpcTbl[Op][I32];
- break;
- case MVT::i64:
- if (isCN) {
- if (immSext8(Val.getNode()))
- Opc = AtomicOpcTbl[Op][SextConstantI64];
- else if (i64immSExt32(Val.getNode()))
- Opc = AtomicOpcTbl[Op][ConstantI64];
- else
- llvm_unreachable("True 64 bits constant in SelectAtomicLoadArith");
- } else
- Opc = AtomicOpcTbl[Op][I64];
- break;
- }
-
- assert(Opc != 0 && "Invalid arith lock transform!");
-
- // Building the new node.
- SDValue Ret;
- if (isUnOp) {
- SDValue Ops[] = { Base, Scale, Index, Disp, Segment, Chain };
- Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops), 0);
- } else {
- SDValue Ops[] = { Base, Scale, Index, Disp, Segment, Val, Chain };
- Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops), 0);
- }
-
- // Copying the MachineMemOperand.
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = cast<MemSDNode>(Node)->getMemOperand();
- cast<MachineSDNode>(Ret)->setMemRefs(MemOp, MemOp + 1);
-
- // We need to have two outputs as that is what the original instruction had.
- // So we add a dummy, undefined output. This is safe as we checked first
- // that no-one uses our output anyway.
- SDValue Undef = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
- dl, NVT), 0);
- SDValue RetVals[] = { Undef, Ret };
- return CurDAG->getMergeValues(RetVals, dl).getNode();
-}
-
/// Test whether the given X86ISD::CMP node has any uses which require the SF
/// or OF bits to be accurate.
static bool hasNoSignedComparisonUses(SDNode *N) {
@@ -2301,15 +2011,6 @@ SDNode *X86DAGToDAGISel::Select(SDNode *
return nullptr;
}
- case ISD::ATOMIC_LOAD_XOR:
- case ISD::ATOMIC_LOAD_AND:
- case ISD::ATOMIC_LOAD_OR:
- case ISD::ATOMIC_LOAD_ADD: {
- SDNode *RetVal = selectAtomicLoadArith(Node, NVT);
- if (RetVal)
- return RetVal;
- break;
- }
case ISD::AND:
case ISD::OR:
case ISD::XOR: {
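
One subtlety from the deleted helper worth remembering: it refused to
rewrite an add of INT32_MIN as a sub, because negating that value overflows
the 32-bit immediate range that sub can encode, while the un-negated add
immediate still fits. A small standalone check (hypothetical, for
illustration only):

    #include <cassert>
    #include <cstdint>

    int main() {
      int64_t v = INT32_MIN;   // -2147483648: fits a sign-extended i32 imm
      int64_t negated = -v;    //  2147483648: does not fit an i32 imm
      assert((int32_t)v == v);
      assert((int32_t)negated != negated);
    }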
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=262244&r1=262243&r2=262244&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Mon Feb 29 13:28:07 2016
@@ -480,6 +480,10 @@ X86TargetLowering::X86TargetLowering(con
for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Custom);
setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_ADD, VT, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_OR, VT, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_XOR, VT, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_AND, VT, Custom);
setOperationAction(ISD::ATOMIC_STORE, VT, Custom);
}
@@ -20333,19 +20337,68 @@ static SDValue LowerCTPOP(SDValue Op, co
return LowerVectorCTPOP(Op, Subtarget, DAG);
}
-static SDValue LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) {
- SDNode *Node = Op.getNode();
- SDLoc dl(Node);
- EVT T = Node->getValueType(0);
- SDValue negOp = DAG.getNode(ISD::SUB, dl, T,
- DAG.getConstant(0, dl, T), Node->getOperand(2));
- return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, dl,
- cast<AtomicSDNode>(Node)->getMemoryVT(),
- Node->getOperand(0),
- Node->getOperand(1), negOp,
- cast<AtomicSDNode>(Node)->getMemOperand(),
- cast<AtomicSDNode>(Node)->getOrdering(),
- cast<AtomicSDNode>(Node)->getSynchScope());
+static SDValue lowerAtomicArithWithLOCK(SDValue N, SelectionDAG &DAG) {
+ unsigned NewOpc = 0;
+ switch (N->getOpcode()) {
+ case ISD::ATOMIC_LOAD_ADD:
+ NewOpc = X86ISD::LADD;
+ break;
+ case ISD::ATOMIC_LOAD_SUB:
+ NewOpc = X86ISD::LSUB;
+ break;
+ case ISD::ATOMIC_LOAD_OR:
+ NewOpc = X86ISD::LOR;
+ break;
+ case ISD::ATOMIC_LOAD_XOR:
+ NewOpc = X86ISD::LXOR;
+ break;
+ case ISD::ATOMIC_LOAD_AND:
+ NewOpc = X86ISD::LAND;
+ break;
+ default:
+ llvm_unreachable("Unknown ATOMIC_LOAD_ opcode");
+ }
+
+ MachineMemOperand *MMO = cast<MemSDNode>(N)->getMemOperand();
+ return DAG.getMemIntrinsicNode(
+ NewOpc, SDLoc(N), DAG.getVTList(MVT::i32, MVT::Other),
+ {N->getOperand(0), N->getOperand(1), N->getOperand(2)},
+ /*MemVT=*/N->getSimpleValueType(0), MMO);
+}
+
+/// Lower atomic_load_ops into LOCK-prefixed operations.
+static SDValue lowerAtomicArith(SDValue N, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
+ SDValue Chain = N->getOperand(0);
+ SDValue LHS = N->getOperand(1);
+ SDValue RHS = N->getOperand(2);
+ unsigned Opc = N->getOpcode();
+ MVT VT = N->getSimpleValueType(0);
+ SDLoc DL(N);
+
+ // We can lower atomic_load_add into LXADD. However, any other atomicrmw op
+ // can only be lowered when the result is unused. They should have already
+ // been transformed into a cmpxchg loop in AtomicExpand.
+ if (N->hasAnyUseOfValue(0)) {
+ // Handle (atomic_load_sub p, v) as (atomic_load_add p, -v), to be able to
+ // select LXADD if LOCK_SUB can't be selected.
+ if (Opc == ISD::ATOMIC_LOAD_SUB) {
+ AtomicSDNode *AN = cast<AtomicSDNode>(N.getNode());
+ RHS = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), RHS);
+ return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, DL, VT, Chain, LHS,
+ RHS, AN->getMemOperand(), AN->getOrdering(),
+ AN->getSynchScope());
+ }
+ assert(Opc == ISD::ATOMIC_LOAD_ADD &&
+ "Used AtomicRMW ops other than Add should have been expanded!");
+ return N;
+ }
+
+ SDValue LockOp = lowerAtomicArithWithLOCK(N, DAG);
+ // RAUW the chain, but don't worry about the result, as it's unused.
+ assert(!N->hasAnyUseOfValue(0));
+ DAG.ReplaceAllUsesOfValueWith(N.getValue(1), LockOp.getValue(1));
+ return SDValue();
}
static SDValue LowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) {
@@ -20767,7 +20820,11 @@ SDValue X86TargetLowering::LowerOperatio
case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
return LowerCMP_SWAP(Op, Subtarget, DAG);
case ISD::CTPOP: return LowerCTPOP(Op, Subtarget, DAG);
- case ISD::ATOMIC_LOAD_SUB: return LowerLOAD_SUB(Op,DAG);
+ case ISD::ATOMIC_LOAD_ADD:
+ case ISD::ATOMIC_LOAD_SUB:
+ case ISD::ATOMIC_LOAD_OR:
+ case ISD::ATOMIC_LOAD_XOR:
+ case ISD::ATOMIC_LOAD_AND: return lowerAtomicArith(Op, DAG, Subtarget);
case ISD::ATOMIC_STORE: return LowerATOMIC_STORE(Op,DAG);
case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, Subtarget, DAG);
@@ -21221,6 +21278,11 @@ const char *X86TargetLowering::getTarget
case X86ISD::LCMPXCHG_DAG: return "X86ISD::LCMPXCHG_DAG";
case X86ISD::LCMPXCHG8_DAG: return "X86ISD::LCMPXCHG8_DAG";
case X86ISD::LCMPXCHG16_DAG: return "X86ISD::LCMPXCHG16_DAG";
+ case X86ISD::LADD: return "X86ISD::LADD";
+ case X86ISD::LSUB: return "X86ISD::LSUB";
+ case X86ISD::LOR: return "X86ISD::LOR";
+ case X86ISD::LXOR: return "X86ISD::LXOR";
+ case X86ISD::LAND: return "X86ISD::LAND";
case X86ISD::VZEXT_MOVL: return "X86ISD::VZEXT_MOVL";
case X86ISD::VZEXT_LOAD: return "X86ISD::VZEXT_LOAD";
case X86ISD::VZEXT: return "X86ISD::VZEXT";
@@ -28861,6 +28923,26 @@ static SDValue performVZEXTCombine(SDNod
return SDValue();
}
+// Canonicalize (LSUB p, 1) -> (LADD p, -1).
+static SDValue performLSUBCombine(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
+ SDValue Chain = N->getOperand(0);
+ SDValue LHS = N->getOperand(1);
+ SDValue RHS = N->getOperand(2);
+ MVT VT = RHS.getSimpleValueType();
+ SDLoc DL(N);
+
+ auto *C = dyn_cast<ConstantSDNode>(RHS);
+ if (!C || C->getZExtValue() != 1)
+ return SDValue();
+
+ RHS = DAG.getConstant(-1, DL, VT);
+ MachineMemOperand *MMO = cast<MemSDNode>(N)->getMemOperand();
+ return DAG.getMemIntrinsicNode(X86ISD::LADD, DL,
+ DAG.getVTList(MVT::i32, MVT::Other),
+ {Chain, LHS, RHS}, VT, MMO);
+}
+
SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
@@ -28937,6 +29019,7 @@ SDValue X86TargetLowering::PerformDAGCom
case ISD::FMA: return PerformFMACombine(N, DAG, Subtarget);
case ISD::MGATHER:
case ISD::MSCATTER: return PerformGatherScatterCombine(N, DAG);
+ case X86ISD::LSUB: return performLSUBCombine(N, DAG, Subtarget);
}
return SDValue();
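
The hasAnyUseOfValue(0) gate above is the key split, and it matches the
behavior of the deleted DAGToDAG path: when the result is used,
atomic_load_add (and atomic_load_sub, after negating the operand) still
goes through XADD, while used results of and/or/xor never reach this code
because AtomicExpand has already turned them into cmpxchg loops. A
hypothetical illustration (expected x86-64 output in comments):

    #include <atomic>

    int fetch_then_add(std::atomic<int> &a) { return a.fetch_add(2); } // lock xaddl %eax, (%rdi)
    void add_only(std::atomic<int> &a)      { a.fetch_add(2); }        // lock addl $2, (%rdi)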
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.h?rev=262244&r1=262243&r2=262244&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.h (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.h Mon Feb 29 13:28:07 2016
@@ -514,6 +514,10 @@ namespace llvm {
LCMPXCHG8_DAG,
LCMPXCHG16_DAG,
+ /// LOCK-prefixed arithmetic read-modify-write instructions.
+ /// EFLAGS, OUTCHAIN = LADD(INCHAIN, PTR, RHS)
+ LADD, LSUB, LOR, LXOR, LAND,
+
// Load, scalar_to_vector, and zero extend.
VZEXT_LOAD,
Modified: llvm/trunk/lib/Target/X86/X86InstrCompiler.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrCompiler.td?rev=262244&r1=262243&r2=262244&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrCompiler.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrCompiler.td Mon Feb 29 13:28:07 2016
@@ -568,7 +568,7 @@ def Int_MemBarrier : I<0, Pseudo, (outs)
// ImmOpc8 corresponds to the mi8 version of the instruction
// ImmMod corresponds to the instruction format of the mi and mi8 versions
multiclass LOCK_ArithBinOp<bits<8> RegOpc, bits<8> ImmOpc, bits<8> ImmOpc8,
- Format ImmMod, string mnemonic> {
+ Format ImmMod, SDPatternOperator Op, string mnemonic> {
let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1,
SchedRW = [WriteALULd, WriteRMW] in {
@@ -577,106 +577,124 @@ def NAME#8mr : I<{RegOpc{7}, RegOpc{6},
MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2),
!strconcat(mnemonic, "{b}\t",
"{$src2, $dst|$dst, $src2}"),
- [], IIC_ALU_NONMEM>, LOCK;
+ [(set EFLAGS, (Op addr:$dst, GR8:$src2))],
+ IIC_ALU_NONMEM>, LOCK;
+
def NAME#16mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 },
MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
!strconcat(mnemonic, "{w}\t",
"{$src2, $dst|$dst, $src2}"),
- [], IIC_ALU_NONMEM>, OpSize16, LOCK;
+ [(set EFLAGS, (Op addr:$dst, GR16:$src2))],
+ IIC_ALU_NONMEM>, OpSize16, LOCK;
+
def NAME#32mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 },
MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
!strconcat(mnemonic, "{l}\t",
"{$src2, $dst|$dst, $src2}"),
- [], IIC_ALU_NONMEM>, OpSize32, LOCK;
+ [(set EFLAGS, (Op addr:$dst, GR32:$src2))],
+ IIC_ALU_NONMEM>, OpSize32, LOCK;
+
def NAME#64mr : RI<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 },
MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
!strconcat(mnemonic, "{q}\t",
"{$src2, $dst|$dst, $src2}"),
- [], IIC_ALU_NONMEM>, LOCK;
+ [(set EFLAGS, (Op addr:$dst, GR64:$src2))],
+ IIC_ALU_NONMEM>, LOCK;
def NAME#8mi : Ii8<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 0 },
ImmMod, (outs), (ins i8mem :$dst, i8imm :$src2),
!strconcat(mnemonic, "{b}\t",
"{$src2, $dst|$dst, $src2}"),
- [], IIC_ALU_MEM>, LOCK;
+ [(set EFLAGS, (Op addr:$dst, (i8 imm:$src2)))],
+ IIC_ALU_MEM>, LOCK;
def NAME#16mi : Ii16<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 },
ImmMod, (outs), (ins i16mem :$dst, i16imm :$src2),
!strconcat(mnemonic, "{w}\t",
"{$src2, $dst|$dst, $src2}"),
- [], IIC_ALU_MEM>, OpSize16, LOCK;
+ [(set EFLAGS, (Op addr:$dst, (i16 imm:$src2)))],
+ IIC_ALU_MEM>, OpSize16, LOCK;
def NAME#32mi : Ii32<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 },
ImmMod, (outs), (ins i32mem :$dst, i32imm :$src2),
!strconcat(mnemonic, "{l}\t",
"{$src2, $dst|$dst, $src2}"),
- [], IIC_ALU_MEM>, OpSize32, LOCK;
+ [(set EFLAGS, (Op addr:$dst, (i32 imm:$src2)))],
+ IIC_ALU_MEM>, OpSize32, LOCK;
def NAME#64mi32 : RIi32S<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 },
ImmMod, (outs), (ins i64mem :$dst, i64i32imm :$src2),
!strconcat(mnemonic, "{q}\t",
"{$src2, $dst|$dst, $src2}"),
- [], IIC_ALU_MEM>, LOCK;
+ [(set EFLAGS, (Op addr:$dst, i64immSExt32:$src2))],
+ IIC_ALU_MEM>, LOCK;
def NAME#16mi8 : Ii8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},
ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 },
ImmMod, (outs), (ins i16mem :$dst, i16i8imm :$src2),
!strconcat(mnemonic, "{w}\t",
"{$src2, $dst|$dst, $src2}"),
- [], IIC_ALU_MEM>, OpSize16, LOCK;
+ [(set EFLAGS, (Op addr:$dst, i16immSExt8:$src2))],
+ IIC_ALU_MEM>, OpSize16, LOCK;
+
def NAME#32mi8 : Ii8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},
ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 },
ImmMod, (outs), (ins i32mem :$dst, i32i8imm :$src2),
!strconcat(mnemonic, "{l}\t",
"{$src2, $dst|$dst, $src2}"),
- [], IIC_ALU_MEM>, OpSize32, LOCK;
+ [(set EFLAGS, (Op addr:$dst, i32immSExt8:$src2))],
+ IIC_ALU_MEM>, OpSize32, LOCK;
+
def NAME#64mi8 : RIi8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},
ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 },
ImmMod, (outs), (ins i64mem :$dst, i64i8imm :$src2),
!strconcat(mnemonic, "{q}\t",
"{$src2, $dst|$dst, $src2}"),
- [], IIC_ALU_MEM>, LOCK;
+ [(set EFLAGS, (Op addr:$dst, i64immSExt32:$src2))],
+ IIC_ALU_MEM>, LOCK;
}
}
-defm LOCK_ADD : LOCK_ArithBinOp<0x00, 0x80, 0x83, MRM0m, "add">;
-defm LOCK_SUB : LOCK_ArithBinOp<0x28, 0x80, 0x83, MRM5m, "sub">;
-defm LOCK_OR : LOCK_ArithBinOp<0x08, 0x80, 0x83, MRM1m, "or">;
-defm LOCK_AND : LOCK_ArithBinOp<0x20, 0x80, 0x83, MRM4m, "and">;
-defm LOCK_XOR : LOCK_ArithBinOp<0x30, 0x80, 0x83, MRM6m, "xor">;
+defm LOCK_ADD : LOCK_ArithBinOp<0x00, 0x80, 0x83, MRM0m, X86lock_add, "add">;
+defm LOCK_SUB : LOCK_ArithBinOp<0x28, 0x80, 0x83, MRM5m, X86lock_sub, "sub">;
+defm LOCK_OR : LOCK_ArithBinOp<0x08, 0x80, 0x83, MRM1m, X86lock_or , "or">;
+defm LOCK_AND : LOCK_ArithBinOp<0x20, 0x80, 0x83, MRM4m, X86lock_and, "and">;
+defm LOCK_XOR : LOCK_ArithBinOp<0x30, 0x80, 0x83, MRM6m, X86lock_xor, "xor">;
-// Optimized codegen when the non-memory output is not used.
multiclass LOCK_ArithUnOp<bits<8> Opc8, bits<8> Opc, Format Form,
- string mnemonic> {
+ int Increment, string mnemonic> {
let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1,
- SchedRW = [WriteALULd, WriteRMW] in {
-
+ SchedRW = [WriteALULd, WriteRMW], Predicates = [NotSlowIncDec] in {
def NAME#8m : I<Opc8, Form, (outs), (ins i8mem :$dst),
!strconcat(mnemonic, "{b}\t$dst"),
- [], IIC_UNARY_MEM>, LOCK;
+ [(set EFLAGS, (X86lock_add addr:$dst, (i8 Increment)))],
+ IIC_UNARY_MEM>, LOCK;
def NAME#16m : I<Opc, Form, (outs), (ins i16mem:$dst),
!strconcat(mnemonic, "{w}\t$dst"),
- [], IIC_UNARY_MEM>, OpSize16, LOCK;
+ [(set EFLAGS, (X86lock_add addr:$dst, (i16 Increment)))],
+ IIC_UNARY_MEM>, OpSize16, LOCK;
def NAME#32m : I<Opc, Form, (outs), (ins i32mem:$dst),
!strconcat(mnemonic, "{l}\t$dst"),
- [], IIC_UNARY_MEM>, OpSize32, LOCK;
+ [(set EFLAGS, (X86lock_add addr:$dst, (i32 Increment)))],
+ IIC_UNARY_MEM>, OpSize32, LOCK;
def NAME#64m : RI<Opc, Form, (outs), (ins i64mem:$dst),
!strconcat(mnemonic, "{q}\t$dst"),
- [], IIC_UNARY_MEM>, LOCK;
+ [(set EFLAGS, (X86lock_add addr:$dst, (i64 Increment)))],
+ IIC_UNARY_MEM>, LOCK;
}
}
-defm LOCK_INC : LOCK_ArithUnOp<0xFE, 0xFF, MRM0m, "inc">;
-defm LOCK_DEC : LOCK_ArithUnOp<0xFE, 0xFF, MRM1m, "dec">;
+defm LOCK_INC : LOCK_ArithUnOp<0xFE, 0xFF, MRM0m, 1, "inc">;
+defm LOCK_DEC : LOCK_ArithUnOp<0xFE, 0xFF, MRM1m, -1, "dec">;
// Atomic compare and swap.
multiclass LCMPXCHG_UnOp<bits<8> Opc, Format Form, string mnemonic,
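
With the table-driven selector gone, INC/DEC selection is now expressed as
ordinary patterns: a +/-1 X86lock_add is matched by LOCK_INC*/LOCK_DEC*
under the NotSlowIncDec predicate (the same condition the old code checked
via slowIncDec()). A hypothetical example, assuming a subtarget where
INC/DEC are not slow:

    #include <atomic>

    void tick(std::atomic<long> &n) { n.fetch_add(1); } // lock incq (%rdi)
    void tock(std::atomic<long> &n) { n.fetch_sub(1); } // lock decq (%rdi)
    // fetch_sub(1) gets there via the new LSUB->LADD canonicalization:
    // performLSUBCombine turns (LSUB p, 1) into (LADD p, -1) first.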
Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.td?rev=262244&r1=262243&r2=262244&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.td Mon Feb 29 13:28:07 2016
@@ -72,6 +72,10 @@ def SDTX86cas : SDTypeProfile<0, 3, [SDT
SDTCisVT<2, i8>]>;
def SDTX86caspair : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
+def SDTLockBinaryArithWithFlags : SDTypeProfile<1, 2, [SDTCisVT<0, i32>,
+ SDTCisPtrTy<1>,
+ SDTCisInt<2>]>;
+
def SDTX86Ret : SDTypeProfile<0, -1, [SDTCisVT<0, i16>]>;
def SDT_X86CallSeqStart : SDCallSeqStart<[SDTCisVT<0, i32>]>;
@@ -235,6 +239,22 @@ def X86xor_flag : SDNode<"X86ISD::XOR",
def X86and_flag : SDNode<"X86ISD::AND", SDTBinaryArithWithFlags,
[SDNPCommutative]>;
+def X86lock_add : SDNode<"X86ISD::LADD", SDTLockBinaryArithWithFlags,
+ [SDNPHasChain, SDNPMayStore, SDNPMayLoad,
+ SDNPMemOperand]>;
+def X86lock_sub : SDNode<"X86ISD::LSUB", SDTLockBinaryArithWithFlags,
+ [SDNPHasChain, SDNPMayStore, SDNPMayLoad,
+ SDNPMemOperand]>;
+def X86lock_or : SDNode<"X86ISD::LOR", SDTLockBinaryArithWithFlags,
+ [SDNPHasChain, SDNPMayStore, SDNPMayLoad,
+ SDNPMemOperand]>;
+def X86lock_xor : SDNode<"X86ISD::LXOR", SDTLockBinaryArithWithFlags,
+ [SDNPHasChain, SDNPMayStore, SDNPMayLoad,
+ SDNPMemOperand]>;
+def X86lock_and : SDNode<"X86ISD::LAND", SDTLockBinaryArithWithFlags,
+ [SDNPHasChain, SDNPMayStore, SDNPMayLoad,
+ SDNPMemOperand]>;
+
def X86bextr : SDNode<"X86ISD::BEXTR", SDTIntBinOp>;
def X86mul_imm : SDNode<"X86ISD::MUL_IMM", SDTIntBinOp>;