[LLVMdev] Implementing llvm.atomic.cmp.swap.i32 on PowerPC
Gary Benson
gbenson at redhat.com
Wed Jul 2 08:29:15 PDT 2008
Evan Cheng wrote:
> You need to insert new basic blocks and update CFG to accomplish this.
> There is a hackish way to do this right now. Add a pseudo instruction
> to represent this operation and mark it usesCustomDAGSchedInserter.
> This means the intrinsic is mapped to a single (pseudo) node. But it
> is then expanded into instructions that can span multiple basic
> blocks. See PPCTargetLowering::EmitInstrWithCustomInserter().
How does this look? It's a big patch, but it basically does this:
- Adds ATOMIC_LOAD_ADD, ATOMIC_CMP_SWAP and ATOMIC_SWAP nodes,
and ATOMIC_LOAD_ADD_I{32,64}, ATOMIC_CMP_SWAP_I{32,64} and
ATOMIC_SWAP_I{32,64} pseudo-instructions with custom inserters.
- Replaces L[WD]ARX and ST[WD]CX pseudo-instructions with the
actual PPC instructions they represent.
- Removes CMP_UNRESERVE nodes and CMP_UNRES[wd]{,i} pseudo-
instructions.
Cheers,
Gary
--
http://gbenson.net/
-------------- next part --------------
Index: lib/Target/PowerPC/PPCISelLowering.h
===================================================================
--- lib/Target/PowerPC/PPCISelLowering.h (revision 52957)
+++ lib/Target/PowerPC/PPCISelLowering.h (working copy)
@@ -152,6 +152,11 @@
/// MTFSF = F8RC, INFLAG - This moves the register into the FPSCR.
MTFSF,
+ /// ATOMIC_LOAD_ADD, ATOMIC_CMP_SWAP, ATOMIC_SWAP - These
+ /// correspond to the llvm.atomic.load.add, llvm.atomic.cmp.swap
+ /// and llvm.atomic.swap intrinsics.
+ ATOMIC_LOAD_ADD, ATOMIC_CMP_SWAP, ATOMIC_SWAP,
+
/// LARX = This corresponds to PPC l{w|d}arx instrcution: load and
/// reserve indexed. This is used to implement atomic operations.
LARX,
@@ -160,10 +165,6 @@
/// indexed. This is used to implement atomic operations.
STCX,
- /// CMP_UNRESERVE = Test for equality and "unreserve" if not true. This
- /// is used to implement atomic operations.
- CMP_UNRESERVE,
-
/// TAILCALL - Indicates a tail call should be taken.
TAILCALL,
/// TC_RETURN - A tail call return.
@@ -325,10 +326,6 @@
SelectionDAG &DAG) const;
private:
- /// PPCAtomicLabelIndex - Keep track the number of PPC atomic labels.
- ///
- unsigned PPCAtomicLabelIndex;
-
SDOperand getFramePointerFrameIndex(SelectionDAG & DAG) const;
SDOperand getReturnAddrFrameIndex(SelectionDAG & DAG) const;
Index: lib/Target/PowerPC/PPCInstr64Bit.td
===================================================================
--- lib/Target/PowerPC/PPCInstr64Bit.td (revision 52957)
+++ lib/Target/PowerPC/PPCInstr64Bit.td (working copy)
@@ -116,23 +116,35 @@
def : Pat<(PPCcall_ELF (i64 texternalsym:$dst)),
(BL8_ELF texternalsym:$dst)>;
-// Atomic operations.
-def LDARX : Pseudo<(outs G8RC:$rD), (ins memrr:$ptr, i32imm:$label),
- "\nLa${label}_entry:\n\tldarx $rD, $ptr",
- [(set G8RC:$rD, (PPClarx xoaddr:$ptr, imm:$label))]>;
+// Atomic operations
+let usesCustomDAGSchedInserter = 1 in {
+ let Uses = [CR0] in {
+ let Uses = [R0] in
+ def ATOMIC_LOAD_ADD_I64 : Pseudo<
+ (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr),
+ "${:comment} ATOMIC_LOAD_ADD_I64 PSEUDO!",
+ [(set G8RC:$dst, (PPCatomic_load_add xoaddr:$ptr, G8RC:$incr))]>;
+ def ATOMIC_CMP_SWAP_I64 : Pseudo<
+ (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$old, G8RC:$new),
+ "${:comment} ATOMIC_CMP_SWAP_I64 PSEUDO!",
+ [(set G8RC:$dst, (PPCatomic_cmp_swap xoaddr:$ptr, G8RC:$old, G8RC:$new))]>;
+ def ATOMIC_SWAP_I64 : Pseudo<
+ (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$new),
+ "${:comment} ATOMIC_SWAP_I64 PSEUDO!",
+ [(set G8RC:$dst, (PPCatomic_swap xoaddr:$ptr, G8RC:$new))]>;
+ }
+}
-let Defs = [CR0] in {
-def STDCX : Pseudo<(outs), (ins G8RC:$rS, memrr:$dst, i32imm:$label),
- "stdcx. $rS, $dst\n\tbne- La${label}_entry\nLa${label}_exit:",
- [(PPCstcx G8RC:$rS, xoaddr:$dst, imm:$label)]>;
+// Instructions to support atomic operations
+def LDARX : XForm_1<31, 84, (outs G8RC:$rD), (ins memrr:$ptr),
+ "ldarx $rD, $ptr", LdStLDARX,
+ [(set G8RC:$rD, (PPClarx xoaddr:$ptr))]>;
-def CMP_UNRESd : Pseudo<(outs), (ins G8RC:$rA, G8RC:$rB, i32imm:$label),
- "cmpd $rA, $rB\n\tbne- La${label}_exit",
- [(PPCcmp_unres G8RC:$rA, G8RC:$rB, imm:$label)]>;
-def CMP_UNRESdi : Pseudo<(outs), (ins G8RC:$rA, s16imm64:$imm, i32imm:$label),
- "cmpdi $rA, $imm\n\tbne- La${label}_exit",
- [(PPCcmp_unres G8RC:$rA, immSExt16:$imm, imm:$label)]>;
-}
+let Defs = [CR0] in
+def STDCX : XForm_1<31, 214, (outs), (ins G8RC:$rS, memrr:$dst),
+ "stdcx. $rS, $dst", LdStSTDCX,
+ [(PPCstcx G8RC:$rS, xoaddr:$dst)]>,
+ isDOT;
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
def TCRETURNdi8 :Pseudo< (outs),
Index: lib/Target/PowerPC/PPCInstrInfo.td
===================================================================
--- lib/Target/PowerPC/PPCInstrInfo.td (revision 52957)
+++ lib/Target/PowerPC/PPCInstrInfo.td (working copy)
@@ -42,17 +42,23 @@
SDTCisVT<0, i32>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>, SDTCisVT<3, OtherVT>
]>;
-
-def SDT_PPClarx : SDTypeProfile<1, 2, [
- SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisVT<2, i32>
+def SDT_PPCatomic_load_add : SDTypeProfile<1, 2, [
+ SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisInt<2>
]>;
-def SDT_PPCstcx : SDTypeProfile<0, 3, [
- SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisVT<2, i32>
+def SDT_PPCatomic_cmp_swap : SDTypeProfile<1, 3, [
+ SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisInt<2>, SDTCisInt<3>
]>;
-def SDT_PPCcmp_unres : SDTypeProfile<0, 3, [
- SDTCisSameAs<0, 1>, SDTCisInt<1>, SDTCisVT<2, i32>
+def SDT_PPCatomic_swap : SDTypeProfile<1, 2, [
+ SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisInt<2>
]>;
+def SDT_PPClarx : SDTypeProfile<1, 1, [
+ SDTCisInt<0>, SDTCisPtrTy<1>
+]>;
+def SDT_PPCstcx : SDTypeProfile<0, 2, [
+ SDTCisInt<0>, SDTCisPtrTy<1>
+]>;
+
def SDT_PPCTC_ret : SDTypeProfile<0, 2, [
SDTCisPtrTy<0>, SDTCisVT<1, i32>
]>;
@@ -143,12 +149,22 @@
def PPCstbrx : SDNode<"PPCISD::STBRX", SDT_PPCstbrx,
[SDNPHasChain, SDNPMayStore]>;
+// Atomic operations
+def PPCatomic_load_add : SDNode<"PPCISD::ATOMIC_LOAD_ADD",
+ SDT_PPCatomic_load_add,
+ [SDNPHasChain, SDNPMayLoad, SDNPMayStore]>;
+def PPCatomic_cmp_swap : SDNode<"PPCISD::ATOMIC_CMP_SWAP",
+ SDT_PPCatomic_cmp_swap,
+ [SDNPHasChain, SDNPMayLoad, SDNPMayStore]>;
+def PPCatomic_swap : SDNode<"PPCISD::ATOMIC_SWAP",
+ SDT_PPCatomic_swap,
+ [SDNPHasChain, SDNPMayLoad, SDNPMayStore]>;
+
+// Instructions to support atomic operations
def PPClarx : SDNode<"PPCISD::LARX", SDT_PPClarx,
[SDNPHasChain, SDNPMayLoad]>;
def PPCstcx : SDNode<"PPCISD::STCX", SDT_PPCstcx,
[SDNPHasChain, SDNPMayStore]>;
-def PPCcmp_unres : SDNode<"PPCISD::CMP_UNRESERVE", SDT_PPCcmp_unres,
- [SDNPHasChain]>;
// Instructions to support dynamic alloca.
def SDTDynOp : SDTypeProfile<1, 2, []>;
@@ -530,23 +546,35 @@
"dcbzl $dst", LdStDCBF, [(int_ppc_dcbzl xoaddr:$dst)]>,
PPC970_DGroup_Single;
-// Atomic operations.
-def LWARX : XForm_1<31, 20, (outs GPRC:$rD), (ins memrr:$ptr, i32imm:$label),
- "\nLa${label}_entry:\n\tlwarx $rD, $ptr", LdStLWARX,
- [(set GPRC:$rD, (PPClarx xoaddr:$ptr, imm:$label))]>;
+// Atomic operations
+let usesCustomDAGSchedInserter = 1 in {
+ let Uses = [CR0] in {
+ let Uses = [R0] in
+ def ATOMIC_LOAD_ADD_I32 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr),
+ "${:comment} ATOMIC_LOAD_ADD_I32 PSEUDO!",
+ [(set GPRC:$dst, (PPCatomic_load_add xoaddr:$ptr, GPRC:$incr))]>;
+ def ATOMIC_CMP_SWAP_I32 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$old, GPRC:$new),
+ "${:comment} ATOMIC_CMP_SWAP_I32 PSEUDO!",
+ [(set GPRC:$dst, (PPCatomic_cmp_swap xoaddr:$ptr, GPRC:$old, GPRC:$new))]>;
+ def ATOMIC_SWAP_I32 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$new),
+ "${:comment} ATOMIC_SWAP_I32 PSEUDO!",
+ [(set GPRC:$dst, (PPCatomic_swap xoaddr:$ptr, GPRC:$new))]>;
+ }
+}
-let Defs = [CR0] in {
-def STWCX : Pseudo<(outs), (ins GPRC:$rS, memrr:$dst, i32imm:$label),
- "stwcx. $rS, $dst\n\tbne- La${label}_entry\nLa${label}_exit:",
- [(PPCstcx GPRC:$rS, xoaddr:$dst, imm:$label)]>;
+// Instructions to support atomic operations
+def LWARX : XForm_1<31, 20, (outs GPRC:$rD), (ins memrr:$src),
+ "lwarx $rD, $src", LdStLWARX,
+ [(set GPRC:$rD, (PPClarx xoaddr:$src))]>;
-def CMP_UNRESw : Pseudo<(outs), (ins GPRC:$rA, GPRC:$rB, i32imm:$label),
- "cmpw $rA, $rB\n\tbne- La${label}_exit",
- [(PPCcmp_unres GPRC:$rA, GPRC:$rB, imm:$label)]>;
-def CMP_UNRESwi : Pseudo<(outs), (ins GPRC:$rA, s16imm:$imm, i32imm:$label),
- "cmpwi $rA, $imm\n\tbne- La${label}_exit",
- [(PPCcmp_unres GPRC:$rA, immSExt16:$imm, imm:$label)]>;
-}
+let Defs = [CR0] in
+def STWCX : XForm_1<31, 150, (outs), (ins GPRC:$rS, memrr:$dst),
+ "stwcx. $rS, $dst", LdStSTWCX,
+ [(PPCstcx GPRC:$rS, xoaddr:$dst)]>,
+ isDOT;
//===----------------------------------------------------------------------===//
// PPC32 Load Instructions.
@@ -1327,9 +1355,5 @@
def : Pat<(extloadf32 xaddr:$src),
(FMRSD (LFSX xaddr:$src))>;
-// Atomic operations
-def : Pat<(PPCcmp_unres immSExt16:$imm, GPRC:$rA, imm:$label),
- (CMP_UNRESwi GPRC:$rA, immSExt16:$imm, imm:$label)>;
-
include "PPCInstrAltivec.td"
include "PPCInstr64Bit.td"
Index: lib/Target/PowerPC/PPCISelLowering.cpp
===================================================================
--- lib/Target/PowerPC/PPCISelLowering.cpp (revision 52957)
+++ lib/Target/PowerPC/PPCISelLowering.cpp (working copy)
@@ -40,8 +40,7 @@
cl::Hidden);
PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
- : TargetLowering(TM), PPCSubTarget(*TM.getSubtargetImpl()),
- PPCAtomicLabelIndex(0) {
+ : TargetLowering(TM), PPCSubTarget(*TM.getSubtargetImpl()) {
setPow2DivIsCheap();
@@ -378,45 +377,47 @@
const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
switch (Opcode) {
default: return 0;
- case PPCISD::FSEL: return "PPCISD::FSEL";
- case PPCISD::FCFID: return "PPCISD::FCFID";
- case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ";
- case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ";
- case PPCISD::STFIWX: return "PPCISD::STFIWX";
- case PPCISD::VMADDFP: return "PPCISD::VMADDFP";
- case PPCISD::VNMSUBFP: return "PPCISD::VNMSUBFP";
- case PPCISD::VPERM: return "PPCISD::VPERM";
- case PPCISD::Hi: return "PPCISD::Hi";
- case PPCISD::Lo: return "PPCISD::Lo";
- case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC";
- case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg";
- case PPCISD::SRL: return "PPCISD::SRL";
- case PPCISD::SRA: return "PPCISD::SRA";
- case PPCISD::SHL: return "PPCISD::SHL";
- case PPCISD::EXTSW_32: return "PPCISD::EXTSW_32";
- case PPCISD::STD_32: return "PPCISD::STD_32";
- case PPCISD::CALL_ELF: return "PPCISD::CALL_ELF";
- case PPCISD::CALL_Macho: return "PPCISD::CALL_Macho";
- case PPCISD::MTCTR: return "PPCISD::MTCTR";
- case PPCISD::BCTRL_Macho: return "PPCISD::BCTRL_Macho";
- case PPCISD::BCTRL_ELF: return "PPCISD::BCTRL_ELF";
- case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG";
- case PPCISD::MFCR: return "PPCISD::MFCR";
- case PPCISD::VCMP: return "PPCISD::VCMP";
- case PPCISD::VCMPo: return "PPCISD::VCMPo";
- case PPCISD::LBRX: return "PPCISD::LBRX";
- case PPCISD::STBRX: return "PPCISD::STBRX";
- case PPCISD::LARX: return "PPCISD::LARX";
- case PPCISD::STCX: return "PPCISD::STCX";
- case PPCISD::CMP_UNRESERVE: return "PPCISD::CMP_UNRESERVE";
- case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH";
- case PPCISD::MFFS: return "PPCISD::MFFS";
- case PPCISD::MTFSB0: return "PPCISD::MTFSB0";
- case PPCISD::MTFSB1: return "PPCISD::MTFSB1";
- case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ";
- case PPCISD::MTFSF: return "PPCISD::MTFSF";
- case PPCISD::TAILCALL: return "PPCISD::TAILCALL";
- case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN";
+ case PPCISD::FSEL: return "PPCISD::FSEL";
+ case PPCISD::FCFID: return "PPCISD::FCFID";
+ case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ";
+ case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ";
+ case PPCISD::STFIWX: return "PPCISD::STFIWX";
+ case PPCISD::VMADDFP: return "PPCISD::VMADDFP";
+ case PPCISD::VNMSUBFP: return "PPCISD::VNMSUBFP";
+ case PPCISD::VPERM: return "PPCISD::VPERM";
+ case PPCISD::Hi: return "PPCISD::Hi";
+ case PPCISD::Lo: return "PPCISD::Lo";
+ case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC";
+ case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg";
+ case PPCISD::SRL: return "PPCISD::SRL";
+ case PPCISD::SRA: return "PPCISD::SRA";
+ case PPCISD::SHL: return "PPCISD::SHL";
+ case PPCISD::EXTSW_32: return "PPCISD::EXTSW_32";
+ case PPCISD::STD_32: return "PPCISD::STD_32";
+ case PPCISD::CALL_ELF: return "PPCISD::CALL_ELF";
+ case PPCISD::CALL_Macho: return "PPCISD::CALL_Macho";
+ case PPCISD::MTCTR: return "PPCISD::MTCTR";
+ case PPCISD::BCTRL_Macho: return "PPCISD::BCTRL_Macho";
+ case PPCISD::BCTRL_ELF: return "PPCISD::BCTRL_ELF";
+ case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG";
+ case PPCISD::MFCR: return "PPCISD::MFCR";
+ case PPCISD::VCMP: return "PPCISD::VCMP";
+ case PPCISD::VCMPo: return "PPCISD::VCMPo";
+ case PPCISD::LBRX: return "PPCISD::LBRX";
+ case PPCISD::STBRX: return "PPCISD::STBRX";
+ case PPCISD::ATOMIC_LOAD_ADD: return "PPCISD::ATOMIC_LOAD_ADD";
+ case PPCISD::ATOMIC_CMP_SWAP: return "PPCISD::ATOMIC_CMP_SWAP";
+ case PPCISD::ATOMIC_SWAP: return "PPCISD::ATOMIC_SWAP";
+ case PPCISD::LARX: return "PPCISD::LARX";
+ case PPCISD::STCX: return "PPCISD::STCX";
+ case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH";
+ case PPCISD::MFFS: return "PPCISD::MFFS";
+ case PPCISD::MTFSB0: return "PPCISD::MTFSB0";
+ case PPCISD::MTFSB1: return "PPCISD::MTFSB1";
+ case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ";
+ case PPCISD::MTFSF: return "PPCISD::MTFSF";
+ case PPCISD::TAILCALL: return "PPCISD::TAILCALL";
+ case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN";
}
}
@@ -2726,33 +2727,13 @@
SDOperand Ptr = Op.getOperand(1);
SDOperand Incr = Op.getOperand(2);
- // Issue a "load and reserve".
- std::vector<MVT> VTs;
- VTs.push_back(VT);
- VTs.push_back(MVT::Other);
-
- SDOperand Label = DAG.getConstant(PPCAtomicLabelIndex++, MVT::i32);
+ SDVTList VTs = DAG.getVTList(VT, MVT::Other);
SDOperand Ops[] = {
- Chain, // Chain
- Ptr, // Ptr
- Label, // Label
+ Chain,
+ Ptr,
+ Incr,
};
- SDOperand Load = DAG.getNode(PPCISD::LARX, VTs, Ops, 3);
- Chain = Load.getValue(1);
-
- // Compute new value.
- SDOperand NewVal = DAG.getNode(ISD::ADD, VT, Load, Incr);
-
- // Issue a "store and check".
- SDOperand Ops2[] = {
- Chain, // Chain
- NewVal, // Value
- Ptr, // Ptr
- Label, // Label
- };
- SDOperand Store = DAG.getNode(PPCISD::STCX, MVT::Other, Ops2, 4);
- SDOperand OutOps[] = { Load, Store };
- return DAG.getMergeValues(DAG.getVTList(VT, MVT::Other), OutOps, 2);
+ return DAG.getNode(PPCISD::ATOMIC_LOAD_ADD, VTs, Ops, 3);
}
SDOperand PPCTargetLowering::LowerAtomicCMP_SWAP(SDOperand Op, SelectionDAG &DAG) {
@@ -2762,39 +2743,14 @@
SDOperand NewVal = Op.getOperand(2);
SDOperand OldVal = Op.getOperand(3);
- // Issue a "load and reserve".
- std::vector<MVT> VTs;
- VTs.push_back(VT);
- VTs.push_back(MVT::Other);
-
- SDOperand Label = DAG.getConstant(PPCAtomicLabelIndex++, MVT::i32);
+ SDVTList VTs = DAG.getVTList(VT, MVT::Other);
SDOperand Ops[] = {
- Chain, // Chain
- Ptr, // Ptr
- Label, // Label
+ Chain,
+ Ptr,
+ OldVal,
+ NewVal,
};
- SDOperand Load = DAG.getNode(PPCISD::LARX, VTs, Ops, 3);
- Chain = Load.getValue(1);
-
- // Compare and unreserve if not equal.
- SDOperand Ops2[] = {
- Chain, // Chain
- OldVal, // Old value
- Load, // Value in memory
- Label, // Label
- };
- Chain = DAG.getNode(PPCISD::CMP_UNRESERVE, MVT::Other, Ops2, 4);
-
- // Issue a "store and check".
- SDOperand Ops3[] = {
- Chain, // Chain
- NewVal, // Value
- Ptr, // Ptr
- Label, // Label
- };
- SDOperand Store = DAG.getNode(PPCISD::STCX, MVT::Other, Ops3, 4);
- SDOperand OutOps[] = { Load, Store };
- return DAG.getMergeValues(DAG.getVTList(VT, MVT::Other), OutOps, 2);
+ return DAG.getNode(PPCISD::ATOMIC_CMP_SWAP, VTs, Ops, 4);
}
SDOperand PPCTargetLowering::LowerAtomicSWAP(SDOperand Op, SelectionDAG &DAG) {
@@ -2803,30 +2759,13 @@
SDOperand Ptr = Op.getOperand(1);
SDOperand NewVal = Op.getOperand(2);
- // Issue a "load and reserve".
- std::vector<MVT> VTs;
- VTs.push_back(VT);
- VTs.push_back(MVT::Other);
-
- SDOperand Label = DAG.getConstant(PPCAtomicLabelIndex++, MVT::i32);
+ SDVTList VTs = DAG.getVTList(VT, MVT::Other);
SDOperand Ops[] = {
- Chain, // Chain
- Ptr, // Ptr
- Label, // Label
+ Chain,
+ Ptr,
+ NewVal,
};
- SDOperand Load = DAG.getNode(PPCISD::LARX, VTs, Ops, 3);
- Chain = Load.getValue(1);
-
- // Issue a "store and check".
- SDOperand Ops2[] = {
- Chain, // Chain
- NewVal, // Value
- Ptr, // Ptr
- Label, // Label
- };
- SDOperand Store = DAG.getNode(PPCISD::STCX, MVT::Other, Ops2, 4);
- SDOperand OutOps[] = { Load, Store };
- return DAG.getMergeValues(DAG.getVTList(VT, MVT::Other), OutOps, 2);
+ return DAG.getNode(PPCISD::ATOMIC_SWAP, VTs, Ops, 3);
}
/// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when
@@ -3980,59 +3919,198 @@
PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *BB) {
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
- assert((MI->getOpcode() == PPC::SELECT_CC_I4 ||
- MI->getOpcode() == PPC::SELECT_CC_I8 ||
- MI->getOpcode() == PPC::SELECT_CC_F4 ||
- MI->getOpcode() == PPC::SELECT_CC_F8 ||
- MI->getOpcode() == PPC::SELECT_CC_VRRC) &&
- "Unexpected instr type to insert");
-
- // To "insert" a SELECT_CC instruction, we actually have to insert the diamond
- // control-flow pattern. The incoming instruction knows the destination vreg
- // to set, the condition code register to branch on, the true/false values to
- // select between, and a branch opcode to use.
+
+ // To "insert" these instructions we actually have to insert their
+ // control-flow patterns.
const BasicBlock *LLVM_BB = BB->getBasicBlock();
ilist<MachineBasicBlock>::iterator It = BB;
++It;
-
- // thisMBB:
- // ...
- // TrueVal = ...
- // cmpTY ccX, r1, r2
- // bCC copy1MBB
- // fallthrough --> copy0MBB
- MachineBasicBlock *thisMBB = BB;
- MachineBasicBlock *copy0MBB = new MachineBasicBlock(LLVM_BB);
- MachineBasicBlock *sinkMBB = new MachineBasicBlock(LLVM_BB);
- unsigned SelectPred = MI->getOperand(4).getImm();
- BuildMI(BB, TII->get(PPC::BCC))
- .addImm(SelectPred).addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB);
- MachineFunction *F = BB->getParent();
- F->getBasicBlockList().insert(It, copy0MBB);
- F->getBasicBlockList().insert(It, sinkMBB);
- // Update machine-CFG edges by transferring all successors of the current
- // block to the new block which will contain the Phi node for the select.
- sinkMBB->transferSuccessors(BB);
- // Next, add the true and fallthrough blocks as its successors.
- BB->addSuccessor(copy0MBB);
- BB->addSuccessor(sinkMBB);
-
- // copy0MBB:
- // %FalseValue = ...
- // # fallthrough to sinkMBB
- BB = copy0MBB;
-
- // Update machine-CFG edges
- BB->addSuccessor(sinkMBB);
-
- // sinkMBB:
- // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
- // ...
- BB = sinkMBB;
- BuildMI(BB, TII->get(PPC::PHI), MI->getOperand(0).getReg())
- .addReg(MI->getOperand(3).getReg()).addMBB(copy0MBB)
- .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
+ if (MI->getOpcode() == PPC::SELECT_CC_I4 ||
+ MI->getOpcode() == PPC::SELECT_CC_I8 ||
+ MI->getOpcode() == PPC::SELECT_CC_F4 ||
+ MI->getOpcode() == PPC::SELECT_CC_F8 ||
+ MI->getOpcode() == PPC::SELECT_CC_VRRC) {
+
+ // The incoming instruction knows the destination vreg to set, the
+ // condition code register to branch on, the true/false values to
+ // select between, and a branch opcode to use.
+
+ // thisMBB:
+ // ...
+ // TrueVal = ...
+ // cmpTY ccX, r1, r2
+ // bCC copy1MBB
+ // fallthrough --> copy0MBB
+ MachineBasicBlock *thisMBB = BB;
+ MachineBasicBlock *copy0MBB = new MachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *sinkMBB = new MachineBasicBlock(LLVM_BB);
+ unsigned SelectPred = MI->getOperand(4).getImm();
+ BuildMI(BB, TII->get(PPC::BCC))
+ .addImm(SelectPred).addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB);
+ MachineFunction *F = BB->getParent();
+ F->getBasicBlockList().insert(It, copy0MBB);
+ F->getBasicBlockList().insert(It, sinkMBB);
+ // Update machine-CFG edges by transferring all successors of the current
+ // block to the new block which will contain the Phi node for the select.
+ sinkMBB->transferSuccessors(BB);
+ // Next, add the true and fallthrough blocks as its successors.
+ BB->addSuccessor(copy0MBB);
+ BB->addSuccessor(sinkMBB);
+
+ // copy0MBB:
+ // %FalseValue = ...
+ // # fallthrough to sinkMBB
+ BB = copy0MBB;
+
+ // Update machine-CFG edges
+ BB->addSuccessor(sinkMBB);
+
+ // sinkMBB:
+ // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
+ // ...
+ BB = sinkMBB;
+ BuildMI(BB, TII->get(PPC::PHI), MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(3).getReg()).addMBB(copy0MBB)
+ .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
+ }
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I32 ||
+ MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I64) {
+ bool is64bit = MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I64;
+
+ unsigned dest = MI->getOperand(0).getReg();
+ unsigned ptrA = MI->getOperand(1).getReg();
+ unsigned ptrB = MI->getOperand(2).getReg();
+ unsigned incr = MI->getOperand(3).getReg();
+
+ MachineBasicBlock *loopMBB = new MachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *exitMBB = new MachineBasicBlock(LLVM_BB);
+
+ MachineFunction *F = BB->getParent();
+ F->getBasicBlockList().insert(It, loopMBB);
+ F->getBasicBlockList().insert(It, exitMBB);
+ exitMBB->transferSuccessors(BB);
+
+ // thisMBB:
+ // ...
+ // fallthrough --> loopMBB
+ BB->addSuccessor(loopMBB);
+
+ // loopMBB:
+ // l[wd]arx dest, ptr
+ // add r0, dest, incr
+ // st[wd]cx. r0, ptr
+ // bne- loopMBB
+ // fallthrough --> exitMBB
+ BB = loopMBB;
+ BuildMI(BB, TII->get(is64bit ? PPC::LDARX : PPC::LWARX), dest)
+ .addReg(ptrA).addReg(ptrB);
+ BuildMI(BB, TII->get(is64bit ? PPC::ADD4 : PPC::ADD8), PPC::R0)
+ .addReg(incr).addReg(dest);
+ BuildMI(BB, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
+ .addReg(PPC::R0).addReg(ptrA).addReg(ptrB);
+ BuildMI(BB, TII->get(PPC::BCC))
+ .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
+ BB->addSuccessor(loopMBB);
+ BB->addSuccessor(exitMBB);
+
+ // exitMBB:
+ // ...
+ BB = exitMBB;
+ }
+ else if (MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
+ MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64) {
+ bool is64bit = MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;
+
+ unsigned dest = MI->getOperand(0).getReg();
+ unsigned ptrA = MI->getOperand(1).getReg();
+ unsigned ptrB = MI->getOperand(2).getReg();
+ unsigned oldval = MI->getOperand(3).getReg();
+ unsigned newval = MI->getOperand(4).getReg();
+
+ MachineBasicBlock *loopMBB = new MachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *exitMBB = new MachineBasicBlock(LLVM_BB);
+
+ MachineFunction *F = BB->getParent();
+ F->getBasicBlockList().insert(It, loopMBB);
+ F->getBasicBlockList().insert(It, exitMBB);
+ exitMBB->transferSuccessors(BB);
+
+ // thisMBB:
+ // ...
+ // fallthrough --> loopMBB
+ BB->addSuccessor(loopMBB);
+
+ // loopMBB:
+ // l[wd]arx dest, ptr
+ // cmp[wd] dest, oldval
+ // bne- exitMBB
+ // st[wd]cx. newval, ptr
+ // bne- loopMBB
+ // fallthrough --> exitMBB
+ BB = loopMBB;
+ BuildMI(BB, TII->get(is64bit ? PPC::LDARX : PPC::LWARX), dest)
+ .addReg(ptrA).addReg(ptrB);
+ BuildMI(BB, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), PPC::CR0)
+ .addReg(oldval).addReg(dest);
+ BuildMI(BB, TII->get(PPC::BCC))
+ .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(exitMBB);
+ BuildMI(BB, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
+ .addReg(newval).addReg(ptrA).addReg(ptrB);
+ BuildMI(BB, TII->get(PPC::BCC))
+ .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
+ BB->addSuccessor(loopMBB);
+ BB->addSuccessor(exitMBB);
+
+ // exitMBB:
+ // ...
+ BB = exitMBB;
+ }
+ else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I32 ||
+ MI->getOpcode() == PPC::ATOMIC_SWAP_I64) {
+ bool is64bit = MI->getOpcode() == PPC::ATOMIC_SWAP_I64;
+
+ unsigned dest = MI->getOperand(0).getReg();
+ unsigned ptrA = MI->getOperand(1).getReg();
+ unsigned ptrB = MI->getOperand(2).getReg();
+ unsigned newval = MI->getOperand(3).getReg();
+
+ MachineBasicBlock *loopMBB = new MachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *exitMBB = new MachineBasicBlock(LLVM_BB);
+
+ MachineFunction *F = BB->getParent();
+ F->getBasicBlockList().insert(It, loopMBB);
+ F->getBasicBlockList().insert(It, exitMBB);
+ exitMBB->transferSuccessors(BB);
+
+ // thisMBB:
+ // ...
+ // fallthrough --> loopMBB
+ BB->addSuccessor(loopMBB);
+
+ // loopMBB:
+ // l[wd]arx dest, ptr
+ // st[wd]cx. newval, ptr
+ // bne- loopMBB
+ // fallthrough --> exitMBB
+ BB = loopMBB;
+ BuildMI(BB, TII->get(is64bit ? PPC::LDARX : PPC::LWARX), dest)
+ .addReg(ptrA).addReg(ptrB);
+ BuildMI(BB, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
+ .addReg(newval).addReg(ptrA).addReg(ptrB);
+ BuildMI(BB, TII->get(PPC::BCC))
+ .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
+ BB->addSuccessor(loopMBB);
+ BB->addSuccessor(exitMBB);
+
+ // exitMBB:
+ // ...
+ BB = exitMBB;
+ }
+ else {
+ assert(0 && "Unexpected instr type to insert");
+ }
+
delete MI; // The pseudo instruction is gone now.
return BB;
}
More information about the llvm-dev
mailing list