[LLVMdev] Implementing llvm.atomic.cmp.swap.i32 on PowerPC

Gary Benson gbenson at redhat.com
Wed Jul 2 08:29:15 PDT 2008


Evan Cheng wrote:
> You need to insert new basic blocks and update CFG to accomplish this.  
> There is a hackish way to do this right now.  Add a pseudo instruction  
> to represent this operation and mark it usesCustomDAGSchedInserter.  
> This means the intrinsic is mapped to a single (pseudo) node. But it  
> is then expanded into instructions that can span multiple basic  
> blocks. See PPCTargetLowering::EmitInstrWithCustomInserter().

How does this look?  It's a big patch, but it basically does this:

 - Adds ATOMIC_LOAD_ADD, ATOMIC_CMP_SWAP and ATOMIC_SWAP nodes,
   and ATOMIC_LOAD_ADD_I{32,64}, ATOMIC_CMP_SWAP_I{32,64} and
   ATOMIC_SWAP_I{32,64} pseudo-instructions with custom inserters.

 - Replaces L[WD]ARX and ST[WD]CX pseudo-instructions with the
   actual PPC instructions they represent.

 - Removes CMP_UNRESERVE nodes and CMP_UNRES[wd]{,i} pseudo-
   instructions.

Cheers,
Gary

-- 
http://gbenson.net/
-------------- next part --------------
Index: lib/Target/PowerPC/PPCISelLowering.h
===================================================================
--- lib/Target/PowerPC/PPCISelLowering.h	(revision 52957)
+++ lib/Target/PowerPC/PPCISelLowering.h	(working copy)
@@ -152,6 +152,11 @@
       /// MTFSF = F8RC, INFLAG - This moves the register into the FPSCR.
       MTFSF,
 
+      /// ATOMIC_LOAD_ADD, ATOMIC_CMP_SWAP, ATOMIC_SWAP - These
+      /// correspond to the llvm.atomic.load.add, llvm.atomic.cmp.swap
+      /// and llvm.atomic.swap intrinsics.
+      ATOMIC_LOAD_ADD, ATOMIC_CMP_SWAP, ATOMIC_SWAP,
+
       /// LARX = This corresponds to PPC l{w|d}arx instrcution: load and
       /// reserve indexed. This is used to implement atomic operations.
       LARX,
@@ -160,10 +165,6 @@
       /// indexed. This is used to implement atomic operations.
       STCX,
 
-      /// CMP_UNRESERVE = Test for equality and "unreserve" if not true. This
-      /// is used to implement atomic operations.
-      CMP_UNRESERVE,
-
       /// TAILCALL - Indicates a tail call should be taken.
       TAILCALL,
       /// TC_RETURN - A tail call return.
@@ -325,10 +326,6 @@
                                                    SelectionDAG &DAG) const;
 
   private:
-    /// PPCAtomicLabelIndex - Keep track the number of PPC atomic labels.
-    ///
-    unsigned PPCAtomicLabelIndex;
-
     SDOperand getFramePointerFrameIndex(SelectionDAG & DAG) const;
     SDOperand getReturnAddrFrameIndex(SelectionDAG & DAG) const;
 
Index: lib/Target/PowerPC/PPCInstr64Bit.td
===================================================================
--- lib/Target/PowerPC/PPCInstr64Bit.td	(revision 52957)
+++ lib/Target/PowerPC/PPCInstr64Bit.td	(working copy)
@@ -116,23 +116,35 @@
 def : Pat<(PPCcall_ELF (i64 texternalsym:$dst)),
           (BL8_ELF texternalsym:$dst)>;
 
-// Atomic operations.
-def LDARX : Pseudo<(outs G8RC:$rD), (ins memrr:$ptr, i32imm:$label),
-                   "\nLa${label}_entry:\n\tldarx $rD, $ptr",
-                   [(set G8RC:$rD, (PPClarx xoaddr:$ptr, imm:$label))]>;
+// Atomic operations
+let usesCustomDAGSchedInserter = 1 in {
+  let Uses = [CR0] in {
+    let Uses = [R0] in
+    def ATOMIC_LOAD_ADD_I64 : Pseudo<
+      (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr),
+      "${:comment} ATOMIC_LOAD_ADD_I64 PSEUDO!",
+      [(set G8RC:$dst, (PPCatomic_load_add xoaddr:$ptr, G8RC:$incr))]>;
+    def ATOMIC_CMP_SWAP_I64 : Pseudo<
+      (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$old, G8RC:$new),
+      "${:comment} ATOMIC_CMP_SWAP_I64 PSEUDO!",
+      [(set G8RC:$dst, (PPCatomic_cmp_swap xoaddr:$ptr, G8RC:$old, G8RC:$new))]>;
+    def ATOMIC_SWAP_I64 : Pseudo<
+      (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$new),
+      "${:comment} ATOMIC_SWAP_I64 PSEUDO!",
+      [(set G8RC:$dst, (PPCatomic_swap xoaddr:$ptr, G8RC:$new))]>;
+  }
+}
 
-let Defs = [CR0] in {
-def STDCX : Pseudo<(outs), (ins G8RC:$rS, memrr:$dst, i32imm:$label),
-                  "stdcx. $rS, $dst\n\tbne- La${label}_entry\nLa${label}_exit:",
-                   [(PPCstcx G8RC:$rS, xoaddr:$dst, imm:$label)]>;
+// Instructions to support atomic operations
+def LDARX : XForm_1<31,  84, (outs G8RC:$rD), (ins memrr:$ptr),
+                   "ldarx $rD, $ptr", LdStLDARX,
+                   [(set G8RC:$rD, (PPClarx xoaddr:$ptr))]>;
 
-def CMP_UNRESd : Pseudo<(outs), (ins G8RC:$rA, G8RC:$rB, i32imm:$label),
-                         "cmpd $rA, $rB\n\tbne- La${label}_exit",
-                         [(PPCcmp_unres G8RC:$rA, G8RC:$rB, imm:$label)]>;
-def CMP_UNRESdi : Pseudo<(outs), (ins G8RC:$rA, s16imm64:$imm, i32imm:$label),
-                         "cmpdi $rA, $imm\n\tbne- La${label}_exit",
-                         [(PPCcmp_unres G8RC:$rA, immSExt16:$imm, imm:$label)]>;
-}
+let Defs = [CR0] in
+def STDCX : XForm_1<31, 214, (outs), (ins G8RC:$rS, memrr:$dst),
+                   "stdcx. $rS, $dst", LdStSTDCX,
+                   [(PPCstcx G8RC:$rS, xoaddr:$dst)]>,
+                   isDOT;
 
 let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
 def TCRETURNdi8 :Pseudo< (outs),
Index: lib/Target/PowerPC/PPCInstrInfo.td
===================================================================
--- lib/Target/PowerPC/PPCInstrInfo.td	(revision 52957)
+++ lib/Target/PowerPC/PPCInstrInfo.td	(working copy)
@@ -42,17 +42,23 @@
   SDTCisVT<0, i32>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>, SDTCisVT<3, OtherVT>
 ]>;
 
-
-def SDT_PPClarx : SDTypeProfile<1, 2, [
-  SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisVT<2, i32>
+def SDT_PPCatomic_load_add : SDTypeProfile<1, 2, [
+  SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisInt<2>
 ]>;
-def SDT_PPCstcx : SDTypeProfile<0, 3, [
-  SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisVT<2, i32>
+def SDT_PPCatomic_cmp_swap : SDTypeProfile<1, 3, [
+  SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisInt<2>, SDTCisInt<3>
 ]>;
-def SDT_PPCcmp_unres : SDTypeProfile<0, 3, [
-  SDTCisSameAs<0, 1>, SDTCisInt<1>, SDTCisVT<2, i32>
+def SDT_PPCatomic_swap : SDTypeProfile<1, 2, [
+  SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisInt<2>
 ]>;
 
+def SDT_PPClarx : SDTypeProfile<1, 1, [
+  SDTCisInt<0>, SDTCisPtrTy<1>
+]>;
+def SDT_PPCstcx : SDTypeProfile<0, 2, [
+  SDTCisInt<0>, SDTCisPtrTy<1>
+]>;
+
 def SDT_PPCTC_ret : SDTypeProfile<0, 2, [
   SDTCisPtrTy<0>, SDTCisVT<1, i32>
 ]>;
@@ -143,12 +149,22 @@
 def PPCstbrx      : SDNode<"PPCISD::STBRX", SDT_PPCstbrx,
                            [SDNPHasChain, SDNPMayStore]>;
 
+// Atomic operations
+def PPCatomic_load_add : SDNode<"PPCISD::ATOMIC_LOAD_ADD",
+                                SDT_PPCatomic_load_add,
+                                [SDNPHasChain, SDNPMayLoad, SDNPMayStore]>;
+def PPCatomic_cmp_swap : SDNode<"PPCISD::ATOMIC_CMP_SWAP",
+                                SDT_PPCatomic_cmp_swap,
+                                [SDNPHasChain, SDNPMayLoad, SDNPMayStore]>;
+def PPCatomic_swap : SDNode<"PPCISD::ATOMIC_SWAP",
+                            SDT_PPCatomic_swap,
+                            [SDNPHasChain, SDNPMayLoad, SDNPMayStore]>;
+
+// Instructions to support atomic operations
 def PPClarx      : SDNode<"PPCISD::LARX", SDT_PPClarx,
                           [SDNPHasChain, SDNPMayLoad]>;
 def PPCstcx      : SDNode<"PPCISD::STCX", SDT_PPCstcx,
                           [SDNPHasChain, SDNPMayStore]>;
-def PPCcmp_unres  : SDNode<"PPCISD::CMP_UNRESERVE", SDT_PPCcmp_unres,
-                           [SDNPHasChain]>;
 
 // Instructions to support dynamic alloca.
 def SDTDynOp  : SDTypeProfile<1, 2, []>;
@@ -530,23 +546,35 @@
                       "dcbzl $dst", LdStDCBF, [(int_ppc_dcbzl xoaddr:$dst)]>,
                       PPC970_DGroup_Single;
 
-// Atomic operations.
-def LWARX : XForm_1<31,  20, (outs GPRC:$rD), (ins memrr:$ptr, i32imm:$label),
-                   "\nLa${label}_entry:\n\tlwarx $rD, $ptr", LdStLWARX,
-                   [(set GPRC:$rD, (PPClarx xoaddr:$ptr, imm:$label))]>;
+// Atomic operations
+let usesCustomDAGSchedInserter = 1 in {
+  let Uses = [CR0] in {
+    let Uses = [R0] in
+    def ATOMIC_LOAD_ADD_I32 : Pseudo<
+      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr),
+      "${:comment} ATOMIC_LOAD_ADD_I32 PSEUDO!",
+      [(set GPRC:$dst, (PPCatomic_load_add xoaddr:$ptr, GPRC:$incr))]>;
+    def ATOMIC_CMP_SWAP_I32 : Pseudo<
+      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$old, GPRC:$new),
+      "${:comment} ATOMIC_CMP_SWAP_I32 PSEUDO!",
+      [(set GPRC:$dst, (PPCatomic_cmp_swap xoaddr:$ptr, GPRC:$old, GPRC:$new))]>;
+    def ATOMIC_SWAP_I32 : Pseudo<
+      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$new),
+      "${:comment} ATOMIC_SWAP_I32 PSEUDO!",
+      [(set GPRC:$dst, (PPCatomic_swap xoaddr:$ptr, GPRC:$new))]>;
+  }
+}
 
-let Defs = [CR0] in {
-def STWCX : Pseudo<(outs), (ins GPRC:$rS, memrr:$dst, i32imm:$label),
-                  "stwcx. $rS, $dst\n\tbne- La${label}_entry\nLa${label}_exit:",
-                   [(PPCstcx GPRC:$rS, xoaddr:$dst, imm:$label)]>;
+// Instructions to support atomic operations
+def LWARX : XForm_1<31,  20, (outs GPRC:$rD), (ins memrr:$src),
+                   "lwarx $rD, $src", LdStLWARX,
+                   [(set GPRC:$rD, (PPClarx xoaddr:$src))]>;
 
-def CMP_UNRESw : Pseudo<(outs), (ins GPRC:$rA, GPRC:$rB, i32imm:$label),
-                         "cmpw $rA, $rB\n\tbne- La${label}_exit",
-                         [(PPCcmp_unres GPRC:$rA, GPRC:$rB, imm:$label)]>;
-def CMP_UNRESwi : Pseudo<(outs), (ins GPRC:$rA, s16imm:$imm, i32imm:$label),
-                         "cmpwi $rA, $imm\n\tbne- La${label}_exit",
-                         [(PPCcmp_unres GPRC:$rA, immSExt16:$imm, imm:$label)]>;
-}
+let Defs = [CR0] in
+def STWCX : XForm_1<31, 150, (outs), (ins GPRC:$rS, memrr:$dst),
+                   "stwcx. $rS, $dst", LdStSTWCX,
+                   [(PPCstcx GPRC:$rS, xoaddr:$dst)]>,
+                   isDOT;
 
 //===----------------------------------------------------------------------===//
 // PPC32 Load Instructions.
@@ -1327,9 +1355,5 @@
 def : Pat<(extloadf32 xaddr:$src),
           (FMRSD (LFSX xaddr:$src))>;
 
-// Atomic operations
-def : Pat<(PPCcmp_unres immSExt16:$imm, GPRC:$rA, imm:$label),
-          (CMP_UNRESwi GPRC:$rA, immSExt16:$imm, imm:$label)>;
-
 include "PPCInstrAltivec.td"
 include "PPCInstr64Bit.td"
Index: lib/Target/PowerPC/PPCISelLowering.cpp
===================================================================
--- lib/Target/PowerPC/PPCISelLowering.cpp	(revision 52957)
+++ lib/Target/PowerPC/PPCISelLowering.cpp	(working copy)
@@ -40,8 +40,7 @@
                                      cl::Hidden);
 
 PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
-  : TargetLowering(TM), PPCSubTarget(*TM.getSubtargetImpl()),
-    PPCAtomicLabelIndex(0) {
+  : TargetLowering(TM), PPCSubTarget(*TM.getSubtargetImpl()) {
     
   setPow2DivIsCheap();
   
@@ -378,45 +377,47 @@
 const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
   switch (Opcode) {
   default: return 0;
-  case PPCISD::FSEL:          return "PPCISD::FSEL";
-  case PPCISD::FCFID:         return "PPCISD::FCFID";
-  case PPCISD::FCTIDZ:        return "PPCISD::FCTIDZ";
-  case PPCISD::FCTIWZ:        return "PPCISD::FCTIWZ";
-  case PPCISD::STFIWX:        return "PPCISD::STFIWX";
-  case PPCISD::VMADDFP:       return "PPCISD::VMADDFP";
-  case PPCISD::VNMSUBFP:      return "PPCISD::VNMSUBFP";
-  case PPCISD::VPERM:         return "PPCISD::VPERM";
-  case PPCISD::Hi:            return "PPCISD::Hi";
-  case PPCISD::Lo:            return "PPCISD::Lo";
-  case PPCISD::DYNALLOC:      return "PPCISD::DYNALLOC";
-  case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg";
-  case PPCISD::SRL:           return "PPCISD::SRL";
-  case PPCISD::SRA:           return "PPCISD::SRA";
-  case PPCISD::SHL:           return "PPCISD::SHL";
-  case PPCISD::EXTSW_32:      return "PPCISD::EXTSW_32";
-  case PPCISD::STD_32:        return "PPCISD::STD_32";
-  case PPCISD::CALL_ELF:      return "PPCISD::CALL_ELF";
-  case PPCISD::CALL_Macho:    return "PPCISD::CALL_Macho";
-  case PPCISD::MTCTR:         return "PPCISD::MTCTR";
-  case PPCISD::BCTRL_Macho:   return "PPCISD::BCTRL_Macho";
-  case PPCISD::BCTRL_ELF:     return "PPCISD::BCTRL_ELF";
-  case PPCISD::RET_FLAG:      return "PPCISD::RET_FLAG";
-  case PPCISD::MFCR:          return "PPCISD::MFCR";
-  case PPCISD::VCMP:          return "PPCISD::VCMP";
-  case PPCISD::VCMPo:         return "PPCISD::VCMPo";
-  case PPCISD::LBRX:          return "PPCISD::LBRX";
-  case PPCISD::STBRX:         return "PPCISD::STBRX";
-  case PPCISD::LARX:          return "PPCISD::LARX";
-  case PPCISD::STCX:          return "PPCISD::STCX";
-  case PPCISD::CMP_UNRESERVE: return "PPCISD::CMP_UNRESERVE";
-  case PPCISD::COND_BRANCH:   return "PPCISD::COND_BRANCH";
-  case PPCISD::MFFS:          return "PPCISD::MFFS";
-  case PPCISD::MTFSB0:        return "PPCISD::MTFSB0";
-  case PPCISD::MTFSB1:        return "PPCISD::MTFSB1";
-  case PPCISD::FADDRTZ:       return "PPCISD::FADDRTZ";
-  case PPCISD::MTFSF:         return "PPCISD::MTFSF";
-  case PPCISD::TAILCALL:      return "PPCISD::TAILCALL";
-  case PPCISD::TC_RETURN:     return "PPCISD::TC_RETURN";
+  case PPCISD::FSEL:            return "PPCISD::FSEL";
+  case PPCISD::FCFID:           return "PPCISD::FCFID";
+  case PPCISD::FCTIDZ:          return "PPCISD::FCTIDZ";
+  case PPCISD::FCTIWZ:          return "PPCISD::FCTIWZ";
+  case PPCISD::STFIWX:          return "PPCISD::STFIWX";
+  case PPCISD::VMADDFP:         return "PPCISD::VMADDFP";
+  case PPCISD::VNMSUBFP:        return "PPCISD::VNMSUBFP";
+  case PPCISD::VPERM:           return "PPCISD::VPERM";
+  case PPCISD::Hi:              return "PPCISD::Hi";
+  case PPCISD::Lo:              return "PPCISD::Lo";
+  case PPCISD::DYNALLOC:        return "PPCISD::DYNALLOC";
+  case PPCISD::GlobalBaseReg:   return "PPCISD::GlobalBaseReg";
+  case PPCISD::SRL:             return "PPCISD::SRL";
+  case PPCISD::SRA:             return "PPCISD::SRA";
+  case PPCISD::SHL:             return "PPCISD::SHL";
+  case PPCISD::EXTSW_32:        return "PPCISD::EXTSW_32";
+  case PPCISD::STD_32:          return "PPCISD::STD_32";
+  case PPCISD::CALL_ELF:        return "PPCISD::CALL_ELF";
+  case PPCISD::CALL_Macho:      return "PPCISD::CALL_Macho";
+  case PPCISD::MTCTR:           return "PPCISD::MTCTR";
+  case PPCISD::BCTRL_Macho:     return "PPCISD::BCTRL_Macho";
+  case PPCISD::BCTRL_ELF:       return "PPCISD::BCTRL_ELF";
+  case PPCISD::RET_FLAG:        return "PPCISD::RET_FLAG";
+  case PPCISD::MFCR:            return "PPCISD::MFCR";
+  case PPCISD::VCMP:            return "PPCISD::VCMP";
+  case PPCISD::VCMPo:           return "PPCISD::VCMPo";
+  case PPCISD::LBRX:            return "PPCISD::LBRX";
+  case PPCISD::STBRX:           return "PPCISD::STBRX";
+  case PPCISD::ATOMIC_LOAD_ADD: return "PPCISD::ATOMIC_LOAD_ADD";
+  case PPCISD::ATOMIC_CMP_SWAP: return "PPCISD::ATOMIC_CMP_SWAP";
+  case PPCISD::ATOMIC_SWAP:     return "PPCISD::ATOMIC_SWAP";
+  case PPCISD::LARX:            return "PPCISD::LARX";
+  case PPCISD::STCX:            return "PPCISD::STCX";
+  case PPCISD::COND_BRANCH:     return "PPCISD::COND_BRANCH";
+  case PPCISD::MFFS:            return "PPCISD::MFFS";
+  case PPCISD::MTFSB0:          return "PPCISD::MTFSB0";
+  case PPCISD::MTFSB1:          return "PPCISD::MTFSB1";
+  case PPCISD::FADDRTZ:         return "PPCISD::FADDRTZ";
+  case PPCISD::MTFSF:           return "PPCISD::MTFSF";
+  case PPCISD::TAILCALL:        return "PPCISD::TAILCALL";
+  case PPCISD::TC_RETURN:       return "PPCISD::TC_RETURN";
   }
 }
 
@@ -2726,33 +2727,13 @@
   SDOperand Ptr     = Op.getOperand(1);
   SDOperand Incr    = Op.getOperand(2);
 
-  // Issue a "load and reserve".
-  std::vector<MVT> VTs;
-  VTs.push_back(VT);
-  VTs.push_back(MVT::Other);
-
-  SDOperand Label  = DAG.getConstant(PPCAtomicLabelIndex++, MVT::i32);
+  SDVTList VTs = DAG.getVTList(VT, MVT::Other);
   SDOperand Ops[] = {
-    Chain,               // Chain
-    Ptr,                 // Ptr
-    Label,               // Label
+    Chain,
+    Ptr,
+    Incr,
   };
-  SDOperand Load = DAG.getNode(PPCISD::LARX, VTs, Ops, 3);
-  Chain = Load.getValue(1);
-
-  // Compute new value.
-  SDOperand NewVal  = DAG.getNode(ISD::ADD, VT, Load, Incr);
-
-  // Issue a "store and check".
-  SDOperand Ops2[] = {
-    Chain,               // Chain
-    NewVal,              // Value
-    Ptr,                 // Ptr
-    Label,               // Label
-  };
-  SDOperand Store = DAG.getNode(PPCISD::STCX, MVT::Other, Ops2, 4);
-  SDOperand OutOps[] = { Load, Store };
-  return DAG.getMergeValues(DAG.getVTList(VT, MVT::Other), OutOps, 2);
+  return DAG.getNode(PPCISD::ATOMIC_LOAD_ADD, VTs, Ops, 3);
 }
 
 SDOperand PPCTargetLowering::LowerAtomicCMP_SWAP(SDOperand Op, SelectionDAG &DAG) {
@@ -2762,39 +2743,14 @@
   SDOperand NewVal  = Op.getOperand(2);
   SDOperand OldVal  = Op.getOperand(3);
 
-  // Issue a "load and reserve".
-  std::vector<MVT> VTs;
-  VTs.push_back(VT);
-  VTs.push_back(MVT::Other);
-
-  SDOperand Label  = DAG.getConstant(PPCAtomicLabelIndex++, MVT::i32);
+  SDVTList VTs = DAG.getVTList(VT, MVT::Other);
   SDOperand Ops[] = {
-    Chain,               // Chain
-    Ptr,                 // Ptr
-    Label,               // Label
+    Chain,
+    Ptr,
+    OldVal,
+    NewVal,
   };
-  SDOperand Load = DAG.getNode(PPCISD::LARX, VTs, Ops, 3);
-  Chain = Load.getValue(1);
-
-  // Compare and unreserve if not equal.
-  SDOperand Ops2[] = {
-    Chain,               // Chain
-    OldVal,              // Old value
-    Load,                // Value in memory
-    Label,               // Label
-  };
-  Chain = DAG.getNode(PPCISD::CMP_UNRESERVE, MVT::Other, Ops2, 4);
-
-  // Issue a "store and check".
-  SDOperand Ops3[] = {
-    Chain,               // Chain
-    NewVal,              // Value
-    Ptr,                 // Ptr
-    Label,               // Label
-  };
-  SDOperand Store = DAG.getNode(PPCISD::STCX, MVT::Other, Ops3, 4);
-  SDOperand OutOps[] = { Load, Store };
-  return DAG.getMergeValues(DAG.getVTList(VT, MVT::Other), OutOps, 2);
+  return DAG.getNode(PPCISD::ATOMIC_CMP_SWAP, VTs, Ops, 4);
 }
 
 SDOperand PPCTargetLowering::LowerAtomicSWAP(SDOperand Op, SelectionDAG &DAG) {
@@ -2803,30 +2759,13 @@
   SDOperand Ptr     = Op.getOperand(1);
   SDOperand NewVal  = Op.getOperand(2);
 
-  // Issue a "load and reserve".
-  std::vector<MVT> VTs;
-  VTs.push_back(VT);
-  VTs.push_back(MVT::Other);
-
-  SDOperand Label  = DAG.getConstant(PPCAtomicLabelIndex++, MVT::i32);
+  SDVTList VTs = DAG.getVTList(VT, MVT::Other);
   SDOperand Ops[] = {
-    Chain,               // Chain
-    Ptr,                 // Ptr
-    Label,               // Label
+    Chain,
+    Ptr,
+    NewVal,
   };
-  SDOperand Load = DAG.getNode(PPCISD::LARX, VTs, Ops, 3);
-  Chain = Load.getValue(1);
-
-  // Issue a "store and check".
-  SDOperand Ops2[] = {
-    Chain,               // Chain
-    NewVal,              // Value
-    Ptr,                 // Ptr
-    Label,               // Label
-  };
-  SDOperand Store = DAG.getNode(PPCISD::STCX, MVT::Other, Ops2, 4);
-  SDOperand OutOps[] = { Load, Store };
-  return DAG.getMergeValues(DAG.getVTList(VT, MVT::Other), OutOps, 2);
+  return DAG.getNode(PPCISD::ATOMIC_SWAP, VTs, Ops, 3);
 }
 
 /// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when
@@ -3980,59 +3919,198 @@
 PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
                                                MachineBasicBlock *BB) {
   const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
-  assert((MI->getOpcode() == PPC::SELECT_CC_I4 ||
-          MI->getOpcode() == PPC::SELECT_CC_I8 ||
-          MI->getOpcode() == PPC::SELECT_CC_F4 ||
-          MI->getOpcode() == PPC::SELECT_CC_F8 ||
-          MI->getOpcode() == PPC::SELECT_CC_VRRC) &&
-         "Unexpected instr type to insert");
-  
-  // To "insert" a SELECT_CC instruction, we actually have to insert the diamond
-  // control-flow pattern.  The incoming instruction knows the destination vreg
-  // to set, the condition code register to branch on, the true/false values to
-  // select between, and a branch opcode to use.
+
+  // To "insert" these instructions we actually have to insert their
+  // control-flow patterns.
   const BasicBlock *LLVM_BB = BB->getBasicBlock();
   ilist<MachineBasicBlock>::iterator It = BB;
   ++It;
-  
-  //  thisMBB:
-  //  ...
-  //   TrueVal = ...
-  //   cmpTY ccX, r1, r2
-  //   bCC copy1MBB
-  //   fallthrough --> copy0MBB
-  MachineBasicBlock *thisMBB = BB;
-  MachineBasicBlock *copy0MBB = new MachineBasicBlock(LLVM_BB);
-  MachineBasicBlock *sinkMBB = new MachineBasicBlock(LLVM_BB);
-  unsigned SelectPred = MI->getOperand(4).getImm();
-  BuildMI(BB, TII->get(PPC::BCC))
-    .addImm(SelectPred).addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB);
-  MachineFunction *F = BB->getParent();
-  F->getBasicBlockList().insert(It, copy0MBB);
-  F->getBasicBlockList().insert(It, sinkMBB);
-  // Update machine-CFG edges by transferring all successors of the current
-  // block to the new block which will contain the Phi node for the select.
-  sinkMBB->transferSuccessors(BB);
-  // Next, add the true and fallthrough blocks as its successors.
-  BB->addSuccessor(copy0MBB);
-  BB->addSuccessor(sinkMBB);
-  
-  //  copy0MBB:
-  //   %FalseValue = ...
-  //   # fallthrough to sinkMBB
-  BB = copy0MBB;
-  
-  // Update machine-CFG edges
-  BB->addSuccessor(sinkMBB);
-  
-  //  sinkMBB:
-  //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
-  //  ...
-  BB = sinkMBB;
-  BuildMI(BB, TII->get(PPC::PHI), MI->getOperand(0).getReg())
-    .addReg(MI->getOperand(3).getReg()).addMBB(copy0MBB)
-    .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
 
+  if (MI->getOpcode() == PPC::SELECT_CC_I4 ||
+      MI->getOpcode() == PPC::SELECT_CC_I8 ||
+      MI->getOpcode() == PPC::SELECT_CC_F4 ||
+      MI->getOpcode() == PPC::SELECT_CC_F8 ||
+      MI->getOpcode() == PPC::SELECT_CC_VRRC) {
+
+    // The incoming instruction knows the destination vreg to set, the
+    // condition code register to branch on, the true/false values to
+    // select between, and a branch opcode to use.
+
+    //  thisMBB:
+    //  ...
+    //   TrueVal = ...
+    //   cmpTY ccX, r1, r2
+    //   bCC copy1MBB
+    //   fallthrough --> copy0MBB
+    MachineBasicBlock *thisMBB = BB;
+    MachineBasicBlock *copy0MBB = new MachineBasicBlock(LLVM_BB);
+    MachineBasicBlock *sinkMBB = new MachineBasicBlock(LLVM_BB);
+    unsigned SelectPred = MI->getOperand(4).getImm();
+    BuildMI(BB, TII->get(PPC::BCC))
+      .addImm(SelectPred).addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB);
+    MachineFunction *F = BB->getParent();
+    F->getBasicBlockList().insert(It, copy0MBB);
+    F->getBasicBlockList().insert(It, sinkMBB);
+    // Update machine-CFG edges by transferring all successors of the current
+    // block to the new block which will contain the Phi node for the select.
+    sinkMBB->transferSuccessors(BB);
+    // Next, add the true and fallthrough blocks as its successors.
+    BB->addSuccessor(copy0MBB);
+    BB->addSuccessor(sinkMBB);
+    
+    //  copy0MBB:
+    //   %FalseValue = ...
+    //   # fallthrough to sinkMBB
+    BB = copy0MBB;
+    
+    // Update machine-CFG edges
+    BB->addSuccessor(sinkMBB);
+    
+    //  sinkMBB:
+    //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
+    //  ...
+    BB = sinkMBB;
+    BuildMI(BB, TII->get(PPC::PHI), MI->getOperand(0).getReg())
+      .addReg(MI->getOperand(3).getReg()).addMBB(copy0MBB)
+      .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
+  }
+  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I32 ||
+           MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I64) {
+    bool is64bit = MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I64;
+    // Operand 0: result; 1-2: the two memrr address regs; 3: the increment.
+    unsigned dest = MI->getOperand(0).getReg();
+    unsigned ptrA = MI->getOperand(1).getReg();
+    unsigned ptrB = MI->getOperand(2).getReg();
+    unsigned incr = MI->getOperand(3).getReg();
+
+    MachineBasicBlock *loopMBB = new MachineBasicBlock(LLVM_BB);
+    MachineBasicBlock *exitMBB = new MachineBasicBlock(LLVM_BB);
+
+    MachineFunction *F = BB->getParent();
+    F->getBasicBlockList().insert(It, loopMBB);
+    F->getBasicBlockList().insert(It, exitMBB);
+    exitMBB->transferSuccessors(BB);
+
+    //  thisMBB:
+    //   ...
+    //   fallthrough --> loopMBB
+    BB->addSuccessor(loopMBB);
+    // NOTE(review): 64-bit path writes PPC::R0 too; confirm X0 isn't needed.
+    //  loopMBB:
+    //   l[wd]arx dest, ptr
+    //   add r0, dest, incr      (ADD8 when 64-bit, ADD4 when 32-bit)
+    //   st[wd]cx. r0, ptr
+    //   bne- loopMBB            (branch back and retry on CR0 "ne")
+    //   fallthrough --> exitMBB
+    BB = loopMBB;
+    BuildMI(BB, TII->get(is64bit ? PPC::LDARX : PPC::LWARX), dest)
+      .addReg(ptrA).addReg(ptrB);
+    BuildMI(BB, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), PPC::R0)
+      .addReg(incr).addReg(dest);
+    BuildMI(BB, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
+      .addReg(PPC::R0).addReg(ptrA).addReg(ptrB);
+    BuildMI(BB, TII->get(PPC::BCC))
+      .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
+    BB->addSuccessor(loopMBB);
+    BB->addSuccessor(exitMBB);
+
+    //  exitMBB:
+    //   ...
+    BB = exitMBB;
+  }
+  else if (MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
+           MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64) {
+    bool is64bit = MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;
+    // Operands: 0 result, 1-2 memrr address regs, 3 old value, 4 new value.
+    unsigned dest   = MI->getOperand(0).getReg();
+    unsigned ptrA   = MI->getOperand(1).getReg();
+    unsigned ptrB   = MI->getOperand(2).getReg();
+    unsigned oldval = MI->getOperand(3).getReg();
+    unsigned newval = MI->getOperand(4).getReg();
+
+    MachineBasicBlock *loopMBB = new MachineBasicBlock(LLVM_BB);
+    MachineBasicBlock *exitMBB = new MachineBasicBlock(LLVM_BB);
+
+    MachineFunction *F = BB->getParent();
+    F->getBasicBlockList().insert(It, loopMBB);
+    F->getBasicBlockList().insert(It, exitMBB);
+    exitMBB->transferSuccessors(BB);
+
+    //  thisMBB:
+    //   ...
+    //   fallthrough --> loopMBB
+    BB->addSuccessor(loopMBB);
+    // NOTE(review): 1st bne- is not at the block end; may need its own MBB.
+    //  loopMBB:
+    //   l[wd]arx dest, ptr
+    //   cmp[wd] dest, oldval
+    //   bne- exitMBB            (values differ: leave without storing)
+    //   st[wd]cx. newval, ptr
+    //   bne- loopMBB            (branch back and retry on CR0 "ne")
+    //   fallthrough --> exitMBB
+    BB = loopMBB;
+    BuildMI(BB, TII->get(is64bit ? PPC::LDARX : PPC::LWARX), dest)
+      .addReg(ptrA).addReg(ptrB);
+    BuildMI(BB, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), PPC::CR0)
+      .addReg(oldval).addReg(dest);
+    BuildMI(BB, TII->get(PPC::BCC))
+      .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(exitMBB);
+    BuildMI(BB, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
+      .addReg(newval).addReg(ptrA).addReg(ptrB);
+    BuildMI(BB, TII->get(PPC::BCC))
+      .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);    
+    BB->addSuccessor(loopMBB);
+    BB->addSuccessor(exitMBB);
+    
+    //  exitMBB:
+    //   ...
+    BB = exitMBB;
+  }
+  else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I32 ||
+           MI->getOpcode() == PPC::ATOMIC_SWAP_I64) {
+    bool is64bit = MI->getOpcode() == PPC::ATOMIC_SWAP_I64;
+    // Operands: 0 result, 1-2 memrr address regs, 3 the value to store.
+    unsigned dest   = MI->getOperand(0).getReg();
+    unsigned ptrA   = MI->getOperand(1).getReg();
+    unsigned ptrB   = MI->getOperand(2).getReg();
+    unsigned newval = MI->getOperand(3).getReg();
+
+    MachineBasicBlock *loopMBB = new MachineBasicBlock(LLVM_BB);
+    MachineBasicBlock *exitMBB = new MachineBasicBlock(LLVM_BB);
+
+    MachineFunction *F = BB->getParent();
+    F->getBasicBlockList().insert(It, loopMBB);
+    F->getBasicBlockList().insert(It, exitMBB);
+    exitMBB->transferSuccessors(BB);
+
+    //  thisMBB:
+    //   ...
+    //   fallthrough --> loopMBB
+    BB->addSuccessor(loopMBB);
+
+    //  loopMBB:
+    //   l[wd]arx dest, ptr      (dest receives the value loaded from ptr)
+    //   st[wd]cx. newval, ptr
+    //   bne- loopMBB            (branch back and retry on CR0 "ne")
+    //   fallthrough --> exitMBB
+    BB = loopMBB;
+    BuildMI(BB, TII->get(is64bit ? PPC::LDARX : PPC::LWARX), dest)
+      .addReg(ptrA).addReg(ptrB);
+    BuildMI(BB, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
+      .addReg(newval).addReg(ptrA).addReg(ptrB);
+    BuildMI(BB, TII->get(PPC::BCC))
+      .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);    
+    BB->addSuccessor(loopMBB);
+    BB->addSuccessor(exitMBB);
+    
+    //  exitMBB:
+    //   ...
+    BB = exitMBB;
+  }
+  else {
+    assert(0 && "Unexpected instr type to insert");
+  }
+
   delete MI;   // The pseudo instruction is gone now.
   return BB;
 }


More information about the llvm-dev mailing list