<span class="Apple-style-span" style="font-family: monospace; font-size: 13px; ">Hey Guys,<br><br>I have been working on Atomic NAND. Here is a patch that allows the<br>compiler writer to select a [NOT AND] implementation, while retaining<br>
[NEGATE and AND] as the default implementation for those who do not<br>want the current GCC implementation (i.e., that of GCC v4.4 and later).<br><br>upstream/llvm> svn diff<br>Index: lib/Target/X86/X86ISelLowering.cpp<br>===================================================================<br>
--- lib/Target/X86/X86ISelLowering.cpp    (revision 135054)<br>+++ lib/Target/X86/X86ISelLowering.cpp    (working copy)<br>@@ -9710,7 +9710,8 @@<br>                                                        unsigned notOpc,<br>
                                                        unsigned EAXreg,<br>                                                       <br>TargetRegisterClass *RC,<br>-                                                       bool invSrc) const {<br>
+                                                       bool invSrc,<br>+                                bool invRes) const {<br>   // For the atomic bitwise operator, we generate<br>   //   thisMBB:<br>   //   newMBB:<br>
@@ -9783,13 +9784,20 @@<br>   MIB.addReg(tt);<br>   (*MIB).addOperand(*argOpers[valArgIndx]);<br> <br>+  unsigned tr = F->getRegInfo().createVirtualRegister(RC);<br>+  if (invRes) {<br>+    MIB = BuildMI(newMBB, dl, TII->get(notOpc), tr).addReg(t2);<br>
+  }<br>+  else<br>+    tr = t2;<br>+<br>   MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), EAXreg);<br>   MIB.addReg(t1);<br> <br>   MIB = BuildMI(newMBB, dl, TII->get(CXchgOpc));<br>   for (int i=0; i <= lastAddrIndx; ++i)<br>
     (*MIB).addOperand(*argOpers[i]);<br>-  MIB.addReg(t2);<br>+  MIB.addReg(tr);<br>   assert(bInstr->hasOneMemOperand() && "Unexpected number of memoperand");<br>   (*MIB).setMemRefs(bInstr->memoperands_begin(),<br>
                     bInstr->memoperands_end());<br>@@ -9812,7 +9820,8 @@<br>                                                        unsigned regOpcH,<br>                                                        unsigned immOpcL,<br>
                                                        unsigned immOpcH,<br>-                                                       bool invSrc) const {<br>+                                                       bool invSrc,<br>
+                                bool invRes) const {<br>   // For the atomic bitwise operator, we generate<br>   //   thisMBB (instructions are in pairs, except cmpxchg8b)<br>   //     ld t1,t2 = [bitinstr.addr]<br>@@ -9939,15 +9948,26 @@<br>
     MIB.addReg(t2);<br>   (*MIB).addOperand(*argOpers[valArgIndx + 1]);<br> <br>+  unsigned trl = F->getRegInfo().createVirtualRegister(RC);<br>+  unsigned trh = F->getRegInfo().createVirtualRegister(RC);<br>+  if (invRes) {<br>
+    MIB = BuildMI(newMBB, dl, TII->get(NotOpc), trl).addReg(t5);<br>+    MIB = BuildMI(newMBB, dl, TII->get(NotOpc), trh).addReg(t6);<br>+  }<br>+  else {<br>+    trl = t5;<br>+    trh = t6;<br>+  }<br>+<br>   MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::EAX);<br>
   MIB.addReg(t1);<br>   MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::EDX);<br>   MIB.addReg(t2);<br> <br>   MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::EBX);<br>-  MIB.addReg(t5);<br>
+  MIB.addReg(trl);<br>   MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::ECX);<br>-  MIB.addReg(t6);<br>+  MIB.addReg(trh);<br> <br>   MIB = BuildMI(newMBB, dl, TII->get(X86::LCMPXCHG8B));<br>   for (int i=0; i <= lastAddrIndx; ++i)<br>
Index: lib/Target/X86/X86ISelLowering.h<br>===================================================================<br>--- lib/Target/X86/X86ISelLowering.h    (revision 135054)<br>+++ lib/Target/X86/X86ISelLowering.h    (working copy)<br>
@@ -896,7 +896,8 @@<br>                                                     unsigned notOpc,<br>                                                     unsigned EAXreg,<br>                                                     TargetRegisterClass<br>
*RC,<br>-                                                    bool invSrc =<br>false) const;<br>+                                                    bool invSrc = false,<br>+                             bool invRes = false) const;<br>
 <br>     MachineBasicBlock *EmitAtomicBit6432WithCustomInserter(<br>                                                     MachineInstr *BInstr,<br>@@ -905,7 +906,8 @@<br>                                                     unsigned regOpcH,<br>
                                                     unsigned immOpcL,<br>                                                     unsigned immOpcH,<br>-                                                    bool invSrc =<br>false) const;<br>
+                                                    bool invSrc = false,<br>+                             bool invRes = false) const;<br> <br>     /// Utility function to emit atomic min and max.  It takes the min/max<br>
     /// instruction to expand, the associated basic block, and the<br>associated<br><br><br>With the above patch, the current LLVM implementation will still be<br>selected by default. To enable the [NOT AND] mode, the following changes will need to be made<br>
by the compiler writer:<br><br><br>upstream/llvm> svn diff<br>Index: lib/Target/X86/X86ISelLowering.cpp<br>===================================================================<br>--- lib/Target/X86/X86ISelLowering.cpp    (revision 135080)<br>
+++ lib/Target/X86/X86ISelLowering.cpp    (working copy)<br>@@ -10835,7 +10835,8 @@<br>                                                X86::AND32ri, X86::MOV32rm,<br>                                                X86::LCMPXCHG32,<br>
                                                X86::NOT32r, X86::EAX,<br>-                                               X86::GR32RegisterClass,<br>true);<br>+                                               X86::GR32RegisterClass,<br>
+                            false, true);<br>   case X86::ATOMMIN32:<br>     return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVL32rr);<br>   case X86::ATOMMAX32:<br>@@ -10868,7 +10869,8 @@<br>                                                X86::AND16ri, X86::MOV16rm,<br>
                                                X86::LCMPXCHG16,<br>                                                X86::NOT16r, X86::AX,<br>-                                               X86::GR16RegisterClass,<br>true);<br>
+                                               X86::GR16RegisterClass,<br>+                            false, true);<br>   case X86::ATOMMIN16:<br>     return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVL16rr);<br>
   case X86::ATOMMAX16:<br>@@ -10901,7 +10903,8 @@<br>                                                X86::AND8ri, X86::MOV8rm,<br>                                                X86::LCMPXCHG8,<br>                                                X86::NOT8r, X86::AL,<br>
-                                               X86::GR8RegisterClass,<br>true);<br>+                                               X86::GR8RegisterClass,<br>+                            false, true);<br>   // FIXME: There are no CMOV8 instructions; MIN/MAX need some other way.<br>
   // This group is for 64-bit host.<br>   case X86::ATOMAND64:<br>@@ -10927,7 +10930,8 @@<br>                                                X86::AND64ri32,<br>X86::MOV64rm,<br>                                                X86::LCMPXCHG64,<br>
                                                X86::NOT64r, X86::RAX,<br>-                                               X86::GR64RegisterClass,<br>true);<br>+                                               X86::GR64RegisterClass,<br>
+                            false, true);<br>   case X86::ATOMMIN64:<br>     return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVL64rr);<br>   case X86::ATOMMAX64:<br>@@ -10957,7 +10961,7 @@<br>     return EmitAtomicBit6432WithCustomInserter(MI, BB,<br>
                                                X86::AND32rr, X86::AND32rr,<br>                                                X86::AND32ri, X86::AND32ri,<br>-                                               true);<br>+                                               false, true);<br>
   case X86::ATOMADD6432:<br>     return EmitAtomicBit6432WithCustomInserter(MI, BB,<br>                                                X86::ADD32rr, X86::ADC32rr,<br><br><br>Thanks,<br>Cameron</span>