Hey Guys,

I have been working on Atomic NAND. Here is a patch that allows the
compiler writer to select a [NOT AND] implementation (the semantics used
by GCC v4.4 and later), while retaining [NEGATE and AND] as the default
implementation for those who do not want the current GCC behaviour.
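For reference, here is a small sketch (my own illustration, not part of
the patch; the helper names are made up and atomicity is ignored) of the
value computation behind the two flavours:

// Made-up helper names; non-atomic, for illustration only.

// [NEGATE and AND]: the pre-GCC-4.4 semantics and the current LLVM
// default (the invSrc path): complement the loaded value, then AND.
unsigned fetch_nand_negate_and(unsigned *p, unsigned v) {
  unsigned old = *p;
  *p = ~old & v;
  return old;
}

// [NOT AND]: the GCC v4.4+ semantics this patch makes selectable
// (the new invRes path): AND first, then complement the result.
unsigned fetch_nand_not_and(unsigned *p, unsigned v) {
  unsigned old = *p;
  *p = ~(old & v);
  return old;
}

The patch below adds the invRes switch that selects between the two.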
upstream/llvm> svn diff
Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp (revision 135054)
+++ lib/Target/X86/X86ISelLowering.cpp (working copy)
@@ -9710,7 +9710,8 @@
                                                unsigned notOpc,
                                                unsigned EAXreg,
                                                TargetRegisterClass *RC,
-                                               bool invSrc) const {
+                                               bool invSrc,
+                                               bool invRes) const {
   // For the atomic bitwise operator, we generate
   //   thisMBB:
   //   newMBB:
@@ -9783,13 +9784,20 @@
   MIB.addReg(tt);
   (*MIB).addOperand(*argOpers[valArgIndx]);
 
+  unsigned tr = F->getRegInfo().createVirtualRegister(RC);
+  if (invRes) {
+    MIB = BuildMI(newMBB, dl, TII->get(notOpc), tr).addReg(t2);
+  }
+  else
+    tr = t2;
+
   MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), EAXreg);
   MIB.addReg(t1);
 
   MIB = BuildMI(newMBB, dl, TII->get(CXchgOpc));
   for (int i=0; i <= lastAddrIndx; ++i)
     (*MIB).addOperand(*argOpers[i]);
-  MIB.addReg(t2);
+  MIB.addReg(tr);
   assert(bInstr->hasOneMemOperand() && "Unexpected number of memoperand");
   (*MIB).setMemRefs(bInstr->memoperands_begin(),
                     bInstr->memoperands_end());
@@ -9812,7 +9820,8 @@
                                               unsigned regOpcH,
                                               unsigned immOpcL,
                                               unsigned immOpcH,
-                                              bool invSrc) const {
+                                              bool invSrc,
+                                              bool invRes) const {
   // For the atomic bitwise operator, we generate
   //   thisMBB (instructions are in pairs, except cmpxchg8b)
   //     ld t1,t2 = [bitinstr.addr]
@@ -9939,15 +9948,26 @@
   MIB.addReg(t2);
   (*MIB).addOperand(*argOpers[valArgIndx + 1]);
 
+  unsigned trl = F->getRegInfo().createVirtualRegister(RC);
+  unsigned trh = F->getRegInfo().createVirtualRegister(RC);
+  if (invRes) {
+    MIB = BuildMI(newMBB, dl, TII->get(NotOpc), trl).addReg(t5);
+    MIB = BuildMI(newMBB, dl, TII->get(NotOpc), trh).addReg(t6);
+  }
+  else {
+    trl = t5;
+    trh = t6;
+  }
+
   MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::EAX);
   MIB.addReg(t1);
   MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::EDX);
   MIB.addReg(t2);
 
   MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::EBX);
-  MIB.addReg(t5);
+  MIB.addReg(trl);
   MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::ECX);
-  MIB.addReg(t6);
+  MIB.addReg(trh);
 
   MIB = BuildMI(newMBB, dl, TII->get(X86::LCMPXCHG8B));
   for (int i=0; i <= lastAddrIndx; ++i)
Index: lib/Target/X86/X86ISelLowering.h
===================================================================
--- lib/Target/X86/X86ISelLowering.h (revision 135054)
+++ lib/Target/X86/X86ISelLowering.h (working copy)
@@ -896,7 +896,8 @@
                                              unsigned notOpc,
                                              unsigned EAXreg,
                                              TargetRegisterClass *RC,
-                                             bool invSrc = false) const;
+                                             bool invSrc = false,
+                                             bool invRes = false) const;
 
     MachineBasicBlock *EmitAtomicBit6432WithCustomInserter(
                                              MachineInstr *BInstr,
@@ -905,7 +906,8 @@
                                              unsigned regOpcH,
                                              unsigned immOpcL,
                                              unsigned immOpcH,
-                                             bool invSrc = false) const;
+                                             bool invSrc = false,
+                                             bool invRes = false) const;
 
     /// Utility function to emit atomic min and max. It takes the min/max
     /// instruction to expand, the associated basic block, and the associated
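For anyone who would rather not trace the BuildMI calls, here is a rough
C++ analogue (my own sketch, not code the inserter emits verbatim; the
function name is made up, and the __sync compare-and-swap builtin stands
in for LCMPXCHG) of the loop the patched
EmitAtomicBitwiseWithCustomInserter generates for a 32-bit atomic NAND,
with the new invRes step marked:

#include <cstdint>

// Sketch of the generated cmpxchg loop; 'addr' and 'val' stand in for
// bitinstr.addr and bitinstr.val.
uint32_t atomic_nand_not_and(volatile uint32_t *addr, uint32_t val) {
  uint32_t t1, tr;
  do {
    t1 = *addr;              // ld   t1 = [bitinstr.addr]
    uint32_t t2 = t1 & val;  // and  t2 = t1, [bitinstr.val]
    tr = ~t2;                // not  tr = t2   <-- emitted only when invRes
  } while (__sync_val_compare_and_swap(addr, t1, tr) != t1); // lcmpxchg
  return t1;                 // EAX: the value that was in memory
}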
With the above patch applied, the current LLVM implementation ([NEGATE and
AND]) is still selected by default. To enable the [NOT AND] mode, the
compiler writer needs to make the following changes:


upstream/llvm> svn diff
Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp (revision 135080)
+++ lib/Target/X86/X86ISelLowering.cpp (working copy)
@@ -10835,7 +10835,8 @@
                                                X86::AND32ri, X86::MOV32rm,
                                                X86::LCMPXCHG32,
                                                X86::NOT32r, X86::EAX,
-                                               X86::GR32RegisterClass, true);
+                                               X86::GR32RegisterClass,
+                                               false, true);
   case X86::ATOMMIN32:
     return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVL32rr);
   case X86::ATOMMAX32:
@@ -10868,7 +10869,8 @@
                                                X86::AND16ri, X86::MOV16rm,
                                                X86::LCMPXCHG16,
                                                X86::NOT16r, X86::AX,
-                                               X86::GR16RegisterClass, true);
+                                               X86::GR16RegisterClass,
+                                               false, true);
   case X86::ATOMMIN16:
     return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVL16rr);
   case X86::ATOMMAX16:
@@ -10901,7 +10903,8 @@
                                                X86::AND8ri, X86::MOV8rm,
                                                X86::LCMPXCHG8,
                                                X86::NOT8r, X86::AL,
-                                               X86::GR8RegisterClass, true);
+                                               X86::GR8RegisterClass,
+                                               false, true);
   // FIXME: There are no CMOV8 instructions; MIN/MAX need some other way.
   // This group is for 64-bit host.
   case X86::ATOMAND64:
@@ -10927,7 +10930,8 @@
                                                X86::AND64ri32, X86::MOV64rm,
                                                X86::LCMPXCHG64,
                                                X86::NOT64r, X86::RAX,
-                                               X86::GR64RegisterClass, true);
+                                               X86::GR64RegisterClass,
+                                               false, true);
   case X86::ATOMMIN64:
     return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVL64rr);
   case X86::ATOMMAX64:
@@ -10957,7 +10961,7 @@
     return EmitAtomicBit6432WithCustomInserter(MI, BB,
                                                X86::AND32rr, X86::AND32rr,
                                                X86::AND32ri, X86::AND32ri,
-                                               true);
+                                               false, true);
   case X86::ATOMADD6432:
     return EmitAtomicBit6432WithCustomInserter(MI, BB,
                                                X86::ADD32rr, X86::ADC32rr,
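As a quick way to see the difference once the [NOT AND] calls above are in
place, something like the following could be compiled and run (my own test
sketch, not part of the patch; it assumes Clang accepts the
__sync_fetch_and_nand builtin and lowers it through the X86::ATOMNAND32
pseudo):

#include <cstdio>

int main() {
  // Hypothetical test, not part of the patch.
  unsigned x = 0xF0;
  unsigned old = __sync_fetch_and_nand(&x, 0x3Cu);
  // [NOT AND]        : x == ~(0xF0 & 0x3C) == 0xFFFFFFCF
  // [NEGATE and AND] : x == (~0xF0) & 0x3C == 0x0C
  std::printf("old = %#x, new = %#x\n", old, x);
  return 0;
}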
Thanks,
Cameron