[llvm-commits] [PATCH] Atomic NAND options

Jim Grosbach grosbach at apple.com
Thu Jul 14 09:41:28 PDT 2011


On Jul 14, 2011, at 9:29 AM, Chris Lattner wrote:

> 
> On Jul 14, 2011, at 8:03 AM, Cameron McInally wrote:
> 
>> Hey Guys,
>> 
>> I have been working on atomic NAND. Here is a patch that allows the
>> compiler writer to select a [NOT AND] implementation, while retaining
>> [NEGATE and AND] as the default for those who do not want the current
>> GCC behavior (i.e. that of GCC v4.4 and later).
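[For reference, the two semantics under discussion can be sketched as follows. This is illustrative only; the function names are ours, not LLVM's or GCC's.]

```cpp
#include <cstdint>

// Current LLVM default, matching pre-4.4 GCC: [NEGATE and AND].
uint32_t nand_negate_and(uint32_t old, uint32_t val) {
  return ~old & val;
}

// GCC 4.4 and later: a true NAND, [NOT AND].
uint32_t nand_not_and(uint32_t old, uint32_t val) {
  return ~(old & val);
}
```

[For example, with old = 0xF0F0F0F0 and val = 0xFF00FF00 the first returns 0x0F000F00 while the second returns 0x0FFF0FFF; the two definitions diverge for almost all inputs, which is why the choice of default matters.]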
> 
> Hi Cameron,
> 
> Is there a reason to support the broken pre-gcc-4.4 implementation at all?
> 

If we change (either optionally or unconditionally), we should also make sure to change all targets that implement the intrinsics, not just X86. It would be really nasty for the behaviour to be target specific.

-Jim

> 
>> 
>> upstream/llvm> svn diff
>> Index: lib/Target/X86/X86ISelLowering.cpp
>> ===================================================================
>> --- lib/Target/X86/X86ISelLowering.cpp    (revision 135054)
>> +++ lib/Target/X86/X86ISelLowering.cpp    (working copy)
>> @@ -9710,7 +9710,8 @@
>>                                                         unsigned notOpc,
>>                                                         unsigned EAXreg,
>>                                                         TargetRegisterClass *RC,
>> -                                                       bool invSrc) const {
>> +                                                       bool invSrc,
>> +                                                       bool invRes) const {
>>    // For the atomic bitwise operator, we generate
>>    //   thisMBB:
>>    //   newMBB:
>> @@ -9783,13 +9784,20 @@
>>    MIB.addReg(tt);
>>    (*MIB).addOperand(*argOpers[valArgIndx]);
>>  
>> +  unsigned tr = F->getRegInfo().createVirtualRegister(RC);
>> +  if (invRes) {
>> +    MIB = BuildMI(newMBB, dl, TII->get(notOpc), tr).addReg(t2);
>> +  }
>> +  else
>> +    tr = t2;
>> +
>>    MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), EAXreg);
>>    MIB.addReg(t1);
>>  
>>    MIB = BuildMI(newMBB, dl, TII->get(CXchgOpc));
>>    for (int i=0; i <= lastAddrIndx; ++i)
>>      (*MIB).addOperand(*argOpers[i]);
>> -  MIB.addReg(t2);
>> +  MIB.addReg(tr);
>>    assert(bInstr->hasOneMemOperand() && "Unexpected number of memoperand");
>>    (*MIB).setMemRefs(bInstr->memoperands_begin(),
>>                      bInstr->memoperands_end());
>> @@ -9812,7 +9820,8 @@
>>                                                         unsigned regOpcH,
>>                                                         unsigned immOpcL,
>>                                                         unsigned immOpcH,
>> -                                                       bool invSrc) const {
>> +                                                       bool invSrc,
>> +                                                       bool invRes) const {
>>    // For the atomic bitwise operator, we generate
>>    //   thisMBB (instructions are in pairs, except cmpxchg8b)
>>    //     ld t1,t2 = [bitinstr.addr]
>> @@ -9939,15 +9948,26 @@
>>      MIB.addReg(t2);
>>    (*MIB).addOperand(*argOpers[valArgIndx + 1]);
>>  
>> +  unsigned trl = F->getRegInfo().createVirtualRegister(RC);
>> +  unsigned trh = F->getRegInfo().createVirtualRegister(RC);
>> +  if (invRes) {
>> +    MIB = BuildMI(newMBB, dl, TII->get(NotOpc), trl).addReg(t5);
>> +    MIB = BuildMI(newMBB, dl, TII->get(NotOpc), trh).addReg(t6);
>> +  }
>> +  else {
>> +    trl = t5;
>> +    trh = t6;
>> +  }
>> +
>>    MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::EAX);
>>    MIB.addReg(t1);
>>    MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::EDX);
>>    MIB.addReg(t2);
>>  
>>    MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::EBX);
>> -  MIB.addReg(t5);
>> +  MIB.addReg(trl);
>>    MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::ECX);
>> -  MIB.addReg(t6);
>> +  MIB.addReg(trh);
>>  
>>    MIB = BuildMI(newMBB, dl, TII->get(X86::LCMPXCHG8B));
>>    for (int i=0; i <= lastAddrIndx; ++i)
>> Index: lib/Target/X86/X86ISelLowering.h
>> ===================================================================
>> --- lib/Target/X86/X86ISelLowering.h    (revision 135054)
>> +++ lib/Target/X86/X86ISelLowering.h    (working copy)
>> @@ -896,7 +896,8 @@
>>                                                      unsigned notOpc,
>>                                                      unsigned EAXreg,
>>                                                      TargetRegisterClass *RC,
>> -                                                    bool invSrc = false) const;
>> +                                                    bool invSrc = false,
>> +                                                    bool invRes = false) const;
>>  
>>      MachineBasicBlock *EmitAtomicBit6432WithCustomInserter(
>>                                                      MachineInstr *BInstr,
>> @@ -905,7 +906,8 @@
>>                                                      unsigned regOpcH,
>>                                                      unsigned immOpcL,
>>                                                      unsigned immOpcH,
>> -                                                    bool invSrc = false) const;
>> +                                                    bool invSrc = false,
>> +                                                    bool invRes = false) const;
>>  
>>      /// Utility function to emit atomic min and max.  It takes the min/max
>>      /// instruction to expand, the associated basic block, and the associated
>> 
>> 
>> With the above patch, the current LLVM implementation remains the
>> default. To enable the [NOT AND] mode, the compiler writer would make
>> the following changes:
>> 
>> 
>> upstream/llvm> svn diff
>> Index: lib/Target/X86/X86ISelLowering.cpp
>> ===================================================================
>> --- lib/Target/X86/X86ISelLowering.cpp    (revision 135080)
>> +++ lib/Target/X86/X86ISelLowering.cpp    (working copy)
>> @@ -10835,7 +10835,8 @@
>>                                                 X86::AND32ri, X86::MOV32rm,
>>                                                 X86::LCMPXCHG32,
>>                                                 X86::NOT32r, X86::EAX,
>> -                                               X86::GR32RegisterClass, true);
>> +                                               X86::GR32RegisterClass,
>> +                                               false, true);
>>    case X86::ATOMMIN32:
>>      return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVL32rr);
>>    case X86::ATOMMAX32:
>> @@ -10868,7 +10869,8 @@
>>                                                 X86::AND16ri, X86::MOV16rm,
>>                                                 X86::LCMPXCHG16,
>>                                                 X86::NOT16r, X86::AX,
>> -                                               X86::GR16RegisterClass, true);
>> +                                               X86::GR16RegisterClass,
>> +                                               false, true);
>>    case X86::ATOMMIN16:
>>      return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVL16rr);
>>    case X86::ATOMMAX16:
>> @@ -10901,7 +10903,8 @@
>>                                                 X86::AND8ri, X86::MOV8rm,
>>                                                 X86::LCMPXCHG8,
>>                                                 X86::NOT8r, X86::AL,
>> -                                               X86::GR8RegisterClass, true);
>> +                                               X86::GR8RegisterClass,
>> +                                               false, true);
>>    // FIXME: There are no CMOV8 instructions; MIN/MAX need some other way.
>>    // This group is for 64-bit host.
>>    case X86::ATOMAND64:
>> @@ -10927,7 +10930,8 @@
>>                                                X86::AND64ri32, X86::MOV64rm,
>>                                                 X86::LCMPXCHG64,
>>                                                 X86::NOT64r, X86::RAX,
>> -                                               X86::GR64RegisterClass, true);
>> +                                               X86::GR64RegisterClass,
>> +                                               false, true);
>>    case X86::ATOMMIN64:
>>      return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVL64rr);
>>    case X86::ATOMMAX64:
>> @@ -10957,7 +10961,7 @@
>>      return EmitAtomicBit6432WithCustomInserter(MI, BB,
>>                                                 X86::AND32rr, X86::AND32rr,
>>                                                 X86::AND32ri, X86::AND32ri,
>> -                                               true);
>> +                                               false, true);
>>    case X86::ATOMADD6432:
>>      return EmitAtomicBit6432WithCustomInserter(MI, BB,
>>                                                 X86::ADD32rr, X86::ADC32rr,
>> 
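[Taken together, the compare-exchange loop the two custom inserters describe behaves roughly like this C++ sketch of the intended semantics; the function and parameter names are ours, and this models the logic, not the emitted machine code.]

```cpp
#include <atomic>
#include <cstdint>

// Rough model of the loop an atomic AND-family op is expanded into;
// invSrc/invRes mirror the patch's flags.
uint32_t atomic_and_variant(std::atomic<uint32_t> &mem, uint32_t src,
                            bool invSrc, bool invRes) {
  uint32_t old = mem.load();
  for (;;) {
    uint32_t s = invSrc ? ~src : src;  // pre-existing source inversion
    uint32_t res = old & s;            // the bitwise operation itself
    if (invRes)
      res = ~res;                      // new flag: invert result -> true NAND
    // Like LCMPXCHG, retry until the swap succeeds; on failure 'old'
    // is refreshed with the current memory value.
    if (mem.compare_exchange_weak(old, res))
      return old;                      // fetch_and_* returns the old value
  }
}
```

[With invSrc = true, invRes = false this is the old [NEGATE and AND] behavior; with invSrc = false, invRes = true it is the GCC 4.4+ [NOT AND].]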
>> 
>> Thanks,
>> Cameron
>> 
>> _______________________________________________
>> llvm-commits mailing list
>> llvm-commits at cs.uiuc.edu
>> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits