[llvm-commits] [llvm] r164281 - in /llvm/trunk: lib/Target/X86/X86ISelLowering.cpp lib/Target/X86/X86ISelLowering.h lib/Target/X86/X86InstrCompiler.td lib/Target/X86/X86InstrInfo.cpp lib/Target/X86/X86InstrInfo.h test/CodeGen/X86/2010-01-08-Atomic64Bug.ll test/CodeGen/X86/atomic16.ll test/CodeGen/X86/atomic32.ll test/CodeGen/X86/atomic64.ll test/CodeGen/X86/atomic6432.ll test/CodeGen/X86/atomic8.ll test/CodeGen/X86/atomic_op.ll test/CodeGen/X86/pr13458.ll
Michael Liao
michael.liao at intel.com
Mon Feb 25 10:57:11 PST 2013
Could you elaborate more on this? Have you reported a bug for it? - michael
On Sun, 2013-02-24 at 15:24 -0800, Cameron Zwarich wrote:
> I guess it is a bit late to say this now, but this commit has a lot of problems. It sticks store memory operands onto loads, copies kill flags from one use to multiple uses, and uses a physical register across basic blocks prior to register allocation. I have a patch ready for the first two, and I'll probably fix the last one and commit it.
>
> Cameron
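
For readers skimming the diff below: the kill-flag problem Cameron mentions comes from the new emitters re-adding the pseudo instruction's address operands verbatim to more than one new instruction (the initial load and the LCMPXCHG), so a kill flag on a register use ends up duplicated. A minimal sketch of the conservative workaround, modeled on the setIsKill(false) loop this very commit removes from EmitAtomicBit6432WithCustomInserter (illustrative only, not the pending fix Cameron refers to):

  // Inside EmitAtomicLoadArith, where MI, MIB and MemOpndSlot are in scope.
  // Copy each address operand by value and drop any kill flag before adding
  // it, because the same registers are added again for the LCMPXCHG below.
  for (unsigned i = 0; i != X86::AddrNumOperands; ++i) {
    MachineOperand MO = MI->getOperand(MemOpndSlot + i); // copy, not a reference
    if (MO.isReg() && MO.isUse())
      MO.setIsKill(false);
    MIB.addOperand(MO);
  }
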
>
> On Sep 19, 2012, at 8:06 PM, Michael Liao <michael.liao at intel.com> wrote:
>
> > Author: hliao
> > Date: Wed Sep 19 22:06:15 2012
> > New Revision: 164281
> >
> > URL: http://llvm.org/viewvc/llvm-project?rev=164281&view=rev
> > Log:
> > Re-work X86 code generation of atomic ops with spin-loop
> >
> > - Rewrite/merge pseudo-atomic instruction emitters to address the
> > following issue:
> > * Remove an unnecessary load from the spin-loop.
> >
> > Previously, the spin-loop looked like:
> >
> > thisMBB:
> > newMBB:
> > ld t1 = [bitinstr.addr]
> > op t2 = t1, [bitinstr.val]
> > not t3 = t2 (if Invert)
> > mov EAX = t1
> > lcs dest = [bitinstr.addr], t3 [EAX is implicit]
> > bz newMBB
> > fallthrough -->nextMBB
> >
> > the 'ld' at the beginning of newMBB should be lifted out of the loop,
> > as lcs (i.e., CMPXCHG on x86) will load the current memory value into
> > EAX. The loop is refined as:
> >
> > thisMBB:
> > EAX = LOAD [MI.addr]
> > mainMBB:
> > t1 = OP [MI.val], EAX
> > LCMPXCHG [MI.addr], t1, [EAX is implicitly used & defined]
> > JNE mainMBB
> > sinkMBB:
> >
> > * Remove immopc since, so far, all pseudo-atomic instructions have
> > an all-register form only; there is no immediate operand.
> >
> > * Remove unnecessary attributes/modifiers from the pseudo-atomic
> > instruction td definitions.
> >
> > * Fix issues in PR13458
> >
> > - Add comprehensive tests for atomic ops on various data types.
> > NOTE: Some of them are turned off due to missing functionality.
> >
> > - Revise existing tests to match the newly generated spin-loop.
> >
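
To make the new shape concrete, a test in the style of the files added below would look roughly like this (the global @v, the function name, and the exact CHECK lines are illustrative and not part of the commit); nand has no lock-prefixed instruction form, so it is expected to go through the hoisted load plus op plus lock cmpxchg loop described above:

  ; RUN: llc < %s -O0 -march=x86-64 -mcpu=corei7 | FileCheck %s
  @v = external global i32

  define i32 @fetch_nand(i32 %x) nounwind {
    ; The load into EAX is emitted once, before the loop; the loop body
    ; recomputes ~(%x & EAX) and retries with lock cmpxchg if EAX changed.
    ; CHECK: movl
    ; CHECK: andl
    ; CHECK: notl
    ; CHECK: lock
    ; CHECK: cmpxchgl
    ; CHECK: jne
    %old = atomicrmw nand i32* @v, i32 %x acquire
    ret i32 %old
  }
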
> >
> > Added:
> > llvm/trunk/test/CodeGen/X86/atomic16.ll
> > llvm/trunk/test/CodeGen/X86/atomic32.ll
> > llvm/trunk/test/CodeGen/X86/atomic64.ll
> > llvm/trunk/test/CodeGen/X86/atomic6432.ll
> > llvm/trunk/test/CodeGen/X86/atomic8.ll
> > llvm/trunk/test/CodeGen/X86/pr13458.ll
> > Modified:
> > llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
> > llvm/trunk/lib/Target/X86/X86ISelLowering.h
> > llvm/trunk/lib/Target/X86/X86InstrCompiler.td
> > llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
> > llvm/trunk/lib/Target/X86/X86InstrInfo.h
> > llvm/trunk/test/CodeGen/X86/2010-01-08-Atomic64Bug.ll
> > llvm/trunk/test/CodeGen/X86/atomic_op.ll
> >
> > Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
> > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=164281&r1=164280&r2=164281&view=diff
> > ==============================================================================
> > --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
> > +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Wed Sep 19 22:06:15 2012
> > @@ -11911,385 +11911,498 @@
> > //===----------------------------------------------------------------------===//
> >
> > // private utility function
> > +
> > +// Get CMPXCHG opcode for the specified data type.
> > +static unsigned getCmpXChgOpcode(EVT VT) {
> > + switch (VT.getSimpleVT().SimpleTy) {
> > + case MVT::i8: return X86::LCMPXCHG8;
> > + case MVT::i16: return X86::LCMPXCHG16;
> > + case MVT::i32: return X86::LCMPXCHG32;
> > + case MVT::i64: return X86::LCMPXCHG64;
> > + default:
> > + break;
> > + }
> > + llvm_unreachable("Invalid operand size!");
> > +}
> > +
> > +// Get LOAD opcode for the specified data type.
> > +static unsigned getLoadOpcode(EVT VT) {
> > + switch (VT.getSimpleVT().SimpleTy) {
> > + case MVT::i8: return X86::MOV8rm;
> > + case MVT::i16: return X86::MOV16rm;
> > + case MVT::i32: return X86::MOV32rm;
> > + case MVT::i64: return X86::MOV64rm;
> > + default:
> > + break;
> > + }
> > + llvm_unreachable("Invalid operand size!");
> > +}
> > +
> > +// Get opcode of the non-atomic one from the specified atomic instruction.
> > +static unsigned getNonAtomicOpcode(unsigned Opc) {
> > + switch (Opc) {
> > + case X86::ATOMAND8: return X86::AND8rr;
> > + case X86::ATOMAND16: return X86::AND16rr;
> > + case X86::ATOMAND32: return X86::AND32rr;
> > + case X86::ATOMAND64: return X86::AND64rr;
> > + case X86::ATOMOR8: return X86::OR8rr;
> > + case X86::ATOMOR16: return X86::OR16rr;
> > + case X86::ATOMOR32: return X86::OR32rr;
> > + case X86::ATOMOR64: return X86::OR64rr;
> > + case X86::ATOMXOR8: return X86::XOR8rr;
> > + case X86::ATOMXOR16: return X86::XOR16rr;
> > + case X86::ATOMXOR32: return X86::XOR32rr;
> > + case X86::ATOMXOR64: return X86::XOR64rr;
> > + }
> > + llvm_unreachable("Unhandled atomic-load-op opcode!");
> > +}
> > +
> > +// Get opcode of the non-atomic one from the specified atomic instruction with
> > +// extra opcode.
> > +static unsigned getNonAtomicOpcodeWithExtraOpc(unsigned Opc,
> > + unsigned &ExtraOpc) {
> > + switch (Opc) {
> > + case X86::ATOMNAND8: ExtraOpc = X86::NOT8r; return X86::AND8rr;
> > + case X86::ATOMNAND16: ExtraOpc = X86::NOT16r; return X86::AND16rr;
> > + case X86::ATOMNAND32: ExtraOpc = X86::NOT32r; return X86::AND32rr;
> > + case X86::ATOMNAND64: ExtraOpc = X86::NOT64r; return X86::AND64rr;
> > + case X86::ATOMMAX16: ExtraOpc = X86::CMP16rr; return X86::CMOVL16rr;
> > + case X86::ATOMMAX32: ExtraOpc = X86::CMP32rr; return X86::CMOVL32rr;
> > + case X86::ATOMMAX64: ExtraOpc = X86::CMP64rr; return X86::CMOVL64rr;
> > + case X86::ATOMMIN16: ExtraOpc = X86::CMP16rr; return X86::CMOVG16rr;
> > + case X86::ATOMMIN32: ExtraOpc = X86::CMP32rr; return X86::CMOVG32rr;
> > + case X86::ATOMMIN64: ExtraOpc = X86::CMP64rr; return X86::CMOVG64rr;
> > + case X86::ATOMUMAX16: ExtraOpc = X86::CMP16rr; return X86::CMOVB16rr;
> > + case X86::ATOMUMAX32: ExtraOpc = X86::CMP32rr; return X86::CMOVB32rr;
> > + case X86::ATOMUMAX64: ExtraOpc = X86::CMP64rr; return X86::CMOVB64rr;
> > + case X86::ATOMUMIN16: ExtraOpc = X86::CMP16rr; return X86::CMOVA16rr;
> > + case X86::ATOMUMIN32: ExtraOpc = X86::CMP32rr; return X86::CMOVA32rr;
> > + case X86::ATOMUMIN64: ExtraOpc = X86::CMP64rr; return X86::CMOVA64rr;
> > + }
> > + llvm_unreachable("Unhandled atomic-load-op opcode!");
> > +}
> > +
> > +// Get opcode of the non-atomic one from the specified atomic instruction for
> > +// 64-bit data type on 32-bit target.
> > +static unsigned getNonAtomic6432Opcode(unsigned Opc, unsigned &HiOpc) {
> > + switch (Opc) {
> > + case X86::ATOMAND6432: HiOpc = X86::AND32rr; return X86::AND32rr;
> > + case X86::ATOMOR6432: HiOpc = X86::OR32rr; return X86::OR32rr;
> > + case X86::ATOMXOR6432: HiOpc = X86::XOR32rr; return X86::XOR32rr;
> > + case X86::ATOMADD6432: HiOpc = X86::ADC32rr; return X86::ADD32rr;
> > + case X86::ATOMSUB6432: HiOpc = X86::SBB32rr; return X86::SUB32rr;
> > + case X86::ATOMSWAP6432: HiOpc = X86::MOV32rr; return X86::MOV32rr;
> > + }
> > + llvm_unreachable("Unhandled atomic-load-op opcode!");
> > +}
> > +
> > +// Get opcode of the non-atomic one from the specified atomic instruction for
> > +// 64-bit data type on 32-bit target with extra opcode.
> > +static unsigned getNonAtomic6432OpcodeWithExtraOpc(unsigned Opc,
> > + unsigned &HiOpc,
> > + unsigned &ExtraOpc) {
> > + switch (Opc) {
> > + case X86::ATOMNAND6432:
> > + ExtraOpc = X86::NOT32r;
> > + HiOpc = X86::AND32rr;
> > + return X86::AND32rr;
> > + }
> > + llvm_unreachable("Unhandled atomic-load-op opcode!");
> > +}
> > +
> > +// Get pseudo CMOV opcode from the specified data type.
> > +static unsigned getPseudoCMOVOpc(EVT VT) {
> > + switch (VT.getSimpleVT().SimpleTy) {
> > + case MVT::i16: return X86::CMOV_GR16;
> > + case MVT::i32: return X86::CMOV_GR32;
> > + default:
> > + break;
> > + }
> > + llvm_unreachable("Unknown CMOV opcode!");
> > +}
> > +
> > +// EmitAtomicLoadArith - emit the code sequence for pseudo atomic instructions.
> > +// They will be translated into a spin-loop or compare-exchange loop from
> > +//
> > +// ...
> > +// dst = atomic-fetch-op MI.addr, MI.val
> > +// ...
> > +//
> > +// to
> > +//
> > +// ...
> > +// EAX = LOAD MI.addr
> > +// loop:
> > +// t1 = OP MI.val, EAX
> > +// LCMPXCHG [MI.addr], t1, [EAX is implicitly used & defined]
> > +// JNE loop
> > +// sink:
> > +// dst = EAX
> > +// ...
> > MachineBasicBlock *
> > -X86TargetLowering::EmitAtomicBitwiseWithCustomInserter(MachineInstr *bInstr,
> > - MachineBasicBlock *MBB,
> > - unsigned regOpc,
> > - unsigned immOpc,
> > - unsigned LoadOpc,
> > - unsigned CXchgOpc,
> > - unsigned notOpc,
> > - unsigned EAXreg,
> > - const TargetRegisterClass *RC,
> > - bool Invert) const {
> > - // For the atomic bitwise operator, we generate
> > - // thisMBB:
> > - // newMBB:
> > - // ld t1 = [bitinstr.addr]
> > - // op t2 = t1, [bitinstr.val]
> > - // not t3 = t2 (if Invert)
> > - // mov EAX = t1
> > - // lcs dest = [bitinstr.addr], t3 [EAX is implicit]
> > - // bz newMBB
> > - // fallthrough -->nextMBB
> > +X86TargetLowering::EmitAtomicLoadArith(MachineInstr *MI,
> > + MachineBasicBlock *MBB) const {
> > const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
> > - const BasicBlock *LLVM_BB = MBB->getBasicBlock();
> > - MachineFunction::iterator MBBIter = MBB;
> > - ++MBBIter;
> > + DebugLoc DL = MI->getDebugLoc();
> >
> > - /// First build the CFG
> > - MachineFunction *F = MBB->getParent();
> > - MachineBasicBlock *thisMBB = MBB;
> > - MachineBasicBlock *newMBB = F->CreateMachineBasicBlock(LLVM_BB);
> > - MachineBasicBlock *nextMBB = F->CreateMachineBasicBlock(LLVM_BB);
> > - F->insert(MBBIter, newMBB);
> > - F->insert(MBBIter, nextMBB);
> > -
> > - // Transfer the remainder of thisMBB and its successor edges to nextMBB.
> > - nextMBB->splice(nextMBB->begin(), thisMBB,
> > - llvm::next(MachineBasicBlock::iterator(bInstr)),
> > - thisMBB->end());
> > - nextMBB->transferSuccessorsAndUpdatePHIs(thisMBB);
> > -
> > - // Update thisMBB to fall through to newMBB
> > - thisMBB->addSuccessor(newMBB);
> > -
> > - // newMBB jumps to itself and fall through to nextMBB
> > - newMBB->addSuccessor(nextMBB);
> > - newMBB->addSuccessor(newMBB);
> > -
> > - // Insert instructions into newMBB based on incoming instruction
> > - assert(bInstr->getNumOperands() < X86::AddrNumOperands + 4 &&
> > - "unexpected number of operands");
> > - DebugLoc dl = bInstr->getDebugLoc();
> > - MachineOperand& destOper = bInstr->getOperand(0);
> > - MachineOperand* argOpers[2 + X86::AddrNumOperands];
> > - int numArgs = bInstr->getNumOperands() - 1;
> > - for (int i=0; i < numArgs; ++i)
> > - argOpers[i] = &bInstr->getOperand(i+1);
> > -
> > - // x86 address has 4 operands: base, index, scale, and displacement
> > - int lastAddrIndx = X86::AddrNumOperands - 1; // [0,3]
> > - int valArgIndx = lastAddrIndx + 1;
> > -
> > - unsigned t1 = F->getRegInfo().createVirtualRegister(RC);
> > - MachineInstrBuilder MIB = BuildMI(newMBB, dl, TII->get(LoadOpc), t1);
> > - for (int i=0; i <= lastAddrIndx; ++i)
> > - (*MIB).addOperand(*argOpers[i]);
> > -
> > - unsigned t2 = F->getRegInfo().createVirtualRegister(RC);
> > - assert((argOpers[valArgIndx]->isReg() ||
> > - argOpers[valArgIndx]->isImm()) &&
> > - "invalid operand");
> > - if (argOpers[valArgIndx]->isReg())
> > - MIB = BuildMI(newMBB, dl, TII->get(regOpc), t2);
> > - else
> > - MIB = BuildMI(newMBB, dl, TII->get(immOpc), t2);
> > - MIB.addReg(t1);
> > - (*MIB).addOperand(*argOpers[valArgIndx]);
> > + MachineFunction *MF = MBB->getParent();
> > + MachineRegisterInfo &MRI = MF->getRegInfo();
> >
> > - unsigned t3 = F->getRegInfo().createVirtualRegister(RC);
> > - if (Invert) {
> > - MIB = BuildMI(newMBB, dl, TII->get(notOpc), t3).addReg(t2);
> > - }
> > - else
> > - t3 = t2;
> > + const BasicBlock *BB = MBB->getBasicBlock();
> > + MachineFunction::iterator I = MBB;
> > + ++I;
> >
> > - MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), EAXreg);
> > - MIB.addReg(t1);
> > + assert(MI->getNumOperands() <= X86::AddrNumOperands + 2 &&
> > + "Unexpected number of operands");
> > +
> > + assert(MI->hasOneMemOperand() &&
> > + "Expected atomic-load-op to have one memoperand");
> > +
> > + // Memory Reference
> > + MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
> > + MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();
> >
> > - MIB = BuildMI(newMBB, dl, TII->get(CXchgOpc));
> > - for (int i=0; i <= lastAddrIndx; ++i)
> > - (*MIB).addOperand(*argOpers[i]);
> > - MIB.addReg(t3);
> > - assert(bInstr->hasOneMemOperand() && "Unexpected number of memoperand");
> > - (*MIB).setMemRefs(bInstr->memoperands_begin(),
> > - bInstr->memoperands_end());
> > + unsigned DstReg, SrcReg;
> > + unsigned MemOpndSlot;
> >
> > - MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), destOper.getReg());
> > - MIB.addReg(EAXreg);
> > + unsigned CurOp = 0;
> >
> > - // insert branch
> > - BuildMI(newMBB, dl, TII->get(X86::JNE_4)).addMBB(newMBB);
> > + DstReg = MI->getOperand(CurOp++).getReg();
> > + MemOpndSlot = CurOp;
> > + CurOp += X86::AddrNumOperands;
> > + SrcReg = MI->getOperand(CurOp++).getReg();
> >
> > - bInstr->eraseFromParent(); // The pseudo instruction is gone now.
> > - return nextMBB;
> > -}
> > + const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
> > + EVT VT = *RC->vt_begin();
> > + unsigned AccPhyReg = getX86SubSuperRegister(X86::EAX, VT);
> >
> > -// private utility function: 64 bit atomics on 32 bit host.
> > -MachineBasicBlock *
> > -X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr,
> > - MachineBasicBlock *MBB,
> > - unsigned regOpcL,
> > - unsigned regOpcH,
> > - unsigned immOpcL,
> > - unsigned immOpcH,
> > - bool Invert) const {
> > - // For the atomic bitwise operator, we generate
> > - // thisMBB (instructions are in pairs, except cmpxchg8b)
> > - // ld t1,t2 = [bitinstr.addr]
> > - // newMBB:
> > - // out1, out2 = phi (thisMBB, t1/t2) (newMBB, t3/t4)
> > - // op t5, t6 <- out1, out2, [bitinstr.val]
> > - // (for SWAP, substitute: mov t5, t6 <- [bitinstr.val])
> > - // neg t7, t8 < t5, t6 (if Invert)
> > - // mov ECX, EBX <- t5, t6
> > - // mov EAX, EDX <- t1, t2
> > - // cmpxchg8b [bitinstr.addr] [EAX, EDX, EBX, ECX implicit]
> > - // mov t3, t4 <- EAX, EDX
> > - // bz newMBB
> > - // result in out1, out2
> > - // fallthrough -->nextMBB
> > + unsigned LCMPXCHGOpc = getCmpXChgOpcode(VT);
> > + unsigned LOADOpc = getLoadOpcode(VT);
> >
> > - const TargetRegisterClass *RC = &X86::GR32RegClass;
> > - const unsigned LoadOpc = X86::MOV32rm;
> > - const unsigned NotOpc = X86::NOT32r;
> > - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
> > - const BasicBlock *LLVM_BB = MBB->getBasicBlock();
> > - MachineFunction::iterator MBBIter = MBB;
> > - ++MBBIter;
> > + // For the atomic load-arith operator, we generate
> > + //
> > + // thisMBB:
> > + // EAX = LOAD [MI.addr]
> > + // mainMBB:
> > + // t1 = OP MI.val, EAX
> > + // LCMPXCHG [MI.addr], t1, [EAX is implicitly used & defined]
> > + // JNE mainMBB
> > + // sinkMBB:
> >
> > - /// First build the CFG
> > - MachineFunction *F = MBB->getParent();
> > MachineBasicBlock *thisMBB = MBB;
> > - MachineBasicBlock *newMBB = F->CreateMachineBasicBlock(LLVM_BB);
> > - MachineBasicBlock *nextMBB = F->CreateMachineBasicBlock(LLVM_BB);
> > - F->insert(MBBIter, newMBB);
> > - F->insert(MBBIter, nextMBB);
> > -
> > - // Transfer the remainder of thisMBB and its successor edges to nextMBB.
> > - nextMBB->splice(nextMBB->begin(), thisMBB,
> > - llvm::next(MachineBasicBlock::iterator(bInstr)),
> > - thisMBB->end());
> > - nextMBB->transferSuccessorsAndUpdatePHIs(thisMBB);
> > -
> > - // Update thisMBB to fall through to newMBB
> > - thisMBB->addSuccessor(newMBB);
> > -
> > - // newMBB jumps to itself and fall through to nextMBB
> > - newMBB->addSuccessor(nextMBB);
> > - newMBB->addSuccessor(newMBB);
> > -
> > - DebugLoc dl = bInstr->getDebugLoc();
> > - // Insert instructions into newMBB based on incoming instruction
> > - // There are 8 "real" operands plus 9 implicit def/uses, ignored here.
> > - assert(bInstr->getNumOperands() < X86::AddrNumOperands + 14 &&
> > - "unexpected number of operands");
> > - MachineOperand& dest1Oper = bInstr->getOperand(0);
> > - MachineOperand& dest2Oper = bInstr->getOperand(1);
> > - MachineOperand* argOpers[2 + X86::AddrNumOperands];
> > - for (int i=0; i < 2 + X86::AddrNumOperands; ++i) {
> > - argOpers[i] = &bInstr->getOperand(i+2);
> > -
> > - // We use some of the operands multiple times, so conservatively just
> > - // clear any kill flags that might be present.
> > - if (argOpers[i]->isReg() && argOpers[i]->isUse())
> > - argOpers[i]->setIsKill(false);
> > - }
> > -
> > - // x86 address has 5 operands: base, index, scale, displacement, and segment.
> > - int lastAddrIndx = X86::AddrNumOperands - 1; // [0,3]
> > -
> > - unsigned t1 = F->getRegInfo().createVirtualRegister(RC);
> > - MachineInstrBuilder MIB = BuildMI(thisMBB, dl, TII->get(LoadOpc), t1);
> > - for (int i=0; i <= lastAddrIndx; ++i)
> > - (*MIB).addOperand(*argOpers[i]);
> > - unsigned t2 = F->getRegInfo().createVirtualRegister(RC);
> > - MIB = BuildMI(thisMBB, dl, TII->get(LoadOpc), t2);
> > - // add 4 to displacement.
> > - for (int i=0; i <= lastAddrIndx-2; ++i)
> > - (*MIB).addOperand(*argOpers[i]);
> > - MachineOperand newOp3 = *(argOpers[3]);
> > - if (newOp3.isImm())
> > - newOp3.setImm(newOp3.getImm()+4);
> > - else
> > - newOp3.setOffset(newOp3.getOffset()+4);
> > - (*MIB).addOperand(newOp3);
> > - (*MIB).addOperand(*argOpers[lastAddrIndx]);
> > -
> > - // t3/4 are defined later, at the bottom of the loop
> > - unsigned t3 = F->getRegInfo().createVirtualRegister(RC);
> > - unsigned t4 = F->getRegInfo().createVirtualRegister(RC);
> > - BuildMI(newMBB, dl, TII->get(X86::PHI), dest1Oper.getReg())
> > - .addReg(t1).addMBB(thisMBB).addReg(t3).addMBB(newMBB);
> > - BuildMI(newMBB, dl, TII->get(X86::PHI), dest2Oper.getReg())
> > - .addReg(t2).addMBB(thisMBB).addReg(t4).addMBB(newMBB);
> > -
> > - // The subsequent operations should be using the destination registers of
> > - // the PHI instructions.
> > - t1 = dest1Oper.getReg();
> > - t2 = dest2Oper.getReg();
> > -
> > - int valArgIndx = lastAddrIndx + 1;
> > - assert((argOpers[valArgIndx]->isReg() ||
> > - argOpers[valArgIndx]->isImm()) &&
> > - "invalid operand");
> > - unsigned t5 = F->getRegInfo().createVirtualRegister(RC);
> > - unsigned t6 = F->getRegInfo().createVirtualRegister(RC);
> > - if (argOpers[valArgIndx]->isReg())
> > - MIB = BuildMI(newMBB, dl, TII->get(regOpcL), t5);
> > - else
> > - MIB = BuildMI(newMBB, dl, TII->get(immOpcL), t5);
> > - if (regOpcL != X86::MOV32rr)
> > - MIB.addReg(t1);
> > - (*MIB).addOperand(*argOpers[valArgIndx]);
> > - assert(argOpers[valArgIndx + 1]->isReg() ==
> > - argOpers[valArgIndx]->isReg());
> > - assert(argOpers[valArgIndx + 1]->isImm() ==
> > - argOpers[valArgIndx]->isImm());
> > - if (argOpers[valArgIndx + 1]->isReg())
> > - MIB = BuildMI(newMBB, dl, TII->get(regOpcH), t6);
> > - else
> > - MIB = BuildMI(newMBB, dl, TII->get(immOpcH), t6);
> > - if (regOpcH != X86::MOV32rr)
> > - MIB.addReg(t2);
> > - (*MIB).addOperand(*argOpers[valArgIndx + 1]);
> > -
> > - unsigned t7, t8;
> > - if (Invert) {
> > - t7 = F->getRegInfo().createVirtualRegister(RC);
> > - t8 = F->getRegInfo().createVirtualRegister(RC);
> > - MIB = BuildMI(newMBB, dl, TII->get(NotOpc), t7).addReg(t5);
> > - MIB = BuildMI(newMBB, dl, TII->get(NotOpc), t8).addReg(t6);
> > - } else {
> > - t7 = t5;
> > - t8 = t6;
> > + MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
> > + MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
> > + MF->insert(I, mainMBB);
> > + MF->insert(I, sinkMBB);
> > +
> > + MachineInstrBuilder MIB;
> > +
> > + // Transfer the remainder of BB and its successor edges to sinkMBB.
> > + sinkMBB->splice(sinkMBB->begin(), MBB,
> > + llvm::next(MachineBasicBlock::iterator(MI)), MBB->end());
> > + sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
> > +
> > + // thisMBB:
> > + MIB = BuildMI(thisMBB, DL, TII->get(LOADOpc), AccPhyReg);
> > + for (unsigned i = 0; i < X86::AddrNumOperands; ++i)
> > + MIB.addOperand(MI->getOperand(MemOpndSlot + i));
> > + MIB.setMemRefs(MMOBegin, MMOEnd);
> > +
> > + thisMBB->addSuccessor(mainMBB);
> > +
> > + // mainMBB:
> > + MachineBasicBlock *origMainMBB = mainMBB;
> > + mainMBB->addLiveIn(AccPhyReg);
> > +
> > + // Copy AccPhyReg as it is used more than once.
> > + unsigned AccReg = MRI.createVirtualRegister(RC);
> > + BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), AccReg)
> > + .addReg(AccPhyReg);
> > +
> > + unsigned t1 = MRI.createVirtualRegister(RC);
> > + unsigned Opc = MI->getOpcode();
> > + switch (Opc) {
> > + default:
> > + llvm_unreachable("Unhandled atomic-load-op opcode!");
> > + case X86::ATOMAND8:
> > + case X86::ATOMAND16:
> > + case X86::ATOMAND32:
> > + case X86::ATOMAND64:
> > + case X86::ATOMOR8:
> > + case X86::ATOMOR16:
> > + case X86::ATOMOR32:
> > + case X86::ATOMOR64:
> > + case X86::ATOMXOR8:
> > + case X86::ATOMXOR16:
> > + case X86::ATOMXOR32:
> > + case X86::ATOMXOR64: {
> > + unsigned ARITHOpc = getNonAtomicOpcode(Opc);
> > + BuildMI(mainMBB, DL, TII->get(ARITHOpc), t1).addReg(SrcReg)
> > + .addReg(AccReg);
> > + break;
> > + }
> > + case X86::ATOMNAND8:
> > + case X86::ATOMNAND16:
> > + case X86::ATOMNAND32:
> > + case X86::ATOMNAND64: {
> > + unsigned t2 = MRI.createVirtualRegister(RC);
> > + unsigned NOTOpc;
> > + unsigned ANDOpc = getNonAtomicOpcodeWithExtraOpc(Opc, NOTOpc);
> > + BuildMI(mainMBB, DL, TII->get(ANDOpc), t2).addReg(SrcReg)
> > + .addReg(AccReg);
> > + BuildMI(mainMBB, DL, TII->get(NOTOpc), t1).addReg(t2);
> > + break;
> > + }
> > + case X86::ATOMMAX16:
> > + case X86::ATOMMAX32:
> > + case X86::ATOMMAX64:
> > + case X86::ATOMMIN16:
> > + case X86::ATOMMIN32:
> > + case X86::ATOMMIN64:
> > + case X86::ATOMUMAX16:
> > + case X86::ATOMUMAX32:
> > + case X86::ATOMUMAX64:
> > + case X86::ATOMUMIN16:
> > + case X86::ATOMUMIN32:
> > + case X86::ATOMUMIN64: {
> > + unsigned CMPOpc;
> > + unsigned CMOVOpc = getNonAtomicOpcodeWithExtraOpc(Opc, CMPOpc);
> > +
> > + BuildMI(mainMBB, DL, TII->get(CMPOpc))
> > + .addReg(SrcReg)
> > + .addReg(AccReg);
> > +
> > + if (Subtarget->hasCMov()) {
> > + // Native support
> > + BuildMI(mainMBB, DL, TII->get(CMOVOpc), t1)
> > + .addReg(SrcReg)
> > + .addReg(AccReg);
> > + } else {
> > + // Use pseudo select and lower them.
> > + assert((VT == MVT::i16 || VT == MVT::i32) &&
> > + "Invalid atomic-load-op transformation!");
> > + unsigned SelOpc = getPseudoCMOVOpc(VT);
> > + X86::CondCode CC = X86::getCondFromCMovOpc(CMOVOpc);
> > + assert(CC != X86::COND_INVALID && "Invalid atomic-load-op transformation!");
> > + MIB = BuildMI(mainMBB, DL, TII->get(SelOpc), t1)
> > + .addReg(SrcReg).addReg(AccReg)
> > + .addImm(CC);
> > + mainMBB = EmitLoweredSelect(MIB, mainMBB);
> > + }
> > + break;
> > + }
> > }
> >
> > - MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::EAX);
> > + // Copy AccPhyReg back from virtual register.
> > + BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), AccPhyReg)
> > + .addReg(AccReg);
> > +
> > + MIB = BuildMI(mainMBB, DL, TII->get(LCMPXCHGOpc));
> > + for (unsigned i = 0; i < X86::AddrNumOperands; ++i)
> > + MIB.addOperand(MI->getOperand(MemOpndSlot + i));
> > MIB.addReg(t1);
> > - MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::EDX);
> > - MIB.addReg(t2);
> > + MIB.setMemRefs(MMOBegin, MMOEnd);
> >
> > - MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::EBX);
> > - MIB.addReg(t7);
> > - MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::ECX);
> > - MIB.addReg(t8);
> > -
> > - MIB = BuildMI(newMBB, dl, TII->get(X86::LCMPXCHG8B));
> > - for (int i=0; i <= lastAddrIndx; ++i)
> > - (*MIB).addOperand(*argOpers[i]);
> > -
> > - assert(bInstr->hasOneMemOperand() && "Unexpected number of memoperand");
> > - (*MIB).setMemRefs(bInstr->memoperands_begin(),
> > - bInstr->memoperands_end());
> > -
> > - MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), t3);
> > - MIB.addReg(X86::EAX);
> > - MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), t4);
> > - MIB.addReg(X86::EDX);
> > + BuildMI(mainMBB, DL, TII->get(X86::JNE_4)).addMBB(origMainMBB);
> >
> > - // insert branch
> > - BuildMI(newMBB, dl, TII->get(X86::JNE_4)).addMBB(newMBB);
> > + mainMBB->addSuccessor(origMainMBB);
> > + mainMBB->addSuccessor(sinkMBB);
> > +
> > + // sinkMBB:
> > + sinkMBB->addLiveIn(AccPhyReg);
> > +
> > + BuildMI(*sinkMBB, sinkMBB->begin(), DL,
> > + TII->get(TargetOpcode::COPY), DstReg)
> > + .addReg(AccPhyReg);
> >
> > - bInstr->eraseFromParent(); // The pseudo instruction is gone now.
> > - return nextMBB;
> > + MI->eraseFromParent();
> > + return sinkMBB;
> > }
> >
> > -// private utility function
> > +// EmitAtomicLoadArith6432 - emit the code sequence for pseudo atomic
> > +// instructions. They will be translated into a spin-loop or compare-exchange
> > +// loop from
> > +//
> > +// ...
> > +// dst = atomic-fetch-op MI.addr, MI.val
> > +// ...
> > +//
> > +// to
> > +//
> > +// ...
> > +// EAX = LOAD [MI.addr + 0]
> > +// EDX = LOAD [MI.addr + 4]
> > +// loop:
> > +// EBX = OP MI.val.lo, EAX
> > +// ECX = OP MI.val.hi, EDX
> > +// LCMPXCHG8B [MI.addr], [ECX:EBX & EDX:EAX are implicitly used and EDX:EAX is implicitly defined]
> > +// JNE loop
> > +// sink:
> > +// dst = EDX:EAX
> > +// ...
> > MachineBasicBlock *
> > -X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr,
> > - MachineBasicBlock *MBB,
> > - unsigned cmovOpc) const {
> > - // For the atomic min/max operator, we generate
> > - // thisMBB:
> > - // newMBB:
> > - // ld t1 = [min/max.addr]
> > - // mov t2 = [min/max.val]
> > - // cmp t1, t2
> > - // cmov[cond] t2 = t1
> > - // mov EAX = t1
> > - // lcs dest = [bitinstr.addr], t2 [EAX is implicit]
> > - // bz newMBB
> > - // fallthrough -->nextMBB
> > - //
> > +X86TargetLowering::EmitAtomicLoadArith6432(MachineInstr *MI,
> > + MachineBasicBlock *MBB) const {
> > const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
> > - const BasicBlock *LLVM_BB = MBB->getBasicBlock();
> > - MachineFunction::iterator MBBIter = MBB;
> > - ++MBBIter;
> > + DebugLoc DL = MI->getDebugLoc();
> > +
> > + MachineFunction *MF = MBB->getParent();
> > + MachineRegisterInfo &MRI = MF->getRegInfo();
> > +
> > + const BasicBlock *BB = MBB->getBasicBlock();
> > + MachineFunction::iterator I = MBB;
> > + ++I;
> > +
> > + assert(MI->getNumOperands() <= X86::AddrNumOperands + 4 &&
> > + "Unexpected number of operands");
> > +
> > + assert(MI->hasOneMemOperand() &&
> > + "Expected atomic-load-op32 to have one memoperand");
> > +
> > + // Memory Reference
> > + MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
> > + MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();
> > +
> > + unsigned DstLoReg, DstHiReg;
> > + unsigned SrcLoReg, SrcHiReg;
> > + unsigned MemOpndSlot;
> > +
> > + unsigned CurOp = 0;
> > +
> > + DstLoReg = MI->getOperand(CurOp++).getReg();
> > + DstHiReg = MI->getOperand(CurOp++).getReg();
> > + MemOpndSlot = CurOp;
> > + CurOp += X86::AddrNumOperands;
> > + SrcLoReg = MI->getOperand(CurOp++).getReg();
> > + SrcHiReg = MI->getOperand(CurOp++).getReg();
> > +
> > + const TargetRegisterClass *RC = &X86::GR32RegClass;
> > +
> > + unsigned LCMPXCHGOpc = X86::LCMPXCHG8B;
> > + unsigned LOADOpc = X86::MOV32rm;
> > +
> > + // For the atomic load-arith operator, we generate
> > + //
> > + // thisMBB:
> > + // EAX = LOAD [MI.addr + 0]
> > + // EDX = LOAD [MI.addr + 4]
> > + // mainMBB:
> > + // EBX = OP MI.vallo, EAX
> > + // ECX = OP MI.valhi, EDX
> > + // LCMPXCHG8B [MI.addr], [ECX:EBX & EDX:EAX are implicitly used and EDX:EAX is implicitly defined]
> > + // JNE mainMBB
> > + // sinkMBB:
> >
> > - /// First build the CFG
> > - MachineFunction *F = MBB->getParent();
> > MachineBasicBlock *thisMBB = MBB;
> > - MachineBasicBlock *newMBB = F->CreateMachineBasicBlock(LLVM_BB);
> > - MachineBasicBlock *nextMBB = F->CreateMachineBasicBlock(LLVM_BB);
> > - F->insert(MBBIter, newMBB);
> > - F->insert(MBBIter, nextMBB);
> > -
> > - // Transfer the remainder of thisMBB and its successor edges to nextMBB.
> > - nextMBB->splice(nextMBB->begin(), thisMBB,
> > - llvm::next(MachineBasicBlock::iterator(mInstr)),
> > - thisMBB->end());
> > - nextMBB->transferSuccessorsAndUpdatePHIs(thisMBB);
> > -
> > - // Update thisMBB to fall through to newMBB
> > - thisMBB->addSuccessor(newMBB);
> > -
> > - // newMBB jumps to newMBB and fall through to nextMBB
> > - newMBB->addSuccessor(nextMBB);
> > - newMBB->addSuccessor(newMBB);
> > -
> > - DebugLoc dl = mInstr->getDebugLoc();
> > - // Insert instructions into newMBB based on incoming instruction
> > - assert(mInstr->getNumOperands() < X86::AddrNumOperands + 4 &&
> > - "unexpected number of operands");
> > - MachineOperand& destOper = mInstr->getOperand(0);
> > - MachineOperand* argOpers[2 + X86::AddrNumOperands];
> > - int numArgs = mInstr->getNumOperands() - 1;
> > - for (int i=0; i < numArgs; ++i)
> > - argOpers[i] = &mInstr->getOperand(i+1);
> > -
> > - // x86 address has 4 operands: base, index, scale, and displacement
> > - int lastAddrIndx = X86::AddrNumOperands - 1; // [0,3]
> > - int valArgIndx = lastAddrIndx + 1;
> > -
> > - unsigned t1 = F->getRegInfo().createVirtualRegister(&X86::GR32RegClass);
> > - MachineInstrBuilder MIB = BuildMI(newMBB, dl, TII->get(X86::MOV32rm), t1);
> > - for (int i=0; i <= lastAddrIndx; ++i)
> > - (*MIB).addOperand(*argOpers[i]);
> > -
> > - // We only support register and immediate values
> > - assert((argOpers[valArgIndx]->isReg() ||
> > - argOpers[valArgIndx]->isImm()) &&
> > - "invalid operand");
> > -
> > - unsigned t2 = F->getRegInfo().createVirtualRegister(&X86::GR32RegClass);
> > - if (argOpers[valArgIndx]->isReg())
> > - MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), t2);
> > - else
> > - MIB = BuildMI(newMBB, dl, TII->get(X86::MOV32rr), t2);
> > - (*MIB).addOperand(*argOpers[valArgIndx]);
> > + MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
> > + MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
> > + MF->insert(I, mainMBB);
> > + MF->insert(I, sinkMBB);
> >
> > - MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::EAX);
> > - MIB.addReg(t1);
> > + MachineInstrBuilder MIB;
> >
> > - MIB = BuildMI(newMBB, dl, TII->get(X86::CMP32rr));
> > - MIB.addReg(t1);
> > - MIB.addReg(t2);
> > + // Transfer the remainder of BB and its successor edges to sinkMBB.
> > + sinkMBB->splice(sinkMBB->begin(), MBB,
> > + llvm::next(MachineBasicBlock::iterator(MI)), MBB->end());
> > + sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
> > +
> > + // thisMBB:
> > + // Lo
> > + MIB = BuildMI(thisMBB, DL, TII->get(LOADOpc), X86::EAX);
> > + for (unsigned i = 0; i < X86::AddrNumOperands; ++i)
> > + MIB.addOperand(MI->getOperand(MemOpndSlot + i));
> > + MIB.setMemRefs(MMOBegin, MMOEnd);
> > + // Hi
> > + MIB = BuildMI(thisMBB, DL, TII->get(LOADOpc), X86::EDX);
> > + for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
> > + if (i == X86::AddrDisp)
> > + MIB.addDisp(MI->getOperand(MemOpndSlot + i), 4); // 4 == sizeof(i32)
> > + else
> > + MIB.addOperand(MI->getOperand(MemOpndSlot + i));
> > + }
> > + MIB.setMemRefs(MMOBegin, MMOEnd);
> >
> > - // Generate movc
> > - unsigned t3 = F->getRegInfo().createVirtualRegister(&X86::GR32RegClass);
> > - MIB = BuildMI(newMBB, dl, TII->get(cmovOpc),t3);
> > - MIB.addReg(t2);
> > - MIB.addReg(t1);
> > + thisMBB->addSuccessor(mainMBB);
> >
> > - // Cmp and exchange if none has modified the memory location
> > - MIB = BuildMI(newMBB, dl, TII->get(X86::LCMPXCHG32));
> > - for (int i=0; i <= lastAddrIndx; ++i)
> > - (*MIB).addOperand(*argOpers[i]);
> > - MIB.addReg(t3);
> > - assert(mInstr->hasOneMemOperand() && "Unexpected number of memoperand");
> > - (*MIB).setMemRefs(mInstr->memoperands_begin(),
> > - mInstr->memoperands_end());
> > + // mainMBB:
> > + MachineBasicBlock *origMainMBB = mainMBB;
> > + mainMBB->addLiveIn(X86::EAX);
> > + mainMBB->addLiveIn(X86::EDX);
> >
> > - MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), destOper.getReg());
> > - MIB.addReg(X86::EAX);
> > + // Copy EDX:EAX as they are used more than once.
> > + unsigned LoReg = MRI.createVirtualRegister(RC);
> > + unsigned HiReg = MRI.createVirtualRegister(RC);
> > + BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), LoReg).addReg(X86::EAX);
> > + BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), HiReg).addReg(X86::EDX);
> >
> > - // insert branch
> > - BuildMI(newMBB, dl, TII->get(X86::JNE_4)).addMBB(newMBB);
> > + unsigned t1L = MRI.createVirtualRegister(RC);
> > + unsigned t1H = MRI.createVirtualRegister(RC);
> >
> > - mInstr->eraseFromParent(); // The pseudo instruction is gone now.
> > - return nextMBB;
> > + unsigned Opc = MI->getOpcode();
> > + switch (Opc) {
> > + default:
> > + llvm_unreachable("Unhandled atomic-load-op6432 opcode!");
> > + case X86::ATOMAND6432:
> > + case X86::ATOMOR6432:
> > + case X86::ATOMXOR6432:
> > + case X86::ATOMADD6432:
> > + case X86::ATOMSUB6432: {
> > + unsigned HiOpc;
> > + unsigned LoOpc = getNonAtomic6432Opcode(Opc, HiOpc);
> > + BuildMI(mainMBB, DL, TII->get(LoOpc), t1L).addReg(SrcLoReg).addReg(LoReg);
> > + BuildMI(mainMBB, DL, TII->get(HiOpc), t1H).addReg(SrcHiReg).addReg(HiReg);
> > + break;
> > + }
> > + case X86::ATOMNAND6432: {
> > + unsigned HiOpc, NOTOpc;
> > + unsigned LoOpc = getNonAtomic6432OpcodeWithExtraOpc(Opc, HiOpc, NOTOpc);
> > + unsigned t2L = MRI.createVirtualRegister(RC);
> > + unsigned t2H = MRI.createVirtualRegister(RC);
> > + BuildMI(mainMBB, DL, TII->get(LoOpc), t2L).addReg(SrcLoReg).addReg(LoReg);
> > + BuildMI(mainMBB, DL, TII->get(HiOpc), t2H).addReg(SrcHiReg).addReg(HiReg);
> > + BuildMI(mainMBB, DL, TII->get(NOTOpc), t1L).addReg(t2L);
> > + BuildMI(mainMBB, DL, TII->get(NOTOpc), t1H).addReg(t2H);
> > + break;
> > + }
> > + case X86::ATOMSWAP6432: {
> > + unsigned HiOpc;
> > + unsigned LoOpc = getNonAtomic6432Opcode(Opc, HiOpc);
> > + BuildMI(mainMBB, DL, TII->get(LoOpc), t1L).addReg(SrcLoReg);
> > + BuildMI(mainMBB, DL, TII->get(HiOpc), t1H).addReg(SrcHiReg);
> > + break;
> > + }
> > + }
> > +
> > + // Copy EDX:EAX back from HiReg:LoReg
> > + BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::EAX).addReg(LoReg);
> > + BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::EDX).addReg(HiReg);
> > + // Copy ECX:EBX from t1H:t1L
> > + BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::EBX).addReg(t1L);
> > + BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::ECX).addReg(t1H);
> > +
> > + MIB = BuildMI(mainMBB, DL, TII->get(LCMPXCHGOpc));
> > + for (unsigned i = 0; i < X86::AddrNumOperands; ++i)
> > + MIB.addOperand(MI->getOperand(MemOpndSlot + i));
> > + MIB.setMemRefs(MMOBegin, MMOEnd);
> > +
> > + BuildMI(mainMBB, DL, TII->get(X86::JNE_4)).addMBB(origMainMBB);
> > +
> > + mainMBB->addSuccessor(origMainMBB);
> > + mainMBB->addSuccessor(sinkMBB);
> > +
> > + // sinkMBB:
> > + sinkMBB->addLiveIn(X86::EAX);
> > + sinkMBB->addLiveIn(X86::EDX);
> > +
> > + BuildMI(*sinkMBB, sinkMBB->begin(), DL,
> > + TII->get(TargetOpcode::COPY), DstLoReg)
> > + .addReg(X86::EAX);
> > + BuildMI(*sinkMBB, sinkMBB->begin(), DL,
> > + TII->get(TargetOpcode::COPY), DstHiReg)
> > + .addReg(X86::EDX);
> > +
> > + MI->eraseFromParent();
> > + return sinkMBB;
> > }
> >
> > // FIXME: When we get size specific XMM0 registers, i.e. XMM0_V16I8
> > @@ -13176,130 +13289,42 @@
> > return EmitMonitor(MI, BB);
> >
> > // Atomic Lowering.
> > - case X86::ATOMMIN32:
> > - case X86::ATOMMAX32:
> > - case X86::ATOMUMIN32:
> > - case X86::ATOMUMAX32:
> > - case X86::ATOMMIN16:
> > - case X86::ATOMMAX16:
> > - case X86::ATOMUMIN16:
> > - case X86::ATOMUMAX16:
> > - case X86::ATOMMIN64:
> > - case X86::ATOMMAX64:
> > - case X86::ATOMUMIN64:
> > - case X86::ATOMUMAX64: {
> > - unsigned Opc;
> > - switch (MI->getOpcode()) {
> > - default: llvm_unreachable("illegal opcode!");
> > - case X86::ATOMMIN32: Opc = X86::CMOVL32rr; break;
> > - case X86::ATOMMAX32: Opc = X86::CMOVG32rr; break;
> > - case X86::ATOMUMIN32: Opc = X86::CMOVB32rr; break;
> > - case X86::ATOMUMAX32: Opc = X86::CMOVA32rr; break;
> > - case X86::ATOMMIN16: Opc = X86::CMOVL16rr; break;
> > - case X86::ATOMMAX16: Opc = X86::CMOVG16rr; break;
> > - case X86::ATOMUMIN16: Opc = X86::CMOVB16rr; break;
> > - case X86::ATOMUMAX16: Opc = X86::CMOVA16rr; break;
> > - case X86::ATOMMIN64: Opc = X86::CMOVL64rr; break;
> > - case X86::ATOMMAX64: Opc = X86::CMOVG64rr; break;
> > - case X86::ATOMUMIN64: Opc = X86::CMOVB64rr; break;
> > - case X86::ATOMUMAX64: Opc = X86::CMOVA64rr; break;
> > - // FIXME: There are no CMOV8 instructions; MIN/MAX need some other way.
> > - }
> > - return EmitAtomicMinMaxWithCustomInserter(MI, BB, Opc);
> > - }
> > -
> > - case X86::ATOMAND32:
> > - case X86::ATOMOR32:
> > - case X86::ATOMXOR32:
> > - case X86::ATOMNAND32: {
> > - bool Invert = false;
> > - unsigned RegOpc, ImmOpc;
> > - switch (MI->getOpcode()) {
> > - default: llvm_unreachable("illegal opcode!");
> > - case X86::ATOMAND32:
> > - RegOpc = X86::AND32rr; ImmOpc = X86::AND32ri; break;
> > - case X86::ATOMOR32:
> > - RegOpc = X86::OR32rr; ImmOpc = X86::OR32ri; break;
> > - case X86::ATOMXOR32:
> > - RegOpc = X86::XOR32rr; ImmOpc = X86::XOR32ri; break;
> > - case X86::ATOMNAND32:
> > - RegOpc = X86::AND32rr; ImmOpc = X86::AND32ri; Invert = true; break;
> > - }
> > - return EmitAtomicBitwiseWithCustomInserter(MI, BB, RegOpc, ImmOpc,
> > - X86::MOV32rm, X86::LCMPXCHG32,
> > - X86::NOT32r, X86::EAX,
> > - &X86::GR32RegClass, Invert);
> > - }
> > -
> > + case X86::ATOMAND8:
> > case X86::ATOMAND16:
> > + case X86::ATOMAND32:
> > + case X86::ATOMAND64:
> > + // Fall through
> > + case X86::ATOMOR8:
> > case X86::ATOMOR16:
> > + case X86::ATOMOR32:
> > + case X86::ATOMOR64:
> > + // Fall through
> > case X86::ATOMXOR16:
> > - case X86::ATOMNAND16: {
> > - bool Invert = false;
> > - unsigned RegOpc, ImmOpc;
> > - switch (MI->getOpcode()) {
> > - default: llvm_unreachable("illegal opcode!");
> > - case X86::ATOMAND16:
> > - RegOpc = X86::AND16rr; ImmOpc = X86::AND16ri; break;
> > - case X86::ATOMOR16:
> > - RegOpc = X86::OR16rr; ImmOpc = X86::OR16ri; break;
> > - case X86::ATOMXOR16:
> > - RegOpc = X86::XOR16rr; ImmOpc = X86::XOR16ri; break;
> > - case X86::ATOMNAND16:
> > - RegOpc = X86::AND16rr; ImmOpc = X86::AND16ri; Invert = true; break;
> > - }
> > - return EmitAtomicBitwiseWithCustomInserter(MI, BB, RegOpc, ImmOpc,
> > - X86::MOV16rm, X86::LCMPXCHG16,
> > - X86::NOT16r, X86::AX,
> > - &X86::GR16RegClass, Invert);
> > - }
> > -
> > - case X86::ATOMAND8:
> > - case X86::ATOMOR8:
> > case X86::ATOMXOR8:
> > - case X86::ATOMNAND8: {
> > - bool Invert = false;
> > - unsigned RegOpc, ImmOpc;
> > - switch (MI->getOpcode()) {
> > - default: llvm_unreachable("illegal opcode!");
> > - case X86::ATOMAND8:
> > - RegOpc = X86::AND8rr; ImmOpc = X86::AND8ri; break;
> > - case X86::ATOMOR8:
> > - RegOpc = X86::OR8rr; ImmOpc = X86::OR8ri; break;
> > - case X86::ATOMXOR8:
> > - RegOpc = X86::XOR8rr; ImmOpc = X86::XOR8ri; break;
> > - case X86::ATOMNAND8:
> > - RegOpc = X86::AND8rr; ImmOpc = X86::AND8ri; Invert = true; break;
> > - }
> > - return EmitAtomicBitwiseWithCustomInserter(MI, BB, RegOpc, ImmOpc,
> > - X86::MOV8rm, X86::LCMPXCHG8,
> > - X86::NOT8r, X86::AL,
> > - &X86::GR8RegClass, Invert);
> > - }
> > -
> > - // This group is for 64-bit host.
> > - case X86::ATOMAND64:
> > - case X86::ATOMOR64:
> > + case X86::ATOMXOR32:
> > case X86::ATOMXOR64:
> > - case X86::ATOMNAND64: {
> > - bool Invert = false;
> > - unsigned RegOpc, ImmOpc;
> > - switch (MI->getOpcode()) {
> > - default: llvm_unreachable("illegal opcode!");
> > - case X86::ATOMAND64:
> > - RegOpc = X86::AND64rr; ImmOpc = X86::AND64ri32; break;
> > - case X86::ATOMOR64:
> > - RegOpc = X86::OR64rr; ImmOpc = X86::OR64ri32; break;
> > - case X86::ATOMXOR64:
> > - RegOpc = X86::XOR64rr; ImmOpc = X86::XOR64ri32; break;
> > - case X86::ATOMNAND64:
> > - RegOpc = X86::AND64rr; ImmOpc = X86::AND64ri32; Invert = true; break;
> > - }
> > - return EmitAtomicBitwiseWithCustomInserter(MI, BB, RegOpc, ImmOpc,
> > - X86::MOV64rm, X86::LCMPXCHG64,
> > - X86::NOT64r, X86::RAX,
> > - &X86::GR64RegClass, Invert);
> > - }
> > + // Fall through
> > + case X86::ATOMNAND8:
> > + case X86::ATOMNAND16:
> > + case X86::ATOMNAND32:
> > + case X86::ATOMNAND64:
> > + // Fall through
> > + case X86::ATOMMAX16:
> > + case X86::ATOMMAX32:
> > + case X86::ATOMMAX64:
> > + // Fall through
> > + case X86::ATOMMIN16:
> > + case X86::ATOMMIN32:
> > + case X86::ATOMMIN64:
> > + // Fall through
> > + case X86::ATOMUMAX16:
> > + case X86::ATOMUMAX32:
> > + case X86::ATOMUMAX64:
> > + // Fall through
> > + case X86::ATOMUMIN16:
> > + case X86::ATOMUMIN32:
> > + case X86::ATOMUMIN64:
> > + return EmitAtomicLoadArith(MI, BB);
> >
> > // This group does 64-bit operations on a 32-bit host.
> > case X86::ATOMAND6432:
> > @@ -13308,44 +13333,8 @@
> > case X86::ATOMNAND6432:
> > case X86::ATOMADD6432:
> > case X86::ATOMSUB6432:
> > - case X86::ATOMSWAP6432: {
> > - bool Invert = false;
> > - unsigned RegOpcL, RegOpcH, ImmOpcL, ImmOpcH;
> > - switch (MI->getOpcode()) {
> > - default: llvm_unreachable("illegal opcode!");
> > - case X86::ATOMAND6432:
> > - RegOpcL = RegOpcH = X86::AND32rr;
> > - ImmOpcL = ImmOpcH = X86::AND32ri;
> > - break;
> > - case X86::ATOMOR6432:
> > - RegOpcL = RegOpcH = X86::OR32rr;
> > - ImmOpcL = ImmOpcH = X86::OR32ri;
> > - break;
> > - case X86::ATOMXOR6432:
> > - RegOpcL = RegOpcH = X86::XOR32rr;
> > - ImmOpcL = ImmOpcH = X86::XOR32ri;
> > - break;
> > - case X86::ATOMNAND6432:
> > - RegOpcL = RegOpcH = X86::AND32rr;
> > - ImmOpcL = ImmOpcH = X86::AND32ri;
> > - Invert = true;
> > - break;
> > - case X86::ATOMADD6432:
> > - RegOpcL = X86::ADD32rr; RegOpcH = X86::ADC32rr;
> > - ImmOpcL = X86::ADD32ri; ImmOpcH = X86::ADC32ri;
> > - break;
> > - case X86::ATOMSUB6432:
> > - RegOpcL = X86::SUB32rr; RegOpcH = X86::SBB32rr;
> > - ImmOpcL = X86::SUB32ri; ImmOpcH = X86::SBB32ri;
> > - break;
> > - case X86::ATOMSWAP6432:
> > - RegOpcL = RegOpcH = X86::MOV32rr;
> > - ImmOpcL = ImmOpcH = X86::MOV32ri;
> > - break;
> > - }
> > - return EmitAtomicBit6432WithCustomInserter(MI, BB, RegOpcL, RegOpcH,
> > - ImmOpcL, ImmOpcH, Invert);
> > - }
> > + case X86::ATOMSWAP6432:
> > + return EmitAtomicLoadArith6432(MI, BB);
> >
> > case X86::VASTART_SAVE_XMM_REGS:
> > return EmitVAStartSaveXMMRegsWithCustomInserter(MI, BB);
> >
> > Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.h
> > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.h?rev=164281&r1=164280&r2=164281&view=diff
> > ==============================================================================
> > --- llvm/trunk/lib/Target/X86/X86ISelLowering.h (original)
> > +++ llvm/trunk/lib/Target/X86/X86ISelLowering.h Wed Sep 19 22:06:15 2012
> > @@ -861,36 +861,17 @@
> > MachineBasicBlock *BB) const;
> > MachineBasicBlock *EmitMwait(MachineInstr *MI, MachineBasicBlock *BB) const;
> >
> > - /// Utility function to emit atomic bitwise operations (and, or, xor).
> > - /// It takes the bitwise instruction to expand, the associated machine basic
> > - /// block, and the associated X86 opcodes for reg/reg and reg/imm.
> > - MachineBasicBlock *EmitAtomicBitwiseWithCustomInserter(
> > - MachineInstr *BInstr,
> > - MachineBasicBlock *BB,
> > - unsigned regOpc,
> > - unsigned immOpc,
> > - unsigned loadOpc,
> > - unsigned cxchgOpc,
> > - unsigned notOpc,
> > - unsigned EAXreg,
> > - const TargetRegisterClass *RC,
> > - bool Invert = false) const;
> > -
> > - MachineBasicBlock *EmitAtomicBit6432WithCustomInserter(
> > - MachineInstr *BInstr,
> > - MachineBasicBlock *BB,
> > - unsigned regOpcL,
> > - unsigned regOpcH,
> > - unsigned immOpcL,
> > - unsigned immOpcH,
> > - bool Invert = false) const;
> > -
> > - /// Utility function to emit atomic min and max. It takes the min/max
> > - /// instruction to expand, the associated basic block, and the associated
> > - /// cmov opcode for moving the min or max value.
> > - MachineBasicBlock *EmitAtomicMinMaxWithCustomInserter(MachineInstr *BInstr,
> > - MachineBasicBlock *BB,
> > - unsigned cmovOpc) const;
> > + /// Utility function to emit atomic-load-arith operations (and, or, xor,
> > + /// nand, max, min, umax, umin). It takes the corresponding instruction to
> > + /// expand, the associated machine basic block, and the associated X86
> > + /// opcodes for reg/reg.
> > + MachineBasicBlock *EmitAtomicLoadArith(MachineInstr *MI,
> > + MachineBasicBlock *MBB) const;
> > +
> > + /// Utility function to emit atomic-load-arith operations (and, or, xor,
> > + /// nand, add, sub, swap) for 64-bit operands on 32-bit target.
> > + MachineBasicBlock *EmitAtomicLoadArith6432(MachineInstr *MI,
> > + MachineBasicBlock *MBB) const;
> >
> > // Utility function to emit the low-level va_arg code for X86-64.
> > MachineBasicBlock *EmitVAARG64WithCustomInserter(
> >
> > Modified: llvm/trunk/lib/Target/X86/X86InstrCompiler.td
> > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrCompiler.td?rev=164281&r1=164280&r2=164281&view=diff
> > ==============================================================================
> > --- llvm/trunk/lib/Target/X86/X86InstrCompiler.td (original)
> > +++ llvm/trunk/lib/Target/X86/X86InstrCompiler.td Wed Sep 19 22:06:15 2012
> > @@ -483,8 +483,7 @@
> > //===----------------------------------------------------------------------===//
> >
> > // Atomic exchange, and, or, xor
> > -let Constraints = "$val = $dst", Defs = [EFLAGS],
> > - usesCustomInserter = 1 in {
> > +let usesCustomInserter = 1 in {
> >
> > def ATOMAND8 : I<0, Pseudo, (outs GR8:$dst),(ins i8mem:$ptr, GR8:$val),
> > "#ATOMAND8 PSEUDO!",
> > @@ -578,11 +577,7 @@
> > [(set GR64:$dst, (atomic_load_umax_64 addr:$ptr, GR64:$val))]>;
> > }
> >
> > -let Constraints = "$val1 = $dst1, $val2 = $dst2",
> > - Defs = [EFLAGS, EAX, EBX, ECX, EDX],
> > - Uses = [EAX, EBX, ECX, EDX],
> > - mayLoad = 1, mayStore = 1,
> > - usesCustomInserter = 1 in {
> > +let mayLoad = 1, mayStore = 1, usesCustomInserter = 1 in {
> > def ATOMAND6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
> > (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
> > "#ATOMAND6432 PSEUDO!", []>;
> >
> > Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
> > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.cpp?rev=164281&r1=164280&r2=164281&view=diff
> > ==============================================================================
> > --- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp (original)
> > +++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp Wed Sep 19 22:06:15 2012
> > @@ -2266,7 +2266,7 @@
> > }
> >
> > /// getCondFromCmovOpc - return condition code of a CMov opcode.
> > -static X86::CondCode getCondFromCMovOpc(unsigned Opc) {
> > +X86::CondCode X86::getCondFromCMovOpc(unsigned Opc) {
> > switch (Opc) {
> > default: return X86::COND_INVALID;
> > case X86::CMOVA16rm: case X86::CMOVA16rr: case X86::CMOVA32rm:
> > @@ -3314,7 +3314,7 @@
> > if (OldCC != X86::COND_INVALID)
> > OpcIsSET = true;
> > else
> > - OldCC = getCondFromCMovOpc(Instr.getOpcode());
> > + OldCC = X86::getCondFromCMovOpc(Instr.getOpcode());
> > }
> > if (OldCC == X86::COND_INVALID) return false;
> > }
> >
> > Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.h
> > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.h?rev=164281&r1=164280&r2=164281&view=diff
> > ==============================================================================
> > --- llvm/trunk/lib/Target/X86/X86InstrInfo.h (original)
> > +++ llvm/trunk/lib/Target/X86/X86InstrInfo.h Wed Sep 19 22:06:15 2012
> > @@ -61,6 +61,9 @@
> > // Turn condition code into conditional branch opcode.
> > unsigned GetCondBranchFromCond(CondCode CC);
> >
> > + // Turn CMov opcode into condition code.
> > + CondCode getCondFromCMovOpc(unsigned Opc);
> > +
> > /// GetOppositeBranchCondition - Return the inverse of the specified cond,
> > /// e.g. turning COND_E to COND_NE.
> > CondCode GetOppositeBranchCondition(X86::CondCode CC);
> >
> > Modified: llvm/trunk/test/CodeGen/X86/2010-01-08-Atomic64Bug.ll
> > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/2010-01-08-Atomic64Bug.ll?rev=164281&r1=164280&r2=164281&view=diff
> > ==============================================================================
> > --- llvm/trunk/test/CodeGen/X86/2010-01-08-Atomic64Bug.ll (original)
> > +++ llvm/trunk/test/CodeGen/X86/2010-01-08-Atomic64Bug.ll Wed Sep 19 22:06:15 2012
> > @@ -7,17 +7,16 @@
> > define void @t(i64* nocapture %p) nounwind ssp {
> > entry:
> > ; CHECK: t:
> > -; CHECK: movl $1
> > -; CHECK: movl (%ebp), %eax
> > -; CHECK: movl 4(%ebp), %edx
> > +; CHECK: movl ([[REG:%[a-z]+]]), %eax
> > +; CHECK: movl 4([[REG]]), %edx
> > ; CHECK: LBB0_1:
> > -; CHECK-NOT: movl $1
> > -; CHECK-NOT: movl $0
> > +; CHECK: movl $1
> > ; CHECK: addl
> > +; CHECK: movl $0
> > ; CHECK: adcl
> > ; CHECK: lock
> > -; CHECK: cmpxchg8b
> > -; CHECK: jne
> > +; CHECK-NEXT: cmpxchg8b ([[REG]])
> > +; CHECK-NEXT: jne
> > %0 = atomicrmw add i64* %p, i64 1 seq_cst
> > ret void
> > }
> >
> > Added: llvm/trunk/test/CodeGen/X86/atomic16.ll
> > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/atomic16.ll?rev=164281&view=auto
> > ==============================================================================
> > --- llvm/trunk/test/CodeGen/X86/atomic16.ll (added)
> > +++ llvm/trunk/test/CodeGen/X86/atomic16.ll Wed Sep 19 22:06:15 2012
> > @@ -0,0 +1,250 @@
> > +; RUN: llc < %s -O0 -march=x86-64 -mcpu=corei7 | FileCheck %s --check-prefix X64
> > +; RUN: llc < %s -O0 -march=x86 -mcpu=corei7 | FileCheck %s --check-prefix X32
> > +
> > +@sc16 = external global i16
> > +
> > +define void @atomic_fetch_add16() nounwind {
> > +; X64: atomic_fetch_add16
> > +; X32: atomic_fetch_add16
> > +entry:
> > +; 32-bit
> > + %t1 = atomicrmw add i16* @sc16, i16 1 acquire
> > +; X64: lock
> > +; X64: incw
> > +; X32: lock
> > +; X32: incw
> > + %t2 = atomicrmw add i16* @sc16, i16 3 acquire
> > +; X64: lock
> > +; X64: addw $3
> > +; X32: lock
> > +; X32: addw $3
> > + %t3 = atomicrmw add i16* @sc16, i16 5 acquire
> > +; X64: lock
> > +; X64: xaddw
> > +; X32: lock
> > +; X32: xaddw
> > + %t4 = atomicrmw add i16* @sc16, i16 %t3 acquire
> > +; X64: lock
> > +; X64: addw
> > +; X32: lock
> > +; X32: addw
> > + ret void
> > +; X64: ret
> > +; X32: ret
> > +}
> > +
> > +define void @atomic_fetch_sub16() nounwind {
> > +; X64: atomic_fetch_sub16
> > +; X32: atomic_fetch_sub16
> > + %t1 = atomicrmw sub i16* @sc16, i16 1 acquire
> > +; X64: lock
> > +; X64: decw
> > +; X32: lock
> > +; X32: decw
> > + %t2 = atomicrmw sub i16* @sc16, i16 3 acquire
> > +; X64: lock
> > +; X64: subw $3
> > +; X32: lock
> > +; X32: subw $3
> > + %t3 = atomicrmw sub i16* @sc16, i16 5 acquire
> > +; X64: lock
> > +; X64: xaddw
> > +; X32: lock
> > +; X32: xaddw
> > + %t4 = atomicrmw sub i16* @sc16, i16 %t3 acquire
> > +; X64: lock
> > +; X64: subw
> > +; X32: lock
> > +; X32: subw
> > + ret void
> > +; X64: ret
> > +; X32: ret
> > +}
> > +
> > +define void @atomic_fetch_and16() nounwind {
> > +; X64: atomic_fetch_and16
> > +; X32: atomic_fetch_and16
> > + %t1 = atomicrmw and i16* @sc16, i16 3 acquire
> > +; X64: lock
> > +; X64: andw $3
> > +; X32: lock
> > +; X32: andw $3
> > + %t2 = atomicrmw and i16* @sc16, i16 5 acquire
> > +; X64: andw
> > +; X64: lock
> > +; X64: cmpxchgw
> > +; X32: andw
> > +; X32: lock
> > +; X32: cmpxchgw
> > + %t3 = atomicrmw and i16* @sc16, i16 %t2 acquire
> > +; X64: lock
> > +; X64: andw
> > +; X32: lock
> > +; X32: andw
> > + ret void
> > +; X64: ret
> > +; X32: ret
> > +}
> > +
> > +define void @atomic_fetch_or16() nounwind {
> > +; X64: atomic_fetch_or16
> > +; X32: atomic_fetch_or16
> > + %t1 = atomicrmw or i16* @sc16, i16 3 acquire
> > +; X64: lock
> > +; X64: orw $3
> > +; X32: lock
> > +; X32: orw $3
> > + %t2 = atomicrmw or i16* @sc16, i16 5 acquire
> > +; X64: orw
> > +; X64: lock
> > +; X64: cmpxchgw
> > +; X32: orw
> > +; X32: lock
> > +; X32: cmpxchgw
> > + %t3 = atomicrmw or i16* @sc16, i16 %t2 acquire
> > +; X64: lock
> > +; X64: orw
> > +; X32: lock
> > +; X32: orw
> > + ret void
> > +; X64: ret
> > +; X32: ret
> > +}
> > +
> > +define void @atomic_fetch_xor16() nounwind {
> > +; X64: atomic_fetch_xor16
> > +; X32: atomic_fetch_xor16
> > + %t1 = atomicrmw xor i16* @sc16, i16 3 acquire
> > +; X64: lock
> > +; X64: xorw $3
> > +; X32: lock
> > +; X32: xorw $3
> > + %t2 = atomicrmw xor i16* @sc16, i16 5 acquire
> > +; X64: xorw
> > +; X64: lock
> > +; X64: cmpxchgw
> > +; X32: xorw
> > +; X32: lock
> > +; X32: cmpxchgw
> > + %t3 = atomicrmw xor i16* @sc16, i16 %t2 acquire
> > +; X64: lock
> > +; X64: xorw
> > +; X32: lock
> > +; X32: xorw
> > + ret void
> > +; X64: ret
> > +; X32: ret
> > +}
> > +
> > +define void @atomic_fetch_nand16(i16 %x) nounwind {
> > +; X64: atomic_fetch_nand16
> > +; X32: atomic_fetch_nand16
> > + %t1 = atomicrmw nand i16* @sc16, i16 %x acquire
> > +; X64: andw
> > +; X64: notw
> > +; X64: lock
> > +; X64: cmpxchgw
> > +; X32: andw
> > +; X32: notw
> > +; X32: lock
> > +; X32: cmpxchgw
> > + ret void
> > +; X64: ret
> > +; X32: ret
> > +}
> > +
> > +define void @atomic_fetch_max16(i16 %x) nounwind {
> > + %t1 = atomicrmw max i16* @sc16, i16 %x acquire
> > +; X64: cmpw
> > +; X64: cmov
> > +; X64: lock
> > +; X64: cmpxchgw
> > +
> > +; X32: cmpw
> > +; X32: cmov
> > +; X32: lock
> > +; X32: cmpxchgw
> > + ret void
> > +; X64: ret
> > +; X32: ret
> > +}
> > +
> > +define void @atomic_fetch_min16(i16 %x) nounwind {
> > + %t1 = atomicrmw min i16* @sc16, i16 %x acquire
> > +; X64: cmpw
> > +; X64: cmov
> > +; X64: lock
> > +; X64: cmpxchgw
> > +
> > +; X32: cmpw
> > +; X32: cmov
> > +; X32: lock
> > +; X32: cmpxchgw
> > + ret void
> > +; X64: ret
> > +; X32: ret
> > +}
> > +
> > +define void @atomic_fetch_umax16(i16 %x) nounwind {
> > + %t1 = atomicrmw umax i16* @sc16, i16 %x acquire
> > +; X64: cmpw
> > +; X64: cmov
> > +; X64: lock
> > +; X64: cmpxchgw
> > +
> > +; X32: cmpw
> > +; X32: cmov
> > +; X32: lock
> > +; X32: cmpxchgw
> > + ret void
> > +; X64: ret
> > +; X32: ret
> > +}
> > +
> > +define void @atomic_fetch_umin16(i16 %x) nounwind {
> > + %t1 = atomicrmw umin i16* @sc16, i16 %x acquire
> > +; X64: cmpw
> > +; X64: cmov
> > +; X64: lock
> > +; X64: cmpxchgw
> > +; X32: cmpw
> > +; X32: cmov
> > +; X32: lock
> > +; X32: cmpxchgw
> > + ret void
> > +; X64: ret
> > +; X32: ret
> > +}
> > +
> > +define void @atomic_fetch_cmpxchg16() nounwind {
> > + %t1 = cmpxchg i16* @sc16, i16 0, i16 1 acquire
> > +; X64: lock
> > +; X64: cmpxchgw
> > +; X32: lock
> > +; X32: cmpxchgw
> > + ret void
> > +; X64: ret
> > +; X32: ret
> > +}
> > +
> > +define void @atomic_fetch_store16(i16 %x) nounwind {
> > + store atomic i16 %x, i16* @sc16 release, align 4
> > +; X64-NOT: lock
> > +; X64: movw
> > +; X32-NOT: lock
> > +; X32: movw
> > + ret void
> > +; X64: ret
> > +; X32: ret
> > +}
> > +
> > +define void @atomic_fetch_swap16(i16 %x) nounwind {
> > + %t1 = atomicrmw xchg i16* @sc16, i16 %x acquire
> > +; X64-NOT: lock
> > +; X64: xchgw
> > +; X32-NOT: lock
> > +; X32: xchgw
> > + ret void
> > +; X64: ret
> > +; X32: ret
> > +}
> >
> > Added: llvm/trunk/test/CodeGen/X86/atomic32.ll
> > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/atomic32.ll?rev=164281&view=auto
> > ==============================================================================
> > --- llvm/trunk/test/CodeGen/X86/atomic32.ll (added)
> > +++ llvm/trunk/test/CodeGen/X86/atomic32.ll Wed Sep 19 22:06:15 2012
> > @@ -0,0 +1,250 @@
> > +; RUN: llc < %s -O0 -march=x86-64 -mcpu=corei7 | FileCheck %s --check-prefix X64
> > +; RUN: llc < %s -O0 -march=x86 -mcpu=corei7 | FileCheck %s --check-prefix X32
> > +
> > +@sc32 = external global i32
> > +
> > +define void @atomic_fetch_add32() nounwind {
> > +; X64: atomic_fetch_add32
> > +; X32: atomic_fetch_add32
> > +entry:
> > +; 32-bit
> > + %t1 = atomicrmw add i32* @sc32, i32 1 acquire
> > +; X64: lock
> > +; X64: incl
> > +; X32: lock
> > +; X32: incl
> > + %t2 = atomicrmw add i32* @sc32, i32 3 acquire
> > +; X64: lock
> > +; X64: addl $3
> > +; X32: lock
> > +; X32: addl $3
> > + %t3 = atomicrmw add i32* @sc32, i32 5 acquire
> > +; X64: lock
> > +; X64: xaddl
> > +; X32: lock
> > +; X32: xaddl
> > + %t4 = atomicrmw add i32* @sc32, i32 %t3 acquire
> > +; X64: lock
> > +; X64: addl
> > +; X32: lock
> > +; X32: addl
> > + ret void
> > +; X64: ret
> > +; X32: ret
> > +}
> > +
> > +define void @atomic_fetch_sub32() nounwind {
> > +; X64: atomic_fetch_sub32
> > +; X32: atomic_fetch_sub32
> > + %t1 = atomicrmw sub i32* @sc32, i32 1 acquire
> > +; X64: lock
> > +; X64: decl
> > +; X32: lock
> > +; X32: decl
> > + %t2 = atomicrmw sub i32* @sc32, i32 3 acquire
> > +; X64: lock
> > +; X64: subl $3
> > +; X32: lock
> > +; X32: subl $3
> > + %t3 = atomicrmw sub i32* @sc32, i32 5 acquire
> > +; X64: lock
> > +; X64: xaddl
> > +; X32: lock
> > +; X32: xaddl
> > + %t4 = atomicrmw sub i32* @sc32, i32 %t3 acquire
> > +; X64: lock
> > +; X64: subl
> > +; X32: lock
> > +; X32: subl
> > + ret void
> > +; X64: ret
> > +; X32: ret
> > +}
> > +
> > +define void @atomic_fetch_and32() nounwind {
> > +; X64: atomic_fetch_and32
> > +; X32: atomic_fetch_and32
> > + %t1 = atomicrmw and i32* @sc32, i32 3 acquire
> > +; X64: lock
> > +; X64: andl $3
> > +; X32: lock
> > +; X32: andl $3
> > + %t2 = atomicrmw and i32* @sc32, i32 5 acquire
> > +; X64: andl
> > +; X64: lock
> > +; X64: cmpxchgl
> > +; X32: andl
> > +; X32: lock
> > +; X32: cmpxchgl
> > + %t3 = atomicrmw and i32* @sc32, i32 %t2 acquire
> > +; X64: lock
> > +; X64: andl
> > +; X32: lock
> > +; X32: andl
> > + ret void
> > +; X64: ret
> > +; X32: ret
> > +}
> > +
> > +define void @atomic_fetch_or32() nounwind {
> > +; X64: atomic_fetch_or32
> > +; X32: atomic_fetch_or32
> > + %t1 = atomicrmw or i32* @sc32, i32 3 acquire
> > +; X64: lock
> > +; X64: orl $3
> > +; X32: lock
> > +; X32: orl $3
> > + %t2 = atomicrmw or i32* @sc32, i32 5 acquire
> > +; X64: orl
> > +; X64: lock
> > +; X64: cmpxchgl
> > +; X32: orl
> > +; X32: lock
> > +; X32: cmpxchgl
> > + %t3 = atomicrmw or i32* @sc32, i32 %t2 acquire
> > +; X64: lock
> > +; X64: orl
> > +; X32: lock
> > +; X32: orl
> > + ret void
> > +; X64: ret
> > +; X32: ret
> > +}
> > +
> > +define void @atomic_fetch_xor32() nounwind {
> > +; X64: atomic_fetch_xor32
> > +; X32: atomic_fetch_xor32
> > + %t1 = atomicrmw xor i32* @sc32, i32 3 acquire
> > +; X64: lock
> > +; X64: xorl $3
> > +; X32: lock
> > +; X32: xorl $3
> > + %t2 = atomicrmw xor i32* @sc32, i32 5 acquire
> > +; X64: xorl
> > +; X64: lock
> > +; X64: cmpxchgl
> > +; X32: xorl
> > +; X32: lock
> > +; X32: cmpxchgl
> > + %t3 = atomicrmw xor i32* @sc32, i32 %t2 acquire
> > +; X64: lock
> > +; X64: xorl
> > +; X32: lock
> > +; X32: xorl
> > + ret void
> > +; X64: ret
> > +; X32: ret
> > +}
> > +
> > +define void @atomic_fetch_nand32(i32 %x) nounwind {
> > +; X64: atomic_fetch_nand32
> > +; X32: atomic_fetch_nand32
> > + %t1 = atomicrmw nand i32* @sc32, i32 %x acquire
> > +; X64: andl
> > +; X64: notl
> > +; X64: lock
> > +; X64: cmpxchgl
> > +; X32: andl
> > +; X32: notl
> > +; X32: lock
> > +; X32: cmpxchgl
> > + ret void
> > +; X64: ret
> > +; X32: ret
> > +}
> > +
> > +define void @atomic_fetch_max32(i32 %x) nounwind {
> > + %t1 = atomicrmw max i32* @sc32, i32 %x acquire
> > +; X64: cmpl
> > +; X64: cmov
> > +; X64: lock
> > +; X64: cmpxchgl
> > +
> > +; X32: cmpl
> > +; X32: cmov
> > +; X32: lock
> > +; X32: cmpxchgl
> > + ret void
> > +; X64: ret
> > +; X32: ret
> > +}
> > +
> > +define void @atomic_fetch_min32(i32 %x) nounwind {
> > + %t1 = atomicrmw min i32* @sc32, i32 %x acquire
> > +; X64: cmpl
> > +; X64: cmov
> > +; X64: lock
> > +; X64: cmpxchgl
> > +
> > +; X32: cmpl
> > +; X32: cmov
> > +; X32: lock
> > +; X32: cmpxchgl
> > + ret void
> > +; X64: ret
> > +; X32: ret
> > +}
> > +
> > +define void @atomic_fetch_umax32(i32 %x) nounwind {
> > + %t1 = atomicrmw umax i32* @sc32, i32 %x acquire
> > +; X64: cmpl
> > +; X64: cmov
> > +; X64: lock
> > +; X64: cmpxchgl
> > +
> > +; X32: cmpl
> > +; X32: cmov
> > +; X32: lock
> > +; X32: cmpxchgl
> > + ret void
> > +; X64: ret
> > +; X32: ret
> > +}
> > +
> > +define void @atomic_fetch_umin32(i32 %x) nounwind {
> > + %t1 = atomicrmw umin i32* @sc32, i32 %x acquire
> > +; X64: cmpl
> > +; X64: cmov
> > +; X64: lock
> > +; X64: cmpxchgl
> > +; X32: cmpl
> > +; X32: cmov
> > +; X32: lock
> > +; X32: cmpxchgl
> > + ret void
> > +; X64: ret
> > +; X32: ret
> > +}
> > +
> > +define void @atomic_fetch_cmpxchg32() nounwind {
> > + %t1 = cmpxchg i32* @sc32, i32 0, i32 1 acquire
> > +; X64: lock
> > +; X64: cmpxchgl
> > +; X32: lock
> > +; X32: cmpxchgl
> > + ret void
> > +; X64: ret
> > +; X32: ret
> > +}
> > +
> > +define void @atomic_fetch_store32(i32 %x) nounwind {
> > + store atomic i32 %x, i32* @sc32 release, align 4
> > +; X64-NOT: lock
> > +; X64: movl
> > +; X32-NOT: lock
> > +; X32: movl
> > + ret void
> > +; X64: ret
> > +; X32: ret
> > +}
> > +
> > +define void @atomic_fetch_swap32(i32 %x) nounwind {
> > + %t1 = atomicrmw xchg i32* @sc32, i32 %x acquire
> > +; X64-NOT: lock
> > +; X64: xchgl
> > +; X32-NOT: lock
> > +; X32: xchgl
> > + ret void
> > +; X64: ret
> > +; X32: ret
> > +}
> >
> > Added: llvm/trunk/test/CodeGen/X86/atomic64.ll
> > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/atomic64.ll?rev=164281&view=auto
> > ==============================================================================
> > --- llvm/trunk/test/CodeGen/X86/atomic64.ll (added)
> > +++ llvm/trunk/test/CodeGen/X86/atomic64.ll Wed Sep 19 22:06:15 2012
> > @@ -0,0 +1,216 @@
> > +; RUN: llc < %s -O0 -march=x86-64 -mcpu=corei7 | FileCheck %s --check-prefix X64
> > +
> > + at sc64 = external global i64
> > +
> > +define void @atomic_fetch_add64() nounwind {
> > +; X64: atomic_fetch_add64
> > +entry:
> > + %t1 = atomicrmw add i64* @sc64, i64 1 acquire
> > +; X64: lock
> > +; X64: incq
> > + %t2 = atomicrmw add i64* @sc64, i64 3 acquire
> > +; X64: lock
> > +; X64: addq $3
> > + %t3 = atomicrmw add i64* @sc64, i64 5 acquire
> > +; X64: lock
> > +; X64: xaddq
> > + %t4 = atomicrmw add i64* @sc64, i64 %t3 acquire
> > +; X64: lock
> > +; X64: addq
> > + ret void
> > +; X64: ret
> > +}
> > +
> > +define void @atomic_fetch_sub64() nounwind {
> > +; X64: atomic_fetch_sub64
> > + %t1 = atomicrmw sub i64* @sc64, i64 1 acquire
> > +; X64: lock
> > +; X64: decq
> > + %t2 = atomicrmw sub i64* @sc64, i64 3 acquire
> > +; X64: lock
> > +; X64: subq $3
> > + %t3 = atomicrmw sub i64* @sc64, i64 5 acquire
> > +; X64: lock
> > +; X64: xaddq
> > + %t4 = atomicrmw sub i64* @sc64, i64 %t3 acquire
> > +; X64: lock
> > +; X64: subq
> > + ret void
> > +; X64: ret
> > +}
> > +
> > +define void @atomic_fetch_and64() nounwind {
> > +; X64: atomic_fetch_and64
> > + %t1 = atomicrmw and i64* @sc64, i64 3 acquire
> > +; X64: lock
> > +; X64: andq $3
> > + %t2 = atomicrmw and i64* @sc64, i64 5 acquire
> > +; X64: andq
> > +; X64: lock
> > +; X64: cmpxchgq
> > + %t3 = atomicrmw and i64* @sc64, i64 %t2 acquire
> > +; X64: lock
> > +; X64: andq
> > + ret void
> > +; X64: ret
> > +}
> > +
> > +define void @atomic_fetch_or64() nounwind {
> > +; X64: atomic_fetch_or64
> > + %t1 = atomicrmw or i64* @sc64, i64 3 acquire
> > +; X64: lock
> > +; X64: orq $3
> > + %t2 = atomicrmw or i64* @sc64, i64 5 acquire
> > +; X64: orq
> > +; X64: lock
> > +; X64: cmpxchgq
> > + %t3 = atomicrmw or i64* @sc64, i64 %t2 acquire
> > +; X64: lock
> > +; X64: orq
> > + ret void
> > +; X64: ret
> > +}
> > +
> > +define void @atomic_fetch_xor64() nounwind {
> > +; X64: atomic_fetch_xor64
> > + %t1 = atomicrmw xor i64* @sc64, i64 3 acquire
> > +; X64: lock
> > +; X64: xorq $3
> > + %t2 = atomicrmw xor i64* @sc64, i64 5 acquire
> > +; X64: xorq
> > +; X64: lock
> > +; X64: cmpxchgq
> > + %t3 = atomicrmw xor i64* @sc64, i64 %t2 acquire
> > +; X64: lock
> > +; X64: xorq
> > + ret void
> > +; X64: ret
> > +}
> > +
> > +define void @atomic_fetch_nand64(i64 %x) nounwind {
> > +; X64: atomic_fetch_nand64
> > +; X32: atomic_fetch_nand64
> > + %t1 = atomicrmw nand i64* @sc64, i64 %x acquire
> > +; X64: andq
> > +; X64: notq
> > +; X64: lock
> > +; X64: cmpxchgq
> > +; X32: andl
> > +; X32: andl
> > +; X32: notl
> > +; X32: notl
> > +; X32: lock
> > +; X32: cmpxchg8b
> > + ret void
> > +; X64: ret
> > +; X32: ret
> > +}
> > +
> > +define void @atomic_fetch_max64(i64 %x) nounwind {
> > + %t1 = atomicrmw max i64* @sc64, i64 %x acquire
> > +; X64: cmpq
> > +; X64: cmov
> > +; X64: lock
> > +; X64: cmpxchgq
> > +
> > +; X32: cmpl
> > +; X32: cmpl
> > +; X32: cmov
> > +; X32: cmov
> > +; X32: cmov
> > +; X32: lock
> > +; X32: cmpxchg8b
> > + ret void
> > +; X64: ret
> > +; X32: ret
> > +}
> > +
> > +define void @atomic_fetch_min64(i64 %x) nounwind {
> > + %t1 = atomicrmw min i64* @sc64, i64 %x acquire
> > +; X64: cmpq
> > +; X64: cmov
> > +; X64: lock
> > +; X64: cmpxchgq
> > +
> > +; X32: cmpl
> > +; X32: cmpl
> > +; X32: cmov
> > +; X32: cmov
> > +; X32: cmov
> > +; X32: lock
> > +; X32: cmpxchg8b
> > + ret void
> > +; X64: ret
> > +; X32: ret
> > +}
> > +
> > +define void @atomic_fetch_umax64(i64 %x) nounwind {
> > + %t1 = atomicrmw umax i64* @sc64, i64 %x acquire
> > +; X64: cmpq
> > +; X64: cmov
> > +; X64: lock
> > +; X64: cmpxchgq
> > +
> > +; X32: cmpl
> > +; X32: cmpl
> > +; X32: cmov
> > +; X32: cmov
> > +; X32: cmov
> > +; X32: lock
> > +; X32: cmpxchg8b
> > + ret void
> > +; X64: ret
> > +; X32: ret
> > +}
> > +
> > +define void @atomic_fetch_umin64(i64 %x) nounwind {
> > + %t1 = atomicrmw umin i64* @sc64, i64 %x acquire
> > +; X64: cmpq
> > +; X64: cmov
> > +; X64: lock
> > +; X64: cmpxchgq
> > +
> > +; X32: cmpl
> > +; X32: cmpl
> > +; X32: cmov
> > +; X32: cmov
> > +; X32: cmov
> > +; X32: lock
> > +; X32: cmpxchg8b
> > + ret void
> > +; X64: ret
> > +; X32: ret
> > +}
> > +
> > +define void @atomic_fetch_cmpxchg64() nounwind {
> > + %t1 = cmpxchg i64* @sc64, i64 0, i64 1 acquire
> > +; X64: lock
> > +; X64: cmpxchgq
> > +; X32: lock
> > +; X32: cmpxchg8b
> > + ret void
> > +; X64: ret
> > +; X32: ret
> > +}
> > +
> > +define void @atomic_fetch_store64(i64 %x) nounwind {
> > + store atomic i64 %x, i64* @sc64 release, align 8
> > +; X64-NOT: lock
> > +; X64: movq
> > +; X32: lock
> > +; X32: cmpxchg8b
> > + ret void
> > +; X64: ret
> > +; X32: ret
> > +}
> > +
> > +define void @atomic_fetch_swap64(i64 %x) nounwind {
> > + %t1 = atomicrmw xchg i64* @sc64, i64 %x acquire
> > +; X64-NOT: lock
> > +; X64: xchgq
> > +; X32: lock
> > +; X32: xchg8b
> > + ret void
> > +; X64: ret
> > +; X32: ret
> > +}
> >
> > Added: llvm/trunk/test/CodeGen/X86/atomic6432.ll
> > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/atomic6432.ll?rev=164281&view=auto
> > ==============================================================================
> > --- llvm/trunk/test/CodeGen/X86/atomic6432.ll (added)
> > +++ llvm/trunk/test/CodeGen/X86/atomic6432.ll Wed Sep 19 22:06:15 2012
> > @@ -0,0 +1,209 @@
> > +; RUN: llc < %s -O0 -march=x86 -mcpu=corei7 | FileCheck %s --check-prefix X32
> > +; XFAIL: *
> > +
> > + at sc64 = external global i64
> > +
> > +define void @atomic_fetch_add64() nounwind {
> > +; X32: atomic_fetch_add64
> > +entry:
> > + %t1 = atomicrmw add i64* @sc64, i64 1 acquire
> > +; X32: addl
> > +; X32: adcl
> > +; X32: lock
> > +; X32: cmpxchg8b
> > + %t2 = atomicrmw add i64* @sc64, i64 3 acquire
> > +; X32: addl
> > +; X32: adcl
> > +; X32: lock
> > +; X32: cmpxchg8b
> > + %t3 = atomicrmw add i64* @sc64, i64 5 acquire
> > +; X32: addl
> > +; X32: adcl
> > +; X32: lock
> > +; X32: cmpxchg8b
> > + %t4 = atomicrmw add i64* @sc64, i64 %t3 acquire
> > +; X32: addl
> > +; X32: adcl
> > +; X32: lock
> > +; X32: cmpxchg8b
> > + ret void
> > +; X32: ret
> > +}
> > +
> > +define void @atomic_fetch_sub64() nounwind {
> > +; X32: atomic_fetch_sub64
> > + %t1 = atomicrmw sub i64* @sc64, i64 1 acquire
> > +; X32: subl
> > +; X32: sbbl
> > +; X32: lock
> > +; X32: cmpxchg8b
> > + %t2 = atomicrmw sub i64* @sc64, i64 3 acquire
> > +; X32: subl
> > +; X32: sbbl
> > +; X32: lock
> > +; X32: cmpxchg8b
> > + %t3 = atomicrmw sub i64* @sc64, i64 5 acquire
> > +; X32: subl
> > +; X32: sbbl
> > +; X32: lock
> > +; X32: cmpxchg8b
> > + %t4 = atomicrmw sub i64* @sc64, i64 %t3 acquire
> > +; X32: subl
> > +; X32: sbbl
> > +; X32: lock
> > +; X32: cmpxchg8b
> > + ret void
> > +; X32: ret
> > +}
> > +
> > +define void @atomic_fetch_and64() nounwind {
> > +; X32: atomic_fetch_and64
> > + %t1 = atomicrmw and i64* @sc64, i64 3 acquire
> > +; X32: andl
> > +; X32: andl
> > +; X32: lock
> > +; X32: cmpxchg8b
> > + %t2 = atomicrmw and i64* @sc64, i64 5 acquire
> > +; X32: andl
> > +; X32: andl
> > +; X32: lock
> > +; X32: cmpxchg8b
> > + %t3 = atomicrmw and i64* @sc64, i64 %t2 acquire
> > +; X32: andl
> > +; X32: andl
> > +; X32: lock
> > +; X32: cmpxchg8b
> > + ret void
> > +; X32: ret
> > +}
> > +
> > +define void @atomic_fetch_or64() nounwind {
> > +; X32: atomic_fetch_or64
> > + %t1 = atomicrmw or i64* @sc64, i64 3 acquire
> > +; X32: orl
> > +; X32: orl
> > +; X32: lock
> > +; X32: cmpxchg8b
> > + %t2 = atomicrmw or i64* @sc64, i64 5 acquire
> > +; X32: orl
> > +; X32: orl
> > +; X32: lock
> > +; X32: cmpxchg8b
> > + %t3 = atomicrmw or i64* @sc64, i64 %t2 acquire
> > +; X32: orl
> > +; X32: orl
> > +; X32: lock
> > +; X32: cmpxchg8b
> > + ret void
> > +; X32: ret
> > +}
> > +
> > +define void @atomic_fetch_xor64() nounwind {
> > +; X32: atomic_fetch_xor64
> > + %t1 = atomicrmw xor i64* @sc64, i64 3 acquire
> > +; X32: xorl
> > +; X32: xorl
> > +; X32: lock
> > +; X32: cmpxchg8b
> > + %t2 = atomicrmw xor i64* @sc64, i64 5 acquire
> > +; X32: xorl
> > +; X32: xorl
> > +; X32: lock
> > +; X32: cmpxchg8b
> > + %t3 = atomicrmw xor i64* @sc64, i64 %t2 acquire
> > +; X32: xorl
> > +; X32: xorl
> > +; X32: lock
> > +; X32: cmpxchg8b
> > + ret void
> > +; X32: ret
> > +}
> > +
> > +define void @atomic_fetch_nand64(i64 %x) nounwind {
> > +; X32: atomic_fetch_nand64
> > + %t1 = atomicrmw nand i64* @sc64, i64 %x acquire
> > +; X32: andl
> > +; X32: andl
> > +; X32: notl
> > +; X32: notl
> > +; X32: lock
> > +; X32: cmpxchg8b
> > + ret void
> > +; X32: ret
> > +}
> > +
> > +define void @atomic_fetch_max64(i64 %x) nounwind {
> > + %t1 = atomicrmw max i64* @sc64, i64 %x acquire
> > +; X32: cmpl
> > +; X32: cmpl
> > +; X32: cmov
> > +; X32: cmov
> > +; X32: cmov
> > +; X32: lock
> > +; X32: cmpxchg8b
> > + ret void
> > +; X32: ret
> > +}
> > +
> > +define void @atomic_fetch_min64(i64 %x) nounwind {
> > + %t1 = atomicrmw min i64* @sc64, i64 %x acquire
> > +; X32: cmpl
> > +; X32: cmpl
> > +; X32: cmov
> > +; X32: cmov
> > +; X32: cmov
> > +; X32: lock
> > +; X32: cmpxchg8b
> > + ret void
> > +; X32: ret
> > +}
> > +
> > +define void @atomic_fetch_umax64(i64 %x) nounwind {
> > + %t1 = atomicrmw umax i64* @sc64, i64 %x acquire
> > +; X32: cmpl
> > +; X32: cmpl
> > +; X32: cmov
> > +; X32: cmov
> > +; X32: cmov
> > +; X32: lock
> > +; X32: cmpxchg8b
> > + ret void
> > +; X32: ret
> > +}
> > +
> > +define void @atomic_fetch_umin64(i64 %x) nounwind {
> > + %t1 = atomicrmw umin i64* @sc64, i64 %x acquire
> > +; X32: cmpl
> > +; X32: cmpl
> > +; X32: cmov
> > +; X32: cmov
> > +; X32: cmov
> > +; X32: lock
> > +; X32: cmpxchg8b
> > + ret void
> > +; X32: ret
> > +}
> > +
> > +define void @atomic_fetch_cmpxchg64() nounwind {
> > + %t1 = cmpxchg i64* @sc64, i64 0, i64 1 acquire
> > +; X32: lock
> > +; X32: cmpxchg8b
> > + ret void
> > +; X32: ret
> > +}
> > +
> > +define void @atomic_fetch_store64(i64 %x) nounwind {
> > + store atomic i64 %x, i64* @sc64 release, align 8
> > +; X32: lock
> > +; X32: cmpxchg8b
> > + ret void
> > +; X32: ret
> > +}
> > +
> > +define void @atomic_fetch_swap64(i64 %x) nounwind {
> > + %t1 = atomicrmw xchg i64* @sc64, i64 %x acquire
> > +; X32: lock
> > +; X32: xchg8b
> > + ret void
> > +; X32: ret
> > +}
> >
> > Added: llvm/trunk/test/CodeGen/X86/atomic8.ll
> > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/atomic8.ll?rev=164281&view=auto
> > ==============================================================================
> > --- llvm/trunk/test/CodeGen/X86/atomic8.ll (added)
> > +++ llvm/trunk/test/CodeGen/X86/atomic8.ll Wed Sep 19 22:06:15 2012
> > @@ -0,0 +1,251 @@
> > +; RUN: llc < %s -O0 -march=x86-64 -mcpu=corei7 | FileCheck %s --check-prefix X64
> > +; RUN: llc < %s -O0 -march=x86 -mcpu=corei7 | FileCheck %s --check-prefix X32
> > +; XFAIL: *
> > +
> > + at sc8 = external global i8
> > +
> > +define void @atomic_fetch_add8() nounwind {
> > +; X64: atomic_fetch_add8
> > +; X32: atomic_fetch_add8
> > +entry:
> > +; 32-bit
> > + %t1 = atomicrmw add i8* @sc8, i8 1 acquire
> > +; X64: lock
> > +; X64: incb
> > +; X32: lock
> > +; X32: incb
> > + %t2 = atomicrmw add i8* @sc8, i8 3 acquire
> > +; X64: lock
> > +; X64: addb $3
> > +; X32: lock
> > +; X32: addb $3
> > + %t3 = atomicrmw add i8* @sc8, i8 5 acquire
> > +; X64: lock
> > +; X64: xaddb
> > +; X32: lock
> > +; X32: xaddb
> > + %t4 = atomicrmw add i8* @sc8, i8 %t3 acquire
> > +; X64: lock
> > +; X64: addb
> > +; X32: lock
> > +; X32: addb
> > + ret void
> > +; X64: ret
> > +; X32: ret
> > +}
> > +
> > +define void @atomic_fetch_sub8() nounwind {
> > +; X64: atomic_fetch_sub8
> > +; X32: atomic_fetch_sub8
> > + %t1 = atomicrmw sub i8* @sc8, i8 1 acquire
> > +; X64: lock
> > +; X64: decb
> > +; X32: lock
> > +; X32: decb
> > + %t2 = atomicrmw sub i8* @sc8, i8 3 acquire
> > +; X64: lock
> > +; X64: subb $3
> > +; X32: lock
> > +; X32: subb $3
> > + %t3 = atomicrmw sub i8* @sc8, i8 5 acquire
> > +; X64: lock
> > +; X64: xaddb
> > +; X32: lock
> > +; X32: xaddb
> > + %t4 = atomicrmw sub i8* @sc8, i8 %t3 acquire
> > +; X64: lock
> > +; X64: subb
> > +; X32: lock
> > +; X32: subb
> > + ret void
> > +; X64: ret
> > +; X32: ret
> > +}
> > +
> > +define void @atomic_fetch_and8() nounwind {
> > +; X64: atomic_fetch_and8
> > +; X32: atomic_fetch_and8
> > + %t1 = atomicrmw and i8* @sc8, i8 3 acquire
> > +; X64: lock
> > +; X64: andb $3
> > +; X32: lock
> > +; X32: andb $3
> > + %t2 = atomicrmw and i8* @sc8, i8 5 acquire
> > +; X64: andb
> > +; X64: lock
> > +; X64: cmpxchgb
> > +; X32: andb
> > +; X32: lock
> > +; X32: cmpxchgb
> > + %t3 = atomicrmw and i8* @sc8, i8 %t2 acquire
> > +; X64: lock
> > +; X64: andb
> > +; X32: lock
> > +; X32: andb
> > + ret void
> > +; X64: ret
> > +; X32: ret
> > +}
> > +
> > +define void @atomic_fetch_or8() nounwind {
> > +; X64: atomic_fetch_or8
> > +; X32: atomic_fetch_or8
> > + %t1 = atomicrmw or i8* @sc8, i8 3 acquire
> > +; X64: lock
> > +; X64: orb $3
> > +; X32: lock
> > +; X32: orb $3
> > + %t2 = atomicrmw or i8* @sc8, i8 5 acquire
> > +; X64: orb
> > +; X64: lock
> > +; X64: cmpxchgb
> > +; X32: orb
> > +; X32: lock
> > +; X32: cmpxchgb
> > + %t3 = atomicrmw or i8* @sc8, i8 %t2 acquire
> > +; X64: lock
> > +; X64: orb
> > +; X32: lock
> > +; X32: orb
> > + ret void
> > +; X64: ret
> > +; X32: ret
> > +}
> > +
> > +define void @atomic_fetch_xor8() nounwind {
> > +; X64: atomic_fetch_xor8
> > +; X32: atomic_fetch_xor8
> > + %t1 = atomicrmw xor i8* @sc8, i8 3 acquire
> > +; X64: lock
> > +; X64: xorb $3
> > +; X32: lock
> > +; X32: xorb $3
> > + %t2 = atomicrmw xor i8* @sc8, i8 5 acquire
> > +; X64: xorb
> > +; X64: lock
> > +; X64: cmpxchgb
> > +; X32: xorb
> > +; X32: lock
> > +; X32: cmpxchgb
> > + %t3 = atomicrmw xor i8* @sc8, i8 %t2 acquire
> > +; X64: lock
> > +; X64: xorb
> > +; X32: lock
> > +; X32: xorb
> > + ret void
> > +; X64: ret
> > +; X32: ret
> > +}
> > +
> > +define void @atomic_fetch_nand8(i8 %x) nounwind {
> > +; X64: atomic_fetch_nand8
> > +; X32: atomic_fetch_nand8
> > + %t1 = atomicrmw nand i8* @sc8, i8 %x acquire
> > +; X64: andb
> > +; X64: notb
> > +; X64: lock
> > +; X64: cmpxchgb
> > +; X32: andb
> > +; X32: notb
> > +; X32: lock
> > +; X32: cmpxchgb
> > + ret void
> > +; X64: ret
> > +; X32: ret
> > +}
> > +
> > +define void @atomic_fetch_max8(i8 %x) nounwind {
> > + %t1 = atomicrmw max i8* @sc8, i8 %x acquire
> > +; X64: cmpb
> > +; X64: cmov
> > +; X64: lock
> > +; X64: cmpxchgb
> > +
> > +; X32: cmpb
> > +; X32: cmov
> > +; X32: lock
> > +; X32: cmpxchgb
> > + ret void
> > +; X64: ret
> > +; X32: ret
> > +}
> > +
> > +define void @atomic_fetch_min8(i8 %x) nounwind {
> > + %t1 = atomicrmw min i8* @sc8, i8 %x acquire
> > +; X64: cmpb
> > +; X64: cmov
> > +; X64: lock
> > +; X64: cmpxchgb
> > +
> > +; X32: cmpb
> > +; X32: cmov
> > +; X32: lock
> > +; X32: cmpxchgb
> > + ret void
> > +; X64: ret
> > +; X32: ret
> > +}
> > +
> > +define void @atomic_fetch_umax8(i8 %x) nounwind {
> > + %t1 = atomicrmw umax i8* @sc8, i8 %x acquire
> > +; X64: cmpb
> > +; X64: cmov
> > +; X64: lock
> > +; X64: cmpxchgb
> > +
> > +; X32: cmpb
> > +; X32: cmov
> > +; X32: lock
> > +; X32: cmpxchgb
> > + ret void
> > +; X64: ret
> > +; X32: ret
> > +}
> > +
> > +define void @atomic_fetch_umin8(i8 %x) nounwind {
> > + %t1 = atomicrmw umin i8* @sc8, i8 %x acquire
> > +; X64: cmpb
> > +; X64: cmov
> > +; X64: lock
> > +; X64: cmpxchgb
> > +; X32: cmpb
> > +; X32: cmov
> > +; X32: lock
> > +; X32: cmpxchgb
> > + ret void
> > +; X64: ret
> > +; X32: ret
> > +}
> > +
> > +define void @atomic_fetch_cmpxchg8() nounwind {
> > + %t1 = cmpxchg i8* @sc8, i8 0, i8 1 acquire
> > +; X64: lock
> > +; X64: cmpxchgb
> > +; X32: lock
> > +; X32: cmpxchgb
> > + ret void
> > +; X64: ret
> > +; X32: ret
> > +}
> > +
> > +define void @atomic_fetch_store8(i8 %x) nounwind {
> > + store atomic i8 %x, i8* @sc8 release, align 4
> > +; X64-NOT: lock
> > +; X64: movb
> > +; X32-NOT: lock
> > +; X32: movb
> > + ret void
> > +; X64: ret
> > +; X32: ret
> > +}
> > +
> > +define void @atomic_fetch_swap8(i8 %x) nounwind {
> > + %t1 = atomicrmw xchg i8* @sc8, i8 %x acquire
> > +; X64-NOT: lock
> > +; X64: xchgb
> > +; X32-NOT: lock
> > +; X32: xchgb
> > + ret void
> > +; X64: ret
> > +; X32: ret
> > +}
> >
> > Modified: llvm/trunk/test/CodeGen/X86/atomic_op.ll
> > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/atomic_op.ll?rev=164281&r1=164280&r2=164281&view=diff
> > ==============================================================================
> > --- llvm/trunk/test/CodeGen/X86/atomic_op.ll (original)
> > +++ llvm/trunk/test/CodeGen/X86/atomic_op.ll Wed Sep 19 22:06:15 2012
> > @@ -1,4 +1,4 @@
> > -; RUN: llc < %s -mcpu=generic -march=x86 | FileCheck %s
> > +; RUN: llc < %s -mcpu=generic -march=x86 -mattr=+cmov | FileCheck %s
> >
> > target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
> >
> > @@ -107,13 +107,12 @@
> > ; CHECK: cmpxchgl
> > %17 = cmpxchg i32* %val2, i32 1976, i32 1 monotonic
> > store i32 %17, i32* %old
> > + ; CHECK: movl [[R17atomic:.*]], %eax
> > ; CHECK: movl $1401, %[[R17mask:[a-z]*]]
> > - ; CHECK: movl [[R17atomic:.*]], %eax
> > - ; CHECK: movl %eax, %[[R17newval:[a-z]*]]
> > - ; CHECK: andl %[[R17mask]], %[[R17newval]]
> > - ; CHECK: notl %[[R17newval]]
> > + ; CHECK: andl %eax, %[[R17mask]]
> > + ; CHECK: notl %[[R17mask]]
> > ; CHECK: lock
> > - ; CHECK: cmpxchgl %[[R17newval]], [[R17atomic]]
> > + ; CHECK: cmpxchgl %[[R17mask]], [[R17atomic]]
> > ; CHECK: jne
> > ; CHECK: movl %eax,
> > %18 = atomicrmw nand i32* %val2, i32 1401 monotonic
> >
> > Added: llvm/trunk/test/CodeGen/X86/pr13458.ll
> > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/pr13458.ll?rev=164281&view=auto
> > ==============================================================================
> > --- llvm/trunk/test/CodeGen/X86/pr13458.ll (added)
> > +++ llvm/trunk/test/CodeGen/X86/pr13458.ll Wed Sep 19 22:06:15 2012
> > @@ -0,0 +1,14 @@
> > +; RUN: llc < %s
> > +
> > +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
> > +target triple = "x86_64-apple-darwin11.4.2"
> > +
> > +%v8_uniform_Stats.0.2.4.10 = type { i64, i64, i32, i32, i32, i32, i32, i32, i32, i32, i64, i64, i64, i32, i32, i32, i32, i32, i32, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i32, i64, [7 x i32], [7 x i64] }
> > +
> > + at globalStats = external global %v8_uniform_Stats.0.2.4.10
> > +
> > +define void @MergeStats() nounwind {
> > +allocas:
> > + %r.i.i720 = atomicrmw max i64* getelementptr inbounds (%v8_uniform_Stats.0.2.4.10* @globalStats, i64 0, i32 30), i64 0 seq_cst
> > + ret void
> > +}
> >
> >
>