[llvm-commits] [llvm] r164281 - in /llvm/trunk: lib/Target/X86/X86ISelLowering.cpp lib/Target/X86/X86ISelLowering.h lib/Target/X86/X86InstrCompiler.td lib/Target/X86/X86InstrInfo.cpp lib/Target/X86/X86InstrInfo.h test/CodeGen/X86/2010-01-08-Atomic64Bug.ll test/CodeGen/X86/atomic16.ll test/CodeGen/X86/atomic32.ll test/CodeGen/X86/atomic64.ll test/CodeGen/X86/atomic6432.ll test/CodeGen/X86/atomic8.ll test/CodeGen/X86/atomic_op.ll test/CodeGen/X86/pr13458.ll
Cameron Zwarich
zwarich at apple.com
Sun Feb 24 15:24:03 PST 2013
I guess it is a bit late to say this now, but this commit has a lot of problems. It sticks store memory operands onto loads, copies kill flags from one use to multiple uses, and uses a physical register across basic blocks prior to register allocation. I have a patch ready for the first two, and I'll probably fix the last one and commit it.
Cameron
On Sep 19, 2012, at 8:06 PM, Michael Liao <michael.liao at intel.com> wrote:
> Author: hliao
> Date: Wed Sep 19 22:06:15 2012
> New Revision: 164281
>
> URL: http://llvm.org/viewvc/llvm-project?rev=164281&view=rev
> Log:
> Re-work X86 code generation of atomic ops with spin-loop
>
> - Rewrite/merge pseudo-atomic instruction emitters to address the
> following issues:
> * Remove one unnecessary load from the spin-loop
>
> Previously, the spin-loop looked like:
>
>     thisMBB:
>     newMBB:
>       ld  t1 = [bitinstr.addr]
>       op  t2 = t1, [bitinstr.val]
>       not t3 = t2  (if Invert)
>       mov EAX = t1
>       lcs dest = [bitinstr.addr], t3  [EAX is implicit]
>       bz  newMBB
>       fallthrough --> nextMBB
>
> The 'ld' at the beginning of newMBB should be lifted out of the loop,
> as lcs (i.e. CMPXCHG on x86) already loads the current memory value
> into EAX. The loop is refined into:
>
>     thisMBB:
>       EAX = LOAD [MI.addr]
>     mainMBB:
>       t1 = OP [MI.val], EAX
>       LCMPXCHG [MI.addr], t1, [EAX is implicitly used & defined]
>       JNE mainMBB
>     sinkMBB:
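>
> For illustration, a minimal sketch (not from the commit itself): an op
> such as
>   %old = atomicrmw nand i32* %p, i32 %v acquire
> would now lower to roughly the following, assuming %p in RDI and %v in
> ESI per the x86-64 SysV calling convention:
>
>     movl  (%rdi), %eax        # hoisted initial load of the old value
>   .LBB0_1:                    # mainMBB (retry loop)
>     movl  %eax, %ecx          # copy EAX, since it is used more than once
>     andl  %esi, %ecx
>     notl  %ecx                # nand = not(and)
>     lock
>     cmpxchgl %ecx, (%rdi)     # on failure, EAX is reloaded from memory
>     jne   .LBB0_1
>                               # sinkMBB: EAX holds the fetched old value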
>
> * Remove immOpc since, so far, all pseudo-atomic instructions have
> register-only forms and there is no immediate operand.
>
> * Remove unnecessary attributes/modifiers from the pseudo-atomic
> instruction definitions in the .td file.
>
> * Fix issues in PR13458
>
> - Add comprehensive tests of atomic ops on various data types.
> NOTE: Some of them are turned off due to missing functionality.
>
> - Revise existing tests to match the newly generated spin-loop.
>
>
> Added:
> llvm/trunk/test/CodeGen/X86/atomic16.ll
> llvm/trunk/test/CodeGen/X86/atomic32.ll
> llvm/trunk/test/CodeGen/X86/atomic64.ll
> llvm/trunk/test/CodeGen/X86/atomic6432.ll
> llvm/trunk/test/CodeGen/X86/atomic8.ll
> llvm/trunk/test/CodeGen/X86/pr13458.ll
> Modified:
> llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
> llvm/trunk/lib/Target/X86/X86ISelLowering.h
> llvm/trunk/lib/Target/X86/X86InstrCompiler.td
> llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
> llvm/trunk/lib/Target/X86/X86InstrInfo.h
> llvm/trunk/test/CodeGen/X86/2010-01-08-Atomic64Bug.ll
> llvm/trunk/test/CodeGen/X86/atomic_op.ll
>
> Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=164281&r1=164280&r2=164281&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
> +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Wed Sep 19 22:06:15 2012
> @@ -11911,385 +11911,498 @@
> //===----------------------------------------------------------------------===//
>
> // private utility function
> +
> +// Get CMPXCHG opcode for the specified data type.
> +static unsigned getCmpXChgOpcode(EVT VT) {
> + switch (VT.getSimpleVT().SimpleTy) {
> + case MVT::i8: return X86::LCMPXCHG8;
> + case MVT::i16: return X86::LCMPXCHG16;
> + case MVT::i32: return X86::LCMPXCHG32;
> + case MVT::i64: return X86::LCMPXCHG64;
> + default:
> + break;
> + }
> + llvm_unreachable("Invalid operand size!");
> +}
> +
> +// Get LOAD opcode for the specified data type.
> +static unsigned getLoadOpcode(EVT VT) {
> + switch (VT.getSimpleVT().SimpleTy) {
> + case MVT::i8: return X86::MOV8rm;
> + case MVT::i16: return X86::MOV16rm;
> + case MVT::i32: return X86::MOV32rm;
> + case MVT::i64: return X86::MOV64rm;
> + default:
> + break;
> + }
> + llvm_unreachable("Invalid operand size!");
> +}
> +
> +// Get opcode of the non-atomic one from the specified atomic instruction.
> +static unsigned getNonAtomicOpcode(unsigned Opc) {
> + switch (Opc) {
> + case X86::ATOMAND8: return X86::AND8rr;
> + case X86::ATOMAND16: return X86::AND16rr;
> + case X86::ATOMAND32: return X86::AND32rr;
> + case X86::ATOMAND64: return X86::AND64rr;
> + case X86::ATOMOR8: return X86::OR8rr;
> + case X86::ATOMOR16: return X86::OR16rr;
> + case X86::ATOMOR32: return X86::OR32rr;
> + case X86::ATOMOR64: return X86::OR64rr;
> + case X86::ATOMXOR8: return X86::XOR8rr;
> + case X86::ATOMXOR16: return X86::XOR16rr;
> + case X86::ATOMXOR32: return X86::XOR32rr;
> + case X86::ATOMXOR64: return X86::XOR64rr;
> + }
> + llvm_unreachable("Unhandled atomic-load-op opcode!");
> +}
> +
> +// Get opcode of the non-atomic one from the specified atomic instruction with
> +// extra opcode.
> +static unsigned getNonAtomicOpcodeWithExtraOpc(unsigned Opc,
> + unsigned &ExtraOpc) {
> + switch (Opc) {
> + case X86::ATOMNAND8: ExtraOpc = X86::NOT8r; return X86::AND8rr;
> + case X86::ATOMNAND16: ExtraOpc = X86::NOT16r; return X86::AND16rr;
> + case X86::ATOMNAND32: ExtraOpc = X86::NOT32r; return X86::AND32rr;
> + case X86::ATOMNAND64: ExtraOpc = X86::NOT64r; return X86::AND64rr;
> + case X86::ATOMMAX16: ExtraOpc = X86::CMP16rr; return X86::CMOVL16rr;
> + case X86::ATOMMAX32: ExtraOpc = X86::CMP32rr; return X86::CMOVL32rr;
> + case X86::ATOMMAX64: ExtraOpc = X86::CMP64rr; return X86::CMOVL64rr;
> + case X86::ATOMMIN16: ExtraOpc = X86::CMP16rr; return X86::CMOVG16rr;
> + case X86::ATOMMIN32: ExtraOpc = X86::CMP32rr; return X86::CMOVG32rr;
> + case X86::ATOMMIN64: ExtraOpc = X86::CMP64rr; return X86::CMOVG64rr;
> + case X86::ATOMUMAX16: ExtraOpc = X86::CMP16rr; return X86::CMOVB16rr;
> + case X86::ATOMUMAX32: ExtraOpc = X86::CMP32rr; return X86::CMOVB32rr;
> + case X86::ATOMUMAX64: ExtraOpc = X86::CMP64rr; return X86::CMOVB64rr;
> + case X86::ATOMUMIN16: ExtraOpc = X86::CMP16rr; return X86::CMOVA16rr;
> + case X86::ATOMUMIN32: ExtraOpc = X86::CMP32rr; return X86::CMOVA32rr;
> + case X86::ATOMUMIN64: ExtraOpc = X86::CMP64rr; return X86::CMOVA64rr;
> + }
> + llvm_unreachable("Unhandled atomic-load-op opcode!");
> +}
> +
> +// Get opcode of the non-atomic one from the specified atomic instruction for
> +// 64-bit data type on 32-bit target.
> +static unsigned getNonAtomic6432Opcode(unsigned Opc, unsigned &HiOpc) {
> + switch (Opc) {
> + case X86::ATOMAND6432: HiOpc = X86::AND32rr; return X86::AND32rr;
> + case X86::ATOMOR6432: HiOpc = X86::OR32rr; return X86::OR32rr;
> + case X86::ATOMXOR6432: HiOpc = X86::XOR32rr; return X86::XOR32rr;
> + case X86::ATOMADD6432: HiOpc = X86::ADC32rr; return X86::ADD32rr;
> + case X86::ATOMSUB6432: HiOpc = X86::SBB32rr; return X86::SUB32rr;
> + case X86::ATOMSWAP6432: HiOpc = X86::MOV32rr; return X86::MOV32rr;
> + }
> + llvm_unreachable("Unhandled atomic-load-op opcode!");
> +}
> +
> +// Get opcode of the non-atomic one from the specified atomic instruction for
> +// 64-bit data type on 32-bit target with extra opcode.
> +static unsigned getNonAtomic6432OpcodeWithExtraOpc(unsigned Opc,
> + unsigned &HiOpc,
> + unsigned &ExtraOpc) {
> + switch (Opc) {
> + case X86::ATOMNAND6432:
> + ExtraOpc = X86::NOT32r;
> + HiOpc = X86::AND32rr;
> + return X86::AND32rr;
> + }
> + llvm_unreachable("Unhandled atomic-load-op opcode!");
> +}
> +
> +// Get pseudo CMOV opcode from the specified data type.
> +static unsigned getPseudoCMOVOpc(EVT VT) {
> + switch (VT.getSimpleVT().SimpleTy) {
> + case MVT::i16: return X86::CMOV_GR16;
> + case MVT::i32: return X86::CMOV_GR32;
> + default:
> + break;
> + }
> + llvm_unreachable("Unknown CMOV opcode!");
> +}
> +
> +// EmitAtomicLoadArith - emit the code sequence for pseudo atomic instructions.
> +// They will be translated into a spin-loop or compare-exchange loop from
> +//
> +// ...
> +// dst = atomic-fetch-op MI.addr, MI.val
> +// ...
> +//
> +// to
> +//
> +// ...
> +// EAX = LOAD MI.addr
> +// loop:
> +// t1 = OP MI.val, EAX
> +// LCMPXCHG [MI.addr], t1, [EAX is implicitly used & defined]
> +// JNE loop
> +// sink:
> +// dst = EAX
> +// ...
> MachineBasicBlock *
> -X86TargetLowering::EmitAtomicBitwiseWithCustomInserter(MachineInstr *bInstr,
> - MachineBasicBlock *MBB,
> - unsigned regOpc,
> - unsigned immOpc,
> - unsigned LoadOpc,
> - unsigned CXchgOpc,
> - unsigned notOpc,
> - unsigned EAXreg,
> - const TargetRegisterClass *RC,
> - bool Invert) const {
> - // For the atomic bitwise operator, we generate
> - // thisMBB:
> - // newMBB:
> - // ld t1 = [bitinstr.addr]
> - // op t2 = t1, [bitinstr.val]
> - // not t3 = t2 (if Invert)
> - // mov EAX = t1
> - // lcs dest = [bitinstr.addr], t3 [EAX is implicit]
> - // bz newMBB
> - // fallthrough -->nextMBB
> +X86TargetLowering::EmitAtomicLoadArith(MachineInstr *MI,
> + MachineBasicBlock *MBB) const {
> const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
> - const BasicBlock *LLVM_BB = MBB->getBasicBlock();
> - MachineFunction::iterator MBBIter = MBB;
> - ++MBBIter;
> + DebugLoc DL = MI->getDebugLoc();
>
> - /// First build the CFG
> - MachineFunction *F = MBB->getParent();
> - MachineBasicBlock *thisMBB = MBB;
> - MachineBasicBlock *newMBB = F->CreateMachineBasicBlock(LLVM_BB);
> - MachineBasicBlock *nextMBB = F->CreateMachineBasicBlock(LLVM_BB);
> - F->insert(MBBIter, newMBB);
> - F->insert(MBBIter, nextMBB);
> -
> - // Transfer the remainder of thisMBB and its successor edges to nextMBB.
> - nextMBB->splice(nextMBB->begin(), thisMBB,
> - llvm::next(MachineBasicBlock::iterator(bInstr)),
> - thisMBB->end());
> - nextMBB->transferSuccessorsAndUpdatePHIs(thisMBB);
> -
> - // Update thisMBB to fall through to newMBB
> - thisMBB->addSuccessor(newMBB);
> -
> - // newMBB jumps to itself and fall through to nextMBB
> - newMBB->addSuccessor(nextMBB);
> - newMBB->addSuccessor(newMBB);
> -
> - // Insert instructions into newMBB based on incoming instruction
> - assert(bInstr->getNumOperands() < X86::AddrNumOperands + 4 &&
> - "unexpected number of operands");
> - DebugLoc dl = bInstr->getDebugLoc();
> - MachineOperand& destOper = bInstr->getOperand(0);
> - MachineOperand* argOpers[2 + X86::AddrNumOperands];
> - int numArgs = bInstr->getNumOperands() - 1;
> - for (int i=0; i < numArgs; ++i)
> - argOpers[i] = &bInstr->getOperand(i+1);
> -
> - // x86 address has 4 operands: base, index, scale, and displacement
> - int lastAddrIndx = X86::AddrNumOperands - 1; // [0,3]
> - int valArgIndx = lastAddrIndx + 1;
> -
> - unsigned t1 = F->getRegInfo().createVirtualRegister(RC);
> - MachineInstrBuilder MIB = BuildMI(newMBB, dl, TII->get(LoadOpc), t1);
> - for (int i=0; i <= lastAddrIndx; ++i)
> - (*MIB).addOperand(*argOpers[i]);
> -
> - unsigned t2 = F->getRegInfo().createVirtualRegister(RC);
> - assert((argOpers[valArgIndx]->isReg() ||
> - argOpers[valArgIndx]->isImm()) &&
> - "invalid operand");
> - if (argOpers[valArgIndx]->isReg())
> - MIB = BuildMI(newMBB, dl, TII->get(regOpc), t2);
> - else
> - MIB = BuildMI(newMBB, dl, TII->get(immOpc), t2);
> - MIB.addReg(t1);
> - (*MIB).addOperand(*argOpers[valArgIndx]);
> + MachineFunction *MF = MBB->getParent();
> + MachineRegisterInfo &MRI = MF->getRegInfo();
>
> - unsigned t3 = F->getRegInfo().createVirtualRegister(RC);
> - if (Invert) {
> - MIB = BuildMI(newMBB, dl, TII->get(notOpc), t3).addReg(t2);
> - }
> - else
> - t3 = t2;
> + const BasicBlock *BB = MBB->getBasicBlock();
> + MachineFunction::iterator I = MBB;
> + ++I;
>
> - MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), EAXreg);
> - MIB.addReg(t1);
> + assert(MI->getNumOperands() <= X86::AddrNumOperands + 2 &&
> + "Unexpected number of operands");
> +
> + assert(MI->hasOneMemOperand() &&
> + "Expected atomic-load-op to have one memoperand");
> +
> + // Memory Reference
> + MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
> + MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();
>
> - MIB = BuildMI(newMBB, dl, TII->get(CXchgOpc));
> - for (int i=0; i <= lastAddrIndx; ++i)
> - (*MIB).addOperand(*argOpers[i]);
> - MIB.addReg(t3);
> - assert(bInstr->hasOneMemOperand() && "Unexpected number of memoperand");
> - (*MIB).setMemRefs(bInstr->memoperands_begin(),
> - bInstr->memoperands_end());
> + unsigned DstReg, SrcReg;
> + unsigned MemOpndSlot;
>
> - MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), destOper.getReg());
> - MIB.addReg(EAXreg);
> + unsigned CurOp = 0;
>
> - // insert branch
> - BuildMI(newMBB, dl, TII->get(X86::JNE_4)).addMBB(newMBB);
> + DstReg = MI->getOperand(CurOp++).getReg();
> + MemOpndSlot = CurOp;
> + CurOp += X86::AddrNumOperands;
> + SrcReg = MI->getOperand(CurOp++).getReg();
>
> - bInstr->eraseFromParent(); // The pseudo instruction is gone now.
> - return nextMBB;
> -}
> + const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
> + EVT VT = *RC->vt_begin();
> + unsigned AccPhyReg = getX86SubSuperRegister(X86::EAX, VT);
>
> -// private utility function: 64 bit atomics on 32 bit host.
> -MachineBasicBlock *
> -X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr,
> - MachineBasicBlock *MBB,
> - unsigned regOpcL,
> - unsigned regOpcH,
> - unsigned immOpcL,
> - unsigned immOpcH,
> - bool Invert) const {
> - // For the atomic bitwise operator, we generate
> - // thisMBB (instructions are in pairs, except cmpxchg8b)
> - // ld t1,t2 = [bitinstr.addr]
> - // newMBB:
> - // out1, out2 = phi (thisMBB, t1/t2) (newMBB, t3/t4)
> - // op t5, t6 <- out1, out2, [bitinstr.val]
> - // (for SWAP, substitute: mov t5, t6 <- [bitinstr.val])
> - // neg t7, t8 < t5, t6 (if Invert)
> - // mov ECX, EBX <- t5, t6
> - // mov EAX, EDX <- t1, t2
> - // cmpxchg8b [bitinstr.addr] [EAX, EDX, EBX, ECX implicit]
> - // mov t3, t4 <- EAX, EDX
> - // bz newMBB
> - // result in out1, out2
> - // fallthrough -->nextMBB
> + unsigned LCMPXCHGOpc = getCmpXChgOpcode(VT);
> + unsigned LOADOpc = getLoadOpcode(VT);
>
> - const TargetRegisterClass *RC = &X86::GR32RegClass;
> - const unsigned LoadOpc = X86::MOV32rm;
> - const unsigned NotOpc = X86::NOT32r;
> - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
> - const BasicBlock *LLVM_BB = MBB->getBasicBlock();
> - MachineFunction::iterator MBBIter = MBB;
> - ++MBBIter;
> + // For the atomic load-arith operator, we generate
> + //
> + // thisMBB:
> + // EAX = LOAD [MI.addr]
> + // mainMBB:
> + // t1 = OP MI.val, EAX
> + // LCMPXCHG [MI.addr], t1, [EAX is implicitly used & defined]
> + // JNE mainMBB
> + // sinkMBB:
>
> - /// First build the CFG
> - MachineFunction *F = MBB->getParent();
> MachineBasicBlock *thisMBB = MBB;
> - MachineBasicBlock *newMBB = F->CreateMachineBasicBlock(LLVM_BB);
> - MachineBasicBlock *nextMBB = F->CreateMachineBasicBlock(LLVM_BB);
> - F->insert(MBBIter, newMBB);
> - F->insert(MBBIter, nextMBB);
> -
> - // Transfer the remainder of thisMBB and its successor edges to nextMBB.
> - nextMBB->splice(nextMBB->begin(), thisMBB,
> - llvm::next(MachineBasicBlock::iterator(bInstr)),
> - thisMBB->end());
> - nextMBB->transferSuccessorsAndUpdatePHIs(thisMBB);
> -
> - // Update thisMBB to fall through to newMBB
> - thisMBB->addSuccessor(newMBB);
> -
> - // newMBB jumps to itself and fall through to nextMBB
> - newMBB->addSuccessor(nextMBB);
> - newMBB->addSuccessor(newMBB);
> -
> - DebugLoc dl = bInstr->getDebugLoc();
> - // Insert instructions into newMBB based on incoming instruction
> - // There are 8 "real" operands plus 9 implicit def/uses, ignored here.
> - assert(bInstr->getNumOperands() < X86::AddrNumOperands + 14 &&
> - "unexpected number of operands");
> - MachineOperand& dest1Oper = bInstr->getOperand(0);
> - MachineOperand& dest2Oper = bInstr->getOperand(1);
> - MachineOperand* argOpers[2 + X86::AddrNumOperands];
> - for (int i=0; i < 2 + X86::AddrNumOperands; ++i) {
> - argOpers[i] = &bInstr->getOperand(i+2);
> -
> - // We use some of the operands multiple times, so conservatively just
> - // clear any kill flags that might be present.
> - if (argOpers[i]->isReg() && argOpers[i]->isUse())
> - argOpers[i]->setIsKill(false);
> - }
> -
> - // x86 address has 5 operands: base, index, scale, displacement, and segment.
> - int lastAddrIndx = X86::AddrNumOperands - 1; // [0,3]
> -
> - unsigned t1 = F->getRegInfo().createVirtualRegister(RC);
> - MachineInstrBuilder MIB = BuildMI(thisMBB, dl, TII->get(LoadOpc), t1);
> - for (int i=0; i <= lastAddrIndx; ++i)
> - (*MIB).addOperand(*argOpers[i]);
> - unsigned t2 = F->getRegInfo().createVirtualRegister(RC);
> - MIB = BuildMI(thisMBB, dl, TII->get(LoadOpc), t2);
> - // add 4 to displacement.
> - for (int i=0; i <= lastAddrIndx-2; ++i)
> - (*MIB).addOperand(*argOpers[i]);
> - MachineOperand newOp3 = *(argOpers[3]);
> - if (newOp3.isImm())
> - newOp3.setImm(newOp3.getImm()+4);
> - else
> - newOp3.setOffset(newOp3.getOffset()+4);
> - (*MIB).addOperand(newOp3);
> - (*MIB).addOperand(*argOpers[lastAddrIndx]);
> -
> - // t3/4 are defined later, at the bottom of the loop
> - unsigned t3 = F->getRegInfo().createVirtualRegister(RC);
> - unsigned t4 = F->getRegInfo().createVirtualRegister(RC);
> - BuildMI(newMBB, dl, TII->get(X86::PHI), dest1Oper.getReg())
> - .addReg(t1).addMBB(thisMBB).addReg(t3).addMBB(newMBB);
> - BuildMI(newMBB, dl, TII->get(X86::PHI), dest2Oper.getReg())
> - .addReg(t2).addMBB(thisMBB).addReg(t4).addMBB(newMBB);
> -
> - // The subsequent operations should be using the destination registers of
> - // the PHI instructions.
> - t1 = dest1Oper.getReg();
> - t2 = dest2Oper.getReg();
> -
> - int valArgIndx = lastAddrIndx + 1;
> - assert((argOpers[valArgIndx]->isReg() ||
> - argOpers[valArgIndx]->isImm()) &&
> - "invalid operand");
> - unsigned t5 = F->getRegInfo().createVirtualRegister(RC);
> - unsigned t6 = F->getRegInfo().createVirtualRegister(RC);
> - if (argOpers[valArgIndx]->isReg())
> - MIB = BuildMI(newMBB, dl, TII->get(regOpcL), t5);
> - else
> - MIB = BuildMI(newMBB, dl, TII->get(immOpcL), t5);
> - if (regOpcL != X86::MOV32rr)
> - MIB.addReg(t1);
> - (*MIB).addOperand(*argOpers[valArgIndx]);
> - assert(argOpers[valArgIndx + 1]->isReg() ==
> - argOpers[valArgIndx]->isReg());
> - assert(argOpers[valArgIndx + 1]->isImm() ==
> - argOpers[valArgIndx]->isImm());
> - if (argOpers[valArgIndx + 1]->isReg())
> - MIB = BuildMI(newMBB, dl, TII->get(regOpcH), t6);
> - else
> - MIB = BuildMI(newMBB, dl, TII->get(immOpcH), t6);
> - if (regOpcH != X86::MOV32rr)
> - MIB.addReg(t2);
> - (*MIB).addOperand(*argOpers[valArgIndx + 1]);
> -
> - unsigned t7, t8;
> - if (Invert) {
> - t7 = F->getRegInfo().createVirtualRegister(RC);
> - t8 = F->getRegInfo().createVirtualRegister(RC);
> - MIB = BuildMI(newMBB, dl, TII->get(NotOpc), t7).addReg(t5);
> - MIB = BuildMI(newMBB, dl, TII->get(NotOpc), t8).addReg(t6);
> - } else {
> - t7 = t5;
> - t8 = t6;
> + MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
> + MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
> + MF->insert(I, mainMBB);
> + MF->insert(I, sinkMBB);
> +
> + MachineInstrBuilder MIB;
> +
> + // Transfer the remainder of BB and its successor edges to sinkMBB.
> + sinkMBB->splice(sinkMBB->begin(), MBB,
> + llvm::next(MachineBasicBlock::iterator(MI)), MBB->end());
> + sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
> +
> + // thisMBB:
> + MIB = BuildMI(thisMBB, DL, TII->get(LOADOpc), AccPhyReg);
> + for (unsigned i = 0; i < X86::AddrNumOperands; ++i)
> + MIB.addOperand(MI->getOperand(MemOpndSlot + i));
> + MIB.setMemRefs(MMOBegin, MMOEnd);
> +
> + thisMBB->addSuccessor(mainMBB);
> +
> + // mainMBB:
> + MachineBasicBlock *origMainMBB = mainMBB;
> + mainMBB->addLiveIn(AccPhyReg);
> +
> + // Copy AccPhyReg as it is used more than once.
> + unsigned AccReg = MRI.createVirtualRegister(RC);
> + BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), AccReg)
> + .addReg(AccPhyReg);
> +
> + unsigned t1 = MRI.createVirtualRegister(RC);
> + unsigned Opc = MI->getOpcode();
> + switch (Opc) {
> + default:
> + llvm_unreachable("Unhandled atomic-load-op opcode!");
> + case X86::ATOMAND8:
> + case X86::ATOMAND16:
> + case X86::ATOMAND32:
> + case X86::ATOMAND64:
> + case X86::ATOMOR8:
> + case X86::ATOMOR16:
> + case X86::ATOMOR32:
> + case X86::ATOMOR64:
> + case X86::ATOMXOR8:
> + case X86::ATOMXOR16:
> + case X86::ATOMXOR32:
> + case X86::ATOMXOR64: {
> + unsigned ARITHOpc = getNonAtomicOpcode(Opc);
> + BuildMI(mainMBB, DL, TII->get(ARITHOpc), t1).addReg(SrcReg)
> + .addReg(AccReg);
> + break;
> + }
> + case X86::ATOMNAND8:
> + case X86::ATOMNAND16:
> + case X86::ATOMNAND32:
> + case X86::ATOMNAND64: {
> + unsigned t2 = MRI.createVirtualRegister(RC);
> + unsigned NOTOpc;
> + unsigned ANDOpc = getNonAtomicOpcodeWithExtraOpc(Opc, NOTOpc);
> + BuildMI(mainMBB, DL, TII->get(ANDOpc), t2).addReg(SrcReg)
> + .addReg(AccReg);
> + BuildMI(mainMBB, DL, TII->get(NOTOpc), t1).addReg(t2);
> + break;
> + }
> + case X86::ATOMMAX16:
> + case X86::ATOMMAX32:
> + case X86::ATOMMAX64:
> + case X86::ATOMMIN16:
> + case X86::ATOMMIN32:
> + case X86::ATOMMIN64:
> + case X86::ATOMUMAX16:
> + case X86::ATOMUMAX32:
> + case X86::ATOMUMAX64:
> + case X86::ATOMUMIN16:
> + case X86::ATOMUMIN32:
> + case X86::ATOMUMIN64: {
> + unsigned CMPOpc;
> + unsigned CMOVOpc = getNonAtomicOpcodeWithExtraOpc(Opc, CMPOpc);
> +
> + BuildMI(mainMBB, DL, TII->get(CMPOpc))
> + .addReg(SrcReg)
> + .addReg(AccReg);
> +
> + if (Subtarget->hasCMov()) {
> + // Native support
> + BuildMI(mainMBB, DL, TII->get(CMOVOpc), t1)
> + .addReg(SrcReg)
> + .addReg(AccReg);
> + } else {
> + // Use pseudo select and lower them.
> + assert((VT == MVT::i16 || VT == MVT::i32) &&
> + "Invalid atomic-load-op transformation!");
> + unsigned SelOpc = getPseudoCMOVOpc(VT);
> + X86::CondCode CC = X86::getCondFromCMovOpc(CMOVOpc);
> + assert(CC != X86::COND_INVALID && "Invalid atomic-load-op transformation!");
> + MIB = BuildMI(mainMBB, DL, TII->get(SelOpc), t1)
> + .addReg(SrcReg).addReg(AccReg)
> + .addImm(CC);
> + mainMBB = EmitLoweredSelect(MIB, mainMBB);
> + }
> + break;
> + }
> }
>
> - MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::EAX);
> + // Copy AccPhyReg back from virtual register.
> + BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), AccPhyReg)
> + .addReg(AccReg);
> +
> + MIB = BuildMI(mainMBB, DL, TII->get(LCMPXCHGOpc));
> + for (unsigned i = 0; i < X86::AddrNumOperands; ++i)
> + MIB.addOperand(MI->getOperand(MemOpndSlot + i));
> MIB.addReg(t1);
> - MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::EDX);
> - MIB.addReg(t2);
> + MIB.setMemRefs(MMOBegin, MMOEnd);
>
> - MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::EBX);
> - MIB.addReg(t7);
> - MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::ECX);
> - MIB.addReg(t8);
> -
> - MIB = BuildMI(newMBB, dl, TII->get(X86::LCMPXCHG8B));
> - for (int i=0; i <= lastAddrIndx; ++i)
> - (*MIB).addOperand(*argOpers[i]);
> -
> - assert(bInstr->hasOneMemOperand() && "Unexpected number of memoperand");
> - (*MIB).setMemRefs(bInstr->memoperands_begin(),
> - bInstr->memoperands_end());
> -
> - MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), t3);
> - MIB.addReg(X86::EAX);
> - MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), t4);
> - MIB.addReg(X86::EDX);
> + BuildMI(mainMBB, DL, TII->get(X86::JNE_4)).addMBB(origMainMBB);
>
> - // insert branch
> - BuildMI(newMBB, dl, TII->get(X86::JNE_4)).addMBB(newMBB);
> + mainMBB->addSuccessor(origMainMBB);
> + mainMBB->addSuccessor(sinkMBB);
> +
> + // sinkMBB:
> + sinkMBB->addLiveIn(AccPhyReg);
> +
> + BuildMI(*sinkMBB, sinkMBB->begin(), DL,
> + TII->get(TargetOpcode::COPY), DstReg)
> + .addReg(AccPhyReg);
>
> - bInstr->eraseFromParent(); // The pseudo instruction is gone now.
> - return nextMBB;
> + MI->eraseFromParent();
> + return sinkMBB;
> }
>
> -// private utility function
> +// EmitAtomicLoadArith6432 - emit the code sequence for pseudo atomic
> +// instructions. They will be translated into a spin-loop or compare-exchange
> +// loop from
> +//
> +// ...
> +// dst = atomic-fetch-op MI.addr, MI.val
> +// ...
> +//
> +// to
> +//
> +// ...
> +// EAX = LOAD [MI.addr + 0]
> +// EDX = LOAD [MI.addr + 4]
> +// loop:
> +// EBX = OP MI.val.lo, EAX
> +// ECX = OP MI.val.hi, EDX
> +// LCMPXCHG8B [MI.addr], [ECX:EBX & EDX:EAX are implicitly used and EDX:EAX is implicitly defined]
> +// JNE loop
> +// sink:
> +// dst = EDX:EAX
> +// ...
> MachineBasicBlock *
> -X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr,
> - MachineBasicBlock *MBB,
> - unsigned cmovOpc) const {
> - // For the atomic min/max operator, we generate
> - // thisMBB:
> - // newMBB:
> - // ld t1 = [min/max.addr]
> - // mov t2 = [min/max.val]
> - // cmp t1, t2
> - // cmov[cond] t2 = t1
> - // mov EAX = t1
> - // lcs dest = [bitinstr.addr], t2 [EAX is implicit]
> - // bz newMBB
> - // fallthrough -->nextMBB
> - //
> +X86TargetLowering::EmitAtomicLoadArith6432(MachineInstr *MI,
> + MachineBasicBlock *MBB) const {
> const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
> - const BasicBlock *LLVM_BB = MBB->getBasicBlock();
> - MachineFunction::iterator MBBIter = MBB;
> - ++MBBIter;
> + DebugLoc DL = MI->getDebugLoc();
> +
> + MachineFunction *MF = MBB->getParent();
> + MachineRegisterInfo &MRI = MF->getRegInfo();
> +
> + const BasicBlock *BB = MBB->getBasicBlock();
> + MachineFunction::iterator I = MBB;
> + ++I;
> +
> + assert(MI->getNumOperands() <= X86::AddrNumOperands + 4 &&
> + "Unexpected number of operands");
> +
> + assert(MI->hasOneMemOperand() &&
> + "Expected atomic-load-op32 to have one memoperand");
> +
> + // Memory Reference
> + MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
> + MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();
> +
> + unsigned DstLoReg, DstHiReg;
> + unsigned SrcLoReg, SrcHiReg;
> + unsigned MemOpndSlot;
> +
> + unsigned CurOp = 0;
> +
> + DstLoReg = MI->getOperand(CurOp++).getReg();
> + DstHiReg = MI->getOperand(CurOp++).getReg();
> + MemOpndSlot = CurOp;
> + CurOp += X86::AddrNumOperands;
> + SrcLoReg = MI->getOperand(CurOp++).getReg();
> + SrcHiReg = MI->getOperand(CurOp++).getReg();
> +
> + const TargetRegisterClass *RC = &X86::GR32RegClass;
> +
> + unsigned LCMPXCHGOpc = X86::LCMPXCHG8B;
> + unsigned LOADOpc = X86::MOV32rm;
> +
> + // For the atomic load-arith operator, we generate
> + //
> + // thisMBB:
> + // EAX = LOAD [MI.addr + 0]
> + // EDX = LOAD [MI.addr + 4]
> + // mainMBB:
> + // EBX = OP MI.vallo, EAX
> + // ECX = OP MI.valhi, EDX
> + // LCMPXCHG8B [MI.addr], [ECX:EBX & EDX:EAX are implicitly used and EDX:EAX is implicitly defined]
> + // JNE mainMBB
> + // sinkMBB:
>
> - /// First build the CFG
> - MachineFunction *F = MBB->getParent();
> MachineBasicBlock *thisMBB = MBB;
> - MachineBasicBlock *newMBB = F->CreateMachineBasicBlock(LLVM_BB);
> - MachineBasicBlock *nextMBB = F->CreateMachineBasicBlock(LLVM_BB);
> - F->insert(MBBIter, newMBB);
> - F->insert(MBBIter, nextMBB);
> -
> - // Transfer the remainder of thisMBB and its successor edges to nextMBB.
> - nextMBB->splice(nextMBB->begin(), thisMBB,
> - llvm::next(MachineBasicBlock::iterator(mInstr)),
> - thisMBB->end());
> - nextMBB->transferSuccessorsAndUpdatePHIs(thisMBB);
> -
> - // Update thisMBB to fall through to newMBB
> - thisMBB->addSuccessor(newMBB);
> -
> - // newMBB jumps to newMBB and fall through to nextMBB
> - newMBB->addSuccessor(nextMBB);
> - newMBB->addSuccessor(newMBB);
> -
> - DebugLoc dl = mInstr->getDebugLoc();
> - // Insert instructions into newMBB based on incoming instruction
> - assert(mInstr->getNumOperands() < X86::AddrNumOperands + 4 &&
> - "unexpected number of operands");
> - MachineOperand& destOper = mInstr->getOperand(0);
> - MachineOperand* argOpers[2 + X86::AddrNumOperands];
> - int numArgs = mInstr->getNumOperands() - 1;
> - for (int i=0; i < numArgs; ++i)
> - argOpers[i] = &mInstr->getOperand(i+1);
> -
> - // x86 address has 4 operands: base, index, scale, and displacement
> - int lastAddrIndx = X86::AddrNumOperands - 1; // [0,3]
> - int valArgIndx = lastAddrIndx + 1;
> -
> - unsigned t1 = F->getRegInfo().createVirtualRegister(&X86::GR32RegClass);
> - MachineInstrBuilder MIB = BuildMI(newMBB, dl, TII->get(X86::MOV32rm), t1);
> - for (int i=0; i <= lastAddrIndx; ++i)
> - (*MIB).addOperand(*argOpers[i]);
> -
> - // We only support register and immediate values
> - assert((argOpers[valArgIndx]->isReg() ||
> - argOpers[valArgIndx]->isImm()) &&
> - "invalid operand");
> -
> - unsigned t2 = F->getRegInfo().createVirtualRegister(&X86::GR32RegClass);
> - if (argOpers[valArgIndx]->isReg())
> - MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), t2);
> - else
> - MIB = BuildMI(newMBB, dl, TII->get(X86::MOV32rr), t2);
> - (*MIB).addOperand(*argOpers[valArgIndx]);
> + MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
> + MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
> + MF->insert(I, mainMBB);
> + MF->insert(I, sinkMBB);
>
> - MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::EAX);
> - MIB.addReg(t1);
> + MachineInstrBuilder MIB;
>
> - MIB = BuildMI(newMBB, dl, TII->get(X86::CMP32rr));
> - MIB.addReg(t1);
> - MIB.addReg(t2);
> + // Transfer the remainder of BB and its successor edges to sinkMBB.
> + sinkMBB->splice(sinkMBB->begin(), MBB,
> + llvm::next(MachineBasicBlock::iterator(MI)), MBB->end());
> + sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
> +
> + // thisMBB:
> + // Lo
> + MIB = BuildMI(thisMBB, DL, TII->get(LOADOpc), X86::EAX);
> + for (unsigned i = 0; i < X86::AddrNumOperands; ++i)
> + MIB.addOperand(MI->getOperand(MemOpndSlot + i));
> + MIB.setMemRefs(MMOBegin, MMOEnd);
> + // Hi
> + MIB = BuildMI(thisMBB, DL, TII->get(LOADOpc), X86::EDX);
> + for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
> + if (i == X86::AddrDisp)
> + MIB.addDisp(MI->getOperand(MemOpndSlot + i), 4); // 4 == sizeof(i32)
> + else
> + MIB.addOperand(MI->getOperand(MemOpndSlot + i));
> + }
> + MIB.setMemRefs(MMOBegin, MMOEnd);
>
> - // Generate movc
> - unsigned t3 = F->getRegInfo().createVirtualRegister(&X86::GR32RegClass);
> - MIB = BuildMI(newMBB, dl, TII->get(cmovOpc),t3);
> - MIB.addReg(t2);
> - MIB.addReg(t1);
> + thisMBB->addSuccessor(mainMBB);
>
> - // Cmp and exchange if none has modified the memory location
> - MIB = BuildMI(newMBB, dl, TII->get(X86::LCMPXCHG32));
> - for (int i=0; i <= lastAddrIndx; ++i)
> - (*MIB).addOperand(*argOpers[i]);
> - MIB.addReg(t3);
> - assert(mInstr->hasOneMemOperand() && "Unexpected number of memoperand");
> - (*MIB).setMemRefs(mInstr->memoperands_begin(),
> - mInstr->memoperands_end());
> + // mainMBB:
> + MachineBasicBlock *origMainMBB = mainMBB;
> + mainMBB->addLiveIn(X86::EAX);
> + mainMBB->addLiveIn(X86::EDX);
>
> - MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), destOper.getReg());
> - MIB.addReg(X86::EAX);
> + // Copy EDX:EAX as they are used more than once.
> + unsigned LoReg = MRI.createVirtualRegister(RC);
> + unsigned HiReg = MRI.createVirtualRegister(RC);
> + BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), LoReg).addReg(X86::EAX);
> + BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), HiReg).addReg(X86::EDX);
>
> - // insert branch
> - BuildMI(newMBB, dl, TII->get(X86::JNE_4)).addMBB(newMBB);
> + unsigned t1L = MRI.createVirtualRegister(RC);
> + unsigned t1H = MRI.createVirtualRegister(RC);
>
> - mInstr->eraseFromParent(); // The pseudo instruction is gone now.
> - return nextMBB;
> + unsigned Opc = MI->getOpcode();
> + switch (Opc) {
> + default:
> + llvm_unreachable("Unhandled atomic-load-op6432 opcode!");
> + case X86::ATOMAND6432:
> + case X86::ATOMOR6432:
> + case X86::ATOMXOR6432:
> + case X86::ATOMADD6432:
> + case X86::ATOMSUB6432: {
> + unsigned HiOpc;
> + unsigned LoOpc = getNonAtomic6432Opcode(Opc, HiOpc);
> + BuildMI(mainMBB, DL, TII->get(LoOpc), t1L).addReg(SrcLoReg).addReg(LoReg);
> + BuildMI(mainMBB, DL, TII->get(HiOpc), t1H).addReg(SrcHiReg).addReg(HiReg);
> + break;
> + }
> + case X86::ATOMNAND6432: {
> + unsigned HiOpc, NOTOpc;
> + unsigned LoOpc = getNonAtomic6432OpcodeWithExtraOpc(Opc, HiOpc, NOTOpc);
> + unsigned t2L = MRI.createVirtualRegister(RC);
> + unsigned t2H = MRI.createVirtualRegister(RC);
> + BuildMI(mainMBB, DL, TII->get(LoOpc), t2L).addReg(SrcLoReg).addReg(LoReg);
> + BuildMI(mainMBB, DL, TII->get(HiOpc), t2H).addReg(SrcHiReg).addReg(HiReg);
> + BuildMI(mainMBB, DL, TII->get(NOTOpc), t1L).addReg(t2L);
> + BuildMI(mainMBB, DL, TII->get(NOTOpc), t1H).addReg(t2H);
> + break;
> + }
> + case X86::ATOMSWAP6432: {
> + unsigned HiOpc;
> + unsigned LoOpc = getNonAtomic6432Opcode(Opc, HiOpc);
> + BuildMI(mainMBB, DL, TII->get(LoOpc), t1L).addReg(SrcLoReg);
> + BuildMI(mainMBB, DL, TII->get(HiOpc), t1H).addReg(SrcHiReg);
> + break;
> + }
> + }
> +
> + // Copy EDX:EAX back from HiReg:LoReg
> + BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::EAX).addReg(LoReg);
> + BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::EDX).addReg(HiReg);
> + // Copy ECX:EBX from t1H:t1L
> + BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::EBX).addReg(t1L);
> + BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::ECX).addReg(t1H);
> +
> + MIB = BuildMI(mainMBB, DL, TII->get(LCMPXCHGOpc));
> + for (unsigned i = 0; i < X86::AddrNumOperands; ++i)
> + MIB.addOperand(MI->getOperand(MemOpndSlot + i));
> + MIB.setMemRefs(MMOBegin, MMOEnd);
> +
> + BuildMI(mainMBB, DL, TII->get(X86::JNE_4)).addMBB(origMainMBB);
> +
> + mainMBB->addSuccessor(origMainMBB);
> + mainMBB->addSuccessor(sinkMBB);
> +
> + // sinkMBB:
> + sinkMBB->addLiveIn(X86::EAX);
> + sinkMBB->addLiveIn(X86::EDX);
> +
> + BuildMI(*sinkMBB, sinkMBB->begin(), DL,
> + TII->get(TargetOpcode::COPY), DstLoReg)
> + .addReg(X86::EAX);
> + BuildMI(*sinkMBB, sinkMBB->begin(), DL,
> + TII->get(TargetOpcode::COPY), DstHiReg)
> + .addReg(X86::EDX);
> +
> + MI->eraseFromParent();
> + return sinkMBB;
> }
>
> // FIXME: When we get size specific XMM0 registers, i.e. XMM0_V16I8
> @@ -13176,130 +13289,42 @@
> return EmitMonitor(MI, BB);
>
> // Atomic Lowering.
> - case X86::ATOMMIN32:
> - case X86::ATOMMAX32:
> - case X86::ATOMUMIN32:
> - case X86::ATOMUMAX32:
> - case X86::ATOMMIN16:
> - case X86::ATOMMAX16:
> - case X86::ATOMUMIN16:
> - case X86::ATOMUMAX16:
> - case X86::ATOMMIN64:
> - case X86::ATOMMAX64:
> - case X86::ATOMUMIN64:
> - case X86::ATOMUMAX64: {
> - unsigned Opc;
> - switch (MI->getOpcode()) {
> - default: llvm_unreachable("illegal opcode!");
> - case X86::ATOMMIN32: Opc = X86::CMOVL32rr; break;
> - case X86::ATOMMAX32: Opc = X86::CMOVG32rr; break;
> - case X86::ATOMUMIN32: Opc = X86::CMOVB32rr; break;
> - case X86::ATOMUMAX32: Opc = X86::CMOVA32rr; break;
> - case X86::ATOMMIN16: Opc = X86::CMOVL16rr; break;
> - case X86::ATOMMAX16: Opc = X86::CMOVG16rr; break;
> - case X86::ATOMUMIN16: Opc = X86::CMOVB16rr; break;
> - case X86::ATOMUMAX16: Opc = X86::CMOVA16rr; break;
> - case X86::ATOMMIN64: Opc = X86::CMOVL64rr; break;
> - case X86::ATOMMAX64: Opc = X86::CMOVG64rr; break;
> - case X86::ATOMUMIN64: Opc = X86::CMOVB64rr; break;
> - case X86::ATOMUMAX64: Opc = X86::CMOVA64rr; break;
> - // FIXME: There are no CMOV8 instructions; MIN/MAX need some other way.
> - }
> - return EmitAtomicMinMaxWithCustomInserter(MI, BB, Opc);
> - }
> -
> - case X86::ATOMAND32:
> - case X86::ATOMOR32:
> - case X86::ATOMXOR32:
> - case X86::ATOMNAND32: {
> - bool Invert = false;
> - unsigned RegOpc, ImmOpc;
> - switch (MI->getOpcode()) {
> - default: llvm_unreachable("illegal opcode!");
> - case X86::ATOMAND32:
> - RegOpc = X86::AND32rr; ImmOpc = X86::AND32ri; break;
> - case X86::ATOMOR32:
> - RegOpc = X86::OR32rr; ImmOpc = X86::OR32ri; break;
> - case X86::ATOMXOR32:
> - RegOpc = X86::XOR32rr; ImmOpc = X86::XOR32ri; break;
> - case X86::ATOMNAND32:
> - RegOpc = X86::AND32rr; ImmOpc = X86::AND32ri; Invert = true; break;
> - }
> - return EmitAtomicBitwiseWithCustomInserter(MI, BB, RegOpc, ImmOpc,
> - X86::MOV32rm, X86::LCMPXCHG32,
> - X86::NOT32r, X86::EAX,
> - &X86::GR32RegClass, Invert);
> - }
> -
> + case X86::ATOMAND8:
> case X86::ATOMAND16:
> + case X86::ATOMAND32:
> + case X86::ATOMAND64:
> + // Fall through
> + case X86::ATOMOR8:
> case X86::ATOMOR16:
> + case X86::ATOMOR32:
> + case X86::ATOMOR64:
> + // Fall through
> case X86::ATOMXOR16:
> - case X86::ATOMNAND16: {
> - bool Invert = false;
> - unsigned RegOpc, ImmOpc;
> - switch (MI->getOpcode()) {
> - default: llvm_unreachable("illegal opcode!");
> - case X86::ATOMAND16:
> - RegOpc = X86::AND16rr; ImmOpc = X86::AND16ri; break;
> - case X86::ATOMOR16:
> - RegOpc = X86::OR16rr; ImmOpc = X86::OR16ri; break;
> - case X86::ATOMXOR16:
> - RegOpc = X86::XOR16rr; ImmOpc = X86::XOR16ri; break;
> - case X86::ATOMNAND16:
> - RegOpc = X86::AND16rr; ImmOpc = X86::AND16ri; Invert = true; break;
> - }
> - return EmitAtomicBitwiseWithCustomInserter(MI, BB, RegOpc, ImmOpc,
> - X86::MOV16rm, X86::LCMPXCHG16,
> - X86::NOT16r, X86::AX,
> - &X86::GR16RegClass, Invert);
> - }
> -
> - case X86::ATOMAND8:
> - case X86::ATOMOR8:
> case X86::ATOMXOR8:
> - case X86::ATOMNAND8: {
> - bool Invert = false;
> - unsigned RegOpc, ImmOpc;
> - switch (MI->getOpcode()) {
> - default: llvm_unreachable("illegal opcode!");
> - case X86::ATOMAND8:
> - RegOpc = X86::AND8rr; ImmOpc = X86::AND8ri; break;
> - case X86::ATOMOR8:
> - RegOpc = X86::OR8rr; ImmOpc = X86::OR8ri; break;
> - case X86::ATOMXOR8:
> - RegOpc = X86::XOR8rr; ImmOpc = X86::XOR8ri; break;
> - case X86::ATOMNAND8:
> - RegOpc = X86::AND8rr; ImmOpc = X86::AND8ri; Invert = true; break;
> - }
> - return EmitAtomicBitwiseWithCustomInserter(MI, BB, RegOpc, ImmOpc,
> - X86::MOV8rm, X86::LCMPXCHG8,
> - X86::NOT8r, X86::AL,
> - &X86::GR8RegClass, Invert);
> - }
> -
> - // This group is for 64-bit host.
> - case X86::ATOMAND64:
> - case X86::ATOMOR64:
> + case X86::ATOMXOR32:
> case X86::ATOMXOR64:
> - case X86::ATOMNAND64: {
> - bool Invert = false;
> - unsigned RegOpc, ImmOpc;
> - switch (MI->getOpcode()) {
> - default: llvm_unreachable("illegal opcode!");
> - case X86::ATOMAND64:
> - RegOpc = X86::AND64rr; ImmOpc = X86::AND64ri32; break;
> - case X86::ATOMOR64:
> - RegOpc = X86::OR64rr; ImmOpc = X86::OR64ri32; break;
> - case X86::ATOMXOR64:
> - RegOpc = X86::XOR64rr; ImmOpc = X86::XOR64ri32; break;
> - case X86::ATOMNAND64:
> - RegOpc = X86::AND64rr; ImmOpc = X86::AND64ri32; Invert = true; break;
> - }
> - return EmitAtomicBitwiseWithCustomInserter(MI, BB, RegOpc, ImmOpc,
> - X86::MOV64rm, X86::LCMPXCHG64,
> - X86::NOT64r, X86::RAX,
> - &X86::GR64RegClass, Invert);
> - }
> + // Fall through
> + case X86::ATOMNAND8:
> + case X86::ATOMNAND16:
> + case X86::ATOMNAND32:
> + case X86::ATOMNAND64:
> + // Fall through
> + case X86::ATOMMAX16:
> + case X86::ATOMMAX32:
> + case X86::ATOMMAX64:
> + // Fall through
> + case X86::ATOMMIN16:
> + case X86::ATOMMIN32:
> + case X86::ATOMMIN64:
> + // Fall through
> + case X86::ATOMUMAX16:
> + case X86::ATOMUMAX32:
> + case X86::ATOMUMAX64:
> + // Fall through
> + case X86::ATOMUMIN16:
> + case X86::ATOMUMIN32:
> + case X86::ATOMUMIN64:
> + return EmitAtomicLoadArith(MI, BB);
>
> // This group does 64-bit operations on a 32-bit host.
> case X86::ATOMAND6432:
> @@ -13308,44 +13333,8 @@
> case X86::ATOMNAND6432:
> case X86::ATOMADD6432:
> case X86::ATOMSUB6432:
> - case X86::ATOMSWAP6432: {
> - bool Invert = false;
> - unsigned RegOpcL, RegOpcH, ImmOpcL, ImmOpcH;
> - switch (MI->getOpcode()) {
> - default: llvm_unreachable("illegal opcode!");
> - case X86::ATOMAND6432:
> - RegOpcL = RegOpcH = X86::AND32rr;
> - ImmOpcL = ImmOpcH = X86::AND32ri;
> - break;
> - case X86::ATOMOR6432:
> - RegOpcL = RegOpcH = X86::OR32rr;
> - ImmOpcL = ImmOpcH = X86::OR32ri;
> - break;
> - case X86::ATOMXOR6432:
> - RegOpcL = RegOpcH = X86::XOR32rr;
> - ImmOpcL = ImmOpcH = X86::XOR32ri;
> - break;
> - case X86::ATOMNAND6432:
> - RegOpcL = RegOpcH = X86::AND32rr;
> - ImmOpcL = ImmOpcH = X86::AND32ri;
> - Invert = true;
> - break;
> - case X86::ATOMADD6432:
> - RegOpcL = X86::ADD32rr; RegOpcH = X86::ADC32rr;
> - ImmOpcL = X86::ADD32ri; ImmOpcH = X86::ADC32ri;
> - break;
> - case X86::ATOMSUB6432:
> - RegOpcL = X86::SUB32rr; RegOpcH = X86::SBB32rr;
> - ImmOpcL = X86::SUB32ri; ImmOpcH = X86::SBB32ri;
> - break;
> - case X86::ATOMSWAP6432:
> - RegOpcL = RegOpcH = X86::MOV32rr;
> - ImmOpcL = ImmOpcH = X86::MOV32ri;
> - break;
> - }
> - return EmitAtomicBit6432WithCustomInserter(MI, BB, RegOpcL, RegOpcH,
> - ImmOpcL, ImmOpcH, Invert);
> - }
> + case X86::ATOMSWAP6432:
> + return EmitAtomicLoadArith6432(MI, BB);
>
> case X86::VASTART_SAVE_XMM_REGS:
> return EmitVAStartSaveXMMRegsWithCustomInserter(MI, BB);
>
> Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.h?rev=164281&r1=164280&r2=164281&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86ISelLowering.h (original)
> +++ llvm/trunk/lib/Target/X86/X86ISelLowering.h Wed Sep 19 22:06:15 2012
> @@ -861,36 +861,17 @@
> MachineBasicBlock *BB) const;
> MachineBasicBlock *EmitMwait(MachineInstr *MI, MachineBasicBlock *BB) const;
>
> - /// Utility function to emit atomic bitwise operations (and, or, xor).
> - /// It takes the bitwise instruction to expand, the associated machine basic
> - /// block, and the associated X86 opcodes for reg/reg and reg/imm.
> - MachineBasicBlock *EmitAtomicBitwiseWithCustomInserter(
> - MachineInstr *BInstr,
> - MachineBasicBlock *BB,
> - unsigned regOpc,
> - unsigned immOpc,
> - unsigned loadOpc,
> - unsigned cxchgOpc,
> - unsigned notOpc,
> - unsigned EAXreg,
> - const TargetRegisterClass *RC,
> - bool Invert = false) const;
> -
> - MachineBasicBlock *EmitAtomicBit6432WithCustomInserter(
> - MachineInstr *BInstr,
> - MachineBasicBlock *BB,
> - unsigned regOpcL,
> - unsigned regOpcH,
> - unsigned immOpcL,
> - unsigned immOpcH,
> - bool Invert = false) const;
> -
> - /// Utility function to emit atomic min and max. It takes the min/max
> - /// instruction to expand, the associated basic block, and the associated
> - /// cmov opcode for moving the min or max value.
> - MachineBasicBlock *EmitAtomicMinMaxWithCustomInserter(MachineInstr *BInstr,
> - MachineBasicBlock *BB,
> - unsigned cmovOpc) const;
> + /// Utility function to emit atomic-load-arith operations (and, or, xor,
> + /// nand, max, min, umax, umin). It takes the corresponding instruction to
> + /// expand, the associated machine basic block, and the associated X86
> + /// opcodes for reg/reg.
> + MachineBasicBlock *EmitAtomicLoadArith(MachineInstr *MI,
> + MachineBasicBlock *MBB) const;
> +
> + /// Utility function to emit atomic-load-arith operations (and, or, xor,
> + /// nand, add, sub, swap) for 64-bit operands on 32-bit target.
> + MachineBasicBlock *EmitAtomicLoadArith6432(MachineInstr *MI,
> + MachineBasicBlock *MBB) const;
>
> // Utility function to emit the low-level va_arg code for X86-64.
> MachineBasicBlock *EmitVAARG64WithCustomInserter(
>
> Modified: llvm/trunk/lib/Target/X86/X86InstrCompiler.td
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrCompiler.td?rev=164281&r1=164280&r2=164281&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86InstrCompiler.td (original)
> +++ llvm/trunk/lib/Target/X86/X86InstrCompiler.td Wed Sep 19 22:06:15 2012
> @@ -483,8 +483,7 @@
> //===----------------------------------------------------------------------===//
>
> // Atomic exchange, and, or, xor
> -let Constraints = "$val = $dst", Defs = [EFLAGS],
> - usesCustomInserter = 1 in {
> +let usesCustomInserter = 1 in {
>
> def ATOMAND8 : I<0, Pseudo, (outs GR8:$dst),(ins i8mem:$ptr, GR8:$val),
> "#ATOMAND8 PSEUDO!",
> @@ -578,11 +577,7 @@
> [(set GR64:$dst, (atomic_load_umax_64 addr:$ptr, GR64:$val))]>;
> }
>
> -let Constraints = "$val1 = $dst1, $val2 = $dst2",
> - Defs = [EFLAGS, EAX, EBX, ECX, EDX],
> - Uses = [EAX, EBX, ECX, EDX],
> - mayLoad = 1, mayStore = 1,
> - usesCustomInserter = 1 in {
> +let mayLoad = 1, mayStore = 1, usesCustomInserter = 1 in {
> def ATOMAND6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
> (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
> "#ATOMAND6432 PSEUDO!", []>;
>
> Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.cpp?rev=164281&r1=164280&r2=164281&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp (original)
> +++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp Wed Sep 19 22:06:15 2012
> @@ -2266,7 +2266,7 @@
> }
>
> /// getCondFromCmovOpc - return condition code of a CMov opcode.
> -static X86::CondCode getCondFromCMovOpc(unsigned Opc) {
> +X86::CondCode X86::getCondFromCMovOpc(unsigned Opc) {
> switch (Opc) {
> default: return X86::COND_INVALID;
> case X86::CMOVA16rm: case X86::CMOVA16rr: case X86::CMOVA32rm:
> @@ -3314,7 +3314,7 @@
> if (OldCC != X86::COND_INVALID)
> OpcIsSET = true;
> else
> - OldCC = getCondFromCMovOpc(Instr.getOpcode());
> + OldCC = X86::getCondFromCMovOpc(Instr.getOpcode());
> }
> if (OldCC == X86::COND_INVALID) return false;
> }
>
> Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.h?rev=164281&r1=164280&r2=164281&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86InstrInfo.h (original)
> +++ llvm/trunk/lib/Target/X86/X86InstrInfo.h Wed Sep 19 22:06:15 2012
> @@ -61,6 +61,9 @@
> // Turn condition code into conditional branch opcode.
> unsigned GetCondBranchFromCond(CondCode CC);
>
> + // Turn CMov opcode into condition code.
> + CondCode getCondFromCMovOpc(unsigned Opc);
> +
> /// GetOppositeBranchCondition - Return the inverse of the specified cond,
> /// e.g. turning COND_E to COND_NE.
> CondCode GetOppositeBranchCondition(X86::CondCode CC);
>
> Modified: llvm/trunk/test/CodeGen/X86/2010-01-08-Atomic64Bug.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/2010-01-08-Atomic64Bug.ll?rev=164281&r1=164280&r2=164281&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/2010-01-08-Atomic64Bug.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/2010-01-08-Atomic64Bug.ll Wed Sep 19 22:06:15 2012
> @@ -7,17 +7,16 @@
> define void @t(i64* nocapture %p) nounwind ssp {
> entry:
> ; CHECK: t:
> -; CHECK: movl $1
> -; CHECK: movl (%ebp), %eax
> -; CHECK: movl 4(%ebp), %edx
> +; CHECK: movl ([[REG:%[a-z]+]]), %eax
> +; CHECK: movl 4([[REG]]), %edx
> ; CHECK: LBB0_1:
> -; CHECK-NOT: movl $1
> -; CHECK-NOT: movl $0
> +; CHECK: movl $1
> ; CHECK: addl
> +; CHECK: movl $0
> ; CHECK: adcl
> ; CHECK: lock
> -; CHECK: cmpxchg8b
> -; CHECK: jne
> +; CHECK-NEXT: cmpxchg8b ([[REG]])
> +; CHECK-NEXT: jne
> %0 = atomicrmw add i64* %p, i64 1 seq_cst
> ret void
> }
>
> Added: llvm/trunk/test/CodeGen/X86/atomic16.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/atomic16.ll?rev=164281&view=auto
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/atomic16.ll (added)
> +++ llvm/trunk/test/CodeGen/X86/atomic16.ll Wed Sep 19 22:06:15 2012
> @@ -0,0 +1,250 @@
> +; RUN: llc < %s -O0 -march=x86-64 -mcpu=corei7 | FileCheck %s --check-prefix X64
> +; RUN: llc < %s -O0 -march=x86 -mcpu=corei7 | FileCheck %s --check-prefix X32
> +
> + at sc16 = external global i16
> +
> +define void @atomic_fetch_add16() nounwind {
> +; X64: atomic_fetch_add16
> +; X32: atomic_fetch_add16
> +entry:
> +; 32-bit
> + %t1 = atomicrmw add i16* @sc16, i16 1 acquire
> +; X64: lock
> +; X64: incw
> +; X32: lock
> +; X32: incw
> + %t2 = atomicrmw add i16* @sc16, i16 3 acquire
> +; X64: lock
> +; X64: addw $3
> +; X32: lock
> +; X32: addw $3
> + %t3 = atomicrmw add i16* @sc16, i16 5 acquire
> +; X64: lock
> +; X64: xaddw
> +; X32: lock
> +; X32: xaddw
> + %t4 = atomicrmw add i16* @sc16, i16 %t3 acquire
> +; X64: lock
> +; X64: addw
> +; X32: lock
> +; X32: addw
> + ret void
> +; X64: ret
> +; X32: ret
> +}
> +
> +define void @atomic_fetch_sub16() nounwind {
> +; X64: atomic_fetch_sub16
> +; X32: atomic_fetch_sub16
> + %t1 = atomicrmw sub i16* @sc16, i16 1 acquire
> +; X64: lock
> +; X64: decw
> +; X32: lock
> +; X32: decw
> + %t2 = atomicrmw sub i16* @sc16, i16 3 acquire
> +; X64: lock
> +; X64: subw $3
> +; X32: lock
> +; X32: subw $3
> + %t3 = atomicrmw sub i16* @sc16, i16 5 acquire
> +; X64: lock
> +; X64: xaddw
> +; X32: lock
> +; X32: xaddw
> + %t4 = atomicrmw sub i16* @sc16, i16 %t3 acquire
> +; X64: lock
> +; X64: subw
> +; X32: lock
> +; X32: subw
> + ret void
> +; X64: ret
> +; X32: ret
> +}
> +
> +define void @atomic_fetch_and16() nounwind {
> +; X64: atomic_fetch_and16
> +; X32: atomic_fetch_and16
> + %t1 = atomicrmw and i16* @sc16, i16 3 acquire
> +; X64: lock
> +; X64: andw $3
> +; X32: lock
> +; X32: andw $3
> + %t2 = atomicrmw and i16* @sc16, i16 5 acquire
> +; X64: andw
> +; X64: lock
> +; X64: cmpxchgw
> +; X32: andw
> +; X32: lock
> +; X32: cmpxchgw
> + %t3 = atomicrmw and i16* @sc16, i16 %t2 acquire
> +; X64: lock
> +; X64: andw
> +; X32: lock
> +; X32: andw
> + ret void
> +; X64: ret
> +; X32: ret
> +}
> +
> +define void @atomic_fetch_or16() nounwind {
> +; X64: atomic_fetch_or16
> +; X32: atomic_fetch_or16
> + %t1 = atomicrmw or i16* @sc16, i16 3 acquire
> +; X64: lock
> +; X64: orw $3
> +; X32: lock
> +; X32: orw $3
> + %t2 = atomicrmw or i16* @sc16, i16 5 acquire
> +; X64: orw
> +; X64: lock
> +; X64: cmpxchgw
> +; X32: orw
> +; X32: lock
> +; X32: cmpxchgw
> + %t3 = atomicrmw or i16* @sc16, i16 %t2 acquire
> +; X64: lock
> +; X64: orw
> +; X32: lock
> +; X32: orw
> + ret void
> +; X64: ret
> +; X32: ret
> +}
> +
> +define void @atomic_fetch_xor16() nounwind {
> +; X64: atomic_fetch_xor16
> +; X32: atomic_fetch_xor16
> + %t1 = atomicrmw xor i16* @sc16, i16 3 acquire
> +; X64: lock
> +; X64: xorw $3
> +; X32: lock
> +; X32: xorw $3
> + %t2 = atomicrmw xor i16* @sc16, i16 5 acquire
> +; X64: xorw
> +; X64: lock
> +; X64: cmpxchgw
> +; X32: xorw
> +; X32: lock
> +; X32: cmpxchgw
> + %t3 = atomicrmw xor i16* @sc16, i16 %t2 acquire
> +; X64: lock
> +; X64: xorw
> +; X32: lock
> +; X32: xorw
> + ret void
> +; X64: ret
> +; X32: ret
> +}
> +
> +define void @atomic_fetch_nand16(i16 %x) nounwind {
> +; X64: atomic_fetch_nand16
> +; X32: atomic_fetch_nand16
> + %t1 = atomicrmw nand i16* @sc16, i16 %x acquire
> +; X64: andw
> +; X64: notw
> +; X64: lock
> +; X64: cmpxchgw
> +; X32: andw
> +; X32: notw
> +; X32: lock
> +; X32: cmpxchgw
> + ret void
> +; X64: ret
> +; X32: ret
> +}
> +
> +define void @atomic_fetch_max16(i16 %x) nounwind {
> + %t1 = atomicrmw max i16* @sc16, i16 %x acquire
> +; X64: cmpw
> +; X64: cmov
> +; X64: lock
> +; X64: cmpxchgw
> +
> +; X32: cmpw
> +; X32: cmov
> +; X32: lock
> +; X32: cmpxchgw
> + ret void
> +; X64: ret
> +; X32: ret
> +}
> +
> +define void @atomic_fetch_min16(i16 %x) nounwind {
> + %t1 = atomicrmw min i16* @sc16, i16 %x acquire
> +; X64: cmpw
> +; X64: cmov
> +; X64: lock
> +; X64: cmpxchgw
> +
> +; X32: cmpw
> +; X32: cmov
> +; X32: lock
> +; X32: cmpxchgw
> + ret void
> +; X64: ret
> +; X32: ret
> +}
> +
> +define void @atomic_fetch_umax16(i16 %x) nounwind {
> + %t1 = atomicrmw umax i16* @sc16, i16 %x acquire
> +; X64: cmpw
> +; X64: cmov
> +; X64: lock
> +; X64: cmpxchgw
> +
> +; X32: cmpw
> +; X32: cmov
> +; X32: lock
> +; X32: cmpxchgw
> + ret void
> +; X64: ret
> +; X32: ret
> +}
> +
> +define void @atomic_fetch_umin16(i16 %x) nounwind {
> + %t1 = atomicrmw umin i16* @sc16, i16 %x acquire
> +; X64: cmpw
> +; X64: cmov
> +; X64: lock
> +; X64: cmpxchgw
> +; X32: cmpw
> +; X32: cmov
> +; X32: lock
> +; X32: cmpxchgw
> + ret void
> +; X64: ret
> +; X32: ret
> +}
> +
> +define void @atomic_fetch_cmpxchg16() nounwind {
> + %t1 = cmpxchg i16* @sc16, i16 0, i16 1 acquire
> +; X64: lock
> +; X64: cmpxchgw
> +; X32: lock
> +; X32: cmpxchgw
> + ret void
> +; X64: ret
> +; X32: ret
> +}
> +
> +define void @atomic_fetch_store16(i16 %x) nounwind {
> + store atomic i16 %x, i16* @sc16 release, align 4
> +; X64-NOT: lock
> +; X64: movw
> +; X32-NOT: lock
> +; X32: movw
> + ret void
> +; X64: ret
> +; X32: ret
> +}
> +
> +define void @atomic_fetch_swap16(i16 %x) nounwind {
> + %t1 = atomicrmw xchg i16* @sc16, i16 %x acquire
> +; X64-NOT: lock
> +; X64: xchgw
> +; X32-NOT: lock
> +; X32: xchgw
> + ret void
> +; X64: ret
> +; X32: ret
> +}
>
> Added: llvm/trunk/test/CodeGen/X86/atomic32.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/atomic32.ll?rev=164281&view=auto
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/atomic32.ll (added)
> +++ llvm/trunk/test/CodeGen/X86/atomic32.ll Wed Sep 19 22:06:15 2012
> @@ -0,0 +1,250 @@
> +; RUN: llc < %s -O0 -march=x86-64 -mcpu=corei7 | FileCheck %s --check-prefix X64
> +; RUN: llc < %s -O0 -march=x86 -mcpu=corei7 | FileCheck %s --check-prefix X32
> +
> + at sc32 = external global i32
> +
> +define void @atomic_fetch_add32() nounwind {
> +; X64: atomic_fetch_add32
> +; X32: atomic_fetch_add32
> +entry:
> +; 32-bit
> + %t1 = atomicrmw add i32* @sc32, i32 1 acquire
> +; X64: lock
> +; X64: incl
> +; X32: lock
> +; X32: incl
> + %t2 = atomicrmw add i32* @sc32, i32 3 acquire
> +; X64: lock
> +; X64: addl $3
> +; X32: lock
> +; X32: addl $3
> + %t3 = atomicrmw add i32* @sc32, i32 5 acquire
> +; X64: lock
> +; X64: xaddl
> +; X32: lock
> +; X32: xaddl
> + %t4 = atomicrmw add i32* @sc32, i32 %t3 acquire
> +; X64: lock
> +; X64: addl
> +; X32: lock
> +; X32: addl
> + ret void
> +; X64: ret
> +; X32: ret
> +}
> +
> +define void @atomic_fetch_sub32() nounwind {
> +; X64: atomic_fetch_sub32
> +; X32: atomic_fetch_sub32
> + %t1 = atomicrmw sub i32* @sc32, i32 1 acquire
> +; X64: lock
> +; X64: decl
> +; X32: lock
> +; X32: decl
> + %t2 = atomicrmw sub i32* @sc32, i32 3 acquire
> +; X64: lock
> +; X64: subl $3
> +; X32: lock
> +; X32: subl $3
> + %t3 = atomicrmw sub i32* @sc32, i32 5 acquire
> +; X64: lock
> +; X64: xaddl
> +; X32: lock
> +; X32: xaddl
> + %t4 = atomicrmw sub i32* @sc32, i32 %t3 acquire
> +; X64: lock
> +; X64: subl
> +; X32: lock
> +; X32: subl
> + ret void
> +; X64: ret
> +; X32: ret
> +}
> +
> +define void @atomic_fetch_and32() nounwind {
> +; X64: atomic_fetch_and32
> +; X32: atomic_fetch_and32
> + %t1 = atomicrmw and i32* @sc32, i32 3 acquire
> +; X64: lock
> +; X64: andl $3
> +; X32: lock
> +; X32: andl $3
> + %t2 = atomicrmw and i32* @sc32, i32 5 acquire
> +; X64: andl
> +; X64: lock
> +; X64: cmpxchgl
> +; X32: andl
> +; X32: lock
> +; X32: cmpxchgl
> + %t3 = atomicrmw and i32* @sc32, i32 %t2 acquire
> +; X64: lock
> +; X64: andl
> +; X32: lock
> +; X32: andl
> + ret void
> +; X64: ret
> +; X32: ret
> +}
> +
> +define void @atomic_fetch_or32() nounwind {
> +; X64: atomic_fetch_or32
> +; X32: atomic_fetch_or32
> + %t1 = atomicrmw or i32* @sc32, i32 3 acquire
> +; X64: lock
> +; X64: orl $3
> +; X32: lock
> +; X32: orl $3
> + %t2 = atomicrmw or i32* @sc32, i32 5 acquire
> +; X64: orl
> +; X64: lock
> +; X64: cmpxchgl
> +; X32: orl
> +; X32: lock
> +; X32: cmpxchgl
> + %t3 = atomicrmw or i32* @sc32, i32 %t2 acquire
> +; X64: lock
> +; X64: orl
> +; X32: lock
> +; X32: orl
> + ret void
> +; X64: ret
> +; X32: ret
> +}
> +
> +define void @atomic_fetch_xor32() nounwind {
> +; X64: atomic_fetch_xor32
> +; X32: atomic_fetch_xor32
> + %t1 = atomicrmw xor i32* @sc32, i32 3 acquire
> +; X64: lock
> +; X64: xorl $3
> +; X32: lock
> +; X32: xorl $3
> + %t2 = atomicrmw xor i32* @sc32, i32 5 acquire
> +; X64: xorl
> +; X64: lock
> +; X64: cmpxchgl
> +; X32: xorl
> +; X32: lock
> +; X32: cmpxchgl
> + %t3 = atomicrmw xor i32* @sc32, i32 %t2 acquire
> +; X64: lock
> +; X64: xorl
> +; X32: lock
> +; X32: xorl
> + ret void
> +; X64: ret
> +; X32: ret
> +}
> +
> +define void @atomic_fetch_nand32(i32 %x) nounwind {
> +; X64: atomic_fetch_nand32
> +; X32: atomic_fetch_nand32
> + %t1 = atomicrmw nand i32* @sc32, i32 %x acquire
> +; X64: andl
> +; X64: notl
> +; X64: lock
> +; X64: cmpxchgl
> +; X32: andl
> +; X32: notl
> +; X32: lock
> +; X32: cmpxchgl
> + ret void
> +; X64: ret
> +; X32: ret
> +}
> +
> +define void @atomic_fetch_max32(i32 %x) nounwind {
> + %t1 = atomicrmw max i32* @sc32, i32 %x acquire
> +; X64: cmpl
> +; X64: cmov
> +; X64: lock
> +; X64: cmpxchgl
> +
> +; X32: cmpl
> +; X32: cmov
> +; X32: lock
> +; X32: cmpxchgl
> + ret void
> +; X64: ret
> +; X32: ret
> +}
> +
> +define void @atomic_fetch_min32(i32 %x) nounwind {
> + %t1 = atomicrmw min i32* @sc32, i32 %x acquire
> +; X64: cmpl
> +; X64: cmov
> +; X64: lock
> +; X64: cmpxchgl
> +
> +; X32: cmpl
> +; X32: cmov
> +; X32: lock
> +; X32: cmpxchgl
> + ret void
> +; X64: ret
> +; X32: ret
> +}
> +
> +define void @atomic_fetch_umax32(i32 %x) nounwind {
> + %t1 = atomicrmw umax i32* @sc32, i32 %x acquire
> +; X64: cmpl
> +; X64: cmov
> +; X64: lock
> +; X64: cmpxchgl
> +
> +; X32: cmpl
> +; X32: cmov
> +; X32: lock
> +; X32: cmpxchgl
> + ret void
> +; X64: ret
> +; X32: ret
> +}
> +
> +define void @atomic_fetch_umin32(i32 %x) nounwind {
> + %t1 = atomicrmw umin i32* @sc32, i32 %x acquire
> +; X64: cmpl
> +; X64: cmov
> +; X64: lock
> +; X64: cmpxchgl
> +; X32: cmpl
> +; X32: cmov
> +; X32: lock
> +; X32: cmpxchgl
> + ret void
> +; X64: ret
> +; X32: ret
> +}
> +
> +define void @atomic_fetch_cmpxchg32() nounwind {
> + %t1 = cmpxchg i32* @sc32, i32 0, i32 1 acquire
> +; X64: lock
> +; X64: cmpxchgl
> +; X32: lock
> +; X32: cmpxchgl
> + ret void
> +; X64: ret
> +; X32: ret
> +}
> +
> +define void @atomic_fetch_store32(i32 %x) nounwind {
> + store atomic i32 %x, i32* @sc32 release, align 4
> +; X64-NOT: lock
> +; X64: movl
> +; X32-NOT: lock
> +; X32: movl
> + ret void
> +; X64: ret
> +; X32: ret
> +}
> +
> +define void @atomic_fetch_swap32(i32 %x) nounwind {
> + %t1 = atomicrmw xchg i32* @sc32, i32 %x acquire
> +; X64-NOT: lock
> +; X64: xchgl
> +; X32-NOT: lock
> +; X32: xchgl
> + ret void
> +; X64: ret
> +; X32: ret
> +}
>
> Added: llvm/trunk/test/CodeGen/X86/atomic64.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/atomic64.ll?rev=164281&view=auto
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/atomic64.ll (added)
> +++ llvm/trunk/test/CodeGen/X86/atomic64.ll Wed Sep 19 22:06:15 2012
> @@ -0,0 +1,216 @@
> +; RUN: llc < %s -O0 -march=x86-64 -mcpu=corei7 | FileCheck %s --check-prefix X64
> +
> + at sc64 = external global i64
> +
> +define void @atomic_fetch_add64() nounwind {
> +; X64: atomic_fetch_add64
> +entry:
> + %t1 = atomicrmw add i64* @sc64, i64 1 acquire
> +; X64: lock
> +; X64: incq
> + %t2 = atomicrmw add i64* @sc64, i64 3 acquire
> +; X64: lock
> +; X64: addq $3
> + %t3 = atomicrmw add i64* @sc64, i64 5 acquire
> +; X64: lock
> +; X64: xaddq
> + %t4 = atomicrmw add i64* @sc64, i64 %t3 acquire
> +; X64: lock
> +; X64: addq
> + ret void
> +; X64: ret
> +}
> +
> +define void @atomic_fetch_sub64() nounwind {
> +; X64: atomic_fetch_sub64
> + %t1 = atomicrmw sub i64* @sc64, i64 1 acquire
> +; X64: lock
> +; X64: decq
> + %t2 = atomicrmw sub i64* @sc64, i64 3 acquire
> +; X64: lock
> +; X64: subq $3
> + %t3 = atomicrmw sub i64* @sc64, i64 5 acquire
> +; X64: lock
> +; X64: xaddq
> + %t4 = atomicrmw sub i64* @sc64, i64 %t3 acquire
> +; X64: lock
> +; X64: subq
> + ret void
> +; X64: ret
> +}
> +
> +define void @atomic_fetch_and64() nounwind {
> +; X64: atomic_fetch_and64
> + %t1 = atomicrmw and i64* @sc64, i64 3 acquire
> +; X64: lock
> +; X64: andq $3
> + %t2 = atomicrmw and i64* @sc64, i64 5 acquire
> +; X64: andq
> +; X64: lock
> +; X64: cmpxchgq
> + %t3 = atomicrmw and i64* @sc64, i64 %t2 acquire
> +; X64: lock
> +; X64: andq
> + ret void
> +; X64: ret
> +}
> +
> +define void @atomic_fetch_or64() nounwind {
> +; X64: atomic_fetch_or64
> + %t1 = atomicrmw or i64* @sc64, i64 3 acquire
> +; X64: lock
> +; X64: orq $3
> + %t2 = atomicrmw or i64* @sc64, i64 5 acquire
> +; X64: orq
> +; X64: lock
> +; X64: cmpxchgq
> + %t3 = atomicrmw or i64* @sc64, i64 %t2 acquire
> +; X64: lock
> +; X64: orq
> + ret void
> +; X64: ret
> +}
> +
> +define void @atomic_fetch_xor64() nounwind {
> +; X64: atomic_fetch_xor64
> + %t1 = atomicrmw xor i64* @sc64, i64 3 acquire
> +; X64: lock
> +; X64: xorq $3
> + %t2 = atomicrmw xor i64* @sc64, i64 5 acquire
> +; X64: xorq
> +; X64: lock
> +; X64: cmpxchgq
> + %t3 = atomicrmw xor i64* @sc64, i64 %t2 acquire
> +; X64: lock
> +; X64: xorq
> + ret void
> +; X64: ret
> +}
> +
> +define void @atomic_fetch_nand64(i64 %x) nounwind {
> +; X64: atomic_fetch_nand64
> +; X32: atomic_fetch_nand64
> + %t1 = atomicrmw nand i64* @sc64, i64 %x acquire
> +; X64: andq
> +; X64: notq
> +; X64: lock
> +; X64: cmpxchgq
> +; X32: andl
> +; X32: andl
> +; X32: notl
> +; X32: notl
> +; X32: lock
> +; X32: cmpxchg8b
> + ret void
> +; X64: ret
> +; X32: ret
> +}
> +
> +define void @atomic_fetch_max64(i64 %x) nounwind {
> + %t1 = atomicrmw max i64* @sc64, i64 %x acquire
> +; X64: cmpq
> +; X64: cmov
> +; X64: lock
> +; X64: cmpxchgq
> +
> +; X32: cmpl
> +; X32: cmpl
> +; X32: cmov
> +; X32: cmov
> +; X32: cmov
> +; X32: lock
> +; X32: cmpxchg8b
> + ret void
> +; X64: ret
> +; X32: ret
> +}
> +
> +define void @atomic_fetch_min64(i64 %x) nounwind {
> + %t1 = atomicrmw min i64* @sc64, i64 %x acquire
> +; X64: cmpq
> +; X64: cmov
> +; X64: lock
> +; X64: cmpxchgq
> +
> +; X32: cmpl
> +; X32: cmpl
> +; X32: cmov
> +; X32: cmov
> +; X32: cmov
> +; X32: lock
> +; X32: cmpxchg8b
> + ret void
> +; X64: ret
> +; X32: ret
> +}
> +
> +define void @atomic_fetch_umax64(i64 %x) nounwind {
> + %t1 = atomicrmw umax i64* @sc64, i64 %x acquire
> +; X64: cmpq
> +; X64: cmov
> +; X64: lock
> +; X64: cmpxchgq
> +
> +; X32: cmpl
> +; X32: cmpl
> +; X32: cmov
> +; X32: cmov
> +; X32: cmov
> +; X32: lock
> +; X32: cmpxchg8b
> + ret void
> +; X64: ret
> +; X32: ret
> +}
> +
> +define void @atomic_fetch_umin64(i64 %x) nounwind {
> + %t1 = atomicrmw umin i64* @sc64, i64 %x acquire
> +; X64: cmpq
> +; X64: cmov
> +; X64: lock
> +; X64: cmpxchgq
> +
> +; X32: cmpl
> +; X32: cmpl
> +; X32: cmov
> +; X32: cmov
> +; X32: cmov
> +; X32: lock
> +; X32: cmpxchg8b
> + ret void
> +; X64: ret
> +; X32: ret
> +}
> +
> +define void @atomic_fetch_cmpxchg64() nounwind {
> + %t1 = cmpxchg i64* @sc64, i64 0, i64 1 acquire
> +; X64: lock
> +; X64: cmpxchgq
> +; X32: lock
> +; X32: cmpxchg8b
> + ret void
> +; X64: ret
> +; X32: ret
> +}
> +
> +define void @atomic_fetch_store64(i64 %x) nounwind {
> + store atomic i64 %x, i64* @sc64 release, align 8
> +; X64-NOT: lock
> +; X64: movq
> +; X32: lock
> +; X32: cmpxchg8b
> + ret void
> +; X64: ret
> +; X32: ret
> +}
> +
> +define void @atomic_fetch_swap64(i64 %x) nounwind {
> + %t1 = atomicrmw xchg i64* @sc64, i64 %x acquire
> +; X64-NOT: lock
> +; X64: xchgq
> +; X32: lock
> +; X32: xchg8b
> + ret void
> +; X64: ret
> +; X32: ret
> +}
>
> Added: llvm/trunk/test/CodeGen/X86/atomic6432.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/atomic6432.ll?rev=164281&view=auto
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/atomic6432.ll (added)
> +++ llvm/trunk/test/CodeGen/X86/atomic6432.ll Wed Sep 19 22:06:15 2012
> @@ -0,0 +1,209 @@
> +; RUN: llc < %s -O0 -march=x86 -mcpu=corei7 | FileCheck %s --check-prefix X32
> +; XFAIL: *
> +
> + at sc64 = external global i64
> +
> +define void @atomic_fetch_add64() nounwind {
> +; X32: atomic_fetch_add64
> +entry:
> + %t1 = atomicrmw add i64* @sc64, i64 1 acquire
> +; X32: addl
> +; X32: adcl
> +; X32: lock
> +; X32: cmpxchg8b
> + %t2 = atomicrmw add i64* @sc64, i64 3 acquire
> +; X32: addl
> +; X32: adcl
> +; X32: lock
> +; X32: cmpxchg8b
> + %t3 = atomicrmw add i64* @sc64, i64 5 acquire
> +; X32: addl
> +; X32: adcl
> +; X32: lock
> +; X32: cmpxchg8b
> + %t4 = atomicrmw add i64* @sc64, i64 %t3 acquire
> +; X32: addl
> +; X32: adcl
> +; X32: lock
> +; X32: cmpxchg8b
> + ret void
> +; X32: ret
> +}
> +
> +define void @atomic_fetch_sub64() nounwind {
> +; X32: atomic_fetch_sub64
> + %t1 = atomicrmw sub i64* @sc64, i64 1 acquire
> +; X32: subl
> +; X32: sbbl
> +; X32: lock
> +; X32: cmpxchg8b
> + %t2 = atomicrmw sub i64* @sc64, i64 3 acquire
> +; X32: subl
> +; X32: sbbl
> +; X32: lock
> +; X32: cmpxchg8b
> + %t3 = atomicrmw sub i64* @sc64, i64 5 acquire
> +; X32: subl
> +; X32: sbbl
> +; X32: lock
> +; X32: cmpxchg8b
> + %t4 = atomicrmw sub i64* @sc64, i64 %t3 acquire
> +; X32: subl
> +; X32: sbbl
> +; X32: lock
> +; X32: cmpxchg8b
> + ret void
> +; X32: ret
> +}
> +
> +define void @atomic_fetch_and64() nounwind {
> +; X32: atomic_fetch_and64
> + %t1 = atomicrmw and i64* @sc64, i64 3 acquire
> +; X32: andl
> +; X32: andl
> +; X32: lock
> +; X32: cmpxchg8b
> + %t2 = atomicrmw and i64* @sc64, i64 5 acquire
> +; X32: andl
> +; X32: andl
> +; X32: lock
> +; X32: cmpxchg8b
> + %t3 = atomicrmw and i64* @sc64, i64 %t2 acquire
> +; X32: andl
> +; X32: andl
> +; X32: lock
> +; X32: cmpxchg8b
> + ret void
> +; X32: ret
> +}
> +
> +define void @atomic_fetch_or64() nounwind {
> +; X32: atomic_fetch_or64
> + %t1 = atomicrmw or i64* @sc64, i64 3 acquire
> +; X32: orl
> +; X32: orl
> +; X32: lock
> +; X32: cmpxchg8b
> + %t2 = atomicrmw or i64* @sc64, i64 5 acquire
> +; X32: orl
> +; X32: orl
> +; X32: lock
> +; X32: cmpxchg8b
> + %t3 = atomicrmw or i64* @sc64, i64 %t2 acquire
> +; X32: orl
> +; X32: orl
> +; X32: lock
> +; X32: cmpxchg8b
> + ret void
> +; X32: ret
> +}
> +
> +define void @atomic_fetch_xor64() nounwind {
> +; X32: atomic_fetch_xor64
> + %t1 = atomicrmw xor i64* @sc64, i64 3 acquire
> +; X32: xorl
> +; X32: xorl
> +; X32: lock
> +; X32: cmpxchg8b
> + %t2 = atomicrmw xor i64* @sc64, i64 5 acquire
> +; X32: xorl
> +; X32: xorl
> +; X32: lock
> +; X32: cmpxchg8b
> + %t3 = atomicrmw xor i64* @sc64, i64 %t2 acquire
> +; X32: xorl
> +; X32: xorl
> +; X32: lock
> +; X32: cmpxchg8b
> + ret void
> +; X32: ret
> +}
> +
> +define void @atomic_fetch_nand64(i64 %x) nounwind {
> +; X32: atomic_fetch_nand64
> + %t1 = atomicrmw nand i64* @sc64, i64 %x acquire
> +; X32: andl
> +; X32: andl
> +; X32: notl
> +; X32: notl
> +; X32: lock
> +; X32: cmpxchg8b
> + ret void
> +; X32: ret
> +}
> +
> +define void @atomic_fetch_max64(i64 %x) nounwind {
> + %t1 = atomicrmw max i64* @sc64, i64 %x acquire
> +; X32: cmpl
> +; X32: cmpl
> +; X32: cmov
> +; X32: cmov
> +; X32: cmov
> +; X32: lock
> +; X32: cmpxchg8b
> + ret void
> +; X32: ret
> +}
> +
> +define void @atomic_fetch_min64(i64 %x) nounwind {
> + %t1 = atomicrmw min i64* @sc64, i64 %x acquire
> +; X32: cmpl
> +; X32: cmpl
> +; X32: cmov
> +; X32: cmov
> +; X32: cmov
> +; X32: lock
> +; X32: cmpxchg8b
> + ret void
> +; X32: ret
> +}
> +
> +define void @atomic_fetch_umax64(i64 %x) nounwind {
> + %t1 = atomicrmw umax i64* @sc64, i64 %x acquire
> +; X32: cmpl
> +; X32: cmpl
> +; X32: cmov
> +; X32: cmov
> +; X32: cmov
> +; X32: lock
> +; X32: cmpxchg8b
> + ret void
> +; X32: ret
> +}
> +
> +define void @atomic_fetch_umin64(i64 %x) nounwind {
> + %t1 = atomicrmw umin i64* @sc64, i64 %x acquire
> +; X32: cmpl
> +; X32: cmpl
> +; X32: cmov
> +; X32: cmov
> +; X32: cmov
> +; X32: lock
> +; X32: cmpxchg8b
> + ret void
> +; X32: ret
> +}
> +
> +define void @atomic_fetch_cmpxchg64() nounwind {
> + %t1 = cmpxchg i64* @sc64, i64 0, i64 1 acquire
> +; X32: lock
> +; X32: cmpxchg8b
> + ret void
> +; X32: ret
> +}
> +
> +define void @atomic_fetch_store64(i64 %x) nounwind {
> + store atomic i64 %x, i64* @sc64 release, align 8
> +; X32: lock
> +; X32: cmpxchg8b
> + ret void
> +; X32: ret
> +}
> +
> +define void @atomic_fetch_swap64(i64 %x) nounwind {
> + %t1 = atomicrmw xchg i64* @sc64, i64 %x acquire
> +; X32: lock
> +; X32: xchg8b
> + ret void
> +; X32: ret
> +}
>
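The checks in atomic6432.ll above expect every 64-bit RMW on 32-bit x86 to become a pair of 32-bit ALU ops (addl/adcl, subl/sbbl, two andl's, and so on) feeding a lock cmpxchg8b, i.e. a compare-and-swap retry loop. A minimal C sketch of that loop shape, with a hypothetical helper name and the GCC/Clang __sync builtin (nothing here is taken from the patch):

    /* Hypothetical sketch, not part of the commit: the shape of loop that
       the addl/adcl + lock cmpxchg8b checks correspond to on i386. */
    static long long atomic_fetch_add64_sketch(long long *p, long long v) {
      long long old = *p;                 /* initial load of both halves */
      for (;;) {
        /* old + v is the addl/adcl pair; the CAS is the lock cmpxchg8b */
        long long prev = __sync_val_compare_and_swap(p, old, old + v);
        if (prev == old)
          return old;                     /* exchange succeeded */
        old = prev;                       /* retry with the value the CAS saw */
      }
    }
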
> Added: llvm/trunk/test/CodeGen/X86/atomic8.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/atomic8.ll?rev=164281&view=auto
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/atomic8.ll (added)
> +++ llvm/trunk/test/CodeGen/X86/atomic8.ll Wed Sep 19 22:06:15 2012
> @@ -0,0 +1,251 @@
> +; RUN: llc < %s -O0 -march=x86-64 -mcpu=corei7 | FileCheck %s --check-prefix X64
> +; RUN: llc < %s -O0 -march=x86 -mcpu=corei7 | FileCheck %s --check-prefix X32
> +; XFAIL: *
> +
> + at sc8 = external global i8
> +
> +define void @atomic_fetch_add8() nounwind {
> +; X64: atomic_fetch_add8
> +; X32: atomic_fetch_add8
> +entry:
> +; 32-bit
> + %t1 = atomicrmw add i8* @sc8, i8 1 acquire
> +; X64: lock
> +; X64: incb
> +; X32: lock
> +; X32: incb
> + %t2 = atomicrmw add i8* @sc8, i8 3 acquire
> +; X64: lock
> +; X64: addb $3
> +; X32: lock
> +; X32: addb $3
> + %t3 = atomicrmw add i8* @sc8, i8 5 acquire
> +; X64: lock
> +; X64: xaddb
> +; X32: lock
> +; X32: xaddb
> + %t4 = atomicrmw add i8* @sc8, i8 %t3 acquire
> +; X64: lock
> +; X64: addb
> +; X32: lock
> +; X32: addb
> + ret void
> +; X64: ret
> +; X32: ret
> +}
> +
> +define void @atomic_fetch_sub8() nounwind {
> +; X64: atomic_fetch_sub8
> +; X32: atomic_fetch_sub8
> + %t1 = atomicrmw sub i8* @sc8, i8 1 acquire
> +; X64: lock
> +; X64: decb
> +; X32: lock
> +; X32: decb
> + %t2 = atomicrmw sub i8* @sc8, i8 3 acquire
> +; X64: lock
> +; X64: subb $3
> +; X32: lock
> +; X32: subb $3
> + %t3 = atomicrmw sub i8* @sc8, i8 5 acquire
> +; X64: lock
> +; X64: xaddb
> +; X32: lock
> +; X32: xaddb
> + %t4 = atomicrmw sub i8* @sc8, i8 %t3 acquire
> +; X64: lock
> +; X64: subb
> +; X32: lock
> +; X32: subb
> + ret void
> +; X64: ret
> +; X32: ret
> +}
> +
> +define void @atomic_fetch_and8() nounwind {
> +; X64: atomic_fetch_and8
> +; X32: atomic_fetch_and8
> + %t1 = atomicrmw and i8* @sc8, i8 3 acquire
> +; X64: lock
> +; X64: andb $3
> +; X32: lock
> +; X32: andb $3
> + %t2 = atomicrmw and i8* @sc8, i8 5 acquire
> +; X64: andb
> +; X64: lock
> +; X64: cmpxchgb
> +; X32: andb
> +; X32: lock
> +; X32: cmpxchgb
> + %t3 = atomicrmw and i8* @sc8, i8 %t2 acquire
> +; X64: lock
> +; X64: andb
> +; X32: lock
> +; X32: andb
> + ret void
> +; X64: ret
> +; X32: ret
> +}
> +
> +define void @atomic_fetch_or8() nounwind {
> +; X64: atomic_fetch_or8
> +; X32: atomic_fetch_or8
> + %t1 = atomicrmw or i8* @sc8, i8 3 acquire
> +; X64: lock
> +; X64: orb $3
> +; X32: lock
> +; X32: orb $3
> + %t2 = atomicrmw or i8* @sc8, i8 5 acquire
> +; X64: orb
> +; X64: lock
> +; X64: cmpxchgb
> +; X32: orb
> +; X32: lock
> +; X32: cmpxchgb
> + %t3 = atomicrmw or i8* @sc8, i8 %t2 acquire
> +; X64: lock
> +; X64: orb
> +; X32: lock
> +; X32: orb
> + ret void
> +; X64: ret
> +; X32: ret
> +}
> +
> +define void @atomic_fetch_xor8() nounwind {
> +; X64: atomic_fetch_xor8
> +; X32: atomic_fetch_xor8
> + %t1 = atomicrmw xor i8* @sc8, i8 3 acquire
> +; X64: lock
> +; X64: xorb $3
> +; X32: lock
> +; X32: xorb $3
> + %t2 = atomicrmw xor i8* @sc8, i8 5 acquire
> +; X64: xorb
> +; X64: lock
> +; X64: cmpxchgb
> +; X32: xorb
> +; X32: lock
> +; X32: cmpxchgb
> + %t3 = atomicrmw xor i8* @sc8, i8 %t2 acquire
> +; X64: lock
> +; X64: xorb
> +; X32: lock
> +; X32: xorb
> + ret void
> +; X64: ret
> +; X32: ret
> +}
> +
> +define void @atomic_fetch_nand8(i8 %x) nounwind {
> +; X64: atomic_fetch_nand8
> +; X32: atomic_fetch_nand8
> + %t1 = atomicrmw nand i8* @sc8, i8 %x acquire
> +; X64: andb
> +; X64: notb
> +; X64: lock
> +; X64: cmpxchgb
> +; X32: andb
> +; X32: notb
> +; X32: lock
> +; X32: cmpxchgb
> + ret void
> +; X64: ret
> +; X32: ret
> +}
> +
> +define void @atomic_fetch_max8(i8 %x) nounwind {
> + %t1 = atomicrmw max i8* @sc8, i8 %x acquire
> +; X64: cmpb
> +; X64: cmov
> +; X64: lock
> +; X64: cmpxchgb
> +
> +; X32: cmpb
> +; X32: cmov
> +; X32: lock
> +; X32: cmpxchgb
> + ret void
> +; X64: ret
> +; X32: ret
> +}
> +
> +define void @atomic_fetch_min8(i8 %x) nounwind {
> + %t1 = atomicrmw min i8* @sc8, i8 %x acquire
> +; X64: cmpb
> +; X64: cmov
> +; X64: lock
> +; X64: cmpxchgb
> +
> +; X32: cmpb
> +; X32: cmov
> +; X32: lock
> +; X32: cmpxchgb
> + ret void
> +; X64: ret
> +; X32: ret
> +}
> +
> +define void @atomic_fetch_umax8(i8 %x) nounwind {
> + %t1 = atomicrmw umax i8* @sc8, i8 %x acquire
> +; X64: cmpb
> +; X64: cmov
> +; X64: lock
> +; X64: cmpxchgb
> +
> +; X32: cmpb
> +; X32: cmov
> +; X32: lock
> +; X32: cmpxchgb
> + ret void
> +; X64: ret
> +; X32: ret
> +}
> +
> +define void @atomic_fetch_umin8(i8 %x) nounwind {
> + %t1 = atomicrmw umin i8* @sc8, i8 %x acquire
> +; X64: cmpb
> +; X64: cmov
> +; X64: lock
> +; X64: cmpxchgb
> +; X32: cmpb
> +; X32: cmov
> +; X32: lock
> +; X32: cmpxchgb
> + ret void
> +; X64: ret
> +; X32: ret
> +}
> +
> +define void @atomic_fetch_cmpxchg8() nounwind {
> + %t1 = cmpxchg i8* @sc8, i8 0, i8 1 acquire
> +; X64: lock
> +; X64: cmpxchgb
> +; X32: lock
> +; X32: cmpxchgb
> + ret void
> +; X64: ret
> +; X32: ret
> +}
> +
> +define void @atomic_fetch_store8(i8 %x) nounwind {
> + store atomic i8 %x, i8* @sc8 release, align 4
> +; X64-NOT: lock
> +; X64: movb
> +; X32-NOT: lock
> +; X32: movb
> + ret void
> +; X64: ret
> +; X32: ret
> +}
> +
> +define void @atomic_fetch_swap8(i8 %x) nounwind {
> + %t1 = atomicrmw xchg i8* @sc8, i8 %x acquire
> +; X64-NOT: lock
> +; X64: xchgb
> +; X32-NOT: lock
> +; X32: xchgb
> + ret void
> +; X64: ret
> +; X32: ret
> +}
>
> Modified: llvm/trunk/test/CodeGen/X86/atomic_op.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/atomic_op.ll?rev=164281&r1=164280&r2=164281&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/atomic_op.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/atomic_op.ll Wed Sep 19 22:06:15 2012
> @@ -1,4 +1,4 @@
> -; RUN: llc < %s -mcpu=generic -march=x86 | FileCheck %s
> +; RUN: llc < %s -mcpu=generic -march=x86 -mattr=+cmov | FileCheck %s
>
> target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
>
> @@ -107,13 +107,12 @@
> ; CHECK: cmpxchgl
> %17 = cmpxchg i32* %val2, i32 1976, i32 1 monotonic
> store i32 %17, i32* %old
> + ; CHECK: movl [[R17atomic:.*]], %eax
> ; CHECK: movl $1401, %[[R17mask:[a-z]*]]
> - ; CHECK: movl [[R17atomic:.*]], %eax
> - ; CHECK: movl %eax, %[[R17newval:[a-z]*]]
> - ; CHECK: andl %[[R17mask]], %[[R17newval]]
> - ; CHECK: notl %[[R17newval]]
> + ; CHECK: andl %eax, %[[R17mask]]
> + ; CHECK: notl %[[R17mask]]
> ; CHECK: lock
> - ; CHECK: cmpxchgl %[[R17newval]], [[R17atomic]]
> + ; CHECK: cmpxchgl %[[R17mask]], [[R17atomic]]
> ; CHECK: jne
> ; CHECK: movl %eax,
> %18 = atomicrmw nand i32* %val2, i32 1401 monotonic
>
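The rewritten CHECK sequence in the atomic_op.ll hunk above (load into %eax, movl of the 1401 mask, andl, notl, lock cmpxchgl, jne) is the usual compare-and-swap retry loop for nand. A minimal C sketch of that shape, with a hypothetical helper name and the GCC/Clang __sync builtin rather than anything from the patch:

    /* Hypothetical helper, not part of the commit: shows the retry loop
       that the lock cmpxchgl / jne checks describe. */
    static int atomic_fetch_nand_sketch(int *p, int mask) {
      int old = *p;                       /* one load of the current value */
      for (;;) {
        int desired = ~(old & mask);      /* andl + notl on the scratch reg */
        int prev = __sync_val_compare_and_swap(p, old, desired);
        if (prev == old)                  /* cmpxchgl succeeded */
          return old;
        old = prev;                       /* retry (jne) with the returned value */
      }
    }
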
> Added: llvm/trunk/test/CodeGen/X86/pr13458.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/pr13458.ll?rev=164281&view=auto
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/pr13458.ll (added)
> +++ llvm/trunk/test/CodeGen/X86/pr13458.ll Wed Sep 19 22:06:15 2012
> @@ -0,0 +1,14 @@
> +; RUN: llc < %s
> +
> +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
> +target triple = "x86_64-apple-darwin11.4.2"
> +
> +%v8_uniform_Stats.0.2.4.10 = type { i64, i64, i32, i32, i32, i32, i32, i32, i32, i32, i64, i64, i64, i32, i32, i32, i32, i32, i32, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i32, i64, [7 x i32], [7 x i64] }
> +
> + at globalStats = external global %v8_uniform_Stats.0.2.4.10
> +
> +define void @MergeStats() nounwind {
> +allocas:
> + %r.i.i720 = atomicrmw max i64* getelementptr inbounds (%v8_uniform_Stats.0.2.4.10* @globalStats, i64 0, i32 30), i64 0 seq_cst
> + ret void
> +}
>
>