[llvm] 0b74e34 - Transform AtomicRMW logic operations to BT{R|C|S} if only changing/testing a single bit.
Noah Goldstein via llvm-commits
llvm-commits at lists.llvm.org
Mon Jan 16 22:09:51 PST 2023
Author: Noah Goldstein
Date: 2023-01-16T22:05:47-08:00
New Revision: 0b74e34938ba6cb89cbd197835219a6970ebaf39
URL: https://github.com/llvm/llvm-project/commit/0b74e34938ba6cb89cbd197835219a6970ebaf39
DIFF: https://github.com/llvm/llvm-project/commit/0b74e34938ba6cb89cbd197835219a6970ebaf39.diff
LOG: Transform AtomicRMW logic operations to BT{R|C|S} if only changing/testing a single bit.
This essentially expands on the optimizations added in D120199,
but applies the optimization to cases where the bit being changed /
tested is not an IMM but is a provable power of 2.
The only case currently added is for patterns like:
`__atomic_fetch_xor(p, 1 << c, __ATOMIC_RELAXED) & (1 << c)`
which, instead of using a `cmpxchg` loop, can be done with `btcl; setcc; shl`.
There are still a variety of missed cases that could/should be
addressed in the future. This commit documents many of those
cases with Todos.
Reviewed By: pengfei
Differential Revision: https://reviews.llvm.org/D140939
Added:
Modified:
llvm/include/llvm/IR/IntrinsicsX86.td
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/lib/Target/X86/X86ISelLowering.h
llvm/lib/Target/X86/X86InstrCompiler.td
llvm/test/CodeGen/X86/atomic-rm-bit-test-64.ll
llvm/test/CodeGen/X86/atomic-rm-bit-test.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td
index 4d04e8fbfbb11..239f15809e29e 100644
--- a/llvm/include/llvm/IR/IntrinsicsX86.td
+++ b/llvm/include/llvm/IR/IntrinsicsX86.td
@@ -70,6 +70,14 @@ let TargetPrefix = "x86" in {
[ImmArg<ArgIndex<1>>]>;
def int_x86_atomic_btr : Intrinsic<[llvm_anyint_ty], [llvm_ptr_ty, llvm_i8_ty],
[ImmArg<ArgIndex<1>>]>;
+ def int_x86_atomic_bts_rm : Intrinsic<[llvm_i8_ty], [llvm_ptr_ty, llvm_anyint_ty],
+ []>;
+ def int_x86_atomic_btc_rm : Intrinsic<[llvm_i8_ty], [llvm_ptr_ty, llvm_anyint_ty],
+ []>;
+ def int_x86_atomic_btr_rm : Intrinsic<[llvm_i8_ty], [llvm_ptr_ty, llvm_anyint_ty],
+ []>;
+
+
}
// Lock binary arith with CC.
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index e3e04a2fd68c9..c88c66d8b2edc 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -5654,6 +5654,18 @@ bool X86TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
MachineMemOperand::MOVolatile;
return true;
}
+ case Intrinsic::x86_atomic_bts_rm:
+ case Intrinsic::x86_atomic_btc_rm:
+ case Intrinsic::x86_atomic_btr_rm: {
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ Info.ptrVal = I.getArgOperand(0);
+ unsigned Size = I.getArgOperand(1)->getType()->getScalarSizeInBits();
+ Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), Size);
+ Info.align = Align(Size);
+ Info.flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
+ MachineMemOperand::MOVolatile;
+ return true;
+ }
case Intrinsic::x86_aadd32:
case Intrinsic::x86_aadd64:
case Intrinsic::x86_aand32:
@@ -28364,6 +28376,25 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), SetCC,
Operation.getValue(1));
}
+ case Intrinsic::x86_atomic_bts_rm:
+ case Intrinsic::x86_atomic_btc_rm:
+ case Intrinsic::x86_atomic_btr_rm: {
+ SDLoc DL(Op);
+ MVT VT = Op.getSimpleValueType();
+ SDValue Chain = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(2);
+ SDValue Op2 = Op.getOperand(3);
+ unsigned Opc = IntNo == Intrinsic::x86_atomic_bts_rm ? X86ISD::LBTS_RM
+ : IntNo == Intrinsic::x86_atomic_btc_rm ? X86ISD::LBTC_RM
+ : X86ISD::LBTR_RM;
+ MachineMemOperand *MMO = cast<MemIntrinsicSDNode>(Op)->getMemOperand();
+ SDValue Res =
+ DAG.getMemIntrinsicNode(Opc, DL, DAG.getVTList(MVT::i32, MVT::Other),
+ {Chain, Op1, Op2}, VT, MMO);
+ Chain = Res.getValue(1);
+ Res = DAG.getZExtOrTrunc(getSETCC(X86::COND_B, Res, DL, DAG), DL, VT);
+ return DAG.getNode(ISD::MERGE_VALUES, DL, Op->getVTList(), Res, Chain);
+ }
case Intrinsic::x86_atomic_bts:
case Intrinsic::x86_atomic_btc:
case Intrinsic::x86_atomic_btr: {
@@ -31401,6 +31432,75 @@ X86TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
: AtomicExpansionKind::None;
}
+enum BitTestKind : unsigned {
+ UndefBit,
+ ConstantBit,
+ NotConstantBit,
+ ShiftBit,
+ NotShiftBit
+};
+
+static std::pair<Value *, BitTestKind> FindSingleBitChange(Value *V) {
+ using namespace llvm::PatternMatch;
+ BitTestKind BTK = UndefBit;
+ auto *C = dyn_cast<ConstantInt>(V);
+ if (C) {
+ // Check if V is a power of 2 or NOT power of 2.
+ if (isPowerOf2_64(C->getZExtValue()))
+ BTK = ConstantBit;
+ else if (isPowerOf2_64((~C->getValue()).getZExtValue()))
+ BTK = NotConstantBit;
+ return {V, BTK};
+ }
+
+ // Check if V is some power of 2 pattern known to be non-zero
+ auto *I = dyn_cast<Instruction>(V);
+ if (I) {
+ bool Not = false;
+ // Check if we have a NOT
+ Value *PeekI;
+ if (match(I, m_c_Xor(m_Value(PeekI), m_AllOnes())) ||
+ match(I, m_Sub(m_AllOnes(), m_Value(PeekI)))) {
+ Not = true;
+ I = dyn_cast<Instruction>(PeekI);
+ assert(I != nullptr);
+ }
+ // We can only use 1 << X without more sophisticated analysis. C << X where
+ // C is a power of 2 but not 1 can result in zero which cannot be translated
+ // to bittest. Likewise any C >> X (either arith or logical) can be zero.
+ if (I->getOpcode() == Instruction::Shl) {
+ // Todo(1): The cmpxchg case is pretty costly so matching `BLSI(X)`, `X &
+ // -X` and some other provable power of 2 patterns that we can use CTZ on
+ // may be profitable.
+ // Todo(2): It may be possible in some cases to prove that Shl(C, X) is
+ // non-zero even where C != 1. Likewise LShr(C, X) and AShr(C, X) may also
+ // be provably a non-zero power of 2.
+ // Todo(3): ROTL and ROTR patterns on a power of 2 C should also be
+ // transformable to bittest.
+ auto *ShiftVal = dyn_cast<ConstantInt>(I->getOperand(0));
+ if (!ShiftVal)
+ return {nullptr, UndefBit};
+ if (ShiftVal->equalsInt(1))
+ BTK = Not ? NotShiftBit : ShiftBit;
+
+ if (BTK == UndefBit)
+ return {nullptr, UndefBit};
+
+ Value *BitV = I->getOperand(1);
+
+ Value *AndOp;
+ const APInt *AndC;
+ if (match(BitV, m_c_And(m_Value(AndOp), m_APInt(AndC)))) {
+ // Read past a shiftmask instruction to find count
+ if (*AndC == (I->getType()->getPrimitiveSizeInBits() - 1))
+ BitV = AndOp;
+ }
+ return {BitV, BTK};
+ }
+ }
+ return {nullptr, UndefBit};
+}
+
TargetLowering::AtomicExpansionKind
X86TargetLowering::shouldExpandLogicAtomicRMWInIR(AtomicRMWInst *AI) const {
// If the atomicrmw's result isn't actually used, we can just add a "lock"
@@ -31410,51 +31510,138 @@ X86TargetLowering::shouldExpandLogicAtomicRMWInIR(AtomicRMWInst *AI) const {
// If the atomicrmw's result is used by a single bit AND, we may use
// bts/btr/btc instruction for these operations.
- auto *C1 = dyn_cast<ConstantInt>(AI->getValOperand());
+ // Note: InstCombinePass can cause a de-optimization here. It replaces the
+ // SETCC(And(AtomicRMW(P, power_of_2), power_of_2)) with LShr and Xor
+ // (depending on CC). This pattern can only use bts/btr/btc but we don't
+ // detect it.
Instruction *I = AI->user_back();
- if (!C1 || !AI->hasOneUse() || I->getOpcode() != Instruction::And ||
+ auto BitChange = FindSingleBitChange(AI->getValOperand());
+ if (BitChange.second == UndefBit || !AI->hasOneUse() ||
+ I->getOpcode() != Instruction::And ||
+ AI->getType()->getPrimitiveSizeInBits() == 8 ||
AI->getParent() != I->getParent())
return AtomicExpansionKind::CmpXChg;
+
+ assert(I->getOperand(0) == AI);
// The following instruction must be a AND single bit.
- auto *C2 = dyn_cast<ConstantInt>(I->getOperand(1));
- unsigned Bits = AI->getType()->getPrimitiveSizeInBits();
- if (!C2 || Bits == 8 || !isPowerOf2_64(C2->getZExtValue()))
+ if (BitChange.second == ConstantBit || BitChange.second == NotConstantBit) {
+ auto *C1 = dyn_cast<ConstantInt>(AI->getValOperand());
+ assert(C1 != nullptr);
+ auto *C2 = dyn_cast<ConstantInt>(I->getOperand(1));
+ if (!C2 || !isPowerOf2_64(C2->getZExtValue())) {
+ return AtomicExpansionKind::CmpXChg;
+ }
+ if (AI->getOperation() == AtomicRMWInst::And) {
+ return ~C1->getValue() == C2->getValue()
+ ? AtomicExpansionKind::BitTestIntrinsic
+ : AtomicExpansionKind::CmpXChg;
+ }
+ return C1 == C2 ? AtomicExpansionKind::BitTestIntrinsic
+ : AtomicExpansionKind::CmpXChg;
+ }
+
+ assert(BitChange.second == ShiftBit || BitChange.second == NotShiftBit);
+
+ auto BitTested = FindSingleBitChange(I->getOperand(1));
+ if (BitTested.second != ShiftBit && BitTested.second != NotShiftBit)
+ return AtomicExpansionKind::CmpXChg;
+
+ assert(BitChange.first != nullptr && BitTested.first != nullptr);
+
+ // If shift amounts are not the same we can't use BitTestIntrinsic.
+ if (BitChange.first != BitTested.first)
return AtomicExpansionKind::CmpXChg;
+ // If atomic AND need to be masking all but one bit and testing the one bit
+ // unset in the mask.
if (AI->getOperation() == AtomicRMWInst::And)
- return ~C1->getValue() == C2->getValue()
+ return (BitChange.second == NotShiftBit && BitTested.second == ShiftBit)
? AtomicExpansionKind::BitTestIntrinsic
: AtomicExpansionKind::CmpXChg;
- return C1 == C2 ? AtomicExpansionKind::BitTestIntrinsic
- : AtomicExpansionKind::CmpXChg;
+ // If atomic XOR/OR need to be setting and testing the same bit.
+ return (BitChange.second == ShiftBit && BitTested.second == ShiftBit)
+ ? AtomicExpansionKind::BitTestIntrinsic
+ : AtomicExpansionKind::CmpXChg;
}
void X86TargetLowering::emitBitTestAtomicRMWIntrinsic(AtomicRMWInst *AI) const {
IRBuilder<> Builder(AI);
- Intrinsic::ID IID = Intrinsic::not_intrinsic;
+ Intrinsic::ID IID_C = Intrinsic::not_intrinsic;
+ Intrinsic::ID IID_I = Intrinsic::not_intrinsic;
switch (AI->getOperation()) {
default:
llvm_unreachable("Unknown atomic operation");
case AtomicRMWInst::Or:
- IID = Intrinsic::x86_atomic_bts;
+ IID_C = Intrinsic::x86_atomic_bts;
+ IID_I = Intrinsic::x86_atomic_bts_rm;
break;
case AtomicRMWInst::Xor:
- IID = Intrinsic::x86_atomic_btc;
+ IID_C = Intrinsic::x86_atomic_btc;
+ IID_I = Intrinsic::x86_atomic_btc_rm;
break;
case AtomicRMWInst::And:
- IID = Intrinsic::x86_atomic_btr;
+ IID_C = Intrinsic::x86_atomic_btr;
+ IID_I = Intrinsic::x86_atomic_btr_rm;
break;
}
Instruction *I = AI->user_back();
LLVMContext &Ctx = AI->getContext();
- unsigned Imm =
- countTrailingZeros(cast<ConstantInt>(I->getOperand(1))->getZExtValue());
- Function *BitTest =
- Intrinsic::getDeclaration(AI->getModule(), IID, AI->getType());
Value *Addr = Builder.CreatePointerCast(AI->getPointerOperand(),
Type::getInt8PtrTy(Ctx));
- Value *Result = Builder.CreateCall(BitTest, {Addr, Builder.getInt8(Imm)});
+ Function *BitTest = nullptr;
+ Value *Result = nullptr;
+ auto BitTested = FindSingleBitChange(AI->getValOperand());
+ assert(BitTested.first != nullptr);
+ if (BitTested.second == ConstantBit || BitTested.second == NotConstantBit) {
+ auto *C = dyn_cast<ConstantInt>(I->getOperand(1));
+ assert(C != nullptr);
+
+ BitTest = Intrinsic::getDeclaration(AI->getModule(), IID_C, AI->getType());
+
+ unsigned Imm = countTrailingZeros(C->getZExtValue());
+ Result = Builder.CreateCall(BitTest, {Addr, Builder.getInt8(Imm)});
+ } else {
+ BitTest = Intrinsic::getDeclaration(AI->getModule(), IID_I, AI->getType());
+
+ assert(BitTested.second == ShiftBit || BitTested.second == NotShiftBit);
+
+ Value *SI = BitTested.first;
+ assert(SI != nullptr);
+
+ // BT{S|R|C} on memory operand don't modulo bit position so we need to
+ // mask it.
+ unsigned ShiftBits = SI->getType()->getPrimitiveSizeInBits();
+ Value *BitPos =
+ Builder.CreateAnd(SI, Builder.getIntN(ShiftBits, ShiftBits - 1));
+ // Todo(1): In many cases it may be provable that SI is less than
+ // ShiftBits in which case this mask is unnecessary
+ // Todo(2): In the fairly idiomatic case of P[X / sizeof_bits(X)] OP 1
+ // << (X % sizeof_bits(X)) we can drop the shift mask and AGEN in
+ // favor of just a raw BT{S|R|C}.
+
+ Result = Builder.CreateCall(BitTest, {Addr, BitPos});
+ Result = Builder.CreateZExtOrTrunc(Result, AI->getType());
+
+ // If the result is only used for zero/non-zero status then we don't need to
+ // shift value back. Otherwise do so.
+ for (auto It = I->user_begin(); It != I->user_end(); ++It) {
+ if (auto *ICmp = dyn_cast<ICmpInst>(*It)) {
+ if (ICmp->isEquality()) {
+ auto *C0 = dyn_cast<ConstantInt>(ICmp->getOperand(0));
+ auto *C1 = dyn_cast<ConstantInt>(ICmp->getOperand(1));
+ if (C0 || C1) {
+ assert(C0 == nullptr || C1 == nullptr);
+ if ((C0 ? C0 : C1)->isZero())
+ continue;
+ }
+ }
+ }
+ Result = Builder.CreateShl(Result, BitPos);
+ break;
+ }
+ }
+
I->replaceAllUsesWith(Result);
I->eraseFromParent();
AI->eraseFromParent();
@@ -34242,6 +34429,9 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(LBTS)
NODE_NAME_CASE(LBTC)
NODE_NAME_CASE(LBTR)
+ NODE_NAME_CASE(LBTS_RM)
+ NODE_NAME_CASE(LBTC_RM)
+ NODE_NAME_CASE(LBTR_RM)
NODE_NAME_CASE(AADD)
NODE_NAME_CASE(AOR)
NODE_NAME_CASE(AXOR)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index b727725613864..c08227b5b383a 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -798,6 +798,9 @@ namespace llvm {
LBTS,
LBTC,
LBTR,
+ LBTS_RM,
+ LBTC_RM,
+ LBTR_RM,
/// RAO arithmetic instructions.
/// OUTCHAIN = AADD(INCHAIN, PTR, RHS)
diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td
index 823784ce7e989..8fddd0037999d 100644
--- a/llvm/lib/Target/X86/X86InstrCompiler.td
+++ b/llvm/lib/Target/X86/X86InstrCompiler.td
@@ -864,6 +864,17 @@ def x86btc : SDNode<"X86ISD::LBTC", X86LBTest,
def x86btr : SDNode<"X86ISD::LBTR", X86LBTest,
[SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+def X86LBTestRM : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<1>,
+ SDTCisInt<2>]>;
+
+def x86_rm_bts : SDNode<"X86ISD::LBTS_RM", X86LBTestRM,
+ [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+def x86_rm_btc : SDNode<"X86ISD::LBTC_RM", X86LBTestRM,
+ [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+def x86_rm_btr : SDNode<"X86ISD::LBTR_RM", X86LBTestRM,
+ [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+
+
multiclass ATOMIC_LOGIC_OP<Format Form, string s> {
let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1,
SchedRW = [WriteBitTestSetRegRMW] in {
@@ -882,10 +893,33 @@ multiclass ATOMIC_LOGIC_OP<Format Form, string s> {
}
}
+multiclass ATOMIC_LOGIC_OP_RM<bits<8> Opc8, string s> {
+ let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1,
+ SchedRW = [WriteBitTestSetRegRMW] in {
+ def 16rm : Ii8<Opc8, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
+ !strconcat(s, "{w}\t{$src2, $src1|$src1, $src2}"),
+ [(set EFLAGS, (!cast<SDNode>("x86_rm_" # s) addr:$src1, GR16:$src2))]>,
+ OpSize16, TB, LOCK;
+ def 32rm : Ii8<Opc8, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2),
+ !strconcat(s, "{l}\t{$src2, $src1|$src1, $src2}"),
+ [(set EFLAGS, (!cast<SDNode>("x86_rm_" # s) addr:$src1, GR32:$src2))]>,
+ OpSize32, TB, LOCK;
+ def 64rm : RIi8<Opc8, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
+ !strconcat(s, "{q}\t{$src2, $src1|$src1, $src2}"),
+ [(set EFLAGS, (!cast<SDNode>("x86_rm_" # s) addr:$src1, GR64:$src2))]>,
+ TB, LOCK;
+ }
+}
+
+
defm LOCK_BTS : ATOMIC_LOGIC_OP<MRM5m, "bts">;
defm LOCK_BTC : ATOMIC_LOGIC_OP<MRM7m, "btc">;
defm LOCK_BTR : ATOMIC_LOGIC_OP<MRM6m, "btr">;
+defm LOCK_BTS_RM : ATOMIC_LOGIC_OP_RM<0xAB, "bts">;
+defm LOCK_BTC_RM : ATOMIC_LOGIC_OP_RM<0xBB, "btc">;
+defm LOCK_BTR_RM : ATOMIC_LOGIC_OP_RM<0xB3, "btr">;
+
// Atomic compare and swap.
multiclass LCMPXCHG_BinOp<bits<8> Opc8, bits<8> Opc, Format Form,
string mnemonic, SDPatternOperator frag> {
diff --git a/llvm/test/CodeGen/X86/atomic-rm-bit-test-64.ll b/llvm/test/CodeGen/X86/atomic-rm-bit-test-64.ll
index fbaf2303df99b..f7ebf302adb6d 100644
--- a/llvm/test/CodeGen/X86/atomic-rm-bit-test-64.ll
+++ b/llvm/test/CodeGen/X86/atomic-rm-bit-test-64.ll
@@ -5,19 +5,12 @@ define i64 @atomic_shl1_xor_64_gpr_val(ptr %v, i64 %c) nounwind {
; CHECK-LABEL: atomic_shl1_xor_64_gpr_val:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movq %rsi, %rcx
-; CHECK-NEXT: movl $1, %edx
+; CHECK-NEXT: andl $63, %ecx
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: lock btcq %rcx, (%rdi)
+; CHECK-NEXT: setb %al
; CHECK-NEXT: # kill: def $cl killed $cl killed $rcx
-; CHECK-NEXT: shlq %cl, %rdx
-; CHECK-NEXT: movq (%rdi), %rax
-; CHECK-NEXT: .p2align 4, 0x90
-; CHECK-NEXT: .LBB0_1: # %atomicrmw.start
-; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: movq %rax, %rcx
-; CHECK-NEXT: xorq %rdx, %rcx
-; CHECK-NEXT: lock cmpxchgq %rcx, (%rdi)
-; CHECK-NEXT: jne .LBB0_1
-; CHECK-NEXT: # %bb.2: # %atomicrmw.end
-; CHECK-NEXT: andq %rdx, %rax
+; CHECK-NEXT: shlq %cl, %rax
; CHECK-NEXT: retq
entry:
%shl = shl nuw i64 1, %c
@@ -85,20 +78,12 @@ define i64 @atomic_shl1_small_mask_xor_64_gpr_val(ptr %v, i64 %c) nounwind {
; CHECK-LABEL: atomic_shl1_small_mask_xor_64_gpr_val:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movq %rsi, %rcx
-; CHECK-NEXT: andb $31, %cl
-; CHECK-NEXT: movl $1, %edx
+; CHECK-NEXT: andl $31, %ecx
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: lock btcq %rcx, (%rdi)
+; CHECK-NEXT: setb %al
; CHECK-NEXT: # kill: def $cl killed $cl killed $rcx
-; CHECK-NEXT: shlq %cl, %rdx
-; CHECK-NEXT: movq (%rdi), %rax
-; CHECK-NEXT: .p2align 4, 0x90
-; CHECK-NEXT: .LBB3_1: # %atomicrmw.start
-; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: movq %rax, %rcx
-; CHECK-NEXT: xorq %rdx, %rcx
-; CHECK-NEXT: lock cmpxchgq %rcx, (%rdi)
-; CHECK-NEXT: jne .LBB3_1
-; CHECK-NEXT: # %bb.2: # %atomicrmw.end
-; CHECK-NEXT: andl %edx, %eax
+; CHECK-NEXT: shlq %cl, %rax
; CHECK-NEXT: retq
entry:
%rem = and i64 %c, 31
@@ -112,21 +97,12 @@ define i64 @atomic_shl1_mask0_xor_64_gpr_val(ptr %v, i64 %c) nounwind {
; CHECK-LABEL: atomic_shl1_mask0_xor_64_gpr_val:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movq %rsi, %rcx
-; CHECK-NEXT: movl $1, %edx
-; CHECK-NEXT: shlq %cl, %rdx
-; CHECK-NEXT: movq (%rdi), %rax
-; CHECK-NEXT: .p2align 4, 0x90
-; CHECK-NEXT: .LBB4_1: # %atomicrmw.start
-; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: movq %rax, %rsi
-; CHECK-NEXT: xorq %rdx, %rsi
-; CHECK-NEXT: lock cmpxchgq %rsi, (%rdi)
-; CHECK-NEXT: jne .LBB4_1
-; CHECK-NEXT: # %bb.2: # %atomicrmw.end
-; CHECK-NEXT: movl $1, %edx
+; CHECK-NEXT: andl $63, %ecx
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: lock btcq %rcx, (%rdi)
+; CHECK-NEXT: setb %al
; CHECK-NEXT: # kill: def $cl killed $cl killed $rcx
-; CHECK-NEXT: shlq %cl, %rdx
-; CHECK-NEXT: andq %rdx, %rax
+; CHECK-NEXT: shlq %cl, %rax
; CHECK-NEXT: retq
entry:
%rem = and i64 %c, 63
@@ -141,21 +117,12 @@ define i64 @atomic_shl1_mask1_xor_64_gpr_val(ptr %v, i64 %c) nounwind {
; CHECK-LABEL: atomic_shl1_mask1_xor_64_gpr_val:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movq %rsi, %rcx
-; CHECK-NEXT: movl $1, %edx
-; CHECK-NEXT: shlq %cl, %rdx
-; CHECK-NEXT: movq (%rdi), %rax
-; CHECK-NEXT: .p2align 4, 0x90
-; CHECK-NEXT: .LBB5_1: # %atomicrmw.start
-; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: movq %rax, %rsi
-; CHECK-NEXT: xorq %rdx, %rsi
-; CHECK-NEXT: lock cmpxchgq %rsi, (%rdi)
-; CHECK-NEXT: jne .LBB5_1
-; CHECK-NEXT: # %bb.2: # %atomicrmw.end
-; CHECK-NEXT: movl $1, %edx
+; CHECK-NEXT: andl $63, %ecx
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: lock btcq %rcx, (%rdi)
+; CHECK-NEXT: setb %al
; CHECK-NEXT: # kill: def $cl killed $cl killed $rcx
-; CHECK-NEXT: shlq %cl, %rdx
-; CHECK-NEXT: andq %rdx, %rax
+; CHECK-NEXT: shlq %cl, %rax
; CHECK-NEXT: retq
entry:
%shl = shl nuw i64 1, %c
@@ -170,19 +137,12 @@ define i64 @atomic_shl1_mask01_xor_64_gpr_val(ptr %v, i64 %c) nounwind {
; CHECK-LABEL: atomic_shl1_mask01_xor_64_gpr_val:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movq %rsi, %rcx
-; CHECK-NEXT: movl $1, %edx
+; CHECK-NEXT: andl $63, %ecx
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: lock btcq %rcx, (%rdi)
+; CHECK-NEXT: setb %al
; CHECK-NEXT: # kill: def $cl killed $cl killed $rcx
-; CHECK-NEXT: shlq %cl, %rdx
-; CHECK-NEXT: movq (%rdi), %rax
-; CHECK-NEXT: .p2align 4, 0x90
-; CHECK-NEXT: .LBB6_1: # %atomicrmw.start
-; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: movq %rax, %rcx
-; CHECK-NEXT: xorq %rdx, %rcx
-; CHECK-NEXT: lock cmpxchgq %rcx, (%rdi)
-; CHECK-NEXT: jne .LBB6_1
-; CHECK-NEXT: # %bb.2: # %atomicrmw.end
-; CHECK-NEXT: andq %rdx, %rax
+; CHECK-NEXT: shlq %cl, %rax
; CHECK-NEXT: retq
entry:
%rem = and i64 %c, 63
@@ -701,21 +661,12 @@ define i64 @atomic_shl1_and_64_gpr_val(ptr %v, i64 %c) nounwind {
; CHECK-LABEL: atomic_shl1_and_64_gpr_val:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movq %rsi, %rcx
-; CHECK-NEXT: movl $1, %edx
-; CHECK-NEXT: shlq %cl, %rdx
-; CHECK-NEXT: movq $-2, %rsi
+; CHECK-NEXT: andl $63, %ecx
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: lock btrq %rcx, (%rdi)
+; CHECK-NEXT: setb %al
; CHECK-NEXT: # kill: def $cl killed $cl killed $rcx
-; CHECK-NEXT: rolq %cl, %rsi
-; CHECK-NEXT: movq (%rdi), %rax
-; CHECK-NEXT: .p2align 4, 0x90
-; CHECK-NEXT: .LBB24_1: # %atomicrmw.start
-; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: movq %rax, %rcx
-; CHECK-NEXT: andq %rsi, %rcx
-; CHECK-NEXT: lock cmpxchgq %rcx, (%rdi)
-; CHECK-NEXT: jne .LBB24_1
-; CHECK-NEXT: # %bb.2: # %atomicrmw.end
-; CHECK-NEXT: andq %rdx, %rax
+; CHECK-NEXT: shlq %cl, %rax
; CHECK-NEXT: retq
entry:
%shl = shl nuw i64 1, %c
@@ -788,22 +739,12 @@ define i64 @atomic_shl1_small_mask_and_64_gpr_val(ptr %v, i64 %c) nounwind {
; CHECK-LABEL: atomic_shl1_small_mask_and_64_gpr_val:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movq %rsi, %rcx
-; CHECK-NEXT: andb $31, %cl
-; CHECK-NEXT: movl $1, %edx
-; CHECK-NEXT: shlq %cl, %rdx
-; CHECK-NEXT: movq $-2, %rsi
+; CHECK-NEXT: andl $31, %ecx
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: lock btrq %rcx, (%rdi)
+; CHECK-NEXT: setb %al
; CHECK-NEXT: # kill: def $cl killed $cl killed $rcx
-; CHECK-NEXT: rolq %cl, %rsi
-; CHECK-NEXT: movq (%rdi), %rax
-; CHECK-NEXT: .p2align 4, 0x90
-; CHECK-NEXT: .LBB27_1: # %atomicrmw.start
-; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: movq %rax, %rcx
-; CHECK-NEXT: andq %rsi, %rcx
-; CHECK-NEXT: lock cmpxchgq %rcx, (%rdi)
-; CHECK-NEXT: jne .LBB27_1
-; CHECK-NEXT: # %bb.2: # %atomicrmw.end
-; CHECK-NEXT: andl %edx, %eax
+; CHECK-NEXT: shlq %cl, %rax
; CHECK-NEXT: retq
entry:
%rem = and i64 %c, 31
@@ -818,21 +759,12 @@ define i64 @atomic_shl1_mask0_and_64_gpr_val(ptr %v, i64 %c) nounwind {
; CHECK-LABEL: atomic_shl1_mask0_and_64_gpr_val:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movq %rsi, %rcx
-; CHECK-NEXT: movq $-2, %rdx
-; CHECK-NEXT: rolq %cl, %rdx
-; CHECK-NEXT: movq (%rdi), %rax
-; CHECK-NEXT: .p2align 4, 0x90
-; CHECK-NEXT: .LBB28_1: # %atomicrmw.start
-; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: movq %rax, %rsi
-; CHECK-NEXT: andq %rdx, %rsi
-; CHECK-NEXT: lock cmpxchgq %rsi, (%rdi)
-; CHECK-NEXT: jne .LBB28_1
-; CHECK-NEXT: # %bb.2: # %atomicrmw.end
-; CHECK-NEXT: movl $1, %edx
+; CHECK-NEXT: andl $63, %ecx
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: lock btrq %rcx, (%rdi)
+; CHECK-NEXT: setb %al
; CHECK-NEXT: # kill: def $cl killed $cl killed $rcx
-; CHECK-NEXT: shlq %cl, %rdx
-; CHECK-NEXT: andq %rdx, %rax
+; CHECK-NEXT: shlq %cl, %rax
; CHECK-NEXT: retq
entry:
%rem = and i64 %c, 63
@@ -848,21 +780,12 @@ define i64 @atomic_shl1_mask1_and_64_gpr_val(ptr %v, i64 %c) nounwind {
; CHECK-LABEL: atomic_shl1_mask1_and_64_gpr_val:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movq %rsi, %rcx
-; CHECK-NEXT: movq $-2, %rdx
-; CHECK-NEXT: rolq %cl, %rdx
-; CHECK-NEXT: movq (%rdi), %rax
-; CHECK-NEXT: .p2align 4, 0x90
-; CHECK-NEXT: .LBB29_1: # %atomicrmw.start
-; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: movq %rax, %rsi
-; CHECK-NEXT: andq %rdx, %rsi
-; CHECK-NEXT: lock cmpxchgq %rsi, (%rdi)
-; CHECK-NEXT: jne .LBB29_1
-; CHECK-NEXT: # %bb.2: # %atomicrmw.end
-; CHECK-NEXT: movl $1, %edx
+; CHECK-NEXT: andl $63, %ecx
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: lock btrq %rcx, (%rdi)
+; CHECK-NEXT: setb %al
; CHECK-NEXT: # kill: def $cl killed $cl killed $rcx
-; CHECK-NEXT: shlq %cl, %rdx
-; CHECK-NEXT: andq %rdx, %rax
+; CHECK-NEXT: shlq %cl, %rax
; CHECK-NEXT: retq
entry:
%shl = shl nuw i64 1, %c
@@ -878,21 +801,12 @@ define i64 @atomic_shl1_mask01_and_64_gpr_val(ptr %v, i64 %c) nounwind {
; CHECK-LABEL: atomic_shl1_mask01_and_64_gpr_val:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movq %rsi, %rcx
-; CHECK-NEXT: movl $1, %edx
-; CHECK-NEXT: shlq %cl, %rdx
-; CHECK-NEXT: movq $-2, %rsi
+; CHECK-NEXT: andl $63, %ecx
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: lock btrq %rcx, (%rdi)
+; CHECK-NEXT: setb %al
; CHECK-NEXT: # kill: def $cl killed $cl killed $rcx
-; CHECK-NEXT: rolq %cl, %rsi
-; CHECK-NEXT: movq (%rdi), %rax
-; CHECK-NEXT: .p2align 4, 0x90
-; CHECK-NEXT: .LBB30_1: # %atomicrmw.start
-; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: movq %rax, %rcx
-; CHECK-NEXT: andq %rsi, %rcx
-; CHECK-NEXT: lock cmpxchgq %rcx, (%rdi)
-; CHECK-NEXT: jne .LBB30_1
-; CHECK-NEXT: # %bb.2: # %atomicrmw.end
-; CHECK-NEXT: andq %rdx, %rax
+; CHECK-NEXT: shlq %cl, %rax
; CHECK-NEXT: retq
entry:
%rem = and i64 %c, 63
@@ -1185,26 +1099,14 @@ entry:
define i64 @atomic_shl1_and_64_gpr_brnz(ptr %v, i64 %c) nounwind {
; CHECK-LABEL: atomic_shl1_and_64_gpr_brnz:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: movq %rsi, %rcx
-; CHECK-NEXT: movl $1, %edx
-; CHECK-NEXT: shlq %cl, %rdx
-; CHECK-NEXT: movq $-2, %rsi
-; CHECK-NEXT: rolq %cl, %rsi
-; CHECK-NEXT: movq (%rdi), %rax
-; CHECK-NEXT: .p2align 4, 0x90
-; CHECK-NEXT: .LBB40_1: # %atomicrmw.start
-; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: movq %rax, %r8
-; CHECK-NEXT: andq %rsi, %r8
-; CHECK-NEXT: lock cmpxchgq %r8, (%rdi)
-; CHECK-NEXT: jne .LBB40_1
-; CHECK-NEXT: # %bb.2: # %atomicrmw.end
-; CHECK-NEXT: testq %rdx, %rax
-; CHECK-NEXT: je .LBB40_3
-; CHECK-NEXT: # %bb.4: # %if.then
-; CHECK-NEXT: movq (%rdi,%rcx,8), %rax
+; CHECK-NEXT: movl %esi, %eax
+; CHECK-NEXT: andl $63, %eax
+; CHECK-NEXT: lock btrq %rax, (%rdi)
+; CHECK-NEXT: jae .LBB40_1
+; CHECK-NEXT: # %bb.2: # %if.then
+; CHECK-NEXT: movq (%rdi,%rsi,8), %rax
; CHECK-NEXT: retq
-; CHECK-NEXT: .LBB40_3:
+; CHECK-NEXT: .LBB40_1:
; CHECK-NEXT: movl $123, %eax
; CHECK-NEXT: retq
entry:
@@ -1316,27 +1218,13 @@ return: ; preds = %entry, %if.then
define i64 @atomic_shl1_small_mask_and_64_gpr_brnz(ptr %v, i64 %c) nounwind {
; CHECK-LABEL: atomic_shl1_small_mask_and_64_gpr_brnz:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: movq %rsi, %rcx
-; CHECK-NEXT: andl $31, %ecx
-; CHECK-NEXT: movl $1, %edx
-; CHECK-NEXT: shlq %cl, %rdx
-; CHECK-NEXT: movq $-2, %rsi
-; CHECK-NEXT: rolq %cl, %rsi
-; CHECK-NEXT: movq (%rdi), %rax
-; CHECK-NEXT: .p2align 4, 0x90
-; CHECK-NEXT: .LBB43_1: # %atomicrmw.start
-; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: movq %rax, %r8
-; CHECK-NEXT: andq %rsi, %r8
-; CHECK-NEXT: lock cmpxchgq %r8, (%rdi)
-; CHECK-NEXT: jne .LBB43_1
-; CHECK-NEXT: # %bb.2: # %atomicrmw.end
-; CHECK-NEXT: testl %edx, %eax
-; CHECK-NEXT: je .LBB43_3
-; CHECK-NEXT: # %bb.4: # %if.then
-; CHECK-NEXT: movq (%rdi,%rcx,8), %rax
+; CHECK-NEXT: andl $31, %esi
+; CHECK-NEXT: lock btrq %rsi, (%rdi)
+; CHECK-NEXT: jae .LBB43_1
+; CHECK-NEXT: # %bb.2: # %if.then
+; CHECK-NEXT: movq (%rdi,%rsi,8), %rax
; CHECK-NEXT: retq
-; CHECK-NEXT: .LBB43_3:
+; CHECK-NEXT: .LBB43_1:
; CHECK-NEXT: movl $123, %eax
; CHECK-NEXT: retq
entry:
@@ -1361,24 +1249,14 @@ return: ; preds = %entry, %if.then
define i64 @atomic_shl1_mask0_and_64_gpr_brnz(ptr %v, i64 %c) nounwind {
; CHECK-LABEL: atomic_shl1_mask0_and_64_gpr_brnz:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: movq %rsi, %rcx
-; CHECK-NEXT: movq $-2, %rdx
-; CHECK-NEXT: rolq %cl, %rdx
-; CHECK-NEXT: movq (%rdi), %rax
-; CHECK-NEXT: .p2align 4, 0x90
-; CHECK-NEXT: .LBB44_1: # %atomicrmw.start
-; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: movq %rax, %rsi
-; CHECK-NEXT: andq %rdx, %rsi
-; CHECK-NEXT: lock cmpxchgq %rsi, (%rdi)
-; CHECK-NEXT: jne .LBB44_1
-; CHECK-NEXT: # %bb.2: # %atomicrmw.end
-; CHECK-NEXT: btq %rcx, %rax
-; CHECK-NEXT: jae .LBB44_3
-; CHECK-NEXT: # %bb.4: # %if.then
-; CHECK-NEXT: movq (%rdi,%rcx,8), %rax
+; CHECK-NEXT: movl %esi, %eax
+; CHECK-NEXT: andl $63, %eax
+; CHECK-NEXT: lock btrq %rax, (%rdi)
+; CHECK-NEXT: jae .LBB44_1
+; CHECK-NEXT: # %bb.2: # %if.then
+; CHECK-NEXT: movq (%rdi,%rsi,8), %rax
; CHECK-NEXT: retq
-; CHECK-NEXT: .LBB44_3:
+; CHECK-NEXT: .LBB44_1:
; CHECK-NEXT: movl $123, %eax
; CHECK-NEXT: retq
entry:
@@ -1404,24 +1282,14 @@ return: ; preds = %entry, %if.then
define i64 @atomic_shl1_mask1_and_64_gpr_brnz(ptr %v, i64 %c) nounwind {
; CHECK-LABEL: atomic_shl1_mask1_and_64_gpr_brnz:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: movq %rsi, %rcx
-; CHECK-NEXT: movq $-2, %rdx
-; CHECK-NEXT: rolq %cl, %rdx
-; CHECK-NEXT: movq (%rdi), %rax
-; CHECK-NEXT: .p2align 4, 0x90
-; CHECK-NEXT: .LBB45_1: # %atomicrmw.start
-; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: movq %rax, %rsi
-; CHECK-NEXT: andq %rdx, %rsi
-; CHECK-NEXT: lock cmpxchgq %rsi, (%rdi)
-; CHECK-NEXT: jne .LBB45_1
-; CHECK-NEXT: # %bb.2: # %atomicrmw.end
-; CHECK-NEXT: btq %rcx, %rax
-; CHECK-NEXT: jae .LBB45_3
-; CHECK-NEXT: # %bb.4: # %if.then
-; CHECK-NEXT: movq (%rdi,%rcx,8), %rax
+; CHECK-NEXT: movl %esi, %eax
+; CHECK-NEXT: andl $63, %eax
+; CHECK-NEXT: lock btrq %rax, (%rdi)
+; CHECK-NEXT: jae .LBB45_1
+; CHECK-NEXT: # %bb.2: # %if.then
+; CHECK-NEXT: movq (%rdi,%rsi,8), %rax
; CHECK-NEXT: retq
-; CHECK-NEXT: .LBB45_3:
+; CHECK-NEXT: .LBB45_1:
; CHECK-NEXT: movl $123, %eax
; CHECK-NEXT: retq
entry:
@@ -1447,26 +1315,14 @@ return: ; preds = %entry, %if.then
define i64 @atomic_shl1_mask01_and_64_gpr_brnz(ptr %v, i64 %c) nounwind {
; CHECK-LABEL: atomic_shl1_mask01_and_64_gpr_brnz:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: movq %rsi, %rcx
-; CHECK-NEXT: movl $1, %edx
-; CHECK-NEXT: shlq %cl, %rdx
-; CHECK-NEXT: movq $-2, %rsi
-; CHECK-NEXT: rolq %cl, %rsi
-; CHECK-NEXT: movq (%rdi), %rax
-; CHECK-NEXT: .p2align 4, 0x90
-; CHECK-NEXT: .LBB46_1: # %atomicrmw.start
-; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: movq %rax, %r8
-; CHECK-NEXT: andq %rsi, %r8
-; CHECK-NEXT: lock cmpxchgq %r8, (%rdi)
-; CHECK-NEXT: jne .LBB46_1
-; CHECK-NEXT: # %bb.2: # %atomicrmw.end
-; CHECK-NEXT: testq %rdx, %rax
-; CHECK-NEXT: je .LBB46_3
-; CHECK-NEXT: # %bb.4: # %if.then
-; CHECK-NEXT: movq (%rdi,%rcx,8), %rax
+; CHECK-NEXT: movl %esi, %eax
+; CHECK-NEXT: andl $63, %eax
+; CHECK-NEXT: lock btrq %rax, (%rdi)
+; CHECK-NEXT: jae .LBB46_1
+; CHECK-NEXT: # %bb.2: # %if.then
+; CHECK-NEXT: movq (%rdi,%rsi,8), %rax
; CHECK-NEXT: retq
-; CHECK-NEXT: .LBB46_3:
+; CHECK-NEXT: .LBB46_1:
; CHECK-NEXT: movl $123, %eax
; CHECK-NEXT: retq
entry:
diff --git a/llvm/test/CodeGen/X86/atomic-rm-bit-test.ll b/llvm/test/CodeGen/X86/atomic-rm-bit-test.ll
index 4770fc1bb449c..b0f4e08f292e4 100644
--- a/llvm/test/CodeGen/X86/atomic-rm-bit-test.ll
+++ b/llvm/test/CodeGen/X86/atomic-rm-bit-test.ll
@@ -751,49 +751,26 @@ entry:
define zeroext i16 @atomic_shl1_small_mask_xor_16_gpr_val(ptr %v, i16 zeroext %c) nounwind {
; X86-LABEL: atomic_shl1_small_mask_xor_16_gpr_val:
; X86: # %bb.0: # %entry
-; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: andb $7, %cl
-; X86-NEXT: movl $1, %esi
-; X86-NEXT: shll %cl, %esi
-; X86-NEXT: movzwl (%edx), %eax
-; X86-NEXT: movzwl %si, %ecx
-; X86-NEXT: .p2align 4, 0x90
-; X86-NEXT: .LBB13_1: # %atomicrmw.start
-; X86-NEXT: # =>This Inner Loop Header: Depth=1
-; X86-NEXT: movl %eax, %esi
-; X86-NEXT: xorl %ecx, %esi
-; X86-NEXT: # kill: def $ax killed $ax killed $eax
-; X86-NEXT: lock cmpxchgw %si, (%edx)
-; X86-NEXT: # kill: def $ax killed $ax def $eax
-; X86-NEXT: jne .LBB13_1
-; X86-NEXT: # %bb.2: # %atomicrmw.end
-; X86-NEXT: andl %ecx, %eax
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: andl $7, %ecx
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: lock btcw %cx, (%edx)
+; X86-NEXT: setb %al
+; X86-NEXT: # kill: def $cl killed $cl killed $ecx
+; X86-NEXT: shll %cl, %eax
; X86-NEXT: # kill: def $ax killed $ax killed $eax
-; X86-NEXT: popl %esi
; X86-NEXT: retl
;
; X64-LABEL: atomic_shl1_small_mask_xor_16_gpr_val:
; X64: # %bb.0: # %entry
; X64-NEXT: movl %esi, %ecx
-; X64-NEXT: andb $7, %cl
-; X64-NEXT: movl $1, %edx
+; X64-NEXT: andl $7, %ecx
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: lock btcw %cx, (%rdi)
+; X64-NEXT: setb %al
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
-; X64-NEXT: shll %cl, %edx
-; X64-NEXT: movzwl (%rdi), %eax
-; X64-NEXT: movzwl %dx, %ecx
-; X64-NEXT: .p2align 4, 0x90
-; X64-NEXT: .LBB13_1: # %atomicrmw.start
-; X64-NEXT: # =>This Inner Loop Header: Depth=1
-; X64-NEXT: movl %eax, %edx
-; X64-NEXT: xorl %ecx, %edx
-; X64-NEXT: # kill: def $ax killed $ax killed $eax
-; X64-NEXT: lock cmpxchgw %dx, (%rdi)
-; X64-NEXT: # kill: def $ax killed $ax def $eax
-; X64-NEXT: jne .LBB13_1
-; X64-NEXT: # %bb.2: # %atomicrmw.end
-; X64-NEXT: andl %ecx, %eax
+; X64-NEXT: shll %cl, %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
entry:
@@ -936,47 +913,26 @@ entry:
define zeroext i16 @atomic_shl1_mask01_xor_16_gpr_val(ptr %v, i16 zeroext %c) nounwind {
; X86-LABEL: atomic_shl1_mask01_xor_16_gpr_val:
; X86: # %bb.0: # %entry
-; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: andb $15, %cl
-; X86-NEXT: movl $1, %edx
-; X86-NEXT: shll %cl, %edx
-; X86-NEXT: movzwl (%esi), %eax
-; X86-NEXT: .p2align 4, 0x90
-; X86-NEXT: .LBB16_1: # %atomicrmw.start
-; X86-NEXT: # =>This Inner Loop Header: Depth=1
-; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: xorl %edx, %ecx
-; X86-NEXT: # kill: def $ax killed $ax killed $eax
-; X86-NEXT: lock cmpxchgw %cx, (%esi)
-; X86-NEXT: # kill: def $ax killed $ax def $eax
-; X86-NEXT: jne .LBB16_1
-; X86-NEXT: # %bb.2: # %atomicrmw.end
-; X86-NEXT: andl %edx, %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: andl $15, %ecx
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: lock btcw %cx, (%edx)
+; X86-NEXT: setb %al
+; X86-NEXT: # kill: def $cl killed $cl killed $ecx
+; X86-NEXT: shll %cl, %eax
; X86-NEXT: # kill: def $ax killed $ax killed $eax
-; X86-NEXT: popl %esi
; X86-NEXT: retl
;
; X64-LABEL: atomic_shl1_mask01_xor_16_gpr_val:
; X64: # %bb.0: # %entry
; X64-NEXT: movl %esi, %ecx
-; X64-NEXT: andb $15, %cl
-; X64-NEXT: movl $1, %edx
+; X64-NEXT: andl $15, %ecx
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: lock btcw %cx, (%rdi)
+; X64-NEXT: setb %al
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
-; X64-NEXT: shll %cl, %edx
-; X64-NEXT: movzwl (%rdi), %eax
-; X64-NEXT: .p2align 4, 0x90
-; X64-NEXT: .LBB16_1: # %atomicrmw.start
-; X64-NEXT: # =>This Inner Loop Header: Depth=1
-; X64-NEXT: movl %eax, %ecx
-; X64-NEXT: xorl %edx, %ecx
-; X64-NEXT: # kill: def $ax killed $ax killed $eax
-; X64-NEXT: lock cmpxchgw %cx, (%rdi)
-; X64-NEXT: # kill: def $ax killed $ax def $eax
-; X64-NEXT: jne .LBB16_1
-; X64-NEXT: # %bb.2: # %atomicrmw.end
-; X64-NEXT: andl %edx, %eax
+; X64-NEXT: shll %cl, %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
entry:
@@ -2384,56 +2340,27 @@ entry:
define zeroext i16 @atomic_shl1_small_mask_and_16_gpr_val(ptr %v, i16 zeroext %c) nounwind {
; X86-LABEL: atomic_shl1_small_mask_and_16_gpr_val:
; X86: # %bb.0: # %entry
-; X86-NEXT: pushl %edi
-; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: andb $7, %cl
-; X86-NEXT: movl $1, %esi
-; X86-NEXT: shll %cl, %esi
-; X86-NEXT: movw $-2, %di
-; X86-NEXT: rolw %cl, %di
-; X86-NEXT: movzwl (%edx), %eax
-; X86-NEXT: .p2align 4, 0x90
-; X86-NEXT: .LBB37_1: # %atomicrmw.start
-; X86-NEXT: # =>This Inner Loop Header: Depth=1
-; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: andl %edi, %ecx
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: andl $7, %ecx
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: lock btrw %cx, (%edx)
+; X86-NEXT: setb %al
+; X86-NEXT: # kill: def $cl killed $cl killed $ecx
+; X86-NEXT: shll %cl, %eax
; X86-NEXT: # kill: def $ax killed $ax killed $eax
-; X86-NEXT: lock cmpxchgw %cx, (%edx)
-; X86-NEXT: # kill: def $ax killed $ax def $eax
-; X86-NEXT: jne .LBB37_1
-; X86-NEXT: # %bb.2: # %atomicrmw.end
-; X86-NEXT: movzwl %si, %ecx
-; X86-NEXT: andl %eax, %ecx
-; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: popl %esi
-; X86-NEXT: popl %edi
; X86-NEXT: retl
;
; X64-LABEL: atomic_shl1_small_mask_and_16_gpr_val:
; X64: # %bb.0: # %entry
; X64-NEXT: movl %esi, %ecx
-; X64-NEXT: andb $7, %cl
-; X64-NEXT: movl $1, %edx
-; X64-NEXT: shll %cl, %edx
-; X64-NEXT: movw $-2, %si
+; X64-NEXT: andl $7, %ecx
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: lock btrw %cx, (%rdi)
+; X64-NEXT: setb %al
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
-; X64-NEXT: rolw %cl, %si
-; X64-NEXT: movzwl (%rdi), %eax
-; X64-NEXT: .p2align 4, 0x90
-; X64-NEXT: .LBB37_1: # %atomicrmw.start
-; X64-NEXT: # =>This Inner Loop Header: Depth=1
-; X64-NEXT: movl %eax, %ecx
-; X64-NEXT: andl %esi, %ecx
+; X64-NEXT: shll %cl, %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
-; X64-NEXT: lock cmpxchgw %cx, (%rdi)
-; X64-NEXT: # kill: def $ax killed $ax def $eax
-; X64-NEXT: jne .LBB37_1
-; X64-NEXT: # %bb.2: # %atomicrmw.end
-; X64-NEXT: movzwl %dx, %ecx
-; X64-NEXT: andl %eax, %ecx
-; X64-NEXT: movl %ecx, %eax
; X64-NEXT: retq
entry:
%0 = and i16 %c, 7
@@ -2575,55 +2502,26 @@ entry:
define zeroext i16 @atomic_shl1_mask01_and_16_gpr_val(ptr %v, i16 zeroext %c) nounwind {
; X86-LABEL: atomic_shl1_mask01_and_16_gpr_val:
; X86: # %bb.0: # %entry
-; X86-NEXT: pushl %edi
-; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: andb $15, %cl
-; X86-NEXT: movl $1, %edx
-; X86-NEXT: shll %cl, %edx
-; X86-NEXT: movw $-2, %di
-; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: rolw %cl, %di
-; X86-NEXT: movzwl (%esi), %eax
-; X86-NEXT: .p2align 4, 0x90
-; X86-NEXT: .LBB40_1: # %atomicrmw.start
-; X86-NEXT: # =>This Inner Loop Header: Depth=1
-; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: andl %edi, %ecx
-; X86-NEXT: # kill: def $ax killed $ax killed $eax
-; X86-NEXT: lock cmpxchgw %cx, (%esi)
-; X86-NEXT: # kill: def $ax killed $ax def $eax
-; X86-NEXT: jne .LBB40_1
-; X86-NEXT: # %bb.2: # %atomicrmw.end
-; X86-NEXT: andl %edx, %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: andl $15, %ecx
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: lock btrw %cx, (%edx)
+; X86-NEXT: setb %al
+; X86-NEXT: # kill: def $cl killed $cl killed $ecx
+; X86-NEXT: shll %cl, %eax
; X86-NEXT: # kill: def $ax killed $ax killed $eax
-; X86-NEXT: popl %esi
-; X86-NEXT: popl %edi
; X86-NEXT: retl
;
; X64-LABEL: atomic_shl1_mask01_and_16_gpr_val:
; X64: # %bb.0: # %entry
; X64-NEXT: movl %esi, %ecx
-; X64-NEXT: andb $15, %cl
-; X64-NEXT: movl $1, %edx
-; X64-NEXT: shll %cl, %edx
-; X64-NEXT: movw $-2, %r8w
-; X64-NEXT: movl %esi, %ecx
-; X64-NEXT: rolw %cl, %r8w
-; X64-NEXT: movzwl (%rdi), %eax
-; X64-NEXT: .p2align 4, 0x90
-; X64-NEXT: .LBB40_1: # %atomicrmw.start
-; X64-NEXT: # =>This Inner Loop Header: Depth=1
-; X64-NEXT: movl %eax, %ecx
-; X64-NEXT: andl %r8d, %ecx
-; X64-NEXT: # kill: def $ax killed $ax killed $eax
-; X64-NEXT: lock cmpxchgw %cx, (%rdi)
-; X64-NEXT: # kill: def $ax killed $ax def $eax
-; X64-NEXT: jne .LBB40_1
-; X64-NEXT: # %bb.2: # %atomicrmw.end
-; X64-NEXT: andl %edx, %eax
+; X64-NEXT: andl $15, %ecx
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: lock btrw %cx, (%rdi)
+; X64-NEXT: setb %al
+; X64-NEXT: # kill: def $cl killed $cl killed $ecx
+; X64-NEXT: shll %cl, %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
entry:
@@ -3855,40 +3753,25 @@ return: ; preds = %entry, %if.then
define i32 @atomic_shl1_or_32_gpr_val(ptr %v, i32 %c) nounwind {
; X86-LABEL: atomic_shl1_or_32_gpr_val:
; X86: # %bb.0: # %entry
-; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl $1, %edx
-; X86-NEXT: shll %cl, %edx
-; X86-NEXT: movl (%esi), %eax
-; X86-NEXT: .p2align 4, 0x90
-; X86-NEXT: .LBB60_1: # %atomicrmw.start
-; X86-NEXT: # =>This Inner Loop Header: Depth=1
-; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: orl %edx, %ecx
-; X86-NEXT: lock cmpxchgl %ecx, (%esi)
-; X86-NEXT: jne .LBB60_1
-; X86-NEXT: # %bb.2: # %atomicrmw.end
-; X86-NEXT: andl %edx, %eax
-; X86-NEXT: popl %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: andl $31, %ecx
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: lock btsl %ecx, (%edx)
+; X86-NEXT: setb %al
+; X86-NEXT: # kill: def $cl killed $cl killed $ecx
+; X86-NEXT: shll %cl, %eax
; X86-NEXT: retl
;
; X64-LABEL: atomic_shl1_or_32_gpr_val:
; X64: # %bb.0: # %entry
; X64-NEXT: movl %esi, %ecx
-; X64-NEXT: movl $1, %edx
+; X64-NEXT: andl $31, %ecx
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: lock btsl %ecx, (%rdi)
+; X64-NEXT: setb %al
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
-; X64-NEXT: shll %cl, %edx
-; X64-NEXT: movl (%rdi), %eax
-; X64-NEXT: .p2align 4, 0x90
-; X64-NEXT: .LBB60_1: # %atomicrmw.start
-; X64-NEXT: # =>This Inner Loop Header: Depth=1
-; X64-NEXT: movl %eax, %ecx
-; X64-NEXT: orl %edx, %ecx
-; X64-NEXT: lock cmpxchgl %ecx, (%rdi)
-; X64-NEXT: jne .LBB60_1
-; X64-NEXT: # %bb.2: # %atomicrmw.end
-; X64-NEXT: andl %edx, %eax
+; X64-NEXT: shll %cl, %eax
; X64-NEXT: retq
entry:
%shl = shl nuw i32 1, %c
@@ -3900,42 +3783,25 @@ entry:
define i32 @atomic_shl1_small_mask_or_32_gpr_val(ptr %v, i32 %c) nounwind {
; X86-LABEL: atomic_shl1_small_mask_or_32_gpr_val:
; X86: # %bb.0: # %entry
-; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: andb $15, %cl
-; X86-NEXT: movl $1, %esi
-; X86-NEXT: shll %cl, %esi
-; X86-NEXT: movl (%edx), %eax
-; X86-NEXT: .p2align 4, 0x90
-; X86-NEXT: .LBB61_1: # %atomicrmw.start
-; X86-NEXT: # =>This Inner Loop Header: Depth=1
-; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: orl %esi, %ecx
-; X86-NEXT: lock cmpxchgl %ecx, (%edx)
-; X86-NEXT: jne .LBB61_1
-; X86-NEXT: # %bb.2: # %atomicrmw.end
-; X86-NEXT: andl %esi, %eax
-; X86-NEXT: popl %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: andl $15, %ecx
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: lock btsl %ecx, (%edx)
+; X86-NEXT: setb %al
+; X86-NEXT: # kill: def $cl killed $cl killed $ecx
+; X86-NEXT: shll %cl, %eax
; X86-NEXT: retl
;
; X64-LABEL: atomic_shl1_small_mask_or_32_gpr_val:
; X64: # %bb.0: # %entry
; X64-NEXT: movl %esi, %ecx
-; X64-NEXT: andb $15, %cl
-; X64-NEXT: movl $1, %edx
+; X64-NEXT: andl $15, %ecx
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: lock btsl %ecx, (%rdi)
+; X64-NEXT: setb %al
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
-; X64-NEXT: shll %cl, %edx
-; X64-NEXT: movl (%rdi), %eax
-; X64-NEXT: .p2align 4, 0x90
-; X64-NEXT: .LBB61_1: # %atomicrmw.start
-; X64-NEXT: # =>This Inner Loop Header: Depth=1
-; X64-NEXT: movl %eax, %ecx
-; X64-NEXT: orl %edx, %ecx
-; X64-NEXT: lock cmpxchgl %ecx, (%rdi)
-; X64-NEXT: jne .LBB61_1
-; X64-NEXT: # %bb.2: # %atomicrmw.end
-; X64-NEXT: andl %edx, %eax
+; X64-NEXT: shll %cl, %eax
; X64-NEXT: retq
entry:
%0 = and i32 %c, 15
@@ -3948,47 +3814,25 @@ entry:
define i32 @atomic_shl1_mask0_or_32_gpr_val(ptr %v, i32 %c) nounwind {
; X86-LABEL: atomic_shl1_mask0_or_32_gpr_val:
; X86: # %bb.0: # %entry
-; X86-NEXT: pushl %edi
-; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl $1, %esi
-; X86-NEXT: shll %cl, %esi
-; X86-NEXT: movl (%edx), %eax
-; X86-NEXT: .p2align 4, 0x90
-; X86-NEXT: .LBB62_1: # %atomicrmw.start
-; X86-NEXT: # =>This Inner Loop Header: Depth=1
-; X86-NEXT: movl %eax, %edi
-; X86-NEXT: orl %esi, %edi
-; X86-NEXT: lock cmpxchgl %edi, (%edx)
-; X86-NEXT: jne .LBB62_1
-; X86-NEXT: # %bb.2: # %atomicrmw.end
-; X86-NEXT: movl $1, %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: andl $31, %ecx
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: lock btsl %ecx, (%edx)
+; X86-NEXT: setb %al
; X86-NEXT: # kill: def $cl killed $cl killed $ecx
-; X86-NEXT: shll %cl, %edx
-; X86-NEXT: andl %edx, %eax
-; X86-NEXT: popl %esi
-; X86-NEXT: popl %edi
+; X86-NEXT: shll %cl, %eax
; X86-NEXT: retl
;
; X64-LABEL: atomic_shl1_mask0_or_32_gpr_val:
; X64: # %bb.0: # %entry
; X64-NEXT: movl %esi, %ecx
-; X64-NEXT: movl $1, %edx
-; X64-NEXT: shll %cl, %edx
-; X64-NEXT: movl (%rdi), %eax
-; X64-NEXT: .p2align 4, 0x90
-; X64-NEXT: .LBB62_1: # %atomicrmw.start
-; X64-NEXT: # =>This Inner Loop Header: Depth=1
-; X64-NEXT: movl %eax, %esi
-; X64-NEXT: orl %edx, %esi
-; X64-NEXT: lock cmpxchgl %esi, (%rdi)
-; X64-NEXT: jne .LBB62_1
-; X64-NEXT: # %bb.2: # %atomicrmw.end
-; X64-NEXT: movl $1, %edx
+; X64-NEXT: andl $31, %ecx
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: lock btsl %ecx, (%rdi)
+; X64-NEXT: setb %al
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
-; X64-NEXT: shll %cl, %edx
-; X64-NEXT: andl %edx, %eax
+; X64-NEXT: shll %cl, %eax
; X64-NEXT: retq
entry:
%0 = and i32 %c, 31
@@ -4002,47 +3846,25 @@ entry:
define i32 @atomic_shl1_mask1_or_32_gpr_val(ptr %v, i32 %c) nounwind {
; X86-LABEL: atomic_shl1_mask1_or_32_gpr_val:
; X86: # %bb.0: # %entry
-; X86-NEXT: pushl %edi
-; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl $1, %esi
-; X86-NEXT: shll %cl, %esi
-; X86-NEXT: movl (%edx), %eax
-; X86-NEXT: .p2align 4, 0x90
-; X86-NEXT: .LBB63_1: # %atomicrmw.start
-; X86-NEXT: # =>This Inner Loop Header: Depth=1
-; X86-NEXT: movl %eax, %edi
-; X86-NEXT: orl %esi, %edi
-; X86-NEXT: lock cmpxchgl %edi, (%edx)
-; X86-NEXT: jne .LBB63_1
-; X86-NEXT: # %bb.2: # %atomicrmw.end
-; X86-NEXT: movl $1, %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: andl $31, %ecx
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: lock btsl %ecx, (%edx)
+; X86-NEXT: setb %al
; X86-NEXT: # kill: def $cl killed $cl killed $ecx
-; X86-NEXT: shll %cl, %edx
-; X86-NEXT: andl %edx, %eax
-; X86-NEXT: popl %esi
-; X86-NEXT: popl %edi
+; X86-NEXT: shll %cl, %eax
; X86-NEXT: retl
;
; X64-LABEL: atomic_shl1_mask1_or_32_gpr_val:
; X64: # %bb.0: # %entry
; X64-NEXT: movl %esi, %ecx
-; X64-NEXT: movl $1, %edx
-; X64-NEXT: shll %cl, %edx
-; X64-NEXT: movl (%rdi), %eax
-; X64-NEXT: .p2align 4, 0x90
-; X64-NEXT: .LBB63_1: # %atomicrmw.start
-; X64-NEXT: # =>This Inner Loop Header: Depth=1
-; X64-NEXT: movl %eax, %esi
-; X64-NEXT: orl %edx, %esi
-; X64-NEXT: lock cmpxchgl %esi, (%rdi)
-; X64-NEXT: jne .LBB63_1
-; X64-NEXT: # %bb.2: # %atomicrmw.end
-; X64-NEXT: movl $1, %edx
+; X64-NEXT: andl $31, %ecx
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: lock btsl %ecx, (%rdi)
+; X64-NEXT: setb %al
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
-; X64-NEXT: shll %cl, %edx
-; X64-NEXT: andl %edx, %eax
+; X64-NEXT: shll %cl, %eax
; X64-NEXT: retq
entry:
%shl = shl nuw i32 1, %c
@@ -4056,40 +3878,25 @@ entry:
define i32 @atomic_shl1_mask01_or_32_gpr_val(ptr %v, i32 %c) nounwind {
; X86-LABEL: atomic_shl1_mask01_or_32_gpr_val:
; X86: # %bb.0: # %entry
-; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl $1, %edx
-; X86-NEXT: shll %cl, %edx
-; X86-NEXT: movl (%esi), %eax
-; X86-NEXT: .p2align 4, 0x90
-; X86-NEXT: .LBB64_1: # %atomicrmw.start
-; X86-NEXT: # =>This Inner Loop Header: Depth=1
-; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: orl %edx, %ecx
-; X86-NEXT: lock cmpxchgl %ecx, (%esi)
-; X86-NEXT: jne .LBB64_1
-; X86-NEXT: # %bb.2: # %atomicrmw.end
-; X86-NEXT: andl %edx, %eax
-; X86-NEXT: popl %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: andl $31, %ecx
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: lock btsl %ecx, (%edx)
+; X86-NEXT: setb %al
+; X86-NEXT: # kill: def $cl killed $cl killed $ecx
+; X86-NEXT: shll %cl, %eax
; X86-NEXT: retl
;
; X64-LABEL: atomic_shl1_mask01_or_32_gpr_val:
; X64: # %bb.0: # %entry
; X64-NEXT: movl %esi, %ecx
-; X64-NEXT: movl $1, %edx
+; X64-NEXT: andl $31, %ecx
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: lock btsl %ecx, (%rdi)
+; X64-NEXT: setb %al
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
-; X64-NEXT: shll %cl, %edx
-; X64-NEXT: movl (%rdi), %eax
-; X64-NEXT: .p2align 4, 0x90
-; X64-NEXT: .LBB64_1: # %atomicrmw.start
-; X64-NEXT: # =>This Inner Loop Header: Depth=1
-; X64-NEXT: movl %eax, %ecx
-; X64-NEXT: orl %edx, %ecx
-; X64-NEXT: lock cmpxchgl %ecx, (%rdi)
-; X64-NEXT: jne .LBB64_1
-; X64-NEXT: # %bb.2: # %atomicrmw.end
-; X64-NEXT: andl %edx, %eax
+; X64-NEXT: shll %cl, %eax
; X64-NEXT: retq
entry:
%0 = and i32 %c, 31
@@ -4807,54 +4614,30 @@ entry:
define i32 @atomic_shl1_or_32_gpr_br(ptr %v, i32 %c) nounwind {
; X86-LABEL: atomic_shl1_or_32_gpr_br:
; X86: # %bb.0: # %entry
-; X86-NEXT: pushl %edi
-; X86-NEXT: pushl %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl $1, %esi
-; X86-NEXT: shll %cl, %esi
-; X86-NEXT: movl (%edx), %eax
-; X86-NEXT: .p2align 4, 0x90
-; X86-NEXT: .LBB78_1: # %atomicrmw.start
-; X86-NEXT: # =>This Inner Loop Header: Depth=1
-; X86-NEXT: movl %eax, %edi
-; X86-NEXT: orl %esi, %edi
-; X86-NEXT: lock cmpxchgl %edi, (%edx)
-; X86-NEXT: jne .LBB78_1
-; X86-NEXT: # %bb.2: # %atomicrmw.end
-; X86-NEXT: testl %esi, %eax
-; X86-NEXT: je .LBB78_3
-; X86-NEXT: # %bb.4: # %if.then
-; X86-NEXT: movl (%edx,%ecx,4), %eax
-; X86-NEXT: jmp .LBB78_5
-; X86-NEXT: .LBB78_3:
+; X86-NEXT: movl %eax, %edx
+; X86-NEXT: andl $31, %edx
+; X86-NEXT: lock btsl %edx, (%ecx)
+; X86-NEXT: jae .LBB78_1
+; X86-NEXT: # %bb.2: # %if.then
+; X86-NEXT: movl (%ecx,%eax,4), %eax
+; X86-NEXT: retl
+; X86-NEXT: .LBB78_1:
; X86-NEXT: movl $123, %eax
-; X86-NEXT: .LBB78_5: # %return
-; X86-NEXT: popl %esi
-; X86-NEXT: popl %edi
; X86-NEXT: retl
;
; X64-LABEL: atomic_shl1_or_32_gpr_br:
; X64: # %bb.0: # %entry
-; X64-NEXT: movl %esi, %ecx
-; X64-NEXT: movl $1, %edx
-; X64-NEXT: shll %cl, %edx
-; X64-NEXT: movl (%rdi), %eax
-; X64-NEXT: .p2align 4, 0x90
-; X64-NEXT: .LBB78_1: # %atomicrmw.start
-; X64-NEXT: # =>This Inner Loop Header: Depth=1
-; X64-NEXT: movl %eax, %esi
-; X64-NEXT: orl %edx, %esi
-; X64-NEXT: lock cmpxchgl %esi, (%rdi)
-; X64-NEXT: jne .LBB78_1
-; X64-NEXT: # %bb.2: # %atomicrmw.end
-; X64-NEXT: testl %edx, %eax
-; X64-NEXT: je .LBB78_3
-; X64-NEXT: # %bb.4: # %if.then
-; X64-NEXT: movl %ecx, %eax
+; X64-NEXT: movl %esi, %eax
+; X64-NEXT: andl $31, %eax
+; X64-NEXT: lock btsl %eax, (%rdi)
+; X64-NEXT: jae .LBB78_1
+; X64-NEXT: # %bb.2: # %if.then
+; X64-NEXT: movl %esi, %eax
; X64-NEXT: movl (%rdi,%rax,4), %eax
; X64-NEXT: retq
-; X64-NEXT: .LBB78_3:
+; X64-NEXT: .LBB78_1:
; X64-NEXT: movl $123, %eax
; X64-NEXT: retq
entry:
@@ -4878,56 +4661,28 @@ return: ; preds = %entry, %if.then
define i32 @atomic_shl1_small_mask_or_32_gpr_br(ptr %v, i32 %c) nounwind {
; X86-LABEL: atomic_shl1_small_mask_or_32_gpr_br:
; X86: # %bb.0: # %entry
-; X86-NEXT: pushl %edi
-; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: andl $15, %ecx
-; X86-NEXT: movl $1, %esi
-; X86-NEXT: shll %cl, %esi
-; X86-NEXT: movl (%edx), %eax
-; X86-NEXT: .p2align 4, 0x90
-; X86-NEXT: .LBB79_1: # %atomicrmw.start
-; X86-NEXT: # =>This Inner Loop Header: Depth=1
-; X86-NEXT: movl %eax, %edi
-; X86-NEXT: orl %esi, %edi
-; X86-NEXT: lock cmpxchgl %edi, (%edx)
-; X86-NEXT: jne .LBB79_1
-; X86-NEXT: # %bb.2: # %atomicrmw.end
-; X86-NEXT: testl %esi, %eax
-; X86-NEXT: je .LBB79_3
-; X86-NEXT: # %bb.4: # %if.then
-; X86-NEXT: movl (%edx,%ecx,4), %eax
-; X86-NEXT: jmp .LBB79_5
-; X86-NEXT: .LBB79_3:
+; X86-NEXT: lock btsl %ecx, (%eax)
+; X86-NEXT: jae .LBB79_1
+; X86-NEXT: # %bb.2: # %if.then
+; X86-NEXT: movl (%eax,%ecx,4), %eax
+; X86-NEXT: retl
+; X86-NEXT: .LBB79_1:
; X86-NEXT: movl $123, %eax
-; X86-NEXT: .LBB79_5: # %return
-; X86-NEXT: popl %esi
-; X86-NEXT: popl %edi
; X86-NEXT: retl
;
; X64-LABEL: atomic_shl1_small_mask_or_32_gpr_br:
; X64: # %bb.0: # %entry
-; X64-NEXT: movl %esi, %ecx
-; X64-NEXT: andl $15, %ecx
-; X64-NEXT: movl $1, %edx
-; X64-NEXT: shll %cl, %edx
-; X64-NEXT: movl (%rdi), %eax
-; X64-NEXT: .p2align 4, 0x90
-; X64-NEXT: .LBB79_1: # %atomicrmw.start
-; X64-NEXT: # =>This Inner Loop Header: Depth=1
-; X64-NEXT: movl %eax, %esi
-; X64-NEXT: orl %edx, %esi
-; X64-NEXT: lock cmpxchgl %esi, (%rdi)
-; X64-NEXT: jne .LBB79_1
-; X64-NEXT: # %bb.2: # %atomicrmw.end
-; X64-NEXT: testl %edx, %eax
-; X64-NEXT: je .LBB79_3
-; X64-NEXT: # %bb.4: # %if.then
-; X64-NEXT: movl %ecx, %eax
+; X64-NEXT: andl $15, %esi
+; X64-NEXT: lock btsl %esi, (%rdi)
+; X64-NEXT: jae .LBB79_1
+; X64-NEXT: # %bb.2: # %if.then
+; X64-NEXT: movl %esi, %eax
; X64-NEXT: movl (%rdi,%rax,4), %eax
; X64-NEXT: retq
-; X64-NEXT: .LBB79_3:
+; X64-NEXT: .LBB79_1:
; X64-NEXT: movl $123, %eax
; X64-NEXT: retq
entry:
@@ -4952,54 +4707,30 @@ return: ; preds = %entry, %if.then
define i32 @atomic_shl1_mask0_or_32_gpr_br(ptr %v, i32 %c) nounwind {
; X86-LABEL: atomic_shl1_mask0_or_32_gpr_br:
; X86: # %bb.0: # %entry
-; X86-NEXT: pushl %edi
-; X86-NEXT: pushl %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl $1, %esi
-; X86-NEXT: shll %cl, %esi
-; X86-NEXT: movl (%edx), %eax
-; X86-NEXT: .p2align 4, 0x90
-; X86-NEXT: .LBB80_1: # %atomicrmw.start
-; X86-NEXT: # =>This Inner Loop Header: Depth=1
-; X86-NEXT: movl %eax, %edi
-; X86-NEXT: orl %esi, %edi
-; X86-NEXT: lock cmpxchgl %edi, (%edx)
-; X86-NEXT: jne .LBB80_1
-; X86-NEXT: # %bb.2: # %atomicrmw.end
-; X86-NEXT: btl %ecx, %eax
-; X86-NEXT: jae .LBB80_3
-; X86-NEXT: # %bb.4: # %if.then
-; X86-NEXT: movl (%edx,%ecx,4), %eax
-; X86-NEXT: jmp .LBB80_5
-; X86-NEXT: .LBB80_3:
+; X86-NEXT: movl %eax, %edx
+; X86-NEXT: andl $31, %edx
+; X86-NEXT: lock btsl %edx, (%ecx)
+; X86-NEXT: jae .LBB80_1
+; X86-NEXT: # %bb.2: # %if.then
+; X86-NEXT: movl (%ecx,%eax,4), %eax
+; X86-NEXT: retl
+; X86-NEXT: .LBB80_1:
; X86-NEXT: movl $123, %eax
-; X86-NEXT: .LBB80_5: # %return
-; X86-NEXT: popl %esi
-; X86-NEXT: popl %edi
; X86-NEXT: retl
;
; X64-LABEL: atomic_shl1_mask0_or_32_gpr_br:
; X64: # %bb.0: # %entry
-; X64-NEXT: movl %esi, %ecx
-; X64-NEXT: movl $1, %edx
-; X64-NEXT: shll %cl, %edx
-; X64-NEXT: movl (%rdi), %eax
-; X64-NEXT: .p2align 4, 0x90
-; X64-NEXT: .LBB80_1: # %atomicrmw.start
-; X64-NEXT: # =>This Inner Loop Header: Depth=1
-; X64-NEXT: movl %eax, %esi
-; X64-NEXT: orl %edx, %esi
-; X64-NEXT: lock cmpxchgl %esi, (%rdi)
-; X64-NEXT: jne .LBB80_1
-; X64-NEXT: # %bb.2: # %atomicrmw.end
-; X64-NEXT: btl %ecx, %eax
-; X64-NEXT: jae .LBB80_3
-; X64-NEXT: # %bb.4: # %if.then
-; X64-NEXT: movl %ecx, %eax
+; X64-NEXT: movl %esi, %eax
+; X64-NEXT: andl $31, %eax
+; X64-NEXT: lock btsl %eax, (%rdi)
+; X64-NEXT: jae .LBB80_1
+; X64-NEXT: # %bb.2: # %if.then
+; X64-NEXT: movl %esi, %eax
; X64-NEXT: movl (%rdi,%rax,4), %eax
; X64-NEXT: retq
-; X64-NEXT: .LBB80_3:
+; X64-NEXT: .LBB80_1:
; X64-NEXT: movl $123, %eax
; X64-NEXT: retq
entry:
@@ -5025,54 +4756,30 @@ return: ; preds = %entry, %if.then
define i32 @atomic_shl1_mask1_or_32_gpr_br(ptr %v, i32 %c) nounwind {
; X86-LABEL: atomic_shl1_mask1_or_32_gpr_br:
; X86: # %bb.0: # %entry
-; X86-NEXT: pushl %edi
-; X86-NEXT: pushl %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl $1, %esi
-; X86-NEXT: shll %cl, %esi
-; X86-NEXT: movl (%edx), %eax
-; X86-NEXT: .p2align 4, 0x90
-; X86-NEXT: .LBB81_1: # %atomicrmw.start
-; X86-NEXT: # =>This Inner Loop Header: Depth=1
-; X86-NEXT: movl %eax, %edi
-; X86-NEXT: orl %esi, %edi
-; X86-NEXT: lock cmpxchgl %edi, (%edx)
-; X86-NEXT: jne .LBB81_1
-; X86-NEXT: # %bb.2: # %atomicrmw.end
-; X86-NEXT: btl %ecx, %eax
-; X86-NEXT: jae .LBB81_3
-; X86-NEXT: # %bb.4: # %if.then
-; X86-NEXT: movl (%edx,%ecx,4), %eax
-; X86-NEXT: jmp .LBB81_5
-; X86-NEXT: .LBB81_3:
+; X86-NEXT: movl %eax, %edx
+; X86-NEXT: andl $31, %edx
+; X86-NEXT: lock btsl %edx, (%ecx)
+; X86-NEXT: jae .LBB81_1
+; X86-NEXT: # %bb.2: # %if.then
+; X86-NEXT: movl (%ecx,%eax,4), %eax
+; X86-NEXT: retl
+; X86-NEXT: .LBB81_1:
; X86-NEXT: movl $123, %eax
-; X86-NEXT: .LBB81_5: # %return
-; X86-NEXT: popl %esi
-; X86-NEXT: popl %edi
; X86-NEXT: retl
;
; X64-LABEL: atomic_shl1_mask1_or_32_gpr_br:
; X64: # %bb.0: # %entry
-; X64-NEXT: movl %esi, %ecx
-; X64-NEXT: movl $1, %edx
-; X64-NEXT: shll %cl, %edx
-; X64-NEXT: movl (%rdi), %eax
-; X64-NEXT: .p2align 4, 0x90
-; X64-NEXT: .LBB81_1: # %atomicrmw.start
-; X64-NEXT: # =>This Inner Loop Header: Depth=1
-; X64-NEXT: movl %eax, %esi
-; X64-NEXT: orl %edx, %esi
-; X64-NEXT: lock cmpxchgl %esi, (%rdi)
-; X64-NEXT: jne .LBB81_1
-; X64-NEXT: # %bb.2: # %atomicrmw.end
-; X64-NEXT: btl %ecx, %eax
-; X64-NEXT: jae .LBB81_3
-; X64-NEXT: # %bb.4: # %if.then
-; X64-NEXT: movl %ecx, %eax
+; X64-NEXT: movl %esi, %eax
+; X64-NEXT: andl $31, %eax
+; X64-NEXT: lock btsl %eax, (%rdi)
+; X64-NEXT: jae .LBB81_1
+; X64-NEXT: # %bb.2: # %if.then
+; X64-NEXT: movl %esi, %eax
; X64-NEXT: movl (%rdi,%rax,4), %eax
; X64-NEXT: retq
-; X64-NEXT: .LBB81_3:
+; X64-NEXT: .LBB81_1:
; X64-NEXT: movl $123, %eax
; X64-NEXT: retq
entry:
@@ -5098,54 +4805,30 @@ return: ; preds = %entry, %if.then
define i32 @atomic_shl1_mask01_or_32_gpr_br(ptr %v, i32 %c) nounwind {
; X86-LABEL: atomic_shl1_mask01_or_32_gpr_br:
; X86: # %bb.0: # %entry
-; X86-NEXT: pushl %edi
-; X86-NEXT: pushl %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl $1, %esi
-; X86-NEXT: shll %cl, %esi
-; X86-NEXT: movl (%edx), %eax
-; X86-NEXT: .p2align 4, 0x90
-; X86-NEXT: .LBB82_1: # %atomicrmw.start
-; X86-NEXT: # =>This Inner Loop Header: Depth=1
-; X86-NEXT: movl %eax, %edi
-; X86-NEXT: orl %esi, %edi
-; X86-NEXT: lock cmpxchgl %edi, (%edx)
-; X86-NEXT: jne .LBB82_1
-; X86-NEXT: # %bb.2: # %atomicrmw.end
-; X86-NEXT: testl %esi, %eax
-; X86-NEXT: je .LBB82_3
-; X86-NEXT: # %bb.4: # %if.then
-; X86-NEXT: movl (%edx,%ecx,4), %eax
-; X86-NEXT: jmp .LBB82_5
-; X86-NEXT: .LBB82_3:
+; X86-NEXT: movl %eax, %edx
+; X86-NEXT: andl $31, %edx
+; X86-NEXT: lock btsl %edx, (%ecx)
+; X86-NEXT: jae .LBB82_1
+; X86-NEXT: # %bb.2: # %if.then
+; X86-NEXT: movl (%ecx,%eax,4), %eax
+; X86-NEXT: retl
+; X86-NEXT: .LBB82_1:
; X86-NEXT: movl $123, %eax
-; X86-NEXT: .LBB82_5: # %return
-; X86-NEXT: popl %esi
-; X86-NEXT: popl %edi
; X86-NEXT: retl
;
; X64-LABEL: atomic_shl1_mask01_or_32_gpr_br:
; X64: # %bb.0: # %entry
-; X64-NEXT: movl %esi, %ecx
-; X64-NEXT: movl $1, %edx
-; X64-NEXT: shll %cl, %edx
-; X64-NEXT: movl (%rdi), %eax
-; X64-NEXT: .p2align 4, 0x90
-; X64-NEXT: .LBB82_1: # %atomicrmw.start
-; X64-NEXT: # =>This Inner Loop Header: Depth=1
-; X64-NEXT: movl %eax, %esi
-; X64-NEXT: orl %edx, %esi
-; X64-NEXT: lock cmpxchgl %esi, (%rdi)
-; X64-NEXT: jne .LBB82_1
-; X64-NEXT: # %bb.2: # %atomicrmw.end
-; X64-NEXT: testl %edx, %eax
-; X64-NEXT: je .LBB82_3
-; X64-NEXT: # %bb.4: # %if.then
-; X64-NEXT: movl %ecx, %eax
+; X64-NEXT: movl %esi, %eax
+; X64-NEXT: andl $31, %eax
+; X64-NEXT: lock btsl %eax, (%rdi)
+; X64-NEXT: jae .LBB82_1
+; X64-NEXT: # %bb.2: # %if.then
+; X64-NEXT: movl %esi, %eax
; X64-NEXT: movl (%rdi,%rax,4), %eax
; X64-NEXT: retq
-; X64-NEXT: .LBB82_3:
+; X64-NEXT: .LBB82_1:
; X64-NEXT: movl $123, %eax
; X64-NEXT: retq
entry:
@@ -5243,56 +4926,31 @@ return: ; preds = %entry, %if.then
define i32 @atomic_shl1_or_32_gpr_brz(ptr %v, i32 %c) nounwind {
; X86-LABEL: atomic_shl1_or_32_gpr_brz:
; X86: # %bb.0: # %entry
-; X86-NEXT: pushl %edi
-; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl $1, %edi
-; X86-NEXT: shll %cl, %edi
-; X86-NEXT: movl (%esi), %eax
-; X86-NEXT: .p2align 4, 0x90
-; X86-NEXT: .LBB84_1: # %atomicrmw.start
-; X86-NEXT: # =>This Inner Loop Header: Depth=1
-; X86-NEXT: movl %eax, %edx
-; X86-NEXT: orl %edi, %edx
-; X86-NEXT: lock cmpxchgl %edx, (%esi)
-; X86-NEXT: jne .LBB84_1
-; X86-NEXT: # %bb.2: # %atomicrmw.end
-; X86-NEXT: movl $123, %edx
-; X86-NEXT: testl %edi, %eax
-; X86-NEXT: jne .LBB84_4
-; X86-NEXT: # %bb.3: # %if.then
-; X86-NEXT: movl (%esi,%ecx,4), %edx
-; X86-NEXT: .LBB84_4: # %return
-; X86-NEXT: movl %edx, %eax
-; X86-NEXT: popl %esi
-; X86-NEXT: popl %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: andl $31, %eax
+; X86-NEXT: lock btsl %eax, (%edx)
+; X86-NEXT: movl $123, %eax
+; X86-NEXT: jae .LBB84_1
+; X86-NEXT: # %bb.2: # %return
+; X86-NEXT: retl
+; X86-NEXT: .LBB84_1: # %if.then
+; X86-NEXT: movl (%edx,%ecx,4), %eax
; X86-NEXT: retl
;
; X64-LABEL: atomic_shl1_or_32_gpr_brz:
; X64: # %bb.0: # %entry
-; X64-NEXT: movl %esi, %ecx
-; X64-NEXT: movl $1, %esi
-; X64-NEXT: shll %cl, %esi
-; X64-NEXT: movl (%rdi), %eax
-; X64-NEXT: .p2align 4, 0x90
-; X64-NEXT: .LBB84_1: # %atomicrmw.start
-; X64-NEXT: # =>This Inner Loop Header: Depth=1
-; X64-NEXT: movl %eax, %edx
-; X64-NEXT: orl %esi, %edx
-; X64-NEXT: lock cmpxchgl %edx, (%rdi)
-; X64-NEXT: jne .LBB84_1
-; X64-NEXT: # %bb.2: # %atomicrmw.end
-; X64-NEXT: movl $123, %edx
-; X64-NEXT: testl %esi, %eax
-; X64-NEXT: je .LBB84_3
-; X64-NEXT: # %bb.4: # %return
-; X64-NEXT: movl %edx, %eax
+; X64-NEXT: movl %esi, %eax
+; X64-NEXT: andl $31, %eax
+; X64-NEXT: lock btsl %eax, (%rdi)
+; X64-NEXT: movl $123, %eax
+; X64-NEXT: jae .LBB84_1
+; X64-NEXT: # %bb.2: # %return
; X64-NEXT: retq
-; X64-NEXT: .LBB84_3: # %if.then
-; X64-NEXT: movl %ecx, %eax
-; X64-NEXT: movl (%rdi,%rax,4), %edx
-; X64-NEXT: movl %edx, %eax
+; X64-NEXT: .LBB84_1: # %if.then
+; X64-NEXT: movl %esi, %eax
+; X64-NEXT: movl (%rdi,%rax,4), %eax
; X64-NEXT: retq
entry:
%shl = shl nuw i32 1, %c
@@ -5315,58 +4973,29 @@ return: ; preds = %entry, %if.then
define i32 @atomic_shl1_small_mask_or_32_gpr_brz(ptr %v, i32 %c) nounwind {
; X86-LABEL: atomic_shl1_small_mask_or_32_gpr_brz:
; X86: # %bb.0: # %entry
-; X86-NEXT: pushl %edi
-; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: andl $15, %ecx
-; X86-NEXT: movl $1, %edi
-; X86-NEXT: shll %cl, %edi
-; X86-NEXT: movl (%esi), %eax
-; X86-NEXT: .p2align 4, 0x90
-; X86-NEXT: .LBB85_1: # %atomicrmw.start
-; X86-NEXT: # =>This Inner Loop Header: Depth=1
-; X86-NEXT: movl %eax, %edx
-; X86-NEXT: orl %edi, %edx
-; X86-NEXT: lock cmpxchgl %edx, (%esi)
-; X86-NEXT: jne .LBB85_1
-; X86-NEXT: # %bb.2: # %atomicrmw.end
-; X86-NEXT: movl $123, %edx
-; X86-NEXT: testl %edi, %eax
-; X86-NEXT: jne .LBB85_4
-; X86-NEXT: # %bb.3: # %if.then
-; X86-NEXT: movl (%esi,%ecx,4), %edx
-; X86-NEXT: .LBB85_4: # %return
-; X86-NEXT: movl %edx, %eax
-; X86-NEXT: popl %esi
-; X86-NEXT: popl %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: andl $15, %edx
+; X86-NEXT: lock btsl %edx, (%ecx)
+; X86-NEXT: movl $123, %eax
+; X86-NEXT: jae .LBB85_1
+; X86-NEXT: # %bb.2: # %return
+; X86-NEXT: retl
+; X86-NEXT: .LBB85_1: # %if.then
+; X86-NEXT: movl (%ecx,%edx,4), %eax
; X86-NEXT: retl
;
; X64-LABEL: atomic_shl1_small_mask_or_32_gpr_brz:
; X64: # %bb.0: # %entry
-; X64-NEXT: movl %esi, %ecx
-; X64-NEXT: andl $15, %ecx
-; X64-NEXT: movl $1, %esi
-; X64-NEXT: shll %cl, %esi
-; X64-NEXT: movl (%rdi), %eax
-; X64-NEXT: .p2align 4, 0x90
-; X64-NEXT: .LBB85_1: # %atomicrmw.start
-; X64-NEXT: # =>This Inner Loop Header: Depth=1
-; X64-NEXT: movl %eax, %edx
-; X64-NEXT: orl %esi, %edx
-; X64-NEXT: lock cmpxchgl %edx, (%rdi)
-; X64-NEXT: jne .LBB85_1
-; X64-NEXT: # %bb.2: # %atomicrmw.end
-; X64-NEXT: movl $123, %edx
-; X64-NEXT: testl %esi, %eax
-; X64-NEXT: je .LBB85_3
-; X64-NEXT: # %bb.4: # %return
-; X64-NEXT: movl %edx, %eax
+; X64-NEXT: andl $15, %esi
+; X64-NEXT: lock btsl %esi, (%rdi)
+; X64-NEXT: movl $123, %eax
+; X64-NEXT: jae .LBB85_1
+; X64-NEXT: # %bb.2: # %return
; X64-NEXT: retq
-; X64-NEXT: .LBB85_3: # %if.then
-; X64-NEXT: movl %ecx, %eax
-; X64-NEXT: movl (%rdi,%rax,4), %edx
-; X64-NEXT: movl %edx, %eax
+; X64-NEXT: .LBB85_1: # %if.then
+; X64-NEXT: movl %esi, %eax
+; X64-NEXT: movl (%rdi,%rax,4), %eax
; X64-NEXT: retq
entry:
%0 = and i32 %c, 15
@@ -5390,56 +5019,31 @@ return: ; preds = %entry, %if.then
define i32 @atomic_shl1_mask0_or_32_gpr_brz(ptr %v, i32 %c) nounwind {
; X86-LABEL: atomic_shl1_mask0_or_32_gpr_brz:
; X86: # %bb.0: # %entry
-; X86-NEXT: pushl %edi
-; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl $1, %edx
-; X86-NEXT: shll %cl, %edx
-; X86-NEXT: movl (%esi), %eax
-; X86-NEXT: .p2align 4, 0x90
-; X86-NEXT: .LBB86_1: # %atomicrmw.start
-; X86-NEXT: # =>This Inner Loop Header: Depth=1
-; X86-NEXT: movl %eax, %edi
-; X86-NEXT: orl %edx, %edi
-; X86-NEXT: lock cmpxchgl %edi, (%esi)
-; X86-NEXT: jne .LBB86_1
-; X86-NEXT: # %bb.2: # %atomicrmw.end
-; X86-NEXT: movl $123, %edx
-; X86-NEXT: btl %ecx, %eax
-; X86-NEXT: jb .LBB86_4
-; X86-NEXT: # %bb.3: # %if.then
-; X86-NEXT: movl (%esi,%ecx,4), %edx
-; X86-NEXT: .LBB86_4: # %return
-; X86-NEXT: movl %edx, %eax
-; X86-NEXT: popl %esi
-; X86-NEXT: popl %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: andl $31, %eax
+; X86-NEXT: lock btsl %eax, (%edx)
+; X86-NEXT: movl $123, %eax
+; X86-NEXT: jae .LBB86_1
+; X86-NEXT: # %bb.2: # %return
+; X86-NEXT: retl
+; X86-NEXT: .LBB86_1: # %if.then
+; X86-NEXT: movl (%edx,%ecx,4), %eax
; X86-NEXT: retl
;
; X64-LABEL: atomic_shl1_mask0_or_32_gpr_brz:
; X64: # %bb.0: # %entry
-; X64-NEXT: movl %esi, %ecx
-; X64-NEXT: movl $1, %edx
-; X64-NEXT: shll %cl, %edx
-; X64-NEXT: movl (%rdi), %eax
-; X64-NEXT: .p2align 4, 0x90
-; X64-NEXT: .LBB86_1: # %atomicrmw.start
-; X64-NEXT: # =>This Inner Loop Header: Depth=1
-; X64-NEXT: movl %eax, %esi
-; X64-NEXT: orl %edx, %esi
-; X64-NEXT: lock cmpxchgl %esi, (%rdi)
-; X64-NEXT: jne .LBB86_1
-; X64-NEXT: # %bb.2: # %atomicrmw.end
-; X64-NEXT: movl $123, %edx
-; X64-NEXT: btl %ecx, %eax
-; X64-NEXT: jae .LBB86_3
-; X64-NEXT: # %bb.4: # %return
-; X64-NEXT: movl %edx, %eax
+; X64-NEXT: movl %esi, %eax
+; X64-NEXT: andl $31, %eax
+; X64-NEXT: lock btsl %eax, (%rdi)
+; X64-NEXT: movl $123, %eax
+; X64-NEXT: jae .LBB86_1
+; X64-NEXT: # %bb.2: # %return
; X64-NEXT: retq
-; X64-NEXT: .LBB86_3: # %if.then
-; X64-NEXT: movl %ecx, %eax
-; X64-NEXT: movl (%rdi,%rax,4), %edx
-; X64-NEXT: movl %edx, %eax
+; X64-NEXT: .LBB86_1: # %if.then
+; X64-NEXT: movl %esi, %eax
+; X64-NEXT: movl (%rdi,%rax,4), %eax
; X64-NEXT: retq
entry:
%rem = and i32 %c, 31
@@ -5464,56 +5068,31 @@ return: ; preds = %entry, %if.then
define i32 @atomic_shl1_mask1_or_32_gpr_brz(ptr %v, i32 %c) nounwind {
; X86-LABEL: atomic_shl1_mask1_or_32_gpr_brz:
; X86: # %bb.0: # %entry
-; X86-NEXT: pushl %edi
-; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl $1, %edx
-; X86-NEXT: shll %cl, %edx
-; X86-NEXT: movl (%esi), %eax
-; X86-NEXT: .p2align 4, 0x90
-; X86-NEXT: .LBB87_1: # %atomicrmw.start
-; X86-NEXT: # =>This Inner Loop Header: Depth=1
-; X86-NEXT: movl %eax, %edi
-; X86-NEXT: orl %edx, %edi
-; X86-NEXT: lock cmpxchgl %edi, (%esi)
-; X86-NEXT: jne .LBB87_1
-; X86-NEXT: # %bb.2: # %atomicrmw.end
-; X86-NEXT: movl $123, %edx
-; X86-NEXT: btl %ecx, %eax
-; X86-NEXT: jb .LBB87_4
-; X86-NEXT: # %bb.3: # %if.then
-; X86-NEXT: movl (%esi,%ecx,4), %edx
-; X86-NEXT: .LBB87_4: # %return
-; X86-NEXT: movl %edx, %eax
-; X86-NEXT: popl %esi
-; X86-NEXT: popl %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: andl $31, %eax
+; X86-NEXT: lock btsl %eax, (%edx)
+; X86-NEXT: movl $123, %eax
+; X86-NEXT: jae .LBB87_1
+; X86-NEXT: # %bb.2: # %return
+; X86-NEXT: retl
+; X86-NEXT: .LBB87_1: # %if.then
+; X86-NEXT: movl (%edx,%ecx,4), %eax
; X86-NEXT: retl
;
; X64-LABEL: atomic_shl1_mask1_or_32_gpr_brz:
; X64: # %bb.0: # %entry
-; X64-NEXT: movl %esi, %ecx
-; X64-NEXT: movl $1, %edx
-; X64-NEXT: shll %cl, %edx
-; X64-NEXT: movl (%rdi), %eax
-; X64-NEXT: .p2align 4, 0x90
-; X64-NEXT: .LBB87_1: # %atomicrmw.start
-; X64-NEXT: # =>This Inner Loop Header: Depth=1
-; X64-NEXT: movl %eax, %esi
-; X64-NEXT: orl %edx, %esi
-; X64-NEXT: lock cmpxchgl %esi, (%rdi)
-; X64-NEXT: jne .LBB87_1
-; X64-NEXT: # %bb.2: # %atomicrmw.end
-; X64-NEXT: movl $123, %edx
-; X64-NEXT: btl %ecx, %eax
-; X64-NEXT: jae .LBB87_3
-; X64-NEXT: # %bb.4: # %return
-; X64-NEXT: movl %edx, %eax
+; X64-NEXT: movl %esi, %eax
+; X64-NEXT: andl $31, %eax
+; X64-NEXT: lock btsl %eax, (%rdi)
+; X64-NEXT: movl $123, %eax
+; X64-NEXT: jae .LBB87_1
+; X64-NEXT: # %bb.2: # %return
; X64-NEXT: retq
-; X64-NEXT: .LBB87_3: # %if.then
-; X64-NEXT: movl %ecx, %eax
-; X64-NEXT: movl (%rdi,%rax,4), %edx
-; X64-NEXT: movl %edx, %eax
+; X64-NEXT: .LBB87_1: # %if.then
+; X64-NEXT: movl %esi, %eax
+; X64-NEXT: movl (%rdi,%rax,4), %eax
; X64-NEXT: retq
entry:
%shl = shl nuw i32 1, %c
@@ -5538,56 +5117,31 @@ return: ; preds = %entry, %if.then
define i32 @atomic_shl1_mask01_or_32_gpr_brz(ptr %v, i32 %c) nounwind {
; X86-LABEL: atomic_shl1_mask01_or_32_gpr_brz:
; X86: # %bb.0: # %entry
-; X86-NEXT: pushl %edi
-; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl $1, %edi
-; X86-NEXT: shll %cl, %edi
-; X86-NEXT: movl (%esi), %eax
-; X86-NEXT: .p2align 4, 0x90
-; X86-NEXT: .LBB88_1: # %atomicrmw.start
-; X86-NEXT: # =>This Inner Loop Header: Depth=1
-; X86-NEXT: movl %eax, %edx
-; X86-NEXT: orl %edi, %edx
-; X86-NEXT: lock cmpxchgl %edx, (%esi)
-; X86-NEXT: jne .LBB88_1
-; X86-NEXT: # %bb.2: # %atomicrmw.end
-; X86-NEXT: movl $123, %edx
-; X86-NEXT: testl %edi, %eax
-; X86-NEXT: jne .LBB88_4
-; X86-NEXT: # %bb.3: # %if.then
-; X86-NEXT: movl (%esi,%ecx,4), %edx
-; X86-NEXT: .LBB88_4: # %return
-; X86-NEXT: movl %edx, %eax
-; X86-NEXT: popl %esi
-; X86-NEXT: popl %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: andl $31, %eax
+; X86-NEXT: lock btsl %eax, (%edx)
+; X86-NEXT: movl $123, %eax
+; X86-NEXT: jae .LBB88_1
+; X86-NEXT: # %bb.2: # %return
+; X86-NEXT: retl
+; X86-NEXT: .LBB88_1: # %if.then
+; X86-NEXT: movl (%edx,%ecx,4), %eax
; X86-NEXT: retl
;
; X64-LABEL: atomic_shl1_mask01_or_32_gpr_brz:
; X64: # %bb.0: # %entry
-; X64-NEXT: movl %esi, %ecx
-; X64-NEXT: movl $1, %esi
-; X64-NEXT: shll %cl, %esi
-; X64-NEXT: movl (%rdi), %eax
-; X64-NEXT: .p2align 4, 0x90
-; X64-NEXT: .LBB88_1: # %atomicrmw.start
-; X64-NEXT: # =>This Inner Loop Header: Depth=1
-; X64-NEXT: movl %eax, %edx
-; X64-NEXT: orl %esi, %edx
-; X64-NEXT: lock cmpxchgl %edx, (%rdi)
-; X64-NEXT: jne .LBB88_1
-; X64-NEXT: # %bb.2: # %atomicrmw.end
-; X64-NEXT: movl $123, %edx
-; X64-NEXT: testl %esi, %eax
-; X64-NEXT: je .LBB88_3
-; X64-NEXT: # %bb.4: # %return
-; X64-NEXT: movl %edx, %eax
+; X64-NEXT: movl %esi, %eax
+; X64-NEXT: andl $31, %eax
+; X64-NEXT: lock btsl %eax, (%rdi)
+; X64-NEXT: movl $123, %eax
+; X64-NEXT: jae .LBB88_1
+; X64-NEXT: # %bb.2: # %return
; X64-NEXT: retq
-; X64-NEXT: .LBB88_3: # %if.then
-; X64-NEXT: movl %ecx, %eax
-; X64-NEXT: movl (%rdi,%rax,4), %edx
-; X64-NEXT: movl %edx, %eax
+; X64-NEXT: .LBB88_1: # %if.then
+; X64-NEXT: movl %esi, %eax
+; X64-NEXT: movl (%rdi,%rax,4), %eax
; X64-NEXT: retq
entry:
%rem = and i32 %c, 31
@@ -5685,54 +5239,30 @@ return: ; preds = %entry, %if.then
define i32 @atomic_shl1_or_32_gpr_brnz(ptr %v, i32 %c) nounwind {
; X86-LABEL: atomic_shl1_or_32_gpr_brnz:
; X86: # %bb.0: # %entry
-; X86-NEXT: pushl %edi
-; X86-NEXT: pushl %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl $1, %esi
-; X86-NEXT: shll %cl, %esi
-; X86-NEXT: movl (%edx), %eax
-; X86-NEXT: .p2align 4, 0x90
-; X86-NEXT: .LBB90_1: # %atomicrmw.start
-; X86-NEXT: # =>This Inner Loop Header: Depth=1
-; X86-NEXT: movl %eax, %edi
-; X86-NEXT: orl %esi, %edi
-; X86-NEXT: lock cmpxchgl %edi, (%edx)
-; X86-NEXT: jne .LBB90_1
-; X86-NEXT: # %bb.2: # %atomicrmw.end
-; X86-NEXT: testl %esi, %eax
-; X86-NEXT: je .LBB90_3
-; X86-NEXT: # %bb.4: # %if.then
-; X86-NEXT: movl (%edx,%ecx,4), %eax
-; X86-NEXT: jmp .LBB90_5
-; X86-NEXT: .LBB90_3:
+; X86-NEXT: movl %eax, %edx
+; X86-NEXT: andl $31, %edx
+; X86-NEXT: lock btsl %edx, (%ecx)
+; X86-NEXT: jae .LBB90_1
+; X86-NEXT: # %bb.2: # %if.then
+; X86-NEXT: movl (%ecx,%eax,4), %eax
+; X86-NEXT: retl
+; X86-NEXT: .LBB90_1:
; X86-NEXT: movl $123, %eax
-; X86-NEXT: .LBB90_5: # %return
-; X86-NEXT: popl %esi
-; X86-NEXT: popl %edi
; X86-NEXT: retl
;
; X64-LABEL: atomic_shl1_or_32_gpr_brnz:
; X64: # %bb.0: # %entry
-; X64-NEXT: movl %esi, %ecx
-; X64-NEXT: movl $1, %edx
-; X64-NEXT: shll %cl, %edx
-; X64-NEXT: movl (%rdi), %eax
-; X64-NEXT: .p2align 4, 0x90
-; X64-NEXT: .LBB90_1: # %atomicrmw.start
-; X64-NEXT: # =>This Inner Loop Header: Depth=1
-; X64-NEXT: movl %eax, %esi
-; X64-NEXT: orl %edx, %esi
-; X64-NEXT: lock cmpxchgl %esi, (%rdi)
-; X64-NEXT: jne .LBB90_1
-; X64-NEXT: # %bb.2: # %atomicrmw.end
-; X64-NEXT: testl %edx, %eax
-; X64-NEXT: je .LBB90_3
-; X64-NEXT: # %bb.4: # %if.then
-; X64-NEXT: movl %ecx, %eax
+; X64-NEXT: movl %esi, %eax
+; X64-NEXT: andl $31, %eax
+; X64-NEXT: lock btsl %eax, (%rdi)
+; X64-NEXT: jae .LBB90_1
+; X64-NEXT: # %bb.2: # %if.then
+; X64-NEXT: movl %esi, %eax
; X64-NEXT: movl (%rdi,%rax,4), %eax
; X64-NEXT: retq
-; X64-NEXT: .LBB90_3:
+; X64-NEXT: .LBB90_1:
; X64-NEXT: movl $123, %eax
; X64-NEXT: retq
entry:
@@ -5756,56 +5286,28 @@ return: ; preds = %entry, %if.then
define i32 @atomic_shl1_small_mask_or_32_gpr_brnz(ptr %v, i32 %c) nounwind {
; X86-LABEL: atomic_shl1_small_mask_or_32_gpr_brnz:
; X86: # %bb.0: # %entry
-; X86-NEXT: pushl %edi
-; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: andl $15, %ecx
-; X86-NEXT: movl $1, %esi
-; X86-NEXT: shll %cl, %esi
-; X86-NEXT: movl (%edx), %eax
-; X86-NEXT: .p2align 4, 0x90
-; X86-NEXT: .LBB91_1: # %atomicrmw.start
-; X86-NEXT: # =>This Inner Loop Header: Depth=1
-; X86-NEXT: movl %eax, %edi
-; X86-NEXT: orl %esi, %edi
-; X86-NEXT: lock cmpxchgl %edi, (%edx)
-; X86-NEXT: jne .LBB91_1
-; X86-NEXT: # %bb.2: # %atomicrmw.end
-; X86-NEXT: testl %esi, %eax
-; X86-NEXT: je .LBB91_3
-; X86-NEXT: # %bb.4: # %if.then
-; X86-NEXT: movl (%edx,%ecx,4), %eax
-; X86-NEXT: jmp .LBB91_5
-; X86-NEXT: .LBB91_3:
+; X86-NEXT: lock btsl %ecx, (%eax)
+; X86-NEXT: jae .LBB91_1
+; X86-NEXT: # %bb.2: # %if.then
+; X86-NEXT: movl (%eax,%ecx,4), %eax
+; X86-NEXT: retl
+; X86-NEXT: .LBB91_1:
; X86-NEXT: movl $123, %eax
-; X86-NEXT: .LBB91_5: # %return
-; X86-NEXT: popl %esi
-; X86-NEXT: popl %edi
; X86-NEXT: retl
;
; X64-LABEL: atomic_shl1_small_mask_or_32_gpr_brnz:
; X64: # %bb.0: # %entry
-; X64-NEXT: movl %esi, %ecx
-; X64-NEXT: andl $15, %ecx
-; X64-NEXT: movl $1, %edx
-; X64-NEXT: shll %cl, %edx
-; X64-NEXT: movl (%rdi), %eax
-; X64-NEXT: .p2align 4, 0x90
-; X64-NEXT: .LBB91_1: # %atomicrmw.start
-; X64-NEXT: # =>This Inner Loop Header: Depth=1
-; X64-NEXT: movl %eax, %esi
-; X64-NEXT: orl %edx, %esi
-; X64-NEXT: lock cmpxchgl %esi, (%rdi)
-; X64-NEXT: jne .LBB91_1
-; X64-NEXT: # %bb.2: # %atomicrmw.end
-; X64-NEXT: testl %edx, %eax
-; X64-NEXT: je .LBB91_3
-; X64-NEXT: # %bb.4: # %if.then
-; X64-NEXT: movl %ecx, %eax
+; X64-NEXT: andl $15, %esi
+; X64-NEXT: lock btsl %esi, (%rdi)
+; X64-NEXT: jae .LBB91_1
+; X64-NEXT: # %bb.2: # %if.then
+; X64-NEXT: movl %esi, %eax
; X64-NEXT: movl (%rdi,%rax,4), %eax
; X64-NEXT: retq
-; X64-NEXT: .LBB91_3:
+; X64-NEXT: .LBB91_1:
; X64-NEXT: movl $123, %eax
; X64-NEXT: retq
entry:
@@ -5830,54 +5332,30 @@ return: ; preds = %entry, %if.then
define i32 @atomic_shl1_mask0_or_32_gpr_brnz(ptr %v, i32 %c) nounwind {
; X86-LABEL: atomic_shl1_mask0_or_32_gpr_brnz:
; X86: # %bb.0: # %entry
-; X86-NEXT: pushl %edi
-; X86-NEXT: pushl %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl $1, %esi
-; X86-NEXT: shll %cl, %esi
-; X86-NEXT: movl (%edx), %eax
-; X86-NEXT: .p2align 4, 0x90
-; X86-NEXT: .LBB92_1: # %atomicrmw.start
-; X86-NEXT: # =>This Inner Loop Header: Depth=1
-; X86-NEXT: movl %eax, %edi
-; X86-NEXT: orl %esi, %edi
-; X86-NEXT: lock cmpxchgl %edi, (%edx)
-; X86-NEXT: jne .LBB92_1
-; X86-NEXT: # %bb.2: # %atomicrmw.end
-; X86-NEXT: btl %ecx, %eax
-; X86-NEXT: jae .LBB92_3
-; X86-NEXT: # %bb.4: # %if.then
-; X86-NEXT: movl (%edx,%ecx,4), %eax
-; X86-NEXT: jmp .LBB92_5
-; X86-NEXT: .LBB92_3:
+; X86-NEXT: movl %eax, %edx
+; X86-NEXT: andl $31, %edx
+; X86-NEXT: lock btsl %edx, (%ecx)
+; X86-NEXT: jae .LBB92_1
+; X86-NEXT: # %bb.2: # %if.then
+; X86-NEXT: movl (%ecx,%eax,4), %eax
+; X86-NEXT: retl
+; X86-NEXT: .LBB92_1:
; X86-NEXT: movl $123, %eax
-; X86-NEXT: .LBB92_5: # %return
-; X86-NEXT: popl %esi
-; X86-NEXT: popl %edi
; X86-NEXT: retl
;
; X64-LABEL: atomic_shl1_mask0_or_32_gpr_brnz:
; X64: # %bb.0: # %entry
-; X64-NEXT: movl %esi, %ecx
-; X64-NEXT: movl $1, %edx
-; X64-NEXT: shll %cl, %edx
-; X64-NEXT: movl (%rdi), %eax
-; X64-NEXT: .p2align 4, 0x90
-; X64-NEXT: .LBB92_1: # %atomicrmw.start
-; X64-NEXT: # =>This Inner Loop Header: Depth=1
-; X64-NEXT: movl %eax, %esi
-; X64-NEXT: orl %edx, %esi
-; X64-NEXT: lock cmpxchgl %esi, (%rdi)
-; X64-NEXT: jne .LBB92_1
-; X64-NEXT: # %bb.2: # %atomicrmw.end
-; X64-NEXT: btl %ecx, %eax
-; X64-NEXT: jae .LBB92_3
-; X64-NEXT: # %bb.4: # %if.then
-; X64-NEXT: movl %ecx, %eax
+; X64-NEXT: movl %esi, %eax
+; X64-NEXT: andl $31, %eax
+; X64-NEXT: lock btsl %eax, (%rdi)
+; X64-NEXT: jae .LBB92_1
+; X64-NEXT: # %bb.2: # %if.then
+; X64-NEXT: movl %esi, %eax
; X64-NEXT: movl (%rdi,%rax,4), %eax
; X64-NEXT: retq
-; X64-NEXT: .LBB92_3:
+; X64-NEXT: .LBB92_1:
; X64-NEXT: movl $123, %eax
; X64-NEXT: retq
entry:
@@ -5903,54 +5381,30 @@ return: ; preds = %entry, %if.then
define i32 @atomic_shl1_mask1_or_32_gpr_brnz(ptr %v, i32 %c) nounwind {
; X86-LABEL: atomic_shl1_mask1_or_32_gpr_brnz:
; X86: # %bb.0: # %entry
-; X86-NEXT: pushl %edi
-; X86-NEXT: pushl %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl $1, %esi
-; X86-NEXT: shll %cl, %esi
-; X86-NEXT: movl (%edx), %eax
-; X86-NEXT: .p2align 4, 0x90
-; X86-NEXT: .LBB93_1: # %atomicrmw.start
-; X86-NEXT: # =>This Inner Loop Header: Depth=1
-; X86-NEXT: movl %eax, %edi
-; X86-NEXT: orl %esi, %edi
-; X86-NEXT: lock cmpxchgl %edi, (%edx)
-; X86-NEXT: jne .LBB93_1
-; X86-NEXT: # %bb.2: # %atomicrmw.end
-; X86-NEXT: btl %ecx, %eax
-; X86-NEXT: jae .LBB93_3
-; X86-NEXT: # %bb.4: # %if.then
-; X86-NEXT: movl (%edx,%ecx,4), %eax
-; X86-NEXT: jmp .LBB93_5
-; X86-NEXT: .LBB93_3:
+; X86-NEXT: movl %eax, %edx
+; X86-NEXT: andl $31, %edx
+; X86-NEXT: lock btsl %edx, (%ecx)
+; X86-NEXT: jae .LBB93_1
+; X86-NEXT: # %bb.2: # %if.then
+; X86-NEXT: movl (%ecx,%eax,4), %eax
+; X86-NEXT: retl
+; X86-NEXT: .LBB93_1:
; X86-NEXT: movl $123, %eax
-; X86-NEXT: .LBB93_5: # %return
-; X86-NEXT: popl %esi
-; X86-NEXT: popl %edi
; X86-NEXT: retl
;
; X64-LABEL: atomic_shl1_mask1_or_32_gpr_brnz:
; X64: # %bb.0: # %entry
-; X64-NEXT: movl %esi, %ecx
-; X64-NEXT: movl $1, %edx
-; X64-NEXT: shll %cl, %edx
-; X64-NEXT: movl (%rdi), %eax
-; X64-NEXT: .p2align 4, 0x90
-; X64-NEXT: .LBB93_1: # %atomicrmw.start
-; X64-NEXT: # =>This Inner Loop Header: Depth=1
-; X64-NEXT: movl %eax, %esi
-; X64-NEXT: orl %edx, %esi
-; X64-NEXT: lock cmpxchgl %esi, (%rdi)
-; X64-NEXT: jne .LBB93_1
-; X64-NEXT: # %bb.2: # %atomicrmw.end
-; X64-NEXT: btl %ecx, %eax
-; X64-NEXT: jae .LBB93_3
-; X64-NEXT: # %bb.4: # %if.then
-; X64-NEXT: movl %ecx, %eax
+; X64-NEXT: movl %esi, %eax
+; X64-NEXT: andl $31, %eax
+; X64-NEXT: lock btsl %eax, (%rdi)
+; X64-NEXT: jae .LBB93_1
+; X64-NEXT: # %bb.2: # %if.then
+; X64-NEXT: movl %esi, %eax
; X64-NEXT: movl (%rdi,%rax,4), %eax
; X64-NEXT: retq
-; X64-NEXT: .LBB93_3:
+; X64-NEXT: .LBB93_1:
; X64-NEXT: movl $123, %eax
; X64-NEXT: retq
entry:
@@ -5976,54 +5430,30 @@ return: ; preds = %entry, %if.then
define i32 @atomic_shl1_mask01_or_32_gpr_brnz(ptr %v, i32 %c) nounwind {
; X86-LABEL: atomic_shl1_mask01_or_32_gpr_brnz:
; X86: # %bb.0: # %entry
-; X86-NEXT: pushl %edi
-; X86-NEXT: pushl %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl $1, %esi
-; X86-NEXT: shll %cl, %esi
-; X86-NEXT: movl (%edx), %eax
-; X86-NEXT: .p2align 4, 0x90
-; X86-NEXT: .LBB94_1: # %atomicrmw.start
-; X86-NEXT: # =>This Inner Loop Header: Depth=1
-; X86-NEXT: movl %eax, %edi
-; X86-NEXT: orl %esi, %edi
-; X86-NEXT: lock cmpxchgl %edi, (%edx)
-; X86-NEXT: jne .LBB94_1
-; X86-NEXT: # %bb.2: # %atomicrmw.end
-; X86-NEXT: testl %esi, %eax
-; X86-NEXT: je .LBB94_3
-; X86-NEXT: # %bb.4: # %if.then
-; X86-NEXT: movl (%edx,%ecx,4), %eax
-; X86-NEXT: jmp .LBB94_5
-; X86-NEXT: .LBB94_3:
+; X86-NEXT: movl %eax, %edx
+; X86-NEXT: andl $31, %edx
+; X86-NEXT: lock btsl %edx, (%ecx)
+; X86-NEXT: jae .LBB94_1
+; X86-NEXT: # %bb.2: # %if.then
+; X86-NEXT: movl (%ecx,%eax,4), %eax
+; X86-NEXT: retl
+; X86-NEXT: .LBB94_1:
; X86-NEXT: movl $123, %eax
-; X86-NEXT: .LBB94_5: # %return
-; X86-NEXT: popl %esi
-; X86-NEXT: popl %edi
; X86-NEXT: retl
;
; X64-LABEL: atomic_shl1_mask01_or_32_gpr_brnz:
; X64: # %bb.0: # %entry
-; X64-NEXT: movl %esi, %ecx
-; X64-NEXT: movl $1, %edx
-; X64-NEXT: shll %cl, %edx
-; X64-NEXT: movl (%rdi), %eax
-; X64-NEXT: .p2align 4, 0x90
-; X64-NEXT: .LBB94_1: # %atomicrmw.start
-; X64-NEXT: # =>This Inner Loop Header: Depth=1
-; X64-NEXT: movl %eax, %esi
-; X64-NEXT: orl %edx, %esi
-; X64-NEXT: lock cmpxchgl %esi, (%rdi)
-; X64-NEXT: jne .LBB94_1
-; X64-NEXT: # %bb.2: # %atomicrmw.end
-; X64-NEXT: testl %edx, %eax
-; X64-NEXT: je .LBB94_3
-; X64-NEXT: # %bb.4: # %if.then
-; X64-NEXT: movl %ecx, %eax
+; X64-NEXT: movl %esi, %eax
+; X64-NEXT: andl $31, %eax
+; X64-NEXT: lock btsl %eax, (%rdi)
+; X64-NEXT: jae .LBB94_1
+; X64-NEXT: # %bb.2: # %if.then
+; X64-NEXT: movl %esi, %eax
; X64-NEXT: movl (%rdi,%rax,4), %eax
; X64-NEXT: retq
-; X64-NEXT: .LBB94_3:
+; X64-NEXT: .LBB94_1:
; X64-NEXT: movl $123, %eax
; X64-NEXT: retq
entry:
More information about the llvm-commits
mailing list