[llvm] e03d216 - [X86] Use bit test instructions to optimize some logic atomic operations
Phoebe Wang via llvm-commits
llvm-commits at lists.llvm.org
Mon Feb 28 18:35:40 PST 2022
Author: Phoebe Wang
Date: 2022-03-01T09:57:08+08:00
New Revision: e03d216c28dfbda52afe7e1f6606cc5dafb3235e
URL: https://github.com/llvm/llvm-project/commit/e03d216c28dfbda52afe7e1f6606cc5dafb3235e
DIFF: https://github.com/llvm/llvm-project/commit/e03d216c28dfbda52afe7e1f6606cc5dafb3235e.diff
LOG: [X86] Use bit test instructions to optimize some logic atomic operations
This is to match GCC's optimizations: https://gcc.godbolt.org/z/3odh9e7WE
Reviewed By: craig.topper
Differential Revision: https://reviews.llvm.org/D120199
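For reference, the pattern this targets is an atomicrmw or/xor with a single-bit constant
(or an atomicrmw and with the complement of a single bit) whose only use is an AND with that
bit. A minimal sketch using IR from the updated test below; the intermediate intrinsic call
is an assumption about the exact name mangling of the int_x86_atomic_bts definition added
here, and the final assembly mirrors the new CHECK lines in atomic-bit-test.ll:

  ; input IR
  %0 = atomicrmw or i32* @v32, i32 8 monotonic, align 4
  %and = and i32 %0, 8

  ; after AtomicExpandPass calls emitBitTestAtomicRMWIntrinsic (assumed mangling)
  %1 = call i32 @llvm.x86.atomic.bts.i32(i8* bitcast (i32* @v32 to i8*), i8 3)

  ; selected x86-64 code, instead of the previous cmpxchg loop
  xorl %eax, %eax
  lock btsl $3, v32(%rip)
  setb %al
  shll $3, %eax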
Added:
Modified:
llvm/include/llvm/CodeGen/TargetLowering.h
llvm/include/llvm/IR/IntrinsicsX86.td
llvm/lib/CodeGen/AtomicExpandPass.cpp
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/lib/Target/X86/X86ISelLowering.h
llvm/lib/Target/X86/X86InstrCompiler.td
llvm/test/CodeGen/X86/atomic-bit-test.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index fbb9767e4da71..3104428925af6 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -253,7 +253,9 @@ class TargetLoweringBase {
LLOnly, // Expand the (load) instruction into just a load-linked, which has
// greater atomic guarantees than a normal load.
CmpXChg, // Expand the instruction into cmpxchg; used by at least X86.
- MaskedIntrinsic, // Use a target-specific intrinsic for the LL/SC loop.
+ MaskedIntrinsic, // Use a target-specific intrinsic for the LL/SC loop.
+ BitTestIntrinsic, // Use a target-specific intrinsic for special bit
+ // operations; used by X86.
};
/// Enum that specifies when a multiplication should be expanded.
@@ -1951,6 +1953,14 @@ class TargetLoweringBase {
llvm_unreachable("Masked atomicrmw expansion unimplemented on this target");
}
+ /// Perform a bit test atomicrmw using a target-specific intrinsic. This
+ /// represents the combined bit test intrinsic which will be lowered at a late
+ /// stage by the backend.
+ virtual void emitBitTestAtomicRMWIntrinsic(AtomicRMWInst *AI) const {
+ llvm_unreachable(
+ "Bit test atomicrmw expansion unimplemented on this target");
+ }
+
/// Perform a masked cmpxchg using a target-specific intrinsic. This
/// represents the core LL/SC loop which will be lowered at a late stage by
/// the backend.
diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td
index 8de737a1c7a58..e954fe7cb8f67 100644
--- a/llvm/include/llvm/IR/IntrinsicsX86.td
+++ b/llvm/include/llvm/IR/IntrinsicsX86.td
@@ -62,6 +62,16 @@ let TargetPrefix = "x86" in {
Intrinsic<[llvm_i32_ty], [], []>;
}
+// Lock bit test.
+let TargetPrefix = "x86" in {
+ def int_x86_atomic_bts : Intrinsic<[llvm_anyint_ty], [llvm_ptr_ty, llvm_i8_ty],
+ [ImmArg<ArgIndex<1>>]>;
+ def int_x86_atomic_btc : Intrinsic<[llvm_anyint_ty], [llvm_ptr_ty, llvm_i8_ty],
+ [ImmArg<ArgIndex<1>>]>;
+ def int_x86_atomic_btr : Intrinsic<[llvm_anyint_ty], [llvm_ptr_ty, llvm_i8_ty],
+ [ImmArg<ArgIndex<1>>]>;
+}
+
//===----------------------------------------------------------------------===//
// CET SS
let TargetPrefix = "x86" in {
diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp
index 4838f6da750dd..897018d202103 100644
--- a/llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -621,6 +621,10 @@ bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) {
expandAtomicRMWToMaskedIntrinsic(AI);
return true;
}
+ case TargetLoweringBase::AtomicExpansionKind::BitTestIntrinsic: {
+ TLI->emitBitTestAtomicRMWIntrinsic(AI);
+ return true;
+ }
default:
llvm_unreachable("Unhandled case in tryExpandAtomicRMW");
}
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 3fa92928e93d0..6ec80d0b02369 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -5442,6 +5442,18 @@ bool X86TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.align = Align(1);
Info.flags |= MachineMemOperand::MOLoad;
return true;
+ case Intrinsic::x86_atomic_bts:
+ case Intrinsic::x86_atomic_btc:
+ case Intrinsic::x86_atomic_btr: {
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ Info.ptrVal = I.getArgOperand(0);
+ unsigned Size = I.getType()->getScalarSizeInBits();
+ Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), Size);
+ Info.align = Align(Size);
+ Info.flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
+ MachineMemOperand::MOVolatile;
+ return true;
+ }
}
return false;
}
@@ -27510,6 +27522,30 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), SetCC,
Operation.getValue(1));
}
+ case Intrinsic::x86_atomic_bts:
+ case Intrinsic::x86_atomic_btc:
+ case Intrinsic::x86_atomic_btr: {
+ SDLoc DL(Op);
+ MVT VT = Op.getSimpleValueType();
+ SDValue Chain = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(2);
+ SDValue Op2 = Op.getOperand(3);
+ unsigned Opc = IntNo == Intrinsic::x86_atomic_bts ? X86ISD::LBTS
+ : IntNo == Intrinsic::x86_atomic_btc ? X86ISD::LBTC
+ : X86ISD::LBTR;
+ SDValue Size = DAG.getConstant(VT.getScalarSizeInBits(), DL, MVT::i32);
+ MachineMemOperand *MMO = cast<MemIntrinsicSDNode>(Op)->getMemOperand();
+ SDValue Res =
+ DAG.getMemIntrinsicNode(Opc, DL, DAG.getVTList(MVT::i32, MVT::Other),
+ {Chain, Op1, Op2, Size}, VT, MMO);
+ Chain = Res.getValue(1);
+ Res = DAG.getZExtOrTrunc(getSETCC(X86::COND_B, Res, DL, DAG), DL, VT);
+ unsigned Imm = cast<ConstantSDNode>(Op2)->getZExtValue();
+ if (Imm)
+ Res = DAG.getNode(ISD::SHL, DL, VT, Res,
+ DAG.getShiftAmountConstant(Imm, VT, DL));
+ return DAG.getNode(ISD::MERGE_VALUES, DL, Op->getVTList(), Res, Chain);
+ }
}
return SDValue();
}
@@ -30431,6 +30467,65 @@ X86TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
: AtomicExpansionKind::None;
}
+TargetLowering::AtomicExpansionKind
+X86TargetLowering::shouldExpandLogicAtomicRMWInIR(AtomicRMWInst *AI) const {
+ // If the atomicrmw's result isn't actually used, we can just add a "lock"
+ // prefix to a normal instruction for these operations.
+ if (AI->use_empty())
+ return AtomicExpansionKind::None;
+
+ // If the atomicrmw's result is used by a single bit AND, we may use
+ // bts/btr/btc instructions for these operations.
+ auto *C1 = dyn_cast<ConstantInt>(AI->getValOperand());
+ Instruction *I = AI->user_back();
+ if (!C1 || !AI->hasOneUse() || I->getOpcode() != Instruction::And ||
+ AI->getParent() != I->getParent())
+ return AtomicExpansionKind::CmpXChg;
+ // The following instruction must be an AND with a single bit.
+ auto *C2 = dyn_cast<ConstantInt>(I->getOperand(1));
+ unsigned Bits = AI->getType()->getPrimitiveSizeInBits();
+ if (!C2 || Bits == 8 || !isPowerOf2_64(C2->getZExtValue()))
+ return AtomicExpansionKind::CmpXChg;
+
+ if (AI->getOperation() == AtomicRMWInst::And)
+ return ~C1->getValue() == C2->getValue()
+ ? AtomicExpansionKind::BitTestIntrinsic
+ : AtomicExpansionKind::CmpXChg;
+
+ return C1 == C2 ? AtomicExpansionKind::BitTestIntrinsic
+ : AtomicExpansionKind::CmpXChg;
+}
+
+void X86TargetLowering::emitBitTestAtomicRMWIntrinsic(AtomicRMWInst *AI) const {
+ IRBuilder<> Builder(AI);
+ Intrinsic::ID IID = Intrinsic::not_intrinsic;
+ switch (AI->getOperation()) {
+ default:
+ llvm_unreachable("Unknown atomic operation");
+ case AtomicRMWInst::Or:
+ IID = Intrinsic::x86_atomic_bts;
+ break;
+ case AtomicRMWInst::Xor:
+ IID = Intrinsic::x86_atomic_btc;
+ break;
+ case AtomicRMWInst::And:
+ IID = Intrinsic::x86_atomic_btr;
+ break;
+ }
+ Instruction *I = AI->user_back();
+ LLVMContext &Ctx = AI->getContext();
+ unsigned Imm =
+ countTrailingZeros(cast<ConstantInt>(I->getOperand(1))->getZExtValue());
+ Function *BitTest =
+ Intrinsic::getDeclaration(AI->getModule(), IID, AI->getType());
+ Value *Addr = Builder.CreatePointerCast(AI->getPointerOperand(),
+ Type::getInt8PtrTy(Ctx));
+ Value *Result = Builder.CreateCall(BitTest, {Addr, Builder.getInt8(Imm)});
+ I->replaceAllUsesWith(Result);
+ I->eraseFromParent();
+ AI->eraseFromParent();
+}
+
TargetLowering::AtomicExpansionKind
X86TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
unsigned NativeWidth = Subtarget.is64Bit() ? 64 : 32;
@@ -30455,10 +30550,7 @@ X86TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
case AtomicRMWInst::Or:
case AtomicRMWInst::And:
case AtomicRMWInst::Xor:
- // If the atomicrmw's result isn't actually used, we can just add a "lock"
- // prefix to a normal instruction for these operations.
- return !AI->use_empty() ? AtomicExpansionKind::CmpXChg
- : AtomicExpansionKind::None;
+ return shouldExpandLogicAtomicRMWInIR(AI);
case AtomicRMWInst::Nand:
case AtomicRMWInst::Max:
case AtomicRMWInst::Min:
@@ -32939,6 +33031,9 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(LOR)
NODE_NAME_CASE(LXOR)
NODE_NAME_CASE(LAND)
+ NODE_NAME_CASE(LBTS)
+ NODE_NAME_CASE(LBTC)
+ NODE_NAME_CASE(LBTR)
NODE_NAME_CASE(VZEXT_MOVL)
NODE_NAME_CASE(VZEXT_LOAD)
NODE_NAME_CASE(VEXTRACT_STORE)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 99299dc884c98..80ffbf9a3dd0b 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -787,6 +787,9 @@ namespace llvm {
LOR,
LXOR,
LAND,
+ LBTS,
+ LBTC,
+ LBTR,
// Load, scalar_to_vector, and zero extend.
VZEXT_LOAD,
@@ -1640,6 +1643,9 @@ namespace llvm {
bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
TargetLoweringBase::AtomicExpansionKind
shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
+ TargetLoweringBase::AtomicExpansionKind
+ shouldExpandLogicAtomicRMWInIR(AtomicRMWInst *AI) const;
+ void emitBitTestAtomicRMWIntrinsic(AtomicRMWInst *AI) const override;
LoadInst *
lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override;
diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td
index fe8126f3dc7d8..abad628bebd5c 100644
--- a/llvm/lib/Target/X86/X86InstrCompiler.td
+++ b/llvm/lib/Target/X86/X86InstrCompiler.td
@@ -839,6 +839,38 @@ let Predicates = [UseIncDec] in {
def : Pat<(X86lock_sub addr:$dst, (i64 -1)), (LOCK_INC64m addr:$dst)>;
}
+// Atomic bit test.
+def X86LBTest : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisPtrTy<1>,
+ SDTCisVT<2, i8>, SDTCisVT<3, i32>]>;
+def x86bts : SDNode<"X86ISD::LBTS", X86LBTest,
+ [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+def x86btc : SDNode<"X86ISD::LBTC", X86LBTest,
+ [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+def x86btr : SDNode<"X86ISD::LBTR", X86LBTest,
+ [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+
+multiclass ATOMIC_LOGIC_OP<Format Form, string s> {
+ let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1,
+ SchedRW = [WriteBitTestSetRegRMW] in {
+ def 16m : Ii8<0xBA, Form, (outs), (ins i16mem:$src1, i8imm:$src2),
+ !strconcat(s, "{w}\t{$src2, $src1|$src1, $src2}"),
+ [(set EFLAGS, (!cast<SDNode>("x86" # s) addr:$src1, timm:$src2, (i32 16)))]>,
+ OpSize16, TB, LOCK;
+ def 32m : Ii8<0xBA, Form, (outs), (ins i32mem:$src1, i8imm:$src2),
+ !strconcat(s, "{l}\t{$src2, $src1|$src1, $src2}"),
+ [(set EFLAGS, (!cast<SDNode>("x86" # s) addr:$src1, timm:$src2, (i32 32)))]>,
+ OpSize32, TB, LOCK;
+ def 64m : RIi8<0xBA, Form, (outs), (ins i64mem:$src1, i8imm:$src2),
+ !strconcat(s, "{q}\t{$src2, $src1|$src1, $src2}"),
+ [(set EFLAGS, (!cast<SDNode>("x86" # s) addr:$src1, timm:$src2, (i32 64)))]>,
+ TB, LOCK;
+ }
+}
+
+defm LOCK_BTS : ATOMIC_LOGIC_OP<MRM5m, "bts">;
+defm LOCK_BTC : ATOMIC_LOGIC_OP<MRM7m, "btc">;
+defm LOCK_BTR : ATOMIC_LOGIC_OP<MRM6m, "btr">;
+
// Atomic compare and swap.
multiclass LCMPXCHG_BinOp<bits<8> Opc8, bits<8> Opc, Format Form,
string mnemonic, SDPatternOperator frag> {
diff --git a/llvm/test/CodeGen/X86/atomic-bit-test.ll b/llvm/test/CodeGen/X86/atomic-bit-test.ll
index da1b7aa14e707..4372fcf6be8a9 100644
--- a/llvm/test/CodeGen/X86/atomic-bit-test.ll
+++ b/llvm/test/CodeGen/X86/atomic-bit-test.ll
@@ -9,35 +9,17 @@
define i16 @bts1() nounwind {
; X86-LABEL: bts1:
; X86: # %bb.0: # %entry
-; X86-NEXT: movzwl v16, %eax
-; X86-NEXT: .p2align 4, 0x90
-; X86-NEXT: .LBB0_1: # %atomicrmw.start
-; X86-NEXT: # =>This Inner Loop Header: Depth=1
-; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: orl $1, %ecx
-; X86-NEXT: # kill: def $ax killed $ax killed $eax
-; X86-NEXT: lock cmpxchgw %cx, v16
-; X86-NEXT: # kill: def $ax killed $ax def $eax
-; X86-NEXT: jne .LBB0_1
-; X86-NEXT: # %bb.2: # %atomicrmw.end
-; X86-NEXT: andl $1, %eax
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: lock btsw $0, v16
+; X86-NEXT: setb %al
; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: retl
;
; X64-LABEL: bts1:
; X64: # %bb.0: # %entry
-; X64-NEXT: movzwl v16(%rip), %eax
-; X64-NEXT: .p2align 4, 0x90
-; X64-NEXT: .LBB0_1: # %atomicrmw.start
-; X64-NEXT: # =>This Inner Loop Header: Depth=1
-; X64-NEXT: movl %eax, %ecx
-; X64-NEXT: orl $1, %ecx
-; X64-NEXT: # kill: def $ax killed $ax killed $eax
-; X64-NEXT: lock cmpxchgw %cx, v16(%rip)
-; X64-NEXT: # kill: def $ax killed $ax def $eax
-; X64-NEXT: jne .LBB0_1
-; X64-NEXT: # %bb.2: # %atomicrmw.end
-; X64-NEXT: andl $1, %eax
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: lock btsw $0, v16(%rip)
+; X64-NEXT: setb %al
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
entry:
@@ -49,35 +31,19 @@ entry:
define i16 @bts2() nounwind {
; X86-LABEL: bts2:
; X86: # %bb.0: # %entry
-; X86-NEXT: movzwl v16, %eax
-; X86-NEXT: .p2align 4, 0x90
-; X86-NEXT: .LBB1_1: # %atomicrmw.start
-; X86-NEXT: # =>This Inner Loop Header: Depth=1
-; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: orl $2, %ecx
-; X86-NEXT: # kill: def $ax killed $ax killed $eax
-; X86-NEXT: lock cmpxchgw %cx, v16
-; X86-NEXT: # kill: def $ax killed $ax def $eax
-; X86-NEXT: jne .LBB1_1
-; X86-NEXT: # %bb.2: # %atomicrmw.end
-; X86-NEXT: andl $2, %eax
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: lock btsw $1, v16
+; X86-NEXT: setb %al
+; X86-NEXT: addl %eax, %eax
; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: retl
;
; X64-LABEL: bts2:
; X64: # %bb.0: # %entry
-; X64-NEXT: movzwl v16(%rip), %eax
-; X64-NEXT: .p2align 4, 0x90
-; X64-NEXT: .LBB1_1: # %atomicrmw.start
-; X64-NEXT: # =>This Inner Loop Header: Depth=1
-; X64-NEXT: movl %eax, %ecx
-; X64-NEXT: orl $2, %ecx
-; X64-NEXT: # kill: def $ax killed $ax killed $eax
-; X64-NEXT: lock cmpxchgw %cx, v16(%rip)
-; X64-NEXT: # kill: def $ax killed $ax def $eax
-; X64-NEXT: jne .LBB1_1
-; X64-NEXT: # %bb.2: # %atomicrmw.end
-; X64-NEXT: andl $2, %eax
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: lock btsw $1, v16(%rip)
+; X64-NEXT: setb %al
+; X64-NEXT: addl %eax, %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
entry:
@@ -89,35 +55,19 @@ entry:
define i16 @bts15() nounwind {
; X86-LABEL: bts15:
; X86: # %bb.0: # %entry
-; X86-NEXT: movzwl v16, %eax
-; X86-NEXT: .p2align 4, 0x90
-; X86-NEXT: .LBB2_1: # %atomicrmw.start
-; X86-NEXT: # =>This Inner Loop Header: Depth=1
-; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: orl $32768, %ecx # imm = 0x8000
-; X86-NEXT: # kill: def $ax killed $ax killed $eax
-; X86-NEXT: lock cmpxchgw %cx, v16
-; X86-NEXT: # kill: def $ax killed $ax def $eax
-; X86-NEXT: jne .LBB2_1
-; X86-NEXT: # %bb.2: # %atomicrmw.end
-; X86-NEXT: andl $32768, %eax # imm = 0x8000
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: lock btsw $15, v16
+; X86-NEXT: setb %al
+; X86-NEXT: shll $15, %eax
; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: retl
;
; X64-LABEL: bts15:
; X64: # %bb.0: # %entry
-; X64-NEXT: movzwl v16(%rip), %eax
-; X64-NEXT: .p2align 4, 0x90
-; X64-NEXT: .LBB2_1: # %atomicrmw.start
-; X64-NEXT: # =>This Inner Loop Header: Depth=1
-; X64-NEXT: movl %eax, %ecx
-; X64-NEXT: orl $32768, %ecx # imm = 0x8000
-; X64-NEXT: # kill: def $ax killed $ax killed $eax
-; X64-NEXT: lock cmpxchgw %cx, v16(%rip)
-; X64-NEXT: # kill: def $ax killed $ax def $eax
-; X64-NEXT: jne .LBB2_1
-; X64-NEXT: # %bb.2: # %atomicrmw.end
-; X64-NEXT: andl $32768, %eax # imm = 0x8000
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: lock btsw $15, v16(%rip)
+; X64-NEXT: setb %al
+; X64-NEXT: shll $15, %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
entry:
@@ -129,30 +79,18 @@ entry:
define i32 @bts31() nounwind {
; X86-LABEL: bts31:
; X86: # %bb.0: # %entry
-; X86-NEXT: movl v32, %eax
-; X86-NEXT: .p2align 4, 0x90
-; X86-NEXT: .LBB3_1: # %atomicrmw.start
-; X86-NEXT: # =>This Inner Loop Header: Depth=1
-; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: orl $-2147483648, %ecx # imm = 0x80000000
-; X86-NEXT: lock cmpxchgl %ecx, v32
-; X86-NEXT: jne .LBB3_1
-; X86-NEXT: # %bb.2: # %atomicrmw.end
-; X86-NEXT: andl $-2147483648, %eax # imm = 0x80000000
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: lock btsl $31, v32
+; X86-NEXT: setb %al
+; X86-NEXT: shll $31, %eax
; X86-NEXT: retl
;
; X64-LABEL: bts31:
; X64: # %bb.0: # %entry
-; X64-NEXT: movl v32(%rip), %eax
-; X64-NEXT: .p2align 4, 0x90
-; X64-NEXT: .LBB3_1: # %atomicrmw.start
-; X64-NEXT: # =>This Inner Loop Header: Depth=1
-; X64-NEXT: movl %eax, %ecx
-; X64-NEXT: orl $-2147483648, %ecx # imm = 0x80000000
-; X64-NEXT: lock cmpxchgl %ecx, v32(%rip)
-; X64-NEXT: jne .LBB3_1
-; X64-NEXT: # %bb.2: # %atomicrmw.end
-; X64-NEXT: andl $-2147483648, %eax # imm = 0x80000000
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: lock btsl $31, v32(%rip)
+; X64-NEXT: setb %al
+; X64-NEXT: shll $31, %eax
; X64-NEXT: retq
entry:
%0 = atomicrmw or i32* @v32, i32 2147483648 monotonic, align 4
@@ -185,17 +123,10 @@ define i64 @bts63() nounwind {
;
; X64-LABEL: bts63:
; X64: # %bb.0: # %entry
-; X64-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
-; X64-NEXT: movq v64(%rip), %rax
-; X64-NEXT: .p2align 4, 0x90
-; X64-NEXT: .LBB4_1: # %atomicrmw.start
-; X64-NEXT: # =>This Inner Loop Header: Depth=1
-; X64-NEXT: movq %rax, %rdx
-; X64-NEXT: orq %rcx, %rdx
-; X64-NEXT: lock cmpxchgq %rdx, v64(%rip)
-; X64-NEXT: jne .LBB4_1
-; X64-NEXT: # %bb.2: # %atomicrmw.end
-; X64-NEXT: andq %rcx, %rax
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: lock btsq $63, v64(%rip)
+; X64-NEXT: setb %al
+; X64-NEXT: shlq $63, %rax
; X64-NEXT: retq
entry:
%0 = atomicrmw or i64* @v64, i64 -9223372036854775808 monotonic, align 8
@@ -206,35 +137,17 @@ entry:
define i16 @btc1() nounwind {
; X86-LABEL: btc1:
; X86: # %bb.0: # %entry
-; X86-NEXT: movzwl v16, %eax
-; X86-NEXT: .p2align 4, 0x90
-; X86-NEXT: .LBB5_1: # %atomicrmw.start
-; X86-NEXT: # =>This Inner Loop Header: Depth=1
-; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: xorl $1, %ecx
-; X86-NEXT: # kill: def $ax killed $ax killed $eax
-; X86-NEXT: lock cmpxchgw %cx, v16
-; X86-NEXT: # kill: def $ax killed $ax def $eax
-; X86-NEXT: jne .LBB5_1
-; X86-NEXT: # %bb.2: # %atomicrmw.end
-; X86-NEXT: andl $1, %eax
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: lock btcw $0, v16
+; X86-NEXT: setb %al
; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: retl
;
; X64-LABEL: btc1:
; X64: # %bb.0: # %entry
-; X64-NEXT: movzwl v16(%rip), %eax
-; X64-NEXT: .p2align 4, 0x90
-; X64-NEXT: .LBB5_1: # %atomicrmw.start
-; X64-NEXT: # =>This Inner Loop Header: Depth=1
-; X64-NEXT: movl %eax, %ecx
-; X64-NEXT: xorl $1, %ecx
-; X64-NEXT: # kill: def $ax killed $ax killed $eax
-; X64-NEXT: lock cmpxchgw %cx, v16(%rip)
-; X64-NEXT: # kill: def $ax killed $ax def $eax
-; X64-NEXT: jne .LBB5_1
-; X64-NEXT: # %bb.2: # %atomicrmw.end
-; X64-NEXT: andl $1, %eax
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: lock btcw $0, v16(%rip)
+; X64-NEXT: setb %al
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
entry:
@@ -246,35 +159,19 @@ entry:
define i16 @btc2() nounwind {
; X86-LABEL: btc2:
; X86: # %bb.0: # %entry
-; X86-NEXT: movzwl v16, %eax
-; X86-NEXT: .p2align 4, 0x90
-; X86-NEXT: .LBB6_1: # %atomicrmw.start
-; X86-NEXT: # =>This Inner Loop Header: Depth=1
-; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: xorl $2, %ecx
-; X86-NEXT: # kill: def $ax killed $ax killed $eax
-; X86-NEXT: lock cmpxchgw %cx, v16
-; X86-NEXT: # kill: def $ax killed $ax def $eax
-; X86-NEXT: jne .LBB6_1
-; X86-NEXT: # %bb.2: # %atomicrmw.end
-; X86-NEXT: andl $2, %eax
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: lock btcw $1, v16
+; X86-NEXT: setb %al
+; X86-NEXT: addl %eax, %eax
; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: retl
;
; X64-LABEL: btc2:
; X64: # %bb.0: # %entry
-; X64-NEXT: movzwl v16(%rip), %eax
-; X64-NEXT: .p2align 4, 0x90
-; X64-NEXT: .LBB6_1: # %atomicrmw.start
-; X64-NEXT: # =>This Inner Loop Header: Depth=1
-; X64-NEXT: movl %eax, %ecx
-; X64-NEXT: xorl $2, %ecx
-; X64-NEXT: # kill: def $ax killed $ax killed $eax
-; X64-NEXT: lock cmpxchgw %cx, v16(%rip)
-; X64-NEXT: # kill: def $ax killed $ax def $eax
-; X64-NEXT: jne .LBB6_1
-; X64-NEXT: # %bb.2: # %atomicrmw.end
-; X64-NEXT: andl $2, %eax
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: lock btcw $1, v16(%rip)
+; X64-NEXT: setb %al
+; X64-NEXT: addl %eax, %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
entry:
@@ -286,35 +183,19 @@ entry:
define i16 @btc15() nounwind {
; X86-LABEL: btc15:
; X86: # %bb.0: # %entry
-; X86-NEXT: movzwl v16, %eax
-; X86-NEXT: .p2align 4, 0x90
-; X86-NEXT: .LBB7_1: # %atomicrmw.start
-; X86-NEXT: # =>This Inner Loop Header: Depth=1
-; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: xorl $32768, %ecx # imm = 0x8000
-; X86-NEXT: # kill: def $ax killed $ax killed $eax
-; X86-NEXT: lock cmpxchgw %cx, v16
-; X86-NEXT: # kill: def $ax killed $ax def $eax
-; X86-NEXT: jne .LBB7_1
-; X86-NEXT: # %bb.2: # %atomicrmw.end
-; X86-NEXT: andl $32768, %eax # imm = 0x8000
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: lock btcw $15, v16
+; X86-NEXT: setb %al
+; X86-NEXT: shll $15, %eax
; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: retl
;
; X64-LABEL: btc15:
; X64: # %bb.0: # %entry
-; X64-NEXT: movzwl v16(%rip), %eax
-; X64-NEXT: .p2align 4, 0x90
-; X64-NEXT: .LBB7_1: # %atomicrmw.start
-; X64-NEXT: # =>This Inner Loop Header: Depth=1
-; X64-NEXT: movl %eax, %ecx
-; X64-NEXT: xorl $32768, %ecx # imm = 0x8000
-; X64-NEXT: # kill: def $ax killed $ax killed $eax
-; X64-NEXT: lock cmpxchgw %cx, v16(%rip)
-; X64-NEXT: # kill: def $ax killed $ax def $eax
-; X64-NEXT: jne .LBB7_1
-; X64-NEXT: # %bb.2: # %atomicrmw.end
-; X64-NEXT: andl $32768, %eax # imm = 0x8000
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: lock btcw $15, v16(%rip)
+; X64-NEXT: setb %al
+; X64-NEXT: shll $15, %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
entry:
@@ -326,30 +207,18 @@ entry:
define i32 @btc31() nounwind {
; X86-LABEL: btc31:
; X86: # %bb.0: # %entry
-; X86-NEXT: movl v32, %eax
-; X86-NEXT: .p2align 4, 0x90
-; X86-NEXT: .LBB8_1: # %atomicrmw.start
-; X86-NEXT: # =>This Inner Loop Header: Depth=1
-; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: xorl $-2147483648, %ecx # imm = 0x80000000
-; X86-NEXT: lock cmpxchgl %ecx, v32
-; X86-NEXT: jne .LBB8_1
-; X86-NEXT: # %bb.2: # %atomicrmw.end
-; X86-NEXT: andl $-2147483648, %eax # imm = 0x80000000
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: lock btcl $31, v32
+; X86-NEXT: setb %al
+; X86-NEXT: shll $31, %eax
; X86-NEXT: retl
;
; X64-LABEL: btc31:
; X64: # %bb.0: # %entry
-; X64-NEXT: movl v32(%rip), %eax
-; X64-NEXT: .p2align 4, 0x90
-; X64-NEXT: .LBB8_1: # %atomicrmw.start
-; X64-NEXT: # =>This Inner Loop Header: Depth=1
-; X64-NEXT: movl %eax, %ecx
-; X64-NEXT: xorl $-2147483648, %ecx # imm = 0x80000000
-; X64-NEXT: lock cmpxchgl %ecx, v32(%rip)
-; X64-NEXT: jne .LBB8_1
-; X64-NEXT: # %bb.2: # %atomicrmw.end
-; X64-NEXT: andl $-2147483648, %eax # imm = 0x80000000
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: lock btcl $31, v32(%rip)
+; X64-NEXT: setb %al
+; X64-NEXT: shll $31, %eax
; X64-NEXT: retq
entry:
%0 = atomicrmw xor i32* @v32, i32 2147483648 monotonic, align 4
@@ -382,17 +251,10 @@ define i64 @btc63() nounwind {
;
; X64-LABEL: btc63:
; X64: # %bb.0: # %entry
-; X64-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
-; X64-NEXT: movq v64(%rip), %rax
-; X64-NEXT: .p2align 4, 0x90
-; X64-NEXT: .LBB9_1: # %atomicrmw.start
-; X64-NEXT: # =>This Inner Loop Header: Depth=1
-; X64-NEXT: movq %rax, %rdx
-; X64-NEXT: xorq %rcx, %rdx
-; X64-NEXT: lock cmpxchgq %rdx, v64(%rip)
-; X64-NEXT: jne .LBB9_1
-; X64-NEXT: # %bb.2: # %atomicrmw.end
-; X64-NEXT: andq %rcx, %rax
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: lock btcq $63, v64(%rip)
+; X64-NEXT: setb %al
+; X64-NEXT: shlq $63, %rax
; X64-NEXT: retq
entry:
%0 = atomicrmw xor i64* @v64, i64 -9223372036854775808 monotonic, align 8
@@ -403,35 +265,17 @@ entry:
define i16 @btr1() nounwind {
; X86-LABEL: btr1:
; X86: # %bb.0: # %entry
-; X86-NEXT: movzwl v16, %eax
-; X86-NEXT: .p2align 4, 0x90
-; X86-NEXT: .LBB10_1: # %atomicrmw.start
-; X86-NEXT: # =>This Inner Loop Header: Depth=1
-; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: andl $65534, %ecx # imm = 0xFFFE
-; X86-NEXT: # kill: def $ax killed $ax killed $eax
-; X86-NEXT: lock cmpxchgw %cx, v16
-; X86-NEXT: # kill: def $ax killed $ax def $eax
-; X86-NEXT: jne .LBB10_1
-; X86-NEXT: # %bb.2: # %atomicrmw.end
-; X86-NEXT: andl $1, %eax
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: lock btrw $0, v16
+; X86-NEXT: setb %al
; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: retl
;
; X64-LABEL: btr1:
; X64: # %bb.0: # %entry
-; X64-NEXT: movzwl v16(%rip), %eax
-; X64-NEXT: .p2align 4, 0x90
-; X64-NEXT: .LBB10_1: # %atomicrmw.start
-; X64-NEXT: # =>This Inner Loop Header: Depth=1
-; X64-NEXT: movl %eax, %ecx
-; X64-NEXT: andl $65534, %ecx # imm = 0xFFFE
-; X64-NEXT: # kill: def $ax killed $ax killed $eax
-; X64-NEXT: lock cmpxchgw %cx, v16(%rip)
-; X64-NEXT: # kill: def $ax killed $ax def $eax
-; X64-NEXT: jne .LBB10_1
-; X64-NEXT: # %bb.2: # %atomicrmw.end
-; X64-NEXT: andl $1, %eax
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: lock btrw $0, v16(%rip)
+; X64-NEXT: setb %al
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
entry:
@@ -443,35 +287,19 @@ entry:
define i16 @btr2() nounwind {
; X86-LABEL: btr2:
; X86: # %bb.0: # %entry
-; X86-NEXT: movzwl v16, %eax
-; X86-NEXT: .p2align 4, 0x90
-; X86-NEXT: .LBB11_1: # %atomicrmw.start
-; X86-NEXT: # =>This Inner Loop Header: Depth=1
-; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: andl $65533, %ecx # imm = 0xFFFD
-; X86-NEXT: # kill: def $ax killed $ax killed $eax
-; X86-NEXT: lock cmpxchgw %cx, v16
-; X86-NEXT: # kill: def $ax killed $ax def $eax
-; X86-NEXT: jne .LBB11_1
-; X86-NEXT: # %bb.2: # %atomicrmw.end
-; X86-NEXT: andl $2, %eax
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: lock btrw $1, v16
+; X86-NEXT: setb %al
+; X86-NEXT: addl %eax, %eax
; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: retl
;
; X64-LABEL: btr2:
; X64: # %bb.0: # %entry
-; X64-NEXT: movzwl v16(%rip), %eax
-; X64-NEXT: .p2align 4, 0x90
-; X64-NEXT: .LBB11_1: # %atomicrmw.start
-; X64-NEXT: # =>This Inner Loop Header: Depth=1
-; X64-NEXT: movl %eax, %ecx
-; X64-NEXT: andl $65533, %ecx # imm = 0xFFFD
-; X64-NEXT: # kill: def $ax killed $ax killed $eax
-; X64-NEXT: lock cmpxchgw %cx, v16(%rip)
-; X64-NEXT: # kill: def $ax killed $ax def $eax
-; X64-NEXT: jne .LBB11_1
-; X64-NEXT: # %bb.2: # %atomicrmw.end
-; X64-NEXT: andl $2, %eax
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: lock btrw $1, v16(%rip)
+; X64-NEXT: setb %al
+; X64-NEXT: addl %eax, %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
entry:
@@ -483,35 +311,19 @@ entry:
define i16 @btr15() nounwind {
; X86-LABEL: btr15:
; X86: # %bb.0: # %entry
-; X86-NEXT: movzwl v16, %eax
-; X86-NEXT: .p2align 4, 0x90
-; X86-NEXT: .LBB12_1: # %atomicrmw.start
-; X86-NEXT: # =>This Inner Loop Header: Depth=1
-; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: andl $32767, %ecx # imm = 0x7FFF
-; X86-NEXT: # kill: def $ax killed $ax killed $eax
-; X86-NEXT: lock cmpxchgw %cx, v16
-; X86-NEXT: # kill: def $ax killed $ax def $eax
-; X86-NEXT: jne .LBB12_1
-; X86-NEXT: # %bb.2: # %atomicrmw.end
-; X86-NEXT: andl $32768, %eax # imm = 0x8000
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: lock btrw $15, v16
+; X86-NEXT: setb %al
+; X86-NEXT: shll $15, %eax
; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: retl
;
; X64-LABEL: btr15:
; X64: # %bb.0: # %entry
-; X64-NEXT: movzwl v16(%rip), %eax
-; X64-NEXT: .p2align 4, 0x90
-; X64-NEXT: .LBB12_1: # %atomicrmw.start
-; X64-NEXT: # =>This Inner Loop Header: Depth=1
-; X64-NEXT: movl %eax, %ecx
-; X64-NEXT: andl $32767, %ecx # imm = 0x7FFF
-; X64-NEXT: # kill: def $ax killed $ax killed $eax
-; X64-NEXT: lock cmpxchgw %cx, v16(%rip)
-; X64-NEXT: # kill: def $ax killed $ax def $eax
-; X64-NEXT: jne .LBB12_1
-; X64-NEXT: # %bb.2: # %atomicrmw.end
-; X64-NEXT: andl $32768, %eax # imm = 0x8000
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: lock btrw $15, v16(%rip)
+; X64-NEXT: setb %al
+; X64-NEXT: shll $15, %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
entry:
@@ -523,30 +335,18 @@ entry:
define i32 @btr31() nounwind {
; X86-LABEL: btr31:
; X86: # %bb.0: # %entry
-; X86-NEXT: movl v32, %eax
-; X86-NEXT: .p2align 4, 0x90
-; X86-NEXT: .LBB13_1: # %atomicrmw.start
-; X86-NEXT: # =>This Inner Loop Header: Depth=1
-; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: andl $2147483647, %ecx # imm = 0x7FFFFFFF
-; X86-NEXT: lock cmpxchgl %ecx, v32
-; X86-NEXT: jne .LBB13_1
-; X86-NEXT: # %bb.2: # %atomicrmw.end
-; X86-NEXT: andl $-2147483648, %eax # imm = 0x80000000
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: lock btrl $31, v32
+; X86-NEXT: setb %al
+; X86-NEXT: shll $31, %eax
; X86-NEXT: retl
;
; X64-LABEL: btr31:
; X64: # %bb.0: # %entry
-; X64-NEXT: movl v32(%rip), %eax
-; X64-NEXT: .p2align 4, 0x90
-; X64-NEXT: .LBB13_1: # %atomicrmw.start
-; X64-NEXT: # =>This Inner Loop Header: Depth=1
-; X64-NEXT: movl %eax, %ecx
-; X64-NEXT: andl $2147483647, %ecx # imm = 0x7FFFFFFF
-; X64-NEXT: lock cmpxchgl %ecx, v32(%rip)
-; X64-NEXT: jne .LBB13_1
-; X64-NEXT: # %bb.2: # %atomicrmw.end
-; X64-NEXT: andl $-2147483648, %eax # imm = 0x80000000
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: lock btrl $31, v32(%rip)
+; X64-NEXT: setb %al
+; X64-NEXT: shll $31, %eax
; X64-NEXT: retq
entry:
%0 = atomicrmw and i32* @v32, i32 2147483647 monotonic, align 4
@@ -585,18 +385,10 @@ define i64 @btr63() nounwind {
;
; X64-LABEL: btr63:
; X64: # %bb.0: # %entry
-; X64-NEXT: movabsq $9223372036854775807, %rcx # imm = 0x7FFFFFFFFFFFFFFF
-; X64-NEXT: movq v64(%rip), %rax
-; X64-NEXT: .p2align 4, 0x90
-; X64-NEXT: .LBB14_1: # %atomicrmw.start
-; X64-NEXT: # =>This Inner Loop Header: Depth=1
-; X64-NEXT: movq %rax, %rdx
-; X64-NEXT: andq %rcx, %rdx
-; X64-NEXT: lock cmpxchgq %rdx, v64(%rip)
-; X64-NEXT: jne .LBB14_1
-; X64-NEXT: # %bb.2: # %atomicrmw.end
-; X64-NEXT: incq %rcx
-; X64-NEXT: andq %rcx, %rax
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: lock btrq $63, v64(%rip)
+; X64-NEXT: setb %al
+; X64-NEXT: shlq $63, %rax
; X64-NEXT: retq
entry:
%0 = atomicrmw and i64* @v64, i64 9223372036854775807 monotonic, align 8
@@ -655,36 +447,18 @@ entry:
define i16 @multi_use2() nounwind {
; X86-LABEL: multi_use2:
; X86: # %bb.0: # %entry
-; X86-NEXT: movzwl v16, %eax
-; X86-NEXT: .p2align 4, 0x90
-; X86-NEXT: .LBB16_1: # %atomicrmw.start
-; X86-NEXT: # =>This Inner Loop Header: Depth=1
-; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: orl $1, %ecx
-; X86-NEXT: # kill: def $ax killed $ax killed $eax
-; X86-NEXT: lock cmpxchgw %cx, v16
-; X86-NEXT: # kill: def $ax killed $ax def $eax
-; X86-NEXT: jne .LBB16_1
-; X86-NEXT: # %bb.2: # %atomicrmw.end
-; X86-NEXT: andl $1, %eax
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: lock btsw $0, v16
+; X86-NEXT: setb %al
; X86-NEXT: leal (%eax,%eax,2), %eax
; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: retl
;
; X64-LABEL: multi_use2:
; X64: # %bb.0: # %entry
-; X64-NEXT: movzwl v16(%rip), %eax
-; X64-NEXT: .p2align 4, 0x90
-; X64-NEXT: .LBB16_1: # %atomicrmw.start
-; X64-NEXT: # =>This Inner Loop Header: Depth=1
-; X64-NEXT: movl %eax, %ecx
-; X64-NEXT: orl $1, %ecx
-; X64-NEXT: # kill: def $ax killed $ax killed $rax
-; X64-NEXT: lock cmpxchgw %cx, v16(%rip)
-; X64-NEXT: # kill: def $ax killed $ax def $rax
-; X64-NEXT: jne .LBB16_1
-; X64-NEXT: # %bb.2: # %atomicrmw.end
-; X64-NEXT: andl $1, %eax
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: lock btsw $0, v16(%rip)
+; X64-NEXT: setb %al
; X64-NEXT: leal (%rax,%rax,2), %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
@@ -764,39 +538,23 @@ declare void @foo()
define void @no_and_cmp0_fold() nounwind {
; X86-LABEL: no_and_cmp0_fold:
; X86: # %bb.0: # %entry
-; X86-NEXT: movl v32, %eax
-; X86-NEXT: .p2align 4, 0x90
-; X86-NEXT: .LBB18_1: # %atomicrmw.start
-; X86-NEXT: # =>This Inner Loop Header: Depth=1
-; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: orl $8, %ecx
-; X86-NEXT: lock cmpxchgl %ecx, v32
-; X86-NEXT: jne .LBB18_1
-; X86-NEXT: # %bb.2: # %atomicrmw.end
+; X86-NEXT: lock btsl $3, v32
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: testb %al, %al
-; X86-NEXT: je .LBB18_3
-; X86-NEXT: # %bb.4: # %if.end
+; X86-NEXT: je .LBB18_1
+; X86-NEXT: # %bb.2: # %if.end
; X86-NEXT: retl
-; X86-NEXT: .LBB18_3: # %if.then
+; X86-NEXT: .LBB18_1: # %if.then
;
; X64-LABEL: no_and_cmp0_fold:
; X64: # %bb.0: # %entry
-; X64-NEXT: movl v32(%rip), %eax
-; X64-NEXT: .p2align 4, 0x90
-; X64-NEXT: .LBB18_1: # %atomicrmw.start
-; X64-NEXT: # =>This Inner Loop Header: Depth=1
-; X64-NEXT: movl %eax, %ecx
-; X64-NEXT: orl $8, %ecx
-; X64-NEXT: lock cmpxchgl %ecx, v32(%rip)
-; X64-NEXT: jne .LBB18_1
-; X64-NEXT: # %bb.2: # %atomicrmw.end
+; X64-NEXT: lock btsl $3, v32(%rip)
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: testb %al, %al
-; X64-NEXT: je .LBB18_3
-; X64-NEXT: # %bb.4: # %if.end
+; X64-NEXT: je .LBB18_1
+; X64-NEXT: # %bb.2: # %if.end
; X64-NEXT: retq
-; X64-NEXT: .LBB18_3: # %if.then
+; X64-NEXT: .LBB18_1: # %if.then
entry:
%0 = atomicrmw or i32* @v32, i32 8 monotonic, align 4
%and = and i32 %0, 8
@@ -815,32 +573,20 @@ define i32 @split_hoist_and(i32 %0) nounwind {
; X86-LABEL: split_hoist_and:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl v32, %eax
-; X86-NEXT: .p2align 4, 0x90
-; X86-NEXT: .LBB19_1: # %atomicrmw.start
-; X86-NEXT: # =>This Inner Loop Header: Depth=1
-; X86-NEXT: movl %eax, %edx
-; X86-NEXT: orl $8, %edx
-; X86-NEXT: lock cmpxchgl %edx, v32
-; X86-NEXT: jne .LBB19_1
-; X86-NEXT: # %bb.2: # %atomicrmw.end
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: lock btsl $3, v32
+; X86-NEXT: setb %al
+; X86-NEXT: shll $3, %eax
; X86-NEXT: testl %ecx, %ecx
-; X86-NEXT: andl $8, %eax
; X86-NEXT: retl
;
; X64-LABEL: split_hoist_and:
; X64: # %bb.0:
-; X64-NEXT: movl v32(%rip), %eax
-; X64-NEXT: .p2align 4, 0x90
-; X64-NEXT: .LBB19_1: # %atomicrmw.start
-; X64-NEXT: # =>This Inner Loop Header: Depth=1
-; X64-NEXT: movl %eax, %ecx
-; X64-NEXT: orl $8, %ecx
-; X64-NEXT: lock cmpxchgl %ecx, v32(%rip)
-; X64-NEXT: jne .LBB19_1
-; X64-NEXT: # %bb.2: # %atomicrmw.end
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: lock btsl $3, v32(%rip)
+; X64-NEXT: setb %al
+; X64-NEXT: shll $3, %eax
; X64-NEXT: testl %edi, %edi
-; X64-NEXT: andl $8, %eax
; X64-NEXT: retq
%2 = atomicrmw or i32* @v32, i32 8 monotonic, align 4
%3 = tail call i32 @llvm.ctlz.i32(i32 %0, i1 false)