[llvm] d558255 - [X86] Use lock add/sub for cases where we only care about the EFLAGS
Phoebe Wang via llvm-commits
llvm-commits at lists.llvm.org
Fri Nov 18 05:43:56 PST 2022
Author: Phoebe Wang
Date: 2022-11-18T21:43:47+08:00
New Revision: d55825565070cd36fa6a41fe8a3bfdec8751e790
URL: https://github.com/llvm/llvm-project/commit/d55825565070cd36fa6a41fe8a3bfdec8751e790
DIFF: https://github.com/llvm/llvm-project/commit/d55825565070cd36fa6a41fe8a3bfdec8751e790.diff
LOG: [X86] Use lock add/sub for cases where we only care about the EFLAGS
This fixes #36373 and #36905, and partially addresses #58685.
Reviewed By: RKSimon
Differential Revision: https://reviews.llvm.org/D137711
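
For illustration, the kind of IR this patch targets looks like the following minimal sketch (function name hypothetical; it mirrors the lock_sub_sete test in pr58685.ll below). The atomicrmw result is only consumed by a comparison, so only EFLAGS of the locked instruction is actually needed, and the backend can now emit lock subl %esi, (%rdi) followed by sete %al instead of a lock xaddl sequence:

define i1 @cmp_after_atomic_sub(ptr %p, i32 %v) nounwind {
  ; The old value is compared against the subtracted operand, i.e. the test
  ; is whether the new value (old - %v) is zero, which is exactly the ZF
  ; produced by lock sub.
  %old = atomicrmw sub ptr %p, i32 %v seq_cst, align 4
  %cmp = icmp eq i32 %old, %v
  ret i1 %cmp
}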
Added:
Modified:
llvm/include/llvm/CodeGen/TargetLowering.h
llvm/include/llvm/IR/IntrinsicsX86.td
llvm/lib/CodeGen/AtomicExpandPass.cpp
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/lib/Target/X86/X86ISelLowering.h
llvm/test/CodeGen/X86/pr37025.ll
llvm/test/CodeGen/X86/pr58685.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 3442cc8f205b3..b61703c464577 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -258,6 +258,8 @@ class TargetLoweringBase {
MaskedIntrinsic, // Use a target-specific intrinsic for the LL/SC loop.
BitTestIntrinsic, // Use a target-specific intrinsic for special bit
// operations; used by X86.
+ CmpArithIntrinsic,// Use a target-specific intrinsic for special compare
+ // operations; used by X86.
Expand, // Generic expansion in terms of other atomic operations.
// Rewrite to a non-atomic form for use in a known non-preemptible
@@ -2018,6 +2020,14 @@ class TargetLoweringBase {
"Bit test atomicrmw expansion unimplemented on this target");
}
+ /// Perform an atomicrmw whose result is only used by a comparison, using a
+ /// target-specific intrinsic. This represents the combined atomic and compare
+ /// intrinsic which will be lowered at a late stage by the backend.
+ virtual void emitCmpArithAtomicRMWIntrinsic(AtomicRMWInst *AI) const {
+ llvm_unreachable(
+ "Compare arith atomicrmw expansion unimplemented on this target");
+ }
+
/// Perform a masked cmpxchg using a target-specific intrinsic. This
/// represents the core LL/SC loop which will be lowered at a late stage by
/// the backend. The target-specific intrinsic returns the loaded value and
diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td
index de4a7f0af95e6..34f884f8a1676 100644
--- a/llvm/include/llvm/IR/IntrinsicsX86.td
+++ b/llvm/include/llvm/IR/IntrinsicsX86.td
@@ -72,6 +72,14 @@ let TargetPrefix = "x86" in {
[ImmArg<ArgIndex<1>>]>;
}
+// Lock binary arith with CC.
+let TargetPrefix = "x86" in {
+ def int_x86_atomic_add_cc : Intrinsic<[llvm_i8_ty], [llvm_ptr_ty, llvm_anyint_ty, llvm_i32_ty],
+ [ImmArg<ArgIndex<2>>]>;
+ def int_x86_atomic_sub_cc : Intrinsic<[llvm_i8_ty], [llvm_ptr_ty, llvm_anyint_ty, llvm_i32_ty],
+ [ImmArg<ArgIndex<2>>]>;
+}
+
// Read Processor Register.
let TargetPrefix = "x86" in {
def int_x86_rdpru : ClangBuiltin<"__builtin_ia32_rdpru">,
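
For context, a call to one of the new overloaded intrinsics, as the expansion emits it for an i32 operand, would look roughly like the sketch below. The mangled name and the concrete condition-code value are assumptions based on the usual x86 intrinsic naming and the X86::CondCode enum (where COND_E is 4), not something spelled out in this diff:

declare i8 @llvm.x86.atomic.add.cc.i32(ptr, i32, i32 immarg)

; The last operand is the immediate X86::CondCode to test after the locked
; add; the i8 result is the SETcc value materialized by the lowering.
%flag = call i8 @llvm.x86.atomic.add.cc.i32(ptr %p, i32 %v, i32 4)
%bit = trunc i8 %flag to i1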
diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp
index fca7a5240ffc9..926ec8704bee5 100644
--- a/llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -604,6 +604,10 @@ bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) {
TLI->emitBitTestAtomicRMWIntrinsic(AI);
return true;
}
+ case TargetLoweringBase::AtomicExpansionKind::CmpArithIntrinsic: {
+ TLI->emitCmpArithAtomicRMWIntrinsic(AI);
+ return true;
+ }
case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
return lowerAtomicRMWInst(AI);
case TargetLoweringBase::AtomicExpansionKind::Expand:
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index b00d31e670b09..dc8821397ebd1 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -5659,7 +5659,9 @@ bool X86TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
case Intrinsic::x86_aor32:
case Intrinsic::x86_aor64:
case Intrinsic::x86_axor32:
- case Intrinsic::x86_axor64: {
+ case Intrinsic::x86_axor64:
+ case Intrinsic::x86_atomic_add_cc:
+ case Intrinsic::x86_atomic_sub_cc: {
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.ptrVal = I.getArgOperand(0);
unsigned Size = I.getArgOperand(1)->getType()->getScalarSizeInBits();
@@ -28382,6 +28384,32 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
return DAG.getMemIntrinsicNode(Opc, DL, Op->getVTList(),
{Chain, Op1, Op2}, VT, MMO);
}
+ case Intrinsic::x86_atomic_add_cc:
+ case Intrinsic::x86_atomic_sub_cc: {
+ SDLoc DL(Op);
+ SDValue Chain = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(2);
+ SDValue Op2 = Op.getOperand(3);
+ X86::CondCode CC = (X86::CondCode)Op.getConstantOperandVal(4);
+ MVT VT = Op2.getSimpleValueType();
+ unsigned Opc = 0;
+ switch (IntNo) {
+ default:
+ llvm_unreachable("Unknown Intrinsic");
+ case Intrinsic::x86_atomic_add_cc:
+ Opc = X86ISD::LADD;
+ break;
+ case Intrinsic::x86_atomic_sub_cc:
+ Opc = X86ISD::LSUB;
+ break;
+ }
+ MachineMemOperand *MMO = cast<MemIntrinsicSDNode>(Op)->getMemOperand();
+ SDValue LockArith =
+ DAG.getMemIntrinsicNode(Opc, DL, DAG.getVTList(MVT::i32, MVT::Other),
+ {Chain, Op1, Op2}, VT, MMO);
+ Chain = LockArith.getValue(1);
+ return DAG.getMergeValues({getSETCC(CC, LockArith, DL, DAG), Chain}, DL);
+ }
}
return SDValue();
}
@@ -31364,6 +31392,74 @@ void X86TargetLowering::emitBitTestAtomicRMWIntrinsic(AtomicRMWInst *AI) const {
AI->eraseFromParent();
}
+static bool shouldExpandCmpArithRMWInIR(AtomicRMWInst *AI) {
+ using namespace llvm::PatternMatch;
+ if (!AI->hasOneUse())
+ return false;
+
+ Value *Op = AI->getOperand(1);
+ ICmpInst::Predicate Pred;
+ Instruction *I = AI->user_back();
+ AtomicRMWInst::BinOp Opc = AI->getOperation();
+ if (Opc == AtomicRMWInst::Add) {
+ if (match(I, m_c_ICmp(Pred, m_Sub(m_ZeroInt(), m_Specific(Op)), m_Value())))
+ return Pred == CmpInst::ICMP_EQ;
+ if (match(I, m_OneUse(m_c_Add(m_Specific(Op), m_Value()))) &&
+ match(I->user_back(), m_ICmp(Pred, m_Value(), m_ZeroInt())))
+ return Pred == CmpInst::ICMP_SLT;
+ return false;
+ }
+ if (Opc == AtomicRMWInst::Sub) {
+ if (match(I, m_c_ICmp(Pred, m_Specific(Op), m_Value())))
+ return Pred == CmpInst::ICMP_EQ;
+ if (match(I, m_OneUse(m_Sub(m_Value(), m_Specific(Op)))) &&
+ match(I->user_back(), m_ICmp(Pred, m_Value(), m_ZeroInt())))
+ return Pred == CmpInst::ICMP_SLT;
+ return false;
+ }
+
+ return false;
+}
+
+void X86TargetLowering::emitCmpArithAtomicRMWIntrinsic(
+ AtomicRMWInst *AI) const {
+ IRBuilder<> Builder(AI);
+ Instruction *TempI = nullptr;
+ LLVMContext &Ctx = AI->getContext();
+ ICmpInst *ICI = dyn_cast<ICmpInst>(AI->user_back());
+ if (!ICI) {
+ TempI = AI->user_back();
+ assert(TempI->hasOneUse() && "Must have one use");
+ ICI = cast<ICmpInst>(TempI->user_back());
+ }
+ ICmpInst::Predicate Pred = ICI->getPredicate();
+ assert((Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_SLT) &&
+ "Not supported Pred");
+ X86::CondCode CC = Pred == CmpInst::ICMP_EQ ? X86::COND_E : X86::COND_S;
+ Intrinsic::ID IID = Intrinsic::not_intrinsic;
+ switch (AI->getOperation()) {
+ default:
+ llvm_unreachable("Unknown atomic operation");
+ case AtomicRMWInst::Add:
+ IID = Intrinsic::x86_atomic_add_cc;
+ break;
+ case AtomicRMWInst::Sub:
+ IID = Intrinsic::x86_atomic_sub_cc;
+ break;
+ }
+ Function *CmpArith =
+ Intrinsic::getDeclaration(AI->getModule(), IID, AI->getType());
+ Value *Call = Builder.CreateCall(CmpArith, {AI->getPointerOperand(),
+ AI->getValOperand(),
+ Builder.getInt32((unsigned)CC)});
+ Value *Result = Builder.CreateTrunc(Call, Type::getInt1Ty(Ctx));
+ ICI->replaceAllUsesWith(Result);
+ ICI->eraseFromParent();
+ if (TempI)
+ TempI->eraseFromParent();
+ AI->eraseFromParent();
+}
+
TargetLowering::AtomicExpansionKind
X86TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
unsigned NativeWidth = Subtarget.is64Bit() ? 64 : 32;
@@ -31381,9 +31477,12 @@ X86TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
default:
llvm_unreachable("Unknown atomic operation");
case AtomicRMWInst::Xchg:
+ return AtomicExpansionKind::None;
case AtomicRMWInst::Add:
case AtomicRMWInst::Sub:
- // It's better to use xadd, xsub or xchg for these in all cases.
+ if (shouldExpandCmpArithRMWInIR(AI))
+ return AtomicExpansionKind::CmpArithIntrinsic;
+ // It's better to use xadd, xsub or xchg for these in other cases.
return AtomicExpansionKind::None;
case AtomicRMWInst::Or:
case AtomicRMWInst::And:
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 7f1fe04bed5fb..c539ba32d147f 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -1682,6 +1682,7 @@ namespace llvm {
TargetLoweringBase::AtomicExpansionKind
shouldExpandLogicAtomicRMWInIR(AtomicRMWInst *AI) const;
void emitBitTestAtomicRMWIntrinsic(AtomicRMWInst *AI) const override;
+ void emitCmpArithAtomicRMWIntrinsic(AtomicRMWInst *AI) const override;
LoadInst *
lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override;
diff --git a/llvm/test/CodeGen/X86/pr37025.ll b/llvm/test/CodeGen/X86/pr37025.ll
index 41f9854980c05..4fad1592d5232 100644
--- a/llvm/test/CodeGen/X86/pr37025.ll
+++ b/llvm/test/CodeGen/X86/pr37025.ll
@@ -43,13 +43,13 @@ define void @test_dec_select(ptr nocapture %0, ptr readnone %1) {
define void @test_dec_select_commute(ptr nocapture %0, ptr readnone %1) {
; CHECK-LABEL: test_dec_select_commute:
; CHECK: # %bb.0:
-; CHECK-NEXT: movq $-1, %rax
-; CHECK-NEXT: lock xaddq %rax, (%rdi)
+; CHECK-NEXT: lock decq (%rdi)
+; CHECK-NEXT: sete %al
; CHECK-NEXT: testq %rsi, %rsi
; CHECK-NEXT: je .LBB1_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: cmpq $1, %rax
-; CHECK-NEXT: jne .LBB1_2
+; CHECK-NEXT: testb %al, %al
+; CHECK-NEXT: je .LBB1_2
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: jmp func2 # TAILCALL
; CHECK-NEXT: .LBB1_2:
@@ -71,13 +71,13 @@ define void @test_dec_select_commute(ptr nocapture %0, ptr readnone %1) {
define void @test_dec_and(ptr nocapture %0, ptr readnone %1) {
; CHECK-LABEL: test_dec_and:
; CHECK: # %bb.0:
-; CHECK-NEXT: movq $-1, %rax
-; CHECK-NEXT: lock xaddq %rax, (%rdi)
+; CHECK-NEXT: lock decq (%rdi)
+; CHECK-NEXT: sete %al
; CHECK-NEXT: testq %rsi, %rsi
; CHECK-NEXT: je .LBB2_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: cmpq $1, %rax
-; CHECK-NEXT: jne .LBB2_2
+; CHECK-NEXT: testb %al, %al
+; CHECK-NEXT: je .LBB2_2
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: jmp func2 # TAILCALL
; CHECK-NEXT: .LBB2_2:
diff --git a/llvm/test/CodeGen/X86/pr58685.ll b/llvm/test/CodeGen/X86/pr58685.ll
index bc0070361c094..2323162a8cdc4 100644
--- a/llvm/test/CodeGen/X86/pr58685.ll
+++ b/llvm/test/CodeGen/X86/pr58685.ll
@@ -4,9 +4,7 @@
define i1 @lock_add_sete(ptr %0, i32 %1) nounwind {
; CHECK-LABEL: lock_add_sete:
; CHECK: # %bb.0:
-; CHECK-NEXT: movl %esi, %eax
-; CHECK-NEXT: lock xaddl %eax, (%rdi)
-; CHECK-NEXT: addl %esi, %eax
+; CHECK-NEXT: lock addl %esi, (%rdi)
; CHECK-NEXT: sete %al
; CHECK-NEXT: retq
%3 = atomicrmw add ptr %0, i32 %1 seq_cst, align 4
@@ -18,11 +16,8 @@ define i1 @lock_add_sete(ptr %0, i32 %1) nounwind {
define i1 @lock_add_sets(ptr %0, i32 %1) nounwind {
; CHECK-LABEL: lock_add_sets:
; CHECK: # %bb.0:
-; CHECK-NEXT: movl %esi, %eax
-; CHECK-NEXT: lock xaddl %eax, (%rdi)
-; CHECK-NEXT: addl %esi, %eax
-; CHECK-NEXT: shrl $31, %eax
-; CHECK-NEXT: # kill: def $al killed $al killed $eax
+; CHECK-NEXT: lock addl %esi, (%rdi)
+; CHECK-NEXT: sets %al
; CHECK-NEXT: retq
%3 = atomicrmw add ptr %0, i32 %1 seq_cst, align 4
%4 = add i32 %3, %1
@@ -33,10 +28,7 @@ define i1 @lock_add_sets(ptr %0, i32 %1) nounwind {
define i1 @lock_sub_sete(ptr %0, i32 %1) nounwind {
; CHECK-LABEL: lock_sub_sete:
; CHECK: # %bb.0:
-; CHECK-NEXT: movl %esi, %eax
-; CHECK-NEXT: negl %eax
-; CHECK-NEXT: lock xaddl %eax, (%rdi)
-; CHECK-NEXT: cmpl %esi, %eax
+; CHECK-NEXT: lock subl %esi, (%rdi)
; CHECK-NEXT: sete %al
; CHECK-NEXT: retq
%3 = atomicrmw sub ptr %0, i32 %1 seq_cst, align 4
@@ -47,12 +39,8 @@ define i1 @lock_sub_sete(ptr %0, i32 %1) nounwind {
define i1 @lock_sub_sets(ptr %0, i32 %1) nounwind {
; CHECK-LABEL: lock_sub_sets:
; CHECK: # %bb.0:
-; CHECK-NEXT: movl %esi, %eax
-; CHECK-NEXT: negl %eax
-; CHECK-NEXT: lock xaddl %eax, (%rdi)
-; CHECK-NEXT: subl %esi, %eax
-; CHECK-NEXT: shrl $31, %eax
-; CHECK-NEXT: # kill: def $al killed $al killed $eax
+; CHECK-NEXT: lock subl %esi, (%rdi)
+; CHECK-NEXT: sets %al
; CHECK-NEXT: retq
%3 = atomicrmw sub ptr %0, i32 %1 seq_cst, align 4
%4 = sub i32 %3, %1
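
The sets tests above exercise the other supported predicate: when the recomputed result of the add/sub is only checked for being negative, the SF flag of the locked instruction suffices. A minimal sketch of the matched IR (function name hypothetical, mirroring lock_add_sets):

define i1 @sign_after_atomic_add(ptr %p, i32 %v) nounwind {
  ; %new recomputes the post-operation value and has a single use in the
  ; signed compare against zero, so lock addl + sets is enough.
  %old = atomicrmw add ptr %p, i32 %v seq_cst, align 4
  %new = add i32 %old, %v
  %isneg = icmp slt i32 %new, 0
  ret i1 %isneg
}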