[llvm] 510e5fb - [X86] Use lock or/and/xor for cases where we only care about the EFLAGS
Phoebe Wang via llvm-commits
llvm-commits at lists.llvm.org
Sat Nov 19 19:18:46 PST 2022
Author: Phoebe Wang
Date: 2022-11-20T10:42:48+08:00
New Revision: 510e5fba16382eef577eb18f4b2c136a3ea60fc1
URL: https://github.com/llvm/llvm-project/commit/510e5fba16382eef577eb18f4b2c136a3ea60fc1
DIFF: https://github.com/llvm/llvm-project/commit/510e5fba16382eef577eb18f4b2c136a3ea60fc1.diff
LOG: [X86] Use lock or/and/xor for cases where we only care about the EFLAGS
This is a follow-up of D137711 to fix the rest of #58685.
Reviewed By: RKSimon
Differential Revision: https://reviews.llvm.org/D138294
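[Editor's note] A minimal C++ sketch of the kind of source pattern this lowering targets (the function name is illustrative, not part of the patch; it assumes the usual mapping of std::atomic fetch_or to atomicrmw or, mirroring lock_or_sete in llvm/test/CodeGen/X86/pr58685.ll):

    #include <atomic>

    // Hypothetical example: the fetched value is recombined with the same
    // operand and only the zero check of the result is used, so the whole
    // sequence can lower to "lock orl %esi, (%rdi); sete %al" instead of a
    // cmpxchg loop.
    bool or_then_is_zero(std::atomic<int> &v, int mask) {
      // (old | mask) is exactly the value the atomic OR stores, so testing it
      // against zero needs only the ZF produced by `lock orl`.
      return (v.fetch_or(mask, std::memory_order_seq_cst) | mask) == 0;
    }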
Added:
Modified:
llvm/include/llvm/IR/IntrinsicsX86.td
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/pr58685.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td
index 34f884f8a1676..7c39f14440138 100644
--- a/llvm/include/llvm/IR/IntrinsicsX86.td
+++ b/llvm/include/llvm/IR/IntrinsicsX86.td
@@ -78,6 +78,12 @@ let TargetPrefix = "x86" in {
[ImmArg<ArgIndex<2>>]>;
def int_x86_atomic_sub_cc : Intrinsic<[llvm_i8_ty], [llvm_ptr_ty, llvm_anyint_ty, llvm_i32_ty],
[ImmArg<ArgIndex<2>>]>;
+ def int_x86_atomic_or_cc : Intrinsic<[llvm_i8_ty], [llvm_ptr_ty, llvm_anyint_ty, llvm_i32_ty],
+ [ImmArg<ArgIndex<2>>]>;
+ def int_x86_atomic_and_cc : Intrinsic<[llvm_i8_ty], [llvm_ptr_ty, llvm_anyint_ty, llvm_i32_ty],
+ [ImmArg<ArgIndex<2>>]>;
+ def int_x86_atomic_xor_cc : Intrinsic<[llvm_i8_ty], [llvm_ptr_ty, llvm_anyint_ty, llvm_i32_ty],
+ [ImmArg<ArgIndex<2>>]>;
}
// Read Processor Register.
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index dc8821397ebd1..89eeab5b31147 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -5661,7 +5661,10 @@ bool X86TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
case Intrinsic::x86_axor32:
case Intrinsic::x86_axor64:
case Intrinsic::x86_atomic_add_cc:
- case Intrinsic::x86_atomic_sub_cc: {
+ case Intrinsic::x86_atomic_sub_cc:
+ case Intrinsic::x86_atomic_or_cc:
+ case Intrinsic::x86_atomic_and_cc:
+ case Intrinsic::x86_atomic_xor_cc: {
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.ptrVal = I.getArgOperand(0);
unsigned Size = I.getArgOperand(1)->getType()->getScalarSizeInBits();
@@ -28385,7 +28388,10 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
{Chain, Op1, Op2}, VT, MMO);
}
case Intrinsic::x86_atomic_add_cc:
- case Intrinsic::x86_atomic_sub_cc: {
+ case Intrinsic::x86_atomic_sub_cc:
+ case Intrinsic::x86_atomic_or_cc:
+ case Intrinsic::x86_atomic_and_cc:
+ case Intrinsic::x86_atomic_xor_cc: {
SDLoc DL(Op);
SDValue Chain = Op.getOperand(0);
SDValue Op1 = Op.getOperand(2);
@@ -28402,6 +28408,15 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
case Intrinsic::x86_atomic_sub_cc:
Opc = X86ISD::LSUB;
break;
+ case Intrinsic::x86_atomic_or_cc:
+ Opc = X86ISD::LOR;
+ break;
+ case Intrinsic::x86_atomic_and_cc:
+ Opc = X86ISD::LAND;
+ break;
+ case Intrinsic::x86_atomic_xor_cc:
+ Opc = X86ISD::LXOR;
+ break;
}
MachineMemOperand *MMO = cast<MemIntrinsicSDNode>(Op)->getMemOperand();
SDValue LockArith =
@@ -31417,6 +31432,23 @@ static bool shouldExpandCmpArithRMWInIR(AtomicRMWInst *AI) {
return Pred == CmpInst::ICMP_SLT;
return false;
}
+ if (Opc == AtomicRMWInst::Or) {
+ if (match(I, m_OneUse(m_c_Or(m_Specific(Op), m_Value()))) &&
+ match(I->user_back(), m_ICmp(Pred, m_Value(), m_ZeroInt())))
+ return Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_SLT;
+ }
+ if (Opc == AtomicRMWInst::And) {
+ if (match(I, m_OneUse(m_c_And(m_Specific(Op), m_Value()))) &&
+ match(I->user_back(), m_ICmp(Pred, m_Value(), m_ZeroInt())))
+ return Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_SLT;
+ }
+ if (Opc == AtomicRMWInst::Xor) {
+ if (match(I, m_c_ICmp(Pred, m_Specific(Op), m_Value())))
+ return Pred == CmpInst::ICMP_EQ;
+ if (match(I, m_OneUse(m_c_Xor(m_Specific(Op), m_Value()))) &&
+ match(I->user_back(), m_ICmp(Pred, m_Value(), m_ZeroInt())))
+ return Pred == CmpInst::ICMP_SLT;
+ }
return false;
}
@@ -31446,6 +31478,15 @@ void X86TargetLowering::emitCmpArithAtomicRMWIntrinsic(
case AtomicRMWInst::Sub:
IID = Intrinsic::x86_atomic_sub_cc;
break;
+ case AtomicRMWInst::Or:
+ IID = Intrinsic::x86_atomic_or_cc;
+ break;
+ case AtomicRMWInst::And:
+ IID = Intrinsic::x86_atomic_and_cc;
+ break;
+ case AtomicRMWInst::Xor:
+ IID = Intrinsic::x86_atomic_xor_cc;
+ break;
}
Function *CmpArith =
Intrinsic::getDeclaration(AI->getModule(), IID, AI->getType());
@@ -31487,6 +31528,8 @@ X86TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
case AtomicRMWInst::Or:
case AtomicRMWInst::And:
case AtomicRMWInst::Xor:
+ if (shouldExpandCmpArithRMWInIR(AI))
+ return AtomicExpansionKind::CmpArithIntrinsic;
return shouldExpandLogicAtomicRMWInIR(AI);
case AtomicRMWInst::Nand:
case AtomicRMWInst::Max:
diff --git a/llvm/test/CodeGen/X86/pr58685.ll b/llvm/test/CodeGen/X86/pr58685.ll
index 2323162a8cdc4..0bd87854a3ce9 100644
--- a/llvm/test/CodeGen/X86/pr58685.ll
+++ b/llvm/test/CodeGen/X86/pr58685.ll
@@ -51,16 +51,7 @@ define i1 @lock_sub_sets(ptr %0, i32 %1) nounwind {
define i1 @lock_or_sete(ptr %0, i32 %1) nounwind {
; CHECK-LABEL: lock_or_sete:
; CHECK: # %bb.0:
-; CHECK-NEXT: movl (%rdi), %eax
-; CHECK-NEXT: .p2align 4, 0x90
-; CHECK-NEXT: .LBB4_1: # %atomicrmw.start
-; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: movl %eax, %ecx
-; CHECK-NEXT: orl %esi, %ecx
-; CHECK-NEXT: lock cmpxchgl %ecx, (%rdi)
-; CHECK-NEXT: jne .LBB4_1
-; CHECK-NEXT: # %bb.2: # %atomicrmw.end
-; CHECK-NEXT: orl %esi, %eax
+; CHECK-NEXT: lock orl %esi, (%rdi)
; CHECK-NEXT: sete %al
; CHECK-NEXT: retq
%3 = atomicrmw or ptr %0, i32 %1 seq_cst, align 4
@@ -72,18 +63,8 @@ define i1 @lock_or_sete(ptr %0, i32 %1) nounwind {
define i1 @lock_or_sets(ptr %0, i32 %1) nounwind {
; CHECK-LABEL: lock_or_sets:
; CHECK: # %bb.0:
-; CHECK-NEXT: movl (%rdi), %eax
-; CHECK-NEXT: .p2align 4, 0x90
-; CHECK-NEXT: .LBB5_1: # %atomicrmw.start
-; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: movl %eax, %ecx
-; CHECK-NEXT: orl %esi, %ecx
-; CHECK-NEXT: lock cmpxchgl %ecx, (%rdi)
-; CHECK-NEXT: jne .LBB5_1
-; CHECK-NEXT: # %bb.2: # %atomicrmw.end
-; CHECK-NEXT: orl %esi, %eax
-; CHECK-NEXT: shrl $31, %eax
-; CHECK-NEXT: # kill: def $al killed $al killed $eax
+; CHECK-NEXT: lock orl %esi, (%rdi)
+; CHECK-NEXT: sets %al
; CHECK-NEXT: retq
%3 = atomicrmw or ptr %0, i32 %1 seq_cst, align 4
%4 = or i32 %3, %1
@@ -94,16 +75,7 @@ define i1 @lock_or_sets(ptr %0, i32 %1) nounwind {
define i1 @lock_and_sete(ptr %0, i32 %1) nounwind {
; CHECK-LABEL: lock_and_sete:
; CHECK: # %bb.0:
-; CHECK-NEXT: movl (%rdi), %eax
-; CHECK-NEXT: .p2align 4, 0x90
-; CHECK-NEXT: .LBB6_1: # %atomicrmw.start
-; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: movl %eax, %ecx
-; CHECK-NEXT: andl %esi, %ecx
-; CHECK-NEXT: lock cmpxchgl %ecx, (%rdi)
-; CHECK-NEXT: jne .LBB6_1
-; CHECK-NEXT: # %bb.2: # %atomicrmw.end
-; CHECK-NEXT: testl %esi, %eax
+; CHECK-NEXT: lock andl %esi, (%rdi)
; CHECK-NEXT: sete %al
; CHECK-NEXT: retq
%3 = atomicrmw and ptr %0, i32 %1 seq_cst, align 4
@@ -115,18 +87,8 @@ define i1 @lock_and_sete(ptr %0, i32 %1) nounwind {
define i1 @lock_and_sets(ptr %0, i32 %1) nounwind {
; CHECK-LABEL: lock_and_sets:
; CHECK: # %bb.0:
-; CHECK-NEXT: movl (%rdi), %eax
-; CHECK-NEXT: .p2align 4, 0x90
-; CHECK-NEXT: .LBB7_1: # %atomicrmw.start
-; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: movl %eax, %ecx
-; CHECK-NEXT: andl %esi, %ecx
-; CHECK-NEXT: lock cmpxchgl %ecx, (%rdi)
-; CHECK-NEXT: jne .LBB7_1
-; CHECK-NEXT: # %bb.2: # %atomicrmw.end
-; CHECK-NEXT: andl %esi, %eax
-; CHECK-NEXT: shrl $31, %eax
-; CHECK-NEXT: # kill: def $al killed $al killed $eax
+; CHECK-NEXT: lock andl %esi, (%rdi)
+; CHECK-NEXT: sets %al
; CHECK-NEXT: retq
%3 = atomicrmw and ptr %0, i32 %1 seq_cst, align 4
%4 = and i32 %3, %1
@@ -137,16 +99,7 @@ define i1 @lock_and_sets(ptr %0, i32 %1) nounwind {
define i1 @lock_xor_sete(ptr %0, i32 %1) nounwind {
; CHECK-LABEL: lock_xor_sete:
; CHECK: # %bb.0:
-; CHECK-NEXT: movl (%rdi), %eax
-; CHECK-NEXT: .p2align 4, 0x90
-; CHECK-NEXT: .LBB8_1: # %atomicrmw.start
-; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: movl %eax, %ecx
-; CHECK-NEXT: xorl %esi, %ecx
-; CHECK-NEXT: lock cmpxchgl %ecx, (%rdi)
-; CHECK-NEXT: jne .LBB8_1
-; CHECK-NEXT: # %bb.2: # %atomicrmw.end
-; CHECK-NEXT: cmpl %esi, %eax
+; CHECK-NEXT: lock xorl %esi, (%rdi)
; CHECK-NEXT: sete %al
; CHECK-NEXT: retq
%3 = atomicrmw xor ptr %0, i32 %1 seq_cst, align 4
@@ -157,18 +110,8 @@ define i1 @lock_xor_sete(ptr %0, i32 %1) nounwind {
define i1 @lock_xor_sets(ptr %0, i32 %1) nounwind {
; CHECK-LABEL: lock_xor_sets:
; CHECK: # %bb.0:
-; CHECK-NEXT: movl (%rdi), %eax
-; CHECK-NEXT: .p2align 4, 0x90
-; CHECK-NEXT: .LBB9_1: # %atomicrmw.start
-; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: movl %eax, %ecx
-; CHECK-NEXT: xorl %esi, %ecx
-; CHECK-NEXT: lock cmpxchgl %ecx, (%rdi)
-; CHECK-NEXT: jne .LBB9_1
-; CHECK-NEXT: # %bb.2: # %atomicrmw.end
-; CHECK-NEXT: xorl %esi, %eax
-; CHECK-NEXT: shrl $31, %eax
-; CHECK-NEXT: # kill: def $al killed $al killed $eax
+; CHECK-NEXT: lock xorl %esi, (%rdi)
+; CHECK-NEXT: sets %al
; CHECK-NEXT: retq
%3 = atomicrmw xor ptr %0, i32 %1 seq_cst, align 4
%4 = xor i32 %3, %1
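[Editor's note] For the XOR equality case, the new shouldExpandCmpArithRMWInIR check also accepts a direct comparison of the fetched value against the operand, since (old ^ x) == 0 exactly when old == x. A minimal C++ sketch under the same assumptions as above (hypothetical function name):

    #include <atomic>

    // Hypothetical source-level counterpart of lock_xor_sete: comparing the
    // old value with the xor operand needs only the ZF of `lock xorl`.
    bool xor_then_was_equal(std::atomic<int> &v, int x) {
      return v.fetch_xor(x, std::memory_order_seq_cst) == x;
    }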