[llvm] 510e5fb - [X86] Use lock or/and/xor for cases where we only care about the EFLAGS
Phoebe Wang via llvm-commits
llvm-commits at lists.llvm.org
Sat Nov 19 19:18:46 PST 2022
Author: Phoebe Wang
Date: 2022-11-20T10:42:48+08:00
New Revision: 510e5fba16382eef577eb18f4b2c136a3ea60fc1
URL: https://github.com/llvm/llvm-project/commit/510e5fba16382eef577eb18f4b2c136a3ea60fc1
DIFF: https://github.com/llvm/llvm-project/commit/510e5fba16382eef577eb18f4b2c136a3ea60fc1.diff
LOG: [X86] Use lock or/and/xor for cases where we only care about the EFLAGS
This is a follow-up of D137711 to fix the rest of #58685.
Reviewed By: RKSimon
Differential Revision: https://reviews.llvm.org/D138294
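[Editor's note] A minimal C++ sketch of the kind of source pattern this lowering targets (the function name is illustrative, not part of the patch; it assumes the usual mapping of std::atomic fetch_or to atomicrmw or, mirroring lock_or_sete in llvm/test/CodeGen/X86/pr58685.ll):

    #include <atomic>

    // Hypothetical example: the fetched value is recombined with the same
    // operand and only the zero check of the result is used, so the whole
    // sequence can lower to "lock orl %esi, (%rdi); sete %al" instead of a
    // cmpxchg loop.
    bool or_then_is_zero(std::atomic<int> &v, int mask) {
      // (old | mask) is exactly the value the atomic OR stores, so testing it
      // against zero needs only the ZF produced by `lock orl`.
      return (v.fetch_or(mask, std::memory_order_seq_cst) | mask) == 0;
    }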
Added:
Modified:
llvm/include/llvm/IR/IntrinsicsX86.td
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/pr58685.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td
index 34f884f8a1676..7c39f14440138 100644
--- a/llvm/include/llvm/IR/IntrinsicsX86.td
+++ b/llvm/include/llvm/IR/IntrinsicsX86.td
@@ -78,6 +78,12 @@ let TargetPrefix = "x86" in {
[ImmArg<ArgIndex<2>>]>;
def int_x86_atomic_sub_cc : Intrinsic<[llvm_i8_ty], [llvm_ptr_ty, llvm_anyint_ty, llvm_i32_ty],
[ImmArg<ArgIndex<2>>]>;
+ def int_x86_atomic_or_cc : Intrinsic<[llvm_i8_ty], [llvm_ptr_ty, llvm_anyint_ty, llvm_i32_ty],
+ [ImmArg<ArgIndex<2>>]>;
+ def int_x86_atomic_and_cc : Intrinsic<[llvm_i8_ty], [llvm_ptr_ty, llvm_anyint_ty, llvm_i32_ty],
+ [ImmArg<ArgIndex<2>>]>;
+ def int_x86_atomic_xor_cc : Intrinsic<[llvm_i8_ty], [llvm_ptr_ty, llvm_anyint_ty, llvm_i32_ty],
+ [ImmArg<ArgIndex<2>>]>;
}
// Read Processor Register.
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index dc8821397ebd1..89eeab5b31147 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -5661,7 +5661,10 @@ bool X86TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
case Intrinsic::x86_axor32:
case Intrinsic::x86_axor64:
case Intrinsic::x86_atomic_add_cc:
- case Intrinsic::x86_atomic_sub_cc: {
+ case Intrinsic::x86_atomic_sub_cc:
+ case Intrinsic::x86_atomic_or_cc:
+ case Intrinsic::x86_atomic_and_cc:
+ case Intrinsic::x86_atomic_xor_cc: {
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.ptrVal = I.getArgOperand(0);
unsigned Size = I.getArgOperand(1)->getType()->getScalarSizeInBits();
@@ -28385,7 +28388,10 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
{Chain, Op1, Op2}, VT, MMO);
}
case Intrinsic::x86_atomic_add_cc:
- case Intrinsic::x86_atomic_sub_cc: {
+ case Intrinsic::x86_atomic_sub_cc:
+ case Intrinsic::x86_atomic_or_cc:
+ case Intrinsic::x86_atomic_and_cc:
+ case Intrinsic::x86_atomic_xor_cc: {
SDLoc DL(Op);
SDValue Chain = Op.getOperand(0);
SDValue Op1 = Op.getOperand(2);
@@ -28402,6 +28408,15 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
case Intrinsic::x86_atomic_sub_cc:
Opc = X86ISD::LSUB;
break;
+ case Intrinsic::x86_atomic_or_cc:
+ Opc = X86ISD::LOR;
+ break;
+ case Intrinsic::x86_atomic_and_cc:
+ Opc = X86ISD::LAND;
+ break;
+ case Intrinsic::x86_atomic_xor_cc:
+ Opc = X86ISD::LXOR;
+ break;
}
MachineMemOperand *MMO = cast<MemIntrinsicSDNode>(Op)->getMemOperand();
SDValue LockArith =
@@ -31417,6 +31432,23 @@ static bool shouldExpandCmpArithRMWInIR(AtomicRMWInst *AI) {
return Pred == CmpInst::ICMP_SLT;
return false;
}
+ if (Opc == AtomicRMWInst::Or) {
+ if (match(I, m_OneUse(m_c_Or(m_Specific(Op), m_Value()))) &&
+ match(I->user_back(), m_ICmp(Pred, m_Value(), m_ZeroInt())))
+ return Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_SLT;
+ }
+ if (Opc == AtomicRMWInst::And) {
+ if (match(I, m_OneUse(m_c_And(m_Specific(Op), m_Value()))) &&
+ match(I->user_back(), m_ICmp(Pred, m_Value(), m_ZeroInt())))
+ return Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_SLT;
+ }
+ if (Opc == AtomicRMWInst::Xor) {
+ if (match(I, m_c_ICmp(Pred, m_Specific(Op), m_Value())))
+ return Pred == CmpInst::ICMP_EQ;
+ if (match(I, m_OneUse(m_c_Xor(m_Specific(Op), m_Value()))) &&
+ match(I->user_back(), m_ICmp(Pred, m_Value(), m_ZeroInt())))
+ return Pred == CmpInst::ICMP_SLT;
+ }
return false;
}
@@ -31446,6 +31478,15 @@ void X86TargetLowering::emitCmpArithAtomicRMWIntrinsic(
case AtomicRMWInst::Sub:
IID = Intrinsic::x86_atomic_sub_cc;
break;
+ case AtomicRMWInst::Or:
+ IID = Intrinsic::x86_atomic_or_cc;
+ break;
+ case AtomicRMWInst::And:
+ IID = Intrinsic::x86_atomic_and_cc;
+ break;
+ case AtomicRMWInst::Xor:
+ IID = Intrinsic::x86_atomic_xor_cc;
+ break;
}
Function *CmpArith =
Intrinsic::getDeclaration(AI->getModule(), IID, AI->getType());
@@ -31487,6 +31528,8 @@ X86TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
case AtomicRMWInst::Or:
case AtomicRMWInst::And:
case AtomicRMWInst::Xor:
+ if (shouldExpandCmpArithRMWInIR(AI))
+ return AtomicExpansionKind::CmpArithIntrinsic;
return shouldExpandLogicAtomicRMWInIR(AI);
case AtomicRMWInst::Nand:
case AtomicRMWInst::Max:
diff --git a/llvm/test/CodeGen/X86/pr58685.ll b/llvm/test/CodeGen/X86/pr58685.ll
index 2323162a8cdc4..0bd87854a3ce9 100644
--- a/llvm/test/CodeGen/X86/pr58685.ll
+++ b/llvm/test/CodeGen/X86/pr58685.ll
@@ -51,16 +51,7 @@ define i1 @lock_sub_sets(ptr %0, i32 %1) nounwind {
define i1 @lock_or_sete(ptr %0, i32 %1) nounwind {
; CHECK-LABEL: lock_or_sete:
; CHECK: # %bb.0:
-; CHECK-NEXT: movl (%rdi), %eax
-; CHECK-NEXT: .p2align 4, 0x90
-; CHECK-NEXT: .LBB4_1: # %atomicrmw.start
-; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: movl %eax, %ecx
-; CHECK-NEXT: orl %esi, %ecx
-; CHECK-NEXT: lock cmpxchgl %ecx, (%rdi)
-; CHECK-NEXT: jne .LBB4_1
-; CHECK-NEXT: # %bb.2: # %atomicrmw.end
-; CHECK-NEXT: orl %esi, %eax
+; CHECK-NEXT: lock orl %esi, (%rdi)
; CHECK-NEXT: sete %al
; CHECK-NEXT: retq
%3 = atomicrmw or ptr %0, i32 %1 seq_cst, align 4
@@ -72,18 +63,8 @@ define i1 @lock_or_sete(ptr %0, i32 %1) nounwind {
define i1 @lock_or_sets(ptr %0, i32 %1) nounwind {
; CHECK-LABEL: lock_or_sets:
; CHECK: # %bb.0:
-; CHECK-NEXT: movl (%rdi), %eax
-; CHECK-NEXT: .p2align 4, 0x90
-; CHECK-NEXT: .LBB5_1: # %atomicrmw.start
-; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: movl %eax, %ecx
-; CHECK-NEXT: orl %esi, %ecx
-; CHECK-NEXT: lock cmpxchgl %ecx, (%rdi)
-; CHECK-NEXT: jne .LBB5_1
-; CHECK-NEXT: # %bb.2: # %atomicrmw.end
-; CHECK-NEXT: orl %esi, %eax
-; CHECK-NEXT: shrl $31, %eax
-; CHECK-NEXT: # kill: def $al killed $al killed $eax
+; CHECK-NEXT: lock orl %esi, (%rdi)
+; CHECK-NEXT: sets %al
; CHECK-NEXT: retq
%3 = atomicrmw or ptr %0, i32 %1 seq_cst, align 4
%4 = or i32 %3, %1
@@ -94,16 +75,7 @@ define i1 @lock_or_sets(ptr %0, i32 %1) nounwind {
define i1 @lock_and_sete(ptr %0, i32 %1) nounwind {
; CHECK-LABEL: lock_and_sete:
; CHECK: # %bb.0:
-; CHECK-NEXT: movl (%rdi), %eax
-; CHECK-NEXT: .p2align 4, 0x90
-; CHECK-NEXT: .LBB6_1: # %atomicrmw.start
-; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: movl %eax, %ecx
-; CHECK-NEXT: andl %esi, %ecx
-; CHECK-NEXT: lock cmpxchgl %ecx, (%rdi)
-; CHECK-NEXT: jne .LBB6_1
-; CHECK-NEXT: # %bb.2: # %atomicrmw.end
-; CHECK-NEXT: testl %esi, %eax
+; CHECK-NEXT: lock andl %esi, (%rdi)
; CHECK-NEXT: sete %al
; CHECK-NEXT: retq
%3 = atomicrmw and ptr %0, i32 %1 seq_cst, align 4
@@ -115,18 +87,8 @@ define i1 @lock_and_sete(ptr %0, i32 %1) nounwind {
define i1 @lock_and_sets(ptr %0, i32 %1) nounwind {
; CHECK-LABEL: lock_and_sets:
; CHECK: # %bb.0:
-; CHECK-NEXT: movl (%rdi), %eax
-; CHECK-NEXT: .p2align 4, 0x90
-; CHECK-NEXT: .LBB7_1: # %atomicrmw.start
-; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: movl %eax, %ecx
-; CHECK-NEXT: andl %esi, %ecx
-; CHECK-NEXT: lock cmpxchgl %ecx, (%rdi)
-; CHECK-NEXT: jne .LBB7_1
-; CHECK-NEXT: # %bb.2: # %atomicrmw.end
-; CHECK-NEXT: andl %esi, %eax
-; CHECK-NEXT: shrl $31, %eax
-; CHECK-NEXT: # kill: def $al killed $al killed $eax
+; CHECK-NEXT: lock andl %esi, (%rdi)
+; CHECK-NEXT: sets %al
; CHECK-NEXT: retq
%3 = atomicrmw and ptr %0, i32 %1 seq_cst, align 4
%4 = and i32 %3, %1
@@ -137,16 +99,7 @@ define i1 @lock_and_sets(ptr %0, i32 %1) nounwind {
define i1 @lock_xor_sete(ptr %0, i32 %1) nounwind {
; CHECK-LABEL: lock_xor_sete:
; CHECK: # %bb.0:
-; CHECK-NEXT: movl (%rdi), %eax
-; CHECK-NEXT: .p2align 4, 0x90
-; CHECK-NEXT: .LBB8_1: # %atomicrmw.start
-; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: movl %eax, %ecx
-; CHECK-NEXT: xorl %esi, %ecx
-; CHECK-NEXT: lock cmpxchgl %ecx, (%rdi)
-; CHECK-NEXT: jne .LBB8_1
-; CHECK-NEXT: # %bb.2: # %atomicrmw.end
-; CHECK-NEXT: cmpl %esi, %eax
+; CHECK-NEXT: lock xorl %esi, (%rdi)
; CHECK-NEXT: sete %al
; CHECK-NEXT: retq
%3 = atomicrmw xor ptr %0, i32 %1 seq_cst, align 4
@@ -157,18 +110,8 @@ define i1 @lock_xor_sete(ptr %0, i32 %1) nounwind {
define i1 @lock_xor_sets(ptr %0, i32 %1) nounwind {
; CHECK-LABEL: lock_xor_sets:
; CHECK: # %bb.0:
-; CHECK-NEXT: movl (%rdi), %eax
-; CHECK-NEXT: .p2align 4, 0x90
-; CHECK-NEXT: .LBB9_1: # %atomicrmw.start
-; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: movl %eax, %ecx
-; CHECK-NEXT: xorl %esi, %ecx
-; CHECK-NEXT: lock cmpxchgl %ecx, (%rdi)
-; CHECK-NEXT: jne .LBB9_1
-; CHECK-NEXT: # %bb.2: # %atomicrmw.end
-; CHECK-NEXT: xorl %esi, %eax
-; CHECK-NEXT: shrl $31, %eax
-; CHECK-NEXT: # kill: def $al killed $al killed $eax
+; CHECK-NEXT: lock xorl %esi, (%rdi)
+; CHECK-NEXT: sets %al
; CHECK-NEXT: retq
%3 = atomicrmw xor ptr %0, i32 %1 seq_cst, align 4
%4 = xor i32 %3, %1
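[Editor's note] For the XOR equality case, the new shouldExpandCmpArithRMWInIR check also accepts a direct comparison of the fetched value against the operand, since (old ^ x) == 0 exactly when old == x. A minimal C++ sketch under the same assumptions as above (hypothetical function name):

    #include <atomic>

    // Hypothetical source-level counterpart of lock_xor_sete: comparing the
    // old value with the xor operand needs only the ZF of `lock xorl`.
    bool xor_then_was_equal(std::atomic<int> &v, int x) {
      return v.fetch_xor(x, std::memory_order_seq_cst) == x;
    }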