[llvm] 3e998ed - [X86] Lower used `(atomicrmw xor p, SignBit)` as `(atomicrmw add p, SignBit)`
Noah Goldstein via llvm-commits
llvm-commits at lists.llvm.org
Sun May 7 17:12:21 PDT 2023
Author: Noah Goldstein
Date: 2023-05-07T19:11:53-05:00
New Revision: 3e998ede64e0667ac2cf04e05c41988a71bc044c
URL: https://github.com/llvm/llvm-project/commit/3e998ede64e0667ac2cf04e05c41988a71bc044c
DIFF: https://github.com/llvm/llvm-project/commit/3e998ede64e0667ac2cf04e05c41988a71bc044c.diff
LOG: [X86] Lower used `(atomicrmw xor p, SignBit)` as `(atomicrmw add p, SignBit)`
`(xor X, SignBit)` == `(add X, SignBit)`. For atomics whose result is
used, the `add` form is preferable because the `xadd` instruction lets
us avoid either a CAS loop or a `btc; setcc; shl` sequence.
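The identity holds because adding SignBit can only flip the top bit: if the
top bit of X is clear it becomes set, and if it is set the carry falls off the
end of the register and the lower bits are untouched. A minimal standalone C++
sketch (not part of this patch) that spot-checks the equivalence for i32:

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint32_t SignBit = 0x80000000u;
      // Sample the 32-bit range with a stride that hits varied bit patterns.
      for (uint64_t X = 0; X <= UINT32_MAX; X += 0x10001u) {
        uint32_t V = static_cast<uint32_t>(X);
        // XOR with the sign bit and wrapping addition of the sign bit agree.
        assert((V ^ SignBit) == static_cast<uint32_t>(V + SignBit));
      }
      return 0;
    }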
Reviewed By: RKSimon
Differential Revision: https://reviews.llvm.org/D149689
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/atomic-bit-test.ll
llvm/test/CodeGen/X86/atomic-xor.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index ca5c9c66ed9c..b09655b78586 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -32399,11 +32399,19 @@ static std::pair<Value *, BitTestKind> FindSingleBitChange(Value *V) {
TargetLowering::AtomicExpansionKind
X86TargetLowering::shouldExpandLogicAtomicRMWInIR(AtomicRMWInst *AI) const {
+ using namespace llvm::PatternMatch;
// If the atomicrmw's result isn't actually used, we can just add a "lock"
// prefix to a normal instruction for these operations.
if (AI->use_empty())
return AtomicExpansionKind::None;
+ if (AI->getOperation() == AtomicRMWInst::Xor) {
+ // A ^ SignBit -> A + SignBit. This allows us to use `xadd` which is
+ // preferable to both `cmpxchg` and `btc`.
+ if (match(AI->getOperand(1), m_SignMask()))
+ return AtomicExpansionKind::None;
+ }
+
// If the atomicrmw's result is used by a single bit AND, we may use
// bts/btr/btc instruction for these operations.
// Note: InstCombinePass can cause a de-optimization here. It replaces the
@@ -33393,10 +33401,13 @@ static SDValue lowerAtomicArith(SDValue N, SelectionDAG &DAG,
if (N->hasAnyUseOfValue(0)) {
// Handle (atomic_load_sub p, v) as (atomic_load_add p, -v), to be able to
// select LXADD if LOCK_SUB can't be selected.
- if (Opc == ISD::ATOMIC_LOAD_SUB) {
+ // Handle (atomic_load_xor p, SignBit) as (atomic_load_add p, SignBit) so we
+ // can use LXADD as opposed to cmpxchg.
+ if (Opc == ISD::ATOMIC_LOAD_SUB ||
+ (Opc == ISD::ATOMIC_LOAD_XOR && isMinSignedConstant(RHS))) {
RHS = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), RHS);
- return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, DL, VT, Chain, LHS,
- RHS, AN->getMemOperand());
+ return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, DL, VT, Chain, LHS, RHS,
+ AN->getMemOperand());
}
assert(Opc == ISD::ATOMIC_LOAD_ADD &&
"Used AtomicRMW ops other than Add should have been expanded!");
diff --git a/llvm/test/CodeGen/X86/atomic-bit-test.ll b/llvm/test/CodeGen/X86/atomic-bit-test.ll
index 1be987388bde..f39c4b5e620d 100644
--- a/llvm/test/CodeGen/X86/atomic-bit-test.ll
+++ b/llvm/test/CodeGen/X86/atomic-bit-test.ll
@@ -183,19 +183,17 @@ entry:
define i16 @btc15() nounwind {
; X86-LABEL: btc15:
; X86: # %bb.0: # %entry
-; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: lock btcw $15, v16
-; X86-NEXT: setb %al
-; X86-NEXT: shll $15, %eax
+; X86-NEXT: movw $-32768, %ax # imm = 0x8000
+; X86-NEXT: lock xaddw %ax, v16
+; X86-NEXT: andl $32768, %eax # imm = 0x8000
; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: retl
;
; X64-LABEL: btc15:
; X64: # %bb.0: # %entry
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: lock btcw $15, v16(%rip)
-; X64-NEXT: setb %al
-; X64-NEXT: shll $15, %eax
+; X64-NEXT: movw $-32768, %ax # imm = 0x8000
+; X64-NEXT: lock xaddw %ax, v16(%rip)
+; X64-NEXT: andl $32768, %eax # imm = 0x8000
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
entry:
@@ -207,18 +205,16 @@ entry:
define i32 @btc31() nounwind {
; X86-LABEL: btc31:
; X86: # %bb.0: # %entry
-; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: lock btcl $31, v32
-; X86-NEXT: setb %al
-; X86-NEXT: shll $31, %eax
+; X86-NEXT: movl $-2147483648, %eax # imm = 0x80000000
+; X86-NEXT: lock xaddl %eax, v32
+; X86-NEXT: andl $-2147483648, %eax # imm = 0x80000000
; X86-NEXT: retl
;
; X64-LABEL: btc31:
; X64: # %bb.0: # %entry
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: lock btcl $31, v32(%rip)
-; X64-NEXT: setb %al
-; X64-NEXT: shll $31, %eax
+; X64-NEXT: movl $-2147483648, %eax # imm = 0x80000000
+; X64-NEXT: lock xaddl %eax, v32(%rip)
+; X64-NEXT: andl $-2147483648, %eax # imm = 0x80000000
; X64-NEXT: retq
entry:
%0 = atomicrmw xor ptr @v32, i32 2147483648 monotonic, align 4
@@ -251,10 +247,10 @@ define i64 @btc63() nounwind {
;
; X64-LABEL: btc63:
; X64: # %bb.0: # %entry
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: lock btcq $63, v64(%rip)
-; X64-NEXT: setb %al
-; X64-NEXT: shlq $63, %rax
+; X64-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
+; X64-NEXT: movq %rcx, %rax
+; X64-NEXT: lock xaddq %rax, v64(%rip)
+; X64-NEXT: andq %rcx, %rax
; X64-NEXT: retq
entry:
%0 = atomicrmw xor ptr @v64, i64 -9223372036854775808 monotonic, align 8
diff --git a/llvm/test/CodeGen/X86/atomic-xor.ll b/llvm/test/CodeGen/X86/atomic-xor.ll
index d0738356bb62..5f33cb8cd98a 100644
--- a/llvm/test/CodeGen/X86/atomic-xor.ll
+++ b/llvm/test/CodeGen/X86/atomic-xor.ll
@@ -85,16 +85,8 @@ define i64 @xor64_signbit_used(ptr %p) nounwind {
;
; X64-LABEL: xor64_signbit_used:
; X64: # %bb.0:
-; X64-NEXT: movq (%rdi), %rax
-; X64-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
-; X64-NEXT: .p2align 4, 0x90
-; X64-NEXT: .LBB2_1: # %atomicrmw.start
-; X64-NEXT: # =>This Inner Loop Header: Depth=1
-; X64-NEXT: movq %rax, %rdx
-; X64-NEXT: xorq %rcx, %rdx
-; X64-NEXT: lock cmpxchgq %rdx, (%rdi)
-; X64-NEXT: jne .LBB2_1
-; X64-NEXT: # %bb.2: # %atomicrmw.end
+; X64-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
+; X64-NEXT: lock xaddq %rax, (%rdi)
; X64-NEXT: retq
%r = atomicrmw xor ptr %p, i64 9223372036854775808 monotonic
ret i64 %r
@@ -104,29 +96,14 @@ define i32 @xor32_signbit_used(ptr %p) nounwind {
; X86-LABEL: xor32_signbit_used:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl (%ecx), %eax
-; X86-NEXT: .p2align 4, 0x90
-; X86-NEXT: .LBB3_1: # %atomicrmw.start
-; X86-NEXT: # =>This Inner Loop Header: Depth=1
-; X86-NEXT: leal -2147483648(%eax), %edx
-; X86-NEXT: lock cmpxchgl %edx, (%ecx)
-; X86-NEXT: jne .LBB3_1
-; X86-NEXT: # %bb.2: # %atomicrmw.end
+; X86-NEXT: movl $-2147483648, %eax # imm = 0x80000000
+; X86-NEXT: lock xaddl %eax, (%ecx)
; X86-NEXT: retl
;
; X64-LABEL: xor32_signbit_used:
; X64: # %bb.0:
-; X64-NEXT: movl (%rdi), %eax
-; X64-NEXT: .p2align 4, 0x90
-; X64-NEXT: .LBB3_1: # %atomicrmw.start
-; X64-NEXT: # =>This Inner Loop Header: Depth=1
-; X64-NEXT: leal -2147483648(%rax), %ecx
-; X64-NEXT: # kill: def $eax killed $eax killed $rax
-; X64-NEXT: lock cmpxchgl %ecx, (%rdi)
-; X64-NEXT: # kill: def $eax killed $eax def $rax
-; X64-NEXT: jne .LBB3_1
-; X64-NEXT: # %bb.2: # %atomicrmw.end
-; X64-NEXT: # kill: def $eax killed $eax killed $rax
+; X64-NEXT: movl $-2147483648, %eax # imm = 0x80000000
+; X64-NEXT: lock xaddl %eax, (%rdi)
; X64-NEXT: retq
%r = atomicrmw xor ptr %p, i32 2147483648 monotonic
ret i32 %r
@@ -136,34 +113,14 @@ define i16 @xor16_signbit_used(ptr %p) nounwind {
; X86-LABEL: xor16_signbit_used:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movzwl (%ecx), %eax
-; X86-NEXT: .p2align 4, 0x90
-; X86-NEXT: .LBB4_1: # %atomicrmw.start
-; X86-NEXT: # =>This Inner Loop Header: Depth=1
-; X86-NEXT: movl %eax, %edx
-; X86-NEXT: xorl $32768, %edx # imm = 0x8000
-; X86-NEXT: # kill: def $ax killed $ax killed $eax
-; X86-NEXT: lock cmpxchgw %dx, (%ecx)
-; X86-NEXT: # kill: def $ax killed $ax def $eax
-; X86-NEXT: jne .LBB4_1
-; X86-NEXT: # %bb.2: # %atomicrmw.end
-; X86-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-NEXT: movw $-32768, %ax # imm = 0x8000
+; X86-NEXT: lock xaddw %ax, (%ecx)
; X86-NEXT: retl
;
; X64-LABEL: xor16_signbit_used:
; X64: # %bb.0:
-; X64-NEXT: movzwl (%rdi), %eax
-; X64-NEXT: .p2align 4, 0x90
-; X64-NEXT: .LBB4_1: # %atomicrmw.start
-; X64-NEXT: # =>This Inner Loop Header: Depth=1
-; X64-NEXT: movl %eax, %ecx
-; X64-NEXT: xorl $32768, %ecx # imm = 0x8000
-; X64-NEXT: # kill: def $ax killed $ax killed $eax
-; X64-NEXT: lock cmpxchgw %cx, (%rdi)
-; X64-NEXT: # kill: def $ax killed $ax def $eax
-; X64-NEXT: jne .LBB4_1
-; X64-NEXT: # %bb.2: # %atomicrmw.end
-; X64-NEXT: # kill: def $ax killed $ax killed $eax
+; X64-NEXT: movw $-32768, %ax # imm = 0x8000
+; X64-NEXT: lock xaddw %ax, (%rdi)
; X64-NEXT: retq
%r = atomicrmw xor ptr %p, i16 32768 monotonic
ret i16 %r
@@ -173,30 +130,14 @@ define i8 @xor8_signbit_used(ptr %p) nounwind {
; X86-LABEL: xor8_signbit_used:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movzbl (%ecx), %eax
-; X86-NEXT: .p2align 4, 0x90
-; X86-NEXT: .LBB5_1: # %atomicrmw.start
-; X86-NEXT: # =>This Inner Loop Header: Depth=1
-; X86-NEXT: movl %eax, %edx
-; X86-NEXT: addb $-128, %dl
-; X86-NEXT: lock cmpxchgb %dl, (%ecx)
-; X86-NEXT: jne .LBB5_1
-; X86-NEXT: # %bb.2: # %atomicrmw.end
+; X86-NEXT: movb $-128, %al
+; X86-NEXT: lock xaddb %al, (%ecx)
; X86-NEXT: retl
;
; X64-LABEL: xor8_signbit_used:
; X64: # %bb.0:
-; X64-NEXT: movzbl (%rdi), %eax
-; X64-NEXT: .p2align 4, 0x90
-; X64-NEXT: .LBB5_1: # %atomicrmw.start
-; X64-NEXT: # =>This Inner Loop Header: Depth=1
-; X64-NEXT: leal -128(%rax), %ecx
-; X64-NEXT: # kill: def $al killed $al killed $rax
-; X64-NEXT: lock cmpxchgb %cl, (%rdi)
-; X64-NEXT: # kill: def $al killed $al def $rax
-; X64-NEXT: jne .LBB5_1
-; X64-NEXT: # %bb.2: # %atomicrmw.end
-; X64-NEXT: # kill: def $al killed $al killed $rax
+; X64-NEXT: movb $-128, %al
+; X64-NEXT: lock xaddb %al, (%rdi)
; X64-NEXT: retq
%r = atomicrmw xor ptr %p, i8 128 monotonic
ret i8 %r