[llvm] r289955 - [X86] Fold (setcc (cmp (atomic_load_add x, -C) C), COND) to (setcc (LADD x, -C), COND) (PR31367)
Hans Wennborg via llvm-commits
llvm-commits at lists.llvm.org
Fri Dec 16 08:34:59 PST 2016
Author: hans
Date: Fri Dec 16 10:34:59 2016
New Revision: 289955
URL: http://llvm.org/viewvc/llvm-project?rev=289955&view=rev
Log:
[X86] Fold (setcc (cmp (atomic_load_add x, -C) C), COND) to (setcc (LADD x, -C), COND) (PR31367)
atomic_load_add returns the value before addition, but sets EFLAGS based on the
result of the addition. That means it's setting the flags based on effectively
subtracting C from the value at x, which is also what the outer cmp does.
This targets a pattern that occurs frequently with reference counting pointers:
void decrement(long volatile *ptr) {
if (_InterlockedDecrement(ptr) == 0)
release();
}
Clang would previously compile it (for 32-bit at -Os) as:
00000000 <?decrement@@YAXPCJ at Z>:
0: 8b 44 24 04 mov 0x4(%esp),%eax
4: 31 c9 xor %ecx,%ecx
6: 49 dec %ecx
7: f0 0f c1 08 lock xadd %ecx,(%eax)
b: 83 f9 01 cmp $0x1,%ecx
e: 0f 84 00 00 00 00 je 14 <?decrement@@YAXPCJ at Z+0x14>
14: c3 ret
and with this patch it becomes:
00000000 <?decrement@@YAXPCJ at Z>:
0: 8b 44 24 04 mov 0x4(%esp),%eax
4: f0 ff 08 lock decl (%eax)
7: 0f 84 00 00 00 00 je d <?decrement@@YAXPCJ at Z+0xd>
d: c3 ret
(Equivalent variants with _InterlockedExchangeAdd, std::atomic<>'s fetch_add
or pre-decrement operator generate the same code.)
Differential Revision: https://reviews.llvm.org/D27781
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/atomic-eflags-reuse.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=289955&r1=289954&r2=289955&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Fri Dec 16 10:34:59 2016
@@ -28879,11 +28879,19 @@ static SDValue combineSelect(SDNode *N,
return SDValue();
}
-/// Combine:
+/// Combine brcond/cmov/setcc/.. based on comparing the result of
+/// atomic_load_add to use EFLAGS produced by the addition
+/// directly if possible. For example:
+///
+/// (setcc (cmp (atomic_load_add x, -C) C), COND_E)
+/// becomes:
+/// (setcc (LADD x, -C), COND_E)
+///
+/// and
/// (brcond/cmov/setcc .., (cmp (atomic_load_add x, 1), 0), COND_S)
-/// to:
+/// becomes:
/// (brcond/cmov/setcc .., (LADD x, 1), COND_LE)
-/// i.e., reusing the EFLAGS produced by the LOCKed instruction.
+///
/// Note that this is only legal for some op/cc combinations.
static SDValue combineSetCCAtomicArith(SDValue Cmp, X86::CondCode &CC,
SelectionDAG &DAG) {
@@ -28892,7 +28900,7 @@ static SDValue combineSetCCAtomicArith(S
(Cmp.getOpcode() == X86ISD::SUB && !Cmp->hasAnyUseOfValue(0))))
return SDValue();
- // This only applies to variations of the common case:
+ // This applies to variations of the common case:
// (icmp slt x, 0) -> (icmp sle (add x, 1), 0)
// (icmp sge x, 0) -> (icmp sgt (add x, 1), 0)
// (icmp sle x, 0) -> (icmp slt (sub x, 1), 0)
@@ -28911,8 +28919,9 @@ static SDValue combineSetCCAtomicArith(S
return SDValue();
auto *CmpRHSC = dyn_cast<ConstantSDNode>(CmpRHS);
- if (!CmpRHSC || CmpRHSC->getZExtValue() != 0)
+ if (!CmpRHSC)
return SDValue();
+ APInt Comparand = CmpRHSC->getAPIntValue();
const unsigned Opc = CmpLHS.getOpcode();
@@ -28928,13 +28937,15 @@ static SDValue combineSetCCAtomicArith(S
if (Opc == ISD::ATOMIC_LOAD_SUB)
Addend = -Addend;
- if (CC == X86::COND_S && Addend == 1)
+ if (Comparand == -Addend)
+ CC = CC; // No change.
+ else if (CC == X86::COND_S && Comparand == 0 && Addend == 1)
CC = X86::COND_LE;
- else if (CC == X86::COND_NS && Addend == 1)
+ else if (CC == X86::COND_NS && Comparand == 0 && Addend == 1)
CC = X86::COND_G;
- else if (CC == X86::COND_G && Addend == -1)
+ else if (CC == X86::COND_G && Comparand == 0 && Addend == -1)
CC = X86::COND_GE;
- else if (CC == X86::COND_LE && Addend == -1)
+ else if (CC == X86::COND_LE && Comparand == 0 && Addend == -1)
CC = X86::COND_L;
else
return SDValue();
Modified: llvm/trunk/test/CodeGen/X86/atomic-eflags-reuse.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/atomic-eflags-reuse.ll?rev=289955&r1=289954&r2=289955&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/atomic-eflags-reuse.ll (original)
+++ llvm/trunk/test/CodeGen/X86/atomic-eflags-reuse.ll Fri Dec 16 10:34:59 2016
@@ -176,4 +176,45 @@ entry:
ret i8 %tmp2
}
+define i8 @test_sub_1_setcc_eq(i64* %p) #0 {
+; CHECK-LABEL: test_sub_1_setcc_eq:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: lock decq (%rdi)
+; CHECK-NEXT: sete %al
+; CHECK-NEXT: retq
+entry:
+ %tmp0 = atomicrmw sub i64* %p, i64 1 seq_cst
+ %tmp1 = icmp eq i64 %tmp0, 1
+ %tmp2 = zext i1 %tmp1 to i8
+ ret i8 %tmp2
+}
+
+define i8 @test_add_5_setcc_ne(i64* %p) #0 {
+; CHECK-LABEL: test_add_5_setcc_ne:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: lock addq $5, (%rdi)
+; CHECK-NEXT: setne %al
+; CHECK-NEXT: retq
+entry:
+ %tmp0 = atomicrmw add i64* %p, i64 5 seq_cst
+ %tmp1 = icmp ne i64 %tmp0, -5
+ %tmp2 = zext i1 %tmp1 to i8
+ ret i8 %tmp2
+}
+
+define i8 @test_add_5_setcc_ne_comparand_mismatch(i64* %p) #0 {
+; CHECK-LABEL: test_add_5_setcc_ne_comparand_mismatch:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: movl $5, %eax
+; CHECK-NEXT: lock xaddq %rax, (%rdi)
+; CHECK-NEXT: testq %rax, %rax
+; CHECK-NEXT: setne %al
+; CHECK-NEXT: retq
+entry:
+ %tmp0 = atomicrmw add i64* %p, i64 5 seq_cst
+ %tmp1 = icmp ne i64 %tmp0, 0
+ %tmp2 = zext i1 %tmp1 to i8
+ ret i8 %tmp2
+}
+
attributes #0 = { nounwind }
More information about the llvm-commits
mailing list