[llvm] a7115d5 - [X86] X86CallFrameOptimization - generalize slow push code path
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Sun Mar 29 03:03:44 PDT 2020
Author: Simon Pilgrim
Date: 2020-03-29T11:01:59+01:00
New Revision: a7115d51be09ebc8953a269d26bda3d0c50dbab2
URL: https://github.com/llvm/llvm-project/commit/a7115d51be09ebc8953a269d26bda3d0c50dbab2
DIFF: https://github.com/llvm/llvm-project/commit/a7115d51be09ebc8953a269d26bda3d0c50dbab2.diff
LOG: [X86] X86CallFrameOptimization - generalize slow push code path
Replace the explicit isAtom() || isSLM() test with the more general (and more precise) slowTwoMemOps() check, so that the PUSHrmm (push from memory) form, which performs a load and a store in a single instruction, is avoided on any target that reports two-memory-operand instructions as slow.
This is tricky to test in anything but fairly complex code, but the atomic-idempotent.ll tests turned out to be the most straightforward way to exercise the change.
Differential Revision: https://reviews.llvm.org/D76239
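To make the intent of the one-line change concrete, here is a small standalone sketch of the decision being generalized. It is illustrative only: the Subtarget struct and choosePush helper below are invented for the example and are not LLVM APIs, and the list of affected CPUs is inferred from the RUN lines added to the test.

#include <iostream>
#include <string>

// Minimal stand-in for the subtarget queries used by the pass.
struct Subtarget {
  bool IsAtom = false;
  bool IsSLM = false;
  bool SlowTwoMemOps = false; // reported by e.g. Atom, Silvermont, Goldmont, KNL
};

// Old gate: only Atom and Silvermont avoided the push-from-memory form.
bool slowPushRmmOld(const Subtarget &STI) { return STI.IsAtom || STI.IsSLM; }

// New gate: any target with slow two-memory-operand instructions avoids
// folding the source load into the push.
bool slowPushRmmNew(const Subtarget &STI) { return STI.SlowTwoMemOps; }

// Pick the push form: fold the load into PUSHrmm only when that form is
// not slow on this target, otherwise keep a separate load plus PUSHr.
std::string choosePush(const Subtarget &STI) {
  return slowPushRmmNew(STI) ? "movl (mem), %reg ; pushl %reg"
                             : "pushl (mem)";
}

int main() {
  Subtarget Goldmont{/*IsAtom=*/false, /*IsSLM=*/false, /*SlowTwoMemOps=*/true};
  // The old check would still have folded the load on this target;
  // the new check keeps the load and the push separate.
  std::cout << choosePush(Goldmont) << "\n";
}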
Added:
Modified:
llvm/lib/Target/X86/X86CallFrameOptimization.cpp
llvm/test/CodeGen/X86/atomic-idempotent.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86CallFrameOptimization.cpp b/llvm/lib/Target/X86/X86CallFrameOptimization.cpp
index a5831bc8ef0b..1d42dd77016e 100644
--- a/llvm/lib/Target/X86/X86CallFrameOptimization.cpp
+++ b/llvm/lib/Target/X86/X86CallFrameOptimization.cpp
@@ -549,7 +549,7 @@ void X86CallFrameOptimization::adjustCallSequence(MachineFunction &MF,
// If PUSHrmm is not slow on this target, try to fold the source of the
// push into the instruction.
- bool SlowPUSHrmm = STI->isAtom() || STI->isSLM();
+ bool SlowPUSHrmm = STI->slowTwoMemOps();
// Check that this is legal to fold. Right now, we're extremely
// conservative about that.
diff --git a/llvm/test/CodeGen/X86/atomic-idempotent.ll b/llvm/test/CodeGen/X86/atomic-idempotent.ll
index 50c51fc0555c..5d93e23b3015 100644
--- a/llvm/test/CodeGen/X86/atomic-idempotent.ll
+++ b/llvm/test/CodeGen/X86/atomic-idempotent.ll
@@ -1,6 +1,10 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix=X64
-; RUN: llc < %s -mtriple=i686-- -mattr=+sse2 -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix=X86
+; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,X64
+; RUN: llc < %s -mtriple=i686-- -verify-machineinstrs -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,X86,X86-GENERIC,X86-SSE2
+; RUN: llc < %s -mtriple=i686-- -verify-machineinstrs -mcpu=slm -mattr=-sse2 | FileCheck %s --check-prefixes=CHECK,X86,X86-GENERIC,X86-SLM
+; RUN: llc < %s -mtriple=i686-- -verify-machineinstrs -mcpu=goldmont -mattr=-sse2 | FileCheck %s --check-prefixes=CHECK,X86,X86-GENERIC,X86-SLM
+; RUN: llc < %s -mtriple=i686-- -verify-machineinstrs -mcpu=knl -mattr=-sse2 | FileCheck %s --check-prefixes=CHECK,X86,X86-GENERIC,X86-SLM
+; RUN: llc < %s -mtriple=i686-- -verify-machineinstrs -mcpu=atom -mattr=-sse2 | FileCheck %s --check-prefixes=CHECK,X86,X86-ATOM
; On x86, an atomic rmw operation that does not modify the value in memory
; (such as atomic add 0) can be replaced by an mfence followed by a mov.
@@ -14,12 +18,30 @@ define i8 @add8(i8* %p) {
; X64-NEXT: movb (%rdi), %al
; X64-NEXT: retq
;
-; X86-LABEL: add8:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: mfence
-; X86-NEXT: movb (%eax), %al
-; X86-NEXT: retl
+; X86-SSE2-LABEL: add8:
+; X86-SSE2: # %bb.0:
+; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT: mfence
+; X86-SSE2-NEXT: movb (%eax), %al
+; X86-SSE2-NEXT: retl
+;
+; X86-SLM-LABEL: add8:
+; X86-SLM: # %bb.0:
+; X86-SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-SLM-NEXT: xorl %eax, %eax
+; X86-SLM-NEXT: lock xaddb %al, (%ecx)
+; X86-SLM-NEXT: # kill: def $al killed $al killed $eax
+; X86-SLM-NEXT: retl
+;
+; X86-ATOM-LABEL: add8:
+; X86-ATOM: # %bb.0:
+; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-ATOM-NEXT: xorl %eax, %eax
+; X86-ATOM-NEXT: lock xaddb %al, (%ecx)
+; X86-ATOM-NEXT: # kill: def $al killed $al killed $eax
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: retl
%1 = atomicrmw add i8* %p, i8 0 monotonic
ret i8 %1
}
@@ -31,12 +53,36 @@ define i16 @or16(i16* %p) {
; X64-NEXT: movzwl (%rdi), %eax
; X64-NEXT: retq
;
-; X86-LABEL: or16:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: mfence
-; X86-NEXT: movzwl (%eax), %eax
-; X86-NEXT: retl
+; X86-SSE2-LABEL: or16:
+; X86-SSE2: # %bb.0:
+; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT: mfence
+; X86-SSE2-NEXT: movzwl (%eax), %eax
+; X86-SSE2-NEXT: retl
+;
+; X86-SLM-LABEL: or16:
+; X86-SLM: # %bb.0:
+; X86-SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-SLM-NEXT: movzwl (%ecx), %eax
+; X86-SLM-NEXT: .p2align 4, 0x90
+; X86-SLM-NEXT: .LBB1_1: # %atomicrmw.start
+; X86-SLM-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-SLM-NEXT: lock cmpxchgw %ax, (%ecx)
+; X86-SLM-NEXT: jne .LBB1_1
+; X86-SLM-NEXT: # %bb.2: # %atomicrmw.end
+; X86-SLM-NEXT: retl
+;
+; X86-ATOM-LABEL: or16:
+; X86-ATOM: # %bb.0:
+; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-ATOM-NEXT: movzwl (%ecx), %eax
+; X86-ATOM-NEXT: .p2align 4, 0x90
+; X86-ATOM-NEXT: .LBB1_1: # %atomicrmw.start
+; X86-ATOM-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-ATOM-NEXT: lock cmpxchgw %ax, (%ecx)
+; X86-ATOM-NEXT: jne .LBB1_1
+; X86-ATOM-NEXT: # %bb.2: # %atomicrmw.end
+; X86-ATOM-NEXT: retl
%1 = atomicrmw or i16* %p, i16 0 acquire
ret i16 %1
}
@@ -48,12 +94,36 @@ define i32 @xor32(i32* %p) {
; X64-NEXT: movl (%rdi), %eax
; X64-NEXT: retq
;
-; X86-LABEL: xor32:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: mfence
-; X86-NEXT: movl (%eax), %eax
-; X86-NEXT: retl
+; X86-SSE2-LABEL: xor32:
+; X86-SSE2: # %bb.0:
+; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT: mfence
+; X86-SSE2-NEXT: movl (%eax), %eax
+; X86-SSE2-NEXT: retl
+;
+; X86-SLM-LABEL: xor32:
+; X86-SLM: # %bb.0:
+; X86-SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-SLM-NEXT: movl (%ecx), %eax
+; X86-SLM-NEXT: .p2align 4, 0x90
+; X86-SLM-NEXT: .LBB2_1: # %atomicrmw.start
+; X86-SLM-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-SLM-NEXT: lock cmpxchgl %eax, (%ecx)
+; X86-SLM-NEXT: jne .LBB2_1
+; X86-SLM-NEXT: # %bb.2: # %atomicrmw.end
+; X86-SLM-NEXT: retl
+;
+; X86-ATOM-LABEL: xor32:
+; X86-ATOM: # %bb.0:
+; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-ATOM-NEXT: movl (%ecx), %eax
+; X86-ATOM-NEXT: .p2align 4, 0x90
+; X86-ATOM-NEXT: .LBB2_1: # %atomicrmw.start
+; X86-ATOM-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-ATOM-NEXT: lock cmpxchgl %eax, (%ecx)
+; X86-ATOM-NEXT: jne .LBB2_1
+; X86-ATOM-NEXT: # %bb.2: # %atomicrmw.end
+; X86-ATOM-NEXT: retl
%1 = atomicrmw xor i32* %p, i32 0 release
ret i32 %1
}
@@ -105,44 +175,124 @@ define i128 @or128(i128* %p) {
; X64-NEXT: .cfi_def_cfa_offset 8
; X64-NEXT: retq
;
-; X86-LABEL: or128:
-; X86: # %bb.0:
-; X86-NEXT: pushl %ebp
-; X86-NEXT: .cfi_def_cfa_offset 8
-; X86-NEXT: .cfi_offset %ebp, -8
-; X86-NEXT: movl %esp, %ebp
-; X86-NEXT: .cfi_def_cfa_register %ebp
-; X86-NEXT: pushl %edi
-; X86-NEXT: pushl %esi
-; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $16, %esp
-; X86-NEXT: .cfi_offset %esi, -16
-; X86-NEXT: .cfi_offset %edi, -12
-; X86-NEXT: movl 8(%ebp), %esi
-; X86-NEXT: movl %esp, %eax
-; X86-NEXT: pushl $0
-; X86-NEXT: pushl $0
-; X86-NEXT: pushl $0
-; X86-NEXT: pushl $0
-; X86-NEXT: pushl 12(%ebp)
-; X86-NEXT: pushl %eax
-; X86-NEXT: calll __sync_fetch_and_or_16
-; X86-NEXT: addl $20, %esp
-; X86-NEXT: movl (%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl %edi, 8(%esi)
-; X86-NEXT: movl %edx, 12(%esi)
-; X86-NEXT: movl %eax, (%esi)
-; X86-NEXT: movl %ecx, 4(%esi)
-; X86-NEXT: movl %esi, %eax
-; X86-NEXT: leal -8(%ebp), %esp
-; X86-NEXT: popl %esi
-; X86-NEXT: popl %edi
-; X86-NEXT: popl %ebp
-; X86-NEXT: .cfi_def_cfa %esp, 4
-; X86-NEXT: retl $4
+; X86-SSE2-LABEL: or128:
+; X86-SSE2: # %bb.0:
+; X86-SSE2-NEXT: pushl %ebp
+; X86-SSE2-NEXT: .cfi_def_cfa_offset 8
+; X86-SSE2-NEXT: .cfi_offset %ebp, -8
+; X86-SSE2-NEXT: movl %esp, %ebp
+; X86-SSE2-NEXT: .cfi_def_cfa_register %ebp
+; X86-SSE2-NEXT: pushl %edi
+; X86-SSE2-NEXT: pushl %esi
+; X86-SSE2-NEXT: andl $-8, %esp
+; X86-SSE2-NEXT: subl $16, %esp
+; X86-SSE2-NEXT: .cfi_offset %esi, -16
+; X86-SSE2-NEXT: .cfi_offset %edi, -12
+; X86-SSE2-NEXT: movl 8(%ebp), %esi
+; X86-SSE2-NEXT: movl %esp, %eax
+; X86-SSE2-NEXT: pushl $0
+; X86-SSE2-NEXT: pushl $0
+; X86-SSE2-NEXT: pushl $0
+; X86-SSE2-NEXT: pushl $0
+; X86-SSE2-NEXT: pushl 12(%ebp)
+; X86-SSE2-NEXT: pushl %eax
+; X86-SSE2-NEXT: calll __sync_fetch_and_or_16
+; X86-SSE2-NEXT: addl $20, %esp
+; X86-SSE2-NEXT: movl (%esp), %eax
+; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-SSE2-NEXT: movl %edi, 8(%esi)
+; X86-SSE2-NEXT: movl %edx, 12(%esi)
+; X86-SSE2-NEXT: movl %eax, (%esi)
+; X86-SSE2-NEXT: movl %ecx, 4(%esi)
+; X86-SSE2-NEXT: movl %esi, %eax
+; X86-SSE2-NEXT: leal -8(%ebp), %esp
+; X86-SSE2-NEXT: popl %esi
+; X86-SSE2-NEXT: popl %edi
+; X86-SSE2-NEXT: popl %ebp
+; X86-SSE2-NEXT: .cfi_def_cfa %esp, 4
+; X86-SSE2-NEXT: retl $4
+;
+; X86-SLM-LABEL: or128:
+; X86-SLM: # %bb.0:
+; X86-SLM-NEXT: pushl %ebp
+; X86-SLM-NEXT: .cfi_def_cfa_offset 8
+; X86-SLM-NEXT: .cfi_offset %ebp, -8
+; X86-SLM-NEXT: movl %esp, %ebp
+; X86-SLM-NEXT: .cfi_def_cfa_register %ebp
+; X86-SLM-NEXT: pushl %edi
+; X86-SLM-NEXT: pushl %esi
+; X86-SLM-NEXT: andl $-8, %esp
+; X86-SLM-NEXT: subl $16, %esp
+; X86-SLM-NEXT: .cfi_offset %esi, -16
+; X86-SLM-NEXT: .cfi_offset %edi, -12
+; X86-SLM-NEXT: movl 8(%ebp), %esi
+; X86-SLM-NEXT: movl 12(%ebp), %eax
+; X86-SLM-NEXT: movl %esp, %ecx
+; X86-SLM-NEXT: pushl $0
+; X86-SLM-NEXT: pushl $0
+; X86-SLM-NEXT: pushl $0
+; X86-SLM-NEXT: pushl $0
+; X86-SLM-NEXT: pushl %eax
+; X86-SLM-NEXT: pushl %ecx
+; X86-SLM-NEXT: calll __sync_fetch_and_or_16
+; X86-SLM-NEXT: addl $20, %esp
+; X86-SLM-NEXT: movl (%esp), %eax
+; X86-SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-SLM-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-SLM-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-SLM-NEXT: movl %edi, 8(%esi)
+; X86-SLM-NEXT: movl %edx, 12(%esi)
+; X86-SLM-NEXT: movl %eax, (%esi)
+; X86-SLM-NEXT: movl %ecx, 4(%esi)
+; X86-SLM-NEXT: movl %esi, %eax
+; X86-SLM-NEXT: leal -8(%ebp), %esp
+; X86-SLM-NEXT: popl %esi
+; X86-SLM-NEXT: popl %edi
+; X86-SLM-NEXT: popl %ebp
+; X86-SLM-NEXT: .cfi_def_cfa %esp, 4
+; X86-SLM-NEXT: retl $4
+;
+; X86-ATOM-LABEL: or128:
+; X86-ATOM: # %bb.0:
+; X86-ATOM-NEXT: pushl %ebp
+; X86-ATOM-NEXT: .cfi_def_cfa_offset 8
+; X86-ATOM-NEXT: .cfi_offset %ebp, -8
+; X86-ATOM-NEXT: leal (%esp), %ebp
+; X86-ATOM-NEXT: .cfi_def_cfa_register %ebp
+; X86-ATOM-NEXT: pushl %edi
+; X86-ATOM-NEXT: pushl %esi
+; X86-ATOM-NEXT: andl $-8, %esp
+; X86-ATOM-NEXT: leal -{{[0-9]+}}(%esp), %esp
+; X86-ATOM-NEXT: .cfi_offset %esi, -16
+; X86-ATOM-NEXT: .cfi_offset %edi, -12
+; X86-ATOM-NEXT: movl 8(%ebp), %esi
+; X86-ATOM-NEXT: movl 12(%ebp), %eax
+; X86-ATOM-NEXT: movl %esp, %ecx
+; X86-ATOM-NEXT: pushl $0
+; X86-ATOM-NEXT: pushl $0
+; X86-ATOM-NEXT: pushl $0
+; X86-ATOM-NEXT: pushl $0
+; X86-ATOM-NEXT: pushl %eax
+; X86-ATOM-NEXT: pushl %ecx
+; X86-ATOM-NEXT: calll __sync_fetch_and_or_16
+; X86-ATOM-NEXT: leal {{[0-9]+}}(%esp), %esp
+; X86-ATOM-NEXT: movl (%esp), %ecx
+; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-ATOM-NEXT: movl %eax, 8(%esi)
+; X86-ATOM-NEXT: movl %edi, 12(%esi)
+; X86-ATOM-NEXT: movl %ecx, (%esi)
+; X86-ATOM-NEXT: movl %esi, %eax
+; X86-ATOM-NEXT: movl %edx, 4(%esi)
+; X86-ATOM-NEXT: leal -8(%ebp), %esp
+; X86-ATOM-NEXT: popl %esi
+; X86-ATOM-NEXT: popl %edi
+; X86-ATOM-NEXT: popl %ebp
+; X86-ATOM-NEXT: .cfi_def_cfa %esp, 4
+; X86-ATOM-NEXT: retl $4
%1 = atomicrmw or i128* %p, i128 0 monotonic
ret i128 %1
}
@@ -155,49 +305,137 @@ define i32 @and32 (i32* %p) {
; X64-NEXT: movl (%rdi), %eax
; X64-NEXT: retq
;
-; X86-LABEL: and32:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: mfence
-; X86-NEXT: movl (%eax), %eax
-; X86-NEXT: retl
+; X86-SSE2-LABEL: and32:
+; X86-SSE2: # %bb.0:
+; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT: mfence
+; X86-SSE2-NEXT: movl (%eax), %eax
+; X86-SSE2-NEXT: retl
+;
+; X86-SLM-LABEL: and32:
+; X86-SLM: # %bb.0:
+; X86-SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-SLM-NEXT: movl (%ecx), %eax
+; X86-SLM-NEXT: .p2align 4, 0x90
+; X86-SLM-NEXT: .LBB5_1: # %atomicrmw.start
+; X86-SLM-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-SLM-NEXT: lock cmpxchgl %eax, (%ecx)
+; X86-SLM-NEXT: jne .LBB5_1
+; X86-SLM-NEXT: # %bb.2: # %atomicrmw.end
+; X86-SLM-NEXT: retl
+;
+; X86-ATOM-LABEL: and32:
+; X86-ATOM: # %bb.0:
+; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-ATOM-NEXT: movl (%ecx), %eax
+; X86-ATOM-NEXT: .p2align 4, 0x90
+; X86-ATOM-NEXT: .LBB5_1: # %atomicrmw.start
+; X86-ATOM-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-ATOM-NEXT: lock cmpxchgl %eax, (%ecx)
+; X86-ATOM-NEXT: jne .LBB5_1
+; X86-ATOM-NEXT: # %bb.2: # %atomicrmw.end
+; X86-ATOM-NEXT: retl
%1 = atomicrmw and i32* %p, i32 -1 acq_rel
ret i32 %1
}
define void @or32_nouse_monotonic(i32* %p) {
-; CHECK-LABEL: or32_nouse_monotonic:
-; CHECK: # %bb.0:
-; CHECK-NEXT: #MEMBARRIER
-; CHECK-NEXT: ret{{[l|q]}}
+; X64-LABEL: or32_nouse_monotonic:
+; X64: # %bb.0:
+; X64-NEXT: #MEMBARRIER
+; X64-NEXT: retq
+;
+; X86-GENERIC-LABEL: or32_nouse_monotonic:
+; X86-GENERIC: # %bb.0:
+; X86-GENERIC-NEXT: #MEMBARRIER
+; X86-GENERIC-NEXT: retl
+;
+; X86-ATOM-LABEL: or32_nouse_monotonic:
+; X86-ATOM: # %bb.0:
+; X86-ATOM-NEXT: #MEMBARRIER
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: retl
atomicrmw or i32* %p, i32 0 monotonic
ret void
}
define void @or32_nouse_acquire(i32* %p) {
-; CHECK-LABEL: or32_nouse_acquire:
-; CHECK: # %bb.0:
-; CHECK-NEXT: #MEMBARRIER
-; CHECK-NEXT: ret{{[l|q]}}
+; X64-LABEL: or32_nouse_acquire:
+; X64: # %bb.0:
+; X64-NEXT: #MEMBARRIER
+; X64-NEXT: retq
+;
+; X86-GENERIC-LABEL: or32_nouse_acquire:
+; X86-GENERIC: # %bb.0:
+; X86-GENERIC-NEXT: #MEMBARRIER
+; X86-GENERIC-NEXT: retl
+;
+; X86-ATOM-LABEL: or32_nouse_acquire:
+; X86-ATOM: # %bb.0:
+; X86-ATOM-NEXT: #MEMBARRIER
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: retl
atomicrmw or i32* %p, i32 0 acquire
ret void
}
define void @or32_nouse_release(i32* %p) {
-; CHECK-LABEL: or32_nouse_release:
-; CHECK: # %bb.0:
-; CHECK-NEXT: #MEMBARRIER
-; CHECK-NEXT: ret{{[l|q]}}
+; X64-LABEL: or32_nouse_release:
+; X64: # %bb.0:
+; X64-NEXT: #MEMBARRIER
+; X64-NEXT: retq
+;
+; X86-GENERIC-LABEL: or32_nouse_release:
+; X86-GENERIC: # %bb.0:
+; X86-GENERIC-NEXT: #MEMBARRIER
+; X86-GENERIC-NEXT: retl
+;
+; X86-ATOM-LABEL: or32_nouse_release:
+; X86-ATOM: # %bb.0:
+; X86-ATOM-NEXT: #MEMBARRIER
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: retl
atomicrmw or i32* %p, i32 0 release
ret void
}
define void @or32_nouse_acq_rel(i32* %p) {
-; CHECK-LABEL: or32_nouse_acq_rel:
-; CHECK: # %bb.0:
-; CHECK-NEXT: #MEMBARRIER
-; CHECK-NEXT: ret{{[l|q]}}
+; X64-LABEL: or32_nouse_acq_rel:
+; X64: # %bb.0:
+; X64-NEXT: #MEMBARRIER
+; X64-NEXT: retq
+;
+; X86-GENERIC-LABEL: or32_nouse_acq_rel:
+; X86-GENERIC: # %bb.0:
+; X86-GENERIC-NEXT: #MEMBARRIER
+; X86-GENERIC-NEXT: retl
+;
+; X86-ATOM-LABEL: or32_nouse_acq_rel:
+; X86-ATOM: # %bb.0:
+; X86-ATOM-NEXT: #MEMBARRIER
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: retl
atomicrmw or i32* %p, i32 0 acq_rel
ret void
}
@@ -208,10 +446,21 @@ define void @or32_nouse_seq_cst(i32* %p) {
; X64-NEXT: lock orl $0, -{{[0-9]+}}(%rsp)
; X64-NEXT: retq
;
-; X86-LABEL: or32_nouse_seq_cst:
-; X86: # %bb.0:
-; X86-NEXT: lock orl $0, (%esp)
-; X86-NEXT: retl
+; X86-GENERIC-LABEL: or32_nouse_seq_cst:
+; X86-GENERIC: # %bb.0:
+; X86-GENERIC-NEXT: lock orl $0, (%esp)
+; X86-GENERIC-NEXT: retl
+;
+; X86-ATOM-LABEL: or32_nouse_seq_cst:
+; X86-ATOM: # %bb.0:
+; X86-ATOM-NEXT: lock orl $0, (%esp)
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: retl
atomicrmw or i32* %p, i32 0 seq_cst
ret void
}
@@ -264,28 +513,76 @@ define void @or128_nouse_seq_cst(i128* %p) {
; X64-NEXT: .cfi_def_cfa_offset 8
; X64-NEXT: retq
;
-; X86-LABEL: or128_nouse_seq_cst:
-; X86: # %bb.0:
-; X86-NEXT: pushl %ebp
-; X86-NEXT: .cfi_def_cfa_offset 8
-; X86-NEXT: .cfi_offset %ebp, -8
-; X86-NEXT: movl %esp, %ebp
-; X86-NEXT: .cfi_def_cfa_register %ebp
-; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $16, %esp
-; X86-NEXT: movl %esp, %eax
-; X86-NEXT: pushl $0
-; X86-NEXT: pushl $0
-; X86-NEXT: pushl $0
-; X86-NEXT: pushl $0
-; X86-NEXT: pushl 8(%ebp)
-; X86-NEXT: pushl %eax
-; X86-NEXT: calll __sync_fetch_and_or_16
-; X86-NEXT: addl $20, %esp
-; X86-NEXT: movl %ebp, %esp
-; X86-NEXT: popl %ebp
-; X86-NEXT: .cfi_def_cfa %esp, 4
-; X86-NEXT: retl
+; X86-SSE2-LABEL: or128_nouse_seq_cst:
+; X86-SSE2: # %bb.0:
+; X86-SSE2-NEXT: pushl %ebp
+; X86-SSE2-NEXT: .cfi_def_cfa_offset 8
+; X86-SSE2-NEXT: .cfi_offset %ebp, -8
+; X86-SSE2-NEXT: movl %esp, %ebp
+; X86-SSE2-NEXT: .cfi_def_cfa_register %ebp
+; X86-SSE2-NEXT: andl $-8, %esp
+; X86-SSE2-NEXT: subl $16, %esp
+; X86-SSE2-NEXT: movl %esp, %eax
+; X86-SSE2-NEXT: pushl $0
+; X86-SSE2-NEXT: pushl $0
+; X86-SSE2-NEXT: pushl $0
+; X86-SSE2-NEXT: pushl $0
+; X86-SSE2-NEXT: pushl 8(%ebp)
+; X86-SSE2-NEXT: pushl %eax
+; X86-SSE2-NEXT: calll __sync_fetch_and_or_16
+; X86-SSE2-NEXT: addl $20, %esp
+; X86-SSE2-NEXT: movl %ebp, %esp
+; X86-SSE2-NEXT: popl %ebp
+; X86-SSE2-NEXT: .cfi_def_cfa %esp, 4
+; X86-SSE2-NEXT: retl
+;
+; X86-SLM-LABEL: or128_nouse_seq_cst:
+; X86-SLM: # %bb.0:
+; X86-SLM-NEXT: pushl %ebp
+; X86-SLM-NEXT: .cfi_def_cfa_offset 8
+; X86-SLM-NEXT: .cfi_offset %ebp, -8
+; X86-SLM-NEXT: movl %esp, %ebp
+; X86-SLM-NEXT: .cfi_def_cfa_register %ebp
+; X86-SLM-NEXT: andl $-8, %esp
+; X86-SLM-NEXT: subl $16, %esp
+; X86-SLM-NEXT: movl 8(%ebp), %eax
+; X86-SLM-NEXT: movl %esp, %ecx
+; X86-SLM-NEXT: pushl $0
+; X86-SLM-NEXT: pushl $0
+; X86-SLM-NEXT: pushl $0
+; X86-SLM-NEXT: pushl $0
+; X86-SLM-NEXT: pushl %eax
+; X86-SLM-NEXT: pushl %ecx
+; X86-SLM-NEXT: calll __sync_fetch_and_or_16
+; X86-SLM-NEXT: addl $20, %esp
+; X86-SLM-NEXT: movl %ebp, %esp
+; X86-SLM-NEXT: popl %ebp
+; X86-SLM-NEXT: .cfi_def_cfa %esp, 4
+; X86-SLM-NEXT: retl
+;
+; X86-ATOM-LABEL: or128_nouse_seq_cst:
+; X86-ATOM: # %bb.0:
+; X86-ATOM-NEXT: pushl %ebp
+; X86-ATOM-NEXT: .cfi_def_cfa_offset 8
+; X86-ATOM-NEXT: .cfi_offset %ebp, -8
+; X86-ATOM-NEXT: leal (%esp), %ebp
+; X86-ATOM-NEXT: .cfi_def_cfa_register %ebp
+; X86-ATOM-NEXT: andl $-8, %esp
+; X86-ATOM-NEXT: leal -{{[0-9]+}}(%esp), %esp
+; X86-ATOM-NEXT: movl 8(%ebp), %eax
+; X86-ATOM-NEXT: movl %esp, %ecx
+; X86-ATOM-NEXT: pushl $0
+; X86-ATOM-NEXT: pushl $0
+; X86-ATOM-NEXT: pushl $0
+; X86-ATOM-NEXT: pushl $0
+; X86-ATOM-NEXT: pushl %eax
+; X86-ATOM-NEXT: pushl %ecx
+; X86-ATOM-NEXT: calll __sync_fetch_and_or_16
+; X86-ATOM-NEXT: leal {{[0-9]+}}(%esp), %esp
+; X86-ATOM-NEXT: movl %ebp, %esp
+; X86-ATOM-NEXT: popl %ebp
+; X86-ATOM-NEXT: .cfi_def_cfa %esp, 4
+; X86-ATOM-NEXT: retl
atomicrmw or i128* %p, i128 0 seq_cst
ret void
}
@@ -297,10 +594,21 @@ define void @or16_nouse_seq_cst(i16* %p) {
; X64-NEXT: lock orl $0, -{{[0-9]+}}(%rsp)
; X64-NEXT: retq
;
-; X86-LABEL: or16_nouse_seq_cst:
-; X86: # %bb.0:
-; X86-NEXT: lock orl $0, (%esp)
-; X86-NEXT: retl
+; X86-GENERIC-LABEL: or16_nouse_seq_cst:
+; X86-GENERIC: # %bb.0:
+; X86-GENERIC-NEXT: lock orl $0, (%esp)
+; X86-GENERIC-NEXT: retl
+;
+; X86-ATOM-LABEL: or16_nouse_seq_cst:
+; X86-ATOM: # %bb.0:
+; X86-ATOM-NEXT: lock orl $0, (%esp)
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: retl
atomicrmw or i16* %p, i16 0 seq_cst
ret void
}
@@ -311,10 +619,21 @@ define void @or8_nouse_seq_cst(i8* %p) {
; X64-NEXT: lock orl $0, -{{[0-9]+}}(%rsp)
; X64-NEXT: retq
;
-; X86-LABEL: or8_nouse_seq_cst:
-; X86: # %bb.0:
-; X86-NEXT: lock orl $0, (%esp)
-; X86-NEXT: retl
+; X86-GENERIC-LABEL: or8_nouse_seq_cst:
+; X86-GENERIC: # %bb.0:
+; X86-GENERIC-NEXT: lock orl $0, (%esp)
+; X86-GENERIC-NEXT: retl
+;
+; X86-ATOM-LABEL: or8_nouse_seq_cst:
+; X86-ATOM: # %bb.0:
+; X86-ATOM-NEXT: lock orl $0, (%esp)
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: retl
atomicrmw or i8* %p, i8 0 seq_cst
ret void
}