[llvm] [X86] Set MaxAtomicSizeInBitsSupported. (PR #75112)
James Y Knight via llvm-commits
llvm-commits@lists.llvm.org
Mon Dec 11 19:35:51 PST 2023
https://github.com/jyknight updated https://github.com/llvm/llvm-project/pull/75112
>From fe53c9d4643b515468f0d37860ba8d286a8fdd74 Mon Sep 17 00:00:00 2001
From: James Y Knight <jyknight@google.com>
Date: Sat, 9 Dec 2023 20:52:30 -0500
Subject: [PATCH 1/3] [X86] Set MaxAtomicSizeInBitsSupported.
This will result in larger atomic operations getting expanded to
__atomic_* libcalls via AtomicExpandPass, which matches what Clang
already does in the frontend.
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 10 +-
llvm/test/CodeGen/X86/atomic-idempotent.ll | 347 ++++++++++--------
llvm/test/CodeGen/X86/atomic-nocx16.ll | 54 ++-
.../test/CodeGen/X86/atomic-ops-ancient-64.ll | 15 +-
llvm/test/CodeGen/X86/atomic-xor.ll | 57 ++-
5 files changed, 288 insertions(+), 195 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index d69976342fcbd0..b284378a5fb292 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -142,11 +142,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setLibcallName(RTLIB::POWI_F64, nullptr);
}
- // If we don't have cmpxchg8b(meaing this is a 386/486), limit atomic size to
- // 32 bits so the AtomicExpandPass will expand it so we don't need cmpxchg8b.
- // FIXME: Should we be limiting the atomic size on other configs? Default is
- // 1024.
- if (!Subtarget.canUseCMPXCHG8B())
+ if (Subtarget.canUseCMPXCHG16B())
+ setMaxAtomicSizeInBitsSupported(128);
+ else if (Subtarget.canUseCMPXCHG8B())
+ setMaxAtomicSizeInBitsSupported(64);
+ else
setMaxAtomicSizeInBitsSupported(32);
setMaxDivRemBitWidthSupported(Subtarget.is64Bit() ? 128 : 64);
diff --git a/llvm/test/CodeGen/X86/atomic-idempotent.ll b/llvm/test/CodeGen/X86/atomic-idempotent.ll
index 3a9648bd1fbb50..d5c46485068a64 100644
--- a/llvm/test/CodeGen/X86/atomic-idempotent.ll
+++ b/llvm/test/CodeGen/X86/atomic-idempotent.ll
@@ -170,117 +170,130 @@ define i128 @or128(ptr %p) {
; X64-NEXT: .cfi_def_cfa_offset 16
; X64-NEXT: xorl %esi, %esi
; X64-NEXT: xorl %edx, %edx
-; X64-NEXT: callq __sync_fetch_and_or_16 at PLT
+; X64-NEXT: xorl %ecx, %ecx
+; X64-NEXT: callq __atomic_fetch_or_16 at PLT
; X64-NEXT: popq %rcx
; X64-NEXT: .cfi_def_cfa_offset 8
; X64-NEXT: retq
;
-; X86-SSE2-LABEL: or128:
-; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: pushl %ebp
-; X86-SSE2-NEXT: .cfi_def_cfa_offset 8
-; X86-SSE2-NEXT: .cfi_offset %ebp, -8
-; X86-SSE2-NEXT: movl %esp, %ebp
-; X86-SSE2-NEXT: .cfi_def_cfa_register %ebp
-; X86-SSE2-NEXT: pushl %esi
-; X86-SSE2-NEXT: andl $-16, %esp
-; X86-SSE2-NEXT: subl $32, %esp
-; X86-SSE2-NEXT: .cfi_offset %esi, -12
-; X86-SSE2-NEXT: movl 8(%ebp), %esi
-; X86-SSE2-NEXT: movl %esp, %eax
-; X86-SSE2-NEXT: pushl $0
-; X86-SSE2-NEXT: pushl $0
-; X86-SSE2-NEXT: pushl $0
-; X86-SSE2-NEXT: pushl $0
-; X86-SSE2-NEXT: pushl 12(%ebp)
-; X86-SSE2-NEXT: pushl %eax
-; X86-SSE2-NEXT: calll __sync_fetch_and_or_16
-; X86-SSE2-NEXT: addl $20, %esp
-; X86-SSE2-NEXT: movaps (%esp), %xmm0
-; X86-SSE2-NEXT: movaps %xmm0, (%esi)
-; X86-SSE2-NEXT: movl %esi, %eax
-; X86-SSE2-NEXT: leal -4(%ebp), %esp
-; X86-SSE2-NEXT: popl %esi
-; X86-SSE2-NEXT: popl %ebp
-; X86-SSE2-NEXT: .cfi_def_cfa %esp, 4
-; X86-SSE2-NEXT: retl $4
-;
-; X86-SLM-LABEL: or128:
-; X86-SLM: # %bb.0:
-; X86-SLM-NEXT: pushl %ebp
-; X86-SLM-NEXT: .cfi_def_cfa_offset 8
-; X86-SLM-NEXT: .cfi_offset %ebp, -8
-; X86-SLM-NEXT: movl %esp, %ebp
-; X86-SLM-NEXT: .cfi_def_cfa_register %ebp
-; X86-SLM-NEXT: pushl %edi
-; X86-SLM-NEXT: pushl %esi
-; X86-SLM-NEXT: andl $-16, %esp
-; X86-SLM-NEXT: subl $16, %esp
-; X86-SLM-NEXT: .cfi_offset %esi, -16
-; X86-SLM-NEXT: .cfi_offset %edi, -12
-; X86-SLM-NEXT: movl 8(%ebp), %esi
-; X86-SLM-NEXT: movl 12(%ebp), %eax
-; X86-SLM-NEXT: movl %esp, %ecx
-; X86-SLM-NEXT: pushl $0
-; X86-SLM-NEXT: pushl $0
-; X86-SLM-NEXT: pushl $0
-; X86-SLM-NEXT: pushl $0
-; X86-SLM-NEXT: pushl %eax
-; X86-SLM-NEXT: pushl %ecx
-; X86-SLM-NEXT: calll __sync_fetch_and_or_16
-; X86-SLM-NEXT: addl $20, %esp
-; X86-SLM-NEXT: movl (%esp), %eax
-; X86-SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-SLM-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-SLM-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-SLM-NEXT: movl %edi, 8(%esi)
-; X86-SLM-NEXT: movl %edx, 12(%esi)
-; X86-SLM-NEXT: movl %eax, (%esi)
-; X86-SLM-NEXT: movl %ecx, 4(%esi)
-; X86-SLM-NEXT: movl %esi, %eax
-; X86-SLM-NEXT: leal -8(%ebp), %esp
-; X86-SLM-NEXT: popl %esi
-; X86-SLM-NEXT: popl %edi
-; X86-SLM-NEXT: popl %ebp
-; X86-SLM-NEXT: .cfi_def_cfa %esp, 4
-; X86-SLM-NEXT: retl $4
+; X86-GENERIC-LABEL: or128:
+; X86-GENERIC: # %bb.0:
+; X86-GENERIC-NEXT: pushl %ebp
+; X86-GENERIC-NEXT: .cfi_def_cfa_offset 8
+; X86-GENERIC-NEXT: .cfi_offset %ebp, -8
+; X86-GENERIC-NEXT: movl %esp, %ebp
+; X86-GENERIC-NEXT: .cfi_def_cfa_register %ebp
+; X86-GENERIC-NEXT: pushl %ebx
+; X86-GENERIC-NEXT: pushl %edi
+; X86-GENERIC-NEXT: pushl %esi
+; X86-GENERIC-NEXT: andl $-16, %esp
+; X86-GENERIC-NEXT: subl $48, %esp
+; X86-GENERIC-NEXT: .cfi_offset %esi, -20
+; X86-GENERIC-NEXT: .cfi_offset %edi, -16
+; X86-GENERIC-NEXT: .cfi_offset %ebx, -12
+; X86-GENERIC-NEXT: movl 12(%ebp), %edi
+; X86-GENERIC-NEXT: movl 12(%edi), %ecx
+; X86-GENERIC-NEXT: movl 8(%edi), %edx
+; X86-GENERIC-NEXT: movl (%edi), %ebx
+; X86-GENERIC-NEXT: movl 4(%edi), %esi
+; X86-GENERIC-NEXT: .p2align 4, 0x90
+; X86-GENERIC-NEXT: .LBB4_1: # %atomicrmw.start
+; X86-GENERIC-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-GENERIC-NEXT: movl %ebx, (%esp)
+; X86-GENERIC-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X86-GENERIC-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-GENERIC-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-GENERIC-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-GENERIC-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-GENERIC-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X86-GENERIC-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; X86-GENERIC-NEXT: pushl $0
+; X86-GENERIC-NEXT: pushl $0
+; X86-GENERIC-NEXT: leal {{[0-9]+}}(%esp), %eax
+; X86-GENERIC-NEXT: pushl %eax
+; X86-GENERIC-NEXT: leal {{[0-9]+}}(%esp), %eax
+; X86-GENERIC-NEXT: pushl %eax
+; X86-GENERIC-NEXT: pushl %edi
+; X86-GENERIC-NEXT: pushl $16
+; X86-GENERIC-NEXT: calll __atomic_compare_exchange at PLT
+; X86-GENERIC-NEXT: addl $24, %esp
+; X86-GENERIC-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-GENERIC-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-GENERIC-NEXT: movl (%esp), %ebx
+; X86-GENERIC-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-GENERIC-NEXT: testb %al, %al
+; X86-GENERIC-NEXT: je .LBB4_1
+; X86-GENERIC-NEXT: # %bb.2: # %atomicrmw.end
+; X86-GENERIC-NEXT: movl 8(%ebp), %eax
+; X86-GENERIC-NEXT: movl %ebx, (%eax)
+; X86-GENERIC-NEXT: movl %esi, 4(%eax)
+; X86-GENERIC-NEXT: movl %edx, 8(%eax)
+; X86-GENERIC-NEXT: movl %ecx, 12(%eax)
+; X86-GENERIC-NEXT: leal -12(%ebp), %esp
+; X86-GENERIC-NEXT: popl %esi
+; X86-GENERIC-NEXT: popl %edi
+; X86-GENERIC-NEXT: popl %ebx
+; X86-GENERIC-NEXT: popl %ebp
+; X86-GENERIC-NEXT: .cfi_def_cfa %esp, 4
+; X86-GENERIC-NEXT: retl $4
;
; X86-ATOM-LABEL: or128:
; X86-ATOM: # %bb.0:
; X86-ATOM-NEXT: pushl %ebp
; X86-ATOM-NEXT: .cfi_def_cfa_offset 8
; X86-ATOM-NEXT: .cfi_offset %ebp, -8
-; X86-ATOM-NEXT: leal (%esp), %ebp
+; X86-ATOM-NEXT: movl %esp, %ebp
; X86-ATOM-NEXT: .cfi_def_cfa_register %ebp
+; X86-ATOM-NEXT: pushl %ebx
; X86-ATOM-NEXT: pushl %edi
; X86-ATOM-NEXT: pushl %esi
; X86-ATOM-NEXT: andl $-16, %esp
; X86-ATOM-NEXT: leal -{{[0-9]+}}(%esp), %esp
-; X86-ATOM-NEXT: .cfi_offset %esi, -16
-; X86-ATOM-NEXT: .cfi_offset %edi, -12
-; X86-ATOM-NEXT: movl 8(%ebp), %esi
-; X86-ATOM-NEXT: movl 12(%ebp), %eax
-; X86-ATOM-NEXT: movl %esp, %ecx
-; X86-ATOM-NEXT: pushl $0
-; X86-ATOM-NEXT: pushl $0
+; X86-ATOM-NEXT: .cfi_offset %esi, -20
+; X86-ATOM-NEXT: .cfi_offset %edi, -16
+; X86-ATOM-NEXT: .cfi_offset %ebx, -12
+; X86-ATOM-NEXT: movl 12(%ebp), %edi
+; X86-ATOM-NEXT: movl 12(%edi), %ecx
+; X86-ATOM-NEXT: movl 8(%edi), %edx
+; X86-ATOM-NEXT: movl (%edi), %esi
+; X86-ATOM-NEXT: movl 4(%edi), %ebx
+; X86-ATOM-NEXT: .p2align 4, 0x90
+; X86-ATOM-NEXT: .LBB4_1: # %atomicrmw.start
+; X86-ATOM-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-ATOM-NEXT: movl %esi, (%esp)
+; X86-ATOM-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; X86-ATOM-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-ATOM-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-ATOM-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-ATOM-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-ATOM-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; X86-ATOM-NEXT: movl %esi, {{[0-9]+}}(%esp)
; X86-ATOM-NEXT: pushl $0
; X86-ATOM-NEXT: pushl $0
+; X86-ATOM-NEXT: leal {{[0-9]+}}(%esp), %eax
+; X86-ATOM-NEXT: pushl %eax
+; X86-ATOM-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-ATOM-NEXT: pushl %eax
-; X86-ATOM-NEXT: pushl %ecx
-; X86-ATOM-NEXT: calll __sync_fetch_and_or_16
+; X86-ATOM-NEXT: pushl %edi
+; X86-ATOM-NEXT: pushl $16
+; X86-ATOM-NEXT: calll __atomic_compare_exchange at PLT
; X86-ATOM-NEXT: leal {{[0-9]+}}(%esp), %esp
-; X86-ATOM-NEXT: movl (%esp), %ecx
+; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-ATOM-NEXT: movl %eax, 8(%esi)
-; X86-ATOM-NEXT: movl %edi, 12(%esi)
-; X86-ATOM-NEXT: movl %ecx, (%esi)
-; X86-ATOM-NEXT: movl %esi, %eax
-; X86-ATOM-NEXT: movl %edx, 4(%esi)
-; X86-ATOM-NEXT: leal -8(%ebp), %esp
+; X86-ATOM-NEXT: testb %al, %al
+; X86-ATOM-NEXT: movl (%esp), %esi
+; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-ATOM-NEXT: je .LBB4_1
+; X86-ATOM-NEXT: # %bb.2: # %atomicrmw.end
+; X86-ATOM-NEXT: movl 8(%ebp), %eax
+; X86-ATOM-NEXT: movl %esi, (%eax)
+; X86-ATOM-NEXT: movl %ebx, 4(%eax)
+; X86-ATOM-NEXT: movl %edx, 8(%eax)
+; X86-ATOM-NEXT: movl %ecx, 12(%eax)
+; X86-ATOM-NEXT: leal -12(%ebp), %esp
; X86-ATOM-NEXT: popl %esi
; X86-ATOM-NEXT: popl %edi
+; X86-ATOM-NEXT: popl %ebx
; X86-ATOM-NEXT: popl %ebp
; X86-ATOM-NEXT: .cfi_def_cfa %esp, 4
; X86-ATOM-NEXT: retl $4
@@ -507,78 +520,120 @@ define void @or128_nouse_seq_cst(ptr %p) {
; X64-NEXT: .cfi_def_cfa_offset 16
; X64-NEXT: xorl %esi, %esi
; X64-NEXT: xorl %edx, %edx
-; X64-NEXT: callq __sync_fetch_and_or_16 at PLT
+; X64-NEXT: movl $5, %ecx
+; X64-NEXT: callq __atomic_fetch_or_16 at PLT
; X64-NEXT: popq %rax
; X64-NEXT: .cfi_def_cfa_offset 8
; X64-NEXT: retq
;
-; X86-SSE2-LABEL: or128_nouse_seq_cst:
-; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: pushl %ebp
-; X86-SSE2-NEXT: .cfi_def_cfa_offset 8
-; X86-SSE2-NEXT: .cfi_offset %ebp, -8
-; X86-SSE2-NEXT: movl %esp, %ebp
-; X86-SSE2-NEXT: .cfi_def_cfa_register %ebp
-; X86-SSE2-NEXT: andl $-16, %esp
-; X86-SSE2-NEXT: subl $32, %esp
-; X86-SSE2-NEXT: movl %esp, %eax
-; X86-SSE2-NEXT: pushl $0
-; X86-SSE2-NEXT: pushl $0
-; X86-SSE2-NEXT: pushl $0
-; X86-SSE2-NEXT: pushl $0
-; X86-SSE2-NEXT: pushl 8(%ebp)
-; X86-SSE2-NEXT: pushl %eax
-; X86-SSE2-NEXT: calll __sync_fetch_and_or_16
-; X86-SSE2-NEXT: addl $20, %esp
-; X86-SSE2-NEXT: movl %ebp, %esp
-; X86-SSE2-NEXT: popl %ebp
-; X86-SSE2-NEXT: .cfi_def_cfa %esp, 4
-; X86-SSE2-NEXT: retl
-;
-; X86-SLM-LABEL: or128_nouse_seq_cst:
-; X86-SLM: # %bb.0:
-; X86-SLM-NEXT: pushl %ebp
-; X86-SLM-NEXT: .cfi_def_cfa_offset 8
-; X86-SLM-NEXT: .cfi_offset %ebp, -8
-; X86-SLM-NEXT: movl %esp, %ebp
-; X86-SLM-NEXT: .cfi_def_cfa_register %ebp
-; X86-SLM-NEXT: andl $-16, %esp
-; X86-SLM-NEXT: subl $32, %esp
-; X86-SLM-NEXT: movl 8(%ebp), %eax
-; X86-SLM-NEXT: movl %esp, %ecx
-; X86-SLM-NEXT: pushl $0
-; X86-SLM-NEXT: pushl $0
-; X86-SLM-NEXT: pushl $0
-; X86-SLM-NEXT: pushl $0
-; X86-SLM-NEXT: pushl %eax
-; X86-SLM-NEXT: pushl %ecx
-; X86-SLM-NEXT: calll __sync_fetch_and_or_16
-; X86-SLM-NEXT: addl $20, %esp
-; X86-SLM-NEXT: movl %ebp, %esp
-; X86-SLM-NEXT: popl %ebp
-; X86-SLM-NEXT: .cfi_def_cfa %esp, 4
-; X86-SLM-NEXT: retl
+; X86-GENERIC-LABEL: or128_nouse_seq_cst:
+; X86-GENERIC: # %bb.0:
+; X86-GENERIC-NEXT: pushl %ebp
+; X86-GENERIC-NEXT: .cfi_def_cfa_offset 8
+; X86-GENERIC-NEXT: .cfi_offset %ebp, -8
+; X86-GENERIC-NEXT: movl %esp, %ebp
+; X86-GENERIC-NEXT: .cfi_def_cfa_register %ebp
+; X86-GENERIC-NEXT: pushl %ebx
+; X86-GENERIC-NEXT: pushl %edi
+; X86-GENERIC-NEXT: pushl %esi
+; X86-GENERIC-NEXT: andl $-16, %esp
+; X86-GENERIC-NEXT: subl $48, %esp
+; X86-GENERIC-NEXT: .cfi_offset %esi, -20
+; X86-GENERIC-NEXT: .cfi_offset %edi, -16
+; X86-GENERIC-NEXT: .cfi_offset %ebx, -12
+; X86-GENERIC-NEXT: movl 8(%ebp), %esi
+; X86-GENERIC-NEXT: movl 12(%esi), %ecx
+; X86-GENERIC-NEXT: movl 8(%esi), %edi
+; X86-GENERIC-NEXT: movl (%esi), %edx
+; X86-GENERIC-NEXT: movl 4(%esi), %ebx
+; X86-GENERIC-NEXT: .p2align 4, 0x90
+; X86-GENERIC-NEXT: .LBB12_1: # %atomicrmw.start
+; X86-GENERIC-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-GENERIC-NEXT: movl %edx, (%esp)
+; X86-GENERIC-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; X86-GENERIC-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-GENERIC-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-GENERIC-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-GENERIC-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-GENERIC-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; X86-GENERIC-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-GENERIC-NEXT: pushl $5
+; X86-GENERIC-NEXT: pushl $5
+; X86-GENERIC-NEXT: leal {{[0-9]+}}(%esp), %eax
+; X86-GENERIC-NEXT: pushl %eax
+; X86-GENERIC-NEXT: leal {{[0-9]+}}(%esp), %eax
+; X86-GENERIC-NEXT: pushl %eax
+; X86-GENERIC-NEXT: pushl %esi
+; X86-GENERIC-NEXT: pushl $16
+; X86-GENERIC-NEXT: calll __atomic_compare_exchange at PLT
+; X86-GENERIC-NEXT: addl $24, %esp
+; X86-GENERIC-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-GENERIC-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-GENERIC-NEXT: movl (%esp), %edx
+; X86-GENERIC-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-GENERIC-NEXT: testb %al, %al
+; X86-GENERIC-NEXT: je .LBB12_1
+; X86-GENERIC-NEXT: # %bb.2: # %atomicrmw.end
+; X86-GENERIC-NEXT: leal -12(%ebp), %esp
+; X86-GENERIC-NEXT: popl %esi
+; X86-GENERIC-NEXT: popl %edi
+; X86-GENERIC-NEXT: popl %ebx
+; X86-GENERIC-NEXT: popl %ebp
+; X86-GENERIC-NEXT: .cfi_def_cfa %esp, 4
+; X86-GENERIC-NEXT: retl
;
; X86-ATOM-LABEL: or128_nouse_seq_cst:
; X86-ATOM: # %bb.0:
; X86-ATOM-NEXT: pushl %ebp
; X86-ATOM-NEXT: .cfi_def_cfa_offset 8
; X86-ATOM-NEXT: .cfi_offset %ebp, -8
-; X86-ATOM-NEXT: leal (%esp), %ebp
+; X86-ATOM-NEXT: movl %esp, %ebp
; X86-ATOM-NEXT: .cfi_def_cfa_register %ebp
+; X86-ATOM-NEXT: pushl %ebx
+; X86-ATOM-NEXT: pushl %edi
+; X86-ATOM-NEXT: pushl %esi
; X86-ATOM-NEXT: andl $-16, %esp
; X86-ATOM-NEXT: leal -{{[0-9]+}}(%esp), %esp
-; X86-ATOM-NEXT: movl 8(%ebp), %eax
-; X86-ATOM-NEXT: movl %esp, %ecx
-; X86-ATOM-NEXT: pushl $0
-; X86-ATOM-NEXT: pushl $0
-; X86-ATOM-NEXT: pushl $0
-; X86-ATOM-NEXT: pushl $0
+; X86-ATOM-NEXT: .cfi_offset %esi, -20
+; X86-ATOM-NEXT: .cfi_offset %edi, -16
+; X86-ATOM-NEXT: .cfi_offset %ebx, -12
+; X86-ATOM-NEXT: movl 8(%ebp), %esi
+; X86-ATOM-NEXT: movl %esp, %ebx
+; X86-ATOM-NEXT: movl 12(%esi), %ecx
+; X86-ATOM-NEXT: movl 8(%esi), %edx
+; X86-ATOM-NEXT: movl (%esi), %eax
+; X86-ATOM-NEXT: movl 4(%esi), %edi
+; X86-ATOM-NEXT: .p2align 4, 0x90
+; X86-ATOM-NEXT: .LBB12_1: # %atomicrmw.start
+; X86-ATOM-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-ATOM-NEXT: movl %eax, (%esp)
+; X86-ATOM-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-ATOM-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-ATOM-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-ATOM-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-ATOM-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-ATOM-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-ATOM-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-ATOM-NEXT: pushl $5
+; X86-ATOM-NEXT: pushl $5
+; X86-ATOM-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-ATOM-NEXT: pushl %eax
-; X86-ATOM-NEXT: pushl %ecx
-; X86-ATOM-NEXT: calll __sync_fetch_and_or_16
+; X86-ATOM-NEXT: pushl %ebx
+; X86-ATOM-NEXT: pushl %esi
+; X86-ATOM-NEXT: pushl $16
+; X86-ATOM-NEXT: calll __atomic_compare_exchange at PLT
; X86-ATOM-NEXT: leal {{[0-9]+}}(%esp), %esp
-; X86-ATOM-NEXT: movl %ebp, %esp
+; X86-ATOM-NEXT: testb %al, %al
+; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-ATOM-NEXT: movl (%esp), %eax
+; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-ATOM-NEXT: je .LBB12_1
+; X86-ATOM-NEXT: # %bb.2: # %atomicrmw.end
+; X86-ATOM-NEXT: leal -12(%ebp), %esp
+; X86-ATOM-NEXT: popl %esi
+; X86-ATOM-NEXT: popl %edi
+; X86-ATOM-NEXT: popl %ebx
; X86-ATOM-NEXT: popl %ebp
; X86-ATOM-NEXT: .cfi_def_cfa %esp, 4
; X86-ATOM-NEXT: retl
diff --git a/llvm/test/CodeGen/X86/atomic-nocx16.ll b/llvm/test/CodeGen/X86/atomic-nocx16.ll
index 5677541242a249..a014da80f189be 100644
--- a/llvm/test/CodeGen/X86/atomic-nocx16.ll
+++ b/llvm/test/CodeGen/X86/atomic-nocx16.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -mcpu=corei7 -mattr=-cx16 | FileCheck %s
-; RUN: llc < %s -mtriple=i386-linux-gnu -verify-machineinstrs -mattr=cx16 | FileCheck -check-prefix=CHECK %s
+; RUN: llc < %s -mtriple=i386-linux-gnu -verify-machineinstrs -mattr=cx16 | FileCheck -check-prefix=CHECK32 %s
;; Verify that 128-bit atomics emit a libcall without cx16
;; available.
@@ -10,25 +10,35 @@
; CHECK-LABEL: test:
define void @test(ptr %a) nounwind {
entry:
-; CHECK: __sync_val_compare_and_swap_16
+; CHECK: __atomic_compare_exchange_16
+; CHECK32: __atomic_compare_exchange
%0 = cmpxchg ptr %a, i128 1, i128 1 seq_cst seq_cst
-; CHECK: __sync_lock_test_and_set_16
+; CHECK: __atomic_exchange_16
+; CHECK32: __atomic_exchange
%1 = atomicrmw xchg ptr %a, i128 1 seq_cst
-; CHECK: __sync_fetch_and_add_16
+; CHECK: __atomic_fetch_add_16
+; CHECK32: __atomic_compare_exchange
%2 = atomicrmw add ptr %a, i128 1 seq_cst
-; CHECK: __sync_fetch_and_sub_16
+; CHECK: __atomic_fetch_sub_16
+; CHECK32: __atomic_compare_exchange
%3 = atomicrmw sub ptr %a, i128 1 seq_cst
-; CHECK: __sync_fetch_and_and_16
+; CHECK: __atomic_fetch_and_16
+; CHECK32: __atomic_compare_exchange
%4 = atomicrmw and ptr %a, i128 1 seq_cst
-; CHECK: __sync_fetch_and_nand_16
+; CHECK: __atomic_fetch_nand_16
+; CHECK32: __atomic_compare_exchange
%5 = atomicrmw nand ptr %a, i128 1 seq_cst
-; CHECK: __sync_fetch_and_or_16
+; CHECK: __atomic_fetch_or_16
+; CHECK32: __atomic_compare_exchange
%6 = atomicrmw or ptr %a, i128 1 seq_cst
-; CHECK: __sync_fetch_and_xor_16
+; CHECK: __atomic_fetch_xor_16
+; CHECK32: __atomic_compare_exchange
%7 = atomicrmw xor ptr %a, i128 1 seq_cst
-; CHECK: __sync_val_compare_and_swap_16
+; CHECK: __atomic_load_16
+; CHECK32: __atomic_load
%8 = load atomic i128, ptr %a seq_cst, align 16
-; CHECK: __sync_lock_test_and_set_16
+; CHECK: __atomic_store_16
+; CHECK32: __atomic_store
store atomic i128 %8, ptr %a seq_cst, align 16
ret void
}
@@ -36,14 +46,20 @@ entry:
; CHECK-LABEL: test_fp:
define void @test_fp(fp128* %a) nounwind {
entry:
-; CHECK: __sync_lock_test_and_set_16
+; CHECK: __atomic_exchange_16
+; CHECK32: __atomic_exchange
%0 = atomicrmw xchg fp128* %a, fp128 0xL00000000000000004000900000000000 seq_cst
-; Currently fails to compile:
-; %1 = atomicrmw fadd fp128* %a, fp128 0xL00000000000000004000900000000000 seq_cst
-; %2 = atomicrmw fsub fp128* %a, fp128 0xL00000000000000004000900000000000 seq_cst
-; CHECK: __sync_val_compare_and_swap_16
- %1 = load atomic fp128, fp128* %a seq_cst, align 16
-; CHECK: __sync_lock_test_and_set_16
- store atomic fp128 %1, fp128* %a seq_cst, align 16
+; CHECK: __atomic_compare_exchange_16
+; CHECK32: __atomic_compare_exchange
+ %1 = atomicrmw fadd fp128* %a, fp128 0xL00000000000000004000900000000000 seq_cst
+; CHECK: __atomic_compare_exchange_16
+; CHECK32: __atomic_compare_exchange
+ %2 = atomicrmw fsub fp128* %a, fp128 0xL00000000000000004000900000000000 seq_cst
+; CHECK: __atomic_load_16
+; CHECK32: __atomic_load
+ %3 = load atomic fp128, fp128* %a seq_cst, align 16
+; CHECK: __atomic_store_16
+; CHECK32: __atomic_store
+ store atomic fp128 %3, fp128* %a seq_cst, align 16
ret void
}
diff --git a/llvm/test/CodeGen/X86/atomic-ops-ancient-64.ll b/llvm/test/CodeGen/X86/atomic-ops-ancient-64.ll
index 493c9a897f06b3..bc99caeea12b6c 100644
--- a/llvm/test/CodeGen/X86/atomic-ops-ancient-64.ll
+++ b/llvm/test/CodeGen/X86/atomic-ops-ancient-64.ll
@@ -1,44 +1,43 @@
-; RUN: llc -mtriple=i386-linux-gnu %s -o - | FileCheck %s
-; XFAIL: *
+; RUN: llc -mtriple=i386-linux-gnu -mcpu=i386 %s -o - | FileCheck %s
define i64 @test_add(ptr %addr, i64 %inc) {
; CHECK-LABEL: test_add:
-; CHECK: calll __sync_fetch_and_add_8
+; CHECK: calll __atomic_fetch_add_8
%old = atomicrmw add ptr %addr, i64 %inc seq_cst
ret i64 %old
}
define i64 @test_sub(ptr %addr, i64 %inc) {
; CHECK-LABEL: test_sub:
-; CHECK: calll __sync_fetch_and_sub_8
+; CHECK: calll __atomic_fetch_sub_8
%old = atomicrmw sub ptr %addr, i64 %inc seq_cst
ret i64 %old
}
define i64 @test_and(ptr %andr, i64 %inc) {
; CHECK-LABEL: test_and:
-; CHECK: calll __sync_fetch_and_and_8
+; CHECK: calll __atomic_fetch_and_8
%old = atomicrmw and ptr %andr, i64 %inc seq_cst
ret i64 %old
}
define i64 @test_or(ptr %orr, i64 %inc) {
; CHECK-LABEL: test_or:
-; CHECK: calll __sync_fetch_and_or_8
+; CHECK: calll __atomic_fetch_or_8
%old = atomicrmw or ptr %orr, i64 %inc seq_cst
ret i64 %old
}
define i64 @test_xor(ptr %xorr, i64 %inc) {
; CHECK-LABEL: test_xor:
-; CHECK: calll __sync_fetch_and_xor_8
+; CHECK: calll __atomic_fetch_xor_8
%old = atomicrmw xor ptr %xorr, i64 %inc seq_cst
ret i64 %old
}
define i64 @test_nand(ptr %nandr, i64 %inc) {
; CHECK-LABEL: test_nand:
-; CHECK: calll __sync_fetch_and_nand_8
+; CHECK: calll __atomic_fetch_nand_8
%old = atomicrmw nand ptr %nandr, i64 %inc seq_cst
ret i64 %old
}
diff --git a/llvm/test/CodeGen/X86/atomic-xor.ll b/llvm/test/CodeGen/X86/atomic-xor.ll
index 97fa908f1b7148..930286c8e5fb34 100644
--- a/llvm/test/CodeGen/X86/atomic-xor.ll
+++ b/llvm/test/CodeGen/X86/atomic-xor.ll
@@ -22,32 +22,54 @@ define i128 @xor128_signbit_used(ptr %p) nounwind {
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: andl $-16, %esp
-; X86-NEXT: subl $16, %esp
-; X86-NEXT: movl 8(%ebp), %esi
-; X86-NEXT: movl %esp, %eax
-; X86-NEXT: pushl $-2147483648 # imm = 0x80000000
-; X86-NEXT: pushl $0
+; X86-NEXT: subl $48, %esp
+; X86-NEXT: movl 12(%ebp), %edi
+; X86-NEXT: movl 12(%edi), %ecx
+; X86-NEXT: movl 8(%edi), %edx
+; X86-NEXT: movl (%edi), %ebx
+; X86-NEXT: movl 4(%edi), %esi
+; X86-NEXT: .p2align 4, 0x90
+; X86-NEXT: .LBB1_1: # %atomicrmw.start
+; X86-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-NEXT: movl %ebx, (%esp)
+; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: addl $-2147483648, %ecx # imm = 0x80000000
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
; X86-NEXT: pushl $0
; X86-NEXT: pushl $0
-; X86-NEXT: pushl 12(%ebp)
+; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; X86-NEXT: pushl %eax
+; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-NEXT: pushl %eax
-; X86-NEXT: calll __sync_fetch_and_xor_16
-; X86-NEXT: addl $20, %esp
-; X86-NEXT: movl (%esp), %eax
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl $16
+; X86-NEXT: calll __atomic_compare_exchange at PLT
+; X86-NEXT: addl $24, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl %edi, 8(%esi)
-; X86-NEXT: movl %edx, 12(%esi)
-; X86-NEXT: movl %eax, (%esi)
-; X86-NEXT: movl %ecx, 4(%esi)
-; X86-NEXT: movl %esi, %eax
-; X86-NEXT: leal -8(%ebp), %esp
+; X86-NEXT: movl (%esp), %ebx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: testb %al, %al
+; X86-NEXT: je .LBB1_1
+; X86-NEXT: # %bb.2: # %atomicrmw.end
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl %ebx, (%eax)
+; X86-NEXT: movl %esi, 4(%eax)
+; X86-NEXT: movl %edx, 8(%eax)
+; X86-NEXT: movl %ecx, 12(%eax)
+; X86-NEXT: leal -12(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
;
@@ -56,7 +78,8 @@ define i128 @xor128_signbit_used(ptr %p) nounwind {
; X64-NEXT: pushq %rax
; X64-NEXT: movabsq $-9223372036854775808, %rdx # imm = 0x8000000000000000
; X64-NEXT: xorl %esi, %esi
-; X64-NEXT: callq __sync_fetch_and_xor_16 at PLT
+; X64-NEXT: xorl %ecx, %ecx
+; X64-NEXT: callq __atomic_fetch_xor_16 at PLT
; X64-NEXT: popq %rcx
; X64-NEXT: retq
%r = atomicrmw xor ptr %p, i128 170141183460469231731687303715884105728 monotonic
>From d9e98b3899a6f097d4076df2d6c9503265bfc286 Mon Sep 17 00:00:00 2001
From: James Y Knight <jyknight@google.com>
Date: Mon, 11 Dec 2023 17:48:55 -0500
Subject: [PATCH 2/3] Add test-case for oversized atomic.
---
llvm/test/CodeGen/X86/atomic-oversize.ll | 11 +++++++++++
1 file changed, 11 insertions(+)
create mode 100644 llvm/test/CodeGen/X86/atomic-oversize.ll
diff --git a/llvm/test/CodeGen/X86/atomic-oversize.ll b/llvm/test/CodeGen/X86/atomic-oversize.ll
new file mode 100644
index 00000000000000..9c3a7878e20201
--- /dev/null
+++ b/llvm/test/CodeGen/X86/atomic-oversize.ll
@@ -0,0 +1,11 @@
+; RUN: llc -march=x86-64 -mattr=cx16 < %s | FileCheck %s
+
+; Atomics larger than 128-bit are unsupported, and emit libcalls.
+define void @test(ptr %a) nounwind {
+; CHECK-LABEL: test:
+; CHECK: callq __atomic_load
+; CHECK: callq __atomic_store
+ %1 = load atomic i256, ptr %a seq_cst, align 32
+ store atomic i256 %1, ptr %a seq_cst, align 32
+ ret void
+}
>From 3fb0ffdef9750bffb0540ebc60bbdcd639c2b14e Mon Sep 17 00:00:00 2001
From: James Y Knight <jyknight@google.com>
Date: Mon, 11 Dec 2023 22:15:07 -0500
Subject: [PATCH 3/3] Use -mtriple instead of -march in test.
---
.github/workflows/pr-code-format.yml | 2 +-
llvm/test/CodeGen/X86/atomic-oversize.ll | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/.github/workflows/pr-code-format.yml b/.github/workflows/pr-code-format.yml
index c27c282eb2a19a..d5b8fd9d8712a0 100644
--- a/.github/workflows/pr-code-format.yml
+++ b/.github/workflows/pr-code-format.yml
@@ -71,6 +71,6 @@ jobs:
python ./code-format-tools/llvm/utils/git/code-format-helper.py \
--token ${{ secrets.GITHUB_TOKEN }} \
--issue-number $GITHUB_PR_NUMBER \
- --start-rev $START_REV \
+ --start-rev $(git merge-base $START_REV $END_REV) \
--end-rev $END_REV \
--changed-files "$CHANGED_FILES"
diff --git a/llvm/test/CodeGen/X86/atomic-oversize.ll b/llvm/test/CodeGen/X86/atomic-oversize.ll
index 9c3a7878e20201..93213ebc066747 100644
--- a/llvm/test/CodeGen/X86/atomic-oversize.ll
+++ b/llvm/test/CodeGen/X86/atomic-oversize.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=x86-64 -mattr=cx16 < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64 -mattr=cx16 < %s | FileCheck %s
; Atomics larger than 128-bit are unsupported, and emit libcalls.
define void @test(ptr %a) nounwind {
More information about the llvm-commits
mailing list