[clang] [llvm] Use unaligned atomic load and stores on x86 (PR #79191)
via cfe-commits
cfe-commits at lists.llvm.org
Tue Jan 23 12:02:29 PST 2024
https://github.com/AtariDreams updated https://github.com/llvm/llvm-project/pull/79191
>From 9d8ca53e5439f838eea7c8d8531cac6c27df2c47 Mon Sep 17 00:00:00 2001
From: Rose <83477269+AtariDreams at users.noreply.github.com>
Date: Tue, 23 Jan 2024 13:59:05 -0500
Subject: [PATCH] Use unaligned atomic load and stores on x86
The backend supports it now, so we can use it.
---
clang/lib/CodeGen/CGObjC.cpp | 5 +-
llvm/lib/Target/X86/X86ISelLowering.cpp | 1 +
llvm/test/CodeGen/X86/unaligned-atomic-ops.ll | 92 +++++++++++++++++++
3 files changed, 96 insertions(+), 2 deletions(-)
create mode 100644 llvm/test/CodeGen/X86/unaligned-atomic-ops.ll
diff --git a/clang/lib/CodeGen/CGObjC.cpp b/clang/lib/CodeGen/CGObjC.cpp
index 03fc0ec7ff54e1c..debfc84f49e4848 100644
--- a/clang/lib/CodeGen/CGObjC.cpp
+++ b/clang/lib/CodeGen/CGObjC.cpp
@@ -846,8 +846,9 @@ static void emitStructGetterCall(CodeGenFunction &CGF, ObjCIvarDecl *ivar,
/// accesses. They don't have to be fast, just faster than a function
/// call and a mutex.
static bool hasUnalignedAtomics(llvm::Triple::ArchType arch) {
- // FIXME: Allow unaligned atomic load/store on x86. (It is not
- // currently supported by the backend.)
+ // x86 is currently the only architecture we know supports this.
+ if (arch == llvm::Triple::x86 || arch == llvm::Triple::x86_64)
+ return true;
return false;
}
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index e158312caffdec7..9b5128cc1361147 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -107,6 +107,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setSchedulingPreference(Sched::RegPressure);
const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
setStackPointerRegisterToSaveRestore(RegInfo->getStackRegister());
+ setSupportsUnalignedAtomics(true);
// Bypass expensive divides and use cheaper ones.
if (TM.getOptLevel() >= CodeGenOptLevel::Default) {
diff --git a/llvm/test/CodeGen/X86/unaligned-atomic-ops.ll b/llvm/test/CodeGen/X86/unaligned-atomic-ops.ll
new file mode 100644
index 000000000000000..9e5173ff2b37e61
--- /dev/null
+++ b/llvm/test/CodeGen/X86/unaligned-atomic-ops.ll
@@ -0,0 +1,92 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc < %s -mtriple=i386-apple-darwin10.0 -mcpu=core2 -relocation-model=dynamic-no-pic | FileCheck -check-prefix=I386 %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10.0 -mcpu=core2 -relocation-model=dynamic-no-pic | FileCheck -check-prefix=CORE2 %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10.0 -mcpu=corei7 -relocation-model=dynamic-no-pic | FileCheck -check-prefix=COREI7 %s
+
+; This verifies that the middle end can handle an unaligned atomic load and store.
+;
+; In the past, the SelectionDAGBuilder would always hit an assertion
+; for unaligned atomic loads and stores.
+
+%AtomicI16 = type { %CellI16, [0 x i8] }
+%CellI16 = type { i16, [0 x i8] }
+
+; CHECK-LABEL: foo
+; CHECK: ret
+define void @foo(%AtomicI16* %self) {
+; I386-LABEL: foo:
+; I386: ## %bb.0: ## %start
+; I386-NEXT: pushl %esi
+; I386-NEXT: .cfi_def_cfa_offset 8
+; I386-NEXT: subl $24, %esp
+; I386-NEXT: .cfi_def_cfa_offset 32
+; I386-NEXT: .cfi_offset %esi, -8
+; I386-NEXT: movl {{[0-9]+}}(%esp), %esi
+; I386-NEXT: leal {{[0-9]+}}(%esp), %eax
+; I386-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; I386-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; I386-NEXT: movl $5, {{[0-9]+}}(%esp)
+; I386-NEXT: movl $2, (%esp)
+; I386-NEXT: calll ___atomic_load
+; I386-NEXT: movw $5, {{[0-9]+}}(%esp)
+; I386-NEXT: leal {{[0-9]+}}(%esp), %eax
+; I386-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; I386-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; I386-NEXT: movl $5, {{[0-9]+}}(%esp)
+; I386-NEXT: movl $2, (%esp)
+; I386-NEXT: calll ___atomic_store
+; I386-NEXT: addl $24, %esp
+; I386-NEXT: popl %esi
+; I386-NEXT: retl
+;
+; CORE2-LABEL: foo:
+; CORE2: ## %bb.0: ## %start
+; CORE2-NEXT: pushq %rbx
+; CORE2-NEXT: .cfi_def_cfa_offset 16
+; CORE2-NEXT: subq $16, %rsp
+; CORE2-NEXT: .cfi_def_cfa_offset 32
+; CORE2-NEXT: .cfi_offset %rbx, -16
+; CORE2-NEXT: movq %rdi, %rbx
+; CORE2-NEXT: leaq {{[0-9]+}}(%rsp), %rdx
+; CORE2-NEXT: movl $2, %edi
+; CORE2-NEXT: movq %rbx, %rsi
+; CORE2-NEXT: movl $5, %ecx
+; CORE2-NEXT: callq ___atomic_load
+; CORE2-NEXT: movw $5, {{[0-9]+}}(%rsp)
+; CORE2-NEXT: leaq {{[0-9]+}}(%rsp), %rdx
+; CORE2-NEXT: movl $2, %edi
+; CORE2-NEXT: movq %rbx, %rsi
+; CORE2-NEXT: movl $5, %ecx
+; CORE2-NEXT: callq ___atomic_store
+; CORE2-NEXT: addq $16, %rsp
+; CORE2-NEXT: popq %rbx
+; CORE2-NEXT: retq
+;
+; COREI7-LABEL: foo:
+; COREI7: ## %bb.0: ## %start
+; COREI7-NEXT: pushq %rbx
+; COREI7-NEXT: .cfi_def_cfa_offset 16
+; COREI7-NEXT: subq $16, %rsp
+; COREI7-NEXT: .cfi_def_cfa_offset 32
+; COREI7-NEXT: .cfi_offset %rbx, -16
+; COREI7-NEXT: movq %rdi, %rbx
+; COREI7-NEXT: leaq {{[0-9]+}}(%rsp), %rdx
+; COREI7-NEXT: movl $2, %edi
+; COREI7-NEXT: movq %rbx, %rsi
+; COREI7-NEXT: movl $5, %ecx
+; COREI7-NEXT: callq ___atomic_load
+; COREI7-NEXT: movw $5, {{[0-9]+}}(%rsp)
+; COREI7-NEXT: leaq {{[0-9]+}}(%rsp), %rdx
+; COREI7-NEXT: movl $2, %edi
+; COREI7-NEXT: movq %rbx, %rsi
+; COREI7-NEXT: movl $5, %ecx
+; COREI7-NEXT: callq ___atomic_store
+; COREI7-NEXT: addq $16, %rsp
+; COREI7-NEXT: popq %rbx
+; COREI7-NEXT: retq
+start:
+ %a = getelementptr inbounds %AtomicI16, %AtomicI16* %self, i16 0, i32 0, i32 0
+ load atomic i16, i16* %a seq_cst, align 1
+ store atomic i16 5, i16* %a seq_cst, align 1
+ ret void
+}
More information about the cfe-commits
mailing list