[clang] [llvm] [X86] Use unaligned atomic load and stores (PR #79191)
via cfe-commits
cfe-commits at lists.llvm.org
Wed Jan 24 11:38:00 PST 2024
https://github.com/AtariDreams updated https://github.com/llvm/llvm-project/pull/79191
>From 65d41b9da086a7625e63e0a87c7fd8665fa4e154 Mon Sep 17 00:00:00 2001
From: Rose <83477269+AtariDreams at users.noreply.github.com>
Date: Tue, 23 Jan 2024 13:59:05 -0500
Subject: [PATCH] [X86] Use unaligned atomic load and stores
The backend supports it now, so we can use it.
---
clang/lib/CodeGen/CGObjC.cpp | 5 +-
clang/test/CodeGenObjC/objc_copyStruct.m | 3 +-
clang/test/CodeGenObjC/property-aggregate.m | 15 +--
llvm/lib/Target/X86/X86ISelLowering.cpp | 1 +
llvm/test/CodeGen/X86/unaligned-atomic-ops.ll | 92 +++++++++++++++++++
5 files changed, 99 insertions(+), 17 deletions(-)
create mode 100644 llvm/test/CodeGen/X86/unaligned-atomic-ops.ll
diff --git a/clang/lib/CodeGen/CGObjC.cpp b/clang/lib/CodeGen/CGObjC.cpp
index 03fc0ec7ff54e1c..debfc84f49e4848 100644
--- a/clang/lib/CodeGen/CGObjC.cpp
+++ b/clang/lib/CodeGen/CGObjC.cpp
@@ -846,8 +846,9 @@ static void emitStructGetterCall(CodeGenFunction &CGF, ObjCIvarDecl *ivar,
/// accesses. They don't have to be fast, just faster than a function
/// call and a mutex.
static bool hasUnalignedAtomics(llvm::Triple::ArchType arch) {
- // FIXME: Allow unaligned atomic load/store on x86. (It is not
- // currently supported by the backend.)
+ // x86 is the only architecture known so far to support unaligned atomics.
+ if (arch == llvm::Triple::x86 || arch == llvm::Triple::x86_64)
+ return true;
return false;
}
diff --git a/clang/test/CodeGenObjC/objc_copyStruct.m b/clang/test/CodeGenObjC/objc_copyStruct.m
index 7bbad866e2b1fb2..8e52815a308abcb 100644
--- a/clang/test/CodeGenObjC/objc_copyStruct.m
+++ b/clang/test/CodeGenObjC/objc_copyStruct.m
@@ -2,7 +2,7 @@
// RUN: %clang -target x86_64-apple-ios -fobjc-runtime=ios -Wno-objc-root-class -S -o - -emit-llvm %s | FileCheck %s
struct S {
- float f, g;
+ double f, g;
};
@interface I
@@ -13,4 +13,3 @@ @implementation I
@end
// CHECK: declare {{.*}}void @objc_copyStruct(ptr, ptr, i64, i1, i1)
-
diff --git a/clang/test/CodeGenObjC/property-aggregate.m b/clang/test/CodeGenObjC/property-aggregate.m
index f4211b6b62bd503..4c8c8893f920f4a 100644
--- a/clang/test/CodeGenObjC/property-aggregate.m
+++ b/clang/test/CodeGenObjC/property-aggregate.m
@@ -1,13 +1,8 @@
// RUN: %clang_cc1 -triple x86_64-apple-darwin -emit-llvm %s -o - | FileCheck %s
-// This structure's size is not a power of two, so the property does
-// not get native atomics, even though x86-64 can do unaligned atomics
-// with a lock prefix.
struct s3 { char c[3]; };
// This structure's size is, so it does, because it can.
-// FIXME: But we don't at the moment; the backend doesn't know how to generate
-// correct code.
struct s4 { char c[4]; };
@interface Test0
@@ -18,14 +13,8 @@ @implementation Test0
@synthesize s3, s4;
@end
-// CHECK: define internal i24 @"\01-[Test0 s3]"(
-// CHECK: call void @objc_copyStruct
-// CHECK: define internal void @"\01-[Test0 setS3:]"(
-// CHECK: call void @objc_copyStruct
-// CHECK: define internal i32 @"\01-[Test0 s4]"(
-// CHECK: call void @objc_copyStruct
-// CHECK: define internal void @"\01-[Test0 setS4:]"(
-// CHECK: call void @objc_copyStruct
+//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+// CHECK: {{.*}}
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 5cc2803b2808797..eacaf6bbd2296ae 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -107,6 +107,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setSchedulingPreference(Sched::RegPressure);
const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
setStackPointerRegisterToSaveRestore(RegInfo->getStackRegister());
+ setSupportsUnalignedAtomics(true);
// Bypass expensive divides and use cheaper ones.
if (TM.getOptLevel() >= CodeGenOptLevel::Default) {
diff --git a/llvm/test/CodeGen/X86/unaligned-atomic-ops.ll b/llvm/test/CodeGen/X86/unaligned-atomic-ops.ll
new file mode 100644
index 000000000000000..9e5173ff2b37e61
--- /dev/null
+++ b/llvm/test/CodeGen/X86/unaligned-atomic-ops.ll
@@ -0,0 +1,92 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc < %s -mtriple=i386-apple-darwin10.0 -mcpu=core2 -relocation-model=dynamic-no-pic | FileCheck -check-prefix=I386 %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10.0 -mcpu=core2 -relocation-model=dynamic-no-pic | FileCheck -check-prefix=CORE2 %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10.0 -mcpu=corei7 -relocation-model=dynamic-no-pic | FileCheck -check-prefix=COREI7 %s
+
+; This verifies that the middle end can handle an unaligned atomic load.
+;
+; In the past, the SelectionDAGBuilder would always hit an assertion
+; for unaligned atomic loads and stores.
+
+%AtomicI16 = type { %CellI16, [0 x i8] }
+%CellI16 = type { i16, [0 x i8] }
+
+; CHECK-LABEL: foo
+; CHECK: ret
+define void @foo(%AtomicI16* %self) {
+; I386-LABEL: foo:
+; I386: ## %bb.0: ## %start
+; I386-NEXT: pushl %esi
+; I386-NEXT: .cfi_def_cfa_offset 8
+; I386-NEXT: subl $24, %esp
+; I386-NEXT: .cfi_def_cfa_offset 32
+; I386-NEXT: .cfi_offset %esi, -8
+; I386-NEXT: movl {{[0-9]+}}(%esp), %esi
+; I386-NEXT: leal {{[0-9]+}}(%esp), %eax
+; I386-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; I386-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; I386-NEXT: movl $5, {{[0-9]+}}(%esp)
+; I386-NEXT: movl $2, (%esp)
+; I386-NEXT: calll ___atomic_load
+; I386-NEXT: movw $5, {{[0-9]+}}(%esp)
+; I386-NEXT: leal {{[0-9]+}}(%esp), %eax
+; I386-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; I386-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; I386-NEXT: movl $5, {{[0-9]+}}(%esp)
+; I386-NEXT: movl $2, (%esp)
+; I386-NEXT: calll ___atomic_store
+; I386-NEXT: addl $24, %esp
+; I386-NEXT: popl %esi
+; I386-NEXT: retl
+;
+; CORE2-LABEL: foo:
+; CORE2: ## %bb.0: ## %start
+; CORE2-NEXT: pushq %rbx
+; CORE2-NEXT: .cfi_def_cfa_offset 16
+; CORE2-NEXT: subq $16, %rsp
+; CORE2-NEXT: .cfi_def_cfa_offset 32
+; CORE2-NEXT: .cfi_offset %rbx, -16
+; CORE2-NEXT: movq %rdi, %rbx
+; CORE2-NEXT: leaq {{[0-9]+}}(%rsp), %rdx
+; CORE2-NEXT: movl $2, %edi
+; CORE2-NEXT: movq %rbx, %rsi
+; CORE2-NEXT: movl $5, %ecx
+; CORE2-NEXT: callq ___atomic_load
+; CORE2-NEXT: movw $5, {{[0-9]+}}(%rsp)
+; CORE2-NEXT: leaq {{[0-9]+}}(%rsp), %rdx
+; CORE2-NEXT: movl $2, %edi
+; CORE2-NEXT: movq %rbx, %rsi
+; CORE2-NEXT: movl $5, %ecx
+; CORE2-NEXT: callq ___atomic_store
+; CORE2-NEXT: addq $16, %rsp
+; CORE2-NEXT: popq %rbx
+; CORE2-NEXT: retq
+;
+; COREI7-LABEL: foo:
+; COREI7: ## %bb.0: ## %start
+; COREI7-NEXT: pushq %rbx
+; COREI7-NEXT: .cfi_def_cfa_offset 16
+; COREI7-NEXT: subq $16, %rsp
+; COREI7-NEXT: .cfi_def_cfa_offset 32
+; COREI7-NEXT: .cfi_offset %rbx, -16
+; COREI7-NEXT: movq %rdi, %rbx
+; COREI7-NEXT: leaq {{[0-9]+}}(%rsp), %rdx
+; COREI7-NEXT: movl $2, %edi
+; COREI7-NEXT: movq %rbx, %rsi
+; COREI7-NEXT: movl $5, %ecx
+; COREI7-NEXT: callq ___atomic_load
+; COREI7-NEXT: movw $5, {{[0-9]+}}(%rsp)
+; COREI7-NEXT: leaq {{[0-9]+}}(%rsp), %rdx
+; COREI7-NEXT: movl $2, %edi
+; COREI7-NEXT: movq %rbx, %rsi
+; COREI7-NEXT: movl $5, %ecx
+; COREI7-NEXT: callq ___atomic_store
+; COREI7-NEXT: addq $16, %rsp
+; COREI7-NEXT: popq %rbx
+; COREI7-NEXT: retq
+start:
+ %a = getelementptr inbounds %AtomicI16, %AtomicI16* %self, i16 0, i32 0, i32 0
+ load atomic i16, i16* %a seq_cst, align 1
+ store atomic i16 5, i16* %a seq_cst, align 1
+ ret void
+}
More information about the cfe-commits
mailing list