[llvm] [WoA] Remove extra barriers after ARM LSE instructions with MSVC (PR #169596)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 25 18:52:06 PST 2025
llvmbot wrote:
@llvm/pr-subscribers-backend-aarch64
Author: Usman Nadeem (UsmanNadeem)
https://github.com/llvm/llvm-project/commit/c9821abfc023fba684c8ef8589c49cba8083f579 added extra fences after sequentially consistent stores for compatibility with MSVC's seq_cst loads (ldr+dmb). These extra fences should not be needed for ARM LSE instructions that carry both acquire and release semantics: acquire+release acts as a two-way barrier, which should be enough for sequential consistency.
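To illustrate the effect, here is a minimal sketch (the function name is hypothetical and the register choices only approximate the updated tests in the diff below): a seq_cst atomicrmw that LSE lowers to a single acquire+release instruction no longer gets a trailing fence on MSVC targets.

```llvm
; seq_cst RMW on aarch64-windows-msvc with +lse (hypothetical example)
define i32 @fetch_add(ptr %p, i32 %v) {
  %old = atomicrmw add ptr %p, i32 %v seq_cst
  ret i32 %old
}
; Before this patch:  ldaddal w1, w0, [x0]
;                     dmb ish        ; redundant: ldaddal is already acquire+release
;                     ret
; After this patch:   ldaddal w1, w0, [x0]
;                     ret
```

Plain seq_cst stores on MSVC still get the trailing fence, since a store-release on its own does not pair with MSVC's ldr+dmb seq_cst loads; the fence is only dropped when an LSE acquire+release instruction is emitted.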
Fixes https://github.com/llvm/llvm-project/issues/162345
Change-Id: I9148c73d0dcf3bf1b18a0915f96cac71ac1800f2
---
Patch is 811.51 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/169596.diff
7 Files Affected:
- (modified) llvm/include/llvm/CodeGen/TargetLowering.h (+1-2)
- (modified) llvm/lib/CodeGen/AtomicExpandPass.cpp (+7-16)
- (modified) llvm/lib/Target/AArch64/AArch64ISelLowering.cpp (+26-14)
- (modified) llvm/lib/Target/AArch64/AArch64ISelLowering.h (+1-2)
- (modified) llvm/test/CodeGen/AArch64/atomic-ops-lse.ll (+7681-2499)
- (removed) llvm/test/CodeGen/AArch64/atomic-ops-msvc.ll (-1019)
- (modified) llvm/test/CodeGen/AArch64/atomic-ops.ll (+1228-121)
``````````diff
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 7df5d8a09f0f6..d901fa2f20055 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -2245,8 +2245,7 @@ class LLVM_ABI TargetLoweringBase {
/// Whether AtomicExpandPass should automatically insert a trailing fence
/// without reducing the ordering for this atomic. Defaults to false.
- virtual bool
- shouldInsertTrailingFenceForAtomicStore(const Instruction *I) const {
+ virtual bool storeNeedsSeqCstTrailingFence(Instruction *I) const {
return false;
}
diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp
index d9bc042d6807e..aec008c570e2a 100644
--- a/llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -345,21 +345,13 @@ bool AtomicExpandImpl::processAtomicInstr(Instruction *I) {
if (FenceOrdering != AtomicOrdering::Monotonic) {
MadeChange |= bracketInstWithFences(I, FenceOrdering);
}
- } else if (I->hasAtomicStore() &&
- TLI->shouldInsertTrailingFenceForAtomicStore(I)) {
- auto FenceOrdering = AtomicOrdering::Monotonic;
- if (SI)
- FenceOrdering = SI->getOrdering();
- else if (RMWI)
- FenceOrdering = RMWI->getOrdering();
- else if (CASI && TLI->shouldExpandAtomicCmpXchgInIR(CASI) !=
- TargetLoweringBase::AtomicExpansionKind::LLSC)
- // LLSC is handled in expandAtomicCmpXchg().
- FenceOrdering = CASI->getSuccessOrdering();
-
+ } else if (TLI->storeNeedsSeqCstTrailingFence(I) &&
+ !(CASI && TLI->shouldExpandAtomicCmpXchgInIR(CASI) ==
+ TargetLoweringBase::AtomicExpansionKind::LLSC)) {
+ // CmpXchg LLSC is handled in expandAtomicCmpXchg().
IRBuilder Builder(I);
- if (auto TrailingFence =
- TLI->emitTrailingFence(Builder, I, FenceOrdering)) {
+ if (auto TrailingFence = TLI->emitTrailingFence(
+ Builder, I, AtomicOrdering::SequentiallyConsistent)) {
TrailingFence->moveAfter(I);
MadeChange = true;
}
@@ -1511,8 +1503,7 @@ bool AtomicExpandImpl::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
// Make sure later instructions don't get reordered with a fence if
// necessary.
Builder.SetInsertPoint(SuccessBB);
- if (ShouldInsertFencesForAtomic ||
- TLI->shouldInsertTrailingFenceForAtomicStore(CI))
+ if (ShouldInsertFencesForAtomic || TLI->storeNeedsSeqCstTrailingFence(CI))
TLI->emitTrailingFence(Builder, CI, SuccessOrder);
Builder.CreateBr(ExitBB);
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 83ce39fa314d1..a99413fe03431 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -29446,8 +29446,8 @@ bool AArch64TargetLowering::shouldInsertFencesForAtomic(
return false;
}
-bool AArch64TargetLowering::shouldInsertTrailingFenceForAtomicStore(
- const Instruction *I) const {
+bool AArch64TargetLowering::storeNeedsSeqCstTrailingFence(
+ Instruction *I) const {
// Store-Release instructions only provide seq_cst guarantees when paired with
// Load-Acquire instructions. MSVC CRT does not use these instructions to
// implement seq_cst loads and stores, so we need additional explicit fences
@@ -29455,19 +29455,31 @@ bool AArch64TargetLowering::shouldInsertTrailingFenceForAtomicStore(
if (!Subtarget->getTargetTriple().isWindowsMSVCEnvironment())
return false;
- switch (I->getOpcode()) {
- default:
+ if (auto *SI = dyn_cast<StoreInst>(I))
+ return SI->getOrdering() == AtomicOrdering::SequentiallyConsistent;
+
+ auto *CAS = dyn_cast<AtomicCmpXchgInst>(I);
+ auto *RMW = dyn_cast<AtomicRMWInst>(I);
+ // Not a store.
+ if (!CAS && !RMW)
return false;
- case Instruction::AtomicCmpXchg:
- return cast<AtomicCmpXchgInst>(I)->getSuccessOrdering() ==
- AtomicOrdering::SequentiallyConsistent;
- case Instruction::AtomicRMW:
- return cast<AtomicRMWInst>(I)->getOrdering() ==
- AtomicOrdering::SequentiallyConsistent;
- case Instruction::Store:
- return cast<StoreInst>(I)->getOrdering() ==
- AtomicOrdering::SequentiallyConsistent;
- }
+
+ // Fence only needed for seq_cst.
+ if (CAS &&
+ CAS->getSuccessOrdering() != AtomicOrdering::SequentiallyConsistent)
+ return false;
+ if (RMW && RMW->getOrdering() != AtomicOrdering::SequentiallyConsistent)
+ return false;
+
+ // We do not need a fence only if we have LSE and are not expanding.
+ TargetLoweringBase::AtomicExpansionKind ExpandKind =
+ CAS ? shouldExpandAtomicCmpXchgInIR(CAS) : shouldExpandAtomicRMWInIR(RMW);
+ if (ExpandKind == AtomicExpansionKind::None && Subtarget->hasLSE())
+ return false;
+ if (RMW && ExpandKind == AtomicExpansionKind::CmpXChg && Subtarget->hasLSE())
+ return false;
+
+ return true;
}
// Loads and stores less than 128-bits are already atomic; ones above that
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index ca08eb40c956a..8a99fcad212c2 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -349,8 +349,7 @@ class AArch64TargetLowering : public TargetLowering {
bool isOpSuitableForLSE128(const Instruction *I) const;
bool isOpSuitableForRCPC3(const Instruction *I) const;
bool shouldInsertFencesForAtomic(const Instruction *I) const override;
- bool
- shouldInsertTrailingFenceForAtomicStore(const Instruction *I) const override;
+ bool storeNeedsSeqCstTrailingFence(Instruction *I) const override;
TargetLoweringBase::AtomicExpansionKind
shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
diff --git a/llvm/test/CodeGen/AArch64/atomic-ops-lse.ll b/llvm/test/CodeGen/AArch64/atomic-ops-lse.ll
index 70f3b5cc488ea..e784042ebb1d3 100644
--- a/llvm/test/CodeGen/AArch64/atomic-ops-lse.ll
+++ b/llvm/test/CodeGen/AArch64/atomic-ops-lse.ll
@@ -1,13 +1,21 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
; RUN: llc -mtriple=aarch64-none-linux-gnu -disable-post-ra -verify-machineinstrs -mattr=+lse -aarch64-enable-sink-fold=true < %s | FileCheck %s
; RUN: llc -mtriple=aarch64-none-linux-gnu -disable-post-ra -verify-machineinstrs -mattr=+lse -mattr=+outline-atomics -aarch64-enable-sink-fold=true < %s | FileCheck %s
; RUN: llc -mtriple=aarch64-none-linux-gnu -disable-post-ra -verify-machineinstrs -mattr=+outline-atomics -aarch64-enable-sink-fold=true < %s | FileCheck %s --check-prefix=OUTLINE-ATOMICS
; RUN: llc -mtriple=aarch64_be-none-linux-gnu -disable-post-ra -verify-machineinstrs -mattr=+lse -aarch64-enable-sink-fold=true < %s | FileCheck %s
-; RUN: llc -mtriple=aarch64-none-linux-gnu -disable-post-ra -verify-machineinstrs -mattr=+lse -aarch64-enable-sink-fold=true < %s | FileCheck %s --check-prefix=CHECK-REG
+; RUN: llc -mtriple=aarch64-none-linux-gnu -disable-post-ra -verify-machineinstrs -mattr=+lse -aarch64-enable-sink-fold=true < %s | FileCheck %s --check-prefix="CHECK-REG" --allow-unused-prefixes --implicit-check-not="stlxrb {{w|x}}[[NEW:[0-9]+]], {{w|x}}[[NEW:[0-9]+]]], [x{{[0-9]+}}]"
-; Point of CHECK-REG is to make sure UNPREDICTABLE instructions aren't created
+; Point of implicit-check-not is to make sure UNPREDICTABLE instructions aren't created
; (i.e. reusing a register for status & data in store exclusive).
-; CHECK-REG-NOT: stlxrb w[[NEW:[0-9]+]], w[[NEW]], [x{{[0-9]+}}]
-; CHECK-REG-NOT: stlxrb w[[NEW:[0-9]+]], x[[NEW]], [x{{[0-9]+}}]
+; CHECK-REG: {{.*}}
+
+; RUN: llc -mtriple=aarch64-windows-pc-msvc -disable-post-ra -verify-machineinstrs \
+; RUN: -mattr=+lse -aarch64-enable-sink-fold=true < %s | FileCheck %s --implicit-check-not="dmb"
+; RUN: llc -mtriple=aarch64-windows-pc-msvc -disable-post-ra -verify-machineinstrs \
+; RUN: -mattr=+lse -mattr=+outline-atomics -aarch64-enable-sink-fold=true < %s | FileCheck %s --implicit-check-not="dmb"
+; RUN: llc -mtriple=aarch64-windows-pc-msvc -disable-post-ra -verify-machineinstrs \
+; RUN: -mattr=+outline-atomics -aarch64-enable-sink-fold=true < %s | FileCheck %s --check-prefixes=MSVC-OUTLINE-ATOMICS
+
@var8 = dso_local global i8 0
@var16 = dso_local global i16 0
@@ -17,6 +25,12 @@
define dso_local i8 @test_atomic_load_add_i8(i8 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_add_i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var8
+; CHECK-NEXT: add x8, x8, :lo12:var8
+; CHECK-NEXT: ldaddalb w0, w0, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i8:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -25,19 +39,30 @@ define dso_local i8 @test_atomic_load_add_i8(i8 %offset) nounwind {
; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd1_acq_rel
; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; OUTLINE-ATOMICS-NEXT: ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i8:
+; MSVC-OUTLINE-ATOMICS: // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT: adrp x1, var8
+; MSVC-OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var8
+; MSVC-OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd1_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT: dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT: ret
%old = atomicrmw add ptr @var8, i8 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
-; CHECK: ldaddalb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret i8 %old
}
define dso_local i16 @test_atomic_load_add_i16(i16 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_add_i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var16
+; CHECK-NEXT: add x8, x8, :lo12:var16
+; CHECK-NEXT: ldaddalh w0, w0, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i16:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -46,19 +71,30 @@ define dso_local i16 @test_atomic_load_add_i16(i16 %offset) nounwind {
; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd2_acq_rel
; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; OUTLINE-ATOMICS-NEXT: ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i16:
+; MSVC-OUTLINE-ATOMICS: // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT: adrp x1, var16
+; MSVC-OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var16
+; MSVC-OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd2_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT: dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT: ret
%old = atomicrmw add ptr @var16, i16 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
-; CHECK: ldaddalh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret i16 %old
}
define dso_local i32 @test_atomic_load_add_i32(i32 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_add_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var32
+; CHECK-NEXT: add x8, x8, :lo12:var32
+; CHECK-NEXT: ldaddal w0, w0, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i32:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -67,19 +103,30 @@ define dso_local i32 @test_atomic_load_add_i32(i32 %offset) nounwind {
; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd4_acq_rel
; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; OUTLINE-ATOMICS-NEXT: ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i32:
+; MSVC-OUTLINE-ATOMICS: // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT: adrp x1, var32
+; MSVC-OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd4_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT: dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT: ret
%old = atomicrmw add ptr @var32, i32 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
-; CHECK: ldaddal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret i32 %old
}
define dso_local i64 @test_atomic_load_add_i64(i64 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_add_i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var64
+; CHECK-NEXT: add x8, x8, :lo12:var64
+; CHECK-NEXT: ldaddal x0, x0, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i64:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -88,19 +135,30 @@ define dso_local i64 @test_atomic_load_add_i64(i64 %offset) nounwind {
; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd8_acq_rel
; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; OUTLINE-ATOMICS-NEXT: ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i64:
+; MSVC-OUTLINE-ATOMICS: // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT: adrp x1, var64
+; MSVC-OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd8_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT: dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT: ret
%old = atomicrmw add ptr @var64, i64 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
-; CHECK: ldaddal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret i64 %old
}
define dso_local void @test_atomic_load_add_i32_noret(i32 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_add_i32_noret:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var32
+; CHECK-NEXT: add x8, x8, :lo12:var32
+; CHECK-NEXT: ldaddal w0, w8, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i32_noret:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -109,18 +167,29 @@ define dso_local void @test_atomic_load_add_i32_noret(i32 %offset) nounwind {
; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd4_acq_rel
; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; OUTLINE-ATOMICS-NEXT: ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i32_noret:
+; MSVC-OUTLINE-ATOMICS: // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT: adrp x1, var32
+; MSVC-OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd4_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT: dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT: ret
atomicrmw add ptr @var32, i32 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
-; CHECK: ldaddal w0, w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret void
}
define dso_local void @test_atomic_load_add_i64_noret(i64 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_add_i64_noret:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var64
+; CHECK-NEXT: add x8, x8, :lo12:var64
+; CHECK-NEXT: ldaddal x0, x8, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i64_noret:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -129,18 +198,29 @@ define dso_local void @test_atomic_load_add_i64_noret(i64 %offset) nounwind {
; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd8_acq_rel
; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; OUTLINE-ATOMICS-NEXT: ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i64_noret:
+; MSVC-OUTLINE-ATOMICS: // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT: adrp x1, var64
+; MSVC-OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd8_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT: dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT: ret
atomicrmw add ptr @var64, i64 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
-; CHECK: ldaddal x0, x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret void
}
define dso_local i8 @test_atomic_load_or_i8(i8 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_or_i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var8
+; CHECK-NEXT: add x8, x8, :lo12:var8
+; CHECK-NEXT: ldsetalb w0, w0, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i8:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -149,19 +229,30 @@ define dso_local i8 @test_atomic_load_or_i8(i8 %offset) nounwind {
; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset1_acq_rel
; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; OUTLINE-ATOMICS-NEXT: ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i8:
+; MSVC-OUTLINE-ATOMICS: // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT: adrp x1, var8
+; MSVC-OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var8
+; MSVC-OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset1_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT: dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT: ret
%old = atomicrmw or ptr @var8, i8 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
-; CHECK: ldsetalb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret i8 %old
}
define dso_local i16 @test_atomic_load_or_i16(i16 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_or_i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var16
+; CHECK-NEXT: add x8, x8, :lo12:var16
+; CHECK-NEXT: ldsetalh w0, w0, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i16:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -170,19 +261,30 @@ define dso_local i16 @test_atomic_load_or_i16(i16 %offset) nounwind {
; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset2_acq_rel
; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; OUTLINE-ATOMICS-NEXT: ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i16:
+; MSVC-OUTLINE-ATOMICS: // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT: adrp x1, var16
+; MSVC-OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var16
+; MSVC-OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset2_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT: dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT: ret
%old = atomicrmw or ptr @var16, i16 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
-; CHECK: ldsetalh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret i16 %old
}
define dso_local i32 @test_atomic_load_or_i32(i32 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_or_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var32
+; CHECK-NEXT: add x8, x8, :lo12:var32
+; CHECK-NEXT: ldsetal w0, w0, [x8]
+; CHECK-NEXT: ret...
[truncated]
``````````
https://github.com/llvm/llvm-project/pull/169596
More information about the llvm-commits mailing list