[llvm] [SPARC] Weaken emitted barriers for atomic ops (PR #154950)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 2 05:18:34 PDT 2025
https://github.com/koachan updated https://github.com/llvm/llvm-project/pull/154950
>From 928bf41991324bcf225bb433afb1eff4e4dc173e Mon Sep 17 00:00:00 2001
From: Koakuma <koachan at protonmail.com>
Date: Sun, 17 Aug 2025 10:47:08 +0700
Subject: [PATCH 1/8] [WIP][SPARC] Weaken emitted barriers for atomic ops
Weaken the barriers emitted for atomic ops to the weakest form that still
enforces the required ordering constraints. In particular, we try to avoid
emitting expensive #StoreLoad barriers whenever possible.
The emitted barriers still conform to V9's RMO and V8's PSO memory models,
and are compatible with GCC's lowering.
---
llvm/lib/Target/Sparc/SparcISelLowering.cpp | 25 ++
llvm/lib/Target/Sparc/SparcISelLowering.h | 5 +
llvm/lib/Target/Sparc/SparcInstrInfo.td | 26 +-
llvm/test/CodeGen/SPARC/atomics-ordering.ll | 289 ++++++++++++++++++++
4 files changed, 342 insertions(+), 3 deletions(-)
create mode 100644 llvm/test/CodeGen/SPARC/atomics-ordering.ll
diff --git a/llvm/lib/Target/Sparc/SparcISelLowering.cpp b/llvm/lib/Target/Sparc/SparcISelLowering.cpp
index dd221327dbdc6..e59b05817a3b0 100644
--- a/llvm/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/llvm/lib/Target/Sparc/SparcISelLowering.cpp
@@ -33,7 +33,10 @@
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
+#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
using namespace llvm;
@@ -3562,3 +3565,25 @@ void SparcTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
if (!Node->hasAnyUseOfValue(0))
MI.getOperand(0).setReg(SP::G0);
}
+
+Instruction *SparcTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
+ Instruction *Inst,
+ AtomicOrdering Ord) const {
+ bool HasStoreSemantics = isa<AtomicRMWInst>(Inst) || isa<StoreInst>(Inst);
+ if (HasStoreSemantics && isReleaseOrStronger(Ord))
+ return Builder.CreateFence(AtomicOrdering::Release);
+ return nullptr;
+}
+
+Instruction *SparcTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
+ Instruction *Inst,
+ AtomicOrdering Ord) const {
+ bool HasLoadSemantics = isa<AtomicRMWInst>(Inst) || isa<LoadInst>(Inst);
+ if (HasLoadSemantics && isAcquireOrStronger(Ord))
+ return Builder.CreateFence(AtomicOrdering::Acquire);
+
+ // SC plain stores would need a trailing full barrier.
+ if (isa<StoreInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
+ return Builder.CreateFence(Ord);
+ return nullptr;
+}
diff --git a/llvm/lib/Target/Sparc/SparcISelLowering.h b/llvm/lib/Target/Sparc/SparcISelLowering.h
index 4017beb88ff31..73bd8ff6b24a4 100644
--- a/llvm/lib/Target/Sparc/SparcISelLowering.h
+++ b/llvm/lib/Target/Sparc/SparcISelLowering.h
@@ -182,6 +182,11 @@ namespace llvm {
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
EVT VT) const override;
+ Instruction *emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst,
+ AtomicOrdering Ord) const override;
+ Instruction *emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst,
+ AtomicOrdering Ord) const override;
+
bool shouldInsertFencesForAtomic(const Instruction *I) const override {
// FIXME: We insert fences for each atomics and generate
// sub-optimal code for PSO/TSO. (Approximately nobody uses any
diff --git a/llvm/lib/Target/Sparc/SparcInstrInfo.td b/llvm/lib/Target/Sparc/SparcInstrInfo.td
index 1a32eafb0e83d..ff5af4bcd03cc 100644
--- a/llvm/lib/Target/Sparc/SparcInstrInfo.td
+++ b/llvm/lib/Target/Sparc/SparcInstrInfo.td
@@ -1957,12 +1957,32 @@ def : Pat<(i32 (zextloadi1 ADDRri:$src)), (LDUBri ADDRri:$src)>;
def : Pat<(store (i32 0), ADDRrr:$dst), (STrr ADDRrr:$dst, (i32 G0))>;
def : Pat<(store (i32 0), ADDRri:$dst), (STri ADDRri:$dst, (i32 G0))>;
-// store bar for all atomic_fence in V8.
-let Predicates = [HasNoV9] in
+// All load-type operations in V8 comes with implicit acquire semantics.
+let Predicates = [HasNoV9] in {
+ // Acquire -> nothing
+ // FIXME how to prevent this from actually emitting anything?
+ def : Pat<(atomic_fence (i32 4), timm), (NOP)>;
+ // Release / AcqRel -> stbar
+ def : Pat<(atomic_fence (i32 5), timm), (STBAR)>;
+ // AcqRel and stronger -> stbar; ldstub [%sp-1], %g0
+ // FIXME how to actually emit the ldstub?
def : Pat<(atomic_fence timm, timm), (STBAR)>;
+}
-let Predicates = [HasV9] in
+// We have to handle both 32 and 64-bit cases.
+let Predicates = [HasV9] in {
+ // Acquire -> membar #LoadLoad | #LoadStore
+ def : Pat<(atomic_fence (i32 4), timm), (MEMBARi 0x5)>;
+ def : Pat<(atomic_fence (i64 4), timm), (MEMBARi 0x5)>;
+ // Release -> membar #LoadStore | #StoreStore
+ def : Pat<(atomic_fence (i32 5), timm), (MEMBARi 0xc)>;
+ def : Pat<(atomic_fence (i64 5), timm), (MEMBARi 0xc)>;
+ // AcqRel -> membar #LoadLoad | #LoadStore | #StoreStore
+ def : Pat<(atomic_fence (i32 6), timm), (MEMBARi 0xd)>;
+ def : Pat<(atomic_fence (i64 6), timm), (MEMBARi 0xd)>;
+ // SeqCst -> membar #StoreLoad | #LoadLoad | #LoadStore | #StoreStore
def : Pat<(atomic_fence timm, timm), (MEMBARi 0xf)>;
+}
// atomic_load addr -> load addr
def : Pat<(i32 (atomic_load_azext_8 ADDRrr:$src)), (LDUBrr ADDRrr:$src)>;
diff --git a/llvm/test/CodeGen/SPARC/atomics-ordering.ll b/llvm/test/CodeGen/SPARC/atomics-ordering.ll
new file mode 100644
index 0000000000000..e3decd56ee613
--- /dev/null
+++ b/llvm/test/CodeGen/SPARC/atomics-ordering.ll
@@ -0,0 +1,289 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=sparc -verify-machineinstrs | FileCheck %s --check-prefixes=SPARC32
+; RUN: llc < %s -mtriple=sparc -mcpu=leon4 -verify-machineinstrs | FileCheck %s --check-prefixes=SPARC32-LEON4
+; RUN: llc < %s -mtriple=sparc -mcpu=v9 -verify-machineinstrs | FileCheck %s --check-prefixes=SPARC32-V9
+; RUN: llc < %s -mtriple=sparcv9 -verify-machineinstrs | FileCheck %s --check-prefixes=SPARC64
+
+define i32 @load_acq(ptr %0) nounwind {
+; SPARC32-LABEL: load_acq:
+; SPARC32: ! %bb.0:
+; SPARC32-NEXT: save %sp, -96, %sp
+; SPARC32-NEXT: mov %i0, %o0
+; SPARC32-NEXT: call __atomic_load_4
+; SPARC32-NEXT: mov 2, %o1
+; SPARC32-NEXT: ret
+; SPARC32-NEXT: restore %g0, %o0, %o0
+;
+; SPARC32-LEON4-LABEL: load_acq:
+; SPARC32-LEON4: ! %bb.0:
+; SPARC32-LEON4-NEXT: ld [%o0], %o0
+; SPARC32-LEON4-NEXT: nop
+; SPARC32-LEON4-NEXT: retl
+; SPARC32-LEON4-NEXT: nop
+;
+; SPARC32-V9-LABEL: load_acq:
+; SPARC32-V9: ! %bb.0:
+; SPARC32-V9-NEXT: ld [%o0], %o0
+; SPARC32-V9-NEXT: membar #LoadLoad | #LoadStore
+; SPARC32-V9-NEXT: retl
+; SPARC32-V9-NEXT: nop
+;
+; SPARC64-LABEL: load_acq:
+; SPARC64: ! %bb.0:
+; SPARC64-NEXT: ld [%o0], %o0
+; SPARC64-NEXT: membar #LoadLoad | #LoadStore
+; SPARC64-NEXT: retl
+; SPARC64-NEXT: nop
+ %2 = load atomic i32, ptr %0 acquire, align 4
+ ret i32 %2
+}
+
+define i32 @load_sc(ptr %0) nounwind {
+; SPARC32-LABEL: load_sc:
+; SPARC32: ! %bb.0:
+; SPARC32-NEXT: save %sp, -96, %sp
+; SPARC32-NEXT: mov %i0, %o0
+; SPARC32-NEXT: call __atomic_load_4
+; SPARC32-NEXT: mov 5, %o1
+; SPARC32-NEXT: ret
+; SPARC32-NEXT: restore %g0, %o0, %o0
+;
+; SPARC32-LEON4-LABEL: load_sc:
+; SPARC32-LEON4: ! %bb.0:
+; SPARC32-LEON4-NEXT: ld [%o0], %o0
+; SPARC32-LEON4-NEXT: nop
+; SPARC32-LEON4-NEXT: retl
+; SPARC32-LEON4-NEXT: nop
+;
+; SPARC32-V9-LABEL: load_sc:
+; SPARC32-V9: ! %bb.0:
+; SPARC32-V9-NEXT: ld [%o0], %o0
+; SPARC32-V9-NEXT: membar #LoadLoad | #LoadStore
+; SPARC32-V9-NEXT: retl
+; SPARC32-V9-NEXT: nop
+;
+; SPARC64-LABEL: load_sc:
+; SPARC64: ! %bb.0:
+; SPARC64-NEXT: ld [%o0], %o0
+; SPARC64-NEXT: membar #LoadLoad | #LoadStore
+; SPARC64-NEXT: retl
+; SPARC64-NEXT: nop
+ %2 = load atomic i32, ptr %0 seq_cst, align 4
+ ret i32 %2
+}
+
+define void @store_rel(ptr %0, i32 %1) nounwind {
+; SPARC32-LABEL: store_rel:
+; SPARC32: ! %bb.0:
+; SPARC32-NEXT: save %sp, -96, %sp
+; SPARC32-NEXT: mov %i1, %o1
+; SPARC32-NEXT: mov %i0, %o0
+; SPARC32-NEXT: call __atomic_store_4
+; SPARC32-NEXT: mov 3, %o2
+; SPARC32-NEXT: ret
+; SPARC32-NEXT: restore
+;
+; SPARC32-LEON4-LABEL: store_rel:
+; SPARC32-LEON4: ! %bb.0:
+; SPARC32-LEON4-NEXT: stbar
+; SPARC32-LEON4-NEXT: retl
+; SPARC32-LEON4-NEXT: st %o1, [%o0]
+;
+; SPARC32-V9-LABEL: store_rel:
+; SPARC32-V9: ! %bb.0:
+; SPARC32-V9-NEXT: membar #LoadStore | #StoreStore
+; SPARC32-V9-NEXT: retl
+; SPARC32-V9-NEXT: st %o1, [%o0]
+;
+; SPARC64-LABEL: store_rel:
+; SPARC64: ! %bb.0:
+; SPARC64-NEXT: membar #LoadStore | #StoreStore
+; SPARC64-NEXT: retl
+; SPARC64-NEXT: st %o1, [%o0]
+ store atomic i32 %1, ptr %0 release, align 4
+ ret void
+}
+
+define void @store_sc(ptr %0, i32 %1) nounwind {
+; SPARC32-LABEL: store_sc:
+; SPARC32: ! %bb.0:
+; SPARC32-NEXT: save %sp, -96, %sp
+; SPARC32-NEXT: mov %i1, %o1
+; SPARC32-NEXT: mov %i0, %o0
+; SPARC32-NEXT: call __atomic_store_4
+; SPARC32-NEXT: mov 5, %o2
+; SPARC32-NEXT: ret
+; SPARC32-NEXT: restore
+;
+; SPARC32-LEON4-LABEL: store_sc:
+; SPARC32-LEON4: ! %bb.0:
+; SPARC32-LEON4-NEXT: stbar
+; SPARC32-LEON4-NEXT: st %o1, [%o0]
+; SPARC32-LEON4-NEXT: stbar
+; SPARC32-LEON4-NEXT: retl
+; SPARC32-LEON4-NEXT: nop
+;
+; SPARC32-V9-LABEL: store_sc:
+; SPARC32-V9: ! %bb.0:
+; SPARC32-V9-NEXT: membar #LoadStore | #StoreStore
+; SPARC32-V9-NEXT: st %o1, [%o0]
+; SPARC32-V9-NEXT: membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore
+; SPARC32-V9-NEXT: retl
+; SPARC32-V9-NEXT: nop
+;
+; SPARC64-LABEL: store_sc:
+; SPARC64: ! %bb.0:
+; SPARC64-NEXT: membar #LoadStore | #StoreStore
+; SPARC64-NEXT: st %o1, [%o0]
+; SPARC64-NEXT: membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore
+; SPARC64-NEXT: retl
+; SPARC64-NEXT: nop
+ store atomic i32 %1, ptr %0 seq_cst, align 4
+ ret void
+}
+
+define i32 @rmw_acq(ptr %0, i32 %1) nounwind {
+; SPARC32-LABEL: rmw_acq:
+; SPARC32: ! %bb.0:
+; SPARC32-NEXT: save %sp, -96, %sp
+; SPARC32-NEXT: mov %i1, %o1
+; SPARC32-NEXT: mov %i0, %o0
+; SPARC32-NEXT: call __atomic_exchange_4
+; SPARC32-NEXT: mov 2, %o2
+; SPARC32-NEXT: ret
+; SPARC32-NEXT: restore %g0, %o0, %o0
+;
+; SPARC32-LEON4-LABEL: rmw_acq:
+; SPARC32-LEON4: ! %bb.0:
+; SPARC32-LEON4-NEXT: swap [%o0], %o1
+; SPARC32-LEON4-NEXT: nop
+; SPARC32-LEON4-NEXT: retl
+; SPARC32-LEON4-NEXT: mov %o1, %o0
+;
+; SPARC32-V9-LABEL: rmw_acq:
+; SPARC32-V9: ! %bb.0:
+; SPARC32-V9-NEXT: swap [%o0], %o1
+; SPARC32-V9-NEXT: membar #LoadLoad | #LoadStore
+; SPARC32-V9-NEXT: retl
+; SPARC32-V9-NEXT: mov %o1, %o0
+;
+; SPARC64-LABEL: rmw_acq:
+; SPARC64: ! %bb.0:
+; SPARC64-NEXT: swap [%o0], %o1
+; SPARC64-NEXT: membar #LoadLoad | #LoadStore
+; SPARC64-NEXT: retl
+; SPARC64-NEXT: mov %o1, %o0
+ %3 = atomicrmw xchg ptr %0, i32 %1 acquire, align 4
+ ret i32 %3
+}
+
+define i32 @rmw_rel(ptr %0, i32 %1) nounwind {
+; SPARC32-LABEL: rmw_rel:
+; SPARC32: ! %bb.0:
+; SPARC32-NEXT: save %sp, -96, %sp
+; SPARC32-NEXT: mov %i1, %o1
+; SPARC32-NEXT: mov %i0, %o0
+; SPARC32-NEXT: call __atomic_exchange_4
+; SPARC32-NEXT: mov 3, %o2
+; SPARC32-NEXT: ret
+; SPARC32-NEXT: restore %g0, %o0, %o0
+;
+; SPARC32-LEON4-LABEL: rmw_rel:
+; SPARC32-LEON4: ! %bb.0:
+; SPARC32-LEON4-NEXT: stbar
+; SPARC32-LEON4-NEXT: swap [%o0], %o1
+; SPARC32-LEON4-NEXT: retl
+; SPARC32-LEON4-NEXT: mov %o1, %o0
+;
+; SPARC32-V9-LABEL: rmw_rel:
+; SPARC32-V9: ! %bb.0:
+; SPARC32-V9-NEXT: membar #LoadStore | #StoreStore
+; SPARC32-V9-NEXT: swap [%o0], %o1
+; SPARC32-V9-NEXT: retl
+; SPARC32-V9-NEXT: mov %o1, %o0
+;
+; SPARC64-LABEL: rmw_rel:
+; SPARC64: ! %bb.0:
+; SPARC64-NEXT: membar #LoadStore | #StoreStore
+; SPARC64-NEXT: swap [%o0], %o1
+; SPARC64-NEXT: retl
+; SPARC64-NEXT: mov %o1, %o0
+ %3 = atomicrmw xchg ptr %0, i32 %1 release, align 4
+ ret i32 %3
+}
+
+define i32 @rmw_acq_rel(ptr %0, i32 %1) nounwind {
+; SPARC32-LABEL: rmw_acq_rel:
+; SPARC32: ! %bb.0:
+; SPARC32-NEXT: save %sp, -96, %sp
+; SPARC32-NEXT: mov %i1, %o1
+; SPARC32-NEXT: mov %i0, %o0
+; SPARC32-NEXT: call __atomic_exchange_4
+; SPARC32-NEXT: mov 4, %o2
+; SPARC32-NEXT: ret
+; SPARC32-NEXT: restore %g0, %o0, %o0
+;
+; SPARC32-LEON4-LABEL: rmw_acq_rel:
+; SPARC32-LEON4: ! %bb.0:
+; SPARC32-LEON4-NEXT: stbar
+; SPARC32-LEON4-NEXT: swap [%o0], %o1
+; SPARC32-LEON4-NEXT: nop
+; SPARC32-LEON4-NEXT: retl
+; SPARC32-LEON4-NEXT: mov %o1, %o0
+;
+; SPARC32-V9-LABEL: rmw_acq_rel:
+; SPARC32-V9: ! %bb.0:
+; SPARC32-V9-NEXT: membar #LoadStore | #StoreStore
+; SPARC32-V9-NEXT: swap [%o0], %o1
+; SPARC32-V9-NEXT: membar #LoadLoad | #LoadStore
+; SPARC32-V9-NEXT: retl
+; SPARC32-V9-NEXT: mov %o1, %o0
+;
+; SPARC64-LABEL: rmw_acq_rel:
+; SPARC64: ! %bb.0:
+; SPARC64-NEXT: membar #LoadStore | #StoreStore
+; SPARC64-NEXT: swap [%o0], %o1
+; SPARC64-NEXT: membar #LoadLoad | #LoadStore
+; SPARC64-NEXT: retl
+; SPARC64-NEXT: mov %o1, %o0
+ %3 = atomicrmw xchg ptr %0, i32 %1 acq_rel, align 4
+ ret i32 %3
+}
+
+define i32 @rmw_sc(ptr %0, i32 %1) nounwind {
+; SPARC32-LABEL: rmw_sc:
+; SPARC32: ! %bb.0:
+; SPARC32-NEXT: save %sp, -96, %sp
+; SPARC32-NEXT: mov %i1, %o1
+; SPARC32-NEXT: mov %i0, %o0
+; SPARC32-NEXT: call __atomic_exchange_4
+; SPARC32-NEXT: mov 5, %o2
+; SPARC32-NEXT: ret
+; SPARC32-NEXT: restore %g0, %o0, %o0
+;
+; SPARC32-LEON4-LABEL: rmw_sc:
+; SPARC32-LEON4: ! %bb.0:
+; SPARC32-LEON4-NEXT: stbar
+; SPARC32-LEON4-NEXT: swap [%o0], %o1
+; SPARC32-LEON4-NEXT: nop
+; SPARC32-LEON4-NEXT: retl
+; SPARC32-LEON4-NEXT: mov %o1, %o0
+;
+; SPARC32-V9-LABEL: rmw_sc:
+; SPARC32-V9: ! %bb.0:
+; SPARC32-V9-NEXT: membar #LoadStore | #StoreStore
+; SPARC32-V9-NEXT: swap [%o0], %o1
+; SPARC32-V9-NEXT: membar #LoadLoad | #LoadStore
+; SPARC32-V9-NEXT: retl
+; SPARC32-V9-NEXT: mov %o1, %o0
+;
+; SPARC64-LABEL: rmw_sc:
+; SPARC64: ! %bb.0:
+; SPARC64-NEXT: membar #LoadStore | #StoreStore
+; SPARC64-NEXT: swap [%o0], %o1
+; SPARC64-NEXT: membar #LoadLoad | #LoadStore
+; SPARC64-NEXT: retl
+; SPARC64-NEXT: mov %o1, %o0
+ %3 = atomicrmw xchg ptr %0, i32 %1 seq_cst, align 4
+ ret i32 %3
+}
>From b21acd74c4883ce6f350ae37c6ab561dae197dc3 Mon Sep 17 00:00:00 2001
From: Koakuma <koachan at protonmail.com>
Date: Thu, 21 Aug 2025 23:43:58 +0700
Subject: [PATCH 2/8] Update tests
---
.../CodeGen/SPARC/atomicrmw-uinc-udec-wrap.ll | 24 +++++++++----------
1 file changed, 12 insertions(+), 12 deletions(-)
diff --git a/llvm/test/CodeGen/SPARC/atomicrmw-uinc-udec-wrap.ll b/llvm/test/CodeGen/SPARC/atomicrmw-uinc-udec-wrap.ll
index 380a4a0a6b870..d1f1c46d9b8b1 100644
--- a/llvm/test/CodeGen/SPARC/atomicrmw-uinc-udec-wrap.ll
+++ b/llvm/test/CodeGen/SPARC/atomicrmw-uinc-udec-wrap.ll
@@ -5,7 +5,7 @@ define i8 @atomicrmw_uinc_wrap_i8(ptr %ptr, i8 %val) {
; CHECK-LABEL: atomicrmw_uinc_wrap_i8:
; CHECK: .cfi_startproc
; CHECK-NEXT: ! %bb.0:
-; CHECK-NEXT: membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore
+; CHECK-NEXT: membar #LoadStore | #StoreStore
; CHECK-NEXT: and %o0, -4, %o2
; CHECK-NEXT: mov 3, %o3
; CHECK-NEXT: andn %o3, %o0, %o0
@@ -36,7 +36,7 @@ define i8 @atomicrmw_uinc_wrap_i8(ptr %ptr, i8 %val) {
; CHECK-NEXT: nop
; CHECK-NEXT: ! %bb.2: ! %atomicrmw.end
; CHECK-NEXT: srl %o4, %o0, %o0
-; CHECK-NEXT: membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore
+; CHECK-NEXT: membar #LoadLoad | #LoadStore
; CHECK-NEXT: retl
; CHECK-NEXT: nop
%result = atomicrmw uinc_wrap ptr %ptr, i8 %val seq_cst
@@ -47,7 +47,7 @@ define i16 @atomicrmw_uinc_wrap_i16(ptr %ptr, i16 %val) {
; CHECK-LABEL: atomicrmw_uinc_wrap_i16:
; CHECK: .cfi_startproc
; CHECK-NEXT: ! %bb.0:
-; CHECK-NEXT: membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore
+; CHECK-NEXT: membar #LoadStore | #StoreStore
; CHECK-NEXT: and %o0, -4, %o2
; CHECK-NEXT: and %o0, 3, %o0
; CHECK-NEXT: xor %o0, 2, %o0
@@ -79,7 +79,7 @@ define i16 @atomicrmw_uinc_wrap_i16(ptr %ptr, i16 %val) {
; CHECK-NEXT: nop
; CHECK-NEXT: ! %bb.2: ! %atomicrmw.end
; CHECK-NEXT: srl %o5, %o0, %o0
-; CHECK-NEXT: membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore
+; CHECK-NEXT: membar #LoadLoad | #LoadStore
; CHECK-NEXT: retl
; CHECK-NEXT: nop
%result = atomicrmw uinc_wrap ptr %ptr, i16 %val seq_cst
@@ -90,7 +90,7 @@ define i32 @atomicrmw_uinc_wrap_i32(ptr %ptr, i32 %val) {
; CHECK-LABEL: atomicrmw_uinc_wrap_i32:
; CHECK: .cfi_startproc
; CHECK-NEXT: ! %bb.0:
-; CHECK-NEXT: membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore
+; CHECK-NEXT: membar #LoadStore | #StoreStore
; CHECK-NEXT: ld [%o0], %o2
; CHECK-NEXT: .LBB2_1: ! %atomicrmw.start
; CHECK-NEXT: ! =>This Inner Loop Header: Depth=1
@@ -106,7 +106,7 @@ define i32 @atomicrmw_uinc_wrap_i32(ptr %ptr, i32 %val) {
; CHECK-NEXT: bne %icc, .LBB2_1
; CHECK-NEXT: nop
; CHECK-NEXT: ! %bb.2: ! %atomicrmw.end
-; CHECK-NEXT: membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore
+; CHECK-NEXT: membar #LoadLoad | #LoadStore
; CHECK-NEXT: retl
; CHECK-NEXT: mov %o2, %o0
%result = atomicrmw uinc_wrap ptr %ptr, i32 %val seq_cst
@@ -160,7 +160,7 @@ define i8 @atomicrmw_udec_wrap_i8(ptr %ptr, i8 %val) {
; CHECK-LABEL: atomicrmw_udec_wrap_i8:
; CHECK: .cfi_startproc
; CHECK-NEXT: ! %bb.0:
-; CHECK-NEXT: membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore
+; CHECK-NEXT: membar #LoadStore | #StoreStore
; CHECK-NEXT: and %o0, -4, %o2
; CHECK-NEXT: mov 3, %o3
; CHECK-NEXT: andn %o3, %o0, %o0
@@ -193,7 +193,7 @@ define i8 @atomicrmw_udec_wrap_i8(ptr %ptr, i8 %val) {
; CHECK-NEXT: nop
; CHECK-NEXT: ! %bb.2: ! %atomicrmw.end
; CHECK-NEXT: srl %o5, %o0, %o0
-; CHECK-NEXT: membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore
+; CHECK-NEXT: membar #LoadLoad | #LoadStore
; CHECK-NEXT: retl
; CHECK-NEXT: nop
%result = atomicrmw udec_wrap ptr %ptr, i8 %val seq_cst
@@ -204,7 +204,7 @@ define i16 @atomicrmw_udec_wrap_i16(ptr %ptr, i16 %val) {
; CHECK-LABEL: atomicrmw_udec_wrap_i16:
; CHECK: .cfi_startproc
; CHECK-NEXT: ! %bb.0:
-; CHECK-NEXT: membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore
+; CHECK-NEXT: membar #LoadStore | #StoreStore
; CHECK-NEXT: and %o0, -4, %o2
; CHECK-NEXT: and %o0, 3, %o0
; CHECK-NEXT: xor %o0, 2, %o0
@@ -238,7 +238,7 @@ define i16 @atomicrmw_udec_wrap_i16(ptr %ptr, i16 %val) {
; CHECK-NEXT: nop
; CHECK-NEXT: ! %bb.2: ! %atomicrmw.end
; CHECK-NEXT: srl %g2, %o0, %o0
-; CHECK-NEXT: membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore
+; CHECK-NEXT: membar #LoadLoad | #LoadStore
; CHECK-NEXT: retl
; CHECK-NEXT: nop
%result = atomicrmw udec_wrap ptr %ptr, i16 %val seq_cst
@@ -249,7 +249,7 @@ define i32 @atomicrmw_udec_wrap_i32(ptr %ptr, i32 %val) {
; CHECK-LABEL: atomicrmw_udec_wrap_i32:
; CHECK: .cfi_startproc
; CHECK-NEXT: ! %bb.0:
-; CHECK-NEXT: membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore
+; CHECK-NEXT: membar #LoadStore | #StoreStore
; CHECK-NEXT: ld [%o0], %o2
; CHECK-NEXT: .LBB6_1: ! %atomicrmw.start
; CHECK-NEXT: ! =>This Inner Loop Header: Depth=1
@@ -267,7 +267,7 @@ define i32 @atomicrmw_udec_wrap_i32(ptr %ptr, i32 %val) {
; CHECK-NEXT: bne %icc, .LBB6_1
; CHECK-NEXT: nop
; CHECK-NEXT: ! %bb.2: ! %atomicrmw.end
-; CHECK-NEXT: membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore
+; CHECK-NEXT: membar #LoadLoad | #LoadStore
; CHECK-NEXT: retl
; CHECK-NEXT: mov %o2, %o0
%result = atomicrmw udec_wrap ptr %ptr, i32 %val seq_cst
>From d78147f08bb047ee6cae49a360bbf5e4de644d09 Mon Sep 17 00:00:00 2001
From: Koakuma <koachan at protonmail.com>
Date: Fri, 22 Aug 2025 20:34:58 +0700
Subject: [PATCH 3/8] Don't emit acq barriers on V8
---
llvm/lib/Target/Sparc/SparcISelLowering.cpp | 4 +++-
llvm/lib/Target/Sparc/SparcInstrInfo.td | 3 +--
llvm/test/CodeGen/SPARC/atomics-ordering.ll | 11 ++---------
3 files changed, 6 insertions(+), 12 deletions(-)
diff --git a/llvm/lib/Target/Sparc/SparcISelLowering.cpp b/llvm/lib/Target/Sparc/SparcISelLowering.cpp
index e59b05817a3b0..a926fe56a61ef 100644
--- a/llvm/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/llvm/lib/Target/Sparc/SparcISelLowering.cpp
@@ -3578,8 +3578,10 @@ Instruction *SparcTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
Instruction *SparcTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
Instruction *Inst,
AtomicOrdering Ord) const {
+ // V8 loads already come with an implicit acquire barrier, so there's no need
+ // to emit one again.
bool HasLoadSemantics = isa<AtomicRMWInst>(Inst) || isa<LoadInst>(Inst);
- if (HasLoadSemantics && isAcquireOrStronger(Ord))
+ if (Subtarget->isV9() && HasLoadSemantics && isAcquireOrStronger(Ord))
return Builder.CreateFence(AtomicOrdering::Acquire);
// SC plain stores would need a trailing full barrier.
diff --git a/llvm/lib/Target/Sparc/SparcInstrInfo.td b/llvm/lib/Target/Sparc/SparcInstrInfo.td
index ff5af4bcd03cc..f427f6bfba63b 100644
--- a/llvm/lib/Target/Sparc/SparcInstrInfo.td
+++ b/llvm/lib/Target/Sparc/SparcInstrInfo.td
@@ -1959,8 +1959,7 @@ def : Pat<(store (i32 0), ADDRri:$dst), (STri ADDRri:$dst, (i32 G0))>;
// All load-type operations in V8 comes with implicit acquire semantics.
let Predicates = [HasNoV9] in {
- // Acquire -> nothing
- // FIXME how to prevent this from actually emitting anything?
+ // Acquire -> nop
def : Pat<(atomic_fence (i32 4), timm), (NOP)>;
// Release / AcqRel -> stbar
def : Pat<(atomic_fence (i32 5), timm), (STBAR)>;
diff --git a/llvm/test/CodeGen/SPARC/atomics-ordering.ll b/llvm/test/CodeGen/SPARC/atomics-ordering.ll
index e3decd56ee613..25a370b325302 100644
--- a/llvm/test/CodeGen/SPARC/atomics-ordering.ll
+++ b/llvm/test/CodeGen/SPARC/atomics-ordering.ll
@@ -16,10 +16,8 @@ define i32 @load_acq(ptr %0) nounwind {
;
; SPARC32-LEON4-LABEL: load_acq:
; SPARC32-LEON4: ! %bb.0:
-; SPARC32-LEON4-NEXT: ld [%o0], %o0
-; SPARC32-LEON4-NEXT: nop
; SPARC32-LEON4-NEXT: retl
-; SPARC32-LEON4-NEXT: nop
+; SPARC32-LEON4-NEXT: ld [%o0], %o0
;
; SPARC32-V9-LABEL: load_acq:
; SPARC32-V9: ! %bb.0:
@@ -50,10 +48,8 @@ define i32 @load_sc(ptr %0) nounwind {
;
; SPARC32-LEON4-LABEL: load_sc:
; SPARC32-LEON4: ! %bb.0:
-; SPARC32-LEON4-NEXT: ld [%o0], %o0
-; SPARC32-LEON4-NEXT: nop
; SPARC32-LEON4-NEXT: retl
-; SPARC32-LEON4-NEXT: nop
+; SPARC32-LEON4-NEXT: ld [%o0], %o0
;
; SPARC32-V9-LABEL: load_sc:
; SPARC32-V9: ! %bb.0:
@@ -156,7 +152,6 @@ define i32 @rmw_acq(ptr %0, i32 %1) nounwind {
; SPARC32-LEON4-LABEL: rmw_acq:
; SPARC32-LEON4: ! %bb.0:
; SPARC32-LEON4-NEXT: swap [%o0], %o1
-; SPARC32-LEON4-NEXT: nop
; SPARC32-LEON4-NEXT: retl
; SPARC32-LEON4-NEXT: mov %o1, %o0
;
@@ -227,7 +222,6 @@ define i32 @rmw_acq_rel(ptr %0, i32 %1) nounwind {
; SPARC32-LEON4: ! %bb.0:
; SPARC32-LEON4-NEXT: stbar
; SPARC32-LEON4-NEXT: swap [%o0], %o1
-; SPARC32-LEON4-NEXT: nop
; SPARC32-LEON4-NEXT: retl
; SPARC32-LEON4-NEXT: mov %o1, %o0
;
@@ -265,7 +259,6 @@ define i32 @rmw_sc(ptr %0, i32 %1) nounwind {
; SPARC32-LEON4: ! %bb.0:
; SPARC32-LEON4-NEXT: stbar
; SPARC32-LEON4-NEXT: swap [%o0], %o1
-; SPARC32-LEON4-NEXT: nop
; SPARC32-LEON4-NEXT: retl
; SPARC32-LEON4-NEXT: mov %o1, %o0
;
>From cc5db003eba349331e75f3b41e7ce97e02e0e33b Mon Sep 17 00:00:00 2001
From: Koakuma <koachan at protonmail.com>
Date: Fri, 22 Aug 2025 23:53:47 +0700
Subject: [PATCH 4/8] Update opt tests
---
.../Transforms/AtomicExpand/SPARC/partword.ll | 28 ++++++++-----------
1 file changed, 12 insertions(+), 16 deletions(-)
diff --git a/llvm/test/Transforms/AtomicExpand/SPARC/partword.ll b/llvm/test/Transforms/AtomicExpand/SPARC/partword.ll
index 3a306a4d98613..b50c79d13b770 100644
--- a/llvm/test/Transforms/AtomicExpand/SPARC/partword.ll
+++ b/llvm/test/Transforms/AtomicExpand/SPARC/partword.ll
@@ -12,7 +12,6 @@ target triple = "sparcv9-unknown-unknown"
define i8 @test_cmpxchg_i8(ptr %arg, i8 %old, i8 %new) {
; CHECK-LABEL: @test_cmpxchg_i8(
; CHECK-NEXT: entry:
-; CHECK-NEXT: fence seq_cst
; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[ARG:%.*]], i64 -4)
; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint ptr [[ARG]] to i64
; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP0]], 3
@@ -45,7 +44,6 @@ define i8 @test_cmpxchg_i8(ptr %arg, i8 %old, i8 %new) {
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i8
; CHECK-NEXT: [[TMP17:%.*]] = insertvalue { i8, i1 } poison, i8 [[EXTRACTED]], 0
; CHECK-NEXT: [[TMP18:%.*]] = insertvalue { i8, i1 } [[TMP17]], i1 [[TMP14]], 1
-; CHECK-NEXT: fence seq_cst
; CHECK-NEXT: [[RET:%.*]] = extractvalue { i8, i1 } [[TMP18]], 0
; CHECK-NEXT: ret i8 [[RET]]
;
@@ -58,7 +56,6 @@ entry:
define i16 @test_cmpxchg_i16(ptr %arg, i16 %old, i16 %new) {
; CHECK-LABEL: @test_cmpxchg_i16(
; CHECK-NEXT: entry:
-; CHECK-NEXT: fence seq_cst
; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[ARG:%.*]], i64 -4)
; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint ptr [[ARG]] to i64
; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP0]], 3
@@ -91,7 +88,6 @@ define i16 @test_cmpxchg_i16(ptr %arg, i16 %old, i16 %new) {
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
; CHECK-NEXT: [[TMP17:%.*]] = insertvalue { i16, i1 } poison, i16 [[EXTRACTED]], 0
; CHECK-NEXT: [[TMP18:%.*]] = insertvalue { i16, i1 } [[TMP17]], i1 [[TMP14]], 1
-; CHECK-NEXT: fence seq_cst
; CHECK-NEXT: [[RET:%.*]] = extractvalue { i16, i1 } [[TMP18]], 0
; CHECK-NEXT: ret i16 [[RET]]
;
@@ -104,7 +100,7 @@ entry:
define i16 @test_add_i16(ptr %arg, i16 %val) {
; CHECK-LABEL: @test_add_i16(
; CHECK-NEXT: entry:
-; CHECK-NEXT: fence seq_cst
+; CHECK-NEXT: fence release
; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[ARG:%.*]], i64 -4)
; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint ptr [[ARG]] to i64
; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP0]], 3
@@ -130,7 +126,7 @@ define i16 @test_add_i16(ptr %arg, i16 %val) {
; CHECK: atomicrmw.end:
; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
-; CHECK-NEXT: fence seq_cst
+; CHECK-NEXT: fence acquire
; CHECK-NEXT: ret i16 [[EXTRACTED]]
;
entry:
@@ -141,7 +137,7 @@ entry:
define i16 @test_xor_i16(ptr %arg, i16 %val) {
; CHECK-LABEL: @test_xor_i16(
; CHECK-NEXT: entry:
-; CHECK-NEXT: fence seq_cst
+; CHECK-NEXT: fence release
; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[ARG:%.*]], i64 -4)
; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint ptr [[ARG]] to i64
; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP0]], 3
@@ -164,7 +160,7 @@ define i16 @test_xor_i16(ptr %arg, i16 %val) {
; CHECK: atomicrmw.end:
; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
-; CHECK-NEXT: fence seq_cst
+; CHECK-NEXT: fence acquire
; CHECK-NEXT: ret i16 [[EXTRACTED]]
;
entry:
@@ -175,7 +171,7 @@ entry:
define i16 @test_or_i16(ptr %arg, i16 %val) {
; CHECK-LABEL: @test_or_i16(
; CHECK-NEXT: entry:
-; CHECK-NEXT: fence seq_cst
+; CHECK-NEXT: fence release
; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[ARG:%.*]], i64 -4)
; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint ptr [[ARG]] to i64
; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP0]], 3
@@ -198,7 +194,7 @@ define i16 @test_or_i16(ptr %arg, i16 %val) {
; CHECK: atomicrmw.end:
; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
-; CHECK-NEXT: fence seq_cst
+; CHECK-NEXT: fence acquire
; CHECK-NEXT: ret i16 [[EXTRACTED]]
;
entry:
@@ -209,7 +205,7 @@ entry:
define i16 @test_and_i16(ptr %arg, i16 %val) {
; CHECK-LABEL: @test_and_i16(
; CHECK-NEXT: entry:
-; CHECK-NEXT: fence seq_cst
+; CHECK-NEXT: fence release
; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[ARG:%.*]], i64 -4)
; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint ptr [[ARG]] to i64
; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP0]], 3
@@ -233,7 +229,7 @@ define i16 @test_and_i16(ptr %arg, i16 %val) {
; CHECK: atomicrmw.end:
; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
-; CHECK-NEXT: fence seq_cst
+; CHECK-NEXT: fence acquire
; CHECK-NEXT: ret i16 [[EXTRACTED]]
;
entry:
@@ -244,7 +240,7 @@ entry:
define i16 @test_min_i16(ptr %arg, i16 %val) {
; CHECK-LABEL: @test_min_i16(
; CHECK-NEXT: entry:
-; CHECK-NEXT: fence seq_cst
+; CHECK-NEXT: fence release
; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[ARG:%.*]], i64 -4)
; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint ptr [[ARG]] to i64
; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP0]], 3
@@ -272,7 +268,7 @@ define i16 @test_min_i16(ptr %arg, i16 %val) {
; CHECK: atomicrmw.end:
; CHECK-NEXT: [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
; CHECK-NEXT: [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i16
-; CHECK-NEXT: fence seq_cst
+; CHECK-NEXT: fence acquire
; CHECK-NEXT: ret i16 [[EXTRACTED3]]
;
entry:
@@ -282,7 +278,7 @@ entry:
define half @test_atomicrmw_fadd_f16(ptr %ptr, half %value) {
; CHECK-LABEL: @test_atomicrmw_fadd_f16(
-; CHECK-NEXT: fence seq_cst
+; CHECK-NEXT: fence release
; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[PTR:%.*]], i64 -4)
; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[PTR]] to i64
; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
@@ -312,7 +308,7 @@ define half @test_atomicrmw_fadd_f16(ptr %ptr, half %value) {
; CHECK-NEXT: [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
; CHECK-NEXT: [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i16
; CHECK-NEXT: [[TMP8:%.*]] = bitcast i16 [[EXTRACTED3]] to half
-; CHECK-NEXT: fence seq_cst
+; CHECK-NEXT: fence acquire
; CHECK-NEXT: ret half [[TMP8]]
;
%res = atomicrmw fadd ptr %ptr, half %value seq_cst
>From 5e80c1e696005a7efa1d0d1b843bdc9beb03be77 Mon Sep 17 00:00:00 2001
From: Koakuma <koachan at protonmail.com>
Date: Thu, 28 Aug 2025 01:34:12 +0700
Subject: [PATCH 5/8] Use Pseudo to emit V8 full barrier
---
llvm/lib/Target/Sparc/SparcAsmPrinter.cpp | 25 +++++++++++++++++++++
llvm/lib/Target/Sparc/SparcInstrInfo.td | 5 ++++-
llvm/test/CodeGen/SPARC/atomics-ordering.ll | 1 +
3 files changed, 30 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp b/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp
index 8e7e2e5f73709..84b74066be0a2 100644
--- a/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp
+++ b/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp
@@ -70,6 +70,8 @@ class SparcAsmPrinter : public AsmPrinter {
void LowerGETPCXAndEmitMCInsts(const MachineInstr *MI,
const MCSubtargetInfo &STI);
+ void LowerV8Bar(const MachineInstr *MI, const MCSubtargetInfo &STI);
+
MCOperand lowerOperand(const MachineOperand &MO) const;
private:
@@ -261,6 +263,26 @@ void SparcAsmPrinter::LowerGETPCXAndEmitMCInsts(const MachineInstr *MI,
EmitADD(*OutStreamer, MCRegOP, RegO7, MCRegOP, STI);
}
+void SparcAsmPrinter::LowerV8Bar(const MachineInstr *MI,
+ const MCSubtargetInfo &STI) {
+ assert(!STI.hasFeature(Sparc::FeatureV9) &&
+ "V8BAR should not be emitted on V9 processors!");
+
+ MCInst STBARInst;
+ MCInst LDSTUBInst;
+
+ // Emit stbar; ldstub [%sp-1], %g0
+ // The sequence acts as a full barrier on V8 systems.
+ STBARInst.setOpcode(SP::STBAR);
+ LDSTUBInst.setOpcode(SP::LDSTUBri);
+ LDSTUBInst.addOperand(MCOperand::createReg(SP::G0));
+ LDSTUBInst.addOperand(MCOperand::createReg(SP::O6));
+ LDSTUBInst.addOperand(MCOperand::createImm(-1));
+
+ OutStreamer->emitInstruction(STBARInst, STI);
+ OutStreamer->emitInstruction(LDSTUBInst, STI);
+}
+
MCOperand SparcAsmPrinter::lowerOperand(const MachineOperand &MO) const {
switch (MO.getType()) {
default:
@@ -341,6 +363,9 @@ void SparcAsmPrinter::emitInstruction(const MachineInstr *MI) {
case SP::GETPCX:
LowerGETPCXAndEmitMCInsts(MI, getSubtargetInfo());
return;
+ case SP::V8BAR:
+ LowerV8Bar(MI, getSubtargetInfo());
+ return;
}
MachineBasicBlock::const_instr_iterator I = MI->getIterator();
MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
diff --git a/llvm/lib/Target/Sparc/SparcInstrInfo.td b/llvm/lib/Target/Sparc/SparcInstrInfo.td
index f427f6bfba63b..d53f110ac4cc4 100644
--- a/llvm/lib/Target/Sparc/SparcInstrInfo.td
+++ b/llvm/lib/Target/Sparc/SparcInstrInfo.td
@@ -561,6 +561,9 @@ class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern>
let isPseudo = 1;
}
+// Full memory barrier for V8.
+def V8BAR : Pseudo<(outs), (ins), "!V8BAR", []>, Requires<[HasNoV9]>;
+
// GETPCX for PIC
let Defs = [O7] in {
def GETPCX : Pseudo<(outs getPCX:$getpcseq), (ins), "$getpcseq", [] >;
@@ -1965,7 +1968,7 @@ let Predicates = [HasNoV9] in {
def : Pat<(atomic_fence (i32 5), timm), (STBAR)>;
// AcqRel and stronger -> stbar; ldstub [%sp-1], %g0
// FIXME how to actually emit the ldstub?
- def : Pat<(atomic_fence timm, timm), (STBAR)>;
+ def : Pat<(atomic_fence timm, timm), (V8BAR)>;
}
// We have to handle both 32 and 64-bit cases.
diff --git a/llvm/test/CodeGen/SPARC/atomics-ordering.ll b/llvm/test/CodeGen/SPARC/atomics-ordering.ll
index 25a370b325302..ebb281ac516b3 100644
--- a/llvm/test/CodeGen/SPARC/atomics-ordering.ll
+++ b/llvm/test/CodeGen/SPARC/atomics-ordering.ll
@@ -116,6 +116,7 @@ define void @store_sc(ptr %0, i32 %1) nounwind {
; SPARC32-LEON4-NEXT: stbar
; SPARC32-LEON4-NEXT: st %o1, [%o0]
; SPARC32-LEON4-NEXT: stbar
+; SPARC32-LEON4-NEXT: ldstub [%sp+-1], %g0
; SPARC32-LEON4-NEXT: retl
; SPARC32-LEON4-NEXT: nop
;
>From 894a6e00f1d252e81cf66e852aa8c8d5ec475d0d Mon Sep 17 00:00:00 2001
From: Koakuma <koachan at protonmail.com>
Date: Thu, 28 Aug 2025 01:37:40 +0700
Subject: [PATCH 6/8] Remove FIXME comment
---
llvm/lib/Target/Sparc/SparcInstrInfo.td | 1 -
1 file changed, 1 deletion(-)
diff --git a/llvm/lib/Target/Sparc/SparcInstrInfo.td b/llvm/lib/Target/Sparc/SparcInstrInfo.td
index d53f110ac4cc4..0495fbef94473 100644
--- a/llvm/lib/Target/Sparc/SparcInstrInfo.td
+++ b/llvm/lib/Target/Sparc/SparcInstrInfo.td
@@ -1967,7 +1967,6 @@ let Predicates = [HasNoV9] in {
// Release / AcqRel -> stbar
def : Pat<(atomic_fence (i32 5), timm), (STBAR)>;
// AcqRel and stronger -> stbar; ldstub [%sp-1], %g0
- // FIXME how to actually emit the ldstub?
def : Pat<(atomic_fence timm, timm), (V8BAR)>;
}
>From 5a9fa67646fc7a9285cd5cfb6f9e8b37d01c6508 Mon Sep 17 00:00:00 2001
From: Koakuma <koachan at protonmail.com>
Date: Thu, 28 Aug 2025 18:25:38 +0700
Subject: [PATCH 7/8] Move V8BAR expansion to expandPostRAPseudo
---
llvm/lib/Target/Sparc/SparcAsmPrinter.cpp | 25 -----------------------
llvm/lib/Target/Sparc/SparcInstrInfo.cpp | 16 +++++++++++++++
2 files changed, 16 insertions(+), 25 deletions(-)
diff --git a/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp b/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp
index 84b74066be0a2..8e7e2e5f73709 100644
--- a/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp
+++ b/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp
@@ -70,8 +70,6 @@ class SparcAsmPrinter : public AsmPrinter {
void LowerGETPCXAndEmitMCInsts(const MachineInstr *MI,
const MCSubtargetInfo &STI);
- void LowerV8Bar(const MachineInstr *MI, const MCSubtargetInfo &STI);
-
MCOperand lowerOperand(const MachineOperand &MO) const;
private:
@@ -263,26 +261,6 @@ void SparcAsmPrinter::LowerGETPCXAndEmitMCInsts(const MachineInstr *MI,
EmitADD(*OutStreamer, MCRegOP, RegO7, MCRegOP, STI);
}
-void SparcAsmPrinter::LowerV8Bar(const MachineInstr *MI,
- const MCSubtargetInfo &STI) {
- assert(!STI.hasFeature(Sparc::FeatureV9) &&
- "V8BAR should not be emitted on V9 processors!");
-
- MCInst STBARInst;
- MCInst LDSTUBInst;
-
- // Emit stbar; ldstub [%sp-1], %g0
- // The sequence acts as a full barrier on V8 systems.
- STBARInst.setOpcode(SP::STBAR);
- LDSTUBInst.setOpcode(SP::LDSTUBri);
- LDSTUBInst.addOperand(MCOperand::createReg(SP::G0));
- LDSTUBInst.addOperand(MCOperand::createReg(SP::O6));
- LDSTUBInst.addOperand(MCOperand::createImm(-1));
-
- OutStreamer->emitInstruction(STBARInst, STI);
- OutStreamer->emitInstruction(LDSTUBInst, STI);
-}
-
MCOperand SparcAsmPrinter::lowerOperand(const MachineOperand &MO) const {
switch (MO.getType()) {
default:
@@ -363,9 +341,6 @@ void SparcAsmPrinter::emitInstruction(const MachineInstr *MI) {
case SP::GETPCX:
LowerGETPCXAndEmitMCInsts(MI, getSubtargetInfo());
return;
- case SP::V8BAR:
- LowerV8Bar(MI, getSubtargetInfo());
- return;
}
MachineBasicBlock::const_instr_iterator I = MI->getIterator();
MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
diff --git a/llvm/lib/Target/Sparc/SparcInstrInfo.cpp b/llvm/lib/Target/Sparc/SparcInstrInfo.cpp
index a7fbbd4044c11..52732de194b0f 100644
--- a/llvm/lib/Target/Sparc/SparcInstrInfo.cpp
+++ b/llvm/lib/Target/Sparc/SparcInstrInfo.cpp
@@ -653,6 +653,22 @@ bool SparcInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
.addImm(Offset);
return true;
}
+ case SP::V8BAR: {
+ assert(!Subtarget.isV9() &&
+ "V8BAR should not be emitted on V9 processors!");
+
+ // Expand the V8BAR pseudo into: stbar; ldstub [%sp-1], %g0
+ // The sequence acts as a full barrier on V8 systems.
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineInstr &InstSTBAR =
+ *BuildMI(MBB, MI, MI.getDebugLoc(), get(SP::STBAR));
+ MachineInstr &InstLDSTUB =
+ *BuildMI(MBB, MI, MI.getDebugLoc(), get(SP::LDSTUBri), SP::G0)
+ .addReg(SP::O6)
+ .addImm(-1);
+ // NOTE(review): MIBundleBuilder(MBB, B, E) appears to take a half-open
+ // [B, E) range -- confirm that LDSTUB really ends up in the bundle.
+ MIBundleBuilder(MBB, InstSTBAR, InstLDSTUB);
+ MBB.erase(MI);
+ // MI is gone at this point. Report the expansion as done so that we do
+ // not fall out of the switch into `return false`, which would tell the
+ // caller that the (now erased) pseudo was left untouched.
+ return true;
+ }
}
return false;
}
>From 2c52cb7366a0ebf4032debab7fbd1c1d693fcf9f Mon Sep 17 00:00:00 2001
From: Koakuma <koachan at protonmail.com>
Date: Tue, 2 Sep 2025 19:17:49 +0700
Subject: [PATCH 8/8] Remove unneeded includes
---
llvm/lib/Target/Sparc/SparcISelLowering.cpp | 2 --
1 file changed, 2 deletions(-)
diff --git a/llvm/lib/Target/Sparc/SparcISelLowering.cpp b/llvm/lib/Target/Sparc/SparcISelLowering.cpp
index a926fe56a61ef..c7fcd34105a72 100644
--- a/llvm/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/llvm/lib/Target/Sparc/SparcISelLowering.cpp
@@ -34,9 +34,7 @@
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
-#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
using namespace llvm;
More information about the llvm-commits
mailing list