[llvm] [AArch64] Add support for 16/32/64-bit floating-point atomic read-modify-write ops (PR #125686)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Feb 4 05:48:19 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-aarch64
Author: Jonathan Thackray (jthackray)
<details>
<summary>Changes</summary>
Add support for AArch64 16/32/64-bit floating-point atomic read-modify-write
operations (FADD, FMAX, FMIN) using LDFADD, LDFMAX, LDFMIN atomic instructions.
---
Patch is 78.64 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/125686.diff
6 Files Affected:
- (modified) llvm/lib/Target/AArch64/AArch64ISelLowering.cpp (+22)
- (modified) llvm/lib/Target/AArch64/AArch64InstrAtomics.td (+10)
- (modified) llvm/lib/Target/AArch64/AArch64InstrFormats.td (+26)
- (added) llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lsfe.ll (+949)
- (added) llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-lsfe.ll (+964)
- (modified) llvm/test/CodeGen/AArch64/Atomics/generate-tests.py (+34-4)
``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 84f6d421b70f961..a9a7f31441d932c 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -974,6 +974,20 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
#undef LCALLNAME5
}
+ if (Subtarget->outlineAtomics() && !Subtarget->hasLSFE()) {
+ setOperationAction(ISD::ATOMIC_LOAD_FADD, MVT::f16, LibCall);
+ setOperationAction(ISD::ATOMIC_LOAD_FADD, MVT::f32, LibCall);
+ setOperationAction(ISD::ATOMIC_LOAD_FADD, MVT::f64, LibCall);
+
+ setOperationAction(ISD::ATOMIC_LOAD_FMAX, MVT::f16, LibCall);
+ setOperationAction(ISD::ATOMIC_LOAD_FMAX, MVT::f32, LibCall);
+ setOperationAction(ISD::ATOMIC_LOAD_FMAX, MVT::f64, LibCall);
+
+ setOperationAction(ISD::ATOMIC_LOAD_FMIN, MVT::f16, LibCall);
+ setOperationAction(ISD::ATOMIC_LOAD_FMIN, MVT::f32, LibCall);
+ setOperationAction(ISD::ATOMIC_LOAD_FMIN, MVT::f64, LibCall);
+ }
+
if (Subtarget->hasLSE128()) {
// Custom lowering because i128 is not legal. Must be replaced by 2x64
// values. ATOMIC_LOAD_AND also needs op legalisation to emit LDCLRP.
@@ -27825,6 +27839,14 @@ AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
if (CanUseLSE128)
return AtomicExpansionKind::None;
+ // With LSFE, FAdd/FMax/FMin are selected natively (LDFADD/LDFMAX/LDFMIN).
+ bool CanUseAtomicFP = Subtarget->hasLSFE() &&
+ (AI->getOperation() == AtomicRMWInst::FAdd ||
+ AI->getOperation() == AtomicRMWInst::FMax ||
+ AI->getOperation() == AtomicRMWInst::FMin);
+ if (CanUseAtomicFP)
+ return AtomicExpansionKind::None;
+
// Nand is not supported in LSE.
// Leave 128 bits to LLSC or CmpXChg.
if (AI->getOperation() != AtomicRMWInst::Nand && Size < 128 &&
diff --git a/llvm/lib/Target/AArch64/AArch64InstrAtomics.td b/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
index 5e6db9d007a5557..045bebf15a701ab 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
@@ -543,6 +543,16 @@ let Predicates = [HasLSE] in {
defm : LDOPregister_patterns_mod<"LDCLR", "atomic_load_and", "ORN">;
}
+defm atomic_load_fadd : binary_atomic_op_fp<atomic_load_fadd>;
+defm atomic_load_fmin : binary_atomic_op_fp<atomic_load_fmin>;
+defm atomic_load_fmax : binary_atomic_op_fp<atomic_load_fmax>;
+
+let Predicates = [HasLSFE] in {
+ defm : LDFPOPregister_patterns<"LDFADD", "atomic_load_fadd">;
+ defm : LDFPOPregister_patterns<"LDFMAX", "atomic_load_fmax">;
+ defm : LDFPOPregister_patterns<"LDFMIN", "atomic_load_fmin">;
+}
+
// v8.9a/v9.4a FEAT_LRCPC patterns
let Predicates = [HasRCPC3, HasNEON] in {
// LDAP1 loads
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index 3bb5d3cb4d09def..810255eabfded8b 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -12474,6 +12474,32 @@ multiclass LDOPregister_patterns_mod<string inst, string op, string mod> {
(i32 (!cast<Instruction>(mod#Wrr) WZR, GPR32:$Rm))>;
}
+let Predicates = [HasLSFE] in // $Rn is the memory address, hence GPR64sp.
+multiclass LDFPOPregister_patterns_ord_dag<string inst, string suffix, string op,
+ ValueType vt, dag SrcRHS, dag DstRHS> {
+ def : Pat<(!cast<PatFrag>(op#"_"#vt#"_monotonic") GPR64sp:$Rn, SrcRHS),
+ (!cast<Instruction>(inst # suffix) DstRHS, GPR64sp:$Rn)>;
+ def : Pat<(!cast<PatFrag>(op#"_"#vt#"_acquire") GPR64sp:$Rn, SrcRHS),
+ (!cast<Instruction>(inst # "A" # suffix) DstRHS, GPR64sp:$Rn)>;
+ def : Pat<(!cast<PatFrag>(op#"_"#vt#"_release") GPR64sp:$Rn, SrcRHS),
+ (!cast<Instruction>(inst # "L" # suffix) DstRHS, GPR64sp:$Rn)>;
+ def : Pat<(!cast<PatFrag>(op#"_"#vt#"_acq_rel") GPR64sp:$Rn, SrcRHS),
+ (!cast<Instruction>(inst # "AL" # suffix) DstRHS, GPR64sp:$Rn)>;
+ def : Pat<(!cast<PatFrag>(op#"_"#vt#"_seq_cst") GPR64sp:$Rn, SrcRHS), // seq_cst reuses the AL form
+ (!cast<Instruction>(inst # "AL" # suffix) DstRHS, GPR64sp:$Rn)>;
+}
+
+multiclass LDFPOPregister_patterns_ord<string inst, string suffix, string op,
+ ValueType vt, dag RHS> {
+ defm : LDFPOPregister_patterns_ord_dag<inst, suffix, op, vt, RHS, RHS>; // same dag as SrcRHS and DstRHS
+}
+
+multiclass LDFPOPregister_patterns<string inst, string op> { // one pattern set per FP width: H=f16, S=f32, D=f64
+ defm : LDFPOPregister_patterns_ord<inst, "H", op, f16, (f16 FPR16:$Rm)>;
+ defm : LDFPOPregister_patterns_ord<inst, "S", op, f32, (f32 FPR32:$Rm)>;
+ defm : LDFPOPregister_patterns_ord<inst, "D", op, f64, (f64 FPR64:$Rm)>;
+}
+
let Predicates = [HasLSE] in
multiclass CASregister_patterns_ord_dag<string inst, string suffix, string op,
ValueType vt, dag OLD, dag NEW> {
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lsfe.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lsfe.ll
new file mode 100644
index 000000000000000..3d6ad81ea674cc5
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lsfe.ll
@@ -0,0 +1,949 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --filter-out "\b(sp)\b" --filter "^\s*(ld[^r]|st[^r]|swp|cas|bl|add|and|eor|orn|orr|sub|mvn|sxt|cmp|ccmp|csel|dmb)"
+; The base test file was generated by ./llvm/test/CodeGen/AArch64/Atomics/generate-tests.py
+; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+lsfe -O0 | FileCheck %s --check-prefixes=CHECK,-O0
+; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+lsfe -O1 | FileCheck %s --check-prefixes=CHECK,-O1
+
+define dso_local half @atomicrmw_fadd_half_aligned_monotonic(ptr %ptr, half %value) {
+; CHECK-LABEL: atomicrmw_fadd_half_aligned_monotonic:
+; CHECK: ldfadd h0, h0, [x0]
+ %r = atomicrmw fadd ptr %ptr, half %value monotonic, align 2
+ ret half %r
+}
+
+define dso_local half @atomicrmw_fadd_half_aligned_acquire(ptr %ptr, half %value) {
+; CHECK-LABEL: atomicrmw_fadd_half_aligned_acquire:
+; CHECK: ldfadda h0, h0, [x0]
+ %r = atomicrmw fadd ptr %ptr, half %value acquire, align 2
+ ret half %r
+}
+
+define dso_local half @atomicrmw_fadd_half_aligned_release(ptr %ptr, half %value) {
+; CHECK-LABEL: atomicrmw_fadd_half_aligned_release:
+; CHECK: ldfaddl h0, h0, [x0]
+ %r = atomicrmw fadd ptr %ptr, half %value release, align 2
+ ret half %r
+}
+
+define dso_local half @atomicrmw_fadd_half_aligned_acq_rel(ptr %ptr, half %value) {
+; CHECK-LABEL: atomicrmw_fadd_half_aligned_acq_rel:
+; CHECK: ldfaddal h0, h0, [x0]
+ %r = atomicrmw fadd ptr %ptr, half %value acq_rel, align 2
+ ret half %r
+}
+
+define dso_local half @atomicrmw_fadd_half_aligned_seq_cst(ptr %ptr, half %value) {
+; CHECK-LABEL: atomicrmw_fadd_half_aligned_seq_cst:
+; CHECK: ldfaddal h0, h0, [x0]
+ %r = atomicrmw fadd ptr %ptr, half %value seq_cst, align 2
+ ret half %r
+}
+
+define dso_local float @atomicrmw_fadd_float_aligned_monotonic(ptr %ptr, float %value) {
+; CHECK-LABEL: atomicrmw_fadd_float_aligned_monotonic:
+; CHECK: ldfadd s0, s0, [x0]
+ %r = atomicrmw fadd ptr %ptr, float %value monotonic, align 4
+ ret float %r
+}
+
+define dso_local float @atomicrmw_fadd_float_aligned_acquire(ptr %ptr, float %value) {
+; CHECK-LABEL: atomicrmw_fadd_float_aligned_acquire:
+; CHECK: ldfadda s0, s0, [x0]
+ %r = atomicrmw fadd ptr %ptr, float %value acquire, align 4
+ ret float %r
+}
+
+define dso_local float @atomicrmw_fadd_float_aligned_release(ptr %ptr, float %value) {
+; CHECK-LABEL: atomicrmw_fadd_float_aligned_release:
+; CHECK: ldfaddl s0, s0, [x0]
+ %r = atomicrmw fadd ptr %ptr, float %value release, align 4
+ ret float %r
+}
+
+define dso_local float @atomicrmw_fadd_float_aligned_acq_rel(ptr %ptr, float %value) {
+; CHECK-LABEL: atomicrmw_fadd_float_aligned_acq_rel:
+; CHECK: ldfaddal s0, s0, [x0]
+ %r = atomicrmw fadd ptr %ptr, float %value acq_rel, align 4
+ ret float %r
+}
+
+define dso_local float @atomicrmw_fadd_float_aligned_seq_cst(ptr %ptr, float %value) {
+; CHECK-LABEL: atomicrmw_fadd_float_aligned_seq_cst:
+; CHECK: ldfaddal s0, s0, [x0]
+ %r = atomicrmw fadd ptr %ptr, float %value seq_cst, align 4
+ ret float %r
+}
+
+define dso_local double @atomicrmw_fadd_double_aligned_monotonic(ptr %ptr, double %value) {
+; CHECK-LABEL: atomicrmw_fadd_double_aligned_monotonic:
+; CHECK: ldfadd d0, d0, [x0]
+ %r = atomicrmw fadd ptr %ptr, double %value monotonic, align 8
+ ret double %r
+}
+
+define dso_local double @atomicrmw_fadd_double_aligned_acquire(ptr %ptr, double %value) {
+; CHECK-LABEL: atomicrmw_fadd_double_aligned_acquire:
+; CHECK: ldfadda d0, d0, [x0]
+ %r = atomicrmw fadd ptr %ptr, double %value acquire, align 8
+ ret double %r
+}
+
+define dso_local double @atomicrmw_fadd_double_aligned_release(ptr %ptr, double %value) {
+; CHECK-LABEL: atomicrmw_fadd_double_aligned_release:
+; CHECK: ldfaddl d0, d0, [x0]
+ %r = atomicrmw fadd ptr %ptr, double %value release, align 8
+ ret double %r
+}
+
+define dso_local double @atomicrmw_fadd_double_aligned_acq_rel(ptr %ptr, double %value) {
+; CHECK-LABEL: atomicrmw_fadd_double_aligned_acq_rel:
+; CHECK: ldfaddal d0, d0, [x0]
+ %r = atomicrmw fadd ptr %ptr, double %value acq_rel, align 8
+ ret double %r
+}
+
+define dso_local double @atomicrmw_fadd_double_aligned_seq_cst(ptr %ptr, double %value) {
+; CHECK-LABEL: atomicrmw_fadd_double_aligned_seq_cst:
+; CHECK: ldfaddal d0, d0, [x0]
+ %r = atomicrmw fadd ptr %ptr, double %value seq_cst, align 8
+ ret double %r
+}
+
+define dso_local half @atomicrmw_fadd_half_unaligned_monotonic(ptr %ptr, half %value) {
+; CHECK-LABEL: atomicrmw_fadd_half_unaligned_monotonic:
+; CHECK: bl __atomic_compare_exchange
+ %r = atomicrmw fadd ptr %ptr, half %value monotonic, align 1
+ ret half %r
+}
+
+define dso_local half @atomicrmw_fadd_half_unaligned_acquire(ptr %ptr, half %value) {
+; CHECK-LABEL: atomicrmw_fadd_half_unaligned_acquire:
+; CHECK: bl __atomic_compare_exchange
+ %r = atomicrmw fadd ptr %ptr, half %value acquire, align 1
+ ret half %r
+}
+
+define dso_local half @atomicrmw_fadd_half_unaligned_release(ptr %ptr, half %value) {
+; CHECK-LABEL: atomicrmw_fadd_half_unaligned_release:
+; CHECK: bl __atomic_compare_exchange
+ %r = atomicrmw fadd ptr %ptr, half %value release, align 1
+ ret half %r
+}
+
+define dso_local half @atomicrmw_fadd_half_unaligned_acq_rel(ptr %ptr, half %value) {
+; CHECK-LABEL: atomicrmw_fadd_half_unaligned_acq_rel:
+; CHECK: bl __atomic_compare_exchange
+ %r = atomicrmw fadd ptr %ptr, half %value acq_rel, align 1
+ ret half %r
+}
+
+define dso_local half @atomicrmw_fadd_half_unaligned_seq_cst(ptr %ptr, half %value) {
+; CHECK-LABEL: atomicrmw_fadd_half_unaligned_seq_cst:
+; CHECK: bl __atomic_compare_exchange
+ %r = atomicrmw fadd ptr %ptr, half %value seq_cst, align 1
+ ret half %r
+}
+
+define dso_local float @atomicrmw_fadd_float_unaligned_monotonic(ptr %ptr, float %value) {
+; CHECK-LABEL: atomicrmw_fadd_float_unaligned_monotonic:
+; CHECK: bl __atomic_compare_exchange
+ %r = atomicrmw fadd ptr %ptr, float %value monotonic, align 1
+ ret float %r
+}
+
+define dso_local float @atomicrmw_fadd_float_unaligned_acquire(ptr %ptr, float %value) {
+; CHECK-LABEL: atomicrmw_fadd_float_unaligned_acquire:
+; CHECK: bl __atomic_compare_exchange
+ %r = atomicrmw fadd ptr %ptr, float %value acquire, align 1
+ ret float %r
+}
+
+define dso_local float @atomicrmw_fadd_float_unaligned_release(ptr %ptr, float %value) {
+; CHECK-LABEL: atomicrmw_fadd_float_unaligned_release:
+; CHECK: bl __atomic_compare_exchange
+ %r = atomicrmw fadd ptr %ptr, float %value release, align 1
+ ret float %r
+}
+
+define dso_local float @atomicrmw_fadd_float_unaligned_acq_rel(ptr %ptr, float %value) {
+; CHECK-LABEL: atomicrmw_fadd_float_unaligned_acq_rel:
+; CHECK: bl __atomic_compare_exchange
+ %r = atomicrmw fadd ptr %ptr, float %value acq_rel, align 1
+ ret float %r
+}
+
+define dso_local float @atomicrmw_fadd_float_unaligned_seq_cst(ptr %ptr, float %value) {
+; CHECK-LABEL: atomicrmw_fadd_float_unaligned_seq_cst:
+; CHECK: bl __atomic_compare_exchange
+ %r = atomicrmw fadd ptr %ptr, float %value seq_cst, align 1
+ ret float %r
+}
+
+define dso_local double @atomicrmw_fadd_double_unaligned_monotonic(ptr %ptr, double %value) {
+; CHECK-LABEL: atomicrmw_fadd_double_unaligned_monotonic:
+; CHECK: bl __atomic_compare_exchange
+ %r = atomicrmw fadd ptr %ptr, double %value monotonic, align 1
+ ret double %r
+}
+
+define dso_local double @atomicrmw_fadd_double_unaligned_acquire(ptr %ptr, double %value) {
+; CHECK-LABEL: atomicrmw_fadd_double_unaligned_acquire:
+; CHECK: bl __atomic_compare_exchange
+ %r = atomicrmw fadd ptr %ptr, double %value acquire, align 1
+ ret double %r
+}
+
+define dso_local double @atomicrmw_fadd_double_unaligned_release(ptr %ptr, double %value) {
+; CHECK-LABEL: atomicrmw_fadd_double_unaligned_release:
+; CHECK: bl __atomic_compare_exchange
+ %r = atomicrmw fadd ptr %ptr, double %value release, align 1
+ ret double %r
+}
+
+define dso_local double @atomicrmw_fadd_double_unaligned_acq_rel(ptr %ptr, double %value) {
+; CHECK-LABEL: atomicrmw_fadd_double_unaligned_acq_rel:
+; CHECK: bl __atomic_compare_exchange
+ %r = atomicrmw fadd ptr %ptr, double %value acq_rel, align 1
+ ret double %r
+}
+
+define dso_local double @atomicrmw_fadd_double_unaligned_seq_cst(ptr %ptr, double %value) {
+; CHECK-LABEL: atomicrmw_fadd_double_unaligned_seq_cst:
+; CHECK: bl __atomic_compare_exchange
+ %r = atomicrmw fadd ptr %ptr, double %value seq_cst, align 1
+ ret double %r
+}
+
+define dso_local half @atomicrmw_fsub_half_aligned_monotonic(ptr %ptr, half %value) {
+; -O0-LABEL: atomicrmw_fsub_half_aligned_monotonic:
+; -O0: ldaxrh w0, [x9]
+; -O0: cmp w0, w10, uxth
+; -O0: stlxrh w8, w11, [x9]
+; -O0: subs w8, w8, w0, uxth
+;
+; -O1-LABEL: atomicrmw_fsub_half_aligned_monotonic:
+; -O1: ldxrh w8, [x0]
+; -O1: stxrh w9, w8, [x0]
+ %r = atomicrmw fsub ptr %ptr, half %value monotonic, align 2
+ ret half %r
+}
+
+define dso_local half @atomicrmw_fsub_half_aligned_acquire(ptr %ptr, half %value) {
+; -O0-LABEL: atomicrmw_fsub_half_aligned_acquire:
+; -O0: ldaxrh w0, [x9]
+; -O0: cmp w0, w10, uxth
+; -O0: stlxrh w8, w11, [x9]
+; -O0: subs w8, w8, w0, uxth
+;
+; -O1-LABEL: atomicrmw_fsub_half_aligned_acquire:
+; -O1: ldaxrh w8, [x0]
+; -O1: stxrh w9, w8, [x0]
+ %r = atomicrmw fsub ptr %ptr, half %value acquire, align 2
+ ret half %r
+}
+
+define dso_local half @atomicrmw_fsub_half_aligned_release(ptr %ptr, half %value) {
+; -O0-LABEL: atomicrmw_fsub_half_aligned_release:
+; -O0: ldaxrh w0, [x9]
+; -O0: cmp w0, w10, uxth
+; -O0: stlxrh w8, w11, [x9]
+; -O0: subs w8, w8, w0, uxth
+;
+; -O1-LABEL: atomicrmw_fsub_half_aligned_release:
+; -O1: ldxrh w8, [x0]
+; -O1: stlxrh w9, w8, [x0]
+ %r = atomicrmw fsub ptr %ptr, half %value release, align 2
+ ret half %r
+}
+
+define dso_local half @atomicrmw_fsub_half_aligned_acq_rel(ptr %ptr, half %value) {
+; -O0-LABEL: atomicrmw_fsub_half_aligned_acq_rel:
+; -O0: ldaxrh w0, [x9]
+; -O0: cmp w0, w10, uxth
+; -O0: stlxrh w8, w11, [x9]
+; -O0: subs w8, w8, w0, uxth
+;
+; -O1-LABEL: atomicrmw_fsub_half_aligned_acq_rel:
+; -O1: ldaxrh w8, [x0]
+; -O1: stlxrh w9, w8, [x0]
+ %r = atomicrmw fsub ptr %ptr, half %value acq_rel, align 2
+ ret half %r
+}
+
+define dso_local half @atomicrmw_fsub_half_aligned_seq_cst(ptr %ptr, half %value) {
+; -O0-LABEL: atomicrmw_fsub_half_aligned_seq_cst:
+; -O0: ldaxrh w0, [x9]
+; -O0: cmp w0, w10, uxth
+; -O0: stlxrh w8, w11, [x9]
+; -O0: subs w8, w8, w0, uxth
+;
+; -O1-LABEL: atomicrmw_fsub_half_aligned_seq_cst:
+; -O1: ldaxrh w8, [x0]
+; -O1: stlxrh w9, w8, [x0]
+ %r = atomicrmw fsub ptr %ptr, half %value seq_cst, align 2
+ ret half %r
+}
+
+define dso_local float @atomicrmw_fsub_float_aligned_monotonic(ptr %ptr, float %value) {
+; -O0-LABEL: atomicrmw_fsub_float_aligned_monotonic:
+; -O0: ldaxr w0, [x9]
+; -O0: cmp w0, w10
+; -O0: stlxr w8, w11, [x9]
+; -O0: subs w8, w0, w8
+;
+; -O1-LABEL: atomicrmw_fsub_float_aligned_monotonic:
+; -O1: ldxr w8, [x0]
+; -O1: stxr w9, w8, [x0]
+ %r = atomicrmw fsub ptr %ptr, float %value monotonic, align 4
+ ret float %r
+}
+
+define dso_local float @atomicrmw_fsub_float_aligned_acquire(ptr %ptr, float %value) {
+; -O0-LABEL: atomicrmw_fsub_float_aligned_acquire:
+; -O0: ldaxr w0, [x9]
+; -O0: cmp w0, w10
+; -O0: stlxr w8, w11, [x9]
+; -O0: subs w8, w0, w8
+;
+; -O1-LABEL: atomicrmw_fsub_float_aligned_acquire:
+; -O1: ldaxr w8, [x0]
+; -O1: stxr w9, w8, [x0]
+ %r = atomicrmw fsub ptr %ptr, float %value acquire, align 4
+ ret float %r
+}
+
+define dso_local float @atomicrmw_fsub_float_aligned_release(ptr %ptr, float %value) {
+; -O0-LABEL: atomicrmw_fsub_float_aligned_release:
+; -O0: ldaxr w0, [x9]
+; -O0: cmp w0, w10
+; -O0: stlxr w8, w11, [x9]
+; -O0: subs w8, w0, w8
+;
+; -O1-LABEL: atomicrmw_fsub_float_aligned_release:
+; -O1: ldxr w8, [x0]
+; -O1: stlxr w9, w8, [x0]
+ %r = atomicrmw fsub ptr %ptr, float %value release, align 4
+ ret float %r
+}
+
+define dso_local float @atomicrmw_fsub_float_aligned_acq_rel(ptr %ptr, float %value) {
+; -O0-LABEL: atomicrmw_fsub_float_aligned_acq_rel:
+; -O0: ldaxr w0, [x9]
+; -O0: cmp w0, w10
+; -O0: stlxr w8, w11, [x9]
+; -O0: subs w8, w0, w8
+;
+; -O1-LABEL: atomicrmw_fsub_float_aligned_acq_rel:
+; -O1: ldaxr w8, [x0]
+; -O1: stlxr w9, w8, [x0]
+ %r = atomicrmw fsub ptr %ptr, float %value acq_rel, align 4
+ ret float %r
+}
+
+define dso_local float @atomicrmw_fsub_float_aligned_seq_cst(ptr %ptr, float %value) {
+; -O0-LABEL: atomicrmw_fsub_float_aligned_seq_cst:
+; -O0: ldaxr w0, [x9]
+; -O0: cmp w0, w10
+; -O0: stlxr w8, w11, [x9]
+; -O0: subs w8, w0, w8
+;
+; -O1-LABEL: atomicrmw_fsub_float_aligned_seq_cst:
+; -O1: ldaxr w8, [x0]
+; -O1: stlxr w9, w8, [x0]
+ %r = atomicrmw fsub ptr %ptr, float %value seq_cst, align 4
+ ret float %r
+}
+
+define dso_local double @atomicrmw_fsub_double_aligned_monotonic(ptr %ptr, double %value) {
+; -O0-LABEL: atomicrmw_fsub_double_aligned_monotonic:
+; -O0: ldaxr x0, [x9]
+; -O0: cmp x0, x10
+; -O0: stlxr w8, x11, [x9]
+; -O0: subs x8, x0, x8
+;
+; -O1-LABEL: atomicrmw_fsub_double_aligned_monotonic:
+; -O1: ldxr x8, [x0]
+; -O1: stxr w9, x8, [x0]
+ %r = atomicrmw fsub ptr %ptr, double %value monotonic, align 8
+ ret double %r
+}
+
+define dso_local double @atomicrmw_fsub_double_aligned_acquire(ptr %ptr, double %value) {
+; -O0-LABEL: atomicrmw_fsub_double_aligned_acquire:
+; -O0: ldaxr x0, [x9]
+; -O0: cmp x0, x10
+; -O0: stlxr w8, x11, [x9]
+; -O0: subs x8, x0, x8
+;
+; -O1-LABEL: atomicrmw_fsub_double_aligned_acquire:
+; -O1: ldaxr x8, [x0]
+; -O1: stxr w9, x8, [x0]
+ %r = atomicrmw fsub ptr %ptr, double %value acquire, align 8
+ ret double %r
+}
+
+define dso_local double @atomicrmw_fsub_double_aligned_release(ptr %ptr, double %value) {
+; -O0-LABEL: atomicrmw_fsub_double_aligned_release:
+; -O0: ldaxr x0, [x9]
+; -O0: cmp x0, x10
+; -O0: stlxr w8, x11, [x9]
+; -O0: subs x8, x0, x8
+;
+; -O1-LABEL: atomicrmw_fsub_double_aligned_release:
+; -O1: ldxr x8, [x0]
+; -O1: stlxr w9, x8, [x0]
+ %r = atomicrmw fsub ptr %ptr, double %value release, align 8
+ ret double %r
+}
+
+define dso_local double @atomicrmw_fsub_double_aligned_acq_rel(ptr %ptr, double %value) {
+; -O0-LABEL: atomicrmw_fsub_double_aligned_acq_rel:
+; -O0: ldaxr x0, [x9]
+; -O0: cmp x0, x10
+; -O0: stlxr w8, x11, [x9]
+; -O0: subs x8, x0, x8
+;
+; -O1-LABEL: atomicrmw_fsub_double_aligned_acq_rel:
+; -O1: ldaxr x8, [x0]
+; -O1: stlxr w9, x8, [x0]
+ %r = atomicrmw fsub ptr %ptr, double %value acq_rel, align 8
+ ret double %r
+}
+
+define dso_local double @atomicrmw_fsub_double_aligned_seq_cst(ptr %ptr, double %value) {
+; -O0-LABEL: atomicrmw_fsub_double_aligned_seq_cst:
+; -O0: ldaxr x0, [x9]
+; -O0: cmp x0, x10
+; -O0: stlxr w8, x11, [x9]
+; -O0: subs x8, x0, x8
+;
+; -O1-LABEL: atomicrm...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/125686
More information about the llvm-commits mailing list