[llvm] [AArch64] Add support for 16/32/64-bit floating-point atomic read-modify-write ops (PR #125686)

Jonathan Thackray via llvm-commits llvm-commits at lists.llvm.org
Tue Feb 4 07:05:01 PST 2025


https://github.com/jthackray updated https://github.com/llvm/llvm-project/pull/125686

>From 74f93aa6ead8a101f81fe508664be6e86efdf9a9 Mon Sep 17 00:00:00 2001
From: Jonathan Thackray <jonathan.thackray at arm.com>
Date: Thu, 30 Jan 2025 11:57:22 +0000
Subject: [PATCH] [AArch64] Add support for 16/32/64-bit floating-point atomic
 read-modify-write ops

Add support for 16/32/64-bit floating-point atomic read-modify-write
operations (FADD, FMAX, FMIN) using LDFADD, LDFMAX, LDFMIN atomic
instructions.
---
 .../Target/AArch64/AArch64ISelLowering.cpp    |  22 +
 .../lib/Target/AArch64/AArch64InstrAtomics.td |  10 +
 .../lib/Target/AArch64/AArch64InstrFormats.td |  26 +
 .../AArch64/Atomics/aarch64-atomicrmw-lsfe.ll | 949 +++++++++++++++++
 .../Atomics/aarch64_be-atomicrmw-lsfe.ll      | 964 ++++++++++++++++++
 .../CodeGen/AArch64/Atomics/generate-tests.py |  39 +-
 6 files changed, 2006 insertions(+), 4 deletions(-)
 create mode 100644 llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lsfe.ll
 create mode 100644 llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-lsfe.ll

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 84f6d421b70f96..de7180d78c1326 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -974,6 +974,20 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
 #undef LCALLNAME5
   }
 
+  if (Subtarget->outlineAtomics() && !Subtarget->hasLSFE()) {
+    setOperationAction(ISD::ATOMIC_LOAD_FADD, MVT::f16, LibCall);
+    setOperationAction(ISD::ATOMIC_LOAD_FADD, MVT::f32, LibCall);
+    setOperationAction(ISD::ATOMIC_LOAD_FADD, MVT::f64, LibCall);
+
+    setOperationAction(ISD::ATOMIC_LOAD_FMAX, MVT::f16, LibCall);
+    setOperationAction(ISD::ATOMIC_LOAD_FMAX, MVT::f32, LibCall);
+    setOperationAction(ISD::ATOMIC_LOAD_FMAX, MVT::f64, LibCall);
+
+    setOperationAction(ISD::ATOMIC_LOAD_FMIN, MVT::f16, LibCall);
+    setOperationAction(ISD::ATOMIC_LOAD_FMIN, MVT::f32, LibCall);
+    setOperationAction(ISD::ATOMIC_LOAD_FMIN, MVT::f64, LibCall);
+  }
+
   if (Subtarget->hasLSE128()) {
     // Custom lowering because i128 is not legal. Must be replaced by 2x64
     // values. ATOMIC_LOAD_AND also needs op legalisation to emit LDCLRP.
@@ -27825,6 +27839,14 @@ AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
   if (CanUseLSE128)
     return AtomicExpansionKind::None;
 
+  // Add support for LDFADD and friends
+  bool CanUseAtomicFP =
+      Subtarget->hasLSFE() && (AI->getOperation() == AtomicRMWInst::FAdd ||
+                               AI->getOperation() == AtomicRMWInst::FMax ||
+                               AI->getOperation() == AtomicRMWInst::FMin);
+  if (CanUseAtomicFP)
+    return AtomicExpansionKind::None;
+
   // Nand is not supported in LSE.
   // Leave 128 bits to LLSC or CmpXChg.
   if (AI->getOperation() != AtomicRMWInst::Nand && Size < 128 &&
diff --git a/llvm/lib/Target/AArch64/AArch64InstrAtomics.td b/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
index 5e6db9d007a555..045bebf15a701a 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
@@ -543,6 +543,16 @@ let Predicates = [HasLSE] in {
   defm : LDOPregister_patterns_mod<"LDCLR", "atomic_load_and", "ORN">;
 }
 
+defm atomic_load_fadd  : binary_atomic_op_fp<atomic_load_fadd>;
+defm atomic_load_fmin  : binary_atomic_op_fp<atomic_load_fmin>;
+defm atomic_load_fmax  : binary_atomic_op_fp<atomic_load_fmax>;
+
+let Predicates = [HasLSFE] in {
+  defm : LDFPOPregister_patterns<"LDFADD", "atomic_load_fadd">;
+  defm : LDFPOPregister_patterns<"LDFMAX", "atomic_load_fmax">;
+  defm : LDFPOPregister_patterns<"LDFMIN", "atomic_load_fmin">;
+}
+
 // v8.9a/v9.4a FEAT_LRCPC patterns
 let Predicates = [HasRCPC3, HasNEON] in {
   // LDAP1 loads
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index 3bb5d3cb4d09de..810255eabfded8 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -12474,6 +12474,32 @@ multiclass LDOPregister_patterns_mod<string inst, string op, string mod> {
                         (i32 (!cast<Instruction>(mod#Wrr) WZR, GPR32:$Rm))>;
 }
 
+let Predicates = [HasLSFE] in
+multiclass LDFPOPregister_patterns_ord_dag<string inst, string suffix, string op,
+                                         ValueType vt, dag SrcRHS, dag DstRHS> {
+  def : Pat<(!cast<PatFrag>(op#"_"#vt#"_monotonic") FPR64:$Rn, SrcRHS),
+            (!cast<Instruction>(inst # suffix) DstRHS, FPR64:$Rn)>;
+  def : Pat<(!cast<PatFrag>(op#"_"#vt#"_acquire") FPR64:$Rn, SrcRHS),
+            (!cast<Instruction>(inst # "A" # suffix) DstRHS, FPR64:$Rn)>;
+  def : Pat<(!cast<PatFrag>(op#"_"#vt#"_release") FPR64:$Rn, SrcRHS),
+            (!cast<Instruction>(inst # "L" # suffix) DstRHS, FPR64:$Rn)>;
+  def : Pat<(!cast<PatFrag>(op#"_"#vt#"_acq_rel") FPR64:$Rn, SrcRHS),
+            (!cast<Instruction>(inst # "AL" # suffix) DstRHS, FPR64:$Rn)>;
+  def : Pat<(!cast<PatFrag>(op#"_"#vt#"_seq_cst") FPR64:$Rn, SrcRHS),
+            (!cast<Instruction>(inst # "AL" # suffix) DstRHS, FPR64:$Rn)>;
+}
+
+multiclass LDFPOPregister_patterns_ord<string inst, string suffix, string op,
+                                     ValueType vt, dag RHS> {
+  defm : LDFPOPregister_patterns_ord_dag<inst, suffix, op, vt, RHS, RHS>;
+}
+
+multiclass LDFPOPregister_patterns<string inst, string op> {
+  defm : LDFPOPregister_patterns_ord<inst, "H", op, f16, (f16 FPR16:$Rm)>;
+  defm : LDFPOPregister_patterns_ord<inst, "S", op, f32, (f32 FPR32:$Rm)>;
+  defm : LDFPOPregister_patterns_ord<inst, "D", op, f64, (f64 FPR64:$Rm)>;
+}
+
 let Predicates = [HasLSE] in
 multiclass CASregister_patterns_ord_dag<string inst, string suffix, string op,
                                         ValueType vt, dag OLD, dag NEW> {
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lsfe.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lsfe.ll
new file mode 100644
index 00000000000000..3d6ad81ea674cc
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lsfe.ll
@@ -0,0 +1,949 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --filter-out "\b(sp)\b" --filter "^\s*(ld[^r]|st[^r]|swp|cas|bl|add|and|eor|orn|orr|sub|mvn|sxt|cmp|ccmp|csel|dmb)"
+; The base test file was generated by ./llvm/test/CodeGen/AArch64/Atomics/generate-tests.py
+; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+lsfe -O0 | FileCheck %s --check-prefixes=CHECK,-O0
+; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+lsfe -O1 | FileCheck %s --check-prefixes=CHECK,-O1
+
+define dso_local half @atomicrmw_fadd_half_aligned_monotonic(ptr %ptr, half %value) {
+; CHECK-LABEL: atomicrmw_fadd_half_aligned_monotonic:
+; CHECK:    ldfadd h0, h0, [x0]
+    %r = atomicrmw fadd ptr %ptr, half %value monotonic, align 2
+    ret half %r
+}
+
+define dso_local half @atomicrmw_fadd_half_aligned_acquire(ptr %ptr, half %value) {
+; CHECK-LABEL: atomicrmw_fadd_half_aligned_acquire:
+; CHECK:    ldfadda h0, h0, [x0]
+    %r = atomicrmw fadd ptr %ptr, half %value acquire, align 2
+    ret half %r
+}
+
+define dso_local half @atomicrmw_fadd_half_aligned_release(ptr %ptr, half %value) {
+; CHECK-LABEL: atomicrmw_fadd_half_aligned_release:
+; CHECK:    ldfaddl h0, h0, [x0]
+    %r = atomicrmw fadd ptr %ptr, half %value release, align 2
+    ret half %r
+}
+
+define dso_local half @atomicrmw_fadd_half_aligned_acq_rel(ptr %ptr, half %value) {
+; CHECK-LABEL: atomicrmw_fadd_half_aligned_acq_rel:
+; CHECK:    ldfaddal h0, h0, [x0]
+    %r = atomicrmw fadd ptr %ptr, half %value acq_rel, align 2
+    ret half %r
+}
+
+define dso_local half @atomicrmw_fadd_half_aligned_seq_cst(ptr %ptr, half %value) {
+; CHECK-LABEL: atomicrmw_fadd_half_aligned_seq_cst:
+; CHECK:    ldfaddal h0, h0, [x0]
+    %r = atomicrmw fadd ptr %ptr, half %value seq_cst, align 2
+    ret half %r
+}
+
+define dso_local float @atomicrmw_fadd_float_aligned_monotonic(ptr %ptr, float %value) {
+; CHECK-LABEL: atomicrmw_fadd_float_aligned_monotonic:
+; CHECK:    ldfadd s0, s0, [x0]
+    %r = atomicrmw fadd ptr %ptr, float %value monotonic, align 4
+    ret float %r
+}
+
+define dso_local float @atomicrmw_fadd_float_aligned_acquire(ptr %ptr, float %value) {
+; CHECK-LABEL: atomicrmw_fadd_float_aligned_acquire:
+; CHECK:    ldfadda s0, s0, [x0]
+    %r = atomicrmw fadd ptr %ptr, float %value acquire, align 4
+    ret float %r
+}
+
+define dso_local float @atomicrmw_fadd_float_aligned_release(ptr %ptr, float %value) {
+; CHECK-LABEL: atomicrmw_fadd_float_aligned_release:
+; CHECK:    ldfaddl s0, s0, [x0]
+    %r = atomicrmw fadd ptr %ptr, float %value release, align 4
+    ret float %r
+}
+
+define dso_local float @atomicrmw_fadd_float_aligned_acq_rel(ptr %ptr, float %value) {
+; CHECK-LABEL: atomicrmw_fadd_float_aligned_acq_rel:
+; CHECK:    ldfaddal s0, s0, [x0]
+    %r = atomicrmw fadd ptr %ptr, float %value acq_rel, align 4
+    ret float %r
+}
+
+define dso_local float @atomicrmw_fadd_float_aligned_seq_cst(ptr %ptr, float %value) {
+; CHECK-LABEL: atomicrmw_fadd_float_aligned_seq_cst:
+; CHECK:    ldfaddal s0, s0, [x0]
+    %r = atomicrmw fadd ptr %ptr, float %value seq_cst, align 4
+    ret float %r
+}
+
+define dso_local double @atomicrmw_fadd_double_aligned_monotonic(ptr %ptr, double %value) {
+; CHECK-LABEL: atomicrmw_fadd_double_aligned_monotonic:
+; CHECK:    ldfadd d0, d0, [x0]
+    %r = atomicrmw fadd ptr %ptr, double %value monotonic, align 8
+    ret double %r
+}
+
+define dso_local double @atomicrmw_fadd_double_aligned_acquire(ptr %ptr, double %value) {
+; CHECK-LABEL: atomicrmw_fadd_double_aligned_acquire:
+; CHECK:    ldfadda d0, d0, [x0]
+    %r = atomicrmw fadd ptr %ptr, double %value acquire, align 8
+    ret double %r
+}
+
+define dso_local double @atomicrmw_fadd_double_aligned_release(ptr %ptr, double %value) {
+; CHECK-LABEL: atomicrmw_fadd_double_aligned_release:
+; CHECK:    ldfaddl d0, d0, [x0]
+    %r = atomicrmw fadd ptr %ptr, double %value release, align 8
+    ret double %r
+}
+
+define dso_local double @atomicrmw_fadd_double_aligned_acq_rel(ptr %ptr, double %value) {
+; CHECK-LABEL: atomicrmw_fadd_double_aligned_acq_rel:
+; CHECK:    ldfaddal d0, d0, [x0]
+    %r = atomicrmw fadd ptr %ptr, double %value acq_rel, align 8
+    ret double %r
+}
+
+define dso_local double @atomicrmw_fadd_double_aligned_seq_cst(ptr %ptr, double %value) {
+; CHECK-LABEL: atomicrmw_fadd_double_aligned_seq_cst:
+; CHECK:    ldfaddal d0, d0, [x0]
+    %r = atomicrmw fadd ptr %ptr, double %value seq_cst, align 8
+    ret double %r
+}
+
+define dso_local half @atomicrmw_fadd_half_unaligned_monotonic(ptr %ptr, half %value) {
+; CHECK-LABEL: atomicrmw_fadd_half_unaligned_monotonic:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fadd ptr %ptr, half %value monotonic, align 1
+    ret half %r
+}
+
+define dso_local half @atomicrmw_fadd_half_unaligned_acquire(ptr %ptr, half %value) {
+; CHECK-LABEL: atomicrmw_fadd_half_unaligned_acquire:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fadd ptr %ptr, half %value acquire, align 1
+    ret half %r
+}
+
+define dso_local half @atomicrmw_fadd_half_unaligned_release(ptr %ptr, half %value) {
+; CHECK-LABEL: atomicrmw_fadd_half_unaligned_release:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fadd ptr %ptr, half %value release, align 1
+    ret half %r
+}
+
+define dso_local half @atomicrmw_fadd_half_unaligned_acq_rel(ptr %ptr, half %value) {
+; CHECK-LABEL: atomicrmw_fadd_half_unaligned_acq_rel:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fadd ptr %ptr, half %value acq_rel, align 1
+    ret half %r
+}
+
+define dso_local half @atomicrmw_fadd_half_unaligned_seq_cst(ptr %ptr, half %value) {
+; CHECK-LABEL: atomicrmw_fadd_half_unaligned_seq_cst:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fadd ptr %ptr, half %value seq_cst, align 1
+    ret half %r
+}
+
+define dso_local float @atomicrmw_fadd_float_unaligned_monotonic(ptr %ptr, float %value) {
+; CHECK-LABEL: atomicrmw_fadd_float_unaligned_monotonic:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fadd ptr %ptr, float %value monotonic, align 1
+    ret float %r
+}
+
+define dso_local float @atomicrmw_fadd_float_unaligned_acquire(ptr %ptr, float %value) {
+; CHECK-LABEL: atomicrmw_fadd_float_unaligned_acquire:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fadd ptr %ptr, float %value acquire, align 1
+    ret float %r
+}
+
+define dso_local float @atomicrmw_fadd_float_unaligned_release(ptr %ptr, float %value) {
+; CHECK-LABEL: atomicrmw_fadd_float_unaligned_release:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fadd ptr %ptr, float %value release, align 1
+    ret float %r
+}
+
+define dso_local float @atomicrmw_fadd_float_unaligned_acq_rel(ptr %ptr, float %value) {
+; CHECK-LABEL: atomicrmw_fadd_float_unaligned_acq_rel:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fadd ptr %ptr, float %value acq_rel, align 1
+    ret float %r
+}
+
+define dso_local float @atomicrmw_fadd_float_unaligned_seq_cst(ptr %ptr, float %value) {
+; CHECK-LABEL: atomicrmw_fadd_float_unaligned_seq_cst:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fadd ptr %ptr, float %value seq_cst, align 1
+    ret float %r
+}
+
+define dso_local double @atomicrmw_fadd_double_unaligned_monotonic(ptr %ptr, double %value) {
+; CHECK-LABEL: atomicrmw_fadd_double_unaligned_monotonic:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fadd ptr %ptr, double %value monotonic, align 1
+    ret double %r
+}
+
+define dso_local double @atomicrmw_fadd_double_unaligned_acquire(ptr %ptr, double %value) {
+; CHECK-LABEL: atomicrmw_fadd_double_unaligned_acquire:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fadd ptr %ptr, double %value acquire, align 1
+    ret double %r
+}
+
+define dso_local double @atomicrmw_fadd_double_unaligned_release(ptr %ptr, double %value) {
+; CHECK-LABEL: atomicrmw_fadd_double_unaligned_release:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fadd ptr %ptr, double %value release, align 1
+    ret double %r
+}
+
+define dso_local double @atomicrmw_fadd_double_unaligned_acq_rel(ptr %ptr, double %value) {
+; CHECK-LABEL: atomicrmw_fadd_double_unaligned_acq_rel:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fadd ptr %ptr, double %value acq_rel, align 1
+    ret double %r
+}
+
+define dso_local double @atomicrmw_fadd_double_unaligned_seq_cst(ptr %ptr, double %value) {
+; CHECK-LABEL: atomicrmw_fadd_double_unaligned_seq_cst:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fadd ptr %ptr, double %value seq_cst, align 1
+    ret double %r
+}
+
+define dso_local half @atomicrmw_fsub_half_aligned_monotonic(ptr %ptr, half %value) {
+; -O0-LABEL: atomicrmw_fsub_half_aligned_monotonic:
+; -O0:    ldaxrh w0, [x9]
+; -O0:    cmp w0, w10, uxth
+; -O0:    stlxrh w8, w11, [x9]
+; -O0:    subs w8, w8, w0, uxth
+;
+; -O1-LABEL: atomicrmw_fsub_half_aligned_monotonic:
+; -O1:    ldxrh w8, [x0]
+; -O1:    stxrh w9, w8, [x0]
+    %r = atomicrmw fsub ptr %ptr, half %value monotonic, align 2
+    ret half %r
+}
+
+define dso_local half @atomicrmw_fsub_half_aligned_acquire(ptr %ptr, half %value) {
+; -O0-LABEL: atomicrmw_fsub_half_aligned_acquire:
+; -O0:    ldaxrh w0, [x9]
+; -O0:    cmp w0, w10, uxth
+; -O0:    stlxrh w8, w11, [x9]
+; -O0:    subs w8, w8, w0, uxth
+;
+; -O1-LABEL: atomicrmw_fsub_half_aligned_acquire:
+; -O1:    ldaxrh w8, [x0]
+; -O1:    stxrh w9, w8, [x0]
+    %r = atomicrmw fsub ptr %ptr, half %value acquire, align 2
+    ret half %r
+}
+
+define dso_local half @atomicrmw_fsub_half_aligned_release(ptr %ptr, half %value) {
+; -O0-LABEL: atomicrmw_fsub_half_aligned_release:
+; -O0:    ldaxrh w0, [x9]
+; -O0:    cmp w0, w10, uxth
+; -O0:    stlxrh w8, w11, [x9]
+; -O0:    subs w8, w8, w0, uxth
+;
+; -O1-LABEL: atomicrmw_fsub_half_aligned_release:
+; -O1:    ldxrh w8, [x0]
+; -O1:    stlxrh w9, w8, [x0]
+    %r = atomicrmw fsub ptr %ptr, half %value release, align 2
+    ret half %r
+}
+
+define dso_local half @atomicrmw_fsub_half_aligned_acq_rel(ptr %ptr, half %value) {
+; -O0-LABEL: atomicrmw_fsub_half_aligned_acq_rel:
+; -O0:    ldaxrh w0, [x9]
+; -O0:    cmp w0, w10, uxth
+; -O0:    stlxrh w8, w11, [x9]
+; -O0:    subs w8, w8, w0, uxth
+;
+; -O1-LABEL: atomicrmw_fsub_half_aligned_acq_rel:
+; -O1:    ldaxrh w8, [x0]
+; -O1:    stlxrh w9, w8, [x0]
+    %r = atomicrmw fsub ptr %ptr, half %value acq_rel, align 2
+    ret half %r
+}
+
+define dso_local half @atomicrmw_fsub_half_aligned_seq_cst(ptr %ptr, half %value) {
+; -O0-LABEL: atomicrmw_fsub_half_aligned_seq_cst:
+; -O0:    ldaxrh w0, [x9]
+; -O0:    cmp w0, w10, uxth
+; -O0:    stlxrh w8, w11, [x9]
+; -O0:    subs w8, w8, w0, uxth
+;
+; -O1-LABEL: atomicrmw_fsub_half_aligned_seq_cst:
+; -O1:    ldaxrh w8, [x0]
+; -O1:    stlxrh w9, w8, [x0]
+    %r = atomicrmw fsub ptr %ptr, half %value seq_cst, align 2
+    ret half %r
+}
+
+define dso_local float @atomicrmw_fsub_float_aligned_monotonic(ptr %ptr, float %value) {
+; -O0-LABEL: atomicrmw_fsub_float_aligned_monotonic:
+; -O0:    ldaxr w0, [x9]
+; -O0:    cmp w0, w10
+; -O0:    stlxr w8, w11, [x9]
+; -O0:    subs w8, w0, w8
+;
+; -O1-LABEL: atomicrmw_fsub_float_aligned_monotonic:
+; -O1:    ldxr w8, [x0]
+; -O1:    stxr w9, w8, [x0]
+    %r = atomicrmw fsub ptr %ptr, float %value monotonic, align 4
+    ret float %r
+}
+
+define dso_local float @atomicrmw_fsub_float_aligned_acquire(ptr %ptr, float %value) {
+; -O0-LABEL: atomicrmw_fsub_float_aligned_acquire:
+; -O0:    ldaxr w0, [x9]
+; -O0:    cmp w0, w10
+; -O0:    stlxr w8, w11, [x9]
+; -O0:    subs w8, w0, w8
+;
+; -O1-LABEL: atomicrmw_fsub_float_aligned_acquire:
+; -O1:    ldaxr w8, [x0]
+; -O1:    stxr w9, w8, [x0]
+    %r = atomicrmw fsub ptr %ptr, float %value acquire, align 4
+    ret float %r
+}
+
+define dso_local float @atomicrmw_fsub_float_aligned_release(ptr %ptr, float %value) {
+; -O0-LABEL: atomicrmw_fsub_float_aligned_release:
+; -O0:    ldaxr w0, [x9]
+; -O0:    cmp w0, w10
+; -O0:    stlxr w8, w11, [x9]
+; -O0:    subs w8, w0, w8
+;
+; -O1-LABEL: atomicrmw_fsub_float_aligned_release:
+; -O1:    ldxr w8, [x0]
+; -O1:    stlxr w9, w8, [x0]
+    %r = atomicrmw fsub ptr %ptr, float %value release, align 4
+    ret float %r
+}
+
+define dso_local float @atomicrmw_fsub_float_aligned_acq_rel(ptr %ptr, float %value) {
+; -O0-LABEL: atomicrmw_fsub_float_aligned_acq_rel:
+; -O0:    ldaxr w0, [x9]
+; -O0:    cmp w0, w10
+; -O0:    stlxr w8, w11, [x9]
+; -O0:    subs w8, w0, w8
+;
+; -O1-LABEL: atomicrmw_fsub_float_aligned_acq_rel:
+; -O1:    ldaxr w8, [x0]
+; -O1:    stlxr w9, w8, [x0]
+    %r = atomicrmw fsub ptr %ptr, float %value acq_rel, align 4
+    ret float %r
+}
+
+define dso_local float @atomicrmw_fsub_float_aligned_seq_cst(ptr %ptr, float %value) {
+; -O0-LABEL: atomicrmw_fsub_float_aligned_seq_cst:
+; -O0:    ldaxr w0, [x9]
+; -O0:    cmp w0, w10
+; -O0:    stlxr w8, w11, [x9]
+; -O0:    subs w8, w0, w8
+;
+; -O1-LABEL: atomicrmw_fsub_float_aligned_seq_cst:
+; -O1:    ldaxr w8, [x0]
+; -O1:    stlxr w9, w8, [x0]
+    %r = atomicrmw fsub ptr %ptr, float %value seq_cst, align 4
+    ret float %r
+}
+
+define dso_local double @atomicrmw_fsub_double_aligned_monotonic(ptr %ptr, double %value) {
+; -O0-LABEL: atomicrmw_fsub_double_aligned_monotonic:
+; -O0:    ldaxr x0, [x9]
+; -O0:    cmp x0, x10
+; -O0:    stlxr w8, x11, [x9]
+; -O0:    subs x8, x0, x8
+;
+; -O1-LABEL: atomicrmw_fsub_double_aligned_monotonic:
+; -O1:    ldxr x8, [x0]
+; -O1:    stxr w9, x8, [x0]
+    %r = atomicrmw fsub ptr %ptr, double %value monotonic, align 8
+    ret double %r
+}
+
+define dso_local double @atomicrmw_fsub_double_aligned_acquire(ptr %ptr, double %value) {
+; -O0-LABEL: atomicrmw_fsub_double_aligned_acquire:
+; -O0:    ldaxr x0, [x9]
+; -O0:    cmp x0, x10
+; -O0:    stlxr w8, x11, [x9]
+; -O0:    subs x8, x0, x8
+;
+; -O1-LABEL: atomicrmw_fsub_double_aligned_acquire:
+; -O1:    ldaxr x8, [x0]
+; -O1:    stxr w9, x8, [x0]
+    %r = atomicrmw fsub ptr %ptr, double %value acquire, align 8
+    ret double %r
+}
+
+define dso_local double @atomicrmw_fsub_double_aligned_release(ptr %ptr, double %value) {
+; -O0-LABEL: atomicrmw_fsub_double_aligned_release:
+; -O0:    ldaxr x0, [x9]
+; -O0:    cmp x0, x10
+; -O0:    stlxr w8, x11, [x9]
+; -O0:    subs x8, x0, x8
+;
+; -O1-LABEL: atomicrmw_fsub_double_aligned_release:
+; -O1:    ldxr x8, [x0]
+; -O1:    stlxr w9, x8, [x0]
+    %r = atomicrmw fsub ptr %ptr, double %value release, align 8
+    ret double %r
+}
+
+define dso_local double @atomicrmw_fsub_double_aligned_acq_rel(ptr %ptr, double %value) {
+; -O0-LABEL: atomicrmw_fsub_double_aligned_acq_rel:
+; -O0:    ldaxr x0, [x9]
+; -O0:    cmp x0, x10
+; -O0:    stlxr w8, x11, [x9]
+; -O0:    subs x8, x0, x8
+;
+; -O1-LABEL: atomicrmw_fsub_double_aligned_acq_rel:
+; -O1:    ldaxr x8, [x0]
+; -O1:    stlxr w9, x8, [x0]
+    %r = atomicrmw fsub ptr %ptr, double %value acq_rel, align 8
+    ret double %r
+}
+
+define dso_local double @atomicrmw_fsub_double_aligned_seq_cst(ptr %ptr, double %value) {
+; -O0-LABEL: atomicrmw_fsub_double_aligned_seq_cst:
+; -O0:    ldaxr x0, [x9]
+; -O0:    cmp x0, x10
+; -O0:    stlxr w8, x11, [x9]
+; -O0:    subs x8, x0, x8
+;
+; -O1-LABEL: atomicrmw_fsub_double_aligned_seq_cst:
+; -O1:    ldaxr x8, [x0]
+; -O1:    stlxr w9, x8, [x0]
+    %r = atomicrmw fsub ptr %ptr, double %value seq_cst, align 8
+    ret double %r
+}
+
+define dso_local half @atomicrmw_fsub_half_unaligned_monotonic(ptr %ptr, half %value) {
+; CHECK-LABEL: atomicrmw_fsub_half_unaligned_monotonic:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fsub ptr %ptr, half %value monotonic, align 1
+    ret half %r
+}
+
+define dso_local half @atomicrmw_fsub_half_unaligned_acquire(ptr %ptr, half %value) {
+; CHECK-LABEL: atomicrmw_fsub_half_unaligned_acquire:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fsub ptr %ptr, half %value acquire, align 1
+    ret half %r
+}
+
+define dso_local half @atomicrmw_fsub_half_unaligned_release(ptr %ptr, half %value) {
+; CHECK-LABEL: atomicrmw_fsub_half_unaligned_release:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fsub ptr %ptr, half %value release, align 1
+    ret half %r
+}
+
+define dso_local half @atomicrmw_fsub_half_unaligned_acq_rel(ptr %ptr, half %value) {
+; CHECK-LABEL: atomicrmw_fsub_half_unaligned_acq_rel:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fsub ptr %ptr, half %value acq_rel, align 1
+    ret half %r
+}
+
+define dso_local half @atomicrmw_fsub_half_unaligned_seq_cst(ptr %ptr, half %value) {
+; CHECK-LABEL: atomicrmw_fsub_half_unaligned_seq_cst:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fsub ptr %ptr, half %value seq_cst, align 1
+    ret half %r
+}
+
+define dso_local float @atomicrmw_fsub_float_unaligned_monotonic(ptr %ptr, float %value) {
+; CHECK-LABEL: atomicrmw_fsub_float_unaligned_monotonic:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fsub ptr %ptr, float %value monotonic, align 1
+    ret float %r
+}
+
+define dso_local float @atomicrmw_fsub_float_unaligned_acquire(ptr %ptr, float %value) {
+; CHECK-LABEL: atomicrmw_fsub_float_unaligned_acquire:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fsub ptr %ptr, float %value acquire, align 1
+    ret float %r
+}
+
+define dso_local float @atomicrmw_fsub_float_unaligned_release(ptr %ptr, float %value) {
+; CHECK-LABEL: atomicrmw_fsub_float_unaligned_release:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fsub ptr %ptr, float %value release, align 1
+    ret float %r
+}
+
+define dso_local float @atomicrmw_fsub_float_unaligned_acq_rel(ptr %ptr, float %value) {
+; CHECK-LABEL: atomicrmw_fsub_float_unaligned_acq_rel:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fsub ptr %ptr, float %value acq_rel, align 1
+    ret float %r
+}
+
+define dso_local float @atomicrmw_fsub_float_unaligned_seq_cst(ptr %ptr, float %value) {
+; CHECK-LABEL: atomicrmw_fsub_float_unaligned_seq_cst:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fsub ptr %ptr, float %value seq_cst, align 1
+    ret float %r
+}
+
+define dso_local double @atomicrmw_fsub_double_unaligned_monotonic(ptr %ptr, double %value) {
+; CHECK-LABEL: atomicrmw_fsub_double_unaligned_monotonic:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fsub ptr %ptr, double %value monotonic, align 1
+    ret double %r
+}
+
+define dso_local double @atomicrmw_fsub_double_unaligned_acquire(ptr %ptr, double %value) {
+; CHECK-LABEL: atomicrmw_fsub_double_unaligned_acquire:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fsub ptr %ptr, double %value acquire, align 1
+    ret double %r
+}
+
+define dso_local double @atomicrmw_fsub_double_unaligned_release(ptr %ptr, double %value) {
+; CHECK-LABEL: atomicrmw_fsub_double_unaligned_release:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fsub ptr %ptr, double %value release, align 1
+    ret double %r
+}
+
+define dso_local double @atomicrmw_fsub_double_unaligned_acq_rel(ptr %ptr, double %value) {
+; CHECK-LABEL: atomicrmw_fsub_double_unaligned_acq_rel:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fsub ptr %ptr, double %value acq_rel, align 1
+    ret double %r
+}
+
+define dso_local double @atomicrmw_fsub_double_unaligned_seq_cst(ptr %ptr, double %value) {
+; CHECK-LABEL: atomicrmw_fsub_double_unaligned_seq_cst:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fsub ptr %ptr, double %value seq_cst, align 1
+    ret double %r
+}
+
+define dso_local half @atomicrmw_fmax_half_aligned_monotonic(ptr %ptr, half %value) {
+; CHECK-LABEL: atomicrmw_fmax_half_aligned_monotonic:
+; CHECK:    ldfmax h0, h0, [x0]
+    %r = atomicrmw fmax ptr %ptr, half %value monotonic, align 2
+    ret half %r
+}
+
+define dso_local half @atomicrmw_fmax_half_aligned_acquire(ptr %ptr, half %value) {
+; CHECK-LABEL: atomicrmw_fmax_half_aligned_acquire:
+; CHECK:    ldfmaxa h0, h0, [x0]
+    %r = atomicrmw fmax ptr %ptr, half %value acquire, align 2
+    ret half %r
+}
+
+define dso_local half @atomicrmw_fmax_half_aligned_release(ptr %ptr, half %value) {
+; CHECK-LABEL: atomicrmw_fmax_half_aligned_release:
+; CHECK:    ldfmaxl h0, h0, [x0]
+    %r = atomicrmw fmax ptr %ptr, half %value release, align 2
+    ret half %r
+}
+
+define dso_local half @atomicrmw_fmax_half_aligned_acq_rel(ptr %ptr, half %value) {
+; CHECK-LABEL: atomicrmw_fmax_half_aligned_acq_rel:
+; CHECK:    ldfmaxal h0, h0, [x0]
+    %r = atomicrmw fmax ptr %ptr, half %value acq_rel, align 2
+    ret half %r
+}
+
+define dso_local half @atomicrmw_fmax_half_aligned_seq_cst(ptr %ptr, half %value) {
+; CHECK-LABEL: atomicrmw_fmax_half_aligned_seq_cst:
+; CHECK:    ldfmaxal h0, h0, [x0]
+    %r = atomicrmw fmax ptr %ptr, half %value seq_cst, align 2
+    ret half %r
+}
+
+define dso_local float @atomicrmw_fmax_float_aligned_monotonic(ptr %ptr, float %value) {
+; CHECK-LABEL: atomicrmw_fmax_float_aligned_monotonic:
+; CHECK:    ldfmax s0, s0, [x0]
+    %r = atomicrmw fmax ptr %ptr, float %value monotonic, align 4
+    ret float %r
+}
+
+define dso_local float @atomicrmw_fmax_float_aligned_acquire(ptr %ptr, float %value) {
+; CHECK-LABEL: atomicrmw_fmax_float_aligned_acquire:
+; CHECK:    ldfmaxa s0, s0, [x0]
+    %r = atomicrmw fmax ptr %ptr, float %value acquire, align 4
+    ret float %r
+}
+
+define dso_local float @atomicrmw_fmax_float_aligned_release(ptr %ptr, float %value) {
+; CHECK-LABEL: atomicrmw_fmax_float_aligned_release:
+; CHECK:    ldfmaxl s0, s0, [x0]
+    %r = atomicrmw fmax ptr %ptr, float %value release, align 4
+    ret float %r
+}
+
+define dso_local float @atomicrmw_fmax_float_aligned_acq_rel(ptr %ptr, float %value) {
+; CHECK-LABEL: atomicrmw_fmax_float_aligned_acq_rel:
+; CHECK:    ldfmaxal s0, s0, [x0]
+    %r = atomicrmw fmax ptr %ptr, float %value acq_rel, align 4
+    ret float %r
+}
+
+define dso_local float @atomicrmw_fmax_float_aligned_seq_cst(ptr %ptr, float %value) {
+; CHECK-LABEL: atomicrmw_fmax_float_aligned_seq_cst:
+; CHECK:    ldfmaxal s0, s0, [x0]
+    %r = atomicrmw fmax ptr %ptr, float %value seq_cst, align 4
+    ret float %r
+}
+
+define dso_local double @atomicrmw_fmax_double_aligned_monotonic(ptr %ptr, double %value) {
+; CHECK-LABEL: atomicrmw_fmax_double_aligned_monotonic:
+; CHECK:    ldfmax d0, d0, [x0]
+    %r = atomicrmw fmax ptr %ptr, double %value monotonic, align 8
+    ret double %r
+}
+
+define dso_local double @atomicrmw_fmax_double_aligned_acquire(ptr %ptr, double %value) {
+; CHECK-LABEL: atomicrmw_fmax_double_aligned_acquire:
+; CHECK:    ldfmaxa d0, d0, [x0]
+    %r = atomicrmw fmax ptr %ptr, double %value acquire, align 8
+    ret double %r
+}
+
+define dso_local double @atomicrmw_fmax_double_aligned_release(ptr %ptr, double %value) {
+; CHECK-LABEL: atomicrmw_fmax_double_aligned_release:
+; CHECK:    ldfmaxl d0, d0, [x0]
+    %r = atomicrmw fmax ptr %ptr, double %value release, align 8
+    ret double %r
+}
+
+define dso_local double @atomicrmw_fmax_double_aligned_acq_rel(ptr %ptr, double %value) {
+; CHECK-LABEL: atomicrmw_fmax_double_aligned_acq_rel:
+; CHECK:    ldfmaxal d0, d0, [x0]
+    %r = atomicrmw fmax ptr %ptr, double %value acq_rel, align 8
+    ret double %r
+}
+
+define dso_local double @atomicrmw_fmax_double_aligned_seq_cst(ptr %ptr, double %value) {
+; CHECK-LABEL: atomicrmw_fmax_double_aligned_seq_cst:
+; CHECK:    ldfmaxal d0, d0, [x0]
+    %r = atomicrmw fmax ptr %ptr, double %value seq_cst, align 8
+    ret double %r
+}
+
+define dso_local half @atomicrmw_fmax_half_unaligned_monotonic(ptr %ptr, half %value) {
+; CHECK-LABEL: atomicrmw_fmax_half_unaligned_monotonic:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fmax ptr %ptr, half %value monotonic, align 1
+    ret half %r
+}
+
+define dso_local half @atomicrmw_fmax_half_unaligned_acquire(ptr %ptr, half %value) {
+; CHECK-LABEL: atomicrmw_fmax_half_unaligned_acquire:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fmax ptr %ptr, half %value acquire, align 1
+    ret half %r
+}
+
+define dso_local half @atomicrmw_fmax_half_unaligned_release(ptr %ptr, half %value) {
+; CHECK-LABEL: atomicrmw_fmax_half_unaligned_release:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fmax ptr %ptr, half %value release, align 1
+    ret half %r
+}
+
+define dso_local half @atomicrmw_fmax_half_unaligned_acq_rel(ptr %ptr, half %value) {
+; CHECK-LABEL: atomicrmw_fmax_half_unaligned_acq_rel:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fmax ptr %ptr, half %value acq_rel, align 1
+    ret half %r
+}
+
+define dso_local half @atomicrmw_fmax_half_unaligned_seq_cst(ptr %ptr, half %value) {
+; CHECK-LABEL: atomicrmw_fmax_half_unaligned_seq_cst:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fmax ptr %ptr, half %value seq_cst, align 1
+    ret half %r
+}
+
+define dso_local float @atomicrmw_fmax_float_unaligned_monotonic(ptr %ptr, float %value) {
+; CHECK-LABEL: atomicrmw_fmax_float_unaligned_monotonic:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fmax ptr %ptr, float %value monotonic, align 1
+    ret float %r
+}
+
+define dso_local float @atomicrmw_fmax_float_unaligned_acquire(ptr %ptr, float %value) {
+; CHECK-LABEL: atomicrmw_fmax_float_unaligned_acquire:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fmax ptr %ptr, float %value acquire, align 1
+    ret float %r
+}
+
+define dso_local float @atomicrmw_fmax_float_unaligned_release(ptr %ptr, float %value) {
+; CHECK-LABEL: atomicrmw_fmax_float_unaligned_release:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fmax ptr %ptr, float %value release, align 1
+    ret float %r
+}
+
+define dso_local float @atomicrmw_fmax_float_unaligned_acq_rel(ptr %ptr, float %value) {
+; CHECK-LABEL: atomicrmw_fmax_float_unaligned_acq_rel:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fmax ptr %ptr, float %value acq_rel, align 1
+    ret float %r
+}
+
+define dso_local float @atomicrmw_fmax_float_unaligned_seq_cst(ptr %ptr, float %value) {
+; CHECK-LABEL: atomicrmw_fmax_float_unaligned_seq_cst:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fmax ptr %ptr, float %value seq_cst, align 1
+    ret float %r
+}
+
+define dso_local double @atomicrmw_fmax_double_unaligned_monotonic(ptr %ptr, double %value) {
+; CHECK-LABEL: atomicrmw_fmax_double_unaligned_monotonic:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fmax ptr %ptr, double %value monotonic, align 1
+    ret double %r
+}
+
+define dso_local double @atomicrmw_fmax_double_unaligned_acquire(ptr %ptr, double %value) {
+; CHECK-LABEL: atomicrmw_fmax_double_unaligned_acquire:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fmax ptr %ptr, double %value acquire, align 1
+    ret double %r
+}
+
+define dso_local double @atomicrmw_fmax_double_unaligned_release(ptr %ptr, double %value) {
+; CHECK-LABEL: atomicrmw_fmax_double_unaligned_release:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fmax ptr %ptr, double %value release, align 1
+    ret double %r
+}
+
+define dso_local double @atomicrmw_fmax_double_unaligned_acq_rel(ptr %ptr, double %value) {
+; CHECK-LABEL: atomicrmw_fmax_double_unaligned_acq_rel:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fmax ptr %ptr, double %value acq_rel, align 1
+    ret double %r
+}
+
+define dso_local double @atomicrmw_fmax_double_unaligned_seq_cst(ptr %ptr, double %value) {
+; CHECK-LABEL: atomicrmw_fmax_double_unaligned_seq_cst:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fmax ptr %ptr, double %value seq_cst, align 1
+    ret double %r
+}
+
+define dso_local half @atomicrmw_fmin_half_aligned_monotonic(ptr %ptr, half %value) {
+; CHECK-LABEL: atomicrmw_fmin_half_aligned_monotonic:
+; CHECK:    ldfmin h0, h0, [x0]
+    %r = atomicrmw fmin ptr %ptr, half %value monotonic, align 2
+    ret half %r
+}
+
+define dso_local half @atomicrmw_fmin_half_aligned_acquire(ptr %ptr, half %value) {
+; CHECK-LABEL: atomicrmw_fmin_half_aligned_acquire:
+; CHECK:    ldfmina h0, h0, [x0]
+    %r = atomicrmw fmin ptr %ptr, half %value acquire, align 2
+    ret half %r
+}
+
+define dso_local half @atomicrmw_fmin_half_aligned_release(ptr %ptr, half %value) {
+; CHECK-LABEL: atomicrmw_fmin_half_aligned_release:
+; CHECK:    ldfminl h0, h0, [x0]
+    %r = atomicrmw fmin ptr %ptr, half %value release, align 2
+    ret half %r
+}
+
+define dso_local half @atomicrmw_fmin_half_aligned_acq_rel(ptr %ptr, half %value) {
+; CHECK-LABEL: atomicrmw_fmin_half_aligned_acq_rel:
+; CHECK:    ldfminal h0, h0, [x0]
+    %r = atomicrmw fmin ptr %ptr, half %value acq_rel, align 2
+    ret half %r
+}
+
+define dso_local half @atomicrmw_fmin_half_aligned_seq_cst(ptr %ptr, half %value) {
+; CHECK-LABEL: atomicrmw_fmin_half_aligned_seq_cst:
+; CHECK:    ldfminal h0, h0, [x0]
+    %r = atomicrmw fmin ptr %ptr, half %value seq_cst, align 2
+    ret half %r
+}
+
+define dso_local float @atomicrmw_fmin_float_aligned_monotonic(ptr %ptr, float %value) {
+; CHECK-LABEL: atomicrmw_fmin_float_aligned_monotonic:
+; CHECK:    ldfmin s0, s0, [x0]
+    %r = atomicrmw fmin ptr %ptr, float %value monotonic, align 4
+    ret float %r
+}
+
+define dso_local float @atomicrmw_fmin_float_aligned_acquire(ptr %ptr, float %value) {
+; CHECK-LABEL: atomicrmw_fmin_float_aligned_acquire:
+; CHECK:    ldfmina s0, s0, [x0]
+    %r = atomicrmw fmin ptr %ptr, float %value acquire, align 4
+    ret float %r
+}
+
+define dso_local float @atomicrmw_fmin_float_aligned_release(ptr %ptr, float %value) {
+; CHECK-LABEL: atomicrmw_fmin_float_aligned_release:
+; CHECK:    ldfminl s0, s0, [x0]
+    %r = atomicrmw fmin ptr %ptr, float %value release, align 4
+    ret float %r
+}
+
+define dso_local float @atomicrmw_fmin_float_aligned_acq_rel(ptr %ptr, float %value) {
+; CHECK-LABEL: atomicrmw_fmin_float_aligned_acq_rel:
+; CHECK:    ldfminal s0, s0, [x0]
+    %r = atomicrmw fmin ptr %ptr, float %value acq_rel, align 4
+    ret float %r
+}
+
+define dso_local float @atomicrmw_fmin_float_aligned_seq_cst(ptr %ptr, float %value) {
+; CHECK-LABEL: atomicrmw_fmin_float_aligned_seq_cst:
+; CHECK:    ldfminal s0, s0, [x0]
+    %r = atomicrmw fmin ptr %ptr, float %value seq_cst, align 4
+    ret float %r
+}
+
+define dso_local double @atomicrmw_fmin_double_aligned_monotonic(ptr %ptr, double %value) {
+; CHECK-LABEL: atomicrmw_fmin_double_aligned_monotonic:
+; CHECK:    ldfmin d0, d0, [x0]
+    %r = atomicrmw fmin ptr %ptr, double %value monotonic, align 8
+    ret double %r
+}
+
+define dso_local double @atomicrmw_fmin_double_aligned_acquire(ptr %ptr, double %value) {
+; CHECK-LABEL: atomicrmw_fmin_double_aligned_acquire:
+; CHECK:    ldfmina d0, d0, [x0]
+    %r = atomicrmw fmin ptr %ptr, double %value acquire, align 8
+    ret double %r
+}
+
+define dso_local double @atomicrmw_fmin_double_aligned_release(ptr %ptr, double %value) {
+; CHECK-LABEL: atomicrmw_fmin_double_aligned_release:
+; CHECK:    ldfminl d0, d0, [x0]
+    %r = atomicrmw fmin ptr %ptr, double %value release, align 8
+    ret double %r
+}
+
+define dso_local double @atomicrmw_fmin_double_aligned_acq_rel(ptr %ptr, double %value) {
+; CHECK-LABEL: atomicrmw_fmin_double_aligned_acq_rel:
+; CHECK:    ldfminal d0, d0, [x0]
+    %r = atomicrmw fmin ptr %ptr, double %value acq_rel, align 8
+    ret double %r
+}
+
+define dso_local double @atomicrmw_fmin_double_aligned_seq_cst(ptr %ptr, double %value) {
+; CHECK-LABEL: atomicrmw_fmin_double_aligned_seq_cst:
+; CHECK:    ldfminal d0, d0, [x0]
+    %r = atomicrmw fmin ptr %ptr, double %value seq_cst, align 8
+    ret double %r
+}
+
+define dso_local half @atomicrmw_fmin_half_unaligned_monotonic(ptr %ptr, half %value) {
+; CHECK-LABEL: atomicrmw_fmin_half_unaligned_monotonic:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fmin ptr %ptr, half %value monotonic, align 1
+    ret half %r
+}
+
+define dso_local half @atomicrmw_fmin_half_unaligned_acquire(ptr %ptr, half %value) {
+; CHECK-LABEL: atomicrmw_fmin_half_unaligned_acquire:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fmin ptr %ptr, half %value acquire, align 1
+    ret half %r
+}
+
+define dso_local half @atomicrmw_fmin_half_unaligned_release(ptr %ptr, half %value) {
+; CHECK-LABEL: atomicrmw_fmin_half_unaligned_release:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fmin ptr %ptr, half %value release, align 1
+    ret half %r
+}
+
+define dso_local half @atomicrmw_fmin_half_unaligned_acq_rel(ptr %ptr, half %value) {
+; CHECK-LABEL: atomicrmw_fmin_half_unaligned_acq_rel:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fmin ptr %ptr, half %value acq_rel, align 1
+    ret half %r
+}
+
+define dso_local half @atomicrmw_fmin_half_unaligned_seq_cst(ptr %ptr, half %value) {
+; CHECK-LABEL: atomicrmw_fmin_half_unaligned_seq_cst:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fmin ptr %ptr, half %value seq_cst, align 1
+    ret half %r
+}
+
+define dso_local float @atomicrmw_fmin_float_unaligned_monotonic(ptr %ptr, float %value) {
+; CHECK-LABEL: atomicrmw_fmin_float_unaligned_monotonic:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fmin ptr %ptr, float %value monotonic, align 1
+    ret float %r
+}
+
+define dso_local float @atomicrmw_fmin_float_unaligned_acquire(ptr %ptr, float %value) {
+; CHECK-LABEL: atomicrmw_fmin_float_unaligned_acquire:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fmin ptr %ptr, float %value acquire, align 1
+    ret float %r
+}
+
+define dso_local float @atomicrmw_fmin_float_unaligned_release(ptr %ptr, float %value) {
+; CHECK-LABEL: atomicrmw_fmin_float_unaligned_release:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fmin ptr %ptr, float %value release, align 1
+    ret float %r
+}
+
+define dso_local float @atomicrmw_fmin_float_unaligned_acq_rel(ptr %ptr, float %value) {
+; CHECK-LABEL: atomicrmw_fmin_float_unaligned_acq_rel:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fmin ptr %ptr, float %value acq_rel, align 1
+    ret float %r
+}
+
+define dso_local float @atomicrmw_fmin_float_unaligned_seq_cst(ptr %ptr, float %value) {
+; CHECK-LABEL: atomicrmw_fmin_float_unaligned_seq_cst:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fmin ptr %ptr, float %value seq_cst, align 1
+    ret float %r
+}
+
+define dso_local double @atomicrmw_fmin_double_unaligned_monotonic(ptr %ptr, double %value) {
+; CHECK-LABEL: atomicrmw_fmin_double_unaligned_monotonic:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fmin ptr %ptr, double %value monotonic, align 1
+    ret double %r
+}
+
+define dso_local double @atomicrmw_fmin_double_unaligned_acquire(ptr %ptr, double %value) {
+; CHECK-LABEL: atomicrmw_fmin_double_unaligned_acquire:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fmin ptr %ptr, double %value acquire, align 1
+    ret double %r
+}
+
+define dso_local double @atomicrmw_fmin_double_unaligned_release(ptr %ptr, double %value) {
+; CHECK-LABEL: atomicrmw_fmin_double_unaligned_release:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fmin ptr %ptr, double %value release, align 1
+    ret double %r
+}
+
+define dso_local double @atomicrmw_fmin_double_unaligned_acq_rel(ptr %ptr, double %value) {
+; CHECK-LABEL: atomicrmw_fmin_double_unaligned_acq_rel:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fmin ptr %ptr, double %value acq_rel, align 1
+    ret double %r
+}
+
+define dso_local double @atomicrmw_fmin_double_unaligned_seq_cst(ptr %ptr, double %value) {
+; CHECK-LABEL: atomicrmw_fmin_double_unaligned_seq_cst:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fmin ptr %ptr, double %value seq_cst, align 1
+    ret double %r
+}
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-lsfe.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-lsfe.ll
new file mode 100644
index 00000000000000..d6ae20f4f18a1a
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-lsfe.ll
@@ -0,0 +1,964 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --filter-out "\b(sp)\b" --filter "^\s*(ld[^r]|st[^r]|swp|cas|bl|add|and|eor|orn|orr|sub|mvn|sxt|cmp|ccmp|csel|dmb)"
+; The base test file was generated by ./llvm/test/CodeGen/AArch64/Atomics/generate-tests.py
+; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64_be -mattr=+lsfe -O0 | FileCheck %s --check-prefixes=CHECK,-O0
+; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64_be -mattr=+lsfe -O1 | FileCheck %s --check-prefixes=CHECK,-O1
+
+define dso_local half @atomicrmw_fadd_half_aligned_monotonic(ptr %ptr, half %value) {
+; CHECK-LABEL: atomicrmw_fadd_half_aligned_monotonic:
+; CHECK:    ldfadd h0, h0, [x0]
+    %r = atomicrmw fadd ptr %ptr, half %value monotonic, align 2
+    ret half %r
+}
+
+define dso_local half @atomicrmw_fadd_half_aligned_acquire(ptr %ptr, half %value) {
+; CHECK-LABEL: atomicrmw_fadd_half_aligned_acquire:
+; CHECK:    ldfadda h0, h0, [x0]
+    %r = atomicrmw fadd ptr %ptr, half %value acquire, align 2
+    ret half %r
+}
+
+define dso_local half @atomicrmw_fadd_half_aligned_release(ptr %ptr, half %value) {
+; CHECK-LABEL: atomicrmw_fadd_half_aligned_release:
+; CHECK:    ldfaddl h0, h0, [x0]
+    %r = atomicrmw fadd ptr %ptr, half %value release, align 2
+    ret half %r
+}
+
+define dso_local half @atomicrmw_fadd_half_aligned_acq_rel(ptr %ptr, half %value) {
+; CHECK-LABEL: atomicrmw_fadd_half_aligned_acq_rel:
+; CHECK:    ldfaddal h0, h0, [x0]
+    %r = atomicrmw fadd ptr %ptr, half %value acq_rel, align 2
+    ret half %r
+}
+
+define dso_local half @atomicrmw_fadd_half_aligned_seq_cst(ptr %ptr, half %value) {
+; CHECK-LABEL: atomicrmw_fadd_half_aligned_seq_cst:
+; CHECK:    ldfaddal h0, h0, [x0]
+    %r = atomicrmw fadd ptr %ptr, half %value seq_cst, align 2
+    ret half %r
+}
+
+define dso_local float @atomicrmw_fadd_float_aligned_monotonic(ptr %ptr, float %value) {
+; CHECK-LABEL: atomicrmw_fadd_float_aligned_monotonic:
+; CHECK:    ldfadd s0, s0, [x0]
+    %r = atomicrmw fadd ptr %ptr, float %value monotonic, align 4
+    ret float %r
+}
+
+define dso_local float @atomicrmw_fadd_float_aligned_acquire(ptr %ptr, float %value) {
+; CHECK-LABEL: atomicrmw_fadd_float_aligned_acquire:
+; CHECK:    ldfadda s0, s0, [x0]
+    %r = atomicrmw fadd ptr %ptr, float %value acquire, align 4
+    ret float %r
+}
+
+define dso_local float @atomicrmw_fadd_float_aligned_release(ptr %ptr, float %value) {
+; CHECK-LABEL: atomicrmw_fadd_float_aligned_release:
+; CHECK:    ldfaddl s0, s0, [x0]
+    %r = atomicrmw fadd ptr %ptr, float %value release, align 4
+    ret float %r
+}
+
+define dso_local float @atomicrmw_fadd_float_aligned_acq_rel(ptr %ptr, float %value) {
+; CHECK-LABEL: atomicrmw_fadd_float_aligned_acq_rel:
+; CHECK:    ldfaddal s0, s0, [x0]
+    %r = atomicrmw fadd ptr %ptr, float %value acq_rel, align 4
+    ret float %r
+}
+
+define dso_local float @atomicrmw_fadd_float_aligned_seq_cst(ptr %ptr, float %value) {
+; CHECK-LABEL: atomicrmw_fadd_float_aligned_seq_cst:
+; CHECK:    ldfaddal s0, s0, [x0]
+    %r = atomicrmw fadd ptr %ptr, float %value seq_cst, align 4
+    ret float %r
+}
+
+define dso_local double @atomicrmw_fadd_double_aligned_monotonic(ptr %ptr, double %value) {
+; CHECK-LABEL: atomicrmw_fadd_double_aligned_monotonic:
+; CHECK:    ldfadd d0, d0, [x0]
+    %r = atomicrmw fadd ptr %ptr, double %value monotonic, align 8
+    ret double %r
+}
+
+define dso_local double @atomicrmw_fadd_double_aligned_acquire(ptr %ptr, double %value) {
+; CHECK-LABEL: atomicrmw_fadd_double_aligned_acquire:
+; CHECK:    ldfadda d0, d0, [x0]
+    %r = atomicrmw fadd ptr %ptr, double %value acquire, align 8
+    ret double %r
+}
+
+define dso_local double @atomicrmw_fadd_double_aligned_release(ptr %ptr, double %value) {
+; CHECK-LABEL: atomicrmw_fadd_double_aligned_release:
+; CHECK:    ldfaddl d0, d0, [x0]
+    %r = atomicrmw fadd ptr %ptr, double %value release, align 8
+    ret double %r
+}
+
+define dso_local double @atomicrmw_fadd_double_aligned_acq_rel(ptr %ptr, double %value) {
+; CHECK-LABEL: atomicrmw_fadd_double_aligned_acq_rel:
+; CHECK:    ldfaddal d0, d0, [x0]
+    %r = atomicrmw fadd ptr %ptr, double %value acq_rel, align 8
+    ret double %r
+}
+
+define dso_local double @atomicrmw_fadd_double_aligned_seq_cst(ptr %ptr, double %value) {
+; CHECK-LABEL: atomicrmw_fadd_double_aligned_seq_cst:
+; CHECK:    ldfaddal d0, d0, [x0]
+    %r = atomicrmw fadd ptr %ptr, double %value seq_cst, align 8
+    ret double %r
+}
+
+define dso_local half @atomicrmw_fadd_half_unaligned_monotonic(ptr %ptr, half %value) {
+; CHECK-LABEL: atomicrmw_fadd_half_unaligned_monotonic:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fadd ptr %ptr, half %value monotonic, align 1
+    ret half %r
+}
+
+define dso_local half @atomicrmw_fadd_half_unaligned_acquire(ptr %ptr, half %value) {
+; CHECK-LABEL: atomicrmw_fadd_half_unaligned_acquire:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fadd ptr %ptr, half %value acquire, align 1
+    ret half %r
+}
+
+define dso_local half @atomicrmw_fadd_half_unaligned_release(ptr %ptr, half %value) {
+; CHECK-LABEL: atomicrmw_fadd_half_unaligned_release:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fadd ptr %ptr, half %value release, align 1
+    ret half %r
+}
+
+define dso_local half @atomicrmw_fadd_half_unaligned_acq_rel(ptr %ptr, half %value) {
+; CHECK-LABEL: atomicrmw_fadd_half_unaligned_acq_rel:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fadd ptr %ptr, half %value acq_rel, align 1
+    ret half %r
+}
+
+define dso_local half @atomicrmw_fadd_half_unaligned_seq_cst(ptr %ptr, half %value) {
+; CHECK-LABEL: atomicrmw_fadd_half_unaligned_seq_cst:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fadd ptr %ptr, half %value seq_cst, align 1
+    ret half %r
+}
+
+define dso_local float @atomicrmw_fadd_float_unaligned_monotonic(ptr %ptr, float %value) {
+; CHECK-LABEL: atomicrmw_fadd_float_unaligned_monotonic:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fadd ptr %ptr, float %value monotonic, align 1
+    ret float %r
+}
+
+define dso_local float @atomicrmw_fadd_float_unaligned_acquire(ptr %ptr, float %value) {
+; CHECK-LABEL: atomicrmw_fadd_float_unaligned_acquire:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fadd ptr %ptr, float %value acquire, align 1
+    ret float %r
+}
+
+define dso_local float @atomicrmw_fadd_float_unaligned_release(ptr %ptr, float %value) {
+; CHECK-LABEL: atomicrmw_fadd_float_unaligned_release:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fadd ptr %ptr, float %value release, align 1
+    ret float %r
+}
+
+define dso_local float @atomicrmw_fadd_float_unaligned_acq_rel(ptr %ptr, float %value) {
+; CHECK-LABEL: atomicrmw_fadd_float_unaligned_acq_rel:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fadd ptr %ptr, float %value acq_rel, align 1
+    ret float %r
+}
+
+define dso_local float @atomicrmw_fadd_float_unaligned_seq_cst(ptr %ptr, float %value) {
+; CHECK-LABEL: atomicrmw_fadd_float_unaligned_seq_cst:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fadd ptr %ptr, float %value seq_cst, align 1
+    ret float %r
+}
+
+define dso_local double @atomicrmw_fadd_double_unaligned_monotonic(ptr %ptr, double %value) {
+; CHECK-LABEL: atomicrmw_fadd_double_unaligned_monotonic:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fadd ptr %ptr, double %value monotonic, align 1
+    ret double %r
+}
+
+define dso_local double @atomicrmw_fadd_double_unaligned_acquire(ptr %ptr, double %value) {
+; CHECK-LABEL: atomicrmw_fadd_double_unaligned_acquire:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fadd ptr %ptr, double %value acquire, align 1
+    ret double %r
+}
+
+define dso_local double @atomicrmw_fadd_double_unaligned_release(ptr %ptr, double %value) {
+; CHECK-LABEL: atomicrmw_fadd_double_unaligned_release:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fadd ptr %ptr, double %value release, align 1
+    ret double %r
+}
+
+define dso_local double @atomicrmw_fadd_double_unaligned_acq_rel(ptr %ptr, double %value) {
+; CHECK-LABEL: atomicrmw_fadd_double_unaligned_acq_rel:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fadd ptr %ptr, double %value acq_rel, align 1
+    ret double %r
+}
+
+define dso_local double @atomicrmw_fadd_double_unaligned_seq_cst(ptr %ptr, double %value) {
+; CHECK-LABEL: atomicrmw_fadd_double_unaligned_seq_cst:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fadd ptr %ptr, double %value seq_cst, align 1
+    ret double %r
+}
+
+define dso_local half @atomicrmw_fsub_half_aligned_monotonic(ptr %ptr, half %value) {
+; -O0-LABEL: atomicrmw_fsub_half_aligned_monotonic:
+; -O0:    ldaxrh w9, [x11]
+; -O0:    cmp w9, w8, uxth
+; -O0:    stlxrh w10, w12, [x11]
+; -O0:    subs w8, w9, w8, uxth
+; -O0:    subs w8, w8, #1
+;
+; -O1-LABEL: atomicrmw_fsub_half_aligned_monotonic:
+; -O1:    ldxrh w8, [x0]
+; -O1:    stxrh w9, w8, [x0]
+    %r = atomicrmw fsub ptr %ptr, half %value monotonic, align 2
+    ret half %r
+}
+
+define dso_local half @atomicrmw_fsub_half_aligned_acquire(ptr %ptr, half %value) {
+; -O0-LABEL: atomicrmw_fsub_half_aligned_acquire:
+; -O0:    ldaxrh w9, [x11]
+; -O0:    cmp w9, w8, uxth
+; -O0:    stlxrh w10, w12, [x11]
+; -O0:    subs w8, w9, w8, uxth
+; -O0:    subs w8, w8, #1
+;
+; -O1-LABEL: atomicrmw_fsub_half_aligned_acquire:
+; -O1:    ldaxrh w8, [x0]
+; -O1:    stxrh w9, w8, [x0]
+    %r = atomicrmw fsub ptr %ptr, half %value acquire, align 2
+    ret half %r
+}
+
+define dso_local half @atomicrmw_fsub_half_aligned_release(ptr %ptr, half %value) {
+; -O0-LABEL: atomicrmw_fsub_half_aligned_release:
+; -O0:    ldaxrh w9, [x11]
+; -O0:    cmp w9, w8, uxth
+; -O0:    stlxrh w10, w12, [x11]
+; -O0:    subs w8, w9, w8, uxth
+; -O0:    subs w8, w8, #1
+;
+; -O1-LABEL: atomicrmw_fsub_half_aligned_release:
+; -O1:    ldxrh w8, [x0]
+; -O1:    stlxrh w9, w8, [x0]
+    %r = atomicrmw fsub ptr %ptr, half %value release, align 2
+    ret half %r
+}
+
+define dso_local half @atomicrmw_fsub_half_aligned_acq_rel(ptr %ptr, half %value) {
+; -O0-LABEL: atomicrmw_fsub_half_aligned_acq_rel:
+; -O0:    ldaxrh w9, [x11]
+; -O0:    cmp w9, w8, uxth
+; -O0:    stlxrh w10, w12, [x11]
+; -O0:    subs w8, w9, w8, uxth
+; -O0:    subs w8, w8, #1
+;
+; -O1-LABEL: atomicrmw_fsub_half_aligned_acq_rel:
+; -O1:    ldaxrh w8, [x0]
+; -O1:    stlxrh w9, w8, [x0]
+    %r = atomicrmw fsub ptr %ptr, half %value acq_rel, align 2
+    ret half %r
+}
+
+define dso_local half @atomicrmw_fsub_half_aligned_seq_cst(ptr %ptr, half %value) {
+; -O0-LABEL: atomicrmw_fsub_half_aligned_seq_cst:
+; -O0:    ldaxrh w9, [x11]
+; -O0:    cmp w9, w8, uxth
+; -O0:    stlxrh w10, w12, [x11]
+; -O0:    subs w8, w9, w8, uxth
+; -O0:    subs w8, w8, #1
+;
+; -O1-LABEL: atomicrmw_fsub_half_aligned_seq_cst:
+; -O1:    ldaxrh w8, [x0]
+; -O1:    stlxrh w9, w8, [x0]
+    %r = atomicrmw fsub ptr %ptr, half %value seq_cst, align 2
+    ret half %r
+}
+
+define dso_local float @atomicrmw_fsub_float_aligned_monotonic(ptr %ptr, float %value) {
+; -O0-LABEL: atomicrmw_fsub_float_aligned_monotonic:
+; -O0:    ldaxr w9, [x11]
+; -O0:    cmp w9, w8
+; -O0:    stlxr w10, w12, [x11]
+; -O0:    subs w8, w9, w8
+; -O0:    subs w8, w8, #1
+;
+; -O1-LABEL: atomicrmw_fsub_float_aligned_monotonic:
+; -O1:    ldxr w8, [x0]
+; -O1:    stxr w9, w8, [x0]
+    %r = atomicrmw fsub ptr %ptr, float %value monotonic, align 4
+    ret float %r
+}
+
+define dso_local float @atomicrmw_fsub_float_aligned_acquire(ptr %ptr, float %value) {
+; -O0-LABEL: atomicrmw_fsub_float_aligned_acquire:
+; -O0:    ldaxr w9, [x11]
+; -O0:    cmp w9, w8
+; -O0:    stlxr w10, w12, [x11]
+; -O0:    subs w8, w9, w8
+; -O0:    subs w8, w8, #1
+;
+; -O1-LABEL: atomicrmw_fsub_float_aligned_acquire:
+; -O1:    ldaxr w8, [x0]
+; -O1:    stxr w9, w8, [x0]
+    %r = atomicrmw fsub ptr %ptr, float %value acquire, align 4
+    ret float %r
+}
+
+define dso_local float @atomicrmw_fsub_float_aligned_release(ptr %ptr, float %value) {
+; -O0-LABEL: atomicrmw_fsub_float_aligned_release:
+; -O0:    ldaxr w9, [x11]
+; -O0:    cmp w9, w8
+; -O0:    stlxr w10, w12, [x11]
+; -O0:    subs w8, w9, w8
+; -O0:    subs w8, w8, #1
+;
+; -O1-LABEL: atomicrmw_fsub_float_aligned_release:
+; -O1:    ldxr w8, [x0]
+; -O1:    stlxr w9, w8, [x0]
+    %r = atomicrmw fsub ptr %ptr, float %value release, align 4
+    ret float %r
+}
+
+define dso_local float @atomicrmw_fsub_float_aligned_acq_rel(ptr %ptr, float %value) {
+; -O0-LABEL: atomicrmw_fsub_float_aligned_acq_rel:
+; -O0:    ldaxr w9, [x11]
+; -O0:    cmp w9, w8
+; -O0:    stlxr w10, w12, [x11]
+; -O0:    subs w8, w9, w8
+; -O0:    subs w8, w8, #1
+;
+; -O1-LABEL: atomicrmw_fsub_float_aligned_acq_rel:
+; -O1:    ldaxr w8, [x0]
+; -O1:    stlxr w9, w8, [x0]
+    %r = atomicrmw fsub ptr %ptr, float %value acq_rel, align 4
+    ret float %r
+}
+
+define dso_local float @atomicrmw_fsub_float_aligned_seq_cst(ptr %ptr, float %value) {
+; -O0-LABEL: atomicrmw_fsub_float_aligned_seq_cst:
+; -O0:    ldaxr w9, [x11]
+; -O0:    cmp w9, w8
+; -O0:    stlxr w10, w12, [x11]
+; -O0:    subs w8, w9, w8
+; -O0:    subs w8, w8, #1
+;
+; -O1-LABEL: atomicrmw_fsub_float_aligned_seq_cst:
+; -O1:    ldaxr w8, [x0]
+; -O1:    stlxr w9, w8, [x0]
+    %r = atomicrmw fsub ptr %ptr, float %value seq_cst, align 4
+    ret float %r
+}
+
+define dso_local double @atomicrmw_fsub_double_aligned_monotonic(ptr %ptr, double %value) {
+; -O0-LABEL: atomicrmw_fsub_double_aligned_monotonic:
+; -O0:    ldaxr x9, [x11]
+; -O0:    cmp x9, x8
+; -O0:    stlxr w10, x12, [x11]
+; -O0:    subs x8, x9, x8
+; -O0:    subs w8, w8, #1
+;
+; -O1-LABEL: atomicrmw_fsub_double_aligned_monotonic:
+; -O1:    ldxr x8, [x0]
+; -O1:    stxr w9, x8, [x0]
+    %r = atomicrmw fsub ptr %ptr, double %value monotonic, align 8
+    ret double %r
+}
+
+define dso_local double @atomicrmw_fsub_double_aligned_acquire(ptr %ptr, double %value) {
+; -O0-LABEL: atomicrmw_fsub_double_aligned_acquire:
+; -O0:    ldaxr x9, [x11]
+; -O0:    cmp x9, x8
+; -O0:    stlxr w10, x12, [x11]
+; -O0:    subs x8, x9, x8
+; -O0:    subs w8, w8, #1
+;
+; -O1-LABEL: atomicrmw_fsub_double_aligned_acquire:
+; -O1:    ldaxr x8, [x0]
+; -O1:    stxr w9, x8, [x0]
+    %r = atomicrmw fsub ptr %ptr, double %value acquire, align 8
+    ret double %r
+}
+
+define dso_local double @atomicrmw_fsub_double_aligned_release(ptr %ptr, double %value) {
+; -O0-LABEL: atomicrmw_fsub_double_aligned_release:
+; -O0:    ldaxr x9, [x11]
+; -O0:    cmp x9, x8
+; -O0:    stlxr w10, x12, [x11]
+; -O0:    subs x8, x9, x8
+; -O0:    subs w8, w8, #1
+;
+; -O1-LABEL: atomicrmw_fsub_double_aligned_release:
+; -O1:    ldxr x8, [x0]
+; -O1:    stlxr w9, x8, [x0]
+    %r = atomicrmw fsub ptr %ptr, double %value release, align 8
+    ret double %r
+}
+
+define dso_local double @atomicrmw_fsub_double_aligned_acq_rel(ptr %ptr, double %value) {
+; -O0-LABEL: atomicrmw_fsub_double_aligned_acq_rel:
+; -O0:    ldaxr x9, [x11]
+; -O0:    cmp x9, x8
+; -O0:    stlxr w10, x12, [x11]
+; -O0:    subs x8, x9, x8
+; -O0:    subs w8, w8, #1
+;
+; -O1-LABEL: atomicrmw_fsub_double_aligned_acq_rel:
+; -O1:    ldaxr x8, [x0]
+; -O1:    stlxr w9, x8, [x0]
+    %r = atomicrmw fsub ptr %ptr, double %value acq_rel, align 8
+    ret double %r
+}
+
+define dso_local double @atomicrmw_fsub_double_aligned_seq_cst(ptr %ptr, double %value) {
+; -O0-LABEL: atomicrmw_fsub_double_aligned_seq_cst:
+; -O0:    ldaxr x9, [x11]
+; -O0:    cmp x9, x8
+; -O0:    stlxr w10, x12, [x11]
+; -O0:    subs x8, x9, x8
+; -O0:    subs w8, w8, #1
+;
+; -O1-LABEL: atomicrmw_fsub_double_aligned_seq_cst:
+; -O1:    ldaxr x8, [x0]
+; -O1:    stlxr w9, x8, [x0]
+    %r = atomicrmw fsub ptr %ptr, double %value seq_cst, align 8
+    ret double %r
+}
+
+define dso_local half @atomicrmw_fsub_half_unaligned_monotonic(ptr %ptr, half %value) {
+; CHECK-LABEL: atomicrmw_fsub_half_unaligned_monotonic:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fsub ptr %ptr, half %value monotonic, align 1
+    ret half %r
+}
+
+define dso_local half @atomicrmw_fsub_half_unaligned_acquire(ptr %ptr, half %value) {
+; CHECK-LABEL: atomicrmw_fsub_half_unaligned_acquire:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fsub ptr %ptr, half %value acquire, align 1
+    ret half %r
+}
+
+define dso_local half @atomicrmw_fsub_half_unaligned_release(ptr %ptr, half %value) {
+; CHECK-LABEL: atomicrmw_fsub_half_unaligned_release:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fsub ptr %ptr, half %value release, align 1
+    ret half %r
+}
+
+define dso_local half @atomicrmw_fsub_half_unaligned_acq_rel(ptr %ptr, half %value) {
+; CHECK-LABEL: atomicrmw_fsub_half_unaligned_acq_rel:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fsub ptr %ptr, half %value acq_rel, align 1
+    ret half %r
+}
+
+define dso_local half @atomicrmw_fsub_half_unaligned_seq_cst(ptr %ptr, half %value) {
+; CHECK-LABEL: atomicrmw_fsub_half_unaligned_seq_cst:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fsub ptr %ptr, half %value seq_cst, align 1
+    ret half %r
+}
+
+define dso_local float @atomicrmw_fsub_float_unaligned_monotonic(ptr %ptr, float %value) {
+; CHECK-LABEL: atomicrmw_fsub_float_unaligned_monotonic:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fsub ptr %ptr, float %value monotonic, align 1
+    ret float %r
+}
+
+define dso_local float @atomicrmw_fsub_float_unaligned_acquire(ptr %ptr, float %value) {
+; CHECK-LABEL: atomicrmw_fsub_float_unaligned_acquire:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fsub ptr %ptr, float %value acquire, align 1
+    ret float %r
+}
+
+define dso_local float @atomicrmw_fsub_float_unaligned_release(ptr %ptr, float %value) {
+; CHECK-LABEL: atomicrmw_fsub_float_unaligned_release:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fsub ptr %ptr, float %value release, align 1
+    ret float %r
+}
+
+define dso_local float @atomicrmw_fsub_float_unaligned_acq_rel(ptr %ptr, float %value) {
+; CHECK-LABEL: atomicrmw_fsub_float_unaligned_acq_rel:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fsub ptr %ptr, float %value acq_rel, align 1
+    ret float %r
+}
+
+define dso_local float @atomicrmw_fsub_float_unaligned_seq_cst(ptr %ptr, float %value) {
+; CHECK-LABEL: atomicrmw_fsub_float_unaligned_seq_cst:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fsub ptr %ptr, float %value seq_cst, align 1
+    ret float %r
+}
+
+define dso_local double @atomicrmw_fsub_double_unaligned_monotonic(ptr %ptr, double %value) {
+; CHECK-LABEL: atomicrmw_fsub_double_unaligned_monotonic:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fsub ptr %ptr, double %value monotonic, align 1
+    ret double %r
+}
+
+define dso_local double @atomicrmw_fsub_double_unaligned_acquire(ptr %ptr, double %value) {
+; CHECK-LABEL: atomicrmw_fsub_double_unaligned_acquire:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fsub ptr %ptr, double %value acquire, align 1
+    ret double %r
+}
+
+define dso_local double @atomicrmw_fsub_double_unaligned_release(ptr %ptr, double %value) {
+; CHECK-LABEL: atomicrmw_fsub_double_unaligned_release:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fsub ptr %ptr, double %value release, align 1
+    ret double %r
+}
+
+define dso_local double @atomicrmw_fsub_double_unaligned_acq_rel(ptr %ptr, double %value) {
+; CHECK-LABEL: atomicrmw_fsub_double_unaligned_acq_rel:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fsub ptr %ptr, double %value acq_rel, align 1
+    ret double %r
+}
+
+define dso_local double @atomicrmw_fsub_double_unaligned_seq_cst(ptr %ptr, double %value) {
+; CHECK-LABEL: atomicrmw_fsub_double_unaligned_seq_cst:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fsub ptr %ptr, double %value seq_cst, align 1
+    ret double %r
+}
+
+define dso_local half @atomicrmw_fmax_half_aligned_monotonic(ptr %ptr, half %value) {
+; CHECK-LABEL: atomicrmw_fmax_half_aligned_monotonic:
+; CHECK:    ldfmax h0, h0, [x0]
+    %r = atomicrmw fmax ptr %ptr, half %value monotonic, align 2
+    ret half %r
+}
+
+define dso_local half @atomicrmw_fmax_half_aligned_acquire(ptr %ptr, half %value) {
+; CHECK-LABEL: atomicrmw_fmax_half_aligned_acquire:
+; CHECK:    ldfmaxa h0, h0, [x0]
+    %r = atomicrmw fmax ptr %ptr, half %value acquire, align 2
+    ret half %r
+}
+
+define dso_local half @atomicrmw_fmax_half_aligned_release(ptr %ptr, half %value) {
+; CHECK-LABEL: atomicrmw_fmax_half_aligned_release:
+; CHECK:    ldfmaxl h0, h0, [x0]
+    %r = atomicrmw fmax ptr %ptr, half %value release, align 2
+    ret half %r
+}
+
+define dso_local half @atomicrmw_fmax_half_aligned_acq_rel(ptr %ptr, half %value) {
+; CHECK-LABEL: atomicrmw_fmax_half_aligned_acq_rel:
+; CHECK:    ldfmaxal h0, h0, [x0]
+    %r = atomicrmw fmax ptr %ptr, half %value acq_rel, align 2
+    ret half %r
+}
+
+define dso_local half @atomicrmw_fmax_half_aligned_seq_cst(ptr %ptr, half %value) {
+; CHECK-LABEL: atomicrmw_fmax_half_aligned_seq_cst:
+; CHECK:    ldfmaxal h0, h0, [x0]
+    %r = atomicrmw fmax ptr %ptr, half %value seq_cst, align 2
+    ret half %r
+}
+
+define dso_local float @atomicrmw_fmax_float_aligned_monotonic(ptr %ptr, float %value) {
+; CHECK-LABEL: atomicrmw_fmax_float_aligned_monotonic:
+; CHECK:    ldfmax s0, s0, [x0]
+    %r = atomicrmw fmax ptr %ptr, float %value monotonic, align 4
+    ret float %r
+}
+
+define dso_local float @atomicrmw_fmax_float_aligned_acquire(ptr %ptr, float %value) {
+; CHECK-LABEL: atomicrmw_fmax_float_aligned_acquire:
+; CHECK:    ldfmaxa s0, s0, [x0]
+    %r = atomicrmw fmax ptr %ptr, float %value acquire, align 4
+    ret float %r
+}
+
+define dso_local float @atomicrmw_fmax_float_aligned_release(ptr %ptr, float %value) {
+; CHECK-LABEL: atomicrmw_fmax_float_aligned_release:
+; CHECK:    ldfmaxl s0, s0, [x0]
+    %r = atomicrmw fmax ptr %ptr, float %value release, align 4
+    ret float %r
+}
+
+define dso_local float @atomicrmw_fmax_float_aligned_acq_rel(ptr %ptr, float %value) {
+; CHECK-LABEL: atomicrmw_fmax_float_aligned_acq_rel:
+; CHECK:    ldfmaxal s0, s0, [x0]
+    %r = atomicrmw fmax ptr %ptr, float %value acq_rel, align 4
+    ret float %r
+}
+
+define dso_local float @atomicrmw_fmax_float_aligned_seq_cst(ptr %ptr, float %value) {
+; CHECK-LABEL: atomicrmw_fmax_float_aligned_seq_cst:
+; CHECK:    ldfmaxal s0, s0, [x0]
+    %r = atomicrmw fmax ptr %ptr, float %value seq_cst, align 4
+    ret float %r
+}
+
+define dso_local double @atomicrmw_fmax_double_aligned_monotonic(ptr %ptr, double %value) {
+; CHECK-LABEL: atomicrmw_fmax_double_aligned_monotonic:
+; CHECK:    ldfmax d0, d0, [x0]
+    %r = atomicrmw fmax ptr %ptr, double %value monotonic, align 8
+    ret double %r
+}
+
+define dso_local double @atomicrmw_fmax_double_aligned_acquire(ptr %ptr, double %value) {
+; CHECK-LABEL: atomicrmw_fmax_double_aligned_acquire:
+; CHECK:    ldfmaxa d0, d0, [x0]
+    %r = atomicrmw fmax ptr %ptr, double %value acquire, align 8
+    ret double %r
+}
+
+define dso_local double @atomicrmw_fmax_double_aligned_release(ptr %ptr, double %value) {
+; CHECK-LABEL: atomicrmw_fmax_double_aligned_release:
+; CHECK:    ldfmaxl d0, d0, [x0]
+    %r = atomicrmw fmax ptr %ptr, double %value release, align 8
+    ret double %r
+}
+
+define dso_local double @atomicrmw_fmax_double_aligned_acq_rel(ptr %ptr, double %value) {
+; CHECK-LABEL: atomicrmw_fmax_double_aligned_acq_rel:
+; CHECK:    ldfmaxal d0, d0, [x0]
+    %r = atomicrmw fmax ptr %ptr, double %value acq_rel, align 8
+    ret double %r
+}
+
+define dso_local double @atomicrmw_fmax_double_aligned_seq_cst(ptr %ptr, double %value) {
+; CHECK-LABEL: atomicrmw_fmax_double_aligned_seq_cst:
+; CHECK:    ldfmaxal d0, d0, [x0]
+    %r = atomicrmw fmax ptr %ptr, double %value seq_cst, align 8
+    ret double %r
+}
+
+define dso_local half @atomicrmw_fmax_half_unaligned_monotonic(ptr %ptr, half %value) {
+; CHECK-LABEL: atomicrmw_fmax_half_unaligned_monotonic:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fmax ptr %ptr, half %value monotonic, align 1
+    ret half %r
+}
+
+define dso_local half @atomicrmw_fmax_half_unaligned_acquire(ptr %ptr, half %value) {
+; CHECK-LABEL: atomicrmw_fmax_half_unaligned_acquire:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fmax ptr %ptr, half %value acquire, align 1
+    ret half %r
+}
+
+define dso_local half @atomicrmw_fmax_half_unaligned_release(ptr %ptr, half %value) {
+; CHECK-LABEL: atomicrmw_fmax_half_unaligned_release:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fmax ptr %ptr, half %value release, align 1
+    ret half %r
+}
+
+define dso_local half @atomicrmw_fmax_half_unaligned_acq_rel(ptr %ptr, half %value) {
+; CHECK-LABEL: atomicrmw_fmax_half_unaligned_acq_rel:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fmax ptr %ptr, half %value acq_rel, align 1
+    ret half %r
+}
+
+define dso_local half @atomicrmw_fmax_half_unaligned_seq_cst(ptr %ptr, half %value) {
+; CHECK-LABEL: atomicrmw_fmax_half_unaligned_seq_cst:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fmax ptr %ptr, half %value seq_cst, align 1
+    ret half %r
+}
+
+define dso_local float @atomicrmw_fmax_float_unaligned_monotonic(ptr %ptr, float %value) {
+; CHECK-LABEL: atomicrmw_fmax_float_unaligned_monotonic:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fmax ptr %ptr, float %value monotonic, align 1
+    ret float %r
+}
+
+define dso_local float @atomicrmw_fmax_float_unaligned_acquire(ptr %ptr, float %value) {
+; CHECK-LABEL: atomicrmw_fmax_float_unaligned_acquire:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fmax ptr %ptr, float %value acquire, align 1
+    ret float %r
+}
+
+define dso_local float @atomicrmw_fmax_float_unaligned_release(ptr %ptr, float %value) {
+; CHECK-LABEL: atomicrmw_fmax_float_unaligned_release:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fmax ptr %ptr, float %value release, align 1
+    ret float %r
+}
+
+define dso_local float @atomicrmw_fmax_float_unaligned_acq_rel(ptr %ptr, float %value) {
+; CHECK-LABEL: atomicrmw_fmax_float_unaligned_acq_rel:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fmax ptr %ptr, float %value acq_rel, align 1
+    ret float %r
+}
+
+define dso_local float @atomicrmw_fmax_float_unaligned_seq_cst(ptr %ptr, float %value) {
+; CHECK-LABEL: atomicrmw_fmax_float_unaligned_seq_cst:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fmax ptr %ptr, float %value seq_cst, align 1
+    ret float %r
+}
+
+define dso_local double @atomicrmw_fmax_double_unaligned_monotonic(ptr %ptr, double %value) {
+; CHECK-LABEL: atomicrmw_fmax_double_unaligned_monotonic:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fmax ptr %ptr, double %value monotonic, align 1
+    ret double %r
+}
+
+define dso_local double @atomicrmw_fmax_double_unaligned_acquire(ptr %ptr, double %value) {
+; CHECK-LABEL: atomicrmw_fmax_double_unaligned_acquire:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fmax ptr %ptr, double %value acquire, align 1
+    ret double %r
+}
+
+define dso_local double @atomicrmw_fmax_double_unaligned_release(ptr %ptr, double %value) {
+; CHECK-LABEL: atomicrmw_fmax_double_unaligned_release:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fmax ptr %ptr, double %value release, align 1
+    ret double %r
+}
+
+define dso_local double @atomicrmw_fmax_double_unaligned_acq_rel(ptr %ptr, double %value) {
+; CHECK-LABEL: atomicrmw_fmax_double_unaligned_acq_rel:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fmax ptr %ptr, double %value acq_rel, align 1
+    ret double %r
+}
+
+define dso_local double @atomicrmw_fmax_double_unaligned_seq_cst(ptr %ptr, double %value) {
+; CHECK-LABEL: atomicrmw_fmax_double_unaligned_seq_cst:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fmax ptr %ptr, double %value seq_cst, align 1
+    ret double %r
+}
+
+define dso_local half @atomicrmw_fmin_half_aligned_monotonic(ptr %ptr, half %value) {
+; CHECK-LABEL: atomicrmw_fmin_half_aligned_monotonic:
+; CHECK:    ldfmin h0, h0, [x0]
+    %r = atomicrmw fmin ptr %ptr, half %value monotonic, align 2
+    ret half %r
+}
+
+define dso_local half @atomicrmw_fmin_half_aligned_acquire(ptr %ptr, half %value) {
+; CHECK-LABEL: atomicrmw_fmin_half_aligned_acquire:
+; CHECK:    ldfmina h0, h0, [x0]
+    %r = atomicrmw fmin ptr %ptr, half %value acquire, align 2
+    ret half %r
+}
+
+define dso_local half @atomicrmw_fmin_half_aligned_release(ptr %ptr, half %value) {
+; CHECK-LABEL: atomicrmw_fmin_half_aligned_release:
+; CHECK:    ldfminl h0, h0, [x0]
+    %r = atomicrmw fmin ptr %ptr, half %value release, align 2
+    ret half %r
+}
+
+define dso_local half @atomicrmw_fmin_half_aligned_acq_rel(ptr %ptr, half %value) {
+; CHECK-LABEL: atomicrmw_fmin_half_aligned_acq_rel:
+; CHECK:    ldfminal h0, h0, [x0]
+    %r = atomicrmw fmin ptr %ptr, half %value acq_rel, align 2
+    ret half %r
+}
+
+define dso_local half @atomicrmw_fmin_half_aligned_seq_cst(ptr %ptr, half %value) {
+; CHECK-LABEL: atomicrmw_fmin_half_aligned_seq_cst:
+; CHECK:    ldfminal h0, h0, [x0]
+    %r = atomicrmw fmin ptr %ptr, half %value seq_cst, align 2
+    ret half %r
+}
+
+define dso_local float @atomicrmw_fmin_float_aligned_monotonic(ptr %ptr, float %value) {
+; CHECK-LABEL: atomicrmw_fmin_float_aligned_monotonic:
+; CHECK:    ldfmin s0, s0, [x0]
+    %r = atomicrmw fmin ptr %ptr, float %value monotonic, align 4
+    ret float %r
+}
+
+define dso_local float @atomicrmw_fmin_float_aligned_acquire(ptr %ptr, float %value) {
+; CHECK-LABEL: atomicrmw_fmin_float_aligned_acquire:
+; CHECK:    ldfmina s0, s0, [x0]
+    %r = atomicrmw fmin ptr %ptr, float %value acquire, align 4
+    ret float %r
+}
+
+define dso_local float @atomicrmw_fmin_float_aligned_release(ptr %ptr, float %value) {
+; CHECK-LABEL: atomicrmw_fmin_float_aligned_release:
+; CHECK:    ldfminl s0, s0, [x0]
+    %r = atomicrmw fmin ptr %ptr, float %value release, align 4
+    ret float %r
+}
+
+define dso_local float @atomicrmw_fmin_float_aligned_acq_rel(ptr %ptr, float %value) {
+; CHECK-LABEL: atomicrmw_fmin_float_aligned_acq_rel:
+; CHECK:    ldfminal s0, s0, [x0]
+    %r = atomicrmw fmin ptr %ptr, float %value acq_rel, align 4
+    ret float %r
+}
+
+define dso_local float @atomicrmw_fmin_float_aligned_seq_cst(ptr %ptr, float %value) {
+; CHECK-LABEL: atomicrmw_fmin_float_aligned_seq_cst:
+; CHECK:    ldfminal s0, s0, [x0]
+    %r = atomicrmw fmin ptr %ptr, float %value seq_cst, align 4
+    ret float %r
+}
+
+define dso_local double @atomicrmw_fmin_double_aligned_monotonic(ptr %ptr, double %value) {
+; CHECK-LABEL: atomicrmw_fmin_double_aligned_monotonic:
+; CHECK:    ldfmin d0, d0, [x0]
+    %r = atomicrmw fmin ptr %ptr, double %value monotonic, align 8
+    ret double %r
+}
+
+define dso_local double @atomicrmw_fmin_double_aligned_acquire(ptr %ptr, double %value) {
+; CHECK-LABEL: atomicrmw_fmin_double_aligned_acquire:
+; CHECK:    ldfmina d0, d0, [x0]
+    %r = atomicrmw fmin ptr %ptr, double %value acquire, align 8
+    ret double %r
+}
+
+define dso_local double @atomicrmw_fmin_double_aligned_release(ptr %ptr, double %value) {
+; CHECK-LABEL: atomicrmw_fmin_double_aligned_release:
+; CHECK:    ldfminl d0, d0, [x0]
+    %r = atomicrmw fmin ptr %ptr, double %value release, align 8
+    ret double %r
+}
+
+define dso_local double @atomicrmw_fmin_double_aligned_acq_rel(ptr %ptr, double %value) {
+; CHECK-LABEL: atomicrmw_fmin_double_aligned_acq_rel:
+; CHECK:    ldfminal d0, d0, [x0]
+    %r = atomicrmw fmin ptr %ptr, double %value acq_rel, align 8
+    ret double %r
+}
+
+define dso_local double @atomicrmw_fmin_double_aligned_seq_cst(ptr %ptr, double %value) {
+; CHECK-LABEL: atomicrmw_fmin_double_aligned_seq_cst:
+; CHECK:    ldfminal d0, d0, [x0]
+    %r = atomicrmw fmin ptr %ptr, double %value seq_cst, align 8
+    ret double %r
+}
+
+define dso_local half @atomicrmw_fmin_half_unaligned_monotonic(ptr %ptr, half %value) {
+; CHECK-LABEL: atomicrmw_fmin_half_unaligned_monotonic:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fmin ptr %ptr, half %value monotonic, align 1
+    ret half %r
+}
+
+define dso_local half @atomicrmw_fmin_half_unaligned_acquire(ptr %ptr, half %value) {
+; CHECK-LABEL: atomicrmw_fmin_half_unaligned_acquire:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fmin ptr %ptr, half %value acquire, align 1
+    ret half %r
+}
+
+define dso_local half @atomicrmw_fmin_half_unaligned_release(ptr %ptr, half %value) {
+; CHECK-LABEL: atomicrmw_fmin_half_unaligned_release:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fmin ptr %ptr, half %value release, align 1
+    ret half %r
+}
+
+define dso_local half @atomicrmw_fmin_half_unaligned_acq_rel(ptr %ptr, half %value) {
+; CHECK-LABEL: atomicrmw_fmin_half_unaligned_acq_rel:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fmin ptr %ptr, half %value acq_rel, align 1
+    ret half %r
+}
+
+define dso_local half @atomicrmw_fmin_half_unaligned_seq_cst(ptr %ptr, half %value) {
+; CHECK-LABEL: atomicrmw_fmin_half_unaligned_seq_cst:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fmin ptr %ptr, half %value seq_cst, align 1
+    ret half %r
+}
+
+define dso_local float @atomicrmw_fmin_float_unaligned_monotonic(ptr %ptr, float %value) {
+; CHECK-LABEL: atomicrmw_fmin_float_unaligned_monotonic:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fmin ptr %ptr, float %value monotonic, align 1
+    ret float %r
+}
+
+define dso_local float @atomicrmw_fmin_float_unaligned_acquire(ptr %ptr, float %value) {
+; CHECK-LABEL: atomicrmw_fmin_float_unaligned_acquire:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fmin ptr %ptr, float %value acquire, align 1
+    ret float %r
+}
+
+define dso_local float @atomicrmw_fmin_float_unaligned_release(ptr %ptr, float %value) {
+; CHECK-LABEL: atomicrmw_fmin_float_unaligned_release:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fmin ptr %ptr, float %value release, align 1
+    ret float %r
+}
+
+define dso_local float @atomicrmw_fmin_float_unaligned_acq_rel(ptr %ptr, float %value) {
+; CHECK-LABEL: atomicrmw_fmin_float_unaligned_acq_rel:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fmin ptr %ptr, float %value acq_rel, align 1
+    ret float %r
+}
+
+define dso_local float @atomicrmw_fmin_float_unaligned_seq_cst(ptr %ptr, float %value) {
+; CHECK-LABEL: atomicrmw_fmin_float_unaligned_seq_cst:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fmin ptr %ptr, float %value seq_cst, align 1
+    ret float %r
+}
+
+define dso_local double @atomicrmw_fmin_double_unaligned_monotonic(ptr %ptr, double %value) {
+; CHECK-LABEL: atomicrmw_fmin_double_unaligned_monotonic:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fmin ptr %ptr, double %value monotonic, align 1
+    ret double %r
+}
+
+define dso_local double @atomicrmw_fmin_double_unaligned_acquire(ptr %ptr, double %value) {
+; CHECK-LABEL: atomicrmw_fmin_double_unaligned_acquire:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fmin ptr %ptr, double %value acquire, align 1
+    ret double %r
+}
+
+define dso_local double @atomicrmw_fmin_double_unaligned_release(ptr %ptr, double %value) {
+; CHECK-LABEL: atomicrmw_fmin_double_unaligned_release:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fmin ptr %ptr, double %value release, align 1
+    ret double %r
+}
+
+define dso_local double @atomicrmw_fmin_double_unaligned_acq_rel(ptr %ptr, double %value) {
+; CHECK-LABEL: atomicrmw_fmin_double_unaligned_acq_rel:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fmin ptr %ptr, double %value acq_rel, align 1
+    ret double %r
+}
+
+define dso_local double @atomicrmw_fmin_double_unaligned_seq_cst(ptr %ptr, double %value) {
+; CHECK-LABEL: atomicrmw_fmin_double_unaligned_seq_cst:
+; CHECK:    bl __atomic_compare_exchange
+    %r = atomicrmw fmin ptr %ptr, double %value seq_cst, align 1
+    ret double %r
+}
diff --git a/llvm/test/CodeGen/AArch64/Atomics/generate-tests.py b/llvm/test/CodeGen/AArch64/Atomics/generate-tests.py
index ecda5fd69ca5d9..2f74254e32dcae 100755
--- a/llvm/test/CodeGen/AArch64/Atomics/generate-tests.py
+++ b/llvm/test/CodeGen/AArch64/Atomics/generate-tests.py
@@ -29,6 +29,20 @@ def __str__(self) -> str:
         return self.name
 
 
+# FP Type name size
+class FPType(enum.Enum):
+    # Value is the size in bytes
+    half = 2
+    float = 4
+    double = 8
+
+    def align(self, aligned: bool) -> int:
+        return self.value if aligned else 1
+
+    def __str__(self) -> str:
+        return self.name
+
+
 # Is this an aligned or unaligned access?
 class Aligned(enum.Enum):
     aligned = True
@@ -111,6 +125,7 @@ class Feature(enum.Flag):
     v8_1a = enum.auto()  # -mattr=+v8.1a, mandatory FEAT_LOR, FEAT_LSE
     rcpc = enum.auto()  # FEAT_LRCPC
     lse2 = enum.auto()  # FEAT_LSE2
+    lsfe = enum.auto()  # FEAT_LSFE
     outline_atomics = enum.auto()  # -moutline-atomics
     rcpc3 = enum.auto()  # FEAT_LSE2 + FEAT_LRCPC3
     lse2_lse128 = enum.auto()  # FEAT_LSE2 + FEAT_LSE128
@@ -125,6 +140,8 @@ def mattr(self):
             return "+lse2,+rcpc3"
         if self == Feature.lse2_lse128:
             return "+lse2,+lse128"
+        if self == Feature.lsfe:
+            return "+lsfe"
         return "+" + self.name
 
 
@@ -142,11 +159,18 @@ def mattr(self):
     "umin",
 ]
 
+FP_ATOMICRMW_OPS = [
+    "fadd",
+    "fsub",
+    "fmax",
+    "fmin",
+]
+
 
-def all_atomicrmw(f):
-    for op in ATOMICRMW_OPS:
+def all_atomicrmw(f, datatype, atomicrmw_ops):
+    for op in atomicrmw_ops:
         for aligned in Aligned:
-            for ty in Type:
+            for ty in datatype:
                 for ordering in ATOMICRMW_ORDERS:
                     name = f"atomicrmw_{op}_{ty}_{aligned}_{ordering}"
                     instr = "atomicrmw"
@@ -286,7 +310,14 @@ def write_lit_tests():
             with open(f"{triple}-atomicrmw-{feat.name}.ll", "w") as f:
                 filter_args = r'--filter-out "\b(sp)\b" --filter "^\s*(ld[^r]|st[^r]|swp|cas|bl|add|and|eor|orn|orr|sub|mvn|sxt|cmp|ccmp|csel|dmb)"'
                 header(f, triple, [feat], filter_args)
-                all_atomicrmw(f)
+                if feat != Feature.lsfe:
+                    all_atomicrmw(f, Type, ATOMICRMW_OPS)
+                else:
+                    all_atomicrmw(f, FPType, FP_ATOMICRMW_OPS)
+
+            # Floating point atomics only supported for atomicrmw currently
+            if feat == Feature.lsfe:
+                continue
 
             with open(f"{triple}-cmpxchg-{feat.name}.ll", "w") as f:
                 filter_args = r'--filter-out "\b(sp)\b" --filter "^\s*(ld[^r]|st[^r]|swp|cas|bl|add|and|eor|orn|orr|sub|mvn|sxt|cmp|ccmp|csel|dmb)"'



More information about the llvm-commits mailing list