[llvm] 08a89bb - [AArch64] Codegen for 16/32/64-bit floating-point atomicrmw ops (#125686)

via llvm-commits llvm-commits at lists.llvm.org
Fri Mar 7 03:22:37 PST 2025


Author: Jonathan Thackray
Date: 2025-03-07T11:22:33Z
New Revision: 08a89bb02ec4a4de8ad9bd94e28701d635b1f876

URL: https://github.com/llvm/llvm-project/commit/08a89bb02ec4a4de8ad9bd94e28701d635b1f876
DIFF: https://github.com/llvm/llvm-project/commit/08a89bb02ec4a4de8ad9bd94e28701d635b1f876.diff

LOG: [AArch64] Codegen for 16/32/64-bit floating-point atomicrmw ops (#125686)

Codegen for AArch64 16/32/64-bit floating-point atomic read-modify-write
operations (`atomicrmw {fadd,fmin,fmax}`) using LD{B}FADD, LD{B}FMAX and
LD{B}FMIN atomic instructions.

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
    llvm/lib/Target/AArch64/AArch64InstrAtomics.td
    llvm/lib/Target/AArch64/AArch64InstrFormats.td
    llvm/lib/Target/AArch64/AArch64InstrInfo.td
    llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lsfe.ll
    llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-lsfe.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index e7d141d22c7c7..5f4a16a41de4a 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -984,6 +984,23 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
 #undef LCALLNAME5
   }
 
+  if (Subtarget->outlineAtomics() && !Subtarget->hasLSFE()) {
+    setOperationAction(ISD::ATOMIC_LOAD_FADD, MVT::f16, LibCall);
+    setOperationAction(ISD::ATOMIC_LOAD_FADD, MVT::f32, LibCall);
+    setOperationAction(ISD::ATOMIC_LOAD_FADD, MVT::f64, LibCall);
+    setOperationAction(ISD::ATOMIC_LOAD_FADD, MVT::bf16, LibCall);
+
+    setOperationAction(ISD::ATOMIC_LOAD_FMAX, MVT::f16, LibCall);
+    setOperationAction(ISD::ATOMIC_LOAD_FMAX, MVT::f32, LibCall);
+    setOperationAction(ISD::ATOMIC_LOAD_FMAX, MVT::f64, LibCall);
+    setOperationAction(ISD::ATOMIC_LOAD_FMAX, MVT::bf16, LibCall);
+
+    setOperationAction(ISD::ATOMIC_LOAD_FMIN, MVT::f16, LibCall);
+    setOperationAction(ISD::ATOMIC_LOAD_FMIN, MVT::f32, LibCall);
+    setOperationAction(ISD::ATOMIC_LOAD_FMIN, MVT::f64, LibCall);
+    setOperationAction(ISD::ATOMIC_LOAD_FMIN, MVT::bf16, LibCall);
+  }
+
   if (Subtarget->hasLSE128()) {
     // Custom lowering because i128 is not legal. Must be replaced by 2x64
     // values. ATOMIC_LOAD_AND also needs op legalisation to emit LDCLRP.
@@ -27907,6 +27924,12 @@ AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
   if (CanUseLSE128)
     return AtomicExpansionKind::None;
 
+  // If LSFE available, use atomic FP instructions in preference to expansion
+  if (Subtarget->hasLSFE() && (AI->getOperation() == AtomicRMWInst::FAdd ||
+                               AI->getOperation() == AtomicRMWInst::FMax ||
+                               AI->getOperation() == AtomicRMWInst::FMin))
+    return AtomicExpansionKind::None;
+
   // Nand is not supported in LSE.
   // Leave 128 bits to LLSC or CmpXChg.
   if (AI->getOperation() != AtomicRMWInst::Nand && Size < 128 &&

diff  --git a/llvm/lib/Target/AArch64/AArch64InstrAtomics.td b/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
index 5e6db9d007a55..2d7a9d6f00bd0 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
@@ -543,6 +543,20 @@ let Predicates = [HasLSE] in {
   defm : LDOPregister_patterns_mod<"LDCLR", "atomic_load_and", "ORN">;
 }
 
+defm atomic_load_fadd  : binary_atomic_op_fp<atomic_load_fadd>;
+defm atomic_load_fmin  : binary_atomic_op_fp<atomic_load_fmin>;
+defm atomic_load_fmax  : binary_atomic_op_fp<atomic_load_fmax>;
+
+let Predicates = [HasLSFE] in {
+  defm : LDFPOPregister_patterns<"LDFADD",   "atomic_load_fadd">;
+  defm : LDFPOPregister_patterns<"LDFMAXNM", "atomic_load_fmax">;
+  defm : LDFPOPregister_patterns<"LDFMINNM", "atomic_load_fmin">;
+
+  defm : LDBFPOPregister_patterns<"LDBFADD",   "atomic_load_fadd">;
+  defm : LDBFPOPregister_patterns<"LDBFMAXNM", "atomic_load_fmax">;
+  defm : LDBFPOPregister_patterns<"LDBFMINNM", "atomic_load_fmin">;
+}
+
 // v8.9a/v9.4a FEAT_LRCPC patterns
 let Predicates = [HasRCPC3, HasNEON] in {
   // LDAP1 loads

diff  --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index 7a8b743faf4a3..255cd0ec5840c 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -12493,6 +12493,36 @@ multiclass LDOPregister_patterns_mod<string inst, string op, string mod> {
                         (i32 (!cast<Instruction>(mod#Wrr) WZR, GPR32:$Rm))>;
 }
 
+let Predicates = [HasLSFE] in
+multiclass LDFPOPregister_patterns_ord_dag<string inst, string suffix, string op,
+                                         ValueType vt, dag data> {
+  def : Pat<(!cast<PatFrag>(op#"_"#vt#"_monotonic") FPR64:$Rn, data),
+            (!cast<Instruction>(inst # suffix) data, FPR64:$Rn)>;
+  def : Pat<(!cast<PatFrag>(op#"_"#vt#"_acquire") FPR64:$Rn, data),
+            (!cast<Instruction>(inst # "A" # suffix) data, FPR64:$Rn)>;
+  def : Pat<(!cast<PatFrag>(op#"_"#vt#"_release") FPR64:$Rn, data),
+            (!cast<Instruction>(inst # "L" # suffix) data, FPR64:$Rn)>;
+  def : Pat<(!cast<PatFrag>(op#"_"#vt#"_acq_rel") FPR64:$Rn, data),
+            (!cast<Instruction>(inst # "AL" # suffix) data, FPR64:$Rn)>;
+  def : Pat<(!cast<PatFrag>(op#"_"#vt#"_seq_cst") FPR64:$Rn, data),
+            (!cast<Instruction>(inst # "AL" # suffix) data, FPR64:$Rn)>;
+}
+
+multiclass LDFPOPregister_patterns_ord<string inst, string suffix, string op,
+                                     ValueType vt, dag RHS> {
+  defm : LDFPOPregister_patterns_ord_dag<inst, suffix, op, vt, RHS>;
+}
+
+multiclass LDFPOPregister_patterns<string inst, string op> {
+  defm : LDFPOPregister_patterns_ord<inst, "H", op, f16, (f16 FPR16:$Rm)>;
+  defm : LDFPOPregister_patterns_ord<inst, "S", op, f32, (f32 FPR32:$Rm)>;
+  defm : LDFPOPregister_patterns_ord<inst, "D", op, f64, (f64 FPR64:$Rm)>;
+}
+
+multiclass LDBFPOPregister_patterns<string inst, string op> {
+  defm : LDFPOPregister_patterns_ord<inst, "", op, bf16, (bf16 FPR16:$Rm)>;
+}
+
 let Predicates = [HasLSE] in
 multiclass CASregister_patterns_ord_dag<string inst, string suffix, string op,
                                         ValueType vt, dag OLD, dag NEW> {

diff  --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index c836f3138a45f..ee109277dfbba 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -10564,7 +10564,7 @@ let Predicates = [HasLSFE] in {
   defm LDFMAXNML  : AtomicFPLoad<0b01, 0b110, "ldfmaxnml">;
   defm LDFMINNMA  : AtomicFPLoad<0b10, 0b111, "ldfminnma">;
   defm LDFMINNMAL : AtomicFPLoad<0b11, 0b111, "ldfminnmal">;
-  defm LDFMINMN   : AtomicFPLoad<0b00, 0b111, "ldfminnm">;
+  defm LDFMINNM   : AtomicFPLoad<0b00, 0b111, "ldfminnm">;
   defm LDFMINNML  : AtomicFPLoad<0b01, 0b111, "ldfminnml">;
   // BFloat16
   def LDBFADDA    : BaseAtomicFPLoad<FPR16, 0b00, 0b10, 0b000, "ldbfadda">;

diff  --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lsfe.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lsfe.ll
index 992d050cf9ca0..fc9a126f79a83 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lsfe.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lsfe.ll
@@ -4,306 +4,141 @@
 ; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+lsfe -O1 | FileCheck %s --check-prefixes=CHECK,-O1
 
 define dso_local half @atomicrmw_fadd_half_aligned_monotonic(ptr %ptr, half %value) {
-; -O0-LABEL: atomicrmw_fadd_half_aligned_monotonic:
-; -O0:    ldaxrh w0, [x9]
-; -O0:    cmp w0, w10, uxth
-; -O0:    stlxrh w8, w11, [x9]
-; -O0:    subs w8, w8, w0, uxth
-;
-; -O1-LABEL: atomicrmw_fadd_half_aligned_monotonic:
-; -O1:    ldxrh w8, [x0]
-; -O1:    stxrh w9, w8, [x0]
+; CHECK-LABEL: atomicrmw_fadd_half_aligned_monotonic:
+; CHECK:    ldfadd h0, h0, [x0]
     %r = atomicrmw fadd ptr %ptr, half %value monotonic, align 2
     ret half %r
 }
 
 define dso_local half @atomicrmw_fadd_half_aligned_acquire(ptr %ptr, half %value) {
-; -O0-LABEL: atomicrmw_fadd_half_aligned_acquire:
-; -O0:    ldaxrh w0, [x9]
-; -O0:    cmp w0, w10, uxth
-; -O0:    stlxrh w8, w11, [x9]
-; -O0:    subs w8, w8, w0, uxth
-;
-; -O1-LABEL: atomicrmw_fadd_half_aligned_acquire:
-; -O1:    ldaxrh w8, [x0]
-; -O1:    stxrh w9, w8, [x0]
+; CHECK-LABEL: atomicrmw_fadd_half_aligned_acquire:
+; CHECK:    ldfadda h0, h0, [x0]
     %r = atomicrmw fadd ptr %ptr, half %value acquire, align 2
     ret half %r
 }
 
 define dso_local half @atomicrmw_fadd_half_aligned_release(ptr %ptr, half %value) {
-; -O0-LABEL: atomicrmw_fadd_half_aligned_release:
-; -O0:    ldaxrh w0, [x9]
-; -O0:    cmp w0, w10, uxth
-; -O0:    stlxrh w8, w11, [x9]
-; -O0:    subs w8, w8, w0, uxth
-;
-; -O1-LABEL: atomicrmw_fadd_half_aligned_release:
-; -O1:    ldxrh w8, [x0]
-; -O1:    stlxrh w9, w8, [x0]
+; CHECK-LABEL: atomicrmw_fadd_half_aligned_release:
+; CHECK:    ldfaddl h0, h0, [x0]
     %r = atomicrmw fadd ptr %ptr, half %value release, align 2
     ret half %r
 }
 
 define dso_local half @atomicrmw_fadd_half_aligned_acq_rel(ptr %ptr, half %value) {
-; -O0-LABEL: atomicrmw_fadd_half_aligned_acq_rel:
-; -O0:    ldaxrh w0, [x9]
-; -O0:    cmp w0, w10, uxth
-; -O0:    stlxrh w8, w11, [x9]
-; -O0:    subs w8, w8, w0, uxth
-;
-; -O1-LABEL: atomicrmw_fadd_half_aligned_acq_rel:
-; -O1:    ldaxrh w8, [x0]
-; -O1:    stlxrh w9, w8, [x0]
+; CHECK-LABEL: atomicrmw_fadd_half_aligned_acq_rel:
+; CHECK:    ldfaddal h0, h0, [x0]
     %r = atomicrmw fadd ptr %ptr, half %value acq_rel, align 2
     ret half %r
 }
 
 define dso_local half @atomicrmw_fadd_half_aligned_seq_cst(ptr %ptr, half %value) {
-; -O0-LABEL: atomicrmw_fadd_half_aligned_seq_cst:
-; -O0:    ldaxrh w0, [x9]
-; -O0:    cmp w0, w10, uxth
-; -O0:    stlxrh w8, w11, [x9]
-; -O0:    subs w8, w8, w0, uxth
-;
-; -O1-LABEL: atomicrmw_fadd_half_aligned_seq_cst:
-; -O1:    ldaxrh w8, [x0]
-; -O1:    stlxrh w9, w8, [x0]
+; CHECK-LABEL: atomicrmw_fadd_half_aligned_seq_cst:
+; CHECK:    ldfaddal h0, h0, [x0]
     %r = atomicrmw fadd ptr %ptr, half %value seq_cst, align 2
     ret half %r
 }
 
 define dso_local bfloat @atomicrmw_fadd_bfloat_aligned_monotonic(ptr %ptr, bfloat %value) {
-; -O0-LABEL: atomicrmw_fadd_bfloat_aligned_monotonic:
-; -O0:    add w8, w8, w9
-; -O0:    add w8, w8, w9
-; -O0:    ldaxrh w9, [x11]
-; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w9, w8, uxth
-; -O0:    subs w8, w8, #1
-;
-; -O1-LABEL: atomicrmw_fadd_bfloat_aligned_monotonic:
-; -O1:    ldxrh w9, [x0]
-; -O1:    add w9, w9, w8
-; -O1:    add w9, w10, w9
-; -O1:    stxrh w10, w9, [x0]
+; CHECK-LABEL: atomicrmw_fadd_bfloat_aligned_monotonic:
+; CHECK:    ldbfadd h0, h0, [x0]
     %r = atomicrmw fadd ptr %ptr, bfloat %value monotonic, align 2
     ret bfloat %r
 }
 
 define dso_local bfloat @atomicrmw_fadd_bfloat_aligned_acquire(ptr %ptr, bfloat %value) {
-; -O0-LABEL: atomicrmw_fadd_bfloat_aligned_acquire:
-; -O0:    add w8, w8, w9
-; -O0:    add w8, w8, w9
-; -O0:    ldaxrh w9, [x11]
-; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w9, w8, uxth
-; -O0:    subs w8, w8, #1
-;
-; -O1-LABEL: atomicrmw_fadd_bfloat_aligned_acquire:
-; -O1:    ldaxrh w9, [x0]
-; -O1:    add w9, w9, w8
-; -O1:    add w9, w10, w9
-; -O1:    stxrh w10, w9, [x0]
+; CHECK-LABEL: atomicrmw_fadd_bfloat_aligned_acquire:
+; CHECK:    ldbfadda h0, h0, [x0]
     %r = atomicrmw fadd ptr %ptr, bfloat %value acquire, align 2
     ret bfloat %r
 }
 
 define dso_local bfloat @atomicrmw_fadd_bfloat_aligned_release(ptr %ptr, bfloat %value) {
-; -O0-LABEL: atomicrmw_fadd_bfloat_aligned_release:
-; -O0:    add w8, w8, w9
-; -O0:    add w8, w8, w9
-; -O0:    ldaxrh w9, [x11]
-; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w9, w8, uxth
-; -O0:    subs w8, w8, #1
-;
-; -O1-LABEL: atomicrmw_fadd_bfloat_aligned_release:
-; -O1:    ldxrh w9, [x0]
-; -O1:    add w9, w9, w8
-; -O1:    add w9, w10, w9
-; -O1:    stlxrh w10, w9, [x0]
+; CHECK-LABEL: atomicrmw_fadd_bfloat_aligned_release:
+; CHECK:    ldbfaddl h0, h0, [x0]
     %r = atomicrmw fadd ptr %ptr, bfloat %value release, align 2
     ret bfloat %r
 }
 
 define dso_local bfloat @atomicrmw_fadd_bfloat_aligned_acq_rel(ptr %ptr, bfloat %value) {
-; -O0-LABEL: atomicrmw_fadd_bfloat_aligned_acq_rel:
-; -O0:    add w8, w8, w9
-; -O0:    add w8, w8, w9
-; -O0:    ldaxrh w9, [x11]
-; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w9, w8, uxth
-; -O0:    subs w8, w8, #1
-;
-; -O1-LABEL: atomicrmw_fadd_bfloat_aligned_acq_rel:
-; -O1:    ldaxrh w9, [x0]
-; -O1:    add w9, w9, w8
-; -O1:    add w9, w10, w9
-; -O1:    stlxrh w10, w9, [x0]
+; CHECK-LABEL: atomicrmw_fadd_bfloat_aligned_acq_rel:
+; CHECK:    ldbfaddal h0, h0, [x0]
     %r = atomicrmw fadd ptr %ptr, bfloat %value acq_rel, align 2
     ret bfloat %r
 }
 
 define dso_local bfloat @atomicrmw_fadd_bfloat_aligned_seq_cst(ptr %ptr, bfloat %value) {
-; -O0-LABEL: atomicrmw_fadd_bfloat_aligned_seq_cst:
-; -O0:    add w8, w8, w9
-; -O0:    add w8, w8, w9
-; -O0:    ldaxrh w9, [x11]
-; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w9, w8, uxth
-; -O0:    subs w8, w8, #1
-;
-; -O1-LABEL: atomicrmw_fadd_bfloat_aligned_seq_cst:
-; -O1:    ldaxrh w9, [x0]
-; -O1:    add w9, w9, w8
-; -O1:    add w9, w10, w9
-; -O1:    stlxrh w10, w9, [x0]
+; CHECK-LABEL: atomicrmw_fadd_bfloat_aligned_seq_cst:
+; CHECK:    ldbfaddal h0, h0, [x0]
     %r = atomicrmw fadd ptr %ptr, bfloat %value seq_cst, align 2
     ret bfloat %r
 }
 
 define dso_local float @atomicrmw_fadd_float_aligned_monotonic(ptr %ptr, float %value) {
-; -O0-LABEL: atomicrmw_fadd_float_aligned_monotonic:
-; -O0:    ldaxr w0, [x9]
-; -O0:    cmp w0, w10
-; -O0:    stlxr w8, w11, [x9]
-; -O0:    subs w8, w0, w8
-;
-; -O1-LABEL: atomicrmw_fadd_float_aligned_monotonic:
-; -O1:    ldxr w8, [x0]
-; -O1:    stxr w9, w8, [x0]
+; CHECK-LABEL: atomicrmw_fadd_float_aligned_monotonic:
+; CHECK:    ldfadd s0, s0, [x0]
     %r = atomicrmw fadd ptr %ptr, float %value monotonic, align 4
     ret float %r
 }
 
 define dso_local float @atomicrmw_fadd_float_aligned_acquire(ptr %ptr, float %value) {
-; -O0-LABEL: atomicrmw_fadd_float_aligned_acquire:
-; -O0:    ldaxr w0, [x9]
-; -O0:    cmp w0, w10
-; -O0:    stlxr w8, w11, [x9]
-; -O0:    subs w8, w0, w8
-;
-; -O1-LABEL: atomicrmw_fadd_float_aligned_acquire:
-; -O1:    ldaxr w8, [x0]
-; -O1:    stxr w9, w8, [x0]
+; CHECK-LABEL: atomicrmw_fadd_float_aligned_acquire:
+; CHECK:    ldfadda s0, s0, [x0]
     %r = atomicrmw fadd ptr %ptr, float %value acquire, align 4
     ret float %r
 }
 
 define dso_local float @atomicrmw_fadd_float_aligned_release(ptr %ptr, float %value) {
-; -O0-LABEL: atomicrmw_fadd_float_aligned_release:
-; -O0:    ldaxr w0, [x9]
-; -O0:    cmp w0, w10
-; -O0:    stlxr w8, w11, [x9]
-; -O0:    subs w8, w0, w8
-;
-; -O1-LABEL: atomicrmw_fadd_float_aligned_release:
-; -O1:    ldxr w8, [x0]
-; -O1:    stlxr w9, w8, [x0]
+; CHECK-LABEL: atomicrmw_fadd_float_aligned_release:
+; CHECK:    ldfaddl s0, s0, [x0]
     %r = atomicrmw fadd ptr %ptr, float %value release, align 4
     ret float %r
 }
 
 define dso_local float @atomicrmw_fadd_float_aligned_acq_rel(ptr %ptr, float %value) {
-; -O0-LABEL: atomicrmw_fadd_float_aligned_acq_rel:
-; -O0:    ldaxr w0, [x9]
-; -O0:    cmp w0, w10
-; -O0:    stlxr w8, w11, [x9]
-; -O0:    subs w8, w0, w8
-;
-; -O1-LABEL: atomicrmw_fadd_float_aligned_acq_rel:
-; -O1:    ldaxr w8, [x0]
-; -O1:    stlxr w9, w8, [x0]
+; CHECK-LABEL: atomicrmw_fadd_float_aligned_acq_rel:
+; CHECK:    ldfaddal s0, s0, [x0]
     %r = atomicrmw fadd ptr %ptr, float %value acq_rel, align 4
     ret float %r
 }
 
 define dso_local float @atomicrmw_fadd_float_aligned_seq_cst(ptr %ptr, float %value) {
-; -O0-LABEL: atomicrmw_fadd_float_aligned_seq_cst:
-; -O0:    ldaxr w0, [x9]
-; -O0:    cmp w0, w10
-; -O0:    stlxr w8, w11, [x9]
-; -O0:    subs w8, w0, w8
-;
-; -O1-LABEL: atomicrmw_fadd_float_aligned_seq_cst:
-; -O1:    ldaxr w8, [x0]
-; -O1:    stlxr w9, w8, [x0]
+; CHECK-LABEL: atomicrmw_fadd_float_aligned_seq_cst:
+; CHECK:    ldfaddal s0, s0, [x0]
     %r = atomicrmw fadd ptr %ptr, float %value seq_cst, align 4
     ret float %r
 }
 
 define dso_local double @atomicrmw_fadd_double_aligned_monotonic(ptr %ptr, double %value) {
-; -O0-LABEL: atomicrmw_fadd_double_aligned_monotonic:
-; -O0:    ldaxr x0, [x9]
-; -O0:    cmp x0, x10
-; -O0:    stlxr w8, x11, [x9]
-; -O0:    subs x8, x0, x8
-;
-; -O1-LABEL: atomicrmw_fadd_double_aligned_monotonic:
-; -O1:    ldxr x8, [x0]
-; -O1:    stxr w9, x8, [x0]
+; CHECK-LABEL: atomicrmw_fadd_double_aligned_monotonic:
+; CHECK:    ldfadd d0, d0, [x0]
     %r = atomicrmw fadd ptr %ptr, double %value monotonic, align 8
     ret double %r
 }
 
 define dso_local double @atomicrmw_fadd_double_aligned_acquire(ptr %ptr, double %value) {
-; -O0-LABEL: atomicrmw_fadd_double_aligned_acquire:
-; -O0:    ldaxr x0, [x9]
-; -O0:    cmp x0, x10
-; -O0:    stlxr w8, x11, [x9]
-; -O0:    subs x8, x0, x8
-;
-; -O1-LABEL: atomicrmw_fadd_double_aligned_acquire:
-; -O1:    ldaxr x8, [x0]
-; -O1:    stxr w9, x8, [x0]
+; CHECK-LABEL: atomicrmw_fadd_double_aligned_acquire:
+; CHECK:    ldfadda d0, d0, [x0]
     %r = atomicrmw fadd ptr %ptr, double %value acquire, align 8
     ret double %r
 }
 
 define dso_local double @atomicrmw_fadd_double_aligned_release(ptr %ptr, double %value) {
-; -O0-LABEL: atomicrmw_fadd_double_aligned_release:
-; -O0:    ldaxr x0, [x9]
-; -O0:    cmp x0, x10
-; -O0:    stlxr w8, x11, [x9]
-; -O0:    subs x8, x0, x8
-;
-; -O1-LABEL: atomicrmw_fadd_double_aligned_release:
-; -O1:    ldxr x8, [x0]
-; -O1:    stlxr w9, x8, [x0]
+; CHECK-LABEL: atomicrmw_fadd_double_aligned_release:
+; CHECK:    ldfaddl d0, d0, [x0]
     %r = atomicrmw fadd ptr %ptr, double %value release, align 8
     ret double %r
 }
 
 define dso_local double @atomicrmw_fadd_double_aligned_acq_rel(ptr %ptr, double %value) {
-; -O0-LABEL: atomicrmw_fadd_double_aligned_acq_rel:
-; -O0:    ldaxr x0, [x9]
-; -O0:    cmp x0, x10
-; -O0:    stlxr w8, x11, [x9]
-; -O0:    subs x8, x0, x8
-;
-; -O1-LABEL: atomicrmw_fadd_double_aligned_acq_rel:
-; -O1:    ldaxr x8, [x0]
-; -O1:    stlxr w9, x8, [x0]
+; CHECK-LABEL: atomicrmw_fadd_double_aligned_acq_rel:
+; CHECK:    ldfaddal d0, d0, [x0]
     %r = atomicrmw fadd ptr %ptr, double %value acq_rel, align 8
     ret double %r
 }
 
 define dso_local double @atomicrmw_fadd_double_aligned_seq_cst(ptr %ptr, double %value) {
-; -O0-LABEL: atomicrmw_fadd_double_aligned_seq_cst:
-; -O0:    ldaxr x0, [x9]
-; -O0:    cmp x0, x10
-; -O0:    stlxr w8, x11, [x9]
-; -O0:    subs x8, x0, x8
-;
-; -O1-LABEL: atomicrmw_fadd_double_aligned_seq_cst:
-; -O1:    ldaxr x8, [x0]
-; -O1:    stlxr w9, x8, [x0]
+; CHECK-LABEL: atomicrmw_fadd_double_aligned_seq_cst:
+; CHECK:    ldfaddal d0, d0, [x0]
     %r = atomicrmw fadd ptr %ptr, double %value seq_cst, align 8
     ret double %r
 }
@@ -964,306 +799,141 @@ define dso_local double @atomicrmw_fsub_double_unaligned_seq_cst(ptr %ptr, doubl
 }
 
 define dso_local half @atomicrmw_fmax_half_aligned_monotonic(ptr %ptr, half %value) {
-; -O0-LABEL: atomicrmw_fmax_half_aligned_monotonic:
-; -O0:    ldaxrh w0, [x9]
-; -O0:    cmp w0, w10, uxth
-; -O0:    stlxrh w8, w11, [x9]
-; -O0:    subs w8, w8, w0, uxth
-;
-; -O1-LABEL: atomicrmw_fmax_half_aligned_monotonic:
-; -O1:    ldxrh w8, [x0]
-; -O1:    stxrh w9, w8, [x0]
+; CHECK-LABEL: atomicrmw_fmax_half_aligned_monotonic:
+; CHECK:    ldfmaxnm h0, h0, [x0]
     %r = atomicrmw fmax ptr %ptr, half %value monotonic, align 2
     ret half %r
 }
 
 define dso_local half @atomicrmw_fmax_half_aligned_acquire(ptr %ptr, half %value) {
-; -O0-LABEL: atomicrmw_fmax_half_aligned_acquire:
-; -O0:    ldaxrh w0, [x9]
-; -O0:    cmp w0, w10, uxth
-; -O0:    stlxrh w8, w11, [x9]
-; -O0:    subs w8, w8, w0, uxth
-;
-; -O1-LABEL: atomicrmw_fmax_half_aligned_acquire:
-; -O1:    ldaxrh w8, [x0]
-; -O1:    stxrh w9, w8, [x0]
+; CHECK-LABEL: atomicrmw_fmax_half_aligned_acquire:
+; CHECK:    ldfmaxnma h0, h0, [x0]
     %r = atomicrmw fmax ptr %ptr, half %value acquire, align 2
     ret half %r
 }
 
 define dso_local half @atomicrmw_fmax_half_aligned_release(ptr %ptr, half %value) {
-; -O0-LABEL: atomicrmw_fmax_half_aligned_release:
-; -O0:    ldaxrh w0, [x9]
-; -O0:    cmp w0, w10, uxth
-; -O0:    stlxrh w8, w11, [x9]
-; -O0:    subs w8, w8, w0, uxth
-;
-; -O1-LABEL: atomicrmw_fmax_half_aligned_release:
-; -O1:    ldxrh w8, [x0]
-; -O1:    stlxrh w9, w8, [x0]
+; CHECK-LABEL: atomicrmw_fmax_half_aligned_release:
+; CHECK:    ldfmaxnml h0, h0, [x0]
     %r = atomicrmw fmax ptr %ptr, half %value release, align 2
     ret half %r
 }
 
 define dso_local half @atomicrmw_fmax_half_aligned_acq_rel(ptr %ptr, half %value) {
-; -O0-LABEL: atomicrmw_fmax_half_aligned_acq_rel:
-; -O0:    ldaxrh w0, [x9]
-; -O0:    cmp w0, w10, uxth
-; -O0:    stlxrh w8, w11, [x9]
-; -O0:    subs w8, w8, w0, uxth
-;
-; -O1-LABEL: atomicrmw_fmax_half_aligned_acq_rel:
-; -O1:    ldaxrh w8, [x0]
-; -O1:    stlxrh w9, w8, [x0]
+; CHECK-LABEL: atomicrmw_fmax_half_aligned_acq_rel:
+; CHECK:    ldfmaxnmal h0, h0, [x0]
     %r = atomicrmw fmax ptr %ptr, half %value acq_rel, align 2
     ret half %r
 }
 
 define dso_local half @atomicrmw_fmax_half_aligned_seq_cst(ptr %ptr, half %value) {
-; -O0-LABEL: atomicrmw_fmax_half_aligned_seq_cst:
-; -O0:    ldaxrh w0, [x9]
-; -O0:    cmp w0, w10, uxth
-; -O0:    stlxrh w8, w11, [x9]
-; -O0:    subs w8, w8, w0, uxth
-;
-; -O1-LABEL: atomicrmw_fmax_half_aligned_seq_cst:
-; -O1:    ldaxrh w8, [x0]
-; -O1:    stlxrh w9, w8, [x0]
+; CHECK-LABEL: atomicrmw_fmax_half_aligned_seq_cst:
+; CHECK:    ldfmaxnmal h0, h0, [x0]
     %r = atomicrmw fmax ptr %ptr, half %value seq_cst, align 2
     ret half %r
 }
 
 define dso_local bfloat @atomicrmw_fmax_bfloat_aligned_monotonic(ptr %ptr, bfloat %value) {
-; -O0-LABEL: atomicrmw_fmax_bfloat_aligned_monotonic:
-; -O0:    add w8, w8, w9
-; -O0:    add w8, w8, w9
-; -O0:    ldaxrh w9, [x11]
-; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w9, w8, uxth
-; -O0:    subs w8, w8, #1
-;
-; -O1-LABEL: atomicrmw_fmax_bfloat_aligned_monotonic:
-; -O1:    ldxrh w9, [x0]
-; -O1:    add w9, w9, w8
-; -O1:    add w9, w10, w9
-; -O1:    stxrh w10, w9, [x0]
+; CHECK-LABEL: atomicrmw_fmax_bfloat_aligned_monotonic:
+; CHECK:    ldbfmaxnm h0, h0, [x0]
     %r = atomicrmw fmax ptr %ptr, bfloat %value monotonic, align 2
     ret bfloat %r
 }
 
 define dso_local bfloat @atomicrmw_fmax_bfloat_aligned_acquire(ptr %ptr, bfloat %value) {
-; -O0-LABEL: atomicrmw_fmax_bfloat_aligned_acquire:
-; -O0:    add w8, w8, w9
-; -O0:    add w8, w8, w9
-; -O0:    ldaxrh w9, [x11]
-; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w9, w8, uxth
-; -O0:    subs w8, w8, #1
-;
-; -O1-LABEL: atomicrmw_fmax_bfloat_aligned_acquire:
-; -O1:    ldaxrh w9, [x0]
-; -O1:    add w9, w9, w8
-; -O1:    add w9, w10, w9
-; -O1:    stxrh w10, w9, [x0]
+; CHECK-LABEL: atomicrmw_fmax_bfloat_aligned_acquire:
+; CHECK:    ldbfmaxnma h0, h0, [x0]
     %r = atomicrmw fmax ptr %ptr, bfloat %value acquire, align 2
     ret bfloat %r
 }
 
 define dso_local bfloat @atomicrmw_fmax_bfloat_aligned_release(ptr %ptr, bfloat %value) {
-; -O0-LABEL: atomicrmw_fmax_bfloat_aligned_release:
-; -O0:    add w8, w8, w9
-; -O0:    add w8, w8, w9
-; -O0:    ldaxrh w9, [x11]
-; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w9, w8, uxth
-; -O0:    subs w8, w8, #1
-;
-; -O1-LABEL: atomicrmw_fmax_bfloat_aligned_release:
-; -O1:    ldxrh w9, [x0]
-; -O1:    add w9, w9, w8
-; -O1:    add w9, w10, w9
-; -O1:    stlxrh w10, w9, [x0]
+; CHECK-LABEL: atomicrmw_fmax_bfloat_aligned_release:
+; CHECK:    ldbfmaxnml h0, h0, [x0]
     %r = atomicrmw fmax ptr %ptr, bfloat %value release, align 2
     ret bfloat %r
 }
 
 define dso_local bfloat @atomicrmw_fmax_bfloat_aligned_acq_rel(ptr %ptr, bfloat %value) {
-; -O0-LABEL: atomicrmw_fmax_bfloat_aligned_acq_rel:
-; -O0:    add w8, w8, w9
-; -O0:    add w8, w8, w9
-; -O0:    ldaxrh w9, [x11]
-; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w9, w8, uxth
-; -O0:    subs w8, w8, #1
-;
-; -O1-LABEL: atomicrmw_fmax_bfloat_aligned_acq_rel:
-; -O1:    ldaxrh w9, [x0]
-; -O1:    add w9, w9, w8
-; -O1:    add w9, w10, w9
-; -O1:    stlxrh w10, w9, [x0]
+; CHECK-LABEL: atomicrmw_fmax_bfloat_aligned_acq_rel:
+; CHECK:    ldbfmaxnmal h0, h0, [x0]
     %r = atomicrmw fmax ptr %ptr, bfloat %value acq_rel, align 2
     ret bfloat %r
 }
 
 define dso_local bfloat @atomicrmw_fmax_bfloat_aligned_seq_cst(ptr %ptr, bfloat %value) {
-; -O0-LABEL: atomicrmw_fmax_bfloat_aligned_seq_cst:
-; -O0:    add w8, w8, w9
-; -O0:    add w8, w8, w9
-; -O0:    ldaxrh w9, [x11]
-; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w9, w8, uxth
-; -O0:    subs w8, w8, #1
-;
-; -O1-LABEL: atomicrmw_fmax_bfloat_aligned_seq_cst:
-; -O1:    ldaxrh w9, [x0]
-; -O1:    add w9, w9, w8
-; -O1:    add w9, w10, w9
-; -O1:    stlxrh w10, w9, [x0]
+; CHECK-LABEL: atomicrmw_fmax_bfloat_aligned_seq_cst:
+; CHECK:    ldbfmaxnmal h0, h0, [x0]
     %r = atomicrmw fmax ptr %ptr, bfloat %value seq_cst, align 2
     ret bfloat %r
 }
 
 define dso_local float @atomicrmw_fmax_float_aligned_monotonic(ptr %ptr, float %value) {
-; -O0-LABEL: atomicrmw_fmax_float_aligned_monotonic:
-; -O0:    ldaxr w0, [x9]
-; -O0:    cmp w0, w10
-; -O0:    stlxr w8, w11, [x9]
-; -O0:    subs w8, w0, w8
-;
-; -O1-LABEL: atomicrmw_fmax_float_aligned_monotonic:
-; -O1:    ldxr w8, [x0]
-; -O1:    stxr w9, w8, [x0]
+; CHECK-LABEL: atomicrmw_fmax_float_aligned_monotonic:
+; CHECK:    ldfmaxnm s0, s0, [x0]
     %r = atomicrmw fmax ptr %ptr, float %value monotonic, align 4
     ret float %r
 }
 
 define dso_local float @atomicrmw_fmax_float_aligned_acquire(ptr %ptr, float %value) {
-; -O0-LABEL: atomicrmw_fmax_float_aligned_acquire:
-; -O0:    ldaxr w0, [x9]
-; -O0:    cmp w0, w10
-; -O0:    stlxr w8, w11, [x9]
-; -O0:    subs w8, w0, w8
-;
-; -O1-LABEL: atomicrmw_fmax_float_aligned_acquire:
-; -O1:    ldaxr w8, [x0]
-; -O1:    stxr w9, w8, [x0]
+; CHECK-LABEL: atomicrmw_fmax_float_aligned_acquire:
+; CHECK:    ldfmaxnma s0, s0, [x0]
     %r = atomicrmw fmax ptr %ptr, float %value acquire, align 4
     ret float %r
 }
 
 define dso_local float @atomicrmw_fmax_float_aligned_release(ptr %ptr, float %value) {
-; -O0-LABEL: atomicrmw_fmax_float_aligned_release:
-; -O0:    ldaxr w0, [x9]
-; -O0:    cmp w0, w10
-; -O0:    stlxr w8, w11, [x9]
-; -O0:    subs w8, w0, w8
-;
-; -O1-LABEL: atomicrmw_fmax_float_aligned_release:
-; -O1:    ldxr w8, [x0]
-; -O1:    stlxr w9, w8, [x0]
+; CHECK-LABEL: atomicrmw_fmax_float_aligned_release:
+; CHECK:    ldfmaxnml s0, s0, [x0]
     %r = atomicrmw fmax ptr %ptr, float %value release, align 4
     ret float %r
 }
 
 define dso_local float @atomicrmw_fmax_float_aligned_acq_rel(ptr %ptr, float %value) {
-; -O0-LABEL: atomicrmw_fmax_float_aligned_acq_rel:
-; -O0:    ldaxr w0, [x9]
-; -O0:    cmp w0, w10
-; -O0:    stlxr w8, w11, [x9]
-; -O0:    subs w8, w0, w8
-;
-; -O1-LABEL: atomicrmw_fmax_float_aligned_acq_rel:
-; -O1:    ldaxr w8, [x0]
-; -O1:    stlxr w9, w8, [x0]
+; CHECK-LABEL: atomicrmw_fmax_float_aligned_acq_rel:
+; CHECK:    ldfmaxnmal s0, s0, [x0]
     %r = atomicrmw fmax ptr %ptr, float %value acq_rel, align 4
     ret float %r
 }
 
 define dso_local float @atomicrmw_fmax_float_aligned_seq_cst(ptr %ptr, float %value) {
-; -O0-LABEL: atomicrmw_fmax_float_aligned_seq_cst:
-; -O0:    ldaxr w0, [x9]
-; -O0:    cmp w0, w10
-; -O0:    stlxr w8, w11, [x9]
-; -O0:    subs w8, w0, w8
-;
-; -O1-LABEL: atomicrmw_fmax_float_aligned_seq_cst:
-; -O1:    ldaxr w8, [x0]
-; -O1:    stlxr w9, w8, [x0]
+; CHECK-LABEL: atomicrmw_fmax_float_aligned_seq_cst:
+; CHECK:    ldfmaxnmal s0, s0, [x0]
     %r = atomicrmw fmax ptr %ptr, float %value seq_cst, align 4
     ret float %r
 }
 
 define dso_local double @atomicrmw_fmax_double_aligned_monotonic(ptr %ptr, double %value) {
-; -O0-LABEL: atomicrmw_fmax_double_aligned_monotonic:
-; -O0:    ldaxr x0, [x9]
-; -O0:    cmp x0, x10
-; -O0:    stlxr w8, x11, [x9]
-; -O0:    subs x8, x0, x8
-;
-; -O1-LABEL: atomicrmw_fmax_double_aligned_monotonic:
-; -O1:    ldxr x8, [x0]
-; -O1:    stxr w9, x8, [x0]
+; CHECK-LABEL: atomicrmw_fmax_double_aligned_monotonic:
+; CHECK:    ldfmaxnm d0, d0, [x0]
     %r = atomicrmw fmax ptr %ptr, double %value monotonic, align 8
     ret double %r
 }
 
 define dso_local double @atomicrmw_fmax_double_aligned_acquire(ptr %ptr, double %value) {
-; -O0-LABEL: atomicrmw_fmax_double_aligned_acquire:
-; -O0:    ldaxr x0, [x9]
-; -O0:    cmp x0, x10
-; -O0:    stlxr w8, x11, [x9]
-; -O0:    subs x8, x0, x8
-;
-; -O1-LABEL: atomicrmw_fmax_double_aligned_acquire:
-; -O1:    ldaxr x8, [x0]
-; -O1:    stxr w9, x8, [x0]
+; CHECK-LABEL: atomicrmw_fmax_double_aligned_acquire:
+; CHECK:    ldfmaxnma d0, d0, [x0]
     %r = atomicrmw fmax ptr %ptr, double %value acquire, align 8
     ret double %r
 }
 
 define dso_local double @atomicrmw_fmax_double_aligned_release(ptr %ptr, double %value) {
-; -O0-LABEL: atomicrmw_fmax_double_aligned_release:
-; -O0:    ldaxr x0, [x9]
-; -O0:    cmp x0, x10
-; -O0:    stlxr w8, x11, [x9]
-; -O0:    subs x8, x0, x8
-;
-; -O1-LABEL: atomicrmw_fmax_double_aligned_release:
-; -O1:    ldxr x8, [x0]
-; -O1:    stlxr w9, x8, [x0]
+; CHECK-LABEL: atomicrmw_fmax_double_aligned_release:
+; CHECK:    ldfmaxnml d0, d0, [x0]
     %r = atomicrmw fmax ptr %ptr, double %value release, align 8
     ret double %r
 }
 
 define dso_local double @atomicrmw_fmax_double_aligned_acq_rel(ptr %ptr, double %value) {
-; -O0-LABEL: atomicrmw_fmax_double_aligned_acq_rel:
-; -O0:    ldaxr x0, [x9]
-; -O0:    cmp x0, x10
-; -O0:    stlxr w8, x11, [x9]
-; -O0:    subs x8, x0, x8
-;
-; -O1-LABEL: atomicrmw_fmax_double_aligned_acq_rel:
-; -O1:    ldaxr x8, [x0]
-; -O1:    stlxr w9, x8, [x0]
+; CHECK-LABEL: atomicrmw_fmax_double_aligned_acq_rel:
+; CHECK:    ldfmaxnmal d0, d0, [x0]
     %r = atomicrmw fmax ptr %ptr, double %value acq_rel, align 8
     ret double %r
 }
 
 define dso_local double @atomicrmw_fmax_double_aligned_seq_cst(ptr %ptr, double %value) {
-; -O0-LABEL: atomicrmw_fmax_double_aligned_seq_cst:
-; -O0:    ldaxr x0, [x9]
-; -O0:    cmp x0, x10
-; -O0:    stlxr w8, x11, [x9]
-; -O0:    subs x8, x0, x8
-;
-; -O1-LABEL: atomicrmw_fmax_double_aligned_seq_cst:
-; -O1:    ldaxr x8, [x0]
-; -O1:    stlxr w9, x8, [x0]
+; CHECK-LABEL: atomicrmw_fmax_double_aligned_seq_cst:
+; CHECK:    ldfmaxnmal d0, d0, [x0]
     %r = atomicrmw fmax ptr %ptr, double %value seq_cst, align 8
     ret double %r
 }
@@ -1444,306 +1114,141 @@ define dso_local double @atomicrmw_fmax_double_unaligned_seq_cst(ptr %ptr, doubl
 }
 
 define dso_local half @atomicrmw_fmin_half_aligned_monotonic(ptr %ptr, half %value) {
-; -O0-LABEL: atomicrmw_fmin_half_aligned_monotonic:
-; -O0:    ldaxrh w0, [x9]
-; -O0:    cmp w0, w10, uxth
-; -O0:    stlxrh w8, w11, [x9]
-; -O0:    subs w8, w8, w0, uxth
-;
-; -O1-LABEL: atomicrmw_fmin_half_aligned_monotonic:
-; -O1:    ldxrh w8, [x0]
-; -O1:    stxrh w9, w8, [x0]
+; CHECK-LABEL: atomicrmw_fmin_half_aligned_monotonic:
+; CHECK:    ldfminnm h0, h0, [x0]
     %r = atomicrmw fmin ptr %ptr, half %value monotonic, align 2
     ret half %r
 }
 
 define dso_local half @atomicrmw_fmin_half_aligned_acquire(ptr %ptr, half %value) {
-; -O0-LABEL: atomicrmw_fmin_half_aligned_acquire:
-; -O0:    ldaxrh w0, [x9]
-; -O0:    cmp w0, w10, uxth
-; -O0:    stlxrh w8, w11, [x9]
-; -O0:    subs w8, w8, w0, uxth
-;
-; -O1-LABEL: atomicrmw_fmin_half_aligned_acquire:
-; -O1:    ldaxrh w8, [x0]
-; -O1:    stxrh w9, w8, [x0]
+; CHECK-LABEL: atomicrmw_fmin_half_aligned_acquire:
+; CHECK:    ldfminnma h0, h0, [x0]
     %r = atomicrmw fmin ptr %ptr, half %value acquire, align 2
     ret half %r
 }
 
 define dso_local half @atomicrmw_fmin_half_aligned_release(ptr %ptr, half %value) {
-; -O0-LABEL: atomicrmw_fmin_half_aligned_release:
-; -O0:    ldaxrh w0, [x9]
-; -O0:    cmp w0, w10, uxth
-; -O0:    stlxrh w8, w11, [x9]
-; -O0:    subs w8, w8, w0, uxth
-;
-; -O1-LABEL: atomicrmw_fmin_half_aligned_release:
-; -O1:    ldxrh w8, [x0]
-; -O1:    stlxrh w9, w8, [x0]
+; CHECK-LABEL: atomicrmw_fmin_half_aligned_release:
+; CHECK:    ldfminnml h0, h0, [x0]
     %r = atomicrmw fmin ptr %ptr, half %value release, align 2
     ret half %r
 }
 
 define dso_local half @atomicrmw_fmin_half_aligned_acq_rel(ptr %ptr, half %value) {
-; -O0-LABEL: atomicrmw_fmin_half_aligned_acq_rel:
-; -O0:    ldaxrh w0, [x9]
-; -O0:    cmp w0, w10, uxth
-; -O0:    stlxrh w8, w11, [x9]
-; -O0:    subs w8, w8, w0, uxth
-;
-; -O1-LABEL: atomicrmw_fmin_half_aligned_acq_rel:
-; -O1:    ldaxrh w8, [x0]
-; -O1:    stlxrh w9, w8, [x0]
+; CHECK-LABEL: atomicrmw_fmin_half_aligned_acq_rel:
+; CHECK:    ldfminnmal h0, h0, [x0]
     %r = atomicrmw fmin ptr %ptr, half %value acq_rel, align 2
     ret half %r
 }
 
 define dso_local half @atomicrmw_fmin_half_aligned_seq_cst(ptr %ptr, half %value) {
-; -O0-LABEL: atomicrmw_fmin_half_aligned_seq_cst:
-; -O0:    ldaxrh w0, [x9]
-; -O0:    cmp w0, w10, uxth
-; -O0:    stlxrh w8, w11, [x9]
-; -O0:    subs w8, w8, w0, uxth
-;
-; -O1-LABEL: atomicrmw_fmin_half_aligned_seq_cst:
-; -O1:    ldaxrh w8, [x0]
-; -O1:    stlxrh w9, w8, [x0]
+; CHECK-LABEL: atomicrmw_fmin_half_aligned_seq_cst:
+; CHECK:    ldfminnmal h0, h0, [x0]
     %r = atomicrmw fmin ptr %ptr, half %value seq_cst, align 2
     ret half %r
 }
 
 define dso_local bfloat @atomicrmw_fmin_bfloat_aligned_monotonic(ptr %ptr, bfloat %value) {
-; -O0-LABEL: atomicrmw_fmin_bfloat_aligned_monotonic:
-; -O0:    add w8, w8, w9
-; -O0:    add w8, w8, w9
-; -O0:    ldaxrh w9, [x11]
-; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w9, w8, uxth
-; -O0:    subs w8, w8, #1
-;
-; -O1-LABEL: atomicrmw_fmin_bfloat_aligned_monotonic:
-; -O1:    ldxrh w9, [x0]
-; -O1:    add w9, w9, w8
-; -O1:    add w9, w10, w9
-; -O1:    stxrh w10, w9, [x0]
+; CHECK-LABEL: atomicrmw_fmin_bfloat_aligned_monotonic:
+; CHECK:    ldbfminnm h0, h0, [x0]
     %r = atomicrmw fmin ptr %ptr, bfloat %value monotonic, align 2
     ret bfloat %r
 }
 
 define dso_local bfloat @atomicrmw_fmin_bfloat_aligned_acquire(ptr %ptr, bfloat %value) {
-; -O0-LABEL: atomicrmw_fmin_bfloat_aligned_acquire:
-; -O0:    add w8, w8, w9
-; -O0:    add w8, w8, w9
-; -O0:    ldaxrh w9, [x11]
-; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w9, w8, uxth
-; -O0:    subs w8, w8, #1
-;
-; -O1-LABEL: atomicrmw_fmin_bfloat_aligned_acquire:
-; -O1:    ldaxrh w9, [x0]
-; -O1:    add w9, w9, w8
-; -O1:    add w9, w10, w9
-; -O1:    stxrh w10, w9, [x0]
+; CHECK-LABEL: atomicrmw_fmin_bfloat_aligned_acquire:
+; CHECK:    ldbfminnma h0, h0, [x0]
     %r = atomicrmw fmin ptr %ptr, bfloat %value acquire, align 2
     ret bfloat %r
 }
 
 define dso_local bfloat @atomicrmw_fmin_bfloat_aligned_release(ptr %ptr, bfloat %value) {
-; -O0-LABEL: atomicrmw_fmin_bfloat_aligned_release:
-; -O0:    add w8, w8, w9
-; -O0:    add w8, w8, w9
-; -O0:    ldaxrh w9, [x11]
-; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w9, w8, uxth
-; -O0:    subs w8, w8, #1
-;
-; -O1-LABEL: atomicrmw_fmin_bfloat_aligned_release:
-; -O1:    ldxrh w9, [x0]
-; -O1:    add w9, w9, w8
-; -O1:    add w9, w10, w9
-; -O1:    stlxrh w10, w9, [x0]
+; CHECK-LABEL: atomicrmw_fmin_bfloat_aligned_release:
+; CHECK:    ldbfminnml h0, h0, [x0]
     %r = atomicrmw fmin ptr %ptr, bfloat %value release, align 2
     ret bfloat %r
 }
 
 define dso_local bfloat @atomicrmw_fmin_bfloat_aligned_acq_rel(ptr %ptr, bfloat %value) {
-; -O0-LABEL: atomicrmw_fmin_bfloat_aligned_acq_rel:
-; -O0:    add w8, w8, w9
-; -O0:    add w8, w8, w9
-; -O0:    ldaxrh w9, [x11]
-; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w9, w8, uxth
-; -O0:    subs w8, w8, #1
-;
-; -O1-LABEL: atomicrmw_fmin_bfloat_aligned_acq_rel:
-; -O1:    ldaxrh w9, [x0]
-; -O1:    add w9, w9, w8
-; -O1:    add w9, w10, w9
-; -O1:    stlxrh w10, w9, [x0]
+; CHECK-LABEL: atomicrmw_fmin_bfloat_aligned_acq_rel:
+; CHECK:    ldbfminnmal h0, h0, [x0]
     %r = atomicrmw fmin ptr %ptr, bfloat %value acq_rel, align 2
     ret bfloat %r
 }
 
 define dso_local bfloat @atomicrmw_fmin_bfloat_aligned_seq_cst(ptr %ptr, bfloat %value) {
-; -O0-LABEL: atomicrmw_fmin_bfloat_aligned_seq_cst:
-; -O0:    add w8, w8, w9
-; -O0:    add w8, w8, w9
-; -O0:    ldaxrh w9, [x11]
-; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w9, w8, uxth
-; -O0:    subs w8, w8, #1
-;
-; -O1-LABEL: atomicrmw_fmin_bfloat_aligned_seq_cst:
-; -O1:    ldaxrh w9, [x0]
-; -O1:    add w9, w9, w8
-; -O1:    add w9, w10, w9
-; -O1:    stlxrh w10, w9, [x0]
+; CHECK-LABEL: atomicrmw_fmin_bfloat_aligned_seq_cst:
+; CHECK:    ldbfminnmal h0, h0, [x0]
     %r = atomicrmw fmin ptr %ptr, bfloat %value seq_cst, align 2
     ret bfloat %r
 }
 
 define dso_local float @atomicrmw_fmin_float_aligned_monotonic(ptr %ptr, float %value) {
-; -O0-LABEL: atomicrmw_fmin_float_aligned_monotonic:
-; -O0:    ldaxr w0, [x9]
-; -O0:    cmp w0, w10
-; -O0:    stlxr w8, w11, [x9]
-; -O0:    subs w8, w0, w8
-;
-; -O1-LABEL: atomicrmw_fmin_float_aligned_monotonic:
-; -O1:    ldxr w8, [x0]
-; -O1:    stxr w9, w8, [x0]
+; CHECK-LABEL: atomicrmw_fmin_float_aligned_monotonic:
+; CHECK:    ldfminnm s0, s0, [x0]
     %r = atomicrmw fmin ptr %ptr, float %value monotonic, align 4
     ret float %r
 }
 
 define dso_local float @atomicrmw_fmin_float_aligned_acquire(ptr %ptr, float %value) {
-; -O0-LABEL: atomicrmw_fmin_float_aligned_acquire:
-; -O0:    ldaxr w0, [x9]
-; -O0:    cmp w0, w10
-; -O0:    stlxr w8, w11, [x9]
-; -O0:    subs w8, w0, w8
-;
-; -O1-LABEL: atomicrmw_fmin_float_aligned_acquire:
-; -O1:    ldaxr w8, [x0]
-; -O1:    stxr w9, w8, [x0]
+; CHECK-LABEL: atomicrmw_fmin_float_aligned_acquire:
+; CHECK:    ldfminnma s0, s0, [x0]
     %r = atomicrmw fmin ptr %ptr, float %value acquire, align 4
     ret float %r
 }
 
 define dso_local float @atomicrmw_fmin_float_aligned_release(ptr %ptr, float %value) {
-; -O0-LABEL: atomicrmw_fmin_float_aligned_release:
-; -O0:    ldaxr w0, [x9]
-; -O0:    cmp w0, w10
-; -O0:    stlxr w8, w11, [x9]
-; -O0:    subs w8, w0, w8
-;
-; -O1-LABEL: atomicrmw_fmin_float_aligned_release:
-; -O1:    ldxr w8, [x0]
-; -O1:    stlxr w9, w8, [x0]
+; CHECK-LABEL: atomicrmw_fmin_float_aligned_release:
+; CHECK:    ldfminnml s0, s0, [x0]
     %r = atomicrmw fmin ptr %ptr, float %value release, align 4
     ret float %r
 }
 
 define dso_local float @atomicrmw_fmin_float_aligned_acq_rel(ptr %ptr, float %value) {
-; -O0-LABEL: atomicrmw_fmin_float_aligned_acq_rel:
-; -O0:    ldaxr w0, [x9]
-; -O0:    cmp w0, w10
-; -O0:    stlxr w8, w11, [x9]
-; -O0:    subs w8, w0, w8
-;
-; -O1-LABEL: atomicrmw_fmin_float_aligned_acq_rel:
-; -O1:    ldaxr w8, [x0]
-; -O1:    stlxr w9, w8, [x0]
+; CHECK-LABEL: atomicrmw_fmin_float_aligned_acq_rel:
+; CHECK:    ldfminnmal s0, s0, [x0]
     %r = atomicrmw fmin ptr %ptr, float %value acq_rel, align 4
     ret float %r
 }
 
 define dso_local float @atomicrmw_fmin_float_aligned_seq_cst(ptr %ptr, float %value) {
-; -O0-LABEL: atomicrmw_fmin_float_aligned_seq_cst:
-; -O0:    ldaxr w0, [x9]
-; -O0:    cmp w0, w10
-; -O0:    stlxr w8, w11, [x9]
-; -O0:    subs w8, w0, w8
-;
-; -O1-LABEL: atomicrmw_fmin_float_aligned_seq_cst:
-; -O1:    ldaxr w8, [x0]
-; -O1:    stlxr w9, w8, [x0]
+; CHECK-LABEL: atomicrmw_fmin_float_aligned_seq_cst:
+; CHECK:    ldfminnmal s0, s0, [x0]
     %r = atomicrmw fmin ptr %ptr, float %value seq_cst, align 4
     ret float %r
 }
 
 define dso_local double @atomicrmw_fmin_double_aligned_monotonic(ptr %ptr, double %value) {
-; -O0-LABEL: atomicrmw_fmin_double_aligned_monotonic:
-; -O0:    ldaxr x0, [x9]
-; -O0:    cmp x0, x10
-; -O0:    stlxr w8, x11, [x9]
-; -O0:    subs x8, x0, x8
-;
-; -O1-LABEL: atomicrmw_fmin_double_aligned_monotonic:
-; -O1:    ldxr x8, [x0]
-; -O1:    stxr w9, x8, [x0]
+; CHECK-LABEL: atomicrmw_fmin_double_aligned_monotonic:
+; CHECK:    ldfminnm d0, d0, [x0]
     %r = atomicrmw fmin ptr %ptr, double %value monotonic, align 8
     ret double %r
 }
 
 define dso_local double @atomicrmw_fmin_double_aligned_acquire(ptr %ptr, double %value) {
-; -O0-LABEL: atomicrmw_fmin_double_aligned_acquire:
-; -O0:    ldaxr x0, [x9]
-; -O0:    cmp x0, x10
-; -O0:    stlxr w8, x11, [x9]
-; -O0:    subs x8, x0, x8
-;
-; -O1-LABEL: atomicrmw_fmin_double_aligned_acquire:
-; -O1:    ldaxr x8, [x0]
-; -O1:    stxr w9, x8, [x0]
+; CHECK-LABEL: atomicrmw_fmin_double_aligned_acquire:
+; CHECK:    ldfminnma d0, d0, [x0]
     %r = atomicrmw fmin ptr %ptr, double %value acquire, align 8
     ret double %r
 }
 
 define dso_local double @atomicrmw_fmin_double_aligned_release(ptr %ptr, double %value) {
-; -O0-LABEL: atomicrmw_fmin_double_aligned_release:
-; -O0:    ldaxr x0, [x9]
-; -O0:    cmp x0, x10
-; -O0:    stlxr w8, x11, [x9]
-; -O0:    subs x8, x0, x8
-;
-; -O1-LABEL: atomicrmw_fmin_double_aligned_release:
-; -O1:    ldxr x8, [x0]
-; -O1:    stlxr w9, x8, [x0]
+; CHECK-LABEL: atomicrmw_fmin_double_aligned_release:
+; CHECK:    ldfminnml d0, d0, [x0]
     %r = atomicrmw fmin ptr %ptr, double %value release, align 8
     ret double %r
 }
 
 define dso_local double @atomicrmw_fmin_double_aligned_acq_rel(ptr %ptr, double %value) {
-; -O0-LABEL: atomicrmw_fmin_double_aligned_acq_rel:
-; -O0:    ldaxr x0, [x9]
-; -O0:    cmp x0, x10
-; -O0:    stlxr w8, x11, [x9]
-; -O0:    subs x8, x0, x8
-;
-; -O1-LABEL: atomicrmw_fmin_double_aligned_acq_rel:
-; -O1:    ldaxr x8, [x0]
-; -O1:    stlxr w9, x8, [x0]
+; CHECK-LABEL: atomicrmw_fmin_double_aligned_acq_rel:
+; CHECK:    ldfminnmal d0, d0, [x0]
     %r = atomicrmw fmin ptr %ptr, double %value acq_rel, align 8
     ret double %r
 }
 
 define dso_local double @atomicrmw_fmin_double_aligned_seq_cst(ptr %ptr, double %value) {
-; -O0-LABEL: atomicrmw_fmin_double_aligned_seq_cst:
-; -O0:    ldaxr x0, [x9]
-; -O0:    cmp x0, x10
-; -O0:    stlxr w8, x11, [x9]
-; -O0:    subs x8, x0, x8
-;
-; -O1-LABEL: atomicrmw_fmin_double_aligned_seq_cst:
-; -O1:    ldaxr x8, [x0]
-; -O1:    stlxr w9, x8, [x0]
+; CHECK-LABEL: atomicrmw_fmin_double_aligned_seq_cst:
+; CHECK:    ldfminnmal d0, d0, [x0]
     %r = atomicrmw fmin ptr %ptr, double %value seq_cst, align 8
     ret double %r
 }

diff  --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-lsfe.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-lsfe.ll
index 6c46407177297..a22cc5806d86d 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-lsfe.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-lsfe.ll
@@ -4,321 +4,141 @@
 ; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64_be -mattr=+lsfe -O1 | FileCheck %s --check-prefixes=CHECK,-O1
 
 define dso_local half @atomicrmw_fadd_half_aligned_monotonic(ptr %ptr, half %value) {
-; -O0-LABEL: atomicrmw_fadd_half_aligned_monotonic:
-; -O0:    ldaxrh w9, [x11]
-; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w9, w8, uxth
-; -O0:    subs w8, w8, #1
-;
-; -O1-LABEL: atomicrmw_fadd_half_aligned_monotonic:
-; -O1:    ldxrh w8, [x0]
-; -O1:    stxrh w9, w8, [x0]
+; CHECK-LABEL: atomicrmw_fadd_half_aligned_monotonic:
+; CHECK:    ldfadd h0, h0, [x0]
     %r = atomicrmw fadd ptr %ptr, half %value monotonic, align 2
     ret half %r
 }
 
 define dso_local half @atomicrmw_fadd_half_aligned_acquire(ptr %ptr, half %value) {
-; -O0-LABEL: atomicrmw_fadd_half_aligned_acquire:
-; -O0:    ldaxrh w9, [x11]
-; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w9, w8, uxth
-; -O0:    subs w8, w8, #1
-;
-; -O1-LABEL: atomicrmw_fadd_half_aligned_acquire:
-; -O1:    ldaxrh w8, [x0]
-; -O1:    stxrh w9, w8, [x0]
+; CHECK-LABEL: atomicrmw_fadd_half_aligned_acquire:
+; CHECK:    ldfadda h0, h0, [x0]
     %r = atomicrmw fadd ptr %ptr, half %value acquire, align 2
     ret half %r
 }
 
 define dso_local half @atomicrmw_fadd_half_aligned_release(ptr %ptr, half %value) {
-; -O0-LABEL: atomicrmw_fadd_half_aligned_release:
-; -O0:    ldaxrh w9, [x11]
-; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w9, w8, uxth
-; -O0:    subs w8, w8, #1
-;
-; -O1-LABEL: atomicrmw_fadd_half_aligned_release:
-; -O1:    ldxrh w8, [x0]
-; -O1:    stlxrh w9, w8, [x0]
+; CHECK-LABEL: atomicrmw_fadd_half_aligned_release:
+; CHECK:    ldfaddl h0, h0, [x0]
     %r = atomicrmw fadd ptr %ptr, half %value release, align 2
     ret half %r
 }
 
 define dso_local half @atomicrmw_fadd_half_aligned_acq_rel(ptr %ptr, half %value) {
-; -O0-LABEL: atomicrmw_fadd_half_aligned_acq_rel:
-; -O0:    ldaxrh w9, [x11]
-; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w9, w8, uxth
-; -O0:    subs w8, w8, #1
-;
-; -O1-LABEL: atomicrmw_fadd_half_aligned_acq_rel:
-; -O1:    ldaxrh w8, [x0]
-; -O1:    stlxrh w9, w8, [x0]
+; CHECK-LABEL: atomicrmw_fadd_half_aligned_acq_rel:
+; CHECK:    ldfaddal h0, h0, [x0]
     %r = atomicrmw fadd ptr %ptr, half %value acq_rel, align 2
     ret half %r
 }
 
 define dso_local half @atomicrmw_fadd_half_aligned_seq_cst(ptr %ptr, half %value) {
-; -O0-LABEL: atomicrmw_fadd_half_aligned_seq_cst:
-; -O0:    ldaxrh w9, [x11]
-; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w9, w8, uxth
-; -O0:    subs w8, w8, #1
-;
-; -O1-LABEL: atomicrmw_fadd_half_aligned_seq_cst:
-; -O1:    ldaxrh w8, [x0]
-; -O1:    stlxrh w9, w8, [x0]
+; CHECK-LABEL: atomicrmw_fadd_half_aligned_seq_cst:
+; CHECK:    ldfaddal h0, h0, [x0]
     %r = atomicrmw fadd ptr %ptr, half %value seq_cst, align 2
     ret half %r
 }
 
 define dso_local bfloat @atomicrmw_fadd_bfloat_aligned_monotonic(ptr %ptr, bfloat %value) {
-; -O0-LABEL: atomicrmw_fadd_bfloat_aligned_monotonic:
-; -O0:    add w8, w8, w9
-; -O0:    add w8, w8, w9
-; -O0:    ldaxrh w9, [x11]
-; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w9, w8, uxth
-; -O0:    subs w8, w8, #1
-;
-; -O1-LABEL: atomicrmw_fadd_bfloat_aligned_monotonic:
-; -O1:    ldxrh w9, [x0]
-; -O1:    add w9, w9, w8
-; -O1:    add w9, w10, w9
-; -O1:    stxrh w10, w9, [x0]
+; CHECK-LABEL: atomicrmw_fadd_bfloat_aligned_monotonic:
+; CHECK:    ldbfadd h0, h0, [x0]
     %r = atomicrmw fadd ptr %ptr, bfloat %value monotonic, align 2
     ret bfloat %r
 }
 
 define dso_local bfloat @atomicrmw_fadd_bfloat_aligned_acquire(ptr %ptr, bfloat %value) {
-; -O0-LABEL: atomicrmw_fadd_bfloat_aligned_acquire:
-; -O0:    add w8, w8, w9
-; -O0:    add w8, w8, w9
-; -O0:    ldaxrh w9, [x11]
-; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w9, w8, uxth
-; -O0:    subs w8, w8, #1
-;
-; -O1-LABEL: atomicrmw_fadd_bfloat_aligned_acquire:
-; -O1:    ldaxrh w9, [x0]
-; -O1:    add w9, w9, w8
-; -O1:    add w9, w10, w9
-; -O1:    stxrh w10, w9, [x0]
+; CHECK-LABEL: atomicrmw_fadd_bfloat_aligned_acquire:
+; CHECK:    ldbfadda h0, h0, [x0]
     %r = atomicrmw fadd ptr %ptr, bfloat %value acquire, align 2
     ret bfloat %r
 }
 
 define dso_local bfloat @atomicrmw_fadd_bfloat_aligned_release(ptr %ptr, bfloat %value) {
-; -O0-LABEL: atomicrmw_fadd_bfloat_aligned_release:
-; -O0:    add w8, w8, w9
-; -O0:    add w8, w8, w9
-; -O0:    ldaxrh w9, [x11]
-; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w9, w8, uxth
-; -O0:    subs w8, w8, #1
-;
-; -O1-LABEL: atomicrmw_fadd_bfloat_aligned_release:
-; -O1:    ldxrh w9, [x0]
-; -O1:    add w9, w9, w8
-; -O1:    add w9, w10, w9
-; -O1:    stlxrh w10, w9, [x0]
+; CHECK-LABEL: atomicrmw_fadd_bfloat_aligned_release:
+; CHECK:    ldbfaddl h0, h0, [x0]
     %r = atomicrmw fadd ptr %ptr, bfloat %value release, align 2
     ret bfloat %r
 }
 
 define dso_local bfloat @atomicrmw_fadd_bfloat_aligned_acq_rel(ptr %ptr, bfloat %value) {
-; -O0-LABEL: atomicrmw_fadd_bfloat_aligned_acq_rel:
-; -O0:    add w8, w8, w9
-; -O0:    add w8, w8, w9
-; -O0:    ldaxrh w9, [x11]
-; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w9, w8, uxth
-; -O0:    subs w8, w8, #1
-;
-; -O1-LABEL: atomicrmw_fadd_bfloat_aligned_acq_rel:
-; -O1:    ldaxrh w9, [x0]
-; -O1:    add w9, w9, w8
-; -O1:    add w9, w10, w9
-; -O1:    stlxrh w10, w9, [x0]
+; CHECK-LABEL: atomicrmw_fadd_bfloat_aligned_acq_rel:
+; CHECK:    ldbfaddal h0, h0, [x0]
     %r = atomicrmw fadd ptr %ptr, bfloat %value acq_rel, align 2
     ret bfloat %r
 }
 
 define dso_local bfloat @atomicrmw_fadd_bfloat_aligned_seq_cst(ptr %ptr, bfloat %value) {
-; -O0-LABEL: atomicrmw_fadd_bfloat_aligned_seq_cst:
-; -O0:    add w8, w8, w9
-; -O0:    add w8, w8, w9
-; -O0:    ldaxrh w9, [x11]
-; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w9, w8, uxth
-; -O0:    subs w8, w8, #1
-;
-; -O1-LABEL: atomicrmw_fadd_bfloat_aligned_seq_cst:
-; -O1:    ldaxrh w9, [x0]
-; -O1:    add w9, w9, w8
-; -O1:    add w9, w10, w9
-; -O1:    stlxrh w10, w9, [x0]
+; CHECK-LABEL: atomicrmw_fadd_bfloat_aligned_seq_cst:
+; CHECK:    ldbfaddal h0, h0, [x0]
     %r = atomicrmw fadd ptr %ptr, bfloat %value seq_cst, align 2
     ret bfloat %r
 }
 
 define dso_local float @atomicrmw_fadd_float_aligned_monotonic(ptr %ptr, float %value) {
-; -O0-LABEL: atomicrmw_fadd_float_aligned_monotonic:
-; -O0:    ldaxr w9, [x11]
-; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
-; -O0:    subs w8, w8, #1
-;
-; -O1-LABEL: atomicrmw_fadd_float_aligned_monotonic:
-; -O1:    ldxr w8, [x0]
-; -O1:    stxr w9, w8, [x0]
+; CHECK-LABEL: atomicrmw_fadd_float_aligned_monotonic:
+; CHECK:    ldfadd s0, s0, [x0]
     %r = atomicrmw fadd ptr %ptr, float %value monotonic, align 4
     ret float %r
 }
 
 define dso_local float @atomicrmw_fadd_float_aligned_acquire(ptr %ptr, float %value) {
-; -O0-LABEL: atomicrmw_fadd_float_aligned_acquire:
-; -O0:    ldaxr w9, [x11]
-; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
-; -O0:    subs w8, w8, #1
-;
-; -O1-LABEL: atomicrmw_fadd_float_aligned_acquire:
-; -O1:    ldaxr w8, [x0]
-; -O1:    stxr w9, w8, [x0]
+; CHECK-LABEL: atomicrmw_fadd_float_aligned_acquire:
+; CHECK:    ldfadda s0, s0, [x0]
     %r = atomicrmw fadd ptr %ptr, float %value acquire, align 4
     ret float %r
 }
 
 define dso_local float @atomicrmw_fadd_float_aligned_release(ptr %ptr, float %value) {
-; -O0-LABEL: atomicrmw_fadd_float_aligned_release:
-; -O0:    ldaxr w9, [x11]
-; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
-; -O0:    subs w8, w8, #1
-;
-; -O1-LABEL: atomicrmw_fadd_float_aligned_release:
-; -O1:    ldxr w8, [x0]
-; -O1:    stlxr w9, w8, [x0]
+; CHECK-LABEL: atomicrmw_fadd_float_aligned_release:
+; CHECK:    ldfaddl s0, s0, [x0]
     %r = atomicrmw fadd ptr %ptr, float %value release, align 4
     ret float %r
 }
 
 define dso_local float @atomicrmw_fadd_float_aligned_acq_rel(ptr %ptr, float %value) {
-; -O0-LABEL: atomicrmw_fadd_float_aligned_acq_rel:
-; -O0:    ldaxr w9, [x11]
-; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
-; -O0:    subs w8, w8, #1
-;
-; -O1-LABEL: atomicrmw_fadd_float_aligned_acq_rel:
-; -O1:    ldaxr w8, [x0]
-; -O1:    stlxr w9, w8, [x0]
+; CHECK-LABEL: atomicrmw_fadd_float_aligned_acq_rel:
+; CHECK:    ldfaddal s0, s0, [x0]
     %r = atomicrmw fadd ptr %ptr, float %value acq_rel, align 4
     ret float %r
 }
 
 define dso_local float @atomicrmw_fadd_float_aligned_seq_cst(ptr %ptr, float %value) {
-; -O0-LABEL: atomicrmw_fadd_float_aligned_seq_cst:
-; -O0:    ldaxr w9, [x11]
-; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
-; -O0:    subs w8, w8, #1
-;
-; -O1-LABEL: atomicrmw_fadd_float_aligned_seq_cst:
-; -O1:    ldaxr w8, [x0]
-; -O1:    stlxr w9, w8, [x0]
+; CHECK-LABEL: atomicrmw_fadd_float_aligned_seq_cst:
+; CHECK:    ldfaddal s0, s0, [x0]
     %r = atomicrmw fadd ptr %ptr, float %value seq_cst, align 4
     ret float %r
 }
 
 define dso_local double @atomicrmw_fadd_double_aligned_monotonic(ptr %ptr, double %value) {
-; -O0-LABEL: atomicrmw_fadd_double_aligned_monotonic:
-; -O0:    ldaxr x9, [x11]
-; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
-; -O0:    subs w8, w8, #1
-;
-; -O1-LABEL: atomicrmw_fadd_double_aligned_monotonic:
-; -O1:    ldxr x8, [x0]
-; -O1:    stxr w9, x8, [x0]
+; CHECK-LABEL: atomicrmw_fadd_double_aligned_monotonic:
+; CHECK:    ldfadd d0, d0, [x0]
     %r = atomicrmw fadd ptr %ptr, double %value monotonic, align 8
     ret double %r
 }
 
 define dso_local double @atomicrmw_fadd_double_aligned_acquire(ptr %ptr, double %value) {
-; -O0-LABEL: atomicrmw_fadd_double_aligned_acquire:
-; -O0:    ldaxr x9, [x11]
-; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
-; -O0:    subs w8, w8, #1
-;
-; -O1-LABEL: atomicrmw_fadd_double_aligned_acquire:
-; -O1:    ldaxr x8, [x0]
-; -O1:    stxr w9, x8, [x0]
+; CHECK-LABEL: atomicrmw_fadd_double_aligned_acquire:
+; CHECK:    ldfadda d0, d0, [x0]
     %r = atomicrmw fadd ptr %ptr, double %value acquire, align 8
     ret double %r
 }
 
 define dso_local double @atomicrmw_fadd_double_aligned_release(ptr %ptr, double %value) {
-; -O0-LABEL: atomicrmw_fadd_double_aligned_release:
-; -O0:    ldaxr x9, [x11]
-; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
-; -O0:    subs w8, w8, #1
-;
-; -O1-LABEL: atomicrmw_fadd_double_aligned_release:
-; -O1:    ldxr x8, [x0]
-; -O1:    stlxr w9, x8, [x0]
+; CHECK-LABEL: atomicrmw_fadd_double_aligned_release:
+; CHECK:    ldfaddl d0, d0, [x0]
     %r = atomicrmw fadd ptr %ptr, double %value release, align 8
     ret double %r
 }
 
 define dso_local double @atomicrmw_fadd_double_aligned_acq_rel(ptr %ptr, double %value) {
-; -O0-LABEL: atomicrmw_fadd_double_aligned_acq_rel:
-; -O0:    ldaxr x9, [x11]
-; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
-; -O0:    subs w8, w8, #1
-;
-; -O1-LABEL: atomicrmw_fadd_double_aligned_acq_rel:
-; -O1:    ldaxr x8, [x0]
-; -O1:    stlxr w9, x8, [x0]
+; CHECK-LABEL: atomicrmw_fadd_double_aligned_acq_rel:
+; CHECK:    ldfaddal d0, d0, [x0]
     %r = atomicrmw fadd ptr %ptr, double %value acq_rel, align 8
     ret double %r
 }
 
 define dso_local double @atomicrmw_fadd_double_aligned_seq_cst(ptr %ptr, double %value) {
-; -O0-LABEL: atomicrmw_fadd_double_aligned_seq_cst:
-; -O0:    ldaxr x9, [x11]
-; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
-; -O0:    subs w8, w8, #1
-;
-; -O1-LABEL: atomicrmw_fadd_double_aligned_seq_cst:
-; -O1:    ldaxr x8, [x0]
-; -O1:    stlxr w9, x8, [x0]
+; CHECK-LABEL: atomicrmw_fadd_double_aligned_seq_cst:
+; CHECK:    ldfaddal d0, d0, [x0]
     %r = atomicrmw fadd ptr %ptr, double %value seq_cst, align 8
     ret double %r
 }
@@ -994,321 +814,141 @@ define dso_local double @atomicrmw_fsub_double_unaligned_seq_cst(ptr %ptr, doubl
 }
 
 define dso_local half @atomicrmw_fmax_half_aligned_monotonic(ptr %ptr, half %value) {
-; -O0-LABEL: atomicrmw_fmax_half_aligned_monotonic:
-; -O0:    ldaxrh w9, [x11]
-; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w9, w8, uxth
-; -O0:    subs w8, w8, #1
-;
-; -O1-LABEL: atomicrmw_fmax_half_aligned_monotonic:
-; -O1:    ldxrh w8, [x0]
-; -O1:    stxrh w9, w8, [x0]
+; CHECK-LABEL: atomicrmw_fmax_half_aligned_monotonic:
+; CHECK:    ldfmaxnm h0, h0, [x0]
     %r = atomicrmw fmax ptr %ptr, half %value monotonic, align 2
     ret half %r
 }
 
 define dso_local half @atomicrmw_fmax_half_aligned_acquire(ptr %ptr, half %value) {
-; -O0-LABEL: atomicrmw_fmax_half_aligned_acquire:
-; -O0:    ldaxrh w9, [x11]
-; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w9, w8, uxth
-; -O0:    subs w8, w8, #1
-;
-; -O1-LABEL: atomicrmw_fmax_half_aligned_acquire:
-; -O1:    ldaxrh w8, [x0]
-; -O1:    stxrh w9, w8, [x0]
+; CHECK-LABEL: atomicrmw_fmax_half_aligned_acquire:
+; CHECK:    ldfmaxnma h0, h0, [x0]
     %r = atomicrmw fmax ptr %ptr, half %value acquire, align 2
     ret half %r
 }
 
 define dso_local half @atomicrmw_fmax_half_aligned_release(ptr %ptr, half %value) {
-; -O0-LABEL: atomicrmw_fmax_half_aligned_release:
-; -O0:    ldaxrh w9, [x11]
-; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w9, w8, uxth
-; -O0:    subs w8, w8, #1
-;
-; -O1-LABEL: atomicrmw_fmax_half_aligned_release:
-; -O1:    ldxrh w8, [x0]
-; -O1:    stlxrh w9, w8, [x0]
+; CHECK-LABEL: atomicrmw_fmax_half_aligned_release:
+; CHECK:    ldfmaxnml h0, h0, [x0]
     %r = atomicrmw fmax ptr %ptr, half %value release, align 2
     ret half %r
 }
 
 define dso_local half @atomicrmw_fmax_half_aligned_acq_rel(ptr %ptr, half %value) {
-; -O0-LABEL: atomicrmw_fmax_half_aligned_acq_rel:
-; -O0:    ldaxrh w9, [x11]
-; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w9, w8, uxth
-; -O0:    subs w8, w8, #1
-;
-; -O1-LABEL: atomicrmw_fmax_half_aligned_acq_rel:
-; -O1:    ldaxrh w8, [x0]
-; -O1:    stlxrh w9, w8, [x0]
+; CHECK-LABEL: atomicrmw_fmax_half_aligned_acq_rel:
+; CHECK:    ldfmaxnmal h0, h0, [x0]
     %r = atomicrmw fmax ptr %ptr, half %value acq_rel, align 2
     ret half %r
 }
 
 define dso_local half @atomicrmw_fmax_half_aligned_seq_cst(ptr %ptr, half %value) {
-; -O0-LABEL: atomicrmw_fmax_half_aligned_seq_cst:
-; -O0:    ldaxrh w9, [x11]
-; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w9, w8, uxth
-; -O0:    subs w8, w8, #1
-;
-; -O1-LABEL: atomicrmw_fmax_half_aligned_seq_cst:
-; -O1:    ldaxrh w8, [x0]
-; -O1:    stlxrh w9, w8, [x0]
+; CHECK-LABEL: atomicrmw_fmax_half_aligned_seq_cst:
+; CHECK:    ldfmaxnmal h0, h0, [x0]
     %r = atomicrmw fmax ptr %ptr, half %value seq_cst, align 2
     ret half %r
 }
 
 define dso_local bfloat @atomicrmw_fmax_bfloat_aligned_monotonic(ptr %ptr, bfloat %value) {
-; -O0-LABEL: atomicrmw_fmax_bfloat_aligned_monotonic:
-; -O0:    add w8, w8, w9
-; -O0:    add w8, w8, w9
-; -O0:    ldaxrh w9, [x11]
-; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w9, w8, uxth
-; -O0:    subs w8, w8, #1
-;
-; -O1-LABEL: atomicrmw_fmax_bfloat_aligned_monotonic:
-; -O1:    ldxrh w9, [x0]
-; -O1:    add w9, w9, w8
-; -O1:    add w9, w10, w9
-; -O1:    stxrh w10, w9, [x0]
+; CHECK-LABEL: atomicrmw_fmax_bfloat_aligned_monotonic:
+; CHECK:    ldbfmaxnm h0, h0, [x0]
     %r = atomicrmw fmax ptr %ptr, bfloat %value monotonic, align 2
     ret bfloat %r
 }
 
 define dso_local bfloat @atomicrmw_fmax_bfloat_aligned_acquire(ptr %ptr, bfloat %value) {
-; -O0-LABEL: atomicrmw_fmax_bfloat_aligned_acquire:
-; -O0:    add w8, w8, w9
-; -O0:    add w8, w8, w9
-; -O0:    ldaxrh w9, [x11]
-; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w9, w8, uxth
-; -O0:    subs w8, w8, #1
-;
-; -O1-LABEL: atomicrmw_fmax_bfloat_aligned_acquire:
-; -O1:    ldaxrh w9, [x0]
-; -O1:    add w9, w9, w8
-; -O1:    add w9, w10, w9
-; -O1:    stxrh w10, w9, [x0]
+; CHECK-LABEL: atomicrmw_fmax_bfloat_aligned_acquire:
+; CHECK:    ldbfmaxnma h0, h0, [x0]
     %r = atomicrmw fmax ptr %ptr, bfloat %value acquire, align 2
     ret bfloat %r
 }
 
 define dso_local bfloat @atomicrmw_fmax_bfloat_aligned_release(ptr %ptr, bfloat %value) {
-; -O0-LABEL: atomicrmw_fmax_bfloat_aligned_release:
-; -O0:    add w8, w8, w9
-; -O0:    add w8, w8, w9
-; -O0:    ldaxrh w9, [x11]
-; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w9, w8, uxth
-; -O0:    subs w8, w8, #1
-;
-; -O1-LABEL: atomicrmw_fmax_bfloat_aligned_release:
-; -O1:    ldxrh w9, [x0]
-; -O1:    add w9, w9, w8
-; -O1:    add w9, w10, w9
-; -O1:    stlxrh w10, w9, [x0]
+; CHECK-LABEL: atomicrmw_fmax_bfloat_aligned_release:
+; CHECK:    ldbfmaxnml h0, h0, [x0]
     %r = atomicrmw fmax ptr %ptr, bfloat %value release, align 2
     ret bfloat %r
 }
 
 define dso_local bfloat @atomicrmw_fmax_bfloat_aligned_acq_rel(ptr %ptr, bfloat %value) {
-; -O0-LABEL: atomicrmw_fmax_bfloat_aligned_acq_rel:
-; -O0:    add w8, w8, w9
-; -O0:    add w8, w8, w9
-; -O0:    ldaxrh w9, [x11]
-; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w9, w8, uxth
-; -O0:    subs w8, w8, #1
-;
-; -O1-LABEL: atomicrmw_fmax_bfloat_aligned_acq_rel:
-; -O1:    ldaxrh w9, [x0]
-; -O1:    add w9, w9, w8
-; -O1:    add w9, w10, w9
-; -O1:    stlxrh w10, w9, [x0]
+; CHECK-LABEL: atomicrmw_fmax_bfloat_aligned_acq_rel:
+; CHECK:    ldbfmaxnmal h0, h0, [x0]
     %r = atomicrmw fmax ptr %ptr, bfloat %value acq_rel, align 2
     ret bfloat %r
 }
 
 define dso_local bfloat @atomicrmw_fmax_bfloat_aligned_seq_cst(ptr %ptr, bfloat %value) {
-; -O0-LABEL: atomicrmw_fmax_bfloat_aligned_seq_cst:
-; -O0:    add w8, w8, w9
-; -O0:    add w8, w8, w9
-; -O0:    ldaxrh w9, [x11]
-; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w9, w8, uxth
-; -O0:    subs w8, w8, #1
-;
-; -O1-LABEL: atomicrmw_fmax_bfloat_aligned_seq_cst:
-; -O1:    ldaxrh w9, [x0]
-; -O1:    add w9, w9, w8
-; -O1:    add w9, w10, w9
-; -O1:    stlxrh w10, w9, [x0]
+; CHECK-LABEL: atomicrmw_fmax_bfloat_aligned_seq_cst:
+; CHECK:    ldbfmaxnmal h0, h0, [x0]
     %r = atomicrmw fmax ptr %ptr, bfloat %value seq_cst, align 2
     ret bfloat %r
 }
 
 define dso_local float @atomicrmw_fmax_float_aligned_monotonic(ptr %ptr, float %value) {
-; -O0-LABEL: atomicrmw_fmax_float_aligned_monotonic:
-; -O0:    ldaxr w9, [x11]
-; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
-; -O0:    subs w8, w8, #1
-;
-; -O1-LABEL: atomicrmw_fmax_float_aligned_monotonic:
-; -O1:    ldxr w8, [x0]
-; -O1:    stxr w9, w8, [x0]
+; CHECK-LABEL: atomicrmw_fmax_float_aligned_monotonic:
+; CHECK:    ldfmaxnm s0, s0, [x0]
     %r = atomicrmw fmax ptr %ptr, float %value monotonic, align 4
     ret float %r
 }
 
 define dso_local float @atomicrmw_fmax_float_aligned_acquire(ptr %ptr, float %value) {
-; -O0-LABEL: atomicrmw_fmax_float_aligned_acquire:
-; -O0:    ldaxr w9, [x11]
-; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
-; -O0:    subs w8, w8, #1
-;
-; -O1-LABEL: atomicrmw_fmax_float_aligned_acquire:
-; -O1:    ldaxr w8, [x0]
-; -O1:    stxr w9, w8, [x0]
+; CHECK-LABEL: atomicrmw_fmax_float_aligned_acquire:
+; CHECK:    ldfmaxnma s0, s0, [x0]
     %r = atomicrmw fmax ptr %ptr, float %value acquire, align 4
     ret float %r
 }
 
 define dso_local float @atomicrmw_fmax_float_aligned_release(ptr %ptr, float %value) {
-; -O0-LABEL: atomicrmw_fmax_float_aligned_release:
-; -O0:    ldaxr w9, [x11]
-; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
-; -O0:    subs w8, w8, #1
-;
-; -O1-LABEL: atomicrmw_fmax_float_aligned_release:
-; -O1:    ldxr w8, [x0]
-; -O1:    stlxr w9, w8, [x0]
+; CHECK-LABEL: atomicrmw_fmax_float_aligned_release:
+; CHECK:    ldfmaxnml s0, s0, [x0]
     %r = atomicrmw fmax ptr %ptr, float %value release, align 4
     ret float %r
 }
 
 define dso_local float @atomicrmw_fmax_float_aligned_acq_rel(ptr %ptr, float %value) {
-; -O0-LABEL: atomicrmw_fmax_float_aligned_acq_rel:
-; -O0:    ldaxr w9, [x11]
-; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
-; -O0:    subs w8, w8, #1
-;
-; -O1-LABEL: atomicrmw_fmax_float_aligned_acq_rel:
-; -O1:    ldaxr w8, [x0]
-; -O1:    stlxr w9, w8, [x0]
+; CHECK-LABEL: atomicrmw_fmax_float_aligned_acq_rel:
+; CHECK:    ldfmaxnmal s0, s0, [x0]
     %r = atomicrmw fmax ptr %ptr, float %value acq_rel, align 4
     ret float %r
 }
 
 define dso_local float @atomicrmw_fmax_float_aligned_seq_cst(ptr %ptr, float %value) {
-; -O0-LABEL: atomicrmw_fmax_float_aligned_seq_cst:
-; -O0:    ldaxr w9, [x11]
-; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
-; -O0:    subs w8, w8, #1
-;
-; -O1-LABEL: atomicrmw_fmax_float_aligned_seq_cst:
-; -O1:    ldaxr w8, [x0]
-; -O1:    stlxr w9, w8, [x0]
+; CHECK-LABEL: atomicrmw_fmax_float_aligned_seq_cst:
+; CHECK:    ldfmaxnmal s0, s0, [x0]
     %r = atomicrmw fmax ptr %ptr, float %value seq_cst, align 4
     ret float %r
 }
 
 define dso_local double @atomicrmw_fmax_double_aligned_monotonic(ptr %ptr, double %value) {
-; -O0-LABEL: atomicrmw_fmax_double_aligned_monotonic:
-; -O0:    ldaxr x9, [x11]
-; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
-; -O0:    subs w8, w8, #1
-;
-; -O1-LABEL: atomicrmw_fmax_double_aligned_monotonic:
-; -O1:    ldxr x8, [x0]
-; -O1:    stxr w9, x8, [x0]
+; CHECK-LABEL: atomicrmw_fmax_double_aligned_monotonic:
+; CHECK:    ldfmaxnm d0, d0, [x0]
     %r = atomicrmw fmax ptr %ptr, double %value monotonic, align 8
     ret double %r
 }
 
 define dso_local double @atomicrmw_fmax_double_aligned_acquire(ptr %ptr, double %value) {
-; -O0-LABEL: atomicrmw_fmax_double_aligned_acquire:
-; -O0:    ldaxr x9, [x11]
-; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
-; -O0:    subs w8, w8, #1
-;
-; -O1-LABEL: atomicrmw_fmax_double_aligned_acquire:
-; -O1:    ldaxr x8, [x0]
-; -O1:    stxr w9, x8, [x0]
+; CHECK-LABEL: atomicrmw_fmax_double_aligned_acquire:
+; CHECK:    ldfmaxnma d0, d0, [x0]
     %r = atomicrmw fmax ptr %ptr, double %value acquire, align 8
     ret double %r
 }
 
 define dso_local double @atomicrmw_fmax_double_aligned_release(ptr %ptr, double %value) {
-; -O0-LABEL: atomicrmw_fmax_double_aligned_release:
-; -O0:    ldaxr x9, [x11]
-; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
-; -O0:    subs w8, w8, #1
-;
-; -O1-LABEL: atomicrmw_fmax_double_aligned_release:
-; -O1:    ldxr x8, [x0]
-; -O1:    stlxr w9, x8, [x0]
+; CHECK-LABEL: atomicrmw_fmax_double_aligned_release:
+; CHECK:    ldfmaxnml d0, d0, [x0]
     %r = atomicrmw fmax ptr %ptr, double %value release, align 8
     ret double %r
 }
 
 define dso_local double @atomicrmw_fmax_double_aligned_acq_rel(ptr %ptr, double %value) {
-; -O0-LABEL: atomicrmw_fmax_double_aligned_acq_rel:
-; -O0:    ldaxr x9, [x11]
-; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
-; -O0:    subs w8, w8, #1
-;
-; -O1-LABEL: atomicrmw_fmax_double_aligned_acq_rel:
-; -O1:    ldaxr x8, [x0]
-; -O1:    stlxr w9, x8, [x0]
+; CHECK-LABEL: atomicrmw_fmax_double_aligned_acq_rel:
+; CHECK:    ldfmaxnmal d0, d0, [x0]
     %r = atomicrmw fmax ptr %ptr, double %value acq_rel, align 8
     ret double %r
 }
 
 define dso_local double @atomicrmw_fmax_double_aligned_seq_cst(ptr %ptr, double %value) {
-; -O0-LABEL: atomicrmw_fmax_double_aligned_seq_cst:
-; -O0:    ldaxr x9, [x11]
-; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
-; -O0:    subs w8, w8, #1
-;
-; -O1-LABEL: atomicrmw_fmax_double_aligned_seq_cst:
-; -O1:    ldaxr x8, [x0]
-; -O1:    stlxr w9, x8, [x0]
+; CHECK-LABEL: atomicrmw_fmax_double_aligned_seq_cst:
+; CHECK:    ldfmaxnmal d0, d0, [x0]
     %r = atomicrmw fmax ptr %ptr, double %value seq_cst, align 8
     ret double %r
 }
@@ -1489,321 +1129,141 @@ define dso_local double @atomicrmw_fmax_double_unaligned_seq_cst(ptr %ptr, doubl
 }
 
 define dso_local half @atomicrmw_fmin_half_aligned_monotonic(ptr %ptr, half %value) {
-; -O0-LABEL: atomicrmw_fmin_half_aligned_monotonic:
-; -O0:    ldaxrh w9, [x11]
-; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w9, w8, uxth
-; -O0:    subs w8, w8, #1
-;
-; -O1-LABEL: atomicrmw_fmin_half_aligned_monotonic:
-; -O1:    ldxrh w8, [x0]
-; -O1:    stxrh w9, w8, [x0]
+; CHECK-LABEL: atomicrmw_fmin_half_aligned_monotonic:
+; CHECK:    ldfminnm h0, h0, [x0]
     %r = atomicrmw fmin ptr %ptr, half %value monotonic, align 2
     ret half %r
 }
 
 define dso_local half @atomicrmw_fmin_half_aligned_acquire(ptr %ptr, half %value) {
-; -O0-LABEL: atomicrmw_fmin_half_aligned_acquire:
-; -O0:    ldaxrh w9, [x11]
-; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w9, w8, uxth
-; -O0:    subs w8, w8, #1
-;
-; -O1-LABEL: atomicrmw_fmin_half_aligned_acquire:
-; -O1:    ldaxrh w8, [x0]
-; -O1:    stxrh w9, w8, [x0]
+; CHECK-LABEL: atomicrmw_fmin_half_aligned_acquire:
+; CHECK:    ldfminnma h0, h0, [x0]
     %r = atomicrmw fmin ptr %ptr, half %value acquire, align 2
     ret half %r
 }
 
 define dso_local half @atomicrmw_fmin_half_aligned_release(ptr %ptr, half %value) {
-; -O0-LABEL: atomicrmw_fmin_half_aligned_release:
-; -O0:    ldaxrh w9, [x11]
-; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w9, w8, uxth
-; -O0:    subs w8, w8, #1
-;
-; -O1-LABEL: atomicrmw_fmin_half_aligned_release:
-; -O1:    ldxrh w8, [x0]
-; -O1:    stlxrh w9, w8, [x0]
+; CHECK-LABEL: atomicrmw_fmin_half_aligned_release:
+; CHECK:    ldfminnml h0, h0, [x0]
     %r = atomicrmw fmin ptr %ptr, half %value release, align 2
     ret half %r
 }
 
 define dso_local half @atomicrmw_fmin_half_aligned_acq_rel(ptr %ptr, half %value) {
-; -O0-LABEL: atomicrmw_fmin_half_aligned_acq_rel:
-; -O0:    ldaxrh w9, [x11]
-; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w9, w8, uxth
-; -O0:    subs w8, w8, #1
-;
-; -O1-LABEL: atomicrmw_fmin_half_aligned_acq_rel:
-; -O1:    ldaxrh w8, [x0]
-; -O1:    stlxrh w9, w8, [x0]
+; CHECK-LABEL: atomicrmw_fmin_half_aligned_acq_rel:
+; CHECK:    ldfminnmal h0, h0, [x0]
     %r = atomicrmw fmin ptr %ptr, half %value acq_rel, align 2
     ret half %r
 }
 
 define dso_local half @atomicrmw_fmin_half_aligned_seq_cst(ptr %ptr, half %value) {
-; -O0-LABEL: atomicrmw_fmin_half_aligned_seq_cst:
-; -O0:    ldaxrh w9, [x11]
-; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w9, w8, uxth
-; -O0:    subs w8, w8, #1
-;
-; -O1-LABEL: atomicrmw_fmin_half_aligned_seq_cst:
-; -O1:    ldaxrh w8, [x0]
-; -O1:    stlxrh w9, w8, [x0]
+; CHECK-LABEL: atomicrmw_fmin_half_aligned_seq_cst:
+; CHECK:    ldfminnmal h0, h0, [x0]
     %r = atomicrmw fmin ptr %ptr, half %value seq_cst, align 2
     ret half %r
 }
 
 define dso_local bfloat @atomicrmw_fmin_bfloat_aligned_monotonic(ptr %ptr, bfloat %value) {
-; -O0-LABEL: atomicrmw_fmin_bfloat_aligned_monotonic:
-; -O0:    add w8, w8, w9
-; -O0:    add w8, w8, w9
-; -O0:    ldaxrh w9, [x11]
-; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w9, w8, uxth
-; -O0:    subs w8, w8, #1
-;
-; -O1-LABEL: atomicrmw_fmin_bfloat_aligned_monotonic:
-; -O1:    ldxrh w9, [x0]
-; -O1:    add w9, w9, w8
-; -O1:    add w9, w10, w9
-; -O1:    stxrh w10, w9, [x0]
+; CHECK-LABEL: atomicrmw_fmin_bfloat_aligned_monotonic:
+; CHECK:    ldbfminnm h0, h0, [x0]
     %r = atomicrmw fmin ptr %ptr, bfloat %value monotonic, align 2
     ret bfloat %r
 }
 
 define dso_local bfloat @atomicrmw_fmin_bfloat_aligned_acquire(ptr %ptr, bfloat %value) {
-; -O0-LABEL: atomicrmw_fmin_bfloat_aligned_acquire:
-; -O0:    add w8, w8, w9
-; -O0:    add w8, w8, w9
-; -O0:    ldaxrh w9, [x11]
-; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w9, w8, uxth
-; -O0:    subs w8, w8, #1
-;
-; -O1-LABEL: atomicrmw_fmin_bfloat_aligned_acquire:
-; -O1:    ldaxrh w9, [x0]
-; -O1:    add w9, w9, w8
-; -O1:    add w9, w10, w9
-; -O1:    stxrh w10, w9, [x0]
+; CHECK-LABEL: atomicrmw_fmin_bfloat_aligned_acquire:
+; CHECK:    ldbfminnma h0, h0, [x0]
     %r = atomicrmw fmin ptr %ptr, bfloat %value acquire, align 2
     ret bfloat %r
 }
 
 define dso_local bfloat @atomicrmw_fmin_bfloat_aligned_release(ptr %ptr, bfloat %value) {
-; -O0-LABEL: atomicrmw_fmin_bfloat_aligned_release:
-; -O0:    add w8, w8, w9
-; -O0:    add w8, w8, w9
-; -O0:    ldaxrh w9, [x11]
-; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w9, w8, uxth
-; -O0:    subs w8, w8, #1
-;
-; -O1-LABEL: atomicrmw_fmin_bfloat_aligned_release:
-; -O1:    ldxrh w9, [x0]
-; -O1:    add w9, w9, w8
-; -O1:    add w9, w10, w9
-; -O1:    stlxrh w10, w9, [x0]
+; CHECK-LABEL: atomicrmw_fmin_bfloat_aligned_release:
+; CHECK:    ldbfminnml h0, h0, [x0]
     %r = atomicrmw fmin ptr %ptr, bfloat %value release, align 2
     ret bfloat %r
 }
 
 define dso_local bfloat @atomicrmw_fmin_bfloat_aligned_acq_rel(ptr %ptr, bfloat %value) {
-; -O0-LABEL: atomicrmw_fmin_bfloat_aligned_acq_rel:
-; -O0:    add w8, w8, w9
-; -O0:    add w8, w8, w9
-; -O0:    ldaxrh w9, [x11]
-; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w9, w8, uxth
-; -O0:    subs w8, w8, #1
-;
-; -O1-LABEL: atomicrmw_fmin_bfloat_aligned_acq_rel:
-; -O1:    ldaxrh w9, [x0]
-; -O1:    add w9, w9, w8
-; -O1:    add w9, w10, w9
-; -O1:    stlxrh w10, w9, [x0]
+; CHECK-LABEL: atomicrmw_fmin_bfloat_aligned_acq_rel:
+; CHECK:    ldbfminnmal h0, h0, [x0]
     %r = atomicrmw fmin ptr %ptr, bfloat %value acq_rel, align 2
     ret bfloat %r
 }
 
 define dso_local bfloat @atomicrmw_fmin_bfloat_aligned_seq_cst(ptr %ptr, bfloat %value) {
-; -O0-LABEL: atomicrmw_fmin_bfloat_aligned_seq_cst:
-; -O0:    add w8, w8, w9
-; -O0:    add w8, w8, w9
-; -O0:    ldaxrh w9, [x11]
-; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w9, w8, uxth
-; -O0:    subs w8, w8, #1
-;
-; -O1-LABEL: atomicrmw_fmin_bfloat_aligned_seq_cst:
-; -O1:    ldaxrh w9, [x0]
-; -O1:    add w9, w9, w8
-; -O1:    add w9, w10, w9
-; -O1:    stlxrh w10, w9, [x0]
+; CHECK-LABEL: atomicrmw_fmin_bfloat_aligned_seq_cst:
+; CHECK:    ldbfminnmal h0, h0, [x0]
     %r = atomicrmw fmin ptr %ptr, bfloat %value seq_cst, align 2
     ret bfloat %r
 }
 
 define dso_local float @atomicrmw_fmin_float_aligned_monotonic(ptr %ptr, float %value) {
-; -O0-LABEL: atomicrmw_fmin_float_aligned_monotonic:
-; -O0:    ldaxr w9, [x11]
-; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
-; -O0:    subs w8, w8, #1
-;
-; -O1-LABEL: atomicrmw_fmin_float_aligned_monotonic:
-; -O1:    ldxr w8, [x0]
-; -O1:    stxr w9, w8, [x0]
+; CHECK-LABEL: atomicrmw_fmin_float_aligned_monotonic:
+; CHECK:    ldfminnm s0, s0, [x0]
     %r = atomicrmw fmin ptr %ptr, float %value monotonic, align 4
     ret float %r
 }
 
 define dso_local float @atomicrmw_fmin_float_aligned_acquire(ptr %ptr, float %value) {
-; -O0-LABEL: atomicrmw_fmin_float_aligned_acquire:
-; -O0:    ldaxr w9, [x11]
-; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
-; -O0:    subs w8, w8, #1
-;
-; -O1-LABEL: atomicrmw_fmin_float_aligned_acquire:
-; -O1:    ldaxr w8, [x0]
-; -O1:    stxr w9, w8, [x0]
+; CHECK-LABEL: atomicrmw_fmin_float_aligned_acquire:
+; CHECK:    ldfminnma s0, s0, [x0]
     %r = atomicrmw fmin ptr %ptr, float %value acquire, align 4
     ret float %r
 }
 
 define dso_local float @atomicrmw_fmin_float_aligned_release(ptr %ptr, float %value) {
-; -O0-LABEL: atomicrmw_fmin_float_aligned_release:
-; -O0:    ldaxr w9, [x11]
-; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
-; -O0:    subs w8, w8, #1
-;
-; -O1-LABEL: atomicrmw_fmin_float_aligned_release:
-; -O1:    ldxr w8, [x0]
-; -O1:    stlxr w9, w8, [x0]
+; CHECK-LABEL: atomicrmw_fmin_float_aligned_release:
+; CHECK:    ldfminnml s0, s0, [x0]
     %r = atomicrmw fmin ptr %ptr, float %value release, align 4
     ret float %r
 }
 
 define dso_local float @atomicrmw_fmin_float_aligned_acq_rel(ptr %ptr, float %value) {
-; -O0-LABEL: atomicrmw_fmin_float_aligned_acq_rel:
-; -O0:    ldaxr w9, [x11]
-; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
-; -O0:    subs w8, w8, #1
-;
-; -O1-LABEL: atomicrmw_fmin_float_aligned_acq_rel:
-; -O1:    ldaxr w8, [x0]
-; -O1:    stlxr w9, w8, [x0]
+; CHECK-LABEL: atomicrmw_fmin_float_aligned_acq_rel:
+; CHECK:    ldfminnmal s0, s0, [x0]
     %r = atomicrmw fmin ptr %ptr, float %value acq_rel, align 4
     ret float %r
 }
 
 define dso_local float @atomicrmw_fmin_float_aligned_seq_cst(ptr %ptr, float %value) {
-; -O0-LABEL: atomicrmw_fmin_float_aligned_seq_cst:
-; -O0:    ldaxr w9, [x11]
-; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
-; -O0:    subs w8, w8, #1
-;
-; -O1-LABEL: atomicrmw_fmin_float_aligned_seq_cst:
-; -O1:    ldaxr w8, [x0]
-; -O1:    stlxr w9, w8, [x0]
+; CHECK-LABEL: atomicrmw_fmin_float_aligned_seq_cst:
+; CHECK:    ldfminnmal s0, s0, [x0]
     %r = atomicrmw fmin ptr %ptr, float %value seq_cst, align 4
     ret float %r
 }
 
 define dso_local double @atomicrmw_fmin_double_aligned_monotonic(ptr %ptr, double %value) {
-; -O0-LABEL: atomicrmw_fmin_double_aligned_monotonic:
-; -O0:    ldaxr x9, [x11]
-; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
-; -O0:    subs w8, w8, #1
-;
-; -O1-LABEL: atomicrmw_fmin_double_aligned_monotonic:
-; -O1:    ldxr x8, [x0]
-; -O1:    stxr w9, x8, [x0]
+; CHECK-LABEL: atomicrmw_fmin_double_aligned_monotonic:
+; CHECK:    ldfminnm d0, d0, [x0]
     %r = atomicrmw fmin ptr %ptr, double %value monotonic, align 8
     ret double %r
 }
 
 define dso_local double @atomicrmw_fmin_double_aligned_acquire(ptr %ptr, double %value) {
-; -O0-LABEL: atomicrmw_fmin_double_aligned_acquire:
-; -O0:    ldaxr x9, [x11]
-; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
-; -O0:    subs w8, w8, #1
-;
-; -O1-LABEL: atomicrmw_fmin_double_aligned_acquire:
-; -O1:    ldaxr x8, [x0]
-; -O1:    stxr w9, x8, [x0]
+; CHECK-LABEL: atomicrmw_fmin_double_aligned_acquire:
+; CHECK:    ldfminnma d0, d0, [x0]
     %r = atomicrmw fmin ptr %ptr, double %value acquire, align 8
     ret double %r
 }
 
 define dso_local double @atomicrmw_fmin_double_aligned_release(ptr %ptr, double %value) {
-; -O0-LABEL: atomicrmw_fmin_double_aligned_release:
-; -O0:    ldaxr x9, [x11]
-; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
-; -O0:    subs w8, w8, #1
-;
-; -O1-LABEL: atomicrmw_fmin_double_aligned_release:
-; -O1:    ldxr x8, [x0]
-; -O1:    stlxr w9, x8, [x0]
+; CHECK-LABEL: atomicrmw_fmin_double_aligned_release:
+; CHECK:    ldfminnml d0, d0, [x0]
     %r = atomicrmw fmin ptr %ptr, double %value release, align 8
     ret double %r
 }
 
 define dso_local double @atomicrmw_fmin_double_aligned_acq_rel(ptr %ptr, double %value) {
-; -O0-LABEL: atomicrmw_fmin_double_aligned_acq_rel:
-; -O0:    ldaxr x9, [x11]
-; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
-; -O0:    subs w8, w8, #1
-;
-; -O1-LABEL: atomicrmw_fmin_double_aligned_acq_rel:
-; -O1:    ldaxr x8, [x0]
-; -O1:    stlxr w9, x8, [x0]
+; CHECK-LABEL: atomicrmw_fmin_double_aligned_acq_rel:
+; CHECK:    ldfminnmal d0, d0, [x0]
     %r = atomicrmw fmin ptr %ptr, double %value acq_rel, align 8
     ret double %r
 }
 
 define dso_local double @atomicrmw_fmin_double_aligned_seq_cst(ptr %ptr, double %value) {
-; -O0-LABEL: atomicrmw_fmin_double_aligned_seq_cst:
-; -O0:    ldaxr x9, [x11]
-; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
-; -O0:    subs w8, w8, #1
-;
-; -O1-LABEL: atomicrmw_fmin_double_aligned_seq_cst:
-; -O1:    ldaxr x8, [x0]
-; -O1:    stlxr w9, x8, [x0]
+; CHECK-LABEL: atomicrmw_fmin_double_aligned_seq_cst:
+; CHECK:    ldfminnmal d0, d0, [x0]
     %r = atomicrmw fmin ptr %ptr, double %value seq_cst, align 8
     ret double %r
 }


        


More information about the llvm-commits mailing list