[llvm] r310167 - [AArch64] LSE Atomics reorg - part 1

Joel Jones via llvm-commits llvm-commits at lists.llvm.org
Fri Aug 4 21:30:55 PDT 2017


Author: joel_k_jones
Date: Fri Aug  4 21:30:55 2017
New Revision: 310167

URL: http://llvm.org/viewvc/llvm-project?rev=310167&view=rev
Log:
[AArch64] LSE Atomics reorg - part 1

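Add memory synchronization (ordering) semantics to LSE Atomics: instruction
selection now picks the plain, acquire (A), release (L), or acquire-release
(AL) form of each LSE instruction based on the atomic operation's ordering.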

The memory semantics feature will be added in a subsequent patch.

This patch also makes several corrections to the existing LSE Atomics
implementation, based on the ARM Errata D11904 dated May 12, 2017.

Patch by: steleman

Differential Revision: https://reviews.llvm.org/D35319

Modified:
    llvm/trunk/include/llvm/Target/TargetSelectionDAG.td
    llvm/trunk/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp
    llvm/trunk/lib/Target/AArch64/AArch64InstrAtomics.td
    llvm/trunk/lib/Target/AArch64/AArch64InstrFormats.td
    llvm/trunk/lib/Target/AArch64/AArch64SchedThunderX2T99.td
    llvm/trunk/test/CodeGen/AArch64/atomic-ops-lse.ll

Modified: llvm/trunk/include/llvm/Target/TargetSelectionDAG.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Target/TargetSelectionDAG.td?rev=310167&r1=310166&r2=310167&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Target/TargetSelectionDAG.td (original)
+++ llvm/trunk/include/llvm/Target/TargetSelectionDAG.td Fri Aug  4 21:30:55 2017
@@ -1015,44 +1015,98 @@ def setle  : PatFrag<(ops node:$lhs, nod
 def setne  : PatFrag<(ops node:$lhs, node:$rhs),
                      (setcc node:$lhs, node:$rhs, SETNE)>;
 
-def atomic_cmp_swap_8 :
-  PatFrag<(ops node:$ptr, node:$cmp, node:$swap),
-          (atomic_cmp_swap node:$ptr, node:$cmp, node:$swap), [{
-  return cast<AtomicSDNode>(N)->getMemoryVT() == MVT::i8;
-}]>;
-def atomic_cmp_swap_16 :
-  PatFrag<(ops node:$ptr, node:$cmp, node:$swap),
-          (atomic_cmp_swap node:$ptr, node:$cmp, node:$swap), [{
-  return cast<AtomicSDNode>(N)->getMemoryVT() == MVT::i16;
-}]>;
-def atomic_cmp_swap_32 :
-  PatFrag<(ops node:$ptr, node:$cmp, node:$swap),
-          (atomic_cmp_swap node:$ptr, node:$cmp, node:$swap), [{
-  return cast<AtomicSDNode>(N)->getMemoryVT() == MVT::i32;
-}]>;
-def atomic_cmp_swap_64 :
-  PatFrag<(ops node:$ptr, node:$cmp, node:$swap),
-          (atomic_cmp_swap node:$ptr, node:$cmp, node:$swap), [{
-  return cast<AtomicSDNode>(N)->getMemoryVT() == MVT::i64;
-}]>;
+multiclass binary_atomic_op_ord<SDNode atomic_op> {
+  def #NAME#_monotonic : PatFrag<(ops node:$ptr, node:$val),
+      (!cast<SDNode>(#NAME) node:$ptr, node:$val), [{
+        return cast<AtomicSDNode>(N)->getOrdering() == AtomicOrdering::Monotonic;
+  }]>;
+  def #NAME#_acquire : PatFrag<(ops node:$ptr, node:$val),
+      (!cast<SDNode>(#NAME) node:$ptr, node:$val), [{
+        return cast<AtomicSDNode>(N)->getOrdering() == AtomicOrdering::Acquire;
+  }]>;
+  def #NAME#_release : PatFrag<(ops node:$ptr, node:$val),
+      (!cast<SDNode>(#NAME) node:$ptr, node:$val), [{
+        return cast<AtomicSDNode>(N)->getOrdering() == AtomicOrdering::Release;
+  }]>;
+  def #NAME#_acq_rel : PatFrag<(ops node:$ptr, node:$val),
+      (!cast<SDNode>(#NAME) node:$ptr, node:$val), [{
+        return cast<AtomicSDNode>(N)->getOrdering() == AtomicOrdering::AcquireRelease;
+  }]>;
+  def #NAME#_seq_cst : PatFrag<(ops node:$ptr, node:$val),
+      (!cast<SDNode>(#NAME) node:$ptr, node:$val), [{
+        return cast<AtomicSDNode>(N)->getOrdering() == AtomicOrdering::SequentiallyConsistent;
+  }]>;
+}
+
+multiclass ternary_atomic_op_ord<SDNode atomic_op> {
+  def #NAME#_monotonic : PatFrag<(ops node:$ptr, node:$cmp, node:$val),
+      (!cast<SDNode>(#NAME) node:$ptr, node:$cmp, node:$val), [{
+        return cast<AtomicSDNode>(N)->getOrdering() == AtomicOrdering::Monotonic;
+  }]>;
+  def #NAME#_acquire : PatFrag<(ops node:$ptr, node:$cmp, node:$val),
+      (!cast<SDNode>(#NAME) node:$ptr, node:$cmp, node:$val), [{
+        return cast<AtomicSDNode>(N)->getOrdering() == AtomicOrdering::Acquire;
+  }]>;
+  def #NAME#_release : PatFrag<(ops node:$ptr, node:$cmp, node:$val),
+      (!cast<SDNode>(#NAME) node:$ptr, node:$cmp, node:$val), [{
+        return cast<AtomicSDNode>(N)->getOrdering() == AtomicOrdering::Release;
+  }]>;
+  def #NAME#_acq_rel : PatFrag<(ops node:$ptr, node:$cmp, node:$val),
+      (!cast<SDNode>(#NAME) node:$ptr, node:$cmp, node:$val), [{
+        return cast<AtomicSDNode>(N)->getOrdering() == AtomicOrdering::AcquireRelease;
+  }]>;
+  def #NAME#_seq_cst : PatFrag<(ops node:$ptr, node:$cmp, node:$val),
+      (!cast<SDNode>(#NAME) node:$ptr, node:$cmp, node:$val), [{
+        return cast<AtomicSDNode>(N)->getOrdering() == AtomicOrdering::SequentiallyConsistent;
+  }]>;
+}
 
 multiclass binary_atomic_op<SDNode atomic_op> {
   def _8 : PatFrag<(ops node:$ptr, node:$val),
-                   (atomic_op node:$ptr, node:$val), [{
+                   (atomic_op  node:$ptr, node:$val), [{
     return cast<AtomicSDNode>(N)->getMemoryVT() == MVT::i8;
   }]>;
   def _16 : PatFrag<(ops node:$ptr, node:$val),
-                   (atomic_op node:$ptr, node:$val), [{
+                    (atomic_op node:$ptr, node:$val), [{
     return cast<AtomicSDNode>(N)->getMemoryVT() == MVT::i16;
   }]>;
   def _32 : PatFrag<(ops node:$ptr, node:$val),
-                   (atomic_op node:$ptr, node:$val), [{
+                    (atomic_op node:$ptr, node:$val), [{
     return cast<AtomicSDNode>(N)->getMemoryVT() == MVT::i32;
   }]>;
   def _64 : PatFrag<(ops node:$ptr, node:$val),
-                   (atomic_op node:$ptr, node:$val), [{
+                    (atomic_op node:$ptr, node:$val), [{
     return cast<AtomicSDNode>(N)->getMemoryVT() == MVT::i64;
   }]>;
+
+  defm NAME#_8  : binary_atomic_op_ord<atomic_op>;
+  defm NAME#_16 : binary_atomic_op_ord<atomic_op>;
+  defm NAME#_32 : binary_atomic_op_ord<atomic_op>;
+  defm NAME#_64 : binary_atomic_op_ord<atomic_op>;
+}
+
+multiclass ternary_atomic_op<SDNode atomic_op> {
+  def _8 : PatFrag<(ops node:$ptr, node:$cmp, node:$val),
+                   (atomic_op  node:$ptr, node:$cmp, node:$val), [{
+    return cast<AtomicSDNode>(N)->getMemoryVT() == MVT::i8;
+  }]>;
+  def _16 : PatFrag<(ops node:$ptr, node:$cmp, node:$val),
+                    (atomic_op node:$ptr, node:$cmp, node:$val), [{
+    return cast<AtomicSDNode>(N)->getMemoryVT() == MVT::i16;
+  }]>;
+  def _32 : PatFrag<(ops node:$ptr, node:$cmp, node:$val),
+                    (atomic_op node:$ptr, node:$cmp, node:$val), [{
+    return cast<AtomicSDNode>(N)->getMemoryVT() == MVT::i32;
+  }]>;
+  def _64 : PatFrag<(ops node:$ptr, node:$cmp, node:$val),
+                    (atomic_op node:$ptr, node:$cmp, node:$val), [{
+    return cast<AtomicSDNode>(N)->getMemoryVT() == MVT::i64;
+  }]>;
+
+  defm NAME#_8  : ternary_atomic_op_ord<atomic_op>;
+  defm NAME#_16 : ternary_atomic_op_ord<atomic_op>;
+  defm NAME#_32 : ternary_atomic_op_ord<atomic_op>;
+  defm NAME#_64 : ternary_atomic_op_ord<atomic_op>;
 }
 
 defm atomic_load_add  : binary_atomic_op<atomic_load_add>;
@@ -1067,6 +1121,7 @@ defm atomic_load_max  : binary_atomic_op
 defm atomic_load_umin : binary_atomic_op<atomic_load_umin>;
 defm atomic_load_umax : binary_atomic_op<atomic_load_umax>;
 defm atomic_store     : binary_atomic_op<atomic_store>;
+defm atomic_cmp_swap  : ternary_atomic_op<atomic_cmp_swap>;
 
 def atomic_load_8 :
   PatFrag<(ops node:$ptr),

Modified: llvm/trunk/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp?rev=310167&r1=310166&r2=310167&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp Fri Aug  4 21:30:55 2017
@@ -55,6 +55,8 @@ public:
     AU.setPreservesCFG();
     MachineFunctionPass::getAnalysisUsage(AU);
   }
+
+  bool shouldSkip(const MachineInstr &MI, const MachineFunction &MF) const;
 };
 char AArch64DeadRegisterDefinitions::ID = 0;
 } // end anonymous namespace
@@ -69,6 +71,63 @@ static bool usesFrameIndex(const Machine
   return false;
 }
 
+bool
+AArch64DeadRegisterDefinitions::shouldSkip(const MachineInstr &MI,
+                                           const MachineFunction &MF) const {
+  if (!MF.getSubtarget<AArch64Subtarget>().hasLSE())
+    return false;
+
+#define CASE_AARCH64_ATOMIC_(PREFIX) \
+  case AArch64::PREFIX##X: \
+  case AArch64::PREFIX##W: \
+  case AArch64::PREFIX##H: \
+  case AArch64::PREFIX##B
+
+  for (const MachineMemOperand *MMO : MI.memoperands()) {
+    if (MMO->isAtomic()) {
+      unsigned Opcode = MI.getOpcode();
+      switch (Opcode) {
+      default:
+        return false;
+        break;
+
+      CASE_AARCH64_ATOMIC_(LDADDA):
+      CASE_AARCH64_ATOMIC_(LDADDAL):
+
+      CASE_AARCH64_ATOMIC_(LDCLRA):
+      CASE_AARCH64_ATOMIC_(LDCLRAL):
+
+      CASE_AARCH64_ATOMIC_(LDEORA):
+      CASE_AARCH64_ATOMIC_(LDEORAL):
+
+      CASE_AARCH64_ATOMIC_(LDSETA):
+      CASE_AARCH64_ATOMIC_(LDSETAL):
+
+      CASE_AARCH64_ATOMIC_(LDSMAXA):
+      CASE_AARCH64_ATOMIC_(LDSMAXAL):
+
+      CASE_AARCH64_ATOMIC_(LDSMINA):
+      CASE_AARCH64_ATOMIC_(LDSMINAL):
+
+      CASE_AARCH64_ATOMIC_(LDUMAXA):
+      CASE_AARCH64_ATOMIC_(LDUMAXAL):
+
+      CASE_AARCH64_ATOMIC_(LDUMINA):
+      CASE_AARCH64_ATOMIC_(LDUMINAL):
+
+      CASE_AARCH64_ATOMIC_(SWPA):
+      CASE_AARCH64_ATOMIC_(SWPAL):
+        return true;
+        break;
+                                                                    }
+    }
+  }
+
+#undef CASE_AARCH64_ATOMIC_
+
+  return false;
+}
+
 void AArch64DeadRegisterDefinitions::processMachineBasicBlock(
     MachineBasicBlock &MBB) {
   const MachineFunction &MF = *MBB.getParent();
@@ -86,55 +145,12 @@ void AArch64DeadRegisterDefinitions::pro
       DEBUG(dbgs() << "    Ignoring, XZR or WZR already used by the instruction\n");
       continue;
     }
-    if (MF.getSubtarget<AArch64Subtarget>().hasLSE()) {
-      // XZ/WZ for LSE can only be used when acquire semantics are not used,
-      // LDOPAL WZ is an invalid opcode.
-      switch (MI.getOpcode()) {
-      case AArch64::CASALB:
-      case AArch64::CASALH:
-      case AArch64::CASALW:
-      case AArch64::CASALX:
-      case AArch64::SWPALB:
-      case AArch64::SWPALH:
-      case AArch64::SWPALW:
-      case AArch64::SWPALX:
-      case AArch64::LDADDALB:
-      case AArch64::LDADDALH:
-      case AArch64::LDADDALW:
-      case AArch64::LDADDALX:
-      case AArch64::LDCLRALB:
-      case AArch64::LDCLRALH:
-      case AArch64::LDCLRALW:
-      case AArch64::LDCLRALX:
-      case AArch64::LDEORALB:
-      case AArch64::LDEORALH:
-      case AArch64::LDEORALW:
-      case AArch64::LDEORALX:
-      case AArch64::LDSETALB:
-      case AArch64::LDSETALH:
-      case AArch64::LDSETALW:
-      case AArch64::LDSETALX:
-      case AArch64::LDSMINALB:
-      case AArch64::LDSMINALH:
-      case AArch64::LDSMINALW:
-      case AArch64::LDSMINALX:
-      case AArch64::LDSMAXALB:
-      case AArch64::LDSMAXALH:
-      case AArch64::LDSMAXALW:
-      case AArch64::LDSMAXALX:
-      case AArch64::LDUMINALB:
-      case AArch64::LDUMINALH:
-      case AArch64::LDUMINALW:
-      case AArch64::LDUMINALX:
-      case AArch64::LDUMAXALB:
-      case AArch64::LDUMAXALH:
-      case AArch64::LDUMAXALW:
-      case AArch64::LDUMAXALX:
-        continue;
-      default:
-        break;
-      }
+
+    if (shouldSkip(MI, MF)) {
+      DEBUG(dbgs() << "    Ignoring, Atomic instruction with acquire semantics using WZR/XZR\n");
+      continue;
     }
+
     const MCInstrDesc &Desc = MI.getDesc();
     for (int I = 0, E = Desc.getNumDefs(); I != E; ++I) {
       MachineOperand &MO = MI.getOperand(I);

Modified: llvm/trunk/lib/Target/AArch64/AArch64InstrAtomics.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64InstrAtomics.td?rev=310167&r1=310166&r2=310167&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64InstrAtomics.td (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64InstrAtomics.td Fri Aug  4 21:30:55 2017
@@ -407,57 +407,17 @@ def CMP_SWAP_128 : Pseudo<(outs GPR64:$R
                    Sched<[WriteAtomic]>;
 
 // v8.1 Atomic instructions:
-def : Pat<(atomic_load_add_8 GPR64:$Rn, GPR32:$Rs), (LDADDALB GPR32:$Rs, GPR64sp:$Rn)>;
-def : Pat<(atomic_load_add_16 GPR64:$Rn, GPR32:$Rs), (LDADDALH GPR32:$Rs, GPR64sp:$Rn)>;
-def : Pat<(atomic_load_add_32 GPR64:$Rn, GPR32:$Rs), (LDADDALW GPR32:$Rs, GPR64sp:$Rn)>;
-def : Pat<(atomic_load_add_64 GPR64:$Rn, GPR64:$Rs), (LDADDALX GPR64:$Rs, GPR64sp:$Rn)>;
+let Predicates = [HasLSE] in {
+  defm : LDOPregister_patterns<"LDADD", "atomic_load_add">;
+  defm : LDOPregister_patterns<"LDSET", "atomic_load_or">;
+  defm : LDOPregister_patterns<"LDEOR", "atomic_load_xor">;
+  defm : LDOPregister_patterns<"LDSMAX", "atomic_load_max">;
+  defm : LDOPregister_patterns<"LDSMIN", "atomic_load_min">;
+  defm : LDOPregister_patterns<"LDUMAX", "atomic_load_umax">;
+  defm : LDOPregister_patterns<"LDUMIN", "atomic_load_umin">;
+  defm : LDOPregister_patterns<"SWP", "atomic_swap">;
+  defm : LDOPregister_patterns_mod<"LDADD", "atomic_load_sub", "SUB">;
+  defm : LDOPregister_patterns_mod<"LDCLR", "atomic_load_and", "ORN">;
+  defm : CASregister_patterns<"CAS", "atomic_cmp_swap">;
+}
 
-def : Pat<(atomic_load_or_8 GPR64:$Rn, GPR32:$Rs), (LDSETALB GPR32:$Rs, GPR64sp:$Rn)>;
-def : Pat<(atomic_load_or_16 GPR64:$Rn, GPR32:$Rs), (LDSETALH GPR32:$Rs, GPR64sp:$Rn)>;
-def : Pat<(atomic_load_or_32 GPR64:$Rn, GPR32:$Rs), (LDSETALW GPR32:$Rs, GPR64sp:$Rn)>;
-def : Pat<(atomic_load_or_64 GPR64:$Rn, GPR64:$Rs), (LDSETALX GPR64:$Rs, GPR64sp:$Rn)>;
-
-def : Pat<(atomic_load_xor_8 GPR64:$Rn, GPR32:$Rs), (LDEORALB GPR32:$Rs, GPR64sp:$Rn)>;
-def : Pat<(atomic_load_xor_16 GPR64:$Rn, GPR32:$Rs), (LDEORALH GPR32:$Rs, GPR64sp:$Rn)>;
-def : Pat<(atomic_load_xor_32 GPR64:$Rn, GPR32:$Rs), (LDEORALW GPR32:$Rs, GPR64sp:$Rn)>;
-def : Pat<(atomic_load_xor_64 GPR64:$Rn, GPR64:$Rs), (LDEORALX GPR64:$Rs, GPR64sp:$Rn)>;
-
-def : Pat<(atomic_load_max_8 GPR64:$Rn, GPR32:$Rs), (LDSMAXALB GPR32:$Rs, GPR64sp:$Rn)>;
-def : Pat<(atomic_load_max_16 GPR64:$Rn, GPR32:$Rs), (LDSMAXALH GPR32:$Rs, GPR64sp:$Rn)>;
-def : Pat<(atomic_load_max_32 GPR64:$Rn, GPR32:$Rs), (LDSMAXALW GPR32:$Rs, GPR64sp:$Rn)>;
-def : Pat<(atomic_load_max_64 GPR64:$Rn, GPR64:$Rs), (LDSMAXALX GPR64:$Rs, GPR64sp:$Rn)>;
-
-def : Pat<(atomic_load_umax_8 GPR64:$Rn, GPR32:$Rs), (LDUMAXALB GPR32:$Rs, GPR64sp:$Rn)>;
-def : Pat<(atomic_load_umax_16 GPR64:$Rn, GPR32:$Rs), (LDUMAXALH GPR32:$Rs, GPR64sp:$Rn)>;
-def : Pat<(atomic_load_umax_32 GPR64:$Rn, GPR32:$Rs), (LDUMAXALW GPR32:$Rs, GPR64sp:$Rn)>;
-def : Pat<(atomic_load_umax_64 GPR64:$Rn, GPR64:$Rs), (LDUMAXALX GPR64:$Rs, GPR64sp:$Rn)>;
-
-def : Pat<(atomic_load_min_8 GPR64:$Rn, GPR32:$Rs), (LDSMINALB GPR32:$Rs, GPR64sp:$Rn)>;
-def : Pat<(atomic_load_min_16 GPR64:$Rn, GPR32:$Rs), (LDSMINALH GPR32:$Rs, GPR64sp:$Rn)>;
-def : Pat<(atomic_load_min_32 GPR64:$Rn, GPR32:$Rs), (LDSMINALW GPR32:$Rs, GPR64sp:$Rn)>;
-def : Pat<(atomic_load_min_64 GPR64:$Rn, GPR64:$Rs), (LDSMINALX GPR64:$Rs, GPR64sp:$Rn)>;
-
-def : Pat<(atomic_load_umin_8 GPR64:$Rn, GPR32:$Rs), (LDUMINALB GPR32:$Rs, GPR64sp:$Rn)>;
-def : Pat<(atomic_load_umin_16 GPR64:$Rn, GPR32:$Rs), (LDUMINALH GPR32:$Rs, GPR64sp:$Rn)>;
-def : Pat<(atomic_load_umin_32 GPR64:$Rn, GPR32:$Rs), (LDUMINALW GPR32:$Rs, GPR64sp:$Rn)>;
-def : Pat<(atomic_load_umin_64 GPR64:$Rn, GPR64:$Rs), (LDUMINALX GPR64:$Rs, GPR64sp:$Rn)>;
-
-def : Pat<(atomic_cmp_swap_8 GPR64:$Rn, GPR32:$Rold, GPR32:$Rnew), (CASALB GPR32:$Rold, GPR32:$Rnew, GPR64sp:$Rn)>;
-def : Pat<(atomic_cmp_swap_16 GPR64:$Rn, GPR32:$Rold, GPR32:$Rnew), (CASALH GPR32:$Rold, GPR32:$Rnew, GPR64sp:$Rn)>;
-def : Pat<(atomic_cmp_swap_32 GPR64:$Rn, GPR32:$Rold, GPR32:$Rnew), (CASALW GPR32:$Rold, GPR32:$Rnew, GPR64sp:$Rn)>;
-def : Pat<(atomic_cmp_swap_64 GPR64:$Rn, GPR64:$Rold, GPR64:$Rnew), (CASALX GPR64:$Rold, GPR64:$Rnew, GPR64sp:$Rn)>;
-
-def : Pat<(atomic_swap_8 GPR64:$Rn, GPR32:$Rs), (SWPALB GPR32:$Rs, GPR64sp:$Rn)>;
-def : Pat<(atomic_swap_16 GPR64:$Rn, GPR32:$Rs), (SWPALH GPR32:$Rs, GPR64sp:$Rn)>;
-def : Pat<(atomic_swap_32 GPR64:$Rn, GPR32:$Rs), (SWPALW GPR32:$Rs, GPR64sp:$Rn)>;
-def : Pat<(atomic_swap_64 GPR64:$Rn, GPR64:$Rs), (SWPALX GPR64:$Rs, GPR64sp:$Rn)>;
-
-def : Pat<(atomic_load_sub_8 GPR64:$Rn, GPR32:$Rs), (LDADDALB (SUBWrr WZR, GPR32:$Rs), GPR64sp:$Rn)>;
-def : Pat<(atomic_load_sub_16 GPR64:$Rn, GPR32:$Rs), (LDADDALH (SUBWrr WZR, GPR32:$Rs), GPR64sp:$Rn)>;
-def : Pat<(atomic_load_sub_32 GPR64:$Rn, GPR32:$Rs), (LDADDALW (SUBWrr WZR, GPR32:$Rs), GPR64sp:$Rn)>;
-def : Pat<(atomic_load_sub_64 GPR64:$Rn, GPR64:$Rs), (LDADDALX (SUBXrr XZR, GPR64:$Rs), GPR64sp:$Rn)>;
-
-def : Pat<(atomic_load_and_8 GPR64:$Rn, GPR32:$Rs), (LDCLRALB (ORNWrr WZR, GPR32:$Rs), GPR64sp:$Rn)>;
-def : Pat<(atomic_load_and_16 GPR64:$Rn, GPR32:$Rs), (LDCLRALH (ORNWrr WZR, GPR32:$Rs), GPR64sp:$Rn)>;
-def : Pat<(atomic_load_and_32 GPR64:$Rn, GPR32:$Rs), (LDCLRALW (ORNWrr WZR, GPR32:$Rs), GPR64sp:$Rn)>;
-def : Pat<(atomic_load_and_64 GPR64:$Rn, GPR64:$Rs), (LDCLRALX (ORNXrr XZR, GPR64:$Rs), GPR64sp:$Rn)>;

Modified: llvm/trunk/lib/Target/AArch64/AArch64InstrFormats.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64InstrFormats.td?rev=310167&r1=310166&r2=310167&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64InstrFormats.td (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64InstrFormats.td Fri Aug  4 21:30:55 2017
@@ -9490,6 +9490,86 @@ multiclass LDOPregister<bits<3> opc, str
     def X : BaseLDOPregister<op, order, "", GPR64>;
 }
 
+// Differing SrcRHS and DstRHS allow you to cover CLR & SUB by giving a more
+// complex DAG for DstRHS.
+let Predicates = [HasLSE] in
+multiclass LDOPregister_patterns_ord_dag<string inst, string suffix, string op,
+                                         string size, dag SrcRHS, dag DstRHS> {
+  def : Pat<(!cast<SDNode>(op#"_"#size#"_monotonic") GPR64sp:$Rn, SrcRHS),
+            (!cast<Instruction>(inst # suffix) DstRHS, GPR64sp:$Rn)>;
+  def : Pat<(!cast<SDNode>(op#"_"#size#"_acquire") GPR64sp:$Rn, SrcRHS),
+            (!cast<Instruction>(inst # "A" # suffix) DstRHS, GPR64sp:$Rn)>;
+  def : Pat<(!cast<SDNode>(op#"_"#size#"_release") GPR64sp:$Rn, SrcRHS),
+            (!cast<Instruction>(inst # "L" # suffix) DstRHS, GPR64sp:$Rn)>;
+  def : Pat<(!cast<SDNode>(op#"_"#size#"_acq_rel") GPR64sp:$Rn, SrcRHS),
+            (!cast<Instruction>(inst # "AL" # suffix) DstRHS, GPR64sp:$Rn)>;
+  def : Pat<(!cast<SDNode>(op#"_"#size#"_seq_cst") GPR64sp:$Rn, SrcRHS),
+            (!cast<Instruction>(inst # "AL" # suffix) DstRHS, GPR64sp:$Rn)>;
+}
+
+multiclass LDOPregister_patterns_ord<string inst, string suffix, string op,
+                                     string size, dag RHS> {
+  defm : LDOPregister_patterns_ord_dag<inst, suffix, op, size, RHS, RHS>;
+}
+
+multiclass LDOPregister_patterns_ord_mod<string inst, string suffix, string op,
+                                         string size, dag LHS, dag RHS> {
+  defm : LDOPregister_patterns_ord_dag<inst, suffix, op, size, LHS, RHS>;
+}
+
+multiclass LDOPregister_patterns<string inst, string op> {
+  defm : LDOPregister_patterns_ord<inst, "X", op, "64", (i64 GPR64:$Rm)>;
+  defm : LDOPregister_patterns_ord<inst, "W", op, "32", (i32 GPR32:$Rm)>;
+  defm : LDOPregister_patterns_ord<inst, "H", op, "16", (i32 GPR32:$Rm)>;
+  defm : LDOPregister_patterns_ord<inst, "B", op, "8",  (i32 GPR32:$Rm)>;
+}
+
+multiclass LDOPregister_patterns_mod<string inst, string op, string mod> {
+  defm : LDOPregister_patterns_ord_mod<inst, "X", op, "64",
+                        (i64 GPR64:$Rm),
+                        (i64 (!cast<Instruction>(mod#Xrr) XZR, GPR64:$Rm))>;
+  defm : LDOPregister_patterns_ord_mod<inst, "W", op, "32",
+                        (i32 GPR32:$Rm),
+                        (i32 (!cast<Instruction>(mod#Wrr) WZR, GPR32:$Rm))>;
+  defm : LDOPregister_patterns_ord_mod<inst, "H", op, "16",
+                        (i32 GPR32:$Rm),
+                        (i32 (!cast<Instruction>(mod#Wrr) WZR, GPR32:$Rm))>;
+  defm : LDOPregister_patterns_ord_mod<inst, "B", op, "8",
+                        (i32 GPR32:$Rm),
+                        (i32 (!cast<Instruction>(mod#Wrr) WZR, GPR32:$Rm))>;
+}
+
+let Predicates = [HasLSE] in
+multiclass CASregister_patterns_ord_dag<string inst, string suffix, string op,
+                                        string size, dag OLD, dag NEW> {
+  def : Pat<(!cast<SDNode>(op#"_"#size#"_monotonic") GPR64sp:$Rn, OLD, NEW),
+            (!cast<Instruction>(inst # suffix) OLD, NEW, GPR64sp:$Rn)>;
+  def : Pat<(!cast<SDNode>(op#"_"#size#"_acquire") GPR64sp:$Rn, OLD, NEW),
+            (!cast<Instruction>(inst # "A" # suffix) OLD, NEW, GPR64sp:$Rn)>;
+  def : Pat<(!cast<SDNode>(op#"_"#size#"_release") GPR64sp:$Rn, OLD, NEW),
+            (!cast<Instruction>(inst # "L" # suffix) OLD, NEW, GPR64sp:$Rn)>;
+  def : Pat<(!cast<SDNode>(op#"_"#size#"_acq_rel") GPR64sp:$Rn, OLD, NEW),
+            (!cast<Instruction>(inst # "AL" # suffix) OLD, NEW, GPR64sp:$Rn)>;
+  def : Pat<(!cast<SDNode>(op#"_"#size#"_seq_cst") GPR64sp:$Rn, OLD, NEW),
+            (!cast<Instruction>(inst # "AL" # suffix) OLD, NEW, GPR64sp:$Rn)>;
+}
+
+multiclass CASregister_patterns_ord<string inst, string suffix, string op,
+                                    string size, dag OLD, dag NEW> {
+  defm : CASregister_patterns_ord_dag<inst, suffix, op, size, OLD, NEW>;
+}
+
+multiclass CASregister_patterns<string inst, string op> {
+  defm : CASregister_patterns_ord<inst, "X", op, "64",
+                        (i64 GPR64:$Rold), (i64 GPR64:$Rnew)>;
+  defm : CASregister_patterns_ord<inst, "W", op, "32",
+                        (i32 GPR32:$Rold), (i32 GPR32:$Rnew)>;
+  defm : CASregister_patterns_ord<inst, "H", op, "16",
+                        (i32 GPR32:$Rold), (i32 GPR32:$Rnew)>;
+  defm : CASregister_patterns_ord<inst, "B", op, "8",
+                        (i32 GPR32:$Rold), (i32 GPR32:$Rnew)>;
+}
+
 let Predicates = [HasLSE] in
 class BaseSTOPregister<string asm, RegisterClass OP, Register Reg,
                         Instruction inst> :

Modified: llvm/trunk/lib/Target/AArch64/AArch64SchedThunderX2T99.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64SchedThunderX2T99.td?rev=310167&r1=310166&r2=310167&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64SchedThunderX2T99.td (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64SchedThunderX2T99.td Fri Aug  4 21:30:55 2017
@@ -315,6 +315,36 @@ def THX2T99Write_8Cyc_LS01_F01 : SchedWr
   let NumMicroOps = 3;
 }
 
+// 8 cycles on LS0 or LS1 and I0, I1, or I2.
+def THX2T99Write_8Cyc_I012 : SchedWriteRes<[THX2T99LS01, THX2T99I012]> {
+  let Latency = 8;
+  let NumMicroOps = 4;
+}
+
+// 12 cycles on LS0 or LS1 and I0, I1, or I2.
+def THX2T99Write_12Cyc_I012 : SchedWriteRes<[THX2T99LS01, THX2T99I012]> {
+  let Latency = 12;
+  let NumMicroOps = 6;
+}
+
+// 16 cycles on LS0 or LS1 and I0, I1, or I2.
+def THX2T99Write_16Cyc_I012 : SchedWriteRes<[THX2T99LS01, THX2T99I012]> {
+  let Latency = 16;
+  let NumMicroOps = 8;
+}
+
+// 24 cycles on LS0 or LS1 and I0, I1, or I2.
+def THX2T99Write_24Cyc_I012 : SchedWriteRes<[THX2T99LS01, THX2T99I012]> {
+  let Latency = 24;
+  let NumMicroOps = 12;
+}
+
+// 32 cycles on LS0 or LS1 and I0, I1, or I2.
+def THX2T99Write_32Cyc_I012 : SchedWriteRes<[THX2T99LS01, THX2T99I012]> {
+  let Latency = 32;
+  let NumMicroOps = 16;
+}
+
 // Define commonly used read types.
 
 // No forwarding is provided for these types.
@@ -1741,5 +1771,108 @@ def : InstRW<[THX2T99Write_1Cyc_LS01_F01
 def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr],
             (instregex "^ST4i(8|16|32|64)_POST$")>;
 
+// V8.1a Atomics (LSE)
+def : InstRW<[THX2T99Write_8Cyc_I012, WriteAtomic],
+            (instrs CASB, CASH, CASW, CASX)>;
+
+def : InstRW<[THX2T99Write_12Cyc_I012, WriteAtomic],
+            (instrs CASAB, CASAH, CASAW, CASAX)>;
+
+def : InstRW<[THX2T99Write_12Cyc_I012, WriteAtomic],
+            (instrs CASLB, CASLH, CASLW, CASLX)>;
+
+def : InstRW<[THX2T99Write_16Cyc_I012, WriteAtomic],
+            (instrs CASALB, CASALH, CASALW, CASALX)>;
+
+def : InstRW<[THX2T99Write_12Cyc_I012, WriteAtomic],
+            (instrs LDLARB, LDLARH, LDLARW, LDLARX)>;
+
+def : InstRW<[THX2T99Write_8Cyc_I012, WriteAtomic],
+            (instrs LDADDB, LDADDH, LDADDW, LDADDX)>;
+
+def : InstRW<[THX2T99Write_12Cyc_I012, WriteAtomic],
+            (instrs LDADDAB, LDADDAH, LDADDAW, LDADDAX)>;
+
+def : InstRW<[THX2T99Write_12Cyc_I012, WriteAtomic],
+            (instrs LDADDLB, LDADDLH, LDADDLW, LDADDLX)>;
+
+def : InstRW<[THX2T99Write_16Cyc_I012, WriteAtomic],
+            (instrs LDADDALB, LDADDALH, LDADDALW, LDADDALX)>;
+
+def : InstRW<[THX2T99Write_8Cyc_I012, WriteAtomic],
+            (instrs LDCLRB, LDCLRH, LDCLRW, LDCLRX)>;
+
+def : InstRW<[THX2T99Write_12Cyc_I012, WriteAtomic],
+            (instrs LDCLRAB, LDCLRAH, LDCLRAW, LDCLRAX)>;
+
+def : InstRW<[THX2T99Write_12Cyc_I012, WriteAtomic],
+            (instrs LDCLRLB, LDCLRLH, LDCLRLW, LDCLRLX)>;
+
+def : InstRW<[THX2T99Write_16Cyc_I012, WriteAtomic],
+            (instrs LDCLRALB, LDCLRALH, LDCLRALW, LDCLRALX)>;
+
+def : InstRW<[THX2T99Write_8Cyc_I012, WriteAtomic],
+            (instrs LDEORB, LDEORH, LDEORW, LDEORX)>;
+
+def : InstRW<[THX2T99Write_12Cyc_I012, WriteAtomic],
+            (instrs LDEORAB, LDEORAH, LDEORAW, LDEORAX)>;
+
+def : InstRW<[THX2T99Write_12Cyc_I012, WriteAtomic],
+            (instrs LDEORLB, LDEORLH, LDEORLW, LDEORLX)>;
+
+def : InstRW<[THX2T99Write_16Cyc_I012, WriteAtomic],
+            (instrs LDEORALB, LDEORALH, LDEORALW, LDEORALX)>;
+
+def : InstRW<[THX2T99Write_8Cyc_I012, WriteAtomic],
+            (instrs LDSETB, LDSETH, LDSETW, LDSETX)>;
+
+def : InstRW<[THX2T99Write_12Cyc_I012, WriteAtomic],
+            (instrs LDSETAB, LDSETAH, LDSETAW, LDSETAX)>;
+
+def : InstRW<[THX2T99Write_12Cyc_I012, WriteAtomic],
+            (instrs LDSETLB, LDSETLH, LDSETLW, LDSETLX)>;
+
+def : InstRW<[THX2T99Write_16Cyc_I012, WriteAtomic],
+            (instrs LDSETALB, LDSETALH, LDSETALW, LDSETALX)>;
+
+def : InstRW<[THX2T99Write_8Cyc_I012, WriteAtomic],
+            (instrs LDSMAXB, LDSMAXH, LDSMAXW, LDSMAXX,
+             LDSMAXAB, LDSMAXAH, LDSMAXAW, LDSMAXAX,
+             LDSMAXLB, LDSMAXLH, LDSMAXLW, LDSMAXLX,
+             LDSMAXALB, LDSMAXALH, LDSMAXALW, LDSMAXALX)>;
+
+def : InstRW<[THX2T99Write_8Cyc_I012, WriteAtomic],
+            (instrs LDSMINB, LDSMINH, LDSMINW, LDSMINX,
+             LDSMINAB, LDSMINAH, LDSMINAW, LDSMINAX,
+             LDSMINLB, LDSMINLH, LDSMINLW, LDSMINLX,
+             LDSMINALB, LDSMINALH, LDSMINALW, LDSMINALX)>;
+
+def : InstRW<[THX2T99Write_8Cyc_I012, WriteAtomic],
+            (instrs LDUMAXB, LDUMAXH, LDUMAXW, LDUMAXX,
+             LDUMAXAB, LDUMAXAH, LDUMAXAW, LDUMAXAX,
+             LDUMAXLB, LDUMAXLH, LDUMAXLW, LDUMAXLX,
+             LDUMAXALB, LDUMAXALH, LDUMAXALW, LDUMAXALX)>;
+
+def : InstRW<[THX2T99Write_8Cyc_I012, WriteAtomic],
+            (instrs LDUMINB, LDUMINH, LDUMINW, LDUMINX,
+             LDUMINAB, LDUMINAH, LDUMINAW, LDUMINAX,
+             LDUMINLB, LDUMINLH, LDUMINLW, LDUMINLX,
+             LDUMINALB, LDUMINALH, LDUMINALW, LDUMINALX)>;
+
+def : InstRW<[THX2T99Write_8Cyc_I012, WriteAtomic],
+            (instrs SWPB, SWPH, SWPW, SWPX)>;
+
+def : InstRW<[THX2T99Write_12Cyc_I012, WriteAtomic],
+            (instrs SWPAB, SWPAH, SWPAW, SWPAX)>;
+
+def : InstRW<[THX2T99Write_12Cyc_I012, WriteAtomic],
+            (instrs SWPLB, SWPLH, SWPLW, SWPLX)>;
+
+def : InstRW<[THX2T99Write_16Cyc_I012, WriteAtomic],
+            (instrs SWPALB, SWPALH, SWPALW, SWPALX)>;
+
+def : InstRW<[THX2T99Write_8Cyc_I012, WriteAtomic],
+            (instrs STLLRB, STLLRH, STLLRW, STLLRX)>;
+
 } // SchedModel = ThunderX2T99Model
 

Modified: llvm/trunk/test/CodeGen/AArch64/atomic-ops-lse.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/atomic-ops-lse.ll?rev=310167&r1=310166&r2=310167&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/atomic-ops-lse.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/atomic-ops-lse.ll Fri Aug  4 21:30:55 2017
@@ -630,7 +630,7 @@ define i8 @test_atomic_cmpxchg_i8(i8 %wa
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
 
-; CHECK: casalb w[[NEW:[0-9]+]], w[[OLD:[0-9]+]], [x[[ADDR]]]
+; CHECK: casab w[[NEW:[0-9]+]], w[[OLD:[0-9]+]], [x[[ADDR]]]
 ; CHECK-NOT: dmb
 
    ret i8 %old
@@ -645,7 +645,7 @@ define i16 @test_atomic_cmpxchg_i16(i16
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
 
-; CHECK: casalh w0, w1, [x[[ADDR]]]
+; CHECK: casah w0, w1, [x[[ADDR]]]
 ; CHECK-NOT: dmb
 
    ret i16 %old
@@ -660,7 +660,7 @@ define i32 @test_atomic_cmpxchg_i32(i32
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: casal w0, w1, [x[[ADDR]]]
+; CHECK: casa w0, w1, [x[[ADDR]]]
 ; CHECK-NOT: dmb
 
    ret i32 %old
@@ -675,7 +675,7 @@ define i64 @test_atomic_cmpxchg_i64(i64
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: casal x0, x1, [x[[ADDR]]]
+; CHECK: casa x0, x1, [x[[ADDR]]]
 ; CHECK-NOT: dmb
 
    ret i64 %old
@@ -842,3 +842,4045 @@ define void @test_atomic_load_and_i64_no
 ; CHECK-NOT: dmb
   ret void
 }
+
+define i8 @test_atomic_load_add_i8_acq_rel(i8 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_add_i8_acq_rel:
+   %old = atomicrmw add i8* @var8, i8 %offset acq_rel
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
+
+; CHECK: ldaddalb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i8 %old
+}
+
+define i16 @test_atomic_load_add_i16_acq_rel(i16 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_add_i16_acq_rel:
+   %old = atomicrmw add i16* @var16, i16 %offset acq_rel
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
+
+; CHECK: ldaddalh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i16 %old
+}
+
+define i32 @test_atomic_load_add_i32_acq_rel(i32 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_add_i32_acq_rel:
+   %old = atomicrmw add i32* @var32, i32 %offset acq_rel
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+
+; CHECK: ldaddal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i32 %old
+}
+
+define i64 @test_atomic_load_add_i64_acq_rel(i64 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_add_i64_acq_rel:
+   %old = atomicrmw add i64* @var64, i64 %offset acq_rel
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+
+; CHECK: ldaddal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i64 %old
+}
+
+define void @test_atomic_load_add_i32_noret_acq_rel(i32 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_add_i32_noret_acq_rel:
+   atomicrmw add i32* @var32, i32 %offset acq_rel
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+
+; CHECK: ldaddal w0, w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+  ret void
+}
+
+define void @test_atomic_load_add_i64_noret_acq_rel(i64 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_add_i64_noret_acq_rel:
+   atomicrmw add i64* @var64, i64 %offset acq_rel
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+
+; CHECK: ldaddal x0, x[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+  ret void
+}
+
+define i8 @test_atomic_load_add_i8_acquire(i8 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_add_i8_acquire:
+   %old = atomicrmw add i8* @var8, i8 %offset acquire
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
+
+; CHECK: ldaddab w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i8 %old
+}
+
+define i16 @test_atomic_load_add_i16_acquire(i16 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_add_i16_acquire:
+   %old = atomicrmw add i16* @var16, i16 %offset acquire
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
+
+; CHECK: ldaddah w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i16 %old
+}
+
+define i32 @test_atomic_load_add_i32_acquire(i32 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_add_i32_acquire:
+   %old = atomicrmw add i32* @var32, i32 %offset acquire
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+
+; CHECK: ldadda w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i32 %old
+}
+
+define i64 @test_atomic_load_add_i64_acquire(i64 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_add_i64_acquire:
+   %old = atomicrmw add i64* @var64, i64 %offset acquire
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+
+; CHECK: ldadda x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i64 %old
+}
+
+define void @test_atomic_load_add_i32_noret_acquire(i32 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_add_i32_noret_acquire:
+   atomicrmw add i32* @var32, i32 %offset acquire
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+
+; CHECK: ldadda w0, w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+  ret void
+}
+
+define void @test_atomic_load_add_i64_noret_acquire(i64 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_add_i64_noret_acquire:
+   atomicrmw add i64* @var64, i64 %offset acquire
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+
+; CHECK: ldadda x0, x[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+  ret void
+}
+
+define i8 @test_atomic_load_add_i8_monotonic(i8 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_add_i8_monotonic:
+   %old = atomicrmw add i8* @var8, i8 %offset monotonic
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
+
+; CHECK: ldaddb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i8 %old
+}
+
+define i16 @test_atomic_load_add_i16_monotonic(i16 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_add_i16_monotonic:
+   %old = atomicrmw add i16* @var16, i16 %offset monotonic
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
+
+; CHECK: ldaddh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i16 %old
+}
+
+define i32 @test_atomic_load_add_i32_monotonic(i32 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_add_i32_monotonic:
+; Monotonic atomicrmw add on @var32 whose old value is returned: expects a
+; plain ldadd with no dmb before or after it. The second register pattern
+; accepts digits and lowercase letters; the stray ',' that used to sit inside
+; the bracket expression (where it is a literal, not a separator) is removed.
+   %old = atomicrmw add i32* @var32, i32 %offset monotonic
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+
+; CHECK: ldadd w[[OLD:[0-9]+]], w[[NEW:[0-9a-z]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i32 %old
+}
+
+define i64 @test_atomic_load_add_i64_monotonic(i64 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_add_i64_monotonic:
+; Monotonic atomicrmw add on @var64 whose old value is returned: expects a
+; plain ldadd with no dmb before or after it. The second register pattern
+; accepts digits and lowercase letters; the stray ',' that used to sit inside
+; the bracket expression (where it is a literal, not a separator) is removed.
+   %old = atomicrmw add i64* @var64, i64 %offset monotonic
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+
+; CHECK: ldadd x[[OLD:[0-9]+]], x[[NEW:[0-9a-z]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i64 %old
+}
+
+define void @test_atomic_load_add_i32_noret_monotonic(i32 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_add_i32_noret_monotonic:
+   atomicrmw add i32* @var32, i32 %offset monotonic
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+
+; CHECK: stadd w0, [x[[ADDR]]]
+; CHECK-NOT: dmb
+  ret void
+}
+
+define void @test_atomic_load_add_i64_noret_monotonic(i64 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_add_i64_noret_monotonic:
+   atomicrmw add i64* @var64, i64 %offset monotonic
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+
+; CHECK: stadd x0, [x[[ADDR]]]
+; CHECK-NOT: dmb
+  ret void
+}
+
+define i8 @test_atomic_load_add_i8_release(i8 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_add_i8_release:
+   %old = atomicrmw add i8* @var8, i8 %offset release
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
+
+; CHECK: ldaddlb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i8 %old
+}
+
+define i16 @test_atomic_load_add_i16_release(i16 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_add_i16_release:
+   %old = atomicrmw add i16* @var16, i16 %offset release
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
+
+; CHECK: ldaddlh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i16 %old
+}
+
+define i32 @test_atomic_load_add_i32_release(i32 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_add_i32_release:
+   %old = atomicrmw add i32* @var32, i32 %offset release
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+
+; CHECK: ldaddl w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i32 %old
+}
+
+define i64 @test_atomic_load_add_i64_release(i64 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_add_i64_release:
+   %old = atomicrmw add i64* @var64, i64 %offset release
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+
+; CHECK: ldaddl x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i64 %old
+}
+
+define void @test_atomic_load_add_i32_noret_release(i32 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_add_i32_noret_release:
+   atomicrmw add i32* @var32, i32 %offset release
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+
+; CHECK: staddl w0, [x[[ADDR]]]
+; CHECK-NOT: dmb
+  ret void
+}
+
+define void @test_atomic_load_add_i64_noret_release(i64 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_add_i64_noret_release:
+   atomicrmw add i64* @var64, i64 %offset release
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+
+; CHECK: staddl x0, [x[[ADDR]]]
+; CHECK-NOT: dmb
+  ret void
+}
+
+define i8 @test_atomic_load_add_i8_seq_cst(i8 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_add_i8_seq_cst:
+   %old = atomicrmw add i8* @var8, i8 %offset seq_cst
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
+
+; CHECK: ldaddalb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i8 %old
+}
+
+define i16 @test_atomic_load_add_i16_seq_cst(i16 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_add_i16_seq_cst:
+   %old = atomicrmw add i16* @var16, i16 %offset seq_cst
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
+
+; CHECK: ldaddalh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i16 %old
+}
+
+define i32 @test_atomic_load_add_i32_seq_cst(i32 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_add_i32_seq_cst:
+   %old = atomicrmw add i32* @var32, i32 %offset seq_cst
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+
+; CHECK: ldaddal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i32 %old
+}
+
+define i64 @test_atomic_load_add_i64_seq_cst(i64 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_add_i64_seq_cst:
+   %old = atomicrmw add i64* @var64, i64 %offset seq_cst
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+
+; CHECK: ldaddal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i64 %old
+}
+
+define void @test_atomic_load_add_i32_noret_seq_cst(i32 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_add_i32_noret_seq_cst:
+   atomicrmw add i32* @var32, i32 %offset seq_cst
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+
+; CHECK: ldaddal w0, w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+  ret void
+}
+
+define void @test_atomic_load_add_i64_noret_seq_cst(i64 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_add_i64_noret_seq_cst:
+   atomicrmw add i64* @var64, i64 %offset seq_cst
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+
+; CHECK: ldaddal x0, x[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+  ret void
+}
+
+define i8 @test_atomic_load_and_i8_acq_rel(i8 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_and_i8_acq_rel:
+  %old = atomicrmw and i8* @var8, i8 %offset acq_rel
+; CHECK-NOT: dmb
+; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]]
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
+
+; CHECK: ldclralb w[[NOT]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+  ret i8 %old
+}
+
+define i16 @test_atomic_load_and_i16_acq_rel(i16 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_and_i16_acq_rel:
+  %old = atomicrmw and i16* @var16, i16 %offset acq_rel
+; CHECK-NOT: dmb
+; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]]
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
+
+; CHECK: ldclralh w[[NOT]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+  ret i16 %old
+}
+
+define i32 @test_atomic_load_and_i32_acq_rel(i32 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_and_i32_acq_rel:
+  %old = atomicrmw and i32* @var32, i32 %offset acq_rel
+; CHECK-NOT: dmb
+; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]]
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+
+; CHECK: ldclral w[[NOT]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+  ret i32 %old
+}
+
+define i64 @test_atomic_load_and_i64_acq_rel(i64 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_and_i64_acq_rel:
+  %old = atomicrmw and i64* @var64, i64 %offset acq_rel
+; CHECK-NOT: dmb
+; CHECK: mvn x[[NOT:[0-9]+]], x[[OLD:[0-9]+]]
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+
+; CHECK: ldclral x[[NOT]], x[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+  ret i64 %old
+}
+
+define void @test_atomic_load_and_i32_noret_acq_rel(i32 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_and_i32_noret_acq_rel:
+  atomicrmw and i32* @var32, i32 %offset acq_rel
+; CHECK-NOT: dmb
+; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]]
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+
+; CHECK: ldclral w[[NOT]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+  ret void
+}
+
+define void @test_atomic_load_and_i64_noret_acq_rel(i64 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_and_i64_noret_acq_rel:
+  atomicrmw and i64* @var64, i64 %offset acq_rel
+; CHECK-NOT: dmb
+; CHECK: mvn x[[NOT:[0-9]+]], x[[OLD:[0-9]+]]
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+
+; CHECK: ldclral x[[NOT]], x[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+  ret void
+}
+
+define i8 @test_atomic_load_and_i8_acquire(i8 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_and_i8_acquire:
+  %old = atomicrmw and i8* @var8, i8 %offset acquire
+; CHECK-NOT: dmb
+; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]]
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
+
+; CHECK: ldclrab w[[NOT]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+  ret i8 %old
+}
+
+define i16 @test_atomic_load_and_i16_acquire(i16 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_and_i16_acquire:
+  %old = atomicrmw and i16* @var16, i16 %offset acquire
+; CHECK-NOT: dmb
+; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]]
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
+
+; CHECK: ldclrah w[[NOT]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+  ret i16 %old
+}
+
+define i32 @test_atomic_load_and_i32_acquire(i32 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_and_i32_acquire:
+  %old = atomicrmw and i32* @var32, i32 %offset acquire
+; CHECK-NOT: dmb
+; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]]
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+
+; CHECK: ldclra w[[NOT]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+  ret i32 %old
+}
+
+define i64 @test_atomic_load_and_i64_acquire(i64 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_and_i64_acquire:
+  %old = atomicrmw and i64* @var64, i64 %offset acquire
+; CHECK-NOT: dmb
+; CHECK: mvn x[[NOT:[0-9]+]], x[[OLD:[0-9]+]]
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+
+; CHECK: ldclra x[[NOT]], x[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+  ret i64 %old
+}
+
+define void @test_atomic_load_and_i32_noret_acquire(i32 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_and_i32_noret_acquire:
+  atomicrmw and i32* @var32, i32 %offset acquire
+; CHECK-NOT: dmb
+; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]]
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+
+; CHECK: ldclra w[[NOT]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+  ret void
+}
+
+define void @test_atomic_load_and_i64_noret_acquire(i64 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_and_i64_noret_acquire:
+  atomicrmw and i64* @var64, i64 %offset acquire
+; CHECK-NOT: dmb
+; CHECK: mvn x[[NOT:[0-9]+]], x[[OLD:[0-9]+]]
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+
+; CHECK: ldclra x[[NOT]], x[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+  ret void
+}
+
+define i8 @test_atomic_load_and_i8_monotonic(i8 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_and_i8_monotonic:
+  %old = atomicrmw and i8* @var8, i8 %offset monotonic
+; CHECK-NOT: dmb
+; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]]
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
+
+; CHECK: ldclrb w[[NOT]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+  ret i8 %old
+}
+
+define i16 @test_atomic_load_and_i16_monotonic(i16 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_and_i16_monotonic:
+  %old = atomicrmw and i16* @var16, i16 %offset monotonic
+; CHECK-NOT: dmb
+; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]]
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
+
+; CHECK: ldclrh w[[NOT]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+  ret i16 %old
+}
+
+define i32 @test_atomic_load_and_i32_monotonic(i32 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_and_i32_monotonic:
+  %old = atomicrmw and i32* @var32, i32 %offset monotonic
+; CHECK-NOT: dmb
+; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]]
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+
+; CHECK: ldclr w[[NOT]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+  ret i32 %old
+}
+
+define i64 @test_atomic_load_and_i64_monotonic(i64 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_and_i64_monotonic:
+  %old = atomicrmw and i64* @var64, i64 %offset monotonic
+; CHECK-NOT: dmb
+; CHECK: mvn x[[NOT:[0-9]+]], x[[OLD:[0-9]+]]
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+
+; CHECK: ldclr x[[NOT]], x[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+  ret i64 %old
+}
+
+define void @test_atomic_load_and_i32_noret_monotonic(i32 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_and_i32_noret_monotonic:
+; Monotonic atomicrmw and on @var32 with an unused result: the operand is
+; inverted with mvn and then passed to stclr, with no dmb around it. The
+; stclr operand is tied to the mvn destination so the test fails if an
+; un-inverted register is stored.
+  atomicrmw and i32* @var32, i32 %offset monotonic
+; CHECK-NOT: dmb
+; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]]
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+
+; CHECK: stclr w[[NOT]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+  ret void
+}
+
+define void @test_atomic_load_and_i64_noret_monotonic(i64 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_and_i64_noret_monotonic:
+; Monotonic atomicrmw and on @var64 with an unused result: the operand is
+; inverted with mvn and then passed to stclr, with no dmb around it. The
+; stclr operand is tied to the mvn destination so the test fails if an
+; un-inverted register is stored.
+  atomicrmw and i64* @var64, i64 %offset monotonic
+; CHECK-NOT: dmb
+; CHECK: mvn x[[NOT:[0-9]+]], x[[OLD:[0-9]+]]
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+
+; CHECK: stclr x[[NOT]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+  ret void
+}
+
+define i8 @test_atomic_load_and_i8_release(i8 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_and_i8_release:
+  %old = atomicrmw and i8* @var8, i8 %offset release
+; CHECK-NOT: dmb
+; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]]
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
+
+; CHECK: ldclrlb w[[NOT]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+  ret i8 %old
+}
+
+define i16 @test_atomic_load_and_i16_release(i16 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_and_i16_release:
+  %old = atomicrmw and i16* @var16, i16 %offset release
+; CHECK-NOT: dmb
+; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]]
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
+
+; CHECK: ldclrlh w[[NOT]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+  ret i16 %old
+}
+
+define i32 @test_atomic_load_and_i32_release(i32 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_and_i32_release:
+  %old = atomicrmw and i32* @var32, i32 %offset release
+; CHECK-NOT: dmb
+; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]]
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+
+; CHECK: ldclrl w[[NOT]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+  ret i32 %old
+}
+
+define i64 @test_atomic_load_and_i64_release(i64 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_and_i64_release:
+  %old = atomicrmw and i64* @var64, i64 %offset release
+; CHECK-NOT: dmb
+; CHECK: mvn x[[NOT:[0-9]+]], x[[OLD:[0-9]+]]
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+
+; CHECK: ldclrl x[[NOT]], x[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+  ret i64 %old
+}
+
+define void @test_atomic_load_and_i32_noret_release(i32 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_and_i32_noret_release:
+; Release atomicrmw and on @var32 with an unused result: the operand is
+; inverted with mvn and then passed to stclrl, with no dmb around it. The
+; stclrl operand is tied to the mvn destination so the test fails if an
+; un-inverted register is stored.
+  atomicrmw and i32* @var32, i32 %offset release
+; CHECK-NOT: dmb
+; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]]
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+
+; CHECK: stclrl w[[NOT]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+  ret void
+}
+
+define void @test_atomic_load_and_i64_noret_release(i64 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_and_i64_noret_release:
+; Release atomicrmw and on @var64 with an unused result: the operand is
+; inverted with mvn and then passed to stclrl, with no dmb around it. The
+; stclrl operand is tied to the mvn destination so the test fails if an
+; un-inverted register is stored.
+  atomicrmw and i64* @var64, i64 %offset release
+; CHECK-NOT: dmb
+; CHECK: mvn x[[NOT:[0-9]+]], x[[OLD:[0-9]+]]
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+
+; CHECK: stclrl x[[NOT]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+  ret void
+}
+
+define i8 @test_atomic_load_and_i8_seq_cst(i8 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_and_i8_seq_cst:
+  %old = atomicrmw and i8* @var8, i8 %offset seq_cst
+; CHECK-NOT: dmb
+; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]]
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
+
+; CHECK: ldclralb w[[NOT]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+  ret i8 %old
+}
+
+define i16 @test_atomic_load_and_i16_seq_cst(i16 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_and_i16_seq_cst:
+  %old = atomicrmw and i16* @var16, i16 %offset seq_cst
+; CHECK-NOT: dmb
+; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]]
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
+
+; CHECK: ldclralh w[[NOT]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+  ret i16 %old
+}
+
+define i32 @test_atomic_load_and_i32_seq_cst(i32 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_and_i32_seq_cst:
+  %old = atomicrmw and i32* @var32, i32 %offset seq_cst
+; CHECK-NOT: dmb
+; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]]
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+
+; CHECK: ldclral w[[NOT]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+  ret i32 %old
+}
+
+define i64 @test_atomic_load_and_i64_seq_cst(i64 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_and_i64_seq_cst:
+  %old = atomicrmw and i64* @var64, i64 %offset seq_cst
+; CHECK-NOT: dmb
+; CHECK: mvn x[[NOT:[0-9]+]], x[[OLD:[0-9]+]]
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+
+; CHECK: ldclral x[[NOT]], x[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+  ret i64 %old
+}
+
+define void @test_atomic_load_and_i32_noret_seq_cst(i32 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_and_i32_noret_seq_cst:
+  atomicrmw and i32* @var32, i32 %offset seq_cst
+; CHECK-NOT: dmb
+; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]]
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+
+; CHECK: ldclral w[[NOT]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+  ret void
+}
+
+define void @test_atomic_load_and_i64_noret_seq_cst(i64 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_and_i64_noret_seq_cst:
+  atomicrmw and i64* @var64, i64 %offset seq_cst
+; CHECK-NOT: dmb
+; CHECK: mvn x[[NOT:[0-9]+]], x[[OLD:[0-9]+]]
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+
+; CHECK: ldclral x[[NOT]], x[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+  ret void
+}
+
+define i8 @test_atomic_cmpxchg_i8_acquire(i8 %wanted, i8 %new) nounwind {
+; CHECK-LABEL: test_atomic_cmpxchg_i8_acquire:
+   %pair = cmpxchg i8* @var8, i8 %wanted, i8 %new acquire acquire
+   %old = extractvalue { i8, i1 } %pair, 0
+
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
+
+; CHECK: casab w[[NEW:[0-9]+]], w[[OLD:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i8 %old
+}
+
+define i16 @test_atomic_cmpxchg_i16_acquire(i16 %wanted, i16 %new) nounwind {
+; CHECK-LABEL: test_atomic_cmpxchg_i16_acquire:
+   %pair = cmpxchg i16* @var16, i16 %wanted, i16 %new acquire acquire
+   %old = extractvalue { i16, i1 } %pair, 0
+
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
+
+; CHECK: casah w0, w1, [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i16 %old
+}
+
+define i32 @test_atomic_cmpxchg_i32_acquire(i32 %wanted, i32 %new) nounwind {
+; CHECK-LABEL: test_atomic_cmpxchg_i32_acquire:
+   %pair = cmpxchg i32* @var32, i32 %wanted, i32 %new acquire acquire
+   %old = extractvalue { i32, i1 } %pair, 0
+
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+
+; CHECK: casa w0, w1, [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i32 %old
+}
+
+define i64 @test_atomic_cmpxchg_i64_acquire(i64 %wanted, i64 %new) nounwind {
+; CHECK-LABEL: test_atomic_cmpxchg_i64_acquire:
+   %pair = cmpxchg i64* @var64, i64 %wanted, i64 %new acquire acquire
+   %old = extractvalue { i64, i1 } %pair, 0
+
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+
+; CHECK: casa x0, x1, [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i64 %old
+}
+
+define i8 @test_atomic_cmpxchg_i8_monotonic(i8 %wanted, i8 %new) nounwind {
+; CHECK-LABEL: test_atomic_cmpxchg_i8_monotonic:
+   %pair = cmpxchg i8* @var8, i8 %wanted, i8 %new monotonic monotonic
+   %old = extractvalue { i8, i1 } %pair, 0
+
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
+
+; CHECK: casb w[[NEW:[0-9]+]], w[[OLD:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i8 %old
+}
+
+define i16 @test_atomic_cmpxchg_i16_monotonic(i16 %wanted, i16 %new) nounwind {
+; CHECK-LABEL: test_atomic_cmpxchg_i16_monotonic:
+   %pair = cmpxchg i16* @var16, i16 %wanted, i16 %new monotonic monotonic
+   %old = extractvalue { i16, i1 } %pair, 0
+
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
+
+; CHECK: cash w0, w1, [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i16 %old
+}
+
+define i32 @test_atomic_cmpxchg_i32_monotonic(i32 %wanted, i32 %new) nounwind {
+; CHECK-LABEL: test_atomic_cmpxchg_i32_monotonic:
+   %pair = cmpxchg i32* @var32, i32 %wanted, i32 %new monotonic monotonic
+   %old = extractvalue { i32, i1 } %pair, 0
+
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+
+; CHECK: cas w0, w1, [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i32 %old
+}
+
+define i64 @test_atomic_cmpxchg_i64_monotonic(i64 %wanted, i64 %new) nounwind {
+; CHECK-LABEL: test_atomic_cmpxchg_i64_monotonic:
+   %pair = cmpxchg i64* @var64, i64 %wanted, i64 %new monotonic monotonic
+   %old = extractvalue { i64, i1 } %pair, 0
+
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+
+; CHECK: cas x0, x1, [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i64 %old
+}
+
+define i8 @test_atomic_cmpxchg_i8_seq_cst(i8 %wanted, i8 %new) nounwind {
+; CHECK-LABEL: test_atomic_cmpxchg_i8_seq_cst:
+   %pair = cmpxchg i8* @var8, i8 %wanted, i8 %new seq_cst seq_cst
+   %old = extractvalue { i8, i1 } %pair, 0
+
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
+
+; CHECK: casalb w[[NEW:[0-9]+]], w[[OLD:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i8 %old
+}
+
+define i16 @test_atomic_cmpxchg_i16_seq_cst(i16 %wanted, i16 %new) nounwind {
+; CHECK-LABEL: test_atomic_cmpxchg_i16_seq_cst:
+   %pair = cmpxchg i16* @var16, i16 %wanted, i16 %new seq_cst seq_cst
+   %old = extractvalue { i16, i1 } %pair, 0
+
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
+
+; CHECK: casalh w0, w1, [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i16 %old
+}
+
+define i32 @test_atomic_cmpxchg_i32_seq_cst(i32 %wanted, i32 %new) nounwind {
+; CHECK-LABEL: test_atomic_cmpxchg_i32_seq_cst:
+   %pair = cmpxchg i32* @var32, i32 %wanted, i32 %new seq_cst seq_cst
+   %old = extractvalue { i32, i1 } %pair, 0
+
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+
+; CHECK: casal w0, w1, [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i32 %old
+}
+
+define i64 @test_atomic_cmpxchg_i64_seq_cst(i64 %wanted, i64 %new) nounwind {
+; CHECK-LABEL: test_atomic_cmpxchg_i64_seq_cst:
+   %pair = cmpxchg i64* @var64, i64 %wanted, i64 %new seq_cst seq_cst
+   %old = extractvalue { i64, i1 } %pair, 0
+
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+
+; CHECK: casal x0, x1, [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i64 %old
+}
+
+define i8 @test_atomic_load_max_i8_acq_rel(i8 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_max_i8_acq_rel:
+   %old = atomicrmw max i8* @var8, i8 %offset acq_rel
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
+
+; CHECK: ldsmaxalb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i8 %old
+}
+
+define i16 @test_atomic_load_max_i16_acq_rel(i16 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_max_i16_acq_rel:
+   %old = atomicrmw max i16* @var16, i16 %offset acq_rel
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
+
+; CHECK: ldsmaxalh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i16 %old
+}
+
+define i32 @test_atomic_load_max_i32_acq_rel(i32 %offset) nounwind { ; acq_rel atomicrmw max on i32: expects ldsmaxal, and no dmb before the address setup or after the atomic op
+; CHECK-LABEL: test_atomic_load_max_i32_acq_rel:
+   %old = atomicrmw max i32* @var32, i32 %offset acq_rel
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+
+; CHECK: ldsmaxal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i32 %old
+}
+
+define i64 @test_atomic_load_max_i64_acq_rel(i64 %offset) nounwind { ; acq_rel atomicrmw max on i64: expects ldsmaxal on x registers, and no dmb before the address setup or after the atomic op
+; CHECK-LABEL: test_atomic_load_max_i64_acq_rel:
+   %old = atomicrmw max i64* @var64, i64 %offset acq_rel
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+
+; CHECK: ldsmaxal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i64 %old
+}
+
+define void @test_atomic_load_max_i32_noret_acq_rel(i32 %offset) nounwind { ; acq_rel atomicrmw max on i32 with the result unused: expects ldsmaxal with w0 as its first operand, and no dmb before the address setup or after the atomic op
+; CHECK-LABEL: test_atomic_load_max_i32_noret_acq_rel:
+   atomicrmw max i32* @var32, i32 %offset acq_rel
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+
+; CHECK: ldsmaxal w0, w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+  ret void
+}
+
+define void @test_atomic_load_max_i64_noret_acq_rel(i64 %offset) nounwind { ; acq_rel atomicrmw max on i64 with the result unused: expects ldsmaxal with x0 as its first operand, and no dmb before the address setup or after the atomic op
+; CHECK-LABEL: test_atomic_load_max_i64_noret_acq_rel:
+   atomicrmw max i64* @var64, i64 %offset acq_rel
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+
+; CHECK: ldsmaxal x0, x[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+  ret void
+}
+
+define i8 @test_atomic_load_max_i8_acquire(i8 %offset) nounwind { ; acquire atomicrmw max on i8: expects ldsmaxab, and no dmb before the address setup or after the atomic op
+; CHECK-LABEL: test_atomic_load_max_i8_acquire:
+   %old = atomicrmw max i8* @var8, i8 %offset acquire
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
+
+; CHECK: ldsmaxab w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i8 %old
+}
+
+define i16 @test_atomic_load_max_i16_acquire(i16 %offset) nounwind { ; acquire atomicrmw max on i16: expects ldsmaxah, and no dmb before the address setup or after the atomic op
+; CHECK-LABEL: test_atomic_load_max_i16_acquire:
+   %old = atomicrmw max i16* @var16, i16 %offset acquire
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
+
+; CHECK: ldsmaxah w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i16 %old
+}
+
+define i32 @test_atomic_load_max_i32_acquire(i32 %offset) nounwind { ; acquire atomicrmw max on i32: expects ldsmaxa, and no dmb before the address setup or after the atomic op
+; CHECK-LABEL: test_atomic_load_max_i32_acquire:
+   %old = atomicrmw max i32* @var32, i32 %offset acquire
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+
+; CHECK: ldsmaxa w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i32 %old
+}
+
+define i64 @test_atomic_load_max_i64_acquire(i64 %offset) nounwind { ; acquire atomicrmw max on i64: expects ldsmaxa on x registers, and no dmb before the address setup or after the atomic op
+; CHECK-LABEL: test_atomic_load_max_i64_acquire:
+   %old = atomicrmw max i64* @var64, i64 %offset acquire
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+
+; CHECK: ldsmaxa x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i64 %old
+}
+
+define void @test_atomic_load_max_i32_noret_acquire(i32 %offset) nounwind { ; acquire atomicrmw max on i32 with the result unused: expects ldsmaxa with w0 as its first operand, and no dmb before the address setup or after the atomic op
+; CHECK-LABEL: test_atomic_load_max_i32_noret_acquire:
+   atomicrmw max i32* @var32, i32 %offset acquire
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+
+; CHECK: ldsmaxa w0, w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+  ret void
+}
+
+define void @test_atomic_load_max_i64_noret_acquire(i64 %offset) nounwind { ; acquire atomicrmw max on i64 with the result unused: expects ldsmaxa with x0 as its first operand, and no dmb before the address setup or after the atomic op
+; CHECK-LABEL: test_atomic_load_max_i64_noret_acquire:
+   atomicrmw max i64* @var64, i64 %offset acquire
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+
+; CHECK: ldsmaxa x0, x[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+  ret void
+}
+
+define i8 @test_atomic_load_max_i8_monotonic(i8 %offset) nounwind { ; monotonic atomicrmw max on i8: expects ldsmaxb, and no dmb before the address setup or after the atomic op
+; CHECK-LABEL: test_atomic_load_max_i8_monotonic:
+   %old = atomicrmw max i8* @var8, i8 %offset monotonic
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
+
+; CHECK: ldsmaxb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i8 %old
+}
+
+define i16 @test_atomic_load_max_i16_monotonic(i16 %offset) nounwind { ; monotonic atomicrmw max on i16: expects ldsmaxh, and no dmb before the address setup or after the atomic op
+; CHECK-LABEL: test_atomic_load_max_i16_monotonic:
+   %old = atomicrmw max i16* @var16, i16 %offset monotonic
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
+
+; CHECK: ldsmaxh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i16 %old
+}
+
+define i32 @test_atomic_load_max_i32_monotonic(i32 %offset) nounwind { ; monotonic atomicrmw max on i32: expects ldsmax, and no dmb before the address setup or after the atomic op
+; CHECK-LABEL: test_atomic_load_max_i32_monotonic:
+   %old = atomicrmw max i32* @var32, i32 %offset monotonic
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+
+; CHECK: ldsmax w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i32 %old
+}
+
+define i64 @test_atomic_load_max_i64_monotonic(i64 %offset) nounwind { ; monotonic atomicrmw max on i64: expects ldsmax on x registers, and no dmb before the address setup or after the atomic op
+; CHECK-LABEL: test_atomic_load_max_i64_monotonic:
+   %old = atomicrmw max i64* @var64, i64 %offset monotonic
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+
+; CHECK: ldsmax x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i64 %old
+}
+
+define void @test_atomic_load_max_i32_noret_monotonic(i32 %offset) nounwind { ; monotonic atomicrmw max on i32 with the result unused: expects the store form stsmax w0, and no dmb before the address setup or after the atomic op
+; CHECK-LABEL: test_atomic_load_max_i32_noret_monotonic:
+   atomicrmw max i32* @var32, i32 %offset monotonic
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+
+; CHECK: stsmax w0, [x[[ADDR]]]
+; CHECK-NOT: dmb
+  ret void
+}
+
+define void @test_atomic_load_max_i64_noret_monotonic(i64 %offset) nounwind { ; monotonic atomicrmw max on i64 with the result unused: expects the store form stsmax x0, and no dmb before the address setup or after the atomic op
+; CHECK-LABEL: test_atomic_load_max_i64_noret_monotonic:
+   atomicrmw max i64* @var64, i64 %offset monotonic
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+
+; CHECK: stsmax x0, [x[[ADDR]]]
+; CHECK-NOT: dmb
+  ret void
+}
+
+define i8 @test_atomic_load_max_i8_release(i8 %offset) nounwind { ; release atomicrmw max on i8: expects ldsmaxlb, and no dmb before the address setup or after the atomic op
+; CHECK-LABEL: test_atomic_load_max_i8_release:
+   %old = atomicrmw max i8* @var8, i8 %offset release
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
+
+; CHECK: ldsmaxlb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i8 %old
+}
+
+define i16 @test_atomic_load_max_i16_release(i16 %offset) nounwind { ; release atomicrmw max on i16: expects ldsmaxlh, and no dmb before the address setup or after the atomic op
+; CHECK-LABEL: test_atomic_load_max_i16_release:
+   %old = atomicrmw max i16* @var16, i16 %offset release
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
+
+; CHECK: ldsmaxlh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i16 %old
+}
+
+define i32 @test_atomic_load_max_i32_release(i32 %offset) nounwind { ; release atomicrmw max on i32: expects ldsmaxl, and no dmb before the address setup or after the atomic op
+; CHECK-LABEL: test_atomic_load_max_i32_release:
+   %old = atomicrmw max i32* @var32, i32 %offset release
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+
+; CHECK: ldsmaxl w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i32 %old
+}
+
+define i64 @test_atomic_load_max_i64_release(i64 %offset) nounwind { ; release atomicrmw max on i64: expects ldsmaxl on x registers, and no dmb before the address setup or after the atomic op
+; CHECK-LABEL: test_atomic_load_max_i64_release:
+   %old = atomicrmw max i64* @var64, i64 %offset release
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+
+; CHECK: ldsmaxl x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i64 %old
+}
+
+define void @test_atomic_load_max_i32_noret_release(i32 %offset) nounwind { ; release atomicrmw max on i32 with the result unused: expects the store form stsmaxl w0, and no dmb before the address setup or after the atomic op
+; CHECK-LABEL: test_atomic_load_max_i32_noret_release:
+   atomicrmw max i32* @var32, i32 %offset release
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+
+; CHECK: stsmaxl w0, [x[[ADDR]]]
+; CHECK-NOT: dmb
+  ret void
+}
+
+define void @test_atomic_load_max_i64_noret_release(i64 %offset) nounwind { ; release atomicrmw max on i64 with the result unused: expects the store form stsmaxl x0, and no dmb before the address setup or after the atomic op
+; CHECK-LABEL: test_atomic_load_max_i64_noret_release:
+   atomicrmw max i64* @var64, i64 %offset release
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+
+; CHECK: stsmaxl x0, [x[[ADDR]]]
+; CHECK-NOT: dmb
+  ret void
+}
+
+define i8 @test_atomic_load_max_i8_seq_cst(i8 %offset) nounwind { ; seq_cst atomicrmw max on i8: expects ldsmaxalb, and no dmb before the address setup or after the atomic op
+; CHECK-LABEL: test_atomic_load_max_i8_seq_cst:
+   %old = atomicrmw max i8* @var8, i8 %offset seq_cst
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
+
+; CHECK: ldsmaxalb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i8 %old
+}
+
+define i16 @test_atomic_load_max_i16_seq_cst(i16 %offset) nounwind { ; seq_cst atomicrmw max on i16: expects ldsmaxalh, and no dmb before the address setup or after the atomic op
+; CHECK-LABEL: test_atomic_load_max_i16_seq_cst:
+   %old = atomicrmw max i16* @var16, i16 %offset seq_cst
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
+
+; CHECK: ldsmaxalh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i16 %old
+}
+
+define i32 @test_atomic_load_max_i32_seq_cst(i32 %offset) nounwind { ; seq_cst atomicrmw max on i32: expects ldsmaxal, and no dmb before the address setup or after the atomic op
+; CHECK-LABEL: test_atomic_load_max_i32_seq_cst:
+   %old = atomicrmw max i32* @var32, i32 %offset seq_cst
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+
+; CHECK: ldsmaxal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i32 %old
+}
+
+define i64 @test_atomic_load_max_i64_seq_cst(i64 %offset) nounwind { ; seq_cst atomicrmw max on i64: expects ldsmaxal on x registers, and no dmb before the address setup or after the atomic op
+; CHECK-LABEL: test_atomic_load_max_i64_seq_cst:
+   %old = atomicrmw max i64* @var64, i64 %offset seq_cst
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+
+; CHECK: ldsmaxal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i64 %old
+}
+
+define void @test_atomic_load_max_i32_noret_seq_cst(i32 %offset) nounwind { ; seq_cst atomicrmw max on i32 with the result unused: expects ldsmaxal with w0 as its first operand, and no dmb before the address setup or after the atomic op
+; CHECK-LABEL: test_atomic_load_max_i32_noret_seq_cst:
+   atomicrmw max i32* @var32, i32 %offset seq_cst
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+
+; CHECK: ldsmaxal w0, w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+  ret void
+}
+
+define void @test_atomic_load_max_i64_noret_seq_cst(i64 %offset) nounwind { ; seq_cst atomicrmw max on i64 with the result unused: expects ldsmaxal with x0 as its first operand, and no dmb before the address setup or after the atomic op
+; CHECK-LABEL: test_atomic_load_max_i64_noret_seq_cst:
+   atomicrmw max i64* @var64, i64 %offset seq_cst
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+
+; CHECK: ldsmaxal x0, x[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+  ret void
+}
+
+define i8 @test_atomic_load_min_i8_acq_rel(i8 %offset) nounwind { ; acq_rel atomicrmw min on i8: expects ldsminalb, and no dmb before the address setup or after the atomic op
+; CHECK-LABEL: test_atomic_load_min_i8_acq_rel:
+   %old = atomicrmw min i8* @var8, i8 %offset acq_rel
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
+
+; CHECK: ldsminalb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i8 %old
+}
+
+define i16 @test_atomic_load_min_i16_acq_rel(i16 %offset) nounwind { ; acq_rel atomicrmw min on i16: expects ldsminalh, and no dmb before the address setup or after the atomic op
+; CHECK-LABEL: test_atomic_load_min_i16_acq_rel:
+   %old = atomicrmw min i16* @var16, i16 %offset acq_rel
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
+
+; CHECK: ldsminalh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i16 %old
+}
+
+define i32 @test_atomic_load_min_i32_acq_rel(i32 %offset) nounwind { ; acq_rel atomicrmw min on i32: expects ldsminal, and no dmb before the address setup or after the atomic op
+; CHECK-LABEL: test_atomic_load_min_i32_acq_rel:
+   %old = atomicrmw min i32* @var32, i32 %offset acq_rel
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+
+; CHECK: ldsminal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i32 %old
+}
+
+define i64 @test_atomic_load_min_i64_acq_rel(i64 %offset) nounwind { ; acq_rel atomicrmw min on i64: expects ldsminal on x registers, and no dmb before the address setup or after the atomic op
+; CHECK-LABEL: test_atomic_load_min_i64_acq_rel:
+   %old = atomicrmw min i64* @var64, i64 %offset acq_rel
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+
+; CHECK: ldsminal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i64 %old
+}
+
+define void @test_atomic_load_min_i32_noret_acq_rel(i32 %offset) nounwind { ; acq_rel atomicrmw min on i32 with the result unused: expects ldsminal with w0 as its first operand, and no dmb before the address setup or after the atomic op
+; CHECK-LABEL: test_atomic_load_min_i32_noret_acq_rel:
+   atomicrmw min i32* @var32, i32 %offset acq_rel
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+
+; CHECK: ldsminal w0, w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+  ret void
+}
+
+define void @test_atomic_load_min_i64_noret_acq_rel(i64 %offset) nounwind { ; acq_rel atomicrmw min on i64 with the result unused: expects ldsminal with x0 as its first operand, and no dmb before the address setup or after the atomic op
+; CHECK-LABEL: test_atomic_load_min_i64_noret_acq_rel:
+   atomicrmw min i64* @var64, i64 %offset acq_rel
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+
+; CHECK: ldsminal x0, x[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+  ret void
+}
+
+define i8 @test_atomic_load_min_i8_acquire(i8 %offset) nounwind { ; acquire atomicrmw min on i8: expects ldsminab, and no dmb before the address setup or after the atomic op
+; CHECK-LABEL: test_atomic_load_min_i8_acquire:
+   %old = atomicrmw min i8* @var8, i8 %offset acquire
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
+
+; CHECK: ldsminab w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i8 %old
+}
+
+define i16 @test_atomic_load_min_i16_acquire(i16 %offset) nounwind { ; acquire atomicrmw min on i16: expects ldsminah, and no dmb before the address setup or after the atomic op
+; CHECK-LABEL: test_atomic_load_min_i16_acquire:
+   %old = atomicrmw min i16* @var16, i16 %offset acquire
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
+
+; CHECK: ldsminah w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i16 %old
+}
+
+define i32 @test_atomic_load_min_i32_acquire(i32 %offset) nounwind { ; acquire atomicrmw min on i32: expects ldsmina, and no dmb before the address setup or after the atomic op
+; CHECK-LABEL: test_atomic_load_min_i32_acquire:
+   %old = atomicrmw min i32* @var32, i32 %offset acquire
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+
+; CHECK: ldsmina w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i32 %old
+}
+
+define i64 @test_atomic_load_min_i64_acquire(i64 %offset) nounwind { ; acquire atomicrmw min on i64: expects ldsmina on x registers, and no dmb before the address setup or after the atomic op
+; CHECK-LABEL: test_atomic_load_min_i64_acquire:
+   %old = atomicrmw min i64* @var64, i64 %offset acquire
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+
+; CHECK: ldsmina x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i64 %old
+}
+
+define void @test_atomic_load_min_i32_noret_acquire(i32 %offset) nounwind { ; acquire atomicrmw min on i32 with the result unused: expects ldsmina with w0 as its first operand, and no dmb before the address setup or after the atomic op
+; CHECK-LABEL: test_atomic_load_min_i32_noret_acquire:
+   atomicrmw min i32* @var32, i32 %offset acquire
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+
+; CHECK: ldsmina w0, w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+  ret void
+}
+
+define void @test_atomic_load_min_i64_noret_acquire(i64 %offset) nounwind { ; acquire atomicrmw min on i64 with the result unused: expects ldsmina with x0 as its first operand, and no dmb before the address setup or after the atomic op
+; CHECK-LABEL: test_atomic_load_min_i64_noret_acquire:
+   atomicrmw min i64* @var64, i64 %offset acquire
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+
+; CHECK: ldsmina x0, x[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+  ret void
+}
+
+define i8 @test_atomic_load_min_i8_monotonic(i8 %offset) nounwind { ; monotonic atomicrmw min on i8: expects ldsminb, and no dmb before the address setup or after the atomic op
+; CHECK-LABEL: test_atomic_load_min_i8_monotonic:
+   %old = atomicrmw min i8* @var8, i8 %offset monotonic
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
+
+; CHECK: ldsminb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i8 %old
+}
+
+define i16 @test_atomic_load_min_i16_monotonic(i16 %offset) nounwind { ; monotonic atomicrmw min on i16: expects ldsminh, and no dmb before the address setup or after the atomic op
+; CHECK-LABEL: test_atomic_load_min_i16_monotonic:
+   %old = atomicrmw min i16* @var16, i16 %offset monotonic
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
+
+; CHECK: ldsminh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i16 %old
+}
+
+define i32 @test_atomic_load_min_i32_monotonic(i32 %offset) nounwind { ; monotonic atomicrmw min on i32: expects ldsmin, and no dmb before the address setup or after the atomic op
+; CHECK-LABEL: test_atomic_load_min_i32_monotonic:
+   %old = atomicrmw min i32* @var32, i32 %offset monotonic
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+
+; CHECK: ldsmin w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i32 %old
+}
+
+define i64 @test_atomic_load_min_i64_monotonic(i64 %offset) nounwind { ; monotonic atomicrmw min on i64: expects ldsmin on x registers, and no dmb before the address setup or after the atomic op
+; CHECK-LABEL: test_atomic_load_min_i64_monotonic:
+   %old = atomicrmw min i64* @var64, i64 %offset monotonic
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+
+; CHECK: ldsmin x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i64 %old
+}
+
+define void @test_atomic_load_min_i32_noret_monotonic(i32 %offset) nounwind { ; monotonic atomicrmw min on i32 with the result unused: expects the store form stsmin w0, and no dmb before the address setup or after the atomic op
+; CHECK-LABEL: test_atomic_load_min_i32_noret_monotonic:
+   atomicrmw min i32* @var32, i32 %offset monotonic
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+
+; CHECK: stsmin w0, [x[[ADDR]]]
+; CHECK-NOT: dmb
+  ret void
+}
+
+define void @test_atomic_load_min_i64_noret_monotonic(i64 %offset) nounwind { ; monotonic atomicrmw min on i64 with the result unused: expects the store form stsmin x0, and no dmb before the address setup or after the atomic op
+; CHECK-LABEL: test_atomic_load_min_i64_noret_monotonic:
+   atomicrmw min i64* @var64, i64 %offset monotonic
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+
+; CHECK: stsmin x0, [x[[ADDR]]]
+; CHECK-NOT: dmb
+  ret void
+}
+
+define i8 @test_atomic_load_min_i8_release(i8 %offset) nounwind { ; release atomicrmw min on i8: expects ldsminlb, and no dmb before the address setup or after the atomic op
+; CHECK-LABEL: test_atomic_load_min_i8_release:
+   %old = atomicrmw min i8* @var8, i8 %offset release
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
+
+; CHECK: ldsminlb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i8 %old
+}
+
+define i16 @test_atomic_load_min_i16_release(i16 %offset) nounwind { ; release atomicrmw min on i16: expects ldsminlh, and no dmb before the address setup or after the atomic op
+; CHECK-LABEL: test_atomic_load_min_i16_release:
+   %old = atomicrmw min i16* @var16, i16 %offset release
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
+
+; CHECK: ldsminlh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i16 %old
+}
+
+define i32 @test_atomic_load_min_i32_release(i32 %offset) nounwind { ; release atomicrmw min on i32: expects ldsminl, and no dmb before the address setup or after the atomic op
+; CHECK-LABEL: test_atomic_load_min_i32_release:
+   %old = atomicrmw min i32* @var32, i32 %offset release
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+
+; CHECK: ldsminl w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i32 %old
+}
+
+define i64 @test_atomic_load_min_i64_release(i64 %offset) nounwind { ; release atomicrmw min on i64: expects ldsminl on x registers, and no dmb before the address setup or after the atomic op
+; CHECK-LABEL: test_atomic_load_min_i64_release:
+   %old = atomicrmw min i64* @var64, i64 %offset release
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+
+; CHECK: ldsminl x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i64 %old
+}
+
+define void @test_atomic_load_min_i32_noret_release(i32 %offset) nounwind { ; release atomicrmw min on i32 with the result unused: expects the store form stsminl w0, and no dmb before the address setup or after the atomic op
+; CHECK-LABEL: test_atomic_load_min_i32_noret_release:
+   atomicrmw min i32* @var32, i32 %offset release
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+
+; CHECK: stsminl w0, [x[[ADDR]]]
+; CHECK-NOT: dmb
+  ret void
+}
+
+define void @test_atomic_load_min_i64_noret_release(i64 %offset) nounwind { ; release atomicrmw min on i64 with the result unused: expects the store form stsminl x0, and no dmb before the address setup or after the atomic op
+; CHECK-LABEL: test_atomic_load_min_i64_noret_release:
+   atomicrmw min i64* @var64, i64 %offset release
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+
+; CHECK: stsminl x0, [x[[ADDR]]]
+; CHECK-NOT: dmb
+  ret void
+}
+
+define i8 @test_atomic_load_min_i8_seq_cst(i8 %offset) nounwind { ; seq_cst atomicrmw min on i8: expects ldsminalb, and no dmb before the address setup or after the atomic op
+; CHECK-LABEL: test_atomic_load_min_i8_seq_cst:
+   %old = atomicrmw min i8* @var8, i8 %offset seq_cst
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
+
+; CHECK: ldsminalb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i8 %old
+}
+
+define i16 @test_atomic_load_min_i16_seq_cst(i16 %offset) nounwind { ; seq_cst atomicrmw min on i16: expects ldsminalh, and no dmb before the address setup or after the atomic op
+; CHECK-LABEL: test_atomic_load_min_i16_seq_cst:
+   %old = atomicrmw min i16* @var16, i16 %offset seq_cst
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
+
+; CHECK: ldsminalh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i16 %old
+}
+
+define i32 @test_atomic_load_min_i32_seq_cst(i32 %offset) nounwind { ; seq_cst atomicrmw min on i32: expects ldsminal, and no dmb before the address setup or after the atomic op
+; CHECK-LABEL: test_atomic_load_min_i32_seq_cst:
+   %old = atomicrmw min i32* @var32, i32 %offset seq_cst
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+
+; CHECK: ldsminal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i32 %old
+}
+
+define i64 @test_atomic_load_min_i64_seq_cst(i64 %offset) nounwind { ; seq_cst atomicrmw min on i64: expects ldsminal on x registers, and no dmb before the address setup or after the atomic op
+; CHECK-LABEL: test_atomic_load_min_i64_seq_cst:
+   %old = atomicrmw min i64* @var64, i64 %offset seq_cst
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+
+; CHECK: ldsminal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i64 %old
+}
+
+define void @test_atomic_load_min_i32_noret_seq_cst(i32 %offset) nounwind { ; seq_cst atomicrmw min on i32 with the result unused: expects ldsminal with w0 as its first operand, and no dmb before the address setup or after the atomic op
+; CHECK-LABEL: test_atomic_load_min_i32_noret_seq_cst:
+   atomicrmw min i32* @var32, i32 %offset seq_cst
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+
+; CHECK: ldsminal w0, w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+  ret void
+}
+
+define void @test_atomic_load_min_i64_noret_seq_cst(i64 %offset) nounwind { ; seq_cst atomicrmw min on i64 with the result unused: expects ldsminal with x0 as its first operand, and no dmb before the address setup or after the atomic op
+; CHECK-LABEL: test_atomic_load_min_i64_noret_seq_cst:
+   atomicrmw min i64* @var64, i64 %offset seq_cst
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+
+; CHECK: ldsminal x0, x[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+  ret void
+}
+
+define i8 @test_atomic_load_or_i8_acq_rel(i8 %offset) nounwind { ; acq_rel atomicrmw or on i8: expects ldsetalb, and no dmb before the address setup or after the atomic op
+; CHECK-LABEL: test_atomic_load_or_i8_acq_rel:
+   %old = atomicrmw or i8* @var8, i8 %offset acq_rel
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
+
+; CHECK: ldsetalb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i8 %old
+}
+
+define i16 @test_atomic_load_or_i16_acq_rel(i16 %offset) nounwind { ; acq_rel atomicrmw or on i16: expects ldsetalh, and no dmb before the address setup or after the atomic op
+; CHECK-LABEL: test_atomic_load_or_i16_acq_rel:
+   %old = atomicrmw or i16* @var16, i16 %offset acq_rel
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
+
+; CHECK: ldsetalh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i16 %old
+}
+
+define i32 @test_atomic_load_or_i32_acq_rel(i32 %offset) nounwind { ; acq_rel atomicrmw or on i32: expects ldsetal, and no dmb before the address setup or after the atomic op
+; CHECK-LABEL: test_atomic_load_or_i32_acq_rel:
+   %old = atomicrmw or i32* @var32, i32 %offset acq_rel
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+
+; CHECK: ldsetal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i32 %old
+}
+
+define i64 @test_atomic_load_or_i64_acq_rel(i64 %offset) nounwind { ; acq_rel atomicrmw or on i64: expects ldsetal on x registers, and no dmb before the address setup or after the atomic op
+; CHECK-LABEL: test_atomic_load_or_i64_acq_rel:
+   %old = atomicrmw or i64* @var64, i64 %offset acq_rel
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+
+; CHECK: ldsetal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i64 %old
+}
+
+define void @test_atomic_load_or_i32_noret_acq_rel(i32 %offset) nounwind { ; acq_rel atomicrmw or on i32 with the result unused: expects ldsetal with w0 as its first operand, and no dmb before the address setup or after the atomic op
+; CHECK-LABEL: test_atomic_load_or_i32_noret_acq_rel:
+   atomicrmw or i32* @var32, i32 %offset acq_rel
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+
+; CHECK: ldsetal w0, w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+  ret void
+}
+
+define void @test_atomic_load_or_i64_noret_acq_rel(i64 %offset) nounwind { ; acq_rel atomicrmw or on i64 with the result unused: expects ldsetal with x0 as its first operand, and no dmb before the address setup or after the atomic op
+; CHECK-LABEL: test_atomic_load_or_i64_noret_acq_rel:
+   atomicrmw or i64* @var64, i64 %offset acq_rel
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+
+; CHECK: ldsetal x0, x[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+  ret void
+}
+
+define i8 @test_atomic_load_or_i8_acquire(i8 %offset) nounwind { ; acquire atomicrmw or on i8: expects ldsetab, and no dmb before the address setup or after the atomic op
+; CHECK-LABEL: test_atomic_load_or_i8_acquire:
+   %old = atomicrmw or i8* @var8, i8 %offset acquire
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
+
+; CHECK: ldsetab w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i8 %old
+}
+
+define i16 @test_atomic_load_or_i16_acquire(i16 %offset) nounwind { ; acquire atomicrmw or on i16: expects ldsetah, and no dmb before the address setup or after the atomic op
+; CHECK-LABEL: test_atomic_load_or_i16_acquire:
+   %old = atomicrmw or i16* @var16, i16 %offset acquire
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
+
+; CHECK: ldsetah w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i16 %old
+}
+
+define i32 @test_atomic_load_or_i32_acquire(i32 %offset) nounwind { ; acquire atomicrmw or on i32: expects ldseta, and no dmb before the address setup or after the atomic op
+; CHECK-LABEL: test_atomic_load_or_i32_acquire:
+   %old = atomicrmw or i32* @var32, i32 %offset acquire
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+
+; CHECK: ldseta w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i32 %old
+}
+
+define i64 @test_atomic_load_or_i64_acquire(i64 %offset) nounwind { ; acquire atomicrmw or on i64: expects ldseta on x registers, and no dmb before the address setup or after the atomic op
+; CHECK-LABEL: test_atomic_load_or_i64_acquire:
+   %old = atomicrmw or i64* @var64, i64 %offset acquire
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+
+; CHECK: ldseta x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i64 %old
+}
+
+define void @test_atomic_load_or_i32_noret_acquire(i32 %offset) nounwind { ; acquire atomicrmw or on i32 with the result unused: expects ldseta with w0 as its first operand, and no dmb before the address setup or after the atomic op
+; CHECK-LABEL: test_atomic_load_or_i32_noret_acquire:
+   atomicrmw or i32* @var32, i32 %offset acquire
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+
+; CHECK: ldseta w0, w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+  ret void
+}
+
+define void @test_atomic_load_or_i64_noret_acquire(i64 %offset) nounwind { ; acquire atomicrmw or on i64 with the result unused: expects ldseta with x0 as its first operand, and no dmb before the address setup or after the atomic op
+; CHECK-LABEL: test_atomic_load_or_i64_noret_acquire:
+   atomicrmw or i64* @var64, i64 %offset acquire
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+
+; CHECK: ldseta x0, x[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+  ret void
+}
+
+define i8 @test_atomic_load_or_i8_monotonic(i8 %offset) nounwind { ; monotonic atomicrmw or on i8: expects ldsetb, and no dmb before the address setup or after the atomic op
+; CHECK-LABEL: test_atomic_load_or_i8_monotonic:
+   %old = atomicrmw or i8* @var8, i8 %offset monotonic
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
+
+; CHECK: ldsetb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i8 %old
+}
+
+define i16 @test_atomic_load_or_i16_monotonic(i16 %offset) nounwind { ; monotonic atomicrmw or on i16: expects ldseth, and no dmb before the address setup or after the atomic op
+; CHECK-LABEL: test_atomic_load_or_i16_monotonic:
+   %old = atomicrmw or i16* @var16, i16 %offset monotonic
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
+
+; CHECK: ldseth w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i16 %old
+}
+
+define i32 @test_atomic_load_or_i32_monotonic(i32 %offset) nounwind { ; monotonic atomicrmw or on i32: expects ldset, and no dmb before the address setup or after the atomic op
+; CHECK-LABEL: test_atomic_load_or_i32_monotonic:
+   %old = atomicrmw or i32* @var32, i32 %offset monotonic
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+
+; CHECK: ldset w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i32 %old
+}
+
+define i64 @test_atomic_load_or_i64_monotonic(i64 %offset) nounwind { ; monotonic atomicrmw or on i64: expects ldset on x registers, and no dmb before the address setup or after the atomic op
+; CHECK-LABEL: test_atomic_load_or_i64_monotonic:
+   %old = atomicrmw or i64* @var64, i64 %offset monotonic
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+
+; CHECK: ldset x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i64 %old
+}
+
+define void @test_atomic_load_or_i32_noret_monotonic(i32 %offset) nounwind { ; monotonic atomicrmw or on i32 with the result unused: expects the store form stset w0, and no dmb before the address setup or after the atomic op
+; CHECK-LABEL: test_atomic_load_or_i32_noret_monotonic:
+   atomicrmw or i32* @var32, i32 %offset monotonic
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+
+; CHECK: stset w0, [x[[ADDR]]]
+; CHECK-NOT: dmb
+  ret void
+}
+
+define void @test_atomic_load_or_i64_noret_monotonic(i64 %offset) nounwind { ; monotonic atomicrmw or on i64 with the result unused: expects the store form stset x0, and no dmb before the address setup or after the atomic op
+; CHECK-LABEL: test_atomic_load_or_i64_noret_monotonic:
+   atomicrmw or i64* @var64, i64 %offset monotonic
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+
+; CHECK: stset x0, [x[[ADDR]]]
+; CHECK-NOT: dmb
+  ret void
+}
+
+define i8 @test_atomic_load_or_i8_release(i8 %offset) nounwind { ; release atomicrmw or on i8: expects ldsetlb, and no dmb before the address setup or after the atomic op
+; CHECK-LABEL: test_atomic_load_or_i8_release:
+   %old = atomicrmw or i8* @var8, i8 %offset release
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
+
+; CHECK: ldsetlb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i8 %old
+}
+
+define i16 @test_atomic_load_or_i16_release(i16 %offset) nounwind { ; release atomicrmw or on i16: expects ldsetlh, and no dmb before the address setup or after the atomic op
+; CHECK-LABEL: test_atomic_load_or_i16_release:
+   %old = atomicrmw or i16* @var16, i16 %offset release
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
+
+; CHECK: ldsetlh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i16 %old
+}
+
+define i32 @test_atomic_load_or_i32_release(i32 %offset) nounwind { ; release atomicrmw or on i32: expects ldsetl, and no dmb before the address setup or after the atomic op
+; CHECK-LABEL: test_atomic_load_or_i32_release:
+   %old = atomicrmw or i32* @var32, i32 %offset release
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+
+; CHECK: ldsetl w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i32 %old
+}
+
+define i64 @test_atomic_load_or_i64_release(i64 %offset) nounwind { ; release atomicrmw or on i64: expects ldsetl on x registers, and no dmb before the address setup or after the atomic op
+; CHECK-LABEL: test_atomic_load_or_i64_release:
+   %old = atomicrmw or i64* @var64, i64 %offset release
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+
+; CHECK: ldsetl x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i64 %old
+}
+
+define void @test_atomic_load_or_i32_noret_release(i32 %offset) nounwind { ; release atomicrmw or on i32 with the result unused: expects the store form stsetl w0, and no dmb before the address setup or after the atomic op
+; CHECK-LABEL: test_atomic_load_or_i32_noret_release:
+   atomicrmw or i32* @var32, i32 %offset release
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+
+; CHECK: stsetl w0, [x[[ADDR]]]
+; CHECK-NOT: dmb
+  ret void
+}
+
+define void @test_atomic_load_or_i64_noret_release(i64 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_or_i64_noret_release:
+   atomicrmw or i64* @var64, i64 %offset release
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+; With the result unused, the release i64 'or' is expected to select stsetl on x0, with no dmb.
+; CHECK: stsetl x0, [x[[ADDR]]]
+; CHECK-NOT: dmb
+  ret void
+}
+
+define i8 @test_atomic_load_or_i8_seq_cst(i8 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_or_i8_seq_cst:
+   %old = atomicrmw or i8* @var8, i8 %offset seq_cst
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
+; The seq_cst i8 'or' is expected to select ldsetalb, with no dmb around it.
+; CHECK: ldsetalb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i8 %old
+}
+
+define i16 @test_atomic_load_or_i16_seq_cst(i16 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_or_i16_seq_cst:
+   %old = atomicrmw or i16* @var16, i16 %offset seq_cst
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
+; The seq_cst i16 'or' is expected to select ldsetalh, with no dmb around it.
+; CHECK: ldsetalh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i16 %old
+}
+
+define i32 @test_atomic_load_or_i32_seq_cst(i32 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_or_i32_seq_cst:
+   %old = atomicrmw or i32* @var32, i32 %offset seq_cst
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+; The seq_cst i32 'or' is expected to select ldsetal on w registers, with no dmb around it.
+; CHECK: ldsetal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i32 %old
+}
+
+define i64 @test_atomic_load_or_i64_seq_cst(i64 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_or_i64_seq_cst:
+   %old = atomicrmw or i64* @var64, i64 %offset seq_cst
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+; The seq_cst i64 'or' is expected to select ldsetal on x registers, with no dmb around it.
+; CHECK: ldsetal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i64 %old
+}
+
+define void @test_atomic_load_or_i32_noret_seq_cst(i32 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_or_i32_noret_seq_cst:
+   atomicrmw or i32* @var32, i32 %offset seq_cst
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+; With the result unused, the seq_cst i32 'or' is still expected to be ldsetal with a numbered result register.
+; CHECK: ldsetal w0, w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+  ret void
+}
+
+define void @test_atomic_load_or_i64_noret_seq_cst(i64 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_or_i64_noret_seq_cst:
+   atomicrmw or i64* @var64, i64 %offset seq_cst
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+; With the result unused, the seq_cst i64 'or' is still expected to be ldsetal with a numbered result register.
+; CHECK: ldsetal x0, x[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+  ret void
+}
+
+define i8 @test_atomic_load_sub_i8_acq_rel(i8 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_sub_i8_acq_rel:
+  %old = atomicrmw sub i8* @var8, i8 %offset acq_rel
+; CHECK-NOT: dmb
+; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]]
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
+; The acq_rel i8 'sub' is expected to become ldaddalb of the negated operand, with no dmb.
+; CHECK: ldaddalb w[[NEG]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+  ret i8 %old
+}
+
+define i16 @test_atomic_load_sub_i16_acq_rel(i16 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_sub_i16_acq_rel:
+  %old = atomicrmw sub i16* @var16, i16 %offset acq_rel
+; CHECK-NOT: dmb
+; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]]
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
+; The acq_rel i16 'sub' is expected to become ldaddalh of the negated operand, with no dmb.
+; CHECK: ldaddalh w[[NEG]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+  ret i16 %old
+}
+
+define i32 @test_atomic_load_sub_i32_acq_rel(i32 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_sub_i32_acq_rel:
+  %old = atomicrmw sub i32* @var32, i32 %offset acq_rel
+; CHECK-NOT: dmb
+; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]]
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+; The acq_rel i32 'sub' is expected to become ldaddal of the negated operand, with no dmb.
+; CHECK: ldaddal w[[NEG]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+  ret i32 %old
+}
+
+define i64 @test_atomic_load_sub_i64_acq_rel(i64 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_sub_i64_acq_rel:
+  %old = atomicrmw sub i64* @var64, i64 %offset acq_rel
+; CHECK-NOT: dmb
+; CHECK: neg x[[NEG:[0-9]+]], x[[OLD:[0-9]+]]
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+; The acq_rel i64 'sub' is expected to become ldaddal of the negated operand, with no dmb.
+; CHECK: ldaddal x[[NEG]], x[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+  ret i64 %old
+}
+
+define void @test_atomic_load_sub_i32_noret_acq_rel(i32 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_sub_i32_noret_acq_rel:
+  atomicrmw sub i32* @var32, i32 %offset acq_rel
+; CHECK-NOT: dmb
+; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]]
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+; With the result unused, the acq_rel i32 'sub' is still expected to be ldaddal of the negated operand.
+; CHECK: ldaddal w[[NEG]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+  ret void
+}
+
+define void @test_atomic_load_sub_i64_noret_acq_rel(i64 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_sub_i64_noret_acq_rel:
+  atomicrmw sub i64* @var64, i64 %offset acq_rel
+; CHECK-NOT: dmb
+; CHECK: neg x[[NEG:[0-9]+]], x[[OLD:[0-9]+]]
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+; With the result unused, the acq_rel i64 'sub' is still expected to be ldaddal of the negated operand.
+; CHECK: ldaddal x[[NEG]], x[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+  ret void
+}
+
+define i8 @test_atomic_load_sub_i8_acquire(i8 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_sub_i8_acquire:
+  %old = atomicrmw sub i8* @var8, i8 %offset acquire
+; CHECK-NOT: dmb
+; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]]
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
+; The acquire i8 'sub' is expected to become ldaddab of the negated operand, with no dmb.
+; CHECK: ldaddab w[[NEG]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+  ret i8 %old
+}
+
+define i16 @test_atomic_load_sub_i16_acquire(i16 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_sub_i16_acquire:
+  %old = atomicrmw sub i16* @var16, i16 %offset acquire
+; CHECK-NOT: dmb
+; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]]
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
+; The acquire i16 'sub' is expected to become ldaddah of the negated operand, with no dmb.
+; CHECK: ldaddah w[[NEG]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+  ret i16 %old
+}
+
+define i32 @test_atomic_load_sub_i32_acquire(i32 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_sub_i32_acquire:
+  %old = atomicrmw sub i32* @var32, i32 %offset acquire
+; CHECK-NOT: dmb
+; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]]
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+; The acquire i32 'sub' is expected to become ldadda of the negated operand, with no dmb.
+; CHECK: ldadda w[[NEG]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+  ret i32 %old
+}
+
+define i64 @test_atomic_load_sub_i64_acquire(i64 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_sub_i64_acquire:
+  %old = atomicrmw sub i64* @var64, i64 %offset acquire
+; CHECK-NOT: dmb
+; CHECK: neg x[[NEG:[0-9]+]], x[[OLD:[0-9]+]]
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+; The acquire i64 'sub' is expected to become ldadda of the negated operand, with no dmb.
+; CHECK: ldadda x[[NEG]], x[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+  ret i64 %old
+}
+
+define void @test_atomic_load_sub_i32_noret_acquire(i32 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_sub_i32_noret_acquire:
+  atomicrmw sub i32* @var32, i32 %offset acquire
+; CHECK-NOT: dmb
+; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]]
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+; With the result unused, the acquire i32 'sub' is still expected to be ldadda of the negated operand.
+; CHECK: ldadda w[[NEG]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+  ret void
+}
+
+define void @test_atomic_load_sub_i64_noret_acquire(i64 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_sub_i64_noret_acquire:
+  atomicrmw sub i64* @var64, i64 %offset acquire
+; CHECK-NOT: dmb
+; CHECK: neg x[[NEG:[0-9]+]], x[[OLD:[0-9]+]]
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+; With the result unused, the acquire i64 'sub' is still expected to be ldadda of the negated operand.
+; CHECK: ldadda x[[NEG]], x[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+  ret void
+}
+
+define i8 @test_atomic_load_sub_i8_monotonic(i8 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_sub_i8_monotonic:
+  %old = atomicrmw sub i8* @var8, i8 %offset monotonic
+; CHECK-NOT: dmb
+; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]]
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
+; The monotonic i8 'sub' is expected to become ldaddb of the negated operand, with no dmb.
+; CHECK: ldaddb w[[NEG]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+  ret i8 %old
+}
+
+define i16 @test_atomic_load_sub_i16_monotonic(i16 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_sub_i16_monotonic:
+  %old = atomicrmw sub i16* @var16, i16 %offset monotonic
+; CHECK-NOT: dmb
+; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]]
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
+; The monotonic i16 'sub' is expected to become ldaddh of the negated operand, with no dmb.
+; CHECK: ldaddh w[[NEG]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+  ret i16 %old
+}
+
+define i32 @test_atomic_load_sub_i32_monotonic(i32 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_sub_i32_monotonic:
+  %old = atomicrmw sub i32* @var32, i32 %offset monotonic
+; CHECK-NOT: dmb
+; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]]
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+; The monotonic i32 'sub' is expected to become ldadd of the negated operand, with no dmb.
+; CHECK: ldadd w[[NEG]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+  ret i32 %old
+}
+
+define i64 @test_atomic_load_sub_i64_monotonic(i64 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_sub_i64_monotonic:
+  %old = atomicrmw sub i64* @var64, i64 %offset monotonic
+; CHECK-NOT: dmb
+; CHECK: neg x[[NEG:[0-9]+]], x[[OLD:[0-9]+]]
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+; The monotonic i64 'sub' is expected to become ldadd of the negated operand, with no dmb.
+; CHECK: ldadd x[[NEG]], x[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+  ret i64 %old
+}
+
+define void @test_atomic_load_sub_i32_noret_monotonic(i32 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_sub_i32_noret_monotonic:
+  atomicrmw sub i32* @var32, i32 %offset monotonic
+; CHECK-NOT: dmb
+; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]]
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+; With the result unused, the monotonic i32 'sub' should become stadd of the negated operand (NEG), no dmb.
+; CHECK: stadd w[[NEG]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+  ret void
+}
+
+define void @test_atomic_load_sub_i64_noret_monotonic(i64 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_sub_i64_noret_monotonic:
+  atomicrmw sub i64* @var64, i64 %offset monotonic
+; CHECK-NOT: dmb
+; CHECK: neg x[[NEG:[0-9]+]], x[[OLD:[0-9]+]]
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+; With the result unused, the monotonic i64 'sub' should become stadd of the negated operand (NEG), no dmb.
+; CHECK: stadd x[[NEG]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+  ret void
+}
+
+define i8 @test_atomic_load_sub_i8_release(i8 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_sub_i8_release:
+  %old = atomicrmw sub i8* @var8, i8 %offset release
+; CHECK-NOT: dmb
+; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]]
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
+; The release i8 'sub' is expected to become ldaddlb of the negated operand, with no dmb.
+; CHECK: ldaddlb w[[NEG]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+  ret i8 %old
+}
+
+define i16 @test_atomic_load_sub_i16_release(i16 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_sub_i16_release:
+  %old = atomicrmw sub i16* @var16, i16 %offset release
+; CHECK-NOT: dmb
+; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]]
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
+; The release i16 'sub' is expected to become ldaddlh of the negated operand, with no dmb.
+; CHECK: ldaddlh w[[NEG]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+  ret i16 %old
+}
+
+define i32 @test_atomic_load_sub_i32_release(i32 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_sub_i32_release:
+  %old = atomicrmw sub i32* @var32, i32 %offset release
+; CHECK-NOT: dmb
+; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]]
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+; The release i32 'sub' is expected to become ldaddl of the negated operand, with no dmb.
+; CHECK: ldaddl w[[NEG]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+  ret i32 %old
+}
+
+define i64 @test_atomic_load_sub_i64_release(i64 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_sub_i64_release:
+  %old = atomicrmw sub i64* @var64, i64 %offset release
+; CHECK-NOT: dmb
+; CHECK: neg x[[NEG:[0-9]+]], x[[OLD:[0-9]+]]
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+; The release i64 'sub' is expected to become ldaddl of the negated operand, with no dmb.
+; CHECK: ldaddl x[[NEG]], x[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+  ret i64 %old
+}
+
+define void @test_atomic_load_sub_i32_noret_release(i32 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_sub_i32_noret_release:
+  atomicrmw sub i32* @var32, i32 %offset release
+; CHECK-NOT: dmb
+; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]]
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+; With the result unused, the release i32 'sub' should become staddl of the negated operand (NEG), no dmb.
+; CHECK: staddl w[[NEG]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+  ret void
+}
+
+define void @test_atomic_load_sub_i64_noret_release(i64 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_sub_i64_noret_release:
+  atomicrmw sub i64* @var64, i64 %offset release
+; CHECK-NOT: dmb
+; CHECK: neg x[[NEG:[0-9]+]], x[[OLD:[0-9]+]]
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+; With the result unused, the release i64 'sub' should become staddl of the negated operand (NEG), no dmb.
+; CHECK: staddl x[[NEG]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+  ret void
+}
+
+define i8 @test_atomic_load_sub_i8_seq_cst(i8 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_sub_i8_seq_cst:
+  %old = atomicrmw sub i8* @var8, i8 %offset seq_cst
+; CHECK-NOT: dmb
+; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]]
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
+; The seq_cst i8 'sub' is expected to become ldaddalb of the negated operand, with no dmb.
+; CHECK: ldaddalb w[[NEG]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+  ret i8 %old
+}
+
+define i16 @test_atomic_load_sub_i16_seq_cst(i16 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_sub_i16_seq_cst:
+  %old = atomicrmw sub i16* @var16, i16 %offset seq_cst
+; CHECK-NOT: dmb
+; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]]
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
+; The seq_cst i16 'sub' is expected to become ldaddalh of the negated operand, with no dmb.
+; CHECK: ldaddalh w[[NEG]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+  ret i16 %old
+}
+
+define i32 @test_atomic_load_sub_i32_seq_cst(i32 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_sub_i32_seq_cst:
+  %old = atomicrmw sub i32* @var32, i32 %offset seq_cst
+; CHECK-NOT: dmb
+; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]]
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+; The seq_cst i32 'sub' is expected to become ldaddal of the negated operand, with no dmb.
+; CHECK: ldaddal w[[NEG]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+  ret i32 %old
+}
+
+define i64 @test_atomic_load_sub_i64_seq_cst(i64 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_sub_i64_seq_cst:
+  %old = atomicrmw sub i64* @var64, i64 %offset seq_cst
+; CHECK-NOT: dmb
+; CHECK: neg x[[NEG:[0-9]+]], x[[OLD:[0-9]+]]
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+; The seq_cst i64 'sub' is expected to become ldaddal of the negated operand, with no dmb.
+; CHECK: ldaddal x[[NEG]], x[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+  ret i64 %old
+}
+
+define void @test_atomic_load_sub_i32_noret_seq_cst(i32 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_sub_i32_noret_seq_cst:
+  atomicrmw sub i32* @var32, i32 %offset seq_cst
+; CHECK-NOT: dmb
+; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]]
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+; With the result unused, the seq_cst i32 'sub' is still expected to be ldaddal of the negated operand.
+; CHECK: ldaddal w[[NEG]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+  ret void
+}
+
+define void @test_atomic_load_sub_i64_noret_seq_cst(i64 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_sub_i64_noret_seq_cst:
+  atomicrmw sub i64* @var64, i64 %offset seq_cst
+; CHECK-NOT: dmb
+; CHECK: neg x[[NEG:[0-9]+]], x[[OLD:[0-9]+]]
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+; With the result unused, the seq_cst i64 'sub' is still expected to be ldaddal of the negated operand.
+; CHECK: ldaddal x[[NEG]], x[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+  ret void
+}
+
+define i8 @test_atomic_load_xchg_i8_acq_rel(i8 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_xchg_i8_acq_rel:
+   %old = atomicrmw xchg i8* @var8, i8 %offset acq_rel
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
+; The acq_rel i8 'xchg' is expected to select swpalb, with no dmb around it.
+; CHECK: swpalb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i8 %old
+}
+
+define i16 @test_atomic_load_xchg_i16_acq_rel(i16 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_xchg_i16_acq_rel:
+   %old = atomicrmw xchg i16* @var16, i16 %offset acq_rel
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
+; The acq_rel i16 'xchg' is expected to select swpalh, with no dmb around it.
+; CHECK: swpalh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i16 %old
+}
+
+define i32 @test_atomic_load_xchg_i32_acq_rel(i32 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_xchg_i32_acq_rel:
+   %old = atomicrmw xchg i32* @var32, i32 %offset acq_rel
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+; The acq_rel i32 'xchg' is expected to select swpal on w registers, with no dmb around it.
+; CHECK: swpal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i32 %old
+}
+
+define i64 @test_atomic_load_xchg_i64_acq_rel(i64 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_xchg_i64_acq_rel:
+   %old = atomicrmw xchg i64* @var64, i64 %offset acq_rel
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+; The acq_rel i64 'xchg' is expected to select swpal on x registers, with no dmb around it.
+; CHECK: swpal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i64 %old
+}
+
+define void @test_atomic_load_xchg_i32_noret_acq_rel(i32 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_xchg_i32_noret_acq_rel:
+   atomicrmw xchg i32* @var32, i32 %offset acq_rel
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+; With the result unused, the acq_rel i32 'xchg' is still expected to be swpal with a numbered result register.
+; CHECK: swpal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret void
+}
+
+define void @test_atomic_load_xchg_i64_noret_acq_rel(i64 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_xchg_i64_noret_acq_rel:
+   atomicrmw xchg i64* @var64, i64 %offset acq_rel
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+; With the result unused, the acq_rel i64 'xchg' is still expected to be swpal with a numbered result register.
+; CHECK: swpal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret void
+}
+
+define i8 @test_atomic_load_xchg_i8_acquire(i8 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_xchg_i8_acquire:
+   %old = atomicrmw xchg i8* @var8, i8 %offset acquire
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
+; The acquire i8 'xchg' is expected to select swpab, with no dmb around it.
+; CHECK: swpab w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i8 %old
+}
+
+define i16 @test_atomic_load_xchg_i16_acquire(i16 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_xchg_i16_acquire:
+   %old = atomicrmw xchg i16* @var16, i16 %offset acquire
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
+; The acquire i16 'xchg' is expected to select swpah, with no dmb around it.
+; CHECK: swpah w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i16 %old
+}
+
+define i32 @test_atomic_load_xchg_i32_acquire(i32 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_xchg_i32_acquire:
+   %old = atomicrmw xchg i32* @var32, i32 %offset acquire
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+; The acquire i32 'xchg' is expected to select swpa on w registers, with no dmb around it.
+; CHECK: swpa w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i32 %old
+}
+
+define i64 @test_atomic_load_xchg_i64_acquire(i64 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_xchg_i64_acquire:
+   %old = atomicrmw xchg i64* @var64, i64 %offset acquire
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+; The acquire i64 'xchg' is expected to select swpa on x registers, with no dmb around it.
+; CHECK: swpa x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i64 %old
+}
+
+define void @test_atomic_load_xchg_i32_noret_acquire(i32 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_xchg_i32_noret_acquire:
+   atomicrmw xchg i32* @var32, i32 %offset acquire
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+; With the result unused, the acquire i32 'xchg' is still expected to be swpa with a numbered result register.
+; CHECK: swpa w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret void
+}
+
+define void @test_atomic_load_xchg_i64_noret_acquire(i64 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_xchg_i64_noret_acquire:
+   atomicrmw xchg i64* @var64, i64 %offset acquire
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+; With the result unused, the acquire i64 'xchg' is still expected to be swpa with a numbered result register.
+; CHECK: swpa x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret void
+}
+
+define i8 @test_atomic_load_xchg_i8_monotonic(i8 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_xchg_i8_monotonic:
+   %old = atomicrmw xchg i8* @var8, i8 %offset monotonic
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
+; The monotonic i8 'xchg' is expected to select swpb, with no dmb around it.
+; CHECK: swpb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i8 %old
+}
+
+define i16 @test_atomic_load_xchg_i16_monotonic(i16 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_xchg_i16_monotonic:
+   %old = atomicrmw xchg i16* @var16, i16 %offset monotonic
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
+; The monotonic i16 'xchg' is expected to select swph, with no dmb around it.
+; CHECK: swph w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i16 %old
+}
+
+define i32 @test_atomic_load_xchg_i32_monotonic(i32 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_xchg_i32_monotonic:
+   %old = atomicrmw xchg i32* @var32, i32 %offset monotonic
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+; The monotonic i32 'xchg' is expected to select swp on w registers, with no dmb around it.
+; CHECK: swp w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i32 %old
+}
+
+define i64 @test_atomic_load_xchg_i64_monotonic(i64 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_xchg_i64_monotonic:
+   %old = atomicrmw xchg i64* @var64, i64 %offset monotonic
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+; The monotonic i64 'xchg' is expected to select swp on x registers, with no dmb around it.
+; CHECK: swp x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i64 %old
+}
+
+define void @test_atomic_load_xchg_i32_noret_monotonic(i32 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_xchg_i32_noret_monotonic:
+   atomicrmw xchg i32* @var32, i32 %offset monotonic
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+; Result unused: expect swp; NEW also accepts a letter-named register (presumably the zero register).
+; CHECK: swp w[[OLD:[0-9]+]], w[[NEW:[0-9a-z]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret void
+}
+
+define void @test_atomic_load_xchg_i64_noret_monotonic(i64 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_xchg_i64_noret_monotonic:
+   atomicrmw xchg i64* @var64, i64 %offset monotonic
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+; Result unused: expect swp; NEW also accepts a letter-named register (presumably the zero register).
+; CHECK: swp x[[OLD:[0-9]+]], x[[NEW:[0-9a-z]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret void
+}
+
+define i8 @test_atomic_load_xchg_i8_release(i8 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_xchg_i8_release:
+   %old = atomicrmw xchg i8* @var8, i8 %offset release
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
+; The release i8 'xchg' is expected to select swplb, with no dmb around it.
+; CHECK: swplb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i8 %old
+}
+
+define i16 @test_atomic_load_xchg_i16_release(i16 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_xchg_i16_release:
+   %old = atomicrmw xchg i16* @var16, i16 %offset release
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
+; The release i16 'xchg' is expected to select swplh, with no dmb around it.
+; CHECK: swplh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i16 %old
+}
+
+define i32 @test_atomic_load_xchg_i32_release(i32 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_xchg_i32_release:
+   %old = atomicrmw xchg i32* @var32, i32 %offset release
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+; The release i32 'xchg' is expected to select swpl on w registers, with no dmb around it.
+; CHECK: swpl w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i32 %old
+}
+
+define i64 @test_atomic_load_xchg_i64_release(i64 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_xchg_i64_release:
+   %old = atomicrmw xchg i64* @var64, i64 %offset release
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+; The release i64 'xchg' is expected to select swpl on x registers, with no dmb around it.
+; CHECK: swpl x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i64 %old
+}
+
+define void @test_atomic_load_xchg_i32_noret_release(i32 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_xchg_i32_noret_release:
+   atomicrmw xchg i32* @var32, i32 %offset release
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+; Result unused: expect swpl; NEW also accepts a letter-named register (presumably the zero register).
+; CHECK: swpl w[[OLD:[0-9]+]], w[[NEW:[0-9a-z]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret void
+}
+
+define void @test_atomic_load_xchg_i64_noret_release(i64 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_xchg_i64_noret_release:
+   atomicrmw xchg i64* @var64, i64 %offset release
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+; Result unused: expect swpl; NEW also accepts a letter-named register (presumably the zero register).
+; CHECK: swpl x[[OLD:[0-9]+]], x[[NEW:[0-9a-z]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret void
+}
+
+define i8 @test_atomic_load_xchg_i8_seq_cst(i8 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_xchg_i8_seq_cst:
+   %old = atomicrmw xchg i8* @var8, i8 %offset seq_cst
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
+; The seq_cst i8 'xchg' is expected to select swpalb, with no dmb around it.
+; CHECK: swpalb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i8 %old
+}
+
+define i16 @test_atomic_load_xchg_i16_seq_cst(i16 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_xchg_i16_seq_cst:
+   %old = atomicrmw xchg i16* @var16, i16 %offset seq_cst
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
+; The seq_cst i16 'xchg' is expected to select swpalh, with no dmb around it.
+; CHECK: swpalh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i16 %old
+}
+
+define i32 @test_atomic_load_xchg_i32_seq_cst(i32 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_xchg_i32_seq_cst:
+   %old = atomicrmw xchg i32* @var32, i32 %offset seq_cst
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+; The seq_cst i32 'xchg' is expected to select swpal on w registers, with no dmb around it.
+; CHECK: swpal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i32 %old
+}
+
+define i64 @test_atomic_load_xchg_i64_seq_cst(i64 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_xchg_i64_seq_cst:
+   %old = atomicrmw xchg i64* @var64, i64 %offset seq_cst
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+; The seq_cst i64 'xchg' is expected to select swpal on x registers, with no dmb around it.
+; CHECK: swpal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i64 %old
+}
+
+define void @test_atomic_load_xchg_i32_noret_seq_cst(i32 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_xchg_i32_noret_seq_cst:
+   atomicrmw xchg i32* @var32, i32 %offset seq_cst
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+; With the result unused, the seq_cst i32 'xchg' is still expected to be swpal with a numbered result register.
+; CHECK: swpal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret void
+}
+
+define void @test_atomic_load_xchg_i64_noret_seq_cst(i64 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_xchg_i64_noret_seq_cst:
+   atomicrmw xchg i64* @var64, i64 %offset seq_cst
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+; With the result unused, the seq_cst i64 'xchg' is still expected to be swpal with a numbered result register.
+; CHECK: swpal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret void
+}
+
+define i8 @test_atomic_load_umax_i8_acq_rel(i8 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_umax_i8_acq_rel:
+   %old = atomicrmw umax i8* @var8, i8 %offset acq_rel
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
+; The acq_rel i8 'umax' is expected to select ldumaxalb, with no dmb around it.
+; CHECK: ldumaxalb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i8 %old
+}
+
+define i16 @test_atomic_load_umax_i16_acq_rel(i16 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_umax_i16_acq_rel:
+   %old = atomicrmw umax i16* @var16, i16 %offset acq_rel
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
+; The acq_rel i16 'umax' is expected to select ldumaxalh, with no dmb around it.
+; CHECK: ldumaxalh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i16 %old
+}
+
+define i32 @test_atomic_load_umax_i32_acq_rel(i32 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_umax_i32_acq_rel:
+   %old = atomicrmw umax i32* @var32, i32 %offset acq_rel
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+; The acq_rel i32 'umax' is expected to select ldumaxal on w registers, with no dmb around it.
+; CHECK: ldumaxal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i32 %old
+}
+
+define i64 @test_atomic_load_umax_i64_acq_rel(i64 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_umax_i64_acq_rel:
+   %old = atomicrmw umax i64* @var64, i64 %offset acq_rel
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+; The acq_rel i64 'umax' is expected to select ldumaxal on x registers, with no dmb around it.
+; CHECK: ldumaxal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i64 %old
+}
+
+define void @test_atomic_load_umax_i32_noret_acq_rel(i32 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_umax_i32_noret_acq_rel:
+   atomicrmw umax i32* @var32, i32 %offset acq_rel
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+; With the result unused, the acq_rel i32 'umax' is still expected to be ldumaxal with a numbered result register.
+; CHECK: ldumaxal w0, w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+  ret void
+}
+
+define void @test_atomic_load_umax_i64_noret_acq_rel(i64 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_umax_i64_noret_acq_rel:
+   atomicrmw umax i64* @var64, i64 %offset acq_rel
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+; With the result unused, the acq_rel i64 'umax' is still expected to be ldumaxal with a numbered result register.
+; CHECK: ldumaxal x0, x[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+  ret void
+}
+
+define i8 @test_atomic_load_umax_i8_acquire(i8 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_umax_i8_acquire:
+   %old = atomicrmw umax i8* @var8, i8 %offset acquire
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
+; The acquire i8 'umax' is expected to select ldumaxab, with no dmb around it.
+; CHECK: ldumaxab w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i8 %old
+}
+
+define i16 @test_atomic_load_umax_i16_acquire(i16 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_umax_i16_acquire:
+   %old = atomicrmw umax i16* @var16, i16 %offset acquire
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
+; The acquire i16 'umax' is expected to select ldumaxah, with no dmb around it.
+; CHECK: ldumaxah w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i16 %old
+}
+
+define i32 @test_atomic_load_umax_i32_acquire(i32 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_umax_i32_acquire:
+   %old = atomicrmw umax i32* @var32, i32 %offset acquire
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+; The acquire i32 'umax' is expected to select ldumaxa on w registers, with no dmb around it.
+; CHECK: ldumaxa w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i32 %old
+}
+
+define i64 @test_atomic_load_umax_i64_acquire(i64 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_umax_i64_acquire:
+   %old = atomicrmw umax i64* @var64, i64 %offset acquire
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+
+; CHECK: ldumaxa x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i64 %old
+}
+
+define void @test_atomic_load_umax_i32_noret_acquire(i32 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_umax_i32_noret_acquire:
+   atomicrmw umax i32* @var32, i32 %offset acquire
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+
+; CHECK: ldumaxa w0, w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+  ret void
+}
+
+define void @test_atomic_load_umax_i64_noret_acquire(i64 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_umax_i64_noret_acquire:
+   atomicrmw umax i64* @var64, i64 %offset acquire
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+
+; CHECK: ldumaxa x0, x[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+  ret void
+}
+
+define i8 @test_atomic_load_umax_i8_monotonic(i8 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_umax_i8_monotonic:
+   %old = atomicrmw umax i8* @var8, i8 %offset monotonic
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
+
+; CHECK: ldumaxb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i8 %old
+}
+
+define i16 @test_atomic_load_umax_i16_monotonic(i16 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_umax_i16_monotonic:
+   %old = atomicrmw umax i16* @var16, i16 %offset monotonic
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
+
+; CHECK: ldumaxh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i16 %old
+}
+
+define i32 @test_atomic_load_umax_i32_monotonic(i32 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_umax_i32_monotonic:
+   %old = atomicrmw umax i32* @var32, i32 %offset monotonic
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+
+; CHECK: ldumax w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i32 %old
+}
+
+define i64 @test_atomic_load_umax_i64_monotonic(i64 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_umax_i64_monotonic:
+   %old = atomicrmw umax i64* @var64, i64 %offset monotonic
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+
+; CHECK: ldumax x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i64 %old
+}
+
+define void @test_atomic_load_umax_i32_noret_monotonic(i32 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_umax_i32_noret_monotonic:
+   atomicrmw umax i32* @var32, i32 %offset monotonic
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+
+; CHECK: stumax w0, [x[[ADDR]]]
+; CHECK-NOT: dmb
+  ret void
+}
+
+define void @test_atomic_load_umax_i64_noret_monotonic(i64 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_umax_i64_noret_monotonic:
+   atomicrmw umax i64* @var64, i64 %offset monotonic
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+
+; CHECK: stumax x0, [x[[ADDR]]]
+; CHECK-NOT: dmb
+  ret void
+}
+
+define i8 @test_atomic_load_umax_i8_release(i8 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_umax_i8_release:
+   %old = atomicrmw umax i8* @var8, i8 %offset release
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
+
+; CHECK: ldumaxlb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i8 %old
+}
+
+define i16 @test_atomic_load_umax_i16_release(i16 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_umax_i16_release:
+   %old = atomicrmw umax i16* @var16, i16 %offset release
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
+
+; CHECK: ldumaxlh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i16 %old
+}
+
+define i32 @test_atomic_load_umax_i32_release(i32 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_umax_i32_release:
+   %old = atomicrmw umax i32* @var32, i32 %offset release
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+
+; CHECK: ldumaxl w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i32 %old
+}
+
+define i64 @test_atomic_load_umax_i64_release(i64 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_umax_i64_release:
+   %old = atomicrmw umax i64* @var64, i64 %offset release
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+
+; CHECK: ldumaxl x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i64 %old
+}
+
+define void @test_atomic_load_umax_i32_noret_release(i32 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_umax_i32_noret_release:
+   atomicrmw umax i32* @var32, i32 %offset release
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+
+; CHECK: stumaxl w0, [x[[ADDR]]]
+; CHECK-NOT: dmb
+  ret void
+}
+
+define void @test_atomic_load_umax_i64_noret_release(i64 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_umax_i64_noret_release:
+   atomicrmw umax i64* @var64, i64 %offset release
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+
+; CHECK: stumaxl x0, [x[[ADDR]]]
+; CHECK-NOT: dmb
+  ret void
+}
+
+define i8 @test_atomic_load_umax_i8_seq_cst(i8 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_umax_i8_seq_cst:
+   %old = atomicrmw umax i8* @var8, i8 %offset seq_cst
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
+
+; CHECK: ldumaxalb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i8 %old
+}
+
+define i16 @test_atomic_load_umax_i16_seq_cst(i16 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_umax_i16_seq_cst:
+   %old = atomicrmw umax i16* @var16, i16 %offset seq_cst
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
+
+; CHECK: ldumaxalh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i16 %old
+}
+
+define i32 @test_atomic_load_umax_i32_seq_cst(i32 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_umax_i32_seq_cst:
+   %old = atomicrmw umax i32* @var32, i32 %offset seq_cst
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+
+; CHECK: ldumaxal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i32 %old
+}
+
+define i64 @test_atomic_load_umax_i64_seq_cst(i64 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_umax_i64_seq_cst:
+   %old = atomicrmw umax i64* @var64, i64 %offset seq_cst
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+
+; CHECK: ldumaxal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i64 %old
+}
+
+define void @test_atomic_load_umax_i32_noret_seq_cst(i32 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_umax_i32_noret_seq_cst:
+   atomicrmw umax i32* @var32, i32 %offset seq_cst
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+
+; CHECK: ldumaxal w0, w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+  ret void
+}
+
+define void @test_atomic_load_umax_i64_noret_seq_cst(i64 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_umax_i64_noret_seq_cst:
+   atomicrmw umax i64* @var64, i64 %offset seq_cst
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+
+; CHECK: ldumaxal x0, x[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+  ret void
+}
+
+define i8 @test_atomic_load_umin_i8_acq_rel(i8 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_umin_i8_acq_rel:
+   %old = atomicrmw umin i8* @var8, i8 %offset acq_rel
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
+
+; CHECK: lduminalb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i8 %old
+}
+
+define i16 @test_atomic_load_umin_i16_acq_rel(i16 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_umin_i16_acq_rel:
+   %old = atomicrmw umin i16* @var16, i16 %offset acq_rel
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
+
+; CHECK: lduminalh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i16 %old
+}
+
+define i32 @test_atomic_load_umin_i32_acq_rel(i32 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_umin_i32_acq_rel:
+   %old = atomicrmw umin i32* @var32, i32 %offset acq_rel
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+
+; CHECK: lduminal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i32 %old
+}
+
+define i64 @test_atomic_load_umin_i64_acq_rel(i64 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_umin_i64_acq_rel:
+   %old = atomicrmw umin i64* @var64, i64 %offset acq_rel
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+
+; CHECK: lduminal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i64 %old
+}
+
+define void @test_atomic_load_umin_i32_noret_acq_rel(i32 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_umin_i32_noret_acq_rel:
+   atomicrmw umin i32* @var32, i32 %offset acq_rel
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+
+; CHECK: lduminal w0, w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+  ret void
+}
+
+define void @test_atomic_load_umin_i64_noret_acq_rel(i64 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_umin_i64_noret_acq_rel:
+   atomicrmw umin i64* @var64, i64 %offset acq_rel
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+
+; CHECK: lduminal x0, x[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+  ret void
+}
+
+define i8 @test_atomic_load_umin_i8_acquire(i8 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_umin_i8_acquire:
+   %old = atomicrmw umin i8* @var8, i8 %offset acquire
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
+
+; CHECK: lduminab w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i8 %old
+}
+
+define i16 @test_atomic_load_umin_i16_acquire(i16 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_umin_i16_acquire:
+   %old = atomicrmw umin i16* @var16, i16 %offset acquire
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
+
+; CHECK: lduminah w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i16 %old
+}
+
+define i32 @test_atomic_load_umin_i32_acquire(i32 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_umin_i32_acquire:
+   %old = atomicrmw umin i32* @var32, i32 %offset acquire
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+
+; CHECK: ldumina w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i32 %old
+}
+
+define i64 @test_atomic_load_umin_i64_acquire(i64 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_umin_i64_acquire:
+   %old = atomicrmw umin i64* @var64, i64 %offset acquire
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+
+; CHECK: ldumina x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i64 %old
+}
+
+define void @test_atomic_load_umin_i32_noret_acquire(i32 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_umin_i32_noret_acquire:
+   atomicrmw umin i32* @var32, i32 %offset acquire
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+
+; CHECK: ldumina w0, w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+  ret void
+}
+
+define void @test_atomic_load_umin_i64_noret_acquire(i64 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_umin_i64_noret_acquire:
+   atomicrmw umin i64* @var64, i64 %offset acquire
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+
+; CHECK: ldumina x0, x[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+  ret void
+}
+
+define i8 @test_atomic_load_umin_i8_monotonic(i8 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_umin_i8_monotonic:
+   %old = atomicrmw umin i8* @var8, i8 %offset monotonic
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
+
+; CHECK: lduminb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i8 %old
+}
+
+define i16 @test_atomic_load_umin_i16_monotonic(i16 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_umin_i16_monotonic:
+   %old = atomicrmw umin i16* @var16, i16 %offset monotonic
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
+
+; CHECK: lduminh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i16 %old
+}
+
+define i32 @test_atomic_load_umin_i32_monotonic(i32 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_umin_i32_monotonic:
+   %old = atomicrmw umin i32* @var32, i32 %offset monotonic
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+
+; CHECK: ldumin w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i32 %old
+}
+
+define i64 @test_atomic_load_umin_i64_monotonic(i64 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_umin_i64_monotonic:
+   %old = atomicrmw umin i64* @var64, i64 %offset monotonic
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+
+; CHECK: ldumin x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i64 %old
+}
+
+define void @test_atomic_load_umin_i32_noret_monotonic(i32 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_umin_i32_noret_monotonic:
+   atomicrmw umin i32* @var32, i32 %offset monotonic
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+
+; CHECK: stumin w0, [x[[ADDR]]]
+; CHECK-NOT: dmb
+  ret void
+}
+
+define void @test_atomic_load_umin_i64_noret_monotonic(i64 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_umin_i64_noret_monotonic:
+   atomicrmw umin i64* @var64, i64 %offset monotonic
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+
+; CHECK: stumin x0, [x[[ADDR]]]
+; CHECK-NOT: dmb
+  ret void
+}
+
+define i8 @test_atomic_load_umin_i8_release(i8 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_umin_i8_release:
+   %old = atomicrmw umin i8* @var8, i8 %offset release
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
+
+; CHECK: lduminlb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i8 %old
+}
+
+define i16 @test_atomic_load_umin_i16_release(i16 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_umin_i16_release:
+   %old = atomicrmw umin i16* @var16, i16 %offset release
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
+
+; CHECK: lduminlh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i16 %old
+}
+
+define i32 @test_atomic_load_umin_i32_release(i32 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_umin_i32_release:
+   %old = atomicrmw umin i32* @var32, i32 %offset release
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+
+; CHECK: lduminl w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i32 %old
+}
+
+define i64 @test_atomic_load_umin_i64_release(i64 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_umin_i64_release:
+   %old = atomicrmw umin i64* @var64, i64 %offset release
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+
+; CHECK: lduminl x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i64 %old
+}
+
+define void @test_atomic_load_umin_i32_noret_release(i32 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_umin_i32_noret_release:
+   atomicrmw umin i32* @var32, i32 %offset release
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+
+; CHECK: stuminl w0, [x[[ADDR]]]
+; CHECK-NOT: dmb
+  ret void
+}
+
+define void @test_atomic_load_umin_i64_noret_release(i64 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_umin_i64_noret_release:
+   atomicrmw umin i64* @var64, i64 %offset release
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+
+; CHECK: stuminl x0, [x[[ADDR]]]
+; CHECK-NOT: dmb
+  ret void
+}
+
+define i8 @test_atomic_load_umin_i8_seq_cst(i8 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_umin_i8_seq_cst:
+   %old = atomicrmw umin i8* @var8, i8 %offset seq_cst
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
+
+; CHECK: lduminalb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i8 %old
+}
+
+define i16 @test_atomic_load_umin_i16_seq_cst(i16 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_umin_i16_seq_cst:
+   %old = atomicrmw umin i16* @var16, i16 %offset seq_cst
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
+
+; CHECK: lduminalh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i16 %old
+}
+
+define i32 @test_atomic_load_umin_i32_seq_cst(i32 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_umin_i32_seq_cst:
+   %old = atomicrmw umin i32* @var32, i32 %offset seq_cst
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+
+; CHECK: lduminal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i32 %old
+}
+
+define i64 @test_atomic_load_umin_i64_seq_cst(i64 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_umin_i64_seq_cst:
+   %old = atomicrmw umin i64* @var64, i64 %offset seq_cst
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+
+; CHECK: lduminal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i64 %old
+}
+
+define void @test_atomic_load_umin_i32_noret_seq_cst(i32 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_umin_i32_noret_seq_cst:
+   atomicrmw umin i32* @var32, i32 %offset seq_cst
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+
+; CHECK: lduminal w0, w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+  ret void
+}
+
+define void @test_atomic_load_umin_i64_noret_seq_cst(i64 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_umin_i64_noret_seq_cst:
+   atomicrmw umin i64* @var64, i64 %offset seq_cst
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+
+; CHECK: lduminal x0, x[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+  ret void
+}
+
+define i8 @test_atomic_load_xor_i8_acq_rel(i8 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_xor_i8_acq_rel:
+   %old = atomicrmw xor i8* @var8, i8 %offset acq_rel
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
+
+; CHECK: ldeoralb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i8 %old
+}
+
+define i16 @test_atomic_load_xor_i16_acq_rel(i16 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_xor_i16_acq_rel:
+   %old = atomicrmw xor i16* @var16, i16 %offset acq_rel
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
+
+; CHECK: ldeoralh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i16 %old
+}
+
+define i32 @test_atomic_load_xor_i32_acq_rel(i32 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_xor_i32_acq_rel:
+   %old = atomicrmw xor i32* @var32, i32 %offset acq_rel
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+
+; CHECK: ldeoral w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i32 %old
+}
+
+define i64 @test_atomic_load_xor_i64_acq_rel(i64 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_xor_i64_acq_rel:
+   %old = atomicrmw xor i64* @var64, i64 %offset acq_rel
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+
+; CHECK: ldeoral x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i64 %old
+}
+
+define void @test_atomic_load_xor_i32_noret_acq_rel(i32 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_xor_i32_noret_acq_rel:
+   atomicrmw xor i32* @var32, i32 %offset acq_rel
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+
+; CHECK: ldeoral w0, w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+  ret void
+}
+
+define void @test_atomic_load_xor_i64_noret_acq_rel(i64 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_xor_i64_noret_acq_rel:
+   atomicrmw xor i64* @var64, i64 %offset acq_rel
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+
+; CHECK: ldeoral x0, x[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+  ret void
+}
+
+define i8 @test_atomic_load_xor_i8_acquire(i8 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_xor_i8_acquire:
+   %old = atomicrmw xor i8* @var8, i8 %offset acquire
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
+
+; CHECK: ldeorab w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i8 %old
+}
+
+define i16 @test_atomic_load_xor_i16_acquire(i16 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_xor_i16_acquire:
+   %old = atomicrmw xor i16* @var16, i16 %offset acquire
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
+
+; CHECK: ldeorah w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i16 %old
+}
+
+define i32 @test_atomic_load_xor_i32_acquire(i32 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_xor_i32_acquire:
+   %old = atomicrmw xor i32* @var32, i32 %offset acquire
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+
+; CHECK: ldeora w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i32 %old
+}
+
+define i64 @test_atomic_load_xor_i64_acquire(i64 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_xor_i64_acquire:
+   %old = atomicrmw xor i64* @var64, i64 %offset acquire
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+
+; CHECK: ldeora x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i64 %old
+}
+
+define void @test_atomic_load_xor_i32_noret_acquire(i32 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_xor_i32_noret_acquire:
+   atomicrmw xor i32* @var32, i32 %offset acquire
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+
+; CHECK: ldeora w0, w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+  ret void
+}
+
+define void @test_atomic_load_xor_i64_noret_acquire(i64 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_xor_i64_noret_acquire:
+   atomicrmw xor i64* @var64, i64 %offset acquire
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+
+; CHECK: ldeora x0, x[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+  ret void
+}
+
+define i8 @test_atomic_load_xor_i8_monotonic(i8 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_xor_i8_monotonic:
+   %old = atomicrmw xor i8* @var8, i8 %offset monotonic
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
+
+; CHECK: ldeorb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i8 %old
+}
+
+define i16 @test_atomic_load_xor_i16_monotonic(i16 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_xor_i16_monotonic:
+   %old = atomicrmw xor i16* @var16, i16 %offset monotonic
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
+
+; CHECK: ldeorh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i16 %old
+}
+
+define i32 @test_atomic_load_xor_i32_monotonic(i32 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_xor_i32_monotonic:
+   %old = atomicrmw xor i32* @var32, i32 %offset monotonic
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+
+; CHECK: ldeor w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i32 %old
+}
+
+define i64 @test_atomic_load_xor_i64_monotonic(i64 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_xor_i64_monotonic:
+   %old = atomicrmw xor i64* @var64, i64 %offset monotonic
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+
+; CHECK: ldeor x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i64 %old
+}
+
+define void @test_atomic_load_xor_i32_noret_monotonic(i32 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_xor_i32_noret_monotonic:
+   atomicrmw xor i32* @var32, i32 %offset monotonic
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+
+; CHECK: steor w0, [x[[ADDR]]]
+; CHECK-NOT: dmb
+  ret void
+}
+
+define void @test_atomic_load_xor_i64_noret_monotonic(i64 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_xor_i64_noret_monotonic:
+   atomicrmw xor i64* @var64, i64 %offset monotonic
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+
+; CHECK: steor x0, [x[[ADDR]]]
+; CHECK-NOT: dmb
+  ret void
+}
+
+define i8 @test_atomic_load_xor_i8_release(i8 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_xor_i8_release:
+   %old = atomicrmw xor i8* @var8, i8 %offset release
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
+
+; CHECK: ldeorlb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i8 %old
+}
+
+define i16 @test_atomic_load_xor_i16_release(i16 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_xor_i16_release:
+   %old = atomicrmw xor i16* @var16, i16 %offset release
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
+
+; CHECK: ldeorlh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i16 %old
+}
+
+define i32 @test_atomic_load_xor_i32_release(i32 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_xor_i32_release:
+   %old = atomicrmw xor i32* @var32, i32 %offset release
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+
+; CHECK: ldeorl w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i32 %old
+}
+
+define i64 @test_atomic_load_xor_i64_release(i64 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_xor_i64_release:
+   %old = atomicrmw xor i64* @var64, i64 %offset release
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+
+; CHECK: ldeorl x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i64 %old
+}
+
+define void @test_atomic_load_xor_i32_noret_release(i32 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_xor_i32_noret_release:
+   atomicrmw xor i32* @var32, i32 %offset release
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+
+; CHECK: steorl w0, [x[[ADDR]]]
+; CHECK-NOT: dmb
+  ret void
+}
+
+define void @test_atomic_load_xor_i64_noret_release(i64 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_xor_i64_noret_release:
+   atomicrmw xor i64* @var64, i64 %offset release
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+
+; CHECK: steorl x0, [x[[ADDR]]]
+; CHECK-NOT: dmb
+  ret void
+}
+
+define i8 @test_atomic_load_xor_i8_seq_cst(i8 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_xor_i8_seq_cst:
+   %old = atomicrmw xor i8* @var8, i8 %offset seq_cst
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
+
+; CHECK: ldeoralb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i8 %old
+}
+
+define i16 @test_atomic_load_xor_i16_seq_cst(i16 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_xor_i16_seq_cst:
+   %old = atomicrmw xor i16* @var16, i16 %offset seq_cst
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
+
+; CHECK: ldeoralh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i16 %old
+}
+
+define i32 @test_atomic_load_xor_i32_seq_cst(i32 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_xor_i32_seq_cst:
+   %old = atomicrmw xor i32* @var32, i32 %offset seq_cst
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+
+; CHECK: ldeoral w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i32 %old
+}
+
+define i64 @test_atomic_load_xor_i64_seq_cst(i64 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_xor_i64_seq_cst:
+   %old = atomicrmw xor i64* @var64, i64 %offset seq_cst
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+
+; CHECK: ldeoral x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+   ret i64 %old
+}
+
+define void @test_atomic_load_xor_i32_noret_seq_cst(i32 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_xor_i32_noret_seq_cst:
+   atomicrmw xor i32* @var32, i32 %offset seq_cst
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+
+; CHECK: ldeoral w0, w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+  ret void
+}
+
+define void @test_atomic_load_xor_i64_noret_seq_cst(i64 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_xor_i64_noret_seq_cst:
+   atomicrmw xor i64* @var64, i64 %offset seq_cst
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+
+; CHECK: ldeoral x0, x[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+  ret void
+}
+
+




More information about the llvm-commits mailing list