[llvm] fcad2bb - [RISC-V] Add proposed mapping for Ztso

Alex Bradbury via llvm-commits llvm-commits at lists.llvm.org
Thu Aug 10 07:59:38 PDT 2023


Author: Patrick O'Neill
Date: 2023-08-10T15:59:06+01:00
New Revision: fcad2bbcfc1f0acaa91fe03b71ff069975f80c8e

URL: https://github.com/llvm/llvm-project/commit/fcad2bbcfc1f0acaa91fe03b71ff069975f80c8e
DIFF: https://github.com/llvm/llvm-project/commit/fcad2bbcfc1f0acaa91fe03b71ff069975f80c8e.diff

LOG: [RISC-V] Add proposed mapping for Ztso

Currently LLVM emits Ztso code for fences, loads, and stores (behind an
experimental flag) [1]. This patch updates the mapping and implements
support for LR/SC and AMO ops. This updated mapping is compatible with
the RVWMO ABI present in the psABI. Additional context can be found in
the psABI pull request [2].

[1] https://reviews.llvm.org/D143076
[2] https://github.com/riscv-non-isa/riscv-elf-psabi-doc/pull/391

Differential Revision: https://reviews.llvm.org/D155517

Added: 
    

Modified: 
    llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp
    llvm/lib/Target/RISCV/RISCVFeatures.td
    llvm/lib/Target/RISCV/RISCVISelLowering.cpp
    llvm/lib/Target/RISCV/RISCVInstrInfoA.td
    llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll
    llvm/test/CodeGen/RISCV/atomic-load-store.ll
    llvm/test/CodeGen/RISCV/atomic-rmw.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp b/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp
index 59f1e8319ae72e..d0d0e32334484b 100644
--- a/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp
+++ b/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp
@@ -30,6 +30,7 @@ namespace {
 
 class RISCVExpandAtomicPseudo : public MachineFunctionPass {
 public:
+  const RISCVSubtarget *STI;
   const RISCVInstrInfo *TII;
   static char ID;
 
@@ -72,7 +73,8 @@ class RISCVExpandAtomicPseudo : public MachineFunctionPass {
 char RISCVExpandAtomicPseudo::ID = 0;
 
 bool RISCVExpandAtomicPseudo::runOnMachineFunction(MachineFunction &MF) {
-  TII = MF.getSubtarget<RISCVSubtarget>().getInstrInfo();
+  STI = &MF.getSubtarget<RISCVSubtarget>();
+  TII = STI->getInstrInfo();
 
 #ifndef NDEBUG
   const unsigned OldSize = getInstSizeInBytes(MF);
@@ -148,24 +150,30 @@ bool RISCVExpandAtomicPseudo::expandMI(MachineBasicBlock &MBB,
   return false;
 }
 
-static unsigned getLRForRMW32(AtomicOrdering Ordering) {
+static unsigned getLRForRMW32(AtomicOrdering Ordering,
+                              const RISCVSubtarget *Subtarget) {
   switch (Ordering) {
   default:
     llvm_unreachable("Unexpected AtomicOrdering");
   case AtomicOrdering::Monotonic:
     return RISCV::LR_W;
   case AtomicOrdering::Acquire:
+    if (Subtarget->hasStdExtZtso())
+      return RISCV::LR_W;
     return RISCV::LR_W_AQ;
   case AtomicOrdering::Release:
     return RISCV::LR_W;
   case AtomicOrdering::AcquireRelease:
+    if (Subtarget->hasStdExtZtso())
+      return RISCV::LR_W;
     return RISCV::LR_W_AQ;
   case AtomicOrdering::SequentiallyConsistent:
     return RISCV::LR_W_AQ_RL;
   }
 }
 
-static unsigned getSCForRMW32(AtomicOrdering Ordering) {
+static unsigned getSCForRMW32(AtomicOrdering Ordering,
+                              const RISCVSubtarget *Subtarget) {
   switch (Ordering) {
   default:
     llvm_unreachable("Unexpected AtomicOrdering");
@@ -174,32 +182,42 @@ static unsigned getSCForRMW32(AtomicOrdering Ordering) {
   case AtomicOrdering::Acquire:
     return RISCV::SC_W;
   case AtomicOrdering::Release:
+    if (Subtarget->hasStdExtZtso())
+      return RISCV::SC_W;
     return RISCV::SC_W_RL;
   case AtomicOrdering::AcquireRelease:
+    if (Subtarget->hasStdExtZtso())
+      return RISCV::SC_W;
     return RISCV::SC_W_RL;
   case AtomicOrdering::SequentiallyConsistent:
     return RISCV::SC_W_RL;
   }
 }
 
-static unsigned getLRForRMW64(AtomicOrdering Ordering) {
+static unsigned getLRForRMW64(AtomicOrdering Ordering,
+                              const RISCVSubtarget *Subtarget) {
   switch (Ordering) {
   default:
     llvm_unreachable("Unexpected AtomicOrdering");
   case AtomicOrdering::Monotonic:
     return RISCV::LR_D;
   case AtomicOrdering::Acquire:
+    if (Subtarget->hasStdExtZtso())
+      return RISCV::LR_D;
     return RISCV::LR_D_AQ;
   case AtomicOrdering::Release:
     return RISCV::LR_D;
   case AtomicOrdering::AcquireRelease:
+    if (Subtarget->hasStdExtZtso())
+      return RISCV::LR_D;
     return RISCV::LR_D_AQ;
   case AtomicOrdering::SequentiallyConsistent:
     return RISCV::LR_D_AQ_RL;
   }
 }
 
-static unsigned getSCForRMW64(AtomicOrdering Ordering) {
+static unsigned getSCForRMW64(AtomicOrdering Ordering,
+                              const RISCVSubtarget *Subtarget) {
   switch (Ordering) {
   default:
     llvm_unreachable("Unexpected AtomicOrdering");
@@ -208,27 +226,33 @@ static unsigned getSCForRMW64(AtomicOrdering Ordering) {
   case AtomicOrdering::Acquire:
     return RISCV::SC_D;
   case AtomicOrdering::Release:
+    if (Subtarget->hasStdExtZtso())
+      return RISCV::SC_D;
     return RISCV::SC_D_RL;
   case AtomicOrdering::AcquireRelease:
+    if (Subtarget->hasStdExtZtso())
+      return RISCV::SC_D;
     return RISCV::SC_D_RL;
   case AtomicOrdering::SequentiallyConsistent:
     return RISCV::SC_D_RL;
   }
 }
 
-static unsigned getLRForRMW(AtomicOrdering Ordering, int Width) {
+static unsigned getLRForRMW(AtomicOrdering Ordering, int Width,
+                            const RISCVSubtarget *Subtarget) {
   if (Width == 32)
-    return getLRForRMW32(Ordering);
+    return getLRForRMW32(Ordering, Subtarget);
   if (Width == 64)
-    return getLRForRMW64(Ordering);
+    return getLRForRMW64(Ordering, Subtarget);
   llvm_unreachable("Unexpected LR width\n");
 }
 
-static unsigned getSCForRMW(AtomicOrdering Ordering, int Width) {
+static unsigned getSCForRMW(AtomicOrdering Ordering, int Width,
+                            const RISCVSubtarget *Subtarget) {
   if (Width == 32)
-    return getSCForRMW32(Ordering);
+    return getSCForRMW32(Ordering, Subtarget);
   if (Width == 64)
-    return getSCForRMW64(Ordering);
+    return getSCForRMW64(Ordering, Subtarget);
   llvm_unreachable("Unexpected SC width\n");
 }
 
@@ -236,7 +260,8 @@ static void doAtomicBinOpExpansion(const RISCVInstrInfo *TII, MachineInstr &MI,
                                    DebugLoc DL, MachineBasicBlock *ThisMBB,
                                    MachineBasicBlock *LoopMBB,
                                    MachineBasicBlock *DoneMBB,
-                                   AtomicRMWInst::BinOp BinOp, int Width) {
+                                   AtomicRMWInst::BinOp BinOp, int Width,
+                                   const RISCVSubtarget *STI) {
   Register DestReg = MI.getOperand(0).getReg();
   Register ScratchReg = MI.getOperand(1).getReg();
   Register AddrReg = MI.getOperand(2).getReg();
@@ -249,7 +274,7 @@ static void doAtomicBinOpExpansion(const RISCVInstrInfo *TII, MachineInstr &MI,
   //   binop scratch, dest, val
   //   sc.[w|d] scratch, scratch, (addr)
   //   bnez scratch, loop
-  BuildMI(LoopMBB, DL, TII->get(getLRForRMW(Ordering, Width)), DestReg)
+  BuildMI(LoopMBB, DL, TII->get(getLRForRMW(Ordering, Width, STI)), DestReg)
       .addReg(AddrReg);
   switch (BinOp) {
   default:
@@ -263,7 +288,7 @@ static void doAtomicBinOpExpansion(const RISCVInstrInfo *TII, MachineInstr &MI,
         .addImm(-1);
     break;
   }
-  BuildMI(LoopMBB, DL, TII->get(getSCForRMW(Ordering, Width)), ScratchReg)
+  BuildMI(LoopMBB, DL, TII->get(getSCForRMW(Ordering, Width, STI)), ScratchReg)
       .addReg(AddrReg)
       .addReg(ScratchReg);
   BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
@@ -294,10 +319,13 @@ static void insertMaskedMerge(const RISCVInstrInfo *TII, DebugLoc DL,
       .addReg(ScratchReg);
 }
 
-static void doMaskedAtomicBinOpExpansion(
-    const RISCVInstrInfo *TII, MachineInstr &MI, DebugLoc DL,
-    MachineBasicBlock *ThisMBB, MachineBasicBlock *LoopMBB,
-    MachineBasicBlock *DoneMBB, AtomicRMWInst::BinOp BinOp, int Width) {
+static void doMaskedAtomicBinOpExpansion(const RISCVInstrInfo *TII,
+                                         MachineInstr &MI, DebugLoc DL,
+                                         MachineBasicBlock *ThisMBB,
+                                         MachineBasicBlock *LoopMBB,
+                                         MachineBasicBlock *DoneMBB,
+                                         AtomicRMWInst::BinOp BinOp, int Width,
+                                         const RISCVSubtarget *STI) {
   assert(Width == 32 && "Should never need to expand masked 64-bit operations");
   Register DestReg = MI.getOperand(0).getReg();
   Register ScratchReg = MI.getOperand(1).getReg();
@@ -315,7 +343,7 @@ static void doMaskedAtomicBinOpExpansion(
   //   xor scratch, destreg, scratch
   //   sc.w scratch, scratch, (alignedaddr)
   //   bnez scratch, loop
-  BuildMI(LoopMBB, DL, TII->get(getLRForRMW32(Ordering)), DestReg)
+  BuildMI(LoopMBB, DL, TII->get(getLRForRMW32(Ordering, STI)), DestReg)
       .addReg(AddrReg);
   switch (BinOp) {
   default:
@@ -348,7 +376,7 @@ static void doMaskedAtomicBinOpExpansion(
   insertMaskedMerge(TII, DL, LoopMBB, ScratchReg, DestReg, ScratchReg, MaskReg,
                     ScratchReg);
 
-  BuildMI(LoopMBB, DL, TII->get(getSCForRMW32(Ordering)), ScratchReg)
+  BuildMI(LoopMBB, DL, TII->get(getSCForRMW32(Ordering, STI)), ScratchReg)
       .addReg(AddrReg)
       .addReg(ScratchReg);
   BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
@@ -380,10 +408,11 @@ bool RISCVExpandAtomicPseudo::expandAtomicBinOp(
   MBB.addSuccessor(LoopMBB);
 
   if (!IsMasked)
-    doAtomicBinOpExpansion(TII, MI, DL, &MBB, LoopMBB, DoneMBB, BinOp, Width);
+    doAtomicBinOpExpansion(TII, MI, DL, &MBB, LoopMBB, DoneMBB, BinOp, Width,
+                           STI);
   else
     doMaskedAtomicBinOpExpansion(TII, MI, DL, &MBB, LoopMBB, DoneMBB, BinOp,
-                                 Width);
+                                 Width, STI);
 
   NextMBBI = MBB.end();
   MI.eraseFromParent();
@@ -455,7 +484,7 @@ bool RISCVExpandAtomicPseudo::expandAtomicMinMaxOp(
   //   mv scratch1, destreg
   //   [sext scratch2 if signed min/max]
   //   ifnochangeneeded scratch2, incr, .looptail
-  BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW32(Ordering)), DestReg)
+  BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW32(Ordering, STI)), DestReg)
       .addReg(AddrReg);
   BuildMI(LoopHeadMBB, DL, TII->get(RISCV::AND), Scratch2Reg)
       .addReg(DestReg)
@@ -507,7 +536,7 @@ bool RISCVExpandAtomicPseudo::expandAtomicMinMaxOp(
   // .looptail:
   //   sc.w scratch1, scratch1, (addr)
   //   bnez scratch1, loop
-  BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW32(Ordering)), Scratch1Reg)
+  BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW32(Ordering, STI)), Scratch1Reg)
       .addReg(AddrReg)
       .addReg(Scratch1Reg);
   BuildMI(LoopTailMBB, DL, TII->get(RISCV::BNE))
@@ -626,7 +655,8 @@ bool RISCVExpandAtomicPseudo::expandAtomicCmpXchg(
     // .loophead:
     //   lr.[w|d] dest, (addr)
     //   bne dest, cmpval, done
-    BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW(Ordering, Width)), DestReg)
+    BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW(Ordering, Width, STI)),
+            DestReg)
         .addReg(AddrReg);
     BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BNE))
         .addReg(DestReg)
@@ -635,7 +665,8 @@ bool RISCVExpandAtomicPseudo::expandAtomicCmpXchg(
     // .looptail:
     //   sc.[w|d] scratch, newval, (addr)
     //   bnez scratch, loophead
-    BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW(Ordering, Width)), ScratchReg)
+    BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW(Ordering, Width, STI)),
+            ScratchReg)
         .addReg(AddrReg)
         .addReg(NewValReg);
     BuildMI(LoopTailMBB, DL, TII->get(RISCV::BNE))
@@ -648,7 +679,8 @@ bool RISCVExpandAtomicPseudo::expandAtomicCmpXchg(
     //   and scratch, dest, mask
     //   bne scratch, cmpval, done
     Register MaskReg = MI.getOperand(5).getReg();
-    BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW(Ordering, Width)), DestReg)
+    BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW(Ordering, Width, STI)),
+            DestReg)
         .addReg(AddrReg);
     BuildMI(LoopHeadMBB, DL, TII->get(RISCV::AND), ScratchReg)
         .addReg(DestReg)
@@ -666,7 +698,8 @@ bool RISCVExpandAtomicPseudo::expandAtomicCmpXchg(
     //   bnez scratch, loophead
     insertMaskedMerge(TII, DL, LoopTailMBB, ScratchReg, DestReg, NewValReg,
                       MaskReg, ScratchReg);
-    BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW(Ordering, Width)), ScratchReg)
+    BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW(Ordering, Width, STI)),
+            ScratchReg)
         .addReg(AddrReg)
         .addReg(ScratchReg);
     BuildMI(LoopTailMBB, DL, TII->get(RISCV::BNE))

diff  --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td
index e92f9cd0b615e5..8aeaa11bae5b8e 100644
--- a/llvm/lib/Target/RISCV/RISCVFeatures.td
+++ b/llvm/lib/Target/RISCV/RISCVFeatures.td
@@ -542,6 +542,7 @@ def FeatureStdExtZtso
 def HasStdExtZtso : Predicate<"Subtarget->hasStdExtZTso()">,
                               AssemblerPredicate<(all_of FeatureStdExtZtso),
                               "'Ztso' (Memory Model - Total Store Order)">;
+def NotHasStdExtZtso : Predicate<"!Subtarget->hasStdExtZtso()">;
 
 def FeatureStdExtZawrs : SubtargetFeature<"zawrs", "HasStdExtZawrs", "true",
                                           "'Zawrs' (Wait on Reservation Set)">;

diff  --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 48268c9e13e67f..1c578481d40712 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -16909,8 +16909,11 @@ Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
 Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
                                                     Instruction *Inst,
                                                     AtomicOrdering Ord) const {
-  if (Subtarget.hasStdExtZtso())
+  if (Subtarget.hasStdExtZtso()) {
+    if (isa<StoreInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
+      return Builder.CreateFence(Ord);
     return nullptr;
+  }
 
   if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord))
     return Builder.CreateFence(AtomicOrdering::Acquire);

diff  --git a/llvm/lib/Target/RISCV/RISCVInstrInfoA.td b/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
index 8421109b85147e..b06415b620e04e 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
@@ -133,11 +133,10 @@ let Predicates = [HasAtomicLdSt, IsRV64] in {
   def : AtomicStPat<atomic_store_64, SD, GPR, i64>;
 }
 
-let Predicates = [HasStdExtA] in {
-
 /// AMOs
 
 multiclass AMOPat<string AtomicOp, string BaseInst, ValueType vt = XLenVT> {
+let Predicates = [HasStdExtA, NotHasStdExtZtso] in {
   def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_monotonic"),
                   !cast<RVInst>(BaseInst), vt>;
   def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_acquire"),
@@ -149,6 +148,21 @@ multiclass AMOPat<string AtomicOp, string BaseInst, ValueType vt = XLenVT> {
   def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_seq_cst"),
                   !cast<RVInst>(BaseInst#"_AQ_RL"), vt>;
 }
+let Predicates = [HasStdExtA, HasStdExtZtso] in {
+  def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_monotonic"),
+                  !cast<RVInst>(BaseInst), vt>;
+  def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_acquire"),
+                  !cast<RVInst>(BaseInst), vt>;
+  def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_release"),
+                  !cast<RVInst>(BaseInst), vt>;
+  def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_acq_rel"),
+                  !cast<RVInst>(BaseInst), vt>;
+  def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_seq_cst"),
+                  !cast<RVInst>(BaseInst), vt>;
+}
+}
+
+let Predicates = [HasStdExtA] in {
 
 defm : AMOPat<"atomic_swap_32", "AMOSWAP_W">;
 defm : AMOPat<"atomic_load_add_32", "AMOADD_W">;

diff  --git a/llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll b/llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll
index d04f85ef30783e..f900b5161f7512 100644
--- a/llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll
+++ b/llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll
@@ -2,15 +2,15 @@
 ; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
 ; RUN:   | FileCheck -check-prefix=RV32I %s
 ; RUN: llc -mtriple=riscv32 -mattr=+a -verify-machineinstrs < %s \
-; RUN:   | FileCheck -check-prefix=RV32IA %s
+; RUN:   | FileCheck -check-prefixes=RV32IA,RV32IA-WMO %s
 ; RUN: llc -mtriple=riscv32 -mattr=+a,+experimental-ztso -verify-machineinstrs < %s \
-; RUN:   | FileCheck -check-prefix=RV32IA %s
+; RUN:   | FileCheck -check-prefixes=RV32IA,RV32IA-TSO %s
 ; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
 ; RUN:   | FileCheck -check-prefix=RV64I %s
 ; RUN: llc -mtriple=riscv64 -mattr=+a -verify-machineinstrs < %s \
-; RUN:   | FileCheck -check-prefix=RV64IA %s
+; RUN:   | FileCheck -check-prefixes=RV64IA,RV64IA-WMO %s
 ; RUN: llc -mtriple=riscv64 -mattr=+a,+experimental-ztso -verify-machineinstrs < %s \
-; RUN:   | FileCheck -check-prefix=RV64IA %s
+; RUN:   | FileCheck -check-prefixes=RV64IA,RV64IA-TSO %s
 
 define void @cmpxchg_i8_monotonic_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind {
 ; RV32I-LABEL: cmpxchg_i8_monotonic_monotonic:
@@ -102,28 +102,51 @@ define void @cmpxchg_i8_acquire_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: cmpxchg_i8_acquire_monotonic:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a3, a0, -4
-; RV32IA-NEXT:    slli a0, a0, 3
-; RV32IA-NEXT:    li a4, 255
-; RV32IA-NEXT:    sll a4, a4, a0
-; RV32IA-NEXT:    andi a1, a1, 255
-; RV32IA-NEXT:    sll a1, a1, a0
-; RV32IA-NEXT:    andi a2, a2, 255
-; RV32IA-NEXT:    sll a0, a2, a0
-; RV32IA-NEXT:  .LBB1_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w.aq a2, (a3)
-; RV32IA-NEXT:    and a5, a2, a4
-; RV32IA-NEXT:    bne a5, a1, .LBB1_3
-; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB1_1 Depth=1
-; RV32IA-NEXT:    xor a5, a2, a0
-; RV32IA-NEXT:    and a5, a5, a4
-; RV32IA-NEXT:    xor a5, a2, a5
-; RV32IA-NEXT:    sc.w a5, a5, (a3)
-; RV32IA-NEXT:    bnez a5, .LBB1_1
-; RV32IA-NEXT:  .LBB1_3:
-; RV32IA-NEXT:    ret
+; RV32IA-WMO-LABEL: cmpxchg_i8_acquire_monotonic:
+; RV32IA-WMO:       # %bb.0:
+; RV32IA-WMO-NEXT:    andi a3, a0, -4
+; RV32IA-WMO-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NEXT:    li a4, 255
+; RV32IA-WMO-NEXT:    sll a4, a4, a0
+; RV32IA-WMO-NEXT:    andi a1, a1, 255
+; RV32IA-WMO-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NEXT:    andi a2, a2, 255
+; RV32IA-WMO-NEXT:    sll a0, a2, a0
+; RV32IA-WMO-NEXT:  .LBB1_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NEXT:    lr.w.aq a2, (a3)
+; RV32IA-WMO-NEXT:    and a5, a2, a4
+; RV32IA-WMO-NEXT:    bne a5, a1, .LBB1_3
+; RV32IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB1_1 Depth=1
+; RV32IA-WMO-NEXT:    xor a5, a2, a0
+; RV32IA-WMO-NEXT:    and a5, a5, a4
+; RV32IA-WMO-NEXT:    xor a5, a2, a5
+; RV32IA-WMO-NEXT:    sc.w a5, a5, (a3)
+; RV32IA-WMO-NEXT:    bnez a5, .LBB1_1
+; RV32IA-WMO-NEXT:  .LBB1_3:
+; RV32IA-WMO-NEXT:    ret
+;
+; RV32IA-TSO-LABEL: cmpxchg_i8_acquire_monotonic:
+; RV32IA-TSO:       # %bb.0:
+; RV32IA-TSO-NEXT:    andi a3, a0, -4
+; RV32IA-TSO-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NEXT:    li a4, 255
+; RV32IA-TSO-NEXT:    sll a4, a4, a0
+; RV32IA-TSO-NEXT:    andi a1, a1, 255
+; RV32IA-TSO-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NEXT:    andi a2, a2, 255
+; RV32IA-TSO-NEXT:    sll a0, a2, a0
+; RV32IA-TSO-NEXT:  .LBB1_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NEXT:    lr.w a2, (a3)
+; RV32IA-TSO-NEXT:    and a5, a2, a4
+; RV32IA-TSO-NEXT:    bne a5, a1, .LBB1_3
+; RV32IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB1_1 Depth=1
+; RV32IA-TSO-NEXT:    xor a5, a2, a0
+; RV32IA-TSO-NEXT:    and a5, a5, a4
+; RV32IA-TSO-NEXT:    xor a5, a2, a5
+; RV32IA-TSO-NEXT:    sc.w a5, a5, (a3)
+; RV32IA-TSO-NEXT:    bnez a5, .LBB1_1
+; RV32IA-TSO-NEXT:  .LBB1_3:
+; RV32IA-TSO-NEXT:    ret
 ;
 ; RV64I-LABEL: cmpxchg_i8_acquire_monotonic:
 ; RV64I:       # %bb.0:
@@ -138,28 +161,51 @@ define void @cmpxchg_i8_acquire_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind {
 ; RV64I-NEXT:    addi sp, sp, 16
 ; RV64I-NEXT:    ret
 ;
-; RV64IA-LABEL: cmpxchg_i8_acquire_monotonic:
-; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a3, a0, -4
-; RV64IA-NEXT:    slli a0, a0, 3
-; RV64IA-NEXT:    li a4, 255
-; RV64IA-NEXT:    sllw a4, a4, a0
-; RV64IA-NEXT:    andi a1, a1, 255
-; RV64IA-NEXT:    sllw a1, a1, a0
-; RV64IA-NEXT:    andi a2, a2, 255
-; RV64IA-NEXT:    sllw a0, a2, a0
-; RV64IA-NEXT:  .LBB1_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w.aq a2, (a3)
-; RV64IA-NEXT:    and a5, a2, a4
-; RV64IA-NEXT:    bne a5, a1, .LBB1_3
-; RV64IA-NEXT:  # %bb.2: # in Loop: Header=BB1_1 Depth=1
-; RV64IA-NEXT:    xor a5, a2, a0
-; RV64IA-NEXT:    and a5, a5, a4
-; RV64IA-NEXT:    xor a5, a2, a5
-; RV64IA-NEXT:    sc.w a5, a5, (a3)
-; RV64IA-NEXT:    bnez a5, .LBB1_1
-; RV64IA-NEXT:  .LBB1_3:
-; RV64IA-NEXT:    ret
+; RV64IA-WMO-LABEL: cmpxchg_i8_acquire_monotonic:
+; RV64IA-WMO:       # %bb.0:
+; RV64IA-WMO-NEXT:    andi a3, a0, -4
+; RV64IA-WMO-NEXT:    slli a0, a0, 3
+; RV64IA-WMO-NEXT:    li a4, 255
+; RV64IA-WMO-NEXT:    sllw a4, a4, a0
+; RV64IA-WMO-NEXT:    andi a1, a1, 255
+; RV64IA-WMO-NEXT:    sllw a1, a1, a0
+; RV64IA-WMO-NEXT:    andi a2, a2, 255
+; RV64IA-WMO-NEXT:    sllw a0, a2, a0
+; RV64IA-WMO-NEXT:  .LBB1_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT:    lr.w.aq a2, (a3)
+; RV64IA-WMO-NEXT:    and a5, a2, a4
+; RV64IA-WMO-NEXT:    bne a5, a1, .LBB1_3
+; RV64IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB1_1 Depth=1
+; RV64IA-WMO-NEXT:    xor a5, a2, a0
+; RV64IA-WMO-NEXT:    and a5, a5, a4
+; RV64IA-WMO-NEXT:    xor a5, a2, a5
+; RV64IA-WMO-NEXT:    sc.w a5, a5, (a3)
+; RV64IA-WMO-NEXT:    bnez a5, .LBB1_1
+; RV64IA-WMO-NEXT:  .LBB1_3:
+; RV64IA-WMO-NEXT:    ret
+;
+; RV64IA-TSO-LABEL: cmpxchg_i8_acquire_monotonic:
+; RV64IA-TSO:       # %bb.0:
+; RV64IA-TSO-NEXT:    andi a3, a0, -4
+; RV64IA-TSO-NEXT:    slli a0, a0, 3
+; RV64IA-TSO-NEXT:    li a4, 255
+; RV64IA-TSO-NEXT:    sllw a4, a4, a0
+; RV64IA-TSO-NEXT:    andi a1, a1, 255
+; RV64IA-TSO-NEXT:    sllw a1, a1, a0
+; RV64IA-TSO-NEXT:    andi a2, a2, 255
+; RV64IA-TSO-NEXT:    sllw a0, a2, a0
+; RV64IA-TSO-NEXT:  .LBB1_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT:    lr.w a2, (a3)
+; RV64IA-TSO-NEXT:    and a5, a2, a4
+; RV64IA-TSO-NEXT:    bne a5, a1, .LBB1_3
+; RV64IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB1_1 Depth=1
+; RV64IA-TSO-NEXT:    xor a5, a2, a0
+; RV64IA-TSO-NEXT:    and a5, a5, a4
+; RV64IA-TSO-NEXT:    xor a5, a2, a5
+; RV64IA-TSO-NEXT:    sc.w a5, a5, (a3)
+; RV64IA-TSO-NEXT:    bnez a5, .LBB1_1
+; RV64IA-TSO-NEXT:  .LBB1_3:
+; RV64IA-TSO-NEXT:    ret
   %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val acquire monotonic
   ret void
 }
@@ -178,28 +224,51 @@ define void @cmpxchg_i8_acquire_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: cmpxchg_i8_acquire_acquire:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a3, a0, -4
-; RV32IA-NEXT:    slli a0, a0, 3
-; RV32IA-NEXT:    li a4, 255
-; RV32IA-NEXT:    sll a4, a4, a0
-; RV32IA-NEXT:    andi a1, a1, 255
-; RV32IA-NEXT:    sll a1, a1, a0
-; RV32IA-NEXT:    andi a2, a2, 255
-; RV32IA-NEXT:    sll a0, a2, a0
-; RV32IA-NEXT:  .LBB2_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w.aq a2, (a3)
-; RV32IA-NEXT:    and a5, a2, a4
-; RV32IA-NEXT:    bne a5, a1, .LBB2_3
-; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB2_1 Depth=1
-; RV32IA-NEXT:    xor a5, a2, a0
-; RV32IA-NEXT:    and a5, a5, a4
-; RV32IA-NEXT:    xor a5, a2, a5
-; RV32IA-NEXT:    sc.w a5, a5, (a3)
-; RV32IA-NEXT:    bnez a5, .LBB2_1
-; RV32IA-NEXT:  .LBB2_3:
-; RV32IA-NEXT:    ret
+; RV32IA-WMO-LABEL: cmpxchg_i8_acquire_acquire:
+; RV32IA-WMO:       # %bb.0:
+; RV32IA-WMO-NEXT:    andi a3, a0, -4
+; RV32IA-WMO-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NEXT:    li a4, 255
+; RV32IA-WMO-NEXT:    sll a4, a4, a0
+; RV32IA-WMO-NEXT:    andi a1, a1, 255
+; RV32IA-WMO-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NEXT:    andi a2, a2, 255
+; RV32IA-WMO-NEXT:    sll a0, a2, a0
+; RV32IA-WMO-NEXT:  .LBB2_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NEXT:    lr.w.aq a2, (a3)
+; RV32IA-WMO-NEXT:    and a5, a2, a4
+; RV32IA-WMO-NEXT:    bne a5, a1, .LBB2_3
+; RV32IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB2_1 Depth=1
+; RV32IA-WMO-NEXT:    xor a5, a2, a0
+; RV32IA-WMO-NEXT:    and a5, a5, a4
+; RV32IA-WMO-NEXT:    xor a5, a2, a5
+; RV32IA-WMO-NEXT:    sc.w a5, a5, (a3)
+; RV32IA-WMO-NEXT:    bnez a5, .LBB2_1
+; RV32IA-WMO-NEXT:  .LBB2_3:
+; RV32IA-WMO-NEXT:    ret
+;
+; RV32IA-TSO-LABEL: cmpxchg_i8_acquire_acquire:
+; RV32IA-TSO:       # %bb.0:
+; RV32IA-TSO-NEXT:    andi a3, a0, -4
+; RV32IA-TSO-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NEXT:    li a4, 255
+; RV32IA-TSO-NEXT:    sll a4, a4, a0
+; RV32IA-TSO-NEXT:    andi a1, a1, 255
+; RV32IA-TSO-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NEXT:    andi a2, a2, 255
+; RV32IA-TSO-NEXT:    sll a0, a2, a0
+; RV32IA-TSO-NEXT:  .LBB2_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NEXT:    lr.w a2, (a3)
+; RV32IA-TSO-NEXT:    and a5, a2, a4
+; RV32IA-TSO-NEXT:    bne a5, a1, .LBB2_3
+; RV32IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB2_1 Depth=1
+; RV32IA-TSO-NEXT:    xor a5, a2, a0
+; RV32IA-TSO-NEXT:    and a5, a5, a4
+; RV32IA-TSO-NEXT:    xor a5, a2, a5
+; RV32IA-TSO-NEXT:    sc.w a5, a5, (a3)
+; RV32IA-TSO-NEXT:    bnez a5, .LBB2_1
+; RV32IA-TSO-NEXT:  .LBB2_3:
+; RV32IA-TSO-NEXT:    ret
 ;
 ; RV64I-LABEL: cmpxchg_i8_acquire_acquire:
 ; RV64I:       # %bb.0:
@@ -214,28 +283,51 @@ define void @cmpxchg_i8_acquire_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind {
 ; RV64I-NEXT:    addi sp, sp, 16
 ; RV64I-NEXT:    ret
 ;
-; RV64IA-LABEL: cmpxchg_i8_acquire_acquire:
-; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a3, a0, -4
-; RV64IA-NEXT:    slli a0, a0, 3
-; RV64IA-NEXT:    li a4, 255
-; RV64IA-NEXT:    sllw a4, a4, a0
-; RV64IA-NEXT:    andi a1, a1, 255
-; RV64IA-NEXT:    sllw a1, a1, a0
-; RV64IA-NEXT:    andi a2, a2, 255
-; RV64IA-NEXT:    sllw a0, a2, a0
-; RV64IA-NEXT:  .LBB2_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w.aq a2, (a3)
-; RV64IA-NEXT:    and a5, a2, a4
-; RV64IA-NEXT:    bne a5, a1, .LBB2_3
-; RV64IA-NEXT:  # %bb.2: # in Loop: Header=BB2_1 Depth=1
-; RV64IA-NEXT:    xor a5, a2, a0
-; RV64IA-NEXT:    and a5, a5, a4
-; RV64IA-NEXT:    xor a5, a2, a5
-; RV64IA-NEXT:    sc.w a5, a5, (a3)
-; RV64IA-NEXT:    bnez a5, .LBB2_1
-; RV64IA-NEXT:  .LBB2_3:
-; RV64IA-NEXT:    ret
+; RV64IA-WMO-LABEL: cmpxchg_i8_acquire_acquire:
+; RV64IA-WMO:       # %bb.0:
+; RV64IA-WMO-NEXT:    andi a3, a0, -4
+; RV64IA-WMO-NEXT:    slli a0, a0, 3
+; RV64IA-WMO-NEXT:    li a4, 255
+; RV64IA-WMO-NEXT:    sllw a4, a4, a0
+; RV64IA-WMO-NEXT:    andi a1, a1, 255
+; RV64IA-WMO-NEXT:    sllw a1, a1, a0
+; RV64IA-WMO-NEXT:    andi a2, a2, 255
+; RV64IA-WMO-NEXT:    sllw a0, a2, a0
+; RV64IA-WMO-NEXT:  .LBB2_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT:    lr.w.aq a2, (a3)
+; RV64IA-WMO-NEXT:    and a5, a2, a4
+; RV64IA-WMO-NEXT:    bne a5, a1, .LBB2_3
+; RV64IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB2_1 Depth=1
+; RV64IA-WMO-NEXT:    xor a5, a2, a0
+; RV64IA-WMO-NEXT:    and a5, a5, a4
+; RV64IA-WMO-NEXT:    xor a5, a2, a5
+; RV64IA-WMO-NEXT:    sc.w a5, a5, (a3)
+; RV64IA-WMO-NEXT:    bnez a5, .LBB2_1
+; RV64IA-WMO-NEXT:  .LBB2_3:
+; RV64IA-WMO-NEXT:    ret
+;
+; RV64IA-TSO-LABEL: cmpxchg_i8_acquire_acquire:
+; RV64IA-TSO:       # %bb.0:
+; RV64IA-TSO-NEXT:    andi a3, a0, -4
+; RV64IA-TSO-NEXT:    slli a0, a0, 3
+; RV64IA-TSO-NEXT:    li a4, 255
+; RV64IA-TSO-NEXT:    sllw a4, a4, a0
+; RV64IA-TSO-NEXT:    andi a1, a1, 255
+; RV64IA-TSO-NEXT:    sllw a1, a1, a0
+; RV64IA-TSO-NEXT:    andi a2, a2, 255
+; RV64IA-TSO-NEXT:    sllw a0, a2, a0
+; RV64IA-TSO-NEXT:  .LBB2_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT:    lr.w a2, (a3)
+; RV64IA-TSO-NEXT:    and a5, a2, a4
+; RV64IA-TSO-NEXT:    bne a5, a1, .LBB2_3
+; RV64IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB2_1 Depth=1
+; RV64IA-TSO-NEXT:    xor a5, a2, a0
+; RV64IA-TSO-NEXT:    and a5, a5, a4
+; RV64IA-TSO-NEXT:    xor a5, a2, a5
+; RV64IA-TSO-NEXT:    sc.w a5, a5, (a3)
+; RV64IA-TSO-NEXT:    bnez a5, .LBB2_1
+; RV64IA-TSO-NEXT:  .LBB2_3:
+; RV64IA-TSO-NEXT:    ret
   %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val acquire acquire
   ret void
 }
@@ -254,28 +346,51 @@ define void @cmpxchg_i8_release_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: cmpxchg_i8_release_monotonic:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a3, a0, -4
-; RV32IA-NEXT:    slli a0, a0, 3
-; RV32IA-NEXT:    li a4, 255
-; RV32IA-NEXT:    sll a4, a4, a0
-; RV32IA-NEXT:    andi a1, a1, 255
-; RV32IA-NEXT:    sll a1, a1, a0
-; RV32IA-NEXT:    andi a2, a2, 255
-; RV32IA-NEXT:    sll a0, a2, a0
-; RV32IA-NEXT:  .LBB3_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w a2, (a3)
-; RV32IA-NEXT:    and a5, a2, a4
-; RV32IA-NEXT:    bne a5, a1, .LBB3_3
-; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB3_1 Depth=1
-; RV32IA-NEXT:    xor a5, a2, a0
-; RV32IA-NEXT:    and a5, a5, a4
-; RV32IA-NEXT:    xor a5, a2, a5
-; RV32IA-NEXT:    sc.w.rl a5, a5, (a3)
-; RV32IA-NEXT:    bnez a5, .LBB3_1
-; RV32IA-NEXT:  .LBB3_3:
-; RV32IA-NEXT:    ret
+; RV32IA-WMO-LABEL: cmpxchg_i8_release_monotonic:
+; RV32IA-WMO:       # %bb.0:
+; RV32IA-WMO-NEXT:    andi a3, a0, -4
+; RV32IA-WMO-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NEXT:    li a4, 255
+; RV32IA-WMO-NEXT:    sll a4, a4, a0
+; RV32IA-WMO-NEXT:    andi a1, a1, 255
+; RV32IA-WMO-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NEXT:    andi a2, a2, 255
+; RV32IA-WMO-NEXT:    sll a0, a2, a0
+; RV32IA-WMO-NEXT:  .LBB3_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NEXT:    lr.w a2, (a3)
+; RV32IA-WMO-NEXT:    and a5, a2, a4
+; RV32IA-WMO-NEXT:    bne a5, a1, .LBB3_3
+; RV32IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB3_1 Depth=1
+; RV32IA-WMO-NEXT:    xor a5, a2, a0
+; RV32IA-WMO-NEXT:    and a5, a5, a4
+; RV32IA-WMO-NEXT:    xor a5, a2, a5
+; RV32IA-WMO-NEXT:    sc.w.rl a5, a5, (a3)
+; RV32IA-WMO-NEXT:    bnez a5, .LBB3_1
+; RV32IA-WMO-NEXT:  .LBB3_3:
+; RV32IA-WMO-NEXT:    ret
+;
+; RV32IA-TSO-LABEL: cmpxchg_i8_release_monotonic:
+; RV32IA-TSO:       # %bb.0:
+; RV32IA-TSO-NEXT:    andi a3, a0, -4
+; RV32IA-TSO-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NEXT:    li a4, 255
+; RV32IA-TSO-NEXT:    sll a4, a4, a0
+; RV32IA-TSO-NEXT:    andi a1, a1, 255
+; RV32IA-TSO-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NEXT:    andi a2, a2, 255
+; RV32IA-TSO-NEXT:    sll a0, a2, a0
+; RV32IA-TSO-NEXT:  .LBB3_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NEXT:    lr.w a2, (a3)
+; RV32IA-TSO-NEXT:    and a5, a2, a4
+; RV32IA-TSO-NEXT:    bne a5, a1, .LBB3_3
+; RV32IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB3_1 Depth=1
+; RV32IA-TSO-NEXT:    xor a5, a2, a0
+; RV32IA-TSO-NEXT:    and a5, a5, a4
+; RV32IA-TSO-NEXT:    xor a5, a2, a5
+; RV32IA-TSO-NEXT:    sc.w a5, a5, (a3)
+; RV32IA-TSO-NEXT:    bnez a5, .LBB3_1
+; RV32IA-TSO-NEXT:  .LBB3_3:
+; RV32IA-TSO-NEXT:    ret
 ;
 ; RV64I-LABEL: cmpxchg_i8_release_monotonic:
 ; RV64I:       # %bb.0:
@@ -290,28 +405,51 @@ define void @cmpxchg_i8_release_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind {
 ; RV64I-NEXT:    addi sp, sp, 16
 ; RV64I-NEXT:    ret
 ;
-; RV64IA-LABEL: cmpxchg_i8_release_monotonic:
-; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a3, a0, -4
-; RV64IA-NEXT:    slli a0, a0, 3
-; RV64IA-NEXT:    li a4, 255
-; RV64IA-NEXT:    sllw a4, a4, a0
-; RV64IA-NEXT:    andi a1, a1, 255
-; RV64IA-NEXT:    sllw a1, a1, a0
-; RV64IA-NEXT:    andi a2, a2, 255
-; RV64IA-NEXT:    sllw a0, a2, a0
-; RV64IA-NEXT:  .LBB3_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w a2, (a3)
-; RV64IA-NEXT:    and a5, a2, a4
-; RV64IA-NEXT:    bne a5, a1, .LBB3_3
-; RV64IA-NEXT:  # %bb.2: # in Loop: Header=BB3_1 Depth=1
-; RV64IA-NEXT:    xor a5, a2, a0
-; RV64IA-NEXT:    and a5, a5, a4
-; RV64IA-NEXT:    xor a5, a2, a5
-; RV64IA-NEXT:    sc.w.rl a5, a5, (a3)
-; RV64IA-NEXT:    bnez a5, .LBB3_1
-; RV64IA-NEXT:  .LBB3_3:
-; RV64IA-NEXT:    ret
+; RV64IA-WMO-LABEL: cmpxchg_i8_release_monotonic:
+; RV64IA-WMO:       # %bb.0:
+; RV64IA-WMO-NEXT:    andi a3, a0, -4
+; RV64IA-WMO-NEXT:    slli a0, a0, 3
+; RV64IA-WMO-NEXT:    li a4, 255
+; RV64IA-WMO-NEXT:    sllw a4, a4, a0
+; RV64IA-WMO-NEXT:    andi a1, a1, 255
+; RV64IA-WMO-NEXT:    sllw a1, a1, a0
+; RV64IA-WMO-NEXT:    andi a2, a2, 255
+; RV64IA-WMO-NEXT:    sllw a0, a2, a0
+; RV64IA-WMO-NEXT:  .LBB3_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT:    lr.w a2, (a3)
+; RV64IA-WMO-NEXT:    and a5, a2, a4
+; RV64IA-WMO-NEXT:    bne a5, a1, .LBB3_3
+; RV64IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB3_1 Depth=1
+; RV64IA-WMO-NEXT:    xor a5, a2, a0
+; RV64IA-WMO-NEXT:    and a5, a5, a4
+; RV64IA-WMO-NEXT:    xor a5, a2, a5
+; RV64IA-WMO-NEXT:    sc.w.rl a5, a5, (a3)
+; RV64IA-WMO-NEXT:    bnez a5, .LBB3_1
+; RV64IA-WMO-NEXT:  .LBB3_3:
+; RV64IA-WMO-NEXT:    ret
+;
+; RV64IA-TSO-LABEL: cmpxchg_i8_release_monotonic:
+; RV64IA-TSO:       # %bb.0:
+; RV64IA-TSO-NEXT:    andi a3, a0, -4
+; RV64IA-TSO-NEXT:    slli a0, a0, 3
+; RV64IA-TSO-NEXT:    li a4, 255
+; RV64IA-TSO-NEXT:    sllw a4, a4, a0
+; RV64IA-TSO-NEXT:    andi a1, a1, 255
+; RV64IA-TSO-NEXT:    sllw a1, a1, a0
+; RV64IA-TSO-NEXT:    andi a2, a2, 255
+; RV64IA-TSO-NEXT:    sllw a0, a2, a0
+; RV64IA-TSO-NEXT:  .LBB3_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT:    lr.w a2, (a3)
+; RV64IA-TSO-NEXT:    and a5, a2, a4
+; RV64IA-TSO-NEXT:    bne a5, a1, .LBB3_3
+; RV64IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB3_1 Depth=1
+; RV64IA-TSO-NEXT:    xor a5, a2, a0
+; RV64IA-TSO-NEXT:    and a5, a5, a4
+; RV64IA-TSO-NEXT:    xor a5, a2, a5
+; RV64IA-TSO-NEXT:    sc.w a5, a5, (a3)
+; RV64IA-TSO-NEXT:    bnez a5, .LBB3_1
+; RV64IA-TSO-NEXT:  .LBB3_3:
+; RV64IA-TSO-NEXT:    ret
   %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val release monotonic
   ret void
 }
@@ -330,28 +468,51 @@ define void @cmpxchg_i8_release_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: cmpxchg_i8_release_acquire:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a3, a0, -4
-; RV32IA-NEXT:    slli a0, a0, 3
-; RV32IA-NEXT:    li a4, 255
-; RV32IA-NEXT:    sll a4, a4, a0
-; RV32IA-NEXT:    andi a1, a1, 255
-; RV32IA-NEXT:    sll a1, a1, a0
-; RV32IA-NEXT:    andi a2, a2, 255
-; RV32IA-NEXT:    sll a0, a2, a0
-; RV32IA-NEXT:  .LBB4_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w.aq a2, (a3)
-; RV32IA-NEXT:    and a5, a2, a4
-; RV32IA-NEXT:    bne a5, a1, .LBB4_3
-; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB4_1 Depth=1
-; RV32IA-NEXT:    xor a5, a2, a0
-; RV32IA-NEXT:    and a5, a5, a4
-; RV32IA-NEXT:    xor a5, a2, a5
-; RV32IA-NEXT:    sc.w.rl a5, a5, (a3)
-; RV32IA-NEXT:    bnez a5, .LBB4_1
-; RV32IA-NEXT:  .LBB4_3:
-; RV32IA-NEXT:    ret
+; RV32IA-WMO-LABEL: cmpxchg_i8_release_acquire:
+; RV32IA-WMO:       # %bb.0:
+; RV32IA-WMO-NEXT:    andi a3, a0, -4
+; RV32IA-WMO-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NEXT:    li a4, 255
+; RV32IA-WMO-NEXT:    sll a4, a4, a0
+; RV32IA-WMO-NEXT:    andi a1, a1, 255
+; RV32IA-WMO-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NEXT:    andi a2, a2, 255
+; RV32IA-WMO-NEXT:    sll a0, a2, a0
+; RV32IA-WMO-NEXT:  .LBB4_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NEXT:    lr.w.aq a2, (a3)
+; RV32IA-WMO-NEXT:    and a5, a2, a4
+; RV32IA-WMO-NEXT:    bne a5, a1, .LBB4_3
+; RV32IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB4_1 Depth=1
+; RV32IA-WMO-NEXT:    xor a5, a2, a0
+; RV32IA-WMO-NEXT:    and a5, a5, a4
+; RV32IA-WMO-NEXT:    xor a5, a2, a5
+; RV32IA-WMO-NEXT:    sc.w.rl a5, a5, (a3)
+; RV32IA-WMO-NEXT:    bnez a5, .LBB4_1
+; RV32IA-WMO-NEXT:  .LBB4_3:
+; RV32IA-WMO-NEXT:    ret
+;
+; RV32IA-TSO-LABEL: cmpxchg_i8_release_acquire:
+; RV32IA-TSO:       # %bb.0:
+; RV32IA-TSO-NEXT:    andi a3, a0, -4
+; RV32IA-TSO-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NEXT:    li a4, 255
+; RV32IA-TSO-NEXT:    sll a4, a4, a0
+; RV32IA-TSO-NEXT:    andi a1, a1, 255
+; RV32IA-TSO-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NEXT:    andi a2, a2, 255
+; RV32IA-TSO-NEXT:    sll a0, a2, a0
+; RV32IA-TSO-NEXT:  .LBB4_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NEXT:    lr.w a2, (a3)
+; RV32IA-TSO-NEXT:    and a5, a2, a4
+; RV32IA-TSO-NEXT:    bne a5, a1, .LBB4_3
+; RV32IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB4_1 Depth=1
+; RV32IA-TSO-NEXT:    xor a5, a2, a0
+; RV32IA-TSO-NEXT:    and a5, a5, a4
+; RV32IA-TSO-NEXT:    xor a5, a2, a5
+; RV32IA-TSO-NEXT:    sc.w a5, a5, (a3)
+; RV32IA-TSO-NEXT:    bnez a5, .LBB4_1
+; RV32IA-TSO-NEXT:  .LBB4_3:
+; RV32IA-TSO-NEXT:    ret
 ;
 ; RV64I-LABEL: cmpxchg_i8_release_acquire:
 ; RV64I:       # %bb.0:
@@ -366,28 +527,51 @@ define void @cmpxchg_i8_release_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind {
 ; RV64I-NEXT:    addi sp, sp, 16
 ; RV64I-NEXT:    ret
 ;
-; RV64IA-LABEL: cmpxchg_i8_release_acquire:
-; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a3, a0, -4
-; RV64IA-NEXT:    slli a0, a0, 3
-; RV64IA-NEXT:    li a4, 255
-; RV64IA-NEXT:    sllw a4, a4, a0
-; RV64IA-NEXT:    andi a1, a1, 255
-; RV64IA-NEXT:    sllw a1, a1, a0
-; RV64IA-NEXT:    andi a2, a2, 255
-; RV64IA-NEXT:    sllw a0, a2, a0
-; RV64IA-NEXT:  .LBB4_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w.aq a2, (a3)
-; RV64IA-NEXT:    and a5, a2, a4
-; RV64IA-NEXT:    bne a5, a1, .LBB4_3
-; RV64IA-NEXT:  # %bb.2: # in Loop: Header=BB4_1 Depth=1
-; RV64IA-NEXT:    xor a5, a2, a0
-; RV64IA-NEXT:    and a5, a5, a4
-; RV64IA-NEXT:    xor a5, a2, a5
-; RV64IA-NEXT:    sc.w.rl a5, a5, (a3)
-; RV64IA-NEXT:    bnez a5, .LBB4_1
-; RV64IA-NEXT:  .LBB4_3:
-; RV64IA-NEXT:    ret
+; RV64IA-WMO-LABEL: cmpxchg_i8_release_acquire:
+; RV64IA-WMO:       # %bb.0:
+; RV64IA-WMO-NEXT:    andi a3, a0, -4
+; RV64IA-WMO-NEXT:    slli a0, a0, 3
+; RV64IA-WMO-NEXT:    li a4, 255
+; RV64IA-WMO-NEXT:    sllw a4, a4, a0
+; RV64IA-WMO-NEXT:    andi a1, a1, 255
+; RV64IA-WMO-NEXT:    sllw a1, a1, a0
+; RV64IA-WMO-NEXT:    andi a2, a2, 255
+; RV64IA-WMO-NEXT:    sllw a0, a2, a0
+; RV64IA-WMO-NEXT:  .LBB4_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT:    lr.w.aq a2, (a3)
+; RV64IA-WMO-NEXT:    and a5, a2, a4
+; RV64IA-WMO-NEXT:    bne a5, a1, .LBB4_3
+; RV64IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB4_1 Depth=1
+; RV64IA-WMO-NEXT:    xor a5, a2, a0
+; RV64IA-WMO-NEXT:    and a5, a5, a4
+; RV64IA-WMO-NEXT:    xor a5, a2, a5
+; RV64IA-WMO-NEXT:    sc.w.rl a5, a5, (a3)
+; RV64IA-WMO-NEXT:    bnez a5, .LBB4_1
+; RV64IA-WMO-NEXT:  .LBB4_3:
+; RV64IA-WMO-NEXT:    ret
+;
+; RV64IA-TSO-LABEL: cmpxchg_i8_release_acquire:
+; RV64IA-TSO:       # %bb.0:
+; RV64IA-TSO-NEXT:    andi a3, a0, -4
+; RV64IA-TSO-NEXT:    slli a0, a0, 3
+; RV64IA-TSO-NEXT:    li a4, 255
+; RV64IA-TSO-NEXT:    sllw a4, a4, a0
+; RV64IA-TSO-NEXT:    andi a1, a1, 255
+; RV64IA-TSO-NEXT:    sllw a1, a1, a0
+; RV64IA-TSO-NEXT:    andi a2, a2, 255
+; RV64IA-TSO-NEXT:    sllw a0, a2, a0
+; RV64IA-TSO-NEXT:  .LBB4_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT:    lr.w a2, (a3)
+; RV64IA-TSO-NEXT:    and a5, a2, a4
+; RV64IA-TSO-NEXT:    bne a5, a1, .LBB4_3
+; RV64IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB4_1 Depth=1
+; RV64IA-TSO-NEXT:    xor a5, a2, a0
+; RV64IA-TSO-NEXT:    and a5, a5, a4
+; RV64IA-TSO-NEXT:    xor a5, a2, a5
+; RV64IA-TSO-NEXT:    sc.w a5, a5, (a3)
+; RV64IA-TSO-NEXT:    bnez a5, .LBB4_1
+; RV64IA-TSO-NEXT:  .LBB4_3:
+; RV64IA-TSO-NEXT:    ret
   %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val release acquire
   ret void
 }
@@ -406,28 +590,51 @@ define void @cmpxchg_i8_acq_rel_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: cmpxchg_i8_acq_rel_monotonic:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a3, a0, -4
-; RV32IA-NEXT:    slli a0, a0, 3
-; RV32IA-NEXT:    li a4, 255
-; RV32IA-NEXT:    sll a4, a4, a0
-; RV32IA-NEXT:    andi a1, a1, 255
-; RV32IA-NEXT:    sll a1, a1, a0
-; RV32IA-NEXT:    andi a2, a2, 255
-; RV32IA-NEXT:    sll a0, a2, a0
-; RV32IA-NEXT:  .LBB5_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w.aq a2, (a3)
-; RV32IA-NEXT:    and a5, a2, a4
-; RV32IA-NEXT:    bne a5, a1, .LBB5_3
-; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB5_1 Depth=1
-; RV32IA-NEXT:    xor a5, a2, a0
-; RV32IA-NEXT:    and a5, a5, a4
-; RV32IA-NEXT:    xor a5, a2, a5
-; RV32IA-NEXT:    sc.w.rl a5, a5, (a3)
-; RV32IA-NEXT:    bnez a5, .LBB5_1
-; RV32IA-NEXT:  .LBB5_3:
-; RV32IA-NEXT:    ret
+; RV32IA-WMO-LABEL: cmpxchg_i8_acq_rel_monotonic:
+; RV32IA-WMO:       # %bb.0:
+; RV32IA-WMO-NEXT:    andi a3, a0, -4
+; RV32IA-WMO-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NEXT:    li a4, 255
+; RV32IA-WMO-NEXT:    sll a4, a4, a0
+; RV32IA-WMO-NEXT:    andi a1, a1, 255
+; RV32IA-WMO-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NEXT:    andi a2, a2, 255
+; RV32IA-WMO-NEXT:    sll a0, a2, a0
+; RV32IA-WMO-NEXT:  .LBB5_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NEXT:    lr.w.aq a2, (a3)
+; RV32IA-WMO-NEXT:    and a5, a2, a4
+; RV32IA-WMO-NEXT:    bne a5, a1, .LBB5_3
+; RV32IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB5_1 Depth=1
+; RV32IA-WMO-NEXT:    xor a5, a2, a0
+; RV32IA-WMO-NEXT:    and a5, a5, a4
+; RV32IA-WMO-NEXT:    xor a5, a2, a5
+; RV32IA-WMO-NEXT:    sc.w.rl a5, a5, (a3)
+; RV32IA-WMO-NEXT:    bnez a5, .LBB5_1
+; RV32IA-WMO-NEXT:  .LBB5_3:
+; RV32IA-WMO-NEXT:    ret
+;
+; RV32IA-TSO-LABEL: cmpxchg_i8_acq_rel_monotonic:
+; RV32IA-TSO:       # %bb.0:
+; RV32IA-TSO-NEXT:    andi a3, a0, -4
+; RV32IA-TSO-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NEXT:    li a4, 255
+; RV32IA-TSO-NEXT:    sll a4, a4, a0
+; RV32IA-TSO-NEXT:    andi a1, a1, 255
+; RV32IA-TSO-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NEXT:    andi a2, a2, 255
+; RV32IA-TSO-NEXT:    sll a0, a2, a0
+; RV32IA-TSO-NEXT:  .LBB5_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NEXT:    lr.w a2, (a3)
+; RV32IA-TSO-NEXT:    and a5, a2, a4
+; RV32IA-TSO-NEXT:    bne a5, a1, .LBB5_3
+; RV32IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB5_1 Depth=1
+; RV32IA-TSO-NEXT:    xor a5, a2, a0
+; RV32IA-TSO-NEXT:    and a5, a5, a4
+; RV32IA-TSO-NEXT:    xor a5, a2, a5
+; RV32IA-TSO-NEXT:    sc.w a5, a5, (a3)
+; RV32IA-TSO-NEXT:    bnez a5, .LBB5_1
+; RV32IA-TSO-NEXT:  .LBB5_3:
+; RV32IA-TSO-NEXT:    ret
 ;
 ; RV64I-LABEL: cmpxchg_i8_acq_rel_monotonic:
 ; RV64I:       # %bb.0:
@@ -442,28 +649,51 @@ define void @cmpxchg_i8_acq_rel_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind {
 ; RV64I-NEXT:    addi sp, sp, 16
 ; RV64I-NEXT:    ret
 ;
-; RV64IA-LABEL: cmpxchg_i8_acq_rel_monotonic:
-; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a3, a0, -4
-; RV64IA-NEXT:    slli a0, a0, 3
-; RV64IA-NEXT:    li a4, 255
-; RV64IA-NEXT:    sllw a4, a4, a0
-; RV64IA-NEXT:    andi a1, a1, 255
-; RV64IA-NEXT:    sllw a1, a1, a0
-; RV64IA-NEXT:    andi a2, a2, 255
-; RV64IA-NEXT:    sllw a0, a2, a0
-; RV64IA-NEXT:  .LBB5_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w.aq a2, (a3)
-; RV64IA-NEXT:    and a5, a2, a4
-; RV64IA-NEXT:    bne a5, a1, .LBB5_3
-; RV64IA-NEXT:  # %bb.2: # in Loop: Header=BB5_1 Depth=1
-; RV64IA-NEXT:    xor a5, a2, a0
-; RV64IA-NEXT:    and a5, a5, a4
-; RV64IA-NEXT:    xor a5, a2, a5
-; RV64IA-NEXT:    sc.w.rl a5, a5, (a3)
-; RV64IA-NEXT:    bnez a5, .LBB5_1
-; RV64IA-NEXT:  .LBB5_3:
-; RV64IA-NEXT:    ret
+; RV64IA-WMO-LABEL: cmpxchg_i8_acq_rel_monotonic:
+; RV64IA-WMO:       # %bb.0:
+; RV64IA-WMO-NEXT:    andi a3, a0, -4
+; RV64IA-WMO-NEXT:    slli a0, a0, 3
+; RV64IA-WMO-NEXT:    li a4, 255
+; RV64IA-WMO-NEXT:    sllw a4, a4, a0
+; RV64IA-WMO-NEXT:    andi a1, a1, 255
+; RV64IA-WMO-NEXT:    sllw a1, a1, a0
+; RV64IA-WMO-NEXT:    andi a2, a2, 255
+; RV64IA-WMO-NEXT:    sllw a0, a2, a0
+; RV64IA-WMO-NEXT:  .LBB5_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT:    lr.w.aq a2, (a3)
+; RV64IA-WMO-NEXT:    and a5, a2, a4
+; RV64IA-WMO-NEXT:    bne a5, a1, .LBB5_3
+; RV64IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB5_1 Depth=1
+; RV64IA-WMO-NEXT:    xor a5, a2, a0
+; RV64IA-WMO-NEXT:    and a5, a5, a4
+; RV64IA-WMO-NEXT:    xor a5, a2, a5
+; RV64IA-WMO-NEXT:    sc.w.rl a5, a5, (a3)
+; RV64IA-WMO-NEXT:    bnez a5, .LBB5_1
+; RV64IA-WMO-NEXT:  .LBB5_3:
+; RV64IA-WMO-NEXT:    ret
+;
+; RV64IA-TSO-LABEL: cmpxchg_i8_acq_rel_monotonic:
+; RV64IA-TSO:       # %bb.0:
+; RV64IA-TSO-NEXT:    andi a3, a0, -4
+; RV64IA-TSO-NEXT:    slli a0, a0, 3
+; RV64IA-TSO-NEXT:    li a4, 255
+; RV64IA-TSO-NEXT:    sllw a4, a4, a0
+; RV64IA-TSO-NEXT:    andi a1, a1, 255
+; RV64IA-TSO-NEXT:    sllw a1, a1, a0
+; RV64IA-TSO-NEXT:    andi a2, a2, 255
+; RV64IA-TSO-NEXT:    sllw a0, a2, a0
+; RV64IA-TSO-NEXT:  .LBB5_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT:    lr.w a2, (a3)
+; RV64IA-TSO-NEXT:    and a5, a2, a4
+; RV64IA-TSO-NEXT:    bne a5, a1, .LBB5_3
+; RV64IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB5_1 Depth=1
+; RV64IA-TSO-NEXT:    xor a5, a2, a0
+; RV64IA-TSO-NEXT:    and a5, a5, a4
+; RV64IA-TSO-NEXT:    xor a5, a2, a5
+; RV64IA-TSO-NEXT:    sc.w a5, a5, (a3)
+; RV64IA-TSO-NEXT:    bnez a5, .LBB5_1
+; RV64IA-TSO-NEXT:  .LBB5_3:
+; RV64IA-TSO-NEXT:    ret
   %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val acq_rel monotonic
   ret void
 }
@@ -482,28 +712,51 @@ define void @cmpxchg_i8_acq_rel_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: cmpxchg_i8_acq_rel_acquire:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a3, a0, -4
-; RV32IA-NEXT:    slli a0, a0, 3
-; RV32IA-NEXT:    li a4, 255
-; RV32IA-NEXT:    sll a4, a4, a0
-; RV32IA-NEXT:    andi a1, a1, 255
-; RV32IA-NEXT:    sll a1, a1, a0
-; RV32IA-NEXT:    andi a2, a2, 255
-; RV32IA-NEXT:    sll a0, a2, a0
-; RV32IA-NEXT:  .LBB6_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w.aq a2, (a3)
-; RV32IA-NEXT:    and a5, a2, a4
-; RV32IA-NEXT:    bne a5, a1, .LBB6_3
-; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB6_1 Depth=1
-; RV32IA-NEXT:    xor a5, a2, a0
-; RV32IA-NEXT:    and a5, a5, a4
-; RV32IA-NEXT:    xor a5, a2, a5
-; RV32IA-NEXT:    sc.w.rl a5, a5, (a3)
-; RV32IA-NEXT:    bnez a5, .LBB6_1
-; RV32IA-NEXT:  .LBB6_3:
-; RV32IA-NEXT:    ret
+; RV32IA-WMO-LABEL: cmpxchg_i8_acq_rel_acquire:
+; RV32IA-WMO:       # %bb.0:
+; RV32IA-WMO-NEXT:    andi a3, a0, -4
+; RV32IA-WMO-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NEXT:    li a4, 255
+; RV32IA-WMO-NEXT:    sll a4, a4, a0
+; RV32IA-WMO-NEXT:    andi a1, a1, 255
+; RV32IA-WMO-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NEXT:    andi a2, a2, 255
+; RV32IA-WMO-NEXT:    sll a0, a2, a0
+; RV32IA-WMO-NEXT:  .LBB6_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NEXT:    lr.w.aq a2, (a3)
+; RV32IA-WMO-NEXT:    and a5, a2, a4
+; RV32IA-WMO-NEXT:    bne a5, a1, .LBB6_3
+; RV32IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB6_1 Depth=1
+; RV32IA-WMO-NEXT:    xor a5, a2, a0
+; RV32IA-WMO-NEXT:    and a5, a5, a4
+; RV32IA-WMO-NEXT:    xor a5, a2, a5
+; RV32IA-WMO-NEXT:    sc.w.rl a5, a5, (a3)
+; RV32IA-WMO-NEXT:    bnez a5, .LBB6_1
+; RV32IA-WMO-NEXT:  .LBB6_3:
+; RV32IA-WMO-NEXT:    ret
+;
+; RV32IA-TSO-LABEL: cmpxchg_i8_acq_rel_acquire:
+; RV32IA-TSO:       # %bb.0:
+; RV32IA-TSO-NEXT:    andi a3, a0, -4
+; RV32IA-TSO-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NEXT:    li a4, 255
+; RV32IA-TSO-NEXT:    sll a4, a4, a0
+; RV32IA-TSO-NEXT:    andi a1, a1, 255
+; RV32IA-TSO-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NEXT:    andi a2, a2, 255
+; RV32IA-TSO-NEXT:    sll a0, a2, a0
+; RV32IA-TSO-NEXT:  .LBB6_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NEXT:    lr.w a2, (a3)
+; RV32IA-TSO-NEXT:    and a5, a2, a4
+; RV32IA-TSO-NEXT:    bne a5, a1, .LBB6_3
+; RV32IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB6_1 Depth=1
+; RV32IA-TSO-NEXT:    xor a5, a2, a0
+; RV32IA-TSO-NEXT:    and a5, a5, a4
+; RV32IA-TSO-NEXT:    xor a5, a2, a5
+; RV32IA-TSO-NEXT:    sc.w a5, a5, (a3)
+; RV32IA-TSO-NEXT:    bnez a5, .LBB6_1
+; RV32IA-TSO-NEXT:  .LBB6_3:
+; RV32IA-TSO-NEXT:    ret
 ;
 ; RV64I-LABEL: cmpxchg_i8_acq_rel_acquire:
 ; RV64I:       # %bb.0:
@@ -518,28 +771,51 @@ define void @cmpxchg_i8_acq_rel_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind {
 ; RV64I-NEXT:    addi sp, sp, 16
 ; RV64I-NEXT:    ret
 ;
-; RV64IA-LABEL: cmpxchg_i8_acq_rel_acquire:
-; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a3, a0, -4
-; RV64IA-NEXT:    slli a0, a0, 3
-; RV64IA-NEXT:    li a4, 255
-; RV64IA-NEXT:    sllw a4, a4, a0
-; RV64IA-NEXT:    andi a1, a1, 255
-; RV64IA-NEXT:    sllw a1, a1, a0
-; RV64IA-NEXT:    andi a2, a2, 255
-; RV64IA-NEXT:    sllw a0, a2, a0
-; RV64IA-NEXT:  .LBB6_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w.aq a2, (a3)
-; RV64IA-NEXT:    and a5, a2, a4
-; RV64IA-NEXT:    bne a5, a1, .LBB6_3
-; RV64IA-NEXT:  # %bb.2: # in Loop: Header=BB6_1 Depth=1
-; RV64IA-NEXT:    xor a5, a2, a0
-; RV64IA-NEXT:    and a5, a5, a4
-; RV64IA-NEXT:    xor a5, a2, a5
-; RV64IA-NEXT:    sc.w.rl a5, a5, (a3)
-; RV64IA-NEXT:    bnez a5, .LBB6_1
-; RV64IA-NEXT:  .LBB6_3:
-; RV64IA-NEXT:    ret
+; RV64IA-WMO-LABEL: cmpxchg_i8_acq_rel_acquire:
+; RV64IA-WMO:       # %bb.0:
+; RV64IA-WMO-NEXT:    andi a3, a0, -4
+; RV64IA-WMO-NEXT:    slli a0, a0, 3
+; RV64IA-WMO-NEXT:    li a4, 255
+; RV64IA-WMO-NEXT:    sllw a4, a4, a0
+; RV64IA-WMO-NEXT:    andi a1, a1, 255
+; RV64IA-WMO-NEXT:    sllw a1, a1, a0
+; RV64IA-WMO-NEXT:    andi a2, a2, 255
+; RV64IA-WMO-NEXT:    sllw a0, a2, a0
+; RV64IA-WMO-NEXT:  .LBB6_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT:    lr.w.aq a2, (a3)
+; RV64IA-WMO-NEXT:    and a5, a2, a4
+; RV64IA-WMO-NEXT:    bne a5, a1, .LBB6_3
+; RV64IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB6_1 Depth=1
+; RV64IA-WMO-NEXT:    xor a5, a2, a0
+; RV64IA-WMO-NEXT:    and a5, a5, a4
+; RV64IA-WMO-NEXT:    xor a5, a2, a5
+; RV64IA-WMO-NEXT:    sc.w.rl a5, a5, (a3)
+; RV64IA-WMO-NEXT:    bnez a5, .LBB6_1
+; RV64IA-WMO-NEXT:  .LBB6_3:
+; RV64IA-WMO-NEXT:    ret
+;
+; RV64IA-TSO-LABEL: cmpxchg_i8_acq_rel_acquire:
+; RV64IA-TSO:       # %bb.0:
+; RV64IA-TSO-NEXT:    andi a3, a0, -4
+; RV64IA-TSO-NEXT:    slli a0, a0, 3
+; RV64IA-TSO-NEXT:    li a4, 255
+; RV64IA-TSO-NEXT:    sllw a4, a4, a0
+; RV64IA-TSO-NEXT:    andi a1, a1, 255
+; RV64IA-TSO-NEXT:    sllw a1, a1, a0
+; RV64IA-TSO-NEXT:    andi a2, a2, 255
+; RV64IA-TSO-NEXT:    sllw a0, a2, a0
+; RV64IA-TSO-NEXT:  .LBB6_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT:    lr.w a2, (a3)
+; RV64IA-TSO-NEXT:    and a5, a2, a4
+; RV64IA-TSO-NEXT:    bne a5, a1, .LBB6_3
+; RV64IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB6_1 Depth=1
+; RV64IA-TSO-NEXT:    xor a5, a2, a0
+; RV64IA-TSO-NEXT:    and a5, a5, a4
+; RV64IA-TSO-NEXT:    xor a5, a2, a5
+; RV64IA-TSO-NEXT:    sc.w a5, a5, (a3)
+; RV64IA-TSO-NEXT:    bnez a5, .LBB6_1
+; RV64IA-TSO-NEXT:  .LBB6_3:
+; RV64IA-TSO-NEXT:    ret
   %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val acq_rel acquire
   ret void
 }
@@ -864,29 +1140,53 @@ define void @cmpxchg_i16_acquire_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: cmpxchg_i16_acquire_monotonic:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a3, a0, -4
-; RV32IA-NEXT:    slli a0, a0, 3
-; RV32IA-NEXT:    lui a4, 16
-; RV32IA-NEXT:    addi a4, a4, -1
-; RV32IA-NEXT:    sll a5, a4, a0
-; RV32IA-NEXT:    and a1, a1, a4
-; RV32IA-NEXT:    sll a1, a1, a0
-; RV32IA-NEXT:    and a2, a2, a4
-; RV32IA-NEXT:    sll a0, a2, a0
-; RV32IA-NEXT:  .LBB11_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w.aq a2, (a3)
-; RV32IA-NEXT:    and a4, a2, a5
-; RV32IA-NEXT:    bne a4, a1, .LBB11_3
-; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB11_1 Depth=1
-; RV32IA-NEXT:    xor a4, a2, a0
-; RV32IA-NEXT:    and a4, a4, a5
-; RV32IA-NEXT:    xor a4, a2, a4
-; RV32IA-NEXT:    sc.w a4, a4, (a3)
-; RV32IA-NEXT:    bnez a4, .LBB11_1
-; RV32IA-NEXT:  .LBB11_3:
-; RV32IA-NEXT:    ret
+; RV32IA-WMO-LABEL: cmpxchg_i16_acquire_monotonic:
+; RV32IA-WMO:       # %bb.0:
+; RV32IA-WMO-NEXT:    andi a3, a0, -4
+; RV32IA-WMO-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NEXT:    lui a4, 16
+; RV32IA-WMO-NEXT:    addi a4, a4, -1
+; RV32IA-WMO-NEXT:    sll a5, a4, a0
+; RV32IA-WMO-NEXT:    and a1, a1, a4
+; RV32IA-WMO-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NEXT:    and a2, a2, a4
+; RV32IA-WMO-NEXT:    sll a0, a2, a0
+; RV32IA-WMO-NEXT:  .LBB11_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NEXT:    lr.w.aq a2, (a3)
+; RV32IA-WMO-NEXT:    and a4, a2, a5
+; RV32IA-WMO-NEXT:    bne a4, a1, .LBB11_3
+; RV32IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB11_1 Depth=1
+; RV32IA-WMO-NEXT:    xor a4, a2, a0
+; RV32IA-WMO-NEXT:    and a4, a4, a5
+; RV32IA-WMO-NEXT:    xor a4, a2, a4
+; RV32IA-WMO-NEXT:    sc.w a4, a4, (a3)
+; RV32IA-WMO-NEXT:    bnez a4, .LBB11_1
+; RV32IA-WMO-NEXT:  .LBB11_3:
+; RV32IA-WMO-NEXT:    ret
+;
+; RV32IA-TSO-LABEL: cmpxchg_i16_acquire_monotonic:
+; RV32IA-TSO:       # %bb.0:
+; RV32IA-TSO-NEXT:    andi a3, a0, -4
+; RV32IA-TSO-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NEXT:    lui a4, 16
+; RV32IA-TSO-NEXT:    addi a4, a4, -1
+; RV32IA-TSO-NEXT:    sll a5, a4, a0
+; RV32IA-TSO-NEXT:    and a1, a1, a4
+; RV32IA-TSO-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NEXT:    and a2, a2, a4
+; RV32IA-TSO-NEXT:    sll a0, a2, a0
+; RV32IA-TSO-NEXT:  .LBB11_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NEXT:    lr.w a2, (a3)
+; RV32IA-TSO-NEXT:    and a4, a2, a5
+; RV32IA-TSO-NEXT:    bne a4, a1, .LBB11_3
+; RV32IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB11_1 Depth=1
+; RV32IA-TSO-NEXT:    xor a4, a2, a0
+; RV32IA-TSO-NEXT:    and a4, a4, a5
+; RV32IA-TSO-NEXT:    xor a4, a2, a4
+; RV32IA-TSO-NEXT:    sc.w a4, a4, (a3)
+; RV32IA-TSO-NEXT:    bnez a4, .LBB11_1
+; RV32IA-TSO-NEXT:  .LBB11_3:
+; RV32IA-TSO-NEXT:    ret
 ;
 ; RV64I-LABEL: cmpxchg_i16_acquire_monotonic:
 ; RV64I:       # %bb.0:
@@ -901,29 +1201,53 @@ define void @cmpxchg_i16_acquire_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin
 ; RV64I-NEXT:    addi sp, sp, 16
 ; RV64I-NEXT:    ret
 ;
-; RV64IA-LABEL: cmpxchg_i16_acquire_monotonic:
-; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a3, a0, -4
-; RV64IA-NEXT:    slli a0, a0, 3
-; RV64IA-NEXT:    lui a4, 16
-; RV64IA-NEXT:    addiw a4, a4, -1
-; RV64IA-NEXT:    sllw a5, a4, a0
-; RV64IA-NEXT:    and a1, a1, a4
-; RV64IA-NEXT:    sllw a1, a1, a0
-; RV64IA-NEXT:    and a2, a2, a4
-; RV64IA-NEXT:    sllw a0, a2, a0
-; RV64IA-NEXT:  .LBB11_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w.aq a2, (a3)
-; RV64IA-NEXT:    and a4, a2, a5
-; RV64IA-NEXT:    bne a4, a1, .LBB11_3
-; RV64IA-NEXT:  # %bb.2: # in Loop: Header=BB11_1 Depth=1
-; RV64IA-NEXT:    xor a4, a2, a0
-; RV64IA-NEXT:    and a4, a4, a5
-; RV64IA-NEXT:    xor a4, a2, a4
-; RV64IA-NEXT:    sc.w a4, a4, (a3)
-; RV64IA-NEXT:    bnez a4, .LBB11_1
-; RV64IA-NEXT:  .LBB11_3:
-; RV64IA-NEXT:    ret
+; RV64IA-WMO-LABEL: cmpxchg_i16_acquire_monotonic:
+; RV64IA-WMO:       # %bb.0:
+; RV64IA-WMO-NEXT:    andi a3, a0, -4
+; RV64IA-WMO-NEXT:    slli a0, a0, 3
+; RV64IA-WMO-NEXT:    lui a4, 16
+; RV64IA-WMO-NEXT:    addiw a4, a4, -1
+; RV64IA-WMO-NEXT:    sllw a5, a4, a0
+; RV64IA-WMO-NEXT:    and a1, a1, a4
+; RV64IA-WMO-NEXT:    sllw a1, a1, a0
+; RV64IA-WMO-NEXT:    and a2, a2, a4
+; RV64IA-WMO-NEXT:    sllw a0, a2, a0
+; RV64IA-WMO-NEXT:  .LBB11_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT:    lr.w.aq a2, (a3)
+; RV64IA-WMO-NEXT:    and a4, a2, a5
+; RV64IA-WMO-NEXT:    bne a4, a1, .LBB11_3
+; RV64IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB11_1 Depth=1
+; RV64IA-WMO-NEXT:    xor a4, a2, a0
+; RV64IA-WMO-NEXT:    and a4, a4, a5
+; RV64IA-WMO-NEXT:    xor a4, a2, a4
+; RV64IA-WMO-NEXT:    sc.w a4, a4, (a3)
+; RV64IA-WMO-NEXT:    bnez a4, .LBB11_1
+; RV64IA-WMO-NEXT:  .LBB11_3:
+; RV64IA-WMO-NEXT:    ret
+;
+; RV64IA-TSO-LABEL: cmpxchg_i16_acquire_monotonic:
+; RV64IA-TSO:       # %bb.0:
+; RV64IA-TSO-NEXT:    andi a3, a0, -4
+; RV64IA-TSO-NEXT:    slli a0, a0, 3
+; RV64IA-TSO-NEXT:    lui a4, 16
+; RV64IA-TSO-NEXT:    addiw a4, a4, -1
+; RV64IA-TSO-NEXT:    sllw a5, a4, a0
+; RV64IA-TSO-NEXT:    and a1, a1, a4
+; RV64IA-TSO-NEXT:    sllw a1, a1, a0
+; RV64IA-TSO-NEXT:    and a2, a2, a4
+; RV64IA-TSO-NEXT:    sllw a0, a2, a0
+; RV64IA-TSO-NEXT:  .LBB11_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT:    lr.w a2, (a3)
+; RV64IA-TSO-NEXT:    and a4, a2, a5
+; RV64IA-TSO-NEXT:    bne a4, a1, .LBB11_3
+; RV64IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB11_1 Depth=1
+; RV64IA-TSO-NEXT:    xor a4, a2, a0
+; RV64IA-TSO-NEXT:    and a4, a4, a5
+; RV64IA-TSO-NEXT:    xor a4, a2, a4
+; RV64IA-TSO-NEXT:    sc.w a4, a4, (a3)
+; RV64IA-TSO-NEXT:    bnez a4, .LBB11_1
+; RV64IA-TSO-NEXT:  .LBB11_3:
+; RV64IA-TSO-NEXT:    ret
   %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val acquire monotonic
   ret void
 }
@@ -942,29 +1266,53 @@ define void @cmpxchg_i16_acquire_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: cmpxchg_i16_acquire_acquire:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a3, a0, -4
-; RV32IA-NEXT:    slli a0, a0, 3
-; RV32IA-NEXT:    lui a4, 16
-; RV32IA-NEXT:    addi a4, a4, -1
-; RV32IA-NEXT:    sll a5, a4, a0
-; RV32IA-NEXT:    and a1, a1, a4
-; RV32IA-NEXT:    sll a1, a1, a0
-; RV32IA-NEXT:    and a2, a2, a4
-; RV32IA-NEXT:    sll a0, a2, a0
-; RV32IA-NEXT:  .LBB12_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w.aq a2, (a3)
-; RV32IA-NEXT:    and a4, a2, a5
-; RV32IA-NEXT:    bne a4, a1, .LBB12_3
-; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB12_1 Depth=1
-; RV32IA-NEXT:    xor a4, a2, a0
-; RV32IA-NEXT:    and a4, a4, a5
-; RV32IA-NEXT:    xor a4, a2, a4
-; RV32IA-NEXT:    sc.w a4, a4, (a3)
-; RV32IA-NEXT:    bnez a4, .LBB12_1
-; RV32IA-NEXT:  .LBB12_3:
-; RV32IA-NEXT:    ret
+; RV32IA-WMO-LABEL: cmpxchg_i16_acquire_acquire:
+; RV32IA-WMO:       # %bb.0:
+; RV32IA-WMO-NEXT:    andi a3, a0, -4
+; RV32IA-WMO-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NEXT:    lui a4, 16
+; RV32IA-WMO-NEXT:    addi a4, a4, -1
+; RV32IA-WMO-NEXT:    sll a5, a4, a0
+; RV32IA-WMO-NEXT:    and a1, a1, a4
+; RV32IA-WMO-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NEXT:    and a2, a2, a4
+; RV32IA-WMO-NEXT:    sll a0, a2, a0
+; RV32IA-WMO-NEXT:  .LBB12_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NEXT:    lr.w.aq a2, (a3)
+; RV32IA-WMO-NEXT:    and a4, a2, a5
+; RV32IA-WMO-NEXT:    bne a4, a1, .LBB12_3
+; RV32IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB12_1 Depth=1
+; RV32IA-WMO-NEXT:    xor a4, a2, a0
+; RV32IA-WMO-NEXT:    and a4, a4, a5
+; RV32IA-WMO-NEXT:    xor a4, a2, a4
+; RV32IA-WMO-NEXT:    sc.w a4, a4, (a3)
+; RV32IA-WMO-NEXT:    bnez a4, .LBB12_1
+; RV32IA-WMO-NEXT:  .LBB12_3:
+; RV32IA-WMO-NEXT:    ret
+;
+; RV32IA-TSO-LABEL: cmpxchg_i16_acquire_acquire:
+; RV32IA-TSO:       # %bb.0:
+; RV32IA-TSO-NEXT:    andi a3, a0, -4
+; RV32IA-TSO-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NEXT:    lui a4, 16
+; RV32IA-TSO-NEXT:    addi a4, a4, -1
+; RV32IA-TSO-NEXT:    sll a5, a4, a0
+; RV32IA-TSO-NEXT:    and a1, a1, a4
+; RV32IA-TSO-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NEXT:    and a2, a2, a4
+; RV32IA-TSO-NEXT:    sll a0, a2, a0
+; RV32IA-TSO-NEXT:  .LBB12_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NEXT:    lr.w a2, (a3)
+; RV32IA-TSO-NEXT:    and a4, a2, a5
+; RV32IA-TSO-NEXT:    bne a4, a1, .LBB12_3
+; RV32IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB12_1 Depth=1
+; RV32IA-TSO-NEXT:    xor a4, a2, a0
+; RV32IA-TSO-NEXT:    and a4, a4, a5
+; RV32IA-TSO-NEXT:    xor a4, a2, a4
+; RV32IA-TSO-NEXT:    sc.w a4, a4, (a3)
+; RV32IA-TSO-NEXT:    bnez a4, .LBB12_1
+; RV32IA-TSO-NEXT:  .LBB12_3:
+; RV32IA-TSO-NEXT:    ret
 ;
 ; RV64I-LABEL: cmpxchg_i16_acquire_acquire:
 ; RV64I:       # %bb.0:
@@ -979,29 +1327,53 @@ define void @cmpxchg_i16_acquire_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind
 ; RV64I-NEXT:    addi sp, sp, 16
 ; RV64I-NEXT:    ret
 ;
-; RV64IA-LABEL: cmpxchg_i16_acquire_acquire:
-; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a3, a0, -4
-; RV64IA-NEXT:    slli a0, a0, 3
-; RV64IA-NEXT:    lui a4, 16
-; RV64IA-NEXT:    addiw a4, a4, -1
-; RV64IA-NEXT:    sllw a5, a4, a0
-; RV64IA-NEXT:    and a1, a1, a4
-; RV64IA-NEXT:    sllw a1, a1, a0
-; RV64IA-NEXT:    and a2, a2, a4
-; RV64IA-NEXT:    sllw a0, a2, a0
-; RV64IA-NEXT:  .LBB12_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w.aq a2, (a3)
-; RV64IA-NEXT:    and a4, a2, a5
-; RV64IA-NEXT:    bne a4, a1, .LBB12_3
-; RV64IA-NEXT:  # %bb.2: # in Loop: Header=BB12_1 Depth=1
-; RV64IA-NEXT:    xor a4, a2, a0
-; RV64IA-NEXT:    and a4, a4, a5
-; RV64IA-NEXT:    xor a4, a2, a4
-; RV64IA-NEXT:    sc.w a4, a4, (a3)
-; RV64IA-NEXT:    bnez a4, .LBB12_1
-; RV64IA-NEXT:  .LBB12_3:
-; RV64IA-NEXT:    ret
+; RV64IA-WMO-LABEL: cmpxchg_i16_acquire_acquire:
+; RV64IA-WMO:       # %bb.0:
+; RV64IA-WMO-NEXT:    andi a3, a0, -4
+; RV64IA-WMO-NEXT:    slli a0, a0, 3
+; RV64IA-WMO-NEXT:    lui a4, 16
+; RV64IA-WMO-NEXT:    addiw a4, a4, -1
+; RV64IA-WMO-NEXT:    sllw a5, a4, a0
+; RV64IA-WMO-NEXT:    and a1, a1, a4
+; RV64IA-WMO-NEXT:    sllw a1, a1, a0
+; RV64IA-WMO-NEXT:    and a2, a2, a4
+; RV64IA-WMO-NEXT:    sllw a0, a2, a0
+; RV64IA-WMO-NEXT:  .LBB12_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT:    lr.w.aq a2, (a3)
+; RV64IA-WMO-NEXT:    and a4, a2, a5
+; RV64IA-WMO-NEXT:    bne a4, a1, .LBB12_3
+; RV64IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB12_1 Depth=1
+; RV64IA-WMO-NEXT:    xor a4, a2, a0
+; RV64IA-WMO-NEXT:    and a4, a4, a5
+; RV64IA-WMO-NEXT:    xor a4, a2, a4
+; RV64IA-WMO-NEXT:    sc.w a4, a4, (a3)
+; RV64IA-WMO-NEXT:    bnez a4, .LBB12_1
+; RV64IA-WMO-NEXT:  .LBB12_3:
+; RV64IA-WMO-NEXT:    ret
+;
+; RV64IA-TSO-LABEL: cmpxchg_i16_acquire_acquire:
+; RV64IA-TSO:       # %bb.0:
+; RV64IA-TSO-NEXT:    andi a3, a0, -4
+; RV64IA-TSO-NEXT:    slli a0, a0, 3
+; RV64IA-TSO-NEXT:    lui a4, 16
+; RV64IA-TSO-NEXT:    addiw a4, a4, -1
+; RV64IA-TSO-NEXT:    sllw a5, a4, a0
+; RV64IA-TSO-NEXT:    and a1, a1, a4
+; RV64IA-TSO-NEXT:    sllw a1, a1, a0
+; RV64IA-TSO-NEXT:    and a2, a2, a4
+; RV64IA-TSO-NEXT:    sllw a0, a2, a0
+; RV64IA-TSO-NEXT:  .LBB12_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT:    lr.w a2, (a3)
+; RV64IA-TSO-NEXT:    and a4, a2, a5
+; RV64IA-TSO-NEXT:    bne a4, a1, .LBB12_3
+; RV64IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB12_1 Depth=1
+; RV64IA-TSO-NEXT:    xor a4, a2, a0
+; RV64IA-TSO-NEXT:    and a4, a4, a5
+; RV64IA-TSO-NEXT:    xor a4, a2, a4
+; RV64IA-TSO-NEXT:    sc.w a4, a4, (a3)
+; RV64IA-TSO-NEXT:    bnez a4, .LBB12_1
+; RV64IA-TSO-NEXT:  .LBB12_3:
+; RV64IA-TSO-NEXT:    ret
   %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val acquire acquire
   ret void
 }
@@ -1020,29 +1392,53 @@ define void @cmpxchg_i16_release_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: cmpxchg_i16_release_monotonic:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a3, a0, -4
-; RV32IA-NEXT:    slli a0, a0, 3
-; RV32IA-NEXT:    lui a4, 16
-; RV32IA-NEXT:    addi a4, a4, -1
-; RV32IA-NEXT:    sll a5, a4, a0
-; RV32IA-NEXT:    and a1, a1, a4
-; RV32IA-NEXT:    sll a1, a1, a0
-; RV32IA-NEXT:    and a2, a2, a4
-; RV32IA-NEXT:    sll a0, a2, a0
-; RV32IA-NEXT:  .LBB13_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w a2, (a3)
-; RV32IA-NEXT:    and a4, a2, a5
-; RV32IA-NEXT:    bne a4, a1, .LBB13_3
-; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB13_1 Depth=1
-; RV32IA-NEXT:    xor a4, a2, a0
-; RV32IA-NEXT:    and a4, a4, a5
-; RV32IA-NEXT:    xor a4, a2, a4
-; RV32IA-NEXT:    sc.w.rl a4, a4, (a3)
-; RV32IA-NEXT:    bnez a4, .LBB13_1
-; RV32IA-NEXT:  .LBB13_3:
-; RV32IA-NEXT:    ret
+; RV32IA-WMO-LABEL: cmpxchg_i16_release_monotonic:
+; RV32IA-WMO:       # %bb.0:
+; RV32IA-WMO-NEXT:    andi a3, a0, -4
+; RV32IA-WMO-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NEXT:    lui a4, 16
+; RV32IA-WMO-NEXT:    addi a4, a4, -1
+; RV32IA-WMO-NEXT:    sll a5, a4, a0
+; RV32IA-WMO-NEXT:    and a1, a1, a4
+; RV32IA-WMO-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NEXT:    and a2, a2, a4
+; RV32IA-WMO-NEXT:    sll a0, a2, a0
+; RV32IA-WMO-NEXT:  .LBB13_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NEXT:    lr.w a2, (a3)
+; RV32IA-WMO-NEXT:    and a4, a2, a5
+; RV32IA-WMO-NEXT:    bne a4, a1, .LBB13_3
+; RV32IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB13_1 Depth=1
+; RV32IA-WMO-NEXT:    xor a4, a2, a0
+; RV32IA-WMO-NEXT:    and a4, a4, a5
+; RV32IA-WMO-NEXT:    xor a4, a2, a4
+; RV32IA-WMO-NEXT:    sc.w.rl a4, a4, (a3)
+; RV32IA-WMO-NEXT:    bnez a4, .LBB13_1
+; RV32IA-WMO-NEXT:  .LBB13_3:
+; RV32IA-WMO-NEXT:    ret
+;
+; RV32IA-TSO-LABEL: cmpxchg_i16_release_monotonic:
+; RV32IA-TSO:       # %bb.0:
+; RV32IA-TSO-NEXT:    andi a3, a0, -4
+; RV32IA-TSO-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NEXT:    lui a4, 16
+; RV32IA-TSO-NEXT:    addi a4, a4, -1
+; RV32IA-TSO-NEXT:    sll a5, a4, a0
+; RV32IA-TSO-NEXT:    and a1, a1, a4
+; RV32IA-TSO-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NEXT:    and a2, a2, a4
+; RV32IA-TSO-NEXT:    sll a0, a2, a0
+; RV32IA-TSO-NEXT:  .LBB13_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NEXT:    lr.w a2, (a3)
+; RV32IA-TSO-NEXT:    and a4, a2, a5
+; RV32IA-TSO-NEXT:    bne a4, a1, .LBB13_3
+; RV32IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB13_1 Depth=1
+; RV32IA-TSO-NEXT:    xor a4, a2, a0
+; RV32IA-TSO-NEXT:    and a4, a4, a5
+; RV32IA-TSO-NEXT:    xor a4, a2, a4
+; RV32IA-TSO-NEXT:    sc.w a4, a4, (a3)
+; RV32IA-TSO-NEXT:    bnez a4, .LBB13_1
+; RV32IA-TSO-NEXT:  .LBB13_3:
+; RV32IA-TSO-NEXT:    ret
 ;
 ; RV64I-LABEL: cmpxchg_i16_release_monotonic:
 ; RV64I:       # %bb.0:
@@ -1057,29 +1453,53 @@ define void @cmpxchg_i16_release_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin
 ; RV64I-NEXT:    addi sp, sp, 16
 ; RV64I-NEXT:    ret
 ;
-; RV64IA-LABEL: cmpxchg_i16_release_monotonic:
-; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a3, a0, -4
-; RV64IA-NEXT:    slli a0, a0, 3
-; RV64IA-NEXT:    lui a4, 16
-; RV64IA-NEXT:    addiw a4, a4, -1
-; RV64IA-NEXT:    sllw a5, a4, a0
-; RV64IA-NEXT:    and a1, a1, a4
-; RV64IA-NEXT:    sllw a1, a1, a0
-; RV64IA-NEXT:    and a2, a2, a4
-; RV64IA-NEXT:    sllw a0, a2, a0
-; RV64IA-NEXT:  .LBB13_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w a2, (a3)
-; RV64IA-NEXT:    and a4, a2, a5
-; RV64IA-NEXT:    bne a4, a1, .LBB13_3
-; RV64IA-NEXT:  # %bb.2: # in Loop: Header=BB13_1 Depth=1
-; RV64IA-NEXT:    xor a4, a2, a0
-; RV64IA-NEXT:    and a4, a4, a5
-; RV64IA-NEXT:    xor a4, a2, a4
-; RV64IA-NEXT:    sc.w.rl a4, a4, (a3)
-; RV64IA-NEXT:    bnez a4, .LBB13_1
-; RV64IA-NEXT:  .LBB13_3:
-; RV64IA-NEXT:    ret
+; RV64IA-WMO-LABEL: cmpxchg_i16_release_monotonic:
+; RV64IA-WMO:       # %bb.0:
+; RV64IA-WMO-NEXT:    andi a3, a0, -4
+; RV64IA-WMO-NEXT:    slli a0, a0, 3
+; RV64IA-WMO-NEXT:    lui a4, 16
+; RV64IA-WMO-NEXT:    addiw a4, a4, -1
+; RV64IA-WMO-NEXT:    sllw a5, a4, a0
+; RV64IA-WMO-NEXT:    and a1, a1, a4
+; RV64IA-WMO-NEXT:    sllw a1, a1, a0
+; RV64IA-WMO-NEXT:    and a2, a2, a4
+; RV64IA-WMO-NEXT:    sllw a0, a2, a0
+; RV64IA-WMO-NEXT:  .LBB13_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT:    lr.w a2, (a3)
+; RV64IA-WMO-NEXT:    and a4, a2, a5
+; RV64IA-WMO-NEXT:    bne a4, a1, .LBB13_3
+; RV64IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB13_1 Depth=1
+; RV64IA-WMO-NEXT:    xor a4, a2, a0
+; RV64IA-WMO-NEXT:    and a4, a4, a5
+; RV64IA-WMO-NEXT:    xor a4, a2, a4
+; RV64IA-WMO-NEXT:    sc.w.rl a4, a4, (a3)
+; RV64IA-WMO-NEXT:    bnez a4, .LBB13_1
+; RV64IA-WMO-NEXT:  .LBB13_3:
+; RV64IA-WMO-NEXT:    ret
+;
+; RV64IA-TSO-LABEL: cmpxchg_i16_release_monotonic:
+; RV64IA-TSO:       # %bb.0:
+; RV64IA-TSO-NEXT:    andi a3, a0, -4
+; RV64IA-TSO-NEXT:    slli a0, a0, 3
+; RV64IA-TSO-NEXT:    lui a4, 16
+; RV64IA-TSO-NEXT:    addiw a4, a4, -1
+; RV64IA-TSO-NEXT:    sllw a5, a4, a0
+; RV64IA-TSO-NEXT:    and a1, a1, a4
+; RV64IA-TSO-NEXT:    sllw a1, a1, a0
+; RV64IA-TSO-NEXT:    and a2, a2, a4
+; RV64IA-TSO-NEXT:    sllw a0, a2, a0
+; RV64IA-TSO-NEXT:  .LBB13_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT:    lr.w a2, (a3)
+; RV64IA-TSO-NEXT:    and a4, a2, a5
+; RV64IA-TSO-NEXT:    bne a4, a1, .LBB13_3
+; RV64IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB13_1 Depth=1
+; RV64IA-TSO-NEXT:    xor a4, a2, a0
+; RV64IA-TSO-NEXT:    and a4, a4, a5
+; RV64IA-TSO-NEXT:    xor a4, a2, a4
+; RV64IA-TSO-NEXT:    sc.w a4, a4, (a3)
+; RV64IA-TSO-NEXT:    bnez a4, .LBB13_1
+; RV64IA-TSO-NEXT:  .LBB13_3:
+; RV64IA-TSO-NEXT:    ret
   %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val release monotonic
   ret void
 }
@@ -1098,29 +1518,53 @@ define void @cmpxchg_i16_release_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: cmpxchg_i16_release_acquire:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a3, a0, -4
-; RV32IA-NEXT:    slli a0, a0, 3
-; RV32IA-NEXT:    lui a4, 16
-; RV32IA-NEXT:    addi a4, a4, -1
-; RV32IA-NEXT:    sll a5, a4, a0
-; RV32IA-NEXT:    and a1, a1, a4
-; RV32IA-NEXT:    sll a1, a1, a0
-; RV32IA-NEXT:    and a2, a2, a4
-; RV32IA-NEXT:    sll a0, a2, a0
-; RV32IA-NEXT:  .LBB14_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w.aq a2, (a3)
-; RV32IA-NEXT:    and a4, a2, a5
-; RV32IA-NEXT:    bne a4, a1, .LBB14_3
-; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB14_1 Depth=1
-; RV32IA-NEXT:    xor a4, a2, a0
-; RV32IA-NEXT:    and a4, a4, a5
-; RV32IA-NEXT:    xor a4, a2, a4
-; RV32IA-NEXT:    sc.w.rl a4, a4, (a3)
-; RV32IA-NEXT:    bnez a4, .LBB14_1
-; RV32IA-NEXT:  .LBB14_3:
-; RV32IA-NEXT:    ret
+; RV32IA-WMO-LABEL: cmpxchg_i16_release_acquire:
+; RV32IA-WMO:       # %bb.0:
+; RV32IA-WMO-NEXT:    andi a3, a0, -4
+; RV32IA-WMO-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NEXT:    lui a4, 16
+; RV32IA-WMO-NEXT:    addi a4, a4, -1
+; RV32IA-WMO-NEXT:    sll a5, a4, a0
+; RV32IA-WMO-NEXT:    and a1, a1, a4
+; RV32IA-WMO-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NEXT:    and a2, a2, a4
+; RV32IA-WMO-NEXT:    sll a0, a2, a0
+; RV32IA-WMO-NEXT:  .LBB14_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NEXT:    lr.w.aq a2, (a3)
+; RV32IA-WMO-NEXT:    and a4, a2, a5
+; RV32IA-WMO-NEXT:    bne a4, a1, .LBB14_3
+; RV32IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB14_1 Depth=1
+; RV32IA-WMO-NEXT:    xor a4, a2, a0
+; RV32IA-WMO-NEXT:    and a4, a4, a5
+; RV32IA-WMO-NEXT:    xor a4, a2, a4
+; RV32IA-WMO-NEXT:    sc.w.rl a4, a4, (a3)
+; RV32IA-WMO-NEXT:    bnez a4, .LBB14_1
+; RV32IA-WMO-NEXT:  .LBB14_3:
+; RV32IA-WMO-NEXT:    ret
+;
+; RV32IA-TSO-LABEL: cmpxchg_i16_release_acquire:
+; RV32IA-TSO:       # %bb.0:
+; RV32IA-TSO-NEXT:    andi a3, a0, -4
+; RV32IA-TSO-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NEXT:    lui a4, 16
+; RV32IA-TSO-NEXT:    addi a4, a4, -1
+; RV32IA-TSO-NEXT:    sll a5, a4, a0
+; RV32IA-TSO-NEXT:    and a1, a1, a4
+; RV32IA-TSO-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NEXT:    and a2, a2, a4
+; RV32IA-TSO-NEXT:    sll a0, a2, a0
+; RV32IA-TSO-NEXT:  .LBB14_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NEXT:    lr.w a2, (a3)
+; RV32IA-TSO-NEXT:    and a4, a2, a5
+; RV32IA-TSO-NEXT:    bne a4, a1, .LBB14_3
+; RV32IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB14_1 Depth=1
+; RV32IA-TSO-NEXT:    xor a4, a2, a0
+; RV32IA-TSO-NEXT:    and a4, a4, a5
+; RV32IA-TSO-NEXT:    xor a4, a2, a4
+; RV32IA-TSO-NEXT:    sc.w a4, a4, (a3)
+; RV32IA-TSO-NEXT:    bnez a4, .LBB14_1
+; RV32IA-TSO-NEXT:  .LBB14_3:
+; RV32IA-TSO-NEXT:    ret
 ;
 ; RV64I-LABEL: cmpxchg_i16_release_acquire:
 ; RV64I:       # %bb.0:
@@ -1135,29 +1579,53 @@ define void @cmpxchg_i16_release_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind
 ; RV64I-NEXT:    addi sp, sp, 16
 ; RV64I-NEXT:    ret
 ;
-; RV64IA-LABEL: cmpxchg_i16_release_acquire:
-; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a3, a0, -4
-; RV64IA-NEXT:    slli a0, a0, 3
-; RV64IA-NEXT:    lui a4, 16
-; RV64IA-NEXT:    addiw a4, a4, -1
-; RV64IA-NEXT:    sllw a5, a4, a0
-; RV64IA-NEXT:    and a1, a1, a4
-; RV64IA-NEXT:    sllw a1, a1, a0
-; RV64IA-NEXT:    and a2, a2, a4
-; RV64IA-NEXT:    sllw a0, a2, a0
-; RV64IA-NEXT:  .LBB14_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w.aq a2, (a3)
-; RV64IA-NEXT:    and a4, a2, a5
-; RV64IA-NEXT:    bne a4, a1, .LBB14_3
-; RV64IA-NEXT:  # %bb.2: # in Loop: Header=BB14_1 Depth=1
-; RV64IA-NEXT:    xor a4, a2, a0
-; RV64IA-NEXT:    and a4, a4, a5
-; RV64IA-NEXT:    xor a4, a2, a4
-; RV64IA-NEXT:    sc.w.rl a4, a4, (a3)
-; RV64IA-NEXT:    bnez a4, .LBB14_1
-; RV64IA-NEXT:  .LBB14_3:
-; RV64IA-NEXT:    ret
+; RV64IA-WMO-LABEL: cmpxchg_i16_release_acquire:
+; RV64IA-WMO:       # %bb.0:
+; RV64IA-WMO-NEXT:    andi a3, a0, -4
+; RV64IA-WMO-NEXT:    slli a0, a0, 3
+; RV64IA-WMO-NEXT:    lui a4, 16
+; RV64IA-WMO-NEXT:    addiw a4, a4, -1
+; RV64IA-WMO-NEXT:    sllw a5, a4, a0
+; RV64IA-WMO-NEXT:    and a1, a1, a4
+; RV64IA-WMO-NEXT:    sllw a1, a1, a0
+; RV64IA-WMO-NEXT:    and a2, a2, a4
+; RV64IA-WMO-NEXT:    sllw a0, a2, a0
+; RV64IA-WMO-NEXT:  .LBB14_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT:    lr.w.aq a2, (a3)
+; RV64IA-WMO-NEXT:    and a4, a2, a5
+; RV64IA-WMO-NEXT:    bne a4, a1, .LBB14_3
+; RV64IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB14_1 Depth=1
+; RV64IA-WMO-NEXT:    xor a4, a2, a0
+; RV64IA-WMO-NEXT:    and a4, a4, a5
+; RV64IA-WMO-NEXT:    xor a4, a2, a4
+; RV64IA-WMO-NEXT:    sc.w.rl a4, a4, (a3)
+; RV64IA-WMO-NEXT:    bnez a4, .LBB14_1
+; RV64IA-WMO-NEXT:  .LBB14_3:
+; RV64IA-WMO-NEXT:    ret
+;
+; RV64IA-TSO-LABEL: cmpxchg_i16_release_acquire:
+; RV64IA-TSO:       # %bb.0:
+; RV64IA-TSO-NEXT:    andi a3, a0, -4
+; RV64IA-TSO-NEXT:    slli a0, a0, 3
+; RV64IA-TSO-NEXT:    lui a4, 16
+; RV64IA-TSO-NEXT:    addiw a4, a4, -1
+; RV64IA-TSO-NEXT:    sllw a5, a4, a0
+; RV64IA-TSO-NEXT:    and a1, a1, a4
+; RV64IA-TSO-NEXT:    sllw a1, a1, a0
+; RV64IA-TSO-NEXT:    and a2, a2, a4
+; RV64IA-TSO-NEXT:    sllw a0, a2, a0
+; RV64IA-TSO-NEXT:  .LBB14_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT:    lr.w a2, (a3)
+; RV64IA-TSO-NEXT:    and a4, a2, a5
+; RV64IA-TSO-NEXT:    bne a4, a1, .LBB14_3
+; RV64IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB14_1 Depth=1
+; RV64IA-TSO-NEXT:    xor a4, a2, a0
+; RV64IA-TSO-NEXT:    and a4, a4, a5
+; RV64IA-TSO-NEXT:    xor a4, a2, a4
+; RV64IA-TSO-NEXT:    sc.w a4, a4, (a3)
+; RV64IA-TSO-NEXT:    bnez a4, .LBB14_1
+; RV64IA-TSO-NEXT:  .LBB14_3:
+; RV64IA-TSO-NEXT:    ret
   %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val release acquire
   ret void
 }
@@ -1176,29 +1644,53 @@ define void @cmpxchg_i16_acq_rel_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: cmpxchg_i16_acq_rel_monotonic:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a3, a0, -4
-; RV32IA-NEXT:    slli a0, a0, 3
-; RV32IA-NEXT:    lui a4, 16
-; RV32IA-NEXT:    addi a4, a4, -1
-; RV32IA-NEXT:    sll a5, a4, a0
-; RV32IA-NEXT:    and a1, a1, a4
-; RV32IA-NEXT:    sll a1, a1, a0
-; RV32IA-NEXT:    and a2, a2, a4
-; RV32IA-NEXT:    sll a0, a2, a0
-; RV32IA-NEXT:  .LBB15_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w.aq a2, (a3)
-; RV32IA-NEXT:    and a4, a2, a5
-; RV32IA-NEXT:    bne a4, a1, .LBB15_3
-; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB15_1 Depth=1
-; RV32IA-NEXT:    xor a4, a2, a0
-; RV32IA-NEXT:    and a4, a4, a5
-; RV32IA-NEXT:    xor a4, a2, a4
-; RV32IA-NEXT:    sc.w.rl a4, a4, (a3)
-; RV32IA-NEXT:    bnez a4, .LBB15_1
-; RV32IA-NEXT:  .LBB15_3:
-; RV32IA-NEXT:    ret
+; RV32IA-WMO-LABEL: cmpxchg_i16_acq_rel_monotonic:
+; RV32IA-WMO:       # %bb.0:
+; RV32IA-WMO-NEXT:    andi a3, a0, -4
+; RV32IA-WMO-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NEXT:    lui a4, 16
+; RV32IA-WMO-NEXT:    addi a4, a4, -1
+; RV32IA-WMO-NEXT:    sll a5, a4, a0
+; RV32IA-WMO-NEXT:    and a1, a1, a4
+; RV32IA-WMO-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NEXT:    and a2, a2, a4
+; RV32IA-WMO-NEXT:    sll a0, a2, a0
+; RV32IA-WMO-NEXT:  .LBB15_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NEXT:    lr.w.aq a2, (a3)
+; RV32IA-WMO-NEXT:    and a4, a2, a5
+; RV32IA-WMO-NEXT:    bne a4, a1, .LBB15_3
+; RV32IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB15_1 Depth=1
+; RV32IA-WMO-NEXT:    xor a4, a2, a0
+; RV32IA-WMO-NEXT:    and a4, a4, a5
+; RV32IA-WMO-NEXT:    xor a4, a2, a4
+; RV32IA-WMO-NEXT:    sc.w.rl a4, a4, (a3)
+; RV32IA-WMO-NEXT:    bnez a4, .LBB15_1
+; RV32IA-WMO-NEXT:  .LBB15_3:
+; RV32IA-WMO-NEXT:    ret
+;
+; RV32IA-TSO-LABEL: cmpxchg_i16_acq_rel_monotonic:
+; RV32IA-TSO:       # %bb.0:
+; RV32IA-TSO-NEXT:    andi a3, a0, -4
+; RV32IA-TSO-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NEXT:    lui a4, 16
+; RV32IA-TSO-NEXT:    addi a4, a4, -1
+; RV32IA-TSO-NEXT:    sll a5, a4, a0
+; RV32IA-TSO-NEXT:    and a1, a1, a4
+; RV32IA-TSO-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NEXT:    and a2, a2, a4
+; RV32IA-TSO-NEXT:    sll a0, a2, a0
+; RV32IA-TSO-NEXT:  .LBB15_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NEXT:    lr.w a2, (a3)
+; RV32IA-TSO-NEXT:    and a4, a2, a5
+; RV32IA-TSO-NEXT:    bne a4, a1, .LBB15_3
+; RV32IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB15_1 Depth=1
+; RV32IA-TSO-NEXT:    xor a4, a2, a0
+; RV32IA-TSO-NEXT:    and a4, a4, a5
+; RV32IA-TSO-NEXT:    xor a4, a2, a4
+; RV32IA-TSO-NEXT:    sc.w a4, a4, (a3)
+; RV32IA-TSO-NEXT:    bnez a4, .LBB15_1
+; RV32IA-TSO-NEXT:  .LBB15_3:
+; RV32IA-TSO-NEXT:    ret
 ;
 ; RV64I-LABEL: cmpxchg_i16_acq_rel_monotonic:
 ; RV64I:       # %bb.0:
@@ -1213,29 +1705,53 @@ define void @cmpxchg_i16_acq_rel_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin
 ; RV64I-NEXT:    addi sp, sp, 16
 ; RV64I-NEXT:    ret
 ;
-; RV64IA-LABEL: cmpxchg_i16_acq_rel_monotonic:
-; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a3, a0, -4
-; RV64IA-NEXT:    slli a0, a0, 3
-; RV64IA-NEXT:    lui a4, 16
-; RV64IA-NEXT:    addiw a4, a4, -1
-; RV64IA-NEXT:    sllw a5, a4, a0
-; RV64IA-NEXT:    and a1, a1, a4
-; RV64IA-NEXT:    sllw a1, a1, a0
-; RV64IA-NEXT:    and a2, a2, a4
-; RV64IA-NEXT:    sllw a0, a2, a0
-; RV64IA-NEXT:  .LBB15_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w.aq a2, (a3)
-; RV64IA-NEXT:    and a4, a2, a5
-; RV64IA-NEXT:    bne a4, a1, .LBB15_3
-; RV64IA-NEXT:  # %bb.2: # in Loop: Header=BB15_1 Depth=1
-; RV64IA-NEXT:    xor a4, a2, a0
-; RV64IA-NEXT:    and a4, a4, a5
-; RV64IA-NEXT:    xor a4, a2, a4
-; RV64IA-NEXT:    sc.w.rl a4, a4, (a3)
-; RV64IA-NEXT:    bnez a4, .LBB15_1
-; RV64IA-NEXT:  .LBB15_3:
-; RV64IA-NEXT:    ret
+; RV64IA-WMO-LABEL: cmpxchg_i16_acq_rel_monotonic:
+; RV64IA-WMO:       # %bb.0:
+; RV64IA-WMO-NEXT:    andi a3, a0, -4
+; RV64IA-WMO-NEXT:    slli a0, a0, 3
+; RV64IA-WMO-NEXT:    lui a4, 16
+; RV64IA-WMO-NEXT:    addiw a4, a4, -1
+; RV64IA-WMO-NEXT:    sllw a5, a4, a0
+; RV64IA-WMO-NEXT:    and a1, a1, a4
+; RV64IA-WMO-NEXT:    sllw a1, a1, a0
+; RV64IA-WMO-NEXT:    and a2, a2, a4
+; RV64IA-WMO-NEXT:    sllw a0, a2, a0
+; RV64IA-WMO-NEXT:  .LBB15_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT:    lr.w.aq a2, (a3)
+; RV64IA-WMO-NEXT:    and a4, a2, a5
+; RV64IA-WMO-NEXT:    bne a4, a1, .LBB15_3
+; RV64IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB15_1 Depth=1
+; RV64IA-WMO-NEXT:    xor a4, a2, a0
+; RV64IA-WMO-NEXT:    and a4, a4, a5
+; RV64IA-WMO-NEXT:    xor a4, a2, a4
+; RV64IA-WMO-NEXT:    sc.w.rl a4, a4, (a3)
+; RV64IA-WMO-NEXT:    bnez a4, .LBB15_1
+; RV64IA-WMO-NEXT:  .LBB15_3:
+; RV64IA-WMO-NEXT:    ret
+;
+; RV64IA-TSO-LABEL: cmpxchg_i16_acq_rel_monotonic:
+; RV64IA-TSO:       # %bb.0:
+; RV64IA-TSO-NEXT:    andi a3, a0, -4
+; RV64IA-TSO-NEXT:    slli a0, a0, 3
+; RV64IA-TSO-NEXT:    lui a4, 16
+; RV64IA-TSO-NEXT:    addiw a4, a4, -1
+; RV64IA-TSO-NEXT:    sllw a5, a4, a0
+; RV64IA-TSO-NEXT:    and a1, a1, a4
+; RV64IA-TSO-NEXT:    sllw a1, a1, a0
+; RV64IA-TSO-NEXT:    and a2, a2, a4
+; RV64IA-TSO-NEXT:    sllw a0, a2, a0
+; RV64IA-TSO-NEXT:  .LBB15_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT:    lr.w a2, (a3)
+; RV64IA-TSO-NEXT:    and a4, a2, a5
+; RV64IA-TSO-NEXT:    bne a4, a1, .LBB15_3
+; RV64IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB15_1 Depth=1
+; RV64IA-TSO-NEXT:    xor a4, a2, a0
+; RV64IA-TSO-NEXT:    and a4, a4, a5
+; RV64IA-TSO-NEXT:    xor a4, a2, a4
+; RV64IA-TSO-NEXT:    sc.w a4, a4, (a3)
+; RV64IA-TSO-NEXT:    bnez a4, .LBB15_1
+; RV64IA-TSO-NEXT:  .LBB15_3:
+; RV64IA-TSO-NEXT:    ret
   %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val acq_rel monotonic
   ret void
 }
@@ -1254,29 +1770,53 @@ define void @cmpxchg_i16_acq_rel_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: cmpxchg_i16_acq_rel_acquire:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a3, a0, -4
-; RV32IA-NEXT:    slli a0, a0, 3
-; RV32IA-NEXT:    lui a4, 16
-; RV32IA-NEXT:    addi a4, a4, -1
-; RV32IA-NEXT:    sll a5, a4, a0
-; RV32IA-NEXT:    and a1, a1, a4
-; RV32IA-NEXT:    sll a1, a1, a0
-; RV32IA-NEXT:    and a2, a2, a4
-; RV32IA-NEXT:    sll a0, a2, a0
-; RV32IA-NEXT:  .LBB16_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w.aq a2, (a3)
-; RV32IA-NEXT:    and a4, a2, a5
-; RV32IA-NEXT:    bne a4, a1, .LBB16_3
-; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB16_1 Depth=1
-; RV32IA-NEXT:    xor a4, a2, a0
-; RV32IA-NEXT:    and a4, a4, a5
-; RV32IA-NEXT:    xor a4, a2, a4
-; RV32IA-NEXT:    sc.w.rl a4, a4, (a3)
-; RV32IA-NEXT:    bnez a4, .LBB16_1
-; RV32IA-NEXT:  .LBB16_3:
-; RV32IA-NEXT:    ret
+; RV32IA-WMO-LABEL: cmpxchg_i16_acq_rel_acquire:
+; RV32IA-WMO:       # %bb.0:
+; RV32IA-WMO-NEXT:    andi a3, a0, -4
+; RV32IA-WMO-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NEXT:    lui a4, 16
+; RV32IA-WMO-NEXT:    addi a4, a4, -1
+; RV32IA-WMO-NEXT:    sll a5, a4, a0
+; RV32IA-WMO-NEXT:    and a1, a1, a4
+; RV32IA-WMO-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NEXT:    and a2, a2, a4
+; RV32IA-WMO-NEXT:    sll a0, a2, a0
+; RV32IA-WMO-NEXT:  .LBB16_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NEXT:    lr.w.aq a2, (a3)
+; RV32IA-WMO-NEXT:    and a4, a2, a5
+; RV32IA-WMO-NEXT:    bne a4, a1, .LBB16_3
+; RV32IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB16_1 Depth=1
+; RV32IA-WMO-NEXT:    xor a4, a2, a0
+; RV32IA-WMO-NEXT:    and a4, a4, a5
+; RV32IA-WMO-NEXT:    xor a4, a2, a4
+; RV32IA-WMO-NEXT:    sc.w.rl a4, a4, (a3)
+; RV32IA-WMO-NEXT:    bnez a4, .LBB16_1
+; RV32IA-WMO-NEXT:  .LBB16_3:
+; RV32IA-WMO-NEXT:    ret
+;
+; RV32IA-TSO-LABEL: cmpxchg_i16_acq_rel_acquire:
+; RV32IA-TSO:       # %bb.0:
+; RV32IA-TSO-NEXT:    andi a3, a0, -4
+; RV32IA-TSO-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NEXT:    lui a4, 16
+; RV32IA-TSO-NEXT:    addi a4, a4, -1
+; RV32IA-TSO-NEXT:    sll a5, a4, a0
+; RV32IA-TSO-NEXT:    and a1, a1, a4
+; RV32IA-TSO-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NEXT:    and a2, a2, a4
+; RV32IA-TSO-NEXT:    sll a0, a2, a0
+; RV32IA-TSO-NEXT:  .LBB16_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NEXT:    lr.w a2, (a3)
+; RV32IA-TSO-NEXT:    and a4, a2, a5
+; RV32IA-TSO-NEXT:    bne a4, a1, .LBB16_3
+; RV32IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB16_1 Depth=1
+; RV32IA-TSO-NEXT:    xor a4, a2, a0
+; RV32IA-TSO-NEXT:    and a4, a4, a5
+; RV32IA-TSO-NEXT:    xor a4, a2, a4
+; RV32IA-TSO-NEXT:    sc.w a4, a4, (a3)
+; RV32IA-TSO-NEXT:    bnez a4, .LBB16_1
+; RV32IA-TSO-NEXT:  .LBB16_3:
+; RV32IA-TSO-NEXT:    ret
 ;
 ; RV64I-LABEL: cmpxchg_i16_acq_rel_acquire:
 ; RV64I:       # %bb.0:
@@ -1291,29 +1831,53 @@ define void @cmpxchg_i16_acq_rel_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind
 ; RV64I-NEXT:    addi sp, sp, 16
 ; RV64I-NEXT:    ret
 ;
-; RV64IA-LABEL: cmpxchg_i16_acq_rel_acquire:
-; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a3, a0, -4
-; RV64IA-NEXT:    slli a0, a0, 3
-; RV64IA-NEXT:    lui a4, 16
-; RV64IA-NEXT:    addiw a4, a4, -1
-; RV64IA-NEXT:    sllw a5, a4, a0
-; RV64IA-NEXT:    and a1, a1, a4
-; RV64IA-NEXT:    sllw a1, a1, a0
-; RV64IA-NEXT:    and a2, a2, a4
-; RV64IA-NEXT:    sllw a0, a2, a0
-; RV64IA-NEXT:  .LBB16_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w.aq a2, (a3)
-; RV64IA-NEXT:    and a4, a2, a5
-; RV64IA-NEXT:    bne a4, a1, .LBB16_3
-; RV64IA-NEXT:  # %bb.2: # in Loop: Header=BB16_1 Depth=1
-; RV64IA-NEXT:    xor a4, a2, a0
-; RV64IA-NEXT:    and a4, a4, a5
-; RV64IA-NEXT:    xor a4, a2, a4
-; RV64IA-NEXT:    sc.w.rl a4, a4, (a3)
-; RV64IA-NEXT:    bnez a4, .LBB16_1
-; RV64IA-NEXT:  .LBB16_3:
-; RV64IA-NEXT:    ret
+; RV64IA-WMO-LABEL: cmpxchg_i16_acq_rel_acquire:
+; RV64IA-WMO:       # %bb.0:
+; RV64IA-WMO-NEXT:    andi a3, a0, -4
+; RV64IA-WMO-NEXT:    slli a0, a0, 3
+; RV64IA-WMO-NEXT:    lui a4, 16
+; RV64IA-WMO-NEXT:    addiw a4, a4, -1
+; RV64IA-WMO-NEXT:    sllw a5, a4, a0
+; RV64IA-WMO-NEXT:    and a1, a1, a4
+; RV64IA-WMO-NEXT:    sllw a1, a1, a0
+; RV64IA-WMO-NEXT:    and a2, a2, a4
+; RV64IA-WMO-NEXT:    sllw a0, a2, a0
+; RV64IA-WMO-NEXT:  .LBB16_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT:    lr.w.aq a2, (a3)
+; RV64IA-WMO-NEXT:    and a4, a2, a5
+; RV64IA-WMO-NEXT:    bne a4, a1, .LBB16_3
+; RV64IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB16_1 Depth=1
+; RV64IA-WMO-NEXT:    xor a4, a2, a0
+; RV64IA-WMO-NEXT:    and a4, a4, a5
+; RV64IA-WMO-NEXT:    xor a4, a2, a4
+; RV64IA-WMO-NEXT:    sc.w.rl a4, a4, (a3)
+; RV64IA-WMO-NEXT:    bnez a4, .LBB16_1
+; RV64IA-WMO-NEXT:  .LBB16_3:
+; RV64IA-WMO-NEXT:    ret
+;
+; RV64IA-TSO-LABEL: cmpxchg_i16_acq_rel_acquire:
+; RV64IA-TSO:       # %bb.0:
+; RV64IA-TSO-NEXT:    andi a3, a0, -4
+; RV64IA-TSO-NEXT:    slli a0, a0, 3
+; RV64IA-TSO-NEXT:    lui a4, 16
+; RV64IA-TSO-NEXT:    addiw a4, a4, -1
+; RV64IA-TSO-NEXT:    sllw a5, a4, a0
+; RV64IA-TSO-NEXT:    and a1, a1, a4
+; RV64IA-TSO-NEXT:    sllw a1, a1, a0
+; RV64IA-TSO-NEXT:    and a2, a2, a4
+; RV64IA-TSO-NEXT:    sllw a0, a2, a0
+; RV64IA-TSO-NEXT:  .LBB16_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT:    lr.w a2, (a3)
+; RV64IA-TSO-NEXT:    and a4, a2, a5
+; RV64IA-TSO-NEXT:    bne a4, a1, .LBB16_3
+; RV64IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB16_1 Depth=1
+; RV64IA-TSO-NEXT:    xor a4, a2, a0
+; RV64IA-TSO-NEXT:    and a4, a4, a5
+; RV64IA-TSO-NEXT:    xor a4, a2, a4
+; RV64IA-TSO-NEXT:    sc.w a4, a4, (a3)
+; RV64IA-TSO-NEXT:    bnez a4, .LBB16_1
+; RV64IA-TSO-NEXT:  .LBB16_3:
+; RV64IA-TSO-NEXT:    ret
   %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val acq_rel acquire
   ret void
 }
@@ -1619,16 +2183,27 @@ define void @cmpxchg_i32_acquire_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwin
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: cmpxchg_i32_acquire_monotonic:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:  .LBB21_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w.aq a3, (a0)
-; RV32IA-NEXT:    bne a3, a1, .LBB21_3
-; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB21_1 Depth=1
-; RV32IA-NEXT:    sc.w a4, a2, (a0)
-; RV32IA-NEXT:    bnez a4, .LBB21_1
-; RV32IA-NEXT:  .LBB21_3:
-; RV32IA-NEXT:    ret
+; RV32IA-WMO-LABEL: cmpxchg_i32_acquire_monotonic:
+; RV32IA-WMO:       # %bb.0:
+; RV32IA-WMO-NEXT:  .LBB21_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NEXT:    lr.w.aq a3, (a0)
+; RV32IA-WMO-NEXT:    bne a3, a1, .LBB21_3
+; RV32IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB21_1 Depth=1
+; RV32IA-WMO-NEXT:    sc.w a4, a2, (a0)
+; RV32IA-WMO-NEXT:    bnez a4, .LBB21_1
+; RV32IA-WMO-NEXT:  .LBB21_3:
+; RV32IA-WMO-NEXT:    ret
+;
+; RV32IA-TSO-LABEL: cmpxchg_i32_acquire_monotonic:
+; RV32IA-TSO:       # %bb.0:
+; RV32IA-TSO-NEXT:  .LBB21_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NEXT:    lr.w a3, (a0)
+; RV32IA-TSO-NEXT:    bne a3, a1, .LBB21_3
+; RV32IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB21_1 Depth=1
+; RV32IA-TSO-NEXT:    sc.w a4, a2, (a0)
+; RV32IA-TSO-NEXT:    bnez a4, .LBB21_1
+; RV32IA-TSO-NEXT:  .LBB21_3:
+; RV32IA-TSO-NEXT:    ret
 ;
 ; RV64I-LABEL: cmpxchg_i32_acquire_monotonic:
 ; RV64I:       # %bb.0:
@@ -1643,17 +2218,29 @@ define void @cmpxchg_i32_acquire_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwin
 ; RV64I-NEXT:    addi sp, sp, 16
 ; RV64I-NEXT:    ret
 ;
-; RV64IA-LABEL: cmpxchg_i32_acquire_monotonic:
-; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    sext.w a1, a1
-; RV64IA-NEXT:  .LBB21_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w.aq a3, (a0)
-; RV64IA-NEXT:    bne a3, a1, .LBB21_3
-; RV64IA-NEXT:  # %bb.2: # in Loop: Header=BB21_1 Depth=1
-; RV64IA-NEXT:    sc.w a4, a2, (a0)
-; RV64IA-NEXT:    bnez a4, .LBB21_1
-; RV64IA-NEXT:  .LBB21_3:
-; RV64IA-NEXT:    ret
+; RV64IA-WMO-LABEL: cmpxchg_i32_acquire_monotonic:
+; RV64IA-WMO:       # %bb.0:
+; RV64IA-WMO-NEXT:    sext.w a1, a1
+; RV64IA-WMO-NEXT:  .LBB21_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT:    lr.w.aq a3, (a0)
+; RV64IA-WMO-NEXT:    bne a3, a1, .LBB21_3
+; RV64IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB21_1 Depth=1
+; RV64IA-WMO-NEXT:    sc.w a4, a2, (a0)
+; RV64IA-WMO-NEXT:    bnez a4, .LBB21_1
+; RV64IA-WMO-NEXT:  .LBB21_3:
+; RV64IA-WMO-NEXT:    ret
+;
+; RV64IA-TSO-LABEL: cmpxchg_i32_acquire_monotonic:
+; RV64IA-TSO:       # %bb.0:
+; RV64IA-TSO-NEXT:    sext.w a1, a1
+; RV64IA-TSO-NEXT:  .LBB21_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT:    lr.w a3, (a0)
+; RV64IA-TSO-NEXT:    bne a3, a1, .LBB21_3
+; RV64IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB21_1 Depth=1
+; RV64IA-TSO-NEXT:    sc.w a4, a2, (a0)
+; RV64IA-TSO-NEXT:    bnez a4, .LBB21_1
+; RV64IA-TSO-NEXT:  .LBB21_3:
+; RV64IA-TSO-NEXT:    ret
   %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val acquire monotonic
   ret void
 }
@@ -1672,16 +2259,27 @@ define void @cmpxchg_i32_acquire_acquire(ptr %ptr, i32 %cmp, i32 %val) nounwind
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: cmpxchg_i32_acquire_acquire:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:  .LBB22_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w.aq a3, (a0)
-; RV32IA-NEXT:    bne a3, a1, .LBB22_3
-; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB22_1 Depth=1
-; RV32IA-NEXT:    sc.w a4, a2, (a0)
-; RV32IA-NEXT:    bnez a4, .LBB22_1
-; RV32IA-NEXT:  .LBB22_3:
-; RV32IA-NEXT:    ret
+; RV32IA-WMO-LABEL: cmpxchg_i32_acquire_acquire:
+; RV32IA-WMO:       # %bb.0:
+; RV32IA-WMO-NEXT:  .LBB22_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NEXT:    lr.w.aq a3, (a0)
+; RV32IA-WMO-NEXT:    bne a3, a1, .LBB22_3
+; RV32IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB22_1 Depth=1
+; RV32IA-WMO-NEXT:    sc.w a4, a2, (a0)
+; RV32IA-WMO-NEXT:    bnez a4, .LBB22_1
+; RV32IA-WMO-NEXT:  .LBB22_3:
+; RV32IA-WMO-NEXT:    ret
+;
+; RV32IA-TSO-LABEL: cmpxchg_i32_acquire_acquire:
+; RV32IA-TSO:       # %bb.0:
+; RV32IA-TSO-NEXT:  .LBB22_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NEXT:    lr.w a3, (a0)
+; RV32IA-TSO-NEXT:    bne a3, a1, .LBB22_3
+; RV32IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB22_1 Depth=1
+; RV32IA-TSO-NEXT:    sc.w a4, a2, (a0)
+; RV32IA-TSO-NEXT:    bnez a4, .LBB22_1
+; RV32IA-TSO-NEXT:  .LBB22_3:
+; RV32IA-TSO-NEXT:    ret
 ;
 ; RV64I-LABEL: cmpxchg_i32_acquire_acquire:
 ; RV64I:       # %bb.0:
@@ -1696,17 +2294,29 @@ define void @cmpxchg_i32_acquire_acquire(ptr %ptr, i32 %cmp, i32 %val) nounwind
 ; RV64I-NEXT:    addi sp, sp, 16
 ; RV64I-NEXT:    ret
 ;
-; RV64IA-LABEL: cmpxchg_i32_acquire_acquire:
-; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    sext.w a1, a1
-; RV64IA-NEXT:  .LBB22_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w.aq a3, (a0)
-; RV64IA-NEXT:    bne a3, a1, .LBB22_3
-; RV64IA-NEXT:  # %bb.2: # in Loop: Header=BB22_1 Depth=1
-; RV64IA-NEXT:    sc.w a4, a2, (a0)
-; RV64IA-NEXT:    bnez a4, .LBB22_1
-; RV64IA-NEXT:  .LBB22_3:
-; RV64IA-NEXT:    ret
+; RV64IA-WMO-LABEL: cmpxchg_i32_acquire_acquire:
+; RV64IA-WMO:       # %bb.0:
+; RV64IA-WMO-NEXT:    sext.w a1, a1
+; RV64IA-WMO-NEXT:  .LBB22_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT:    lr.w.aq a3, (a0)
+; RV64IA-WMO-NEXT:    bne a3, a1, .LBB22_3
+; RV64IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB22_1 Depth=1
+; RV64IA-WMO-NEXT:    sc.w a4, a2, (a0)
+; RV64IA-WMO-NEXT:    bnez a4, .LBB22_1
+; RV64IA-WMO-NEXT:  .LBB22_3:
+; RV64IA-WMO-NEXT:    ret
+;
+; RV64IA-TSO-LABEL: cmpxchg_i32_acquire_acquire:
+; RV64IA-TSO:       # %bb.0:
+; RV64IA-TSO-NEXT:    sext.w a1, a1
+; RV64IA-TSO-NEXT:  .LBB22_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT:    lr.w a3, (a0)
+; RV64IA-TSO-NEXT:    bne a3, a1, .LBB22_3
+; RV64IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB22_1 Depth=1
+; RV64IA-TSO-NEXT:    sc.w a4, a2, (a0)
+; RV64IA-TSO-NEXT:    bnez a4, .LBB22_1
+; RV64IA-TSO-NEXT:  .LBB22_3:
+; RV64IA-TSO-NEXT:    ret
   %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val acquire acquire
   ret void
 }
@@ -1725,16 +2335,27 @@ define void @cmpxchg_i32_release_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwin
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: cmpxchg_i32_release_monotonic:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:  .LBB23_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w a3, (a0)
-; RV32IA-NEXT:    bne a3, a1, .LBB23_3
-; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB23_1 Depth=1
-; RV32IA-NEXT:    sc.w.rl a4, a2, (a0)
-; RV32IA-NEXT:    bnez a4, .LBB23_1
-; RV32IA-NEXT:  .LBB23_3:
-; RV32IA-NEXT:    ret
+; RV32IA-WMO-LABEL: cmpxchg_i32_release_monotonic:
+; RV32IA-WMO:       # %bb.0:
+; RV32IA-WMO-NEXT:  .LBB23_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NEXT:    lr.w a3, (a0)
+; RV32IA-WMO-NEXT:    bne a3, a1, .LBB23_3
+; RV32IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB23_1 Depth=1
+; RV32IA-WMO-NEXT:    sc.w.rl a4, a2, (a0)
+; RV32IA-WMO-NEXT:    bnez a4, .LBB23_1
+; RV32IA-WMO-NEXT:  .LBB23_3:
+; RV32IA-WMO-NEXT:    ret
+;
+; RV32IA-TSO-LABEL: cmpxchg_i32_release_monotonic:
+; RV32IA-TSO:       # %bb.0:
+; RV32IA-TSO-NEXT:  .LBB23_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NEXT:    lr.w a3, (a0)
+; RV32IA-TSO-NEXT:    bne a3, a1, .LBB23_3
+; RV32IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB23_1 Depth=1
+; RV32IA-TSO-NEXT:    sc.w a4, a2, (a0)
+; RV32IA-TSO-NEXT:    bnez a4, .LBB23_1
+; RV32IA-TSO-NEXT:  .LBB23_3:
+; RV32IA-TSO-NEXT:    ret
 ;
 ; RV64I-LABEL: cmpxchg_i32_release_monotonic:
 ; RV64I:       # %bb.0:
@@ -1749,17 +2370,29 @@ define void @cmpxchg_i32_release_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwin
 ; RV64I-NEXT:    addi sp, sp, 16
 ; RV64I-NEXT:    ret
 ;
-; RV64IA-LABEL: cmpxchg_i32_release_monotonic:
-; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    sext.w a1, a1
-; RV64IA-NEXT:  .LBB23_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w a3, (a0)
-; RV64IA-NEXT:    bne a3, a1, .LBB23_3
-; RV64IA-NEXT:  # %bb.2: # in Loop: Header=BB23_1 Depth=1
-; RV64IA-NEXT:    sc.w.rl a4, a2, (a0)
-; RV64IA-NEXT:    bnez a4, .LBB23_1
-; RV64IA-NEXT:  .LBB23_3:
-; RV64IA-NEXT:    ret
+; RV64IA-WMO-LABEL: cmpxchg_i32_release_monotonic:
+; RV64IA-WMO:       # %bb.0:
+; RV64IA-WMO-NEXT:    sext.w a1, a1
+; RV64IA-WMO-NEXT:  .LBB23_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT:    lr.w a3, (a0)
+; RV64IA-WMO-NEXT:    bne a3, a1, .LBB23_3
+; RV64IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB23_1 Depth=1
+; RV64IA-WMO-NEXT:    sc.w.rl a4, a2, (a0)
+; RV64IA-WMO-NEXT:    bnez a4, .LBB23_1
+; RV64IA-WMO-NEXT:  .LBB23_3:
+; RV64IA-WMO-NEXT:    ret
+;
+; RV64IA-TSO-LABEL: cmpxchg_i32_release_monotonic:
+; RV64IA-TSO:       # %bb.0:
+; RV64IA-TSO-NEXT:    sext.w a1, a1
+; RV64IA-TSO-NEXT:  .LBB23_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT:    lr.w a3, (a0)
+; RV64IA-TSO-NEXT:    bne a3, a1, .LBB23_3
+; RV64IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB23_1 Depth=1
+; RV64IA-TSO-NEXT:    sc.w a4, a2, (a0)
+; RV64IA-TSO-NEXT:    bnez a4, .LBB23_1
+; RV64IA-TSO-NEXT:  .LBB23_3:
+; RV64IA-TSO-NEXT:    ret
   %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val release monotonic
   ret void
 }
@@ -1778,16 +2411,27 @@ define void @cmpxchg_i32_release_acquire(ptr %ptr, i32 %cmp, i32 %val) nounwind
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: cmpxchg_i32_release_acquire:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:  .LBB24_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w.aq a3, (a0)
-; RV32IA-NEXT:    bne a3, a1, .LBB24_3
-; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB24_1 Depth=1
-; RV32IA-NEXT:    sc.w.rl a4, a2, (a0)
-; RV32IA-NEXT:    bnez a4, .LBB24_1
-; RV32IA-NEXT:  .LBB24_3:
-; RV32IA-NEXT:    ret
+; RV32IA-WMO-LABEL: cmpxchg_i32_release_acquire:
+; RV32IA-WMO:       # %bb.0:
+; RV32IA-WMO-NEXT:  .LBB24_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NEXT:    lr.w.aq a3, (a0)
+; RV32IA-WMO-NEXT:    bne a3, a1, .LBB24_3
+; RV32IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB24_1 Depth=1
+; RV32IA-WMO-NEXT:    sc.w.rl a4, a2, (a0)
+; RV32IA-WMO-NEXT:    bnez a4, .LBB24_1
+; RV32IA-WMO-NEXT:  .LBB24_3:
+; RV32IA-WMO-NEXT:    ret
+;
+; RV32IA-TSO-LABEL: cmpxchg_i32_release_acquire:
+; RV32IA-TSO:       # %bb.0:
+; RV32IA-TSO-NEXT:  .LBB24_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NEXT:    lr.w a3, (a0)
+; RV32IA-TSO-NEXT:    bne a3, a1, .LBB24_3
+; RV32IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB24_1 Depth=1
+; RV32IA-TSO-NEXT:    sc.w a4, a2, (a0)
+; RV32IA-TSO-NEXT:    bnez a4, .LBB24_1
+; RV32IA-TSO-NEXT:  .LBB24_3:
+; RV32IA-TSO-NEXT:    ret
 ;
 ; RV64I-LABEL: cmpxchg_i32_release_acquire:
 ; RV64I:       # %bb.0:
@@ -1802,17 +2446,29 @@ define void @cmpxchg_i32_release_acquire(ptr %ptr, i32 %cmp, i32 %val) nounwind
 ; RV64I-NEXT:    addi sp, sp, 16
 ; RV64I-NEXT:    ret
 ;
-; RV64IA-LABEL: cmpxchg_i32_release_acquire:
-; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    sext.w a1, a1
-; RV64IA-NEXT:  .LBB24_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w.aq a3, (a0)
-; RV64IA-NEXT:    bne a3, a1, .LBB24_3
-; RV64IA-NEXT:  # %bb.2: # in Loop: Header=BB24_1 Depth=1
-; RV64IA-NEXT:    sc.w.rl a4, a2, (a0)
-; RV64IA-NEXT:    bnez a4, .LBB24_1
-; RV64IA-NEXT:  .LBB24_3:
-; RV64IA-NEXT:    ret
+; RV64IA-WMO-LABEL: cmpxchg_i32_release_acquire:
+; RV64IA-WMO:       # %bb.0:
+; RV64IA-WMO-NEXT:    sext.w a1, a1
+; RV64IA-WMO-NEXT:  .LBB24_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT:    lr.w.aq a3, (a0)
+; RV64IA-WMO-NEXT:    bne a3, a1, .LBB24_3
+; RV64IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB24_1 Depth=1
+; RV64IA-WMO-NEXT:    sc.w.rl a4, a2, (a0)
+; RV64IA-WMO-NEXT:    bnez a4, .LBB24_1
+; RV64IA-WMO-NEXT:  .LBB24_3:
+; RV64IA-WMO-NEXT:    ret
+;
+; RV64IA-TSO-LABEL: cmpxchg_i32_release_acquire:
+; RV64IA-TSO:       # %bb.0:
+; RV64IA-TSO-NEXT:    sext.w a1, a1
+; RV64IA-TSO-NEXT:  .LBB24_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT:    lr.w a3, (a0)
+; RV64IA-TSO-NEXT:    bne a3, a1, .LBB24_3
+; RV64IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB24_1 Depth=1
+; RV64IA-TSO-NEXT:    sc.w a4, a2, (a0)
+; RV64IA-TSO-NEXT:    bnez a4, .LBB24_1
+; RV64IA-TSO-NEXT:  .LBB24_3:
+; RV64IA-TSO-NEXT:    ret
   %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val release acquire
   ret void
 }
@@ -1831,16 +2487,27 @@ define void @cmpxchg_i32_acq_rel_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwin
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: cmpxchg_i32_acq_rel_monotonic:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:  .LBB25_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w.aq a3, (a0)
-; RV32IA-NEXT:    bne a3, a1, .LBB25_3
-; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB25_1 Depth=1
-; RV32IA-NEXT:    sc.w.rl a4, a2, (a0)
-; RV32IA-NEXT:    bnez a4, .LBB25_1
-; RV32IA-NEXT:  .LBB25_3:
-; RV32IA-NEXT:    ret
+; RV32IA-WMO-LABEL: cmpxchg_i32_acq_rel_monotonic:
+; RV32IA-WMO:       # %bb.0:
+; RV32IA-WMO-NEXT:  .LBB25_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NEXT:    lr.w.aq a3, (a0)
+; RV32IA-WMO-NEXT:    bne a3, a1, .LBB25_3
+; RV32IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB25_1 Depth=1
+; RV32IA-WMO-NEXT:    sc.w.rl a4, a2, (a0)
+; RV32IA-WMO-NEXT:    bnez a4, .LBB25_1
+; RV32IA-WMO-NEXT:  .LBB25_3:
+; RV32IA-WMO-NEXT:    ret
+;
+; RV32IA-TSO-LABEL: cmpxchg_i32_acq_rel_monotonic:
+; RV32IA-TSO:       # %bb.0:
+; RV32IA-TSO-NEXT:  .LBB25_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NEXT:    lr.w a3, (a0)
+; RV32IA-TSO-NEXT:    bne a3, a1, .LBB25_3
+; RV32IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB25_1 Depth=1
+; RV32IA-TSO-NEXT:    sc.w a4, a2, (a0)
+; RV32IA-TSO-NEXT:    bnez a4, .LBB25_1
+; RV32IA-TSO-NEXT:  .LBB25_3:
+; RV32IA-TSO-NEXT:    ret
 ;
 ; RV64I-LABEL: cmpxchg_i32_acq_rel_monotonic:
 ; RV64I:       # %bb.0:
@@ -1855,17 +2522,29 @@ define void @cmpxchg_i32_acq_rel_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwin
 ; RV64I-NEXT:    addi sp, sp, 16
 ; RV64I-NEXT:    ret
 ;
-; RV64IA-LABEL: cmpxchg_i32_acq_rel_monotonic:
-; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    sext.w a1, a1
-; RV64IA-NEXT:  .LBB25_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w.aq a3, (a0)
-; RV64IA-NEXT:    bne a3, a1, .LBB25_3
-; RV64IA-NEXT:  # %bb.2: # in Loop: Header=BB25_1 Depth=1
-; RV64IA-NEXT:    sc.w.rl a4, a2, (a0)
-; RV64IA-NEXT:    bnez a4, .LBB25_1
-; RV64IA-NEXT:  .LBB25_3:
-; RV64IA-NEXT:    ret
+; RV64IA-WMO-LABEL: cmpxchg_i32_acq_rel_monotonic:
+; RV64IA-WMO:       # %bb.0:
+; RV64IA-WMO-NEXT:    sext.w a1, a1
+; RV64IA-WMO-NEXT:  .LBB25_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT:    lr.w.aq a3, (a0)
+; RV64IA-WMO-NEXT:    bne a3, a1, .LBB25_3
+; RV64IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB25_1 Depth=1
+; RV64IA-WMO-NEXT:    sc.w.rl a4, a2, (a0)
+; RV64IA-WMO-NEXT:    bnez a4, .LBB25_1
+; RV64IA-WMO-NEXT:  .LBB25_3:
+; RV64IA-WMO-NEXT:    ret
+;
+; RV64IA-TSO-LABEL: cmpxchg_i32_acq_rel_monotonic:
+; RV64IA-TSO:       # %bb.0:
+; RV64IA-TSO-NEXT:    sext.w a1, a1
+; RV64IA-TSO-NEXT:  .LBB25_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT:    lr.w a3, (a0)
+; RV64IA-TSO-NEXT:    bne a3, a1, .LBB25_3
+; RV64IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB25_1 Depth=1
+; RV64IA-TSO-NEXT:    sc.w a4, a2, (a0)
+; RV64IA-TSO-NEXT:    bnez a4, .LBB25_1
+; RV64IA-TSO-NEXT:  .LBB25_3:
+; RV64IA-TSO-NEXT:    ret
   %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val acq_rel monotonic
   ret void
 }
@@ -1884,16 +2563,27 @@ define void @cmpxchg_i32_acq_rel_acquire(ptr %ptr, i32 %cmp, i32 %val) nounwind
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: cmpxchg_i32_acq_rel_acquire:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:  .LBB26_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w.aq a3, (a0)
-; RV32IA-NEXT:    bne a3, a1, .LBB26_3
-; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB26_1 Depth=1
-; RV32IA-NEXT:    sc.w.rl a4, a2, (a0)
-; RV32IA-NEXT:    bnez a4, .LBB26_1
-; RV32IA-NEXT:  .LBB26_3:
-; RV32IA-NEXT:    ret
+; RV32IA-WMO-LABEL: cmpxchg_i32_acq_rel_acquire:
+; RV32IA-WMO:       # %bb.0:
+; RV32IA-WMO-NEXT:  .LBB26_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NEXT:    lr.w.aq a3, (a0)
+; RV32IA-WMO-NEXT:    bne a3, a1, .LBB26_3
+; RV32IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB26_1 Depth=1
+; RV32IA-WMO-NEXT:    sc.w.rl a4, a2, (a0)
+; RV32IA-WMO-NEXT:    bnez a4, .LBB26_1
+; RV32IA-WMO-NEXT:  .LBB26_3:
+; RV32IA-WMO-NEXT:    ret
+;
+; RV32IA-TSO-LABEL: cmpxchg_i32_acq_rel_acquire:
+; RV32IA-TSO:       # %bb.0:
+; RV32IA-TSO-NEXT:  .LBB26_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NEXT:    lr.w a3, (a0)
+; RV32IA-TSO-NEXT:    bne a3, a1, .LBB26_3
+; RV32IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB26_1 Depth=1
+; RV32IA-TSO-NEXT:    sc.w a4, a2, (a0)
+; RV32IA-TSO-NEXT:    bnez a4, .LBB26_1
+; RV32IA-TSO-NEXT:  .LBB26_3:
+; RV32IA-TSO-NEXT:    ret
 ;
 ; RV64I-LABEL: cmpxchg_i32_acq_rel_acquire:
 ; RV64I:       # %bb.0:
@@ -1908,17 +2598,29 @@ define void @cmpxchg_i32_acq_rel_acquire(ptr %ptr, i32 %cmp, i32 %val) nounwind
 ; RV64I-NEXT:    addi sp, sp, 16
 ; RV64I-NEXT:    ret
 ;
-; RV64IA-LABEL: cmpxchg_i32_acq_rel_acquire:
-; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    sext.w a1, a1
-; RV64IA-NEXT:  .LBB26_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w.aq a3, (a0)
-; RV64IA-NEXT:    bne a3, a1, .LBB26_3
-; RV64IA-NEXT:  # %bb.2: # in Loop: Header=BB26_1 Depth=1
-; RV64IA-NEXT:    sc.w.rl a4, a2, (a0)
-; RV64IA-NEXT:    bnez a4, .LBB26_1
-; RV64IA-NEXT:  .LBB26_3:
-; RV64IA-NEXT:    ret
+; RV64IA-WMO-LABEL: cmpxchg_i32_acq_rel_acquire:
+; RV64IA-WMO:       # %bb.0:
+; RV64IA-WMO-NEXT:    sext.w a1, a1
+; RV64IA-WMO-NEXT:  .LBB26_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT:    lr.w.aq a3, (a0)
+; RV64IA-WMO-NEXT:    bne a3, a1, .LBB26_3
+; RV64IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB26_1 Depth=1
+; RV64IA-WMO-NEXT:    sc.w.rl a4, a2, (a0)
+; RV64IA-WMO-NEXT:    bnez a4, .LBB26_1
+; RV64IA-WMO-NEXT:  .LBB26_3:
+; RV64IA-WMO-NEXT:    ret
+;
+; RV64IA-TSO-LABEL: cmpxchg_i32_acq_rel_acquire:
+; RV64IA-TSO:       # %bb.0:
+; RV64IA-TSO-NEXT:    sext.w a1, a1
+; RV64IA-TSO-NEXT:  .LBB26_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT:    lr.w a3, (a0)
+; RV64IA-TSO-NEXT:    bne a3, a1, .LBB26_3
+; RV64IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB26_1 Depth=1
+; RV64IA-TSO-NEXT:    sc.w a4, a2, (a0)
+; RV64IA-TSO-NEXT:    bnez a4, .LBB26_1
+; RV64IA-TSO-NEXT:  .LBB26_3:
+; RV64IA-TSO-NEXT:    ret
   %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val acq_rel acquire
   ret void
 }
@@ -2190,16 +2892,27 @@ define void @cmpxchg_i64_acquire_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwin
 ; RV64I-NEXT:    addi sp, sp, 16
 ; RV64I-NEXT:    ret
 ;
-; RV64IA-LABEL: cmpxchg_i64_acquire_monotonic:
-; RV64IA:       # %bb.0:
-; RV64IA-NEXT:  .LBB31_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.d.aq a3, (a0)
-; RV64IA-NEXT:    bne a3, a1, .LBB31_3
-; RV64IA-NEXT:  # %bb.2: # in Loop: Header=BB31_1 Depth=1
-; RV64IA-NEXT:    sc.d a4, a2, (a0)
-; RV64IA-NEXT:    bnez a4, .LBB31_1
-; RV64IA-NEXT:  .LBB31_3:
-; RV64IA-NEXT:    ret
+; RV64IA-WMO-LABEL: cmpxchg_i64_acquire_monotonic:
+; RV64IA-WMO:       # %bb.0:
+; RV64IA-WMO-NEXT:  .LBB31_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT:    lr.d.aq a3, (a0)
+; RV64IA-WMO-NEXT:    bne a3, a1, .LBB31_3
+; RV64IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB31_1 Depth=1
+; RV64IA-WMO-NEXT:    sc.d a4, a2, (a0)
+; RV64IA-WMO-NEXT:    bnez a4, .LBB31_1
+; RV64IA-WMO-NEXT:  .LBB31_3:
+; RV64IA-WMO-NEXT:    ret
+;
+; RV64IA-TSO-LABEL: cmpxchg_i64_acquire_monotonic:
+; RV64IA-TSO:       # %bb.0:
+; RV64IA-TSO-NEXT:  .LBB31_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT:    lr.d a3, (a0)
+; RV64IA-TSO-NEXT:    bne a3, a1, .LBB31_3
+; RV64IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB31_1 Depth=1
+; RV64IA-TSO-NEXT:    sc.d a4, a2, (a0)
+; RV64IA-TSO-NEXT:    bnez a4, .LBB31_1
+; RV64IA-TSO-NEXT:  .LBB31_3:
+; RV64IA-TSO-NEXT:    ret
   %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val acquire monotonic
   ret void
 }
@@ -2252,16 +2965,27 @@ define void @cmpxchg_i64_acquire_acquire(ptr %ptr, i64 %cmp, i64 %val) nounwind
 ; RV64I-NEXT:    addi sp, sp, 16
 ; RV64I-NEXT:    ret
 ;
-; RV64IA-LABEL: cmpxchg_i64_acquire_acquire:
-; RV64IA:       # %bb.0:
-; RV64IA-NEXT:  .LBB32_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.d.aq a3, (a0)
-; RV64IA-NEXT:    bne a3, a1, .LBB32_3
-; RV64IA-NEXT:  # %bb.2: # in Loop: Header=BB32_1 Depth=1
-; RV64IA-NEXT:    sc.d a4, a2, (a0)
-; RV64IA-NEXT:    bnez a4, .LBB32_1
-; RV64IA-NEXT:  .LBB32_3:
-; RV64IA-NEXT:    ret
+; RV64IA-WMO-LABEL: cmpxchg_i64_acquire_acquire:
+; RV64IA-WMO:       # %bb.0:
+; RV64IA-WMO-NEXT:  .LBB32_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT:    lr.d.aq a3, (a0)
+; RV64IA-WMO-NEXT:    bne a3, a1, .LBB32_3
+; RV64IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB32_1 Depth=1
+; RV64IA-WMO-NEXT:    sc.d a4, a2, (a0)
+; RV64IA-WMO-NEXT:    bnez a4, .LBB32_1
+; RV64IA-WMO-NEXT:  .LBB32_3:
+; RV64IA-WMO-NEXT:    ret
+;
+; RV64IA-TSO-LABEL: cmpxchg_i64_acquire_acquire:
+; RV64IA-TSO:       # %bb.0:
+; RV64IA-TSO-NEXT:  .LBB32_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT:    lr.d a3, (a0)
+; RV64IA-TSO-NEXT:    bne a3, a1, .LBB32_3
+; RV64IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB32_1 Depth=1
+; RV64IA-TSO-NEXT:    sc.d a4, a2, (a0)
+; RV64IA-TSO-NEXT:    bnez a4, .LBB32_1
+; RV64IA-TSO-NEXT:  .LBB32_3:
+; RV64IA-TSO-NEXT:    ret
   %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val acquire acquire
   ret void
 }
@@ -2314,16 +3038,27 @@ define void @cmpxchg_i64_release_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwin
 ; RV64I-NEXT:    addi sp, sp, 16
 ; RV64I-NEXT:    ret
 ;
-; RV64IA-LABEL: cmpxchg_i64_release_monotonic:
-; RV64IA:       # %bb.0:
-; RV64IA-NEXT:  .LBB33_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.d a3, (a0)
-; RV64IA-NEXT:    bne a3, a1, .LBB33_3
-; RV64IA-NEXT:  # %bb.2: # in Loop: Header=BB33_1 Depth=1
-; RV64IA-NEXT:    sc.d.rl a4, a2, (a0)
-; RV64IA-NEXT:    bnez a4, .LBB33_1
-; RV64IA-NEXT:  .LBB33_3:
-; RV64IA-NEXT:    ret
+; RV64IA-WMO-LABEL: cmpxchg_i64_release_monotonic:
+; RV64IA-WMO:       # %bb.0:
+; RV64IA-WMO-NEXT:  .LBB33_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT:    lr.d a3, (a0)
+; RV64IA-WMO-NEXT:    bne a3, a1, .LBB33_3
+; RV64IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB33_1 Depth=1
+; RV64IA-WMO-NEXT:    sc.d.rl a4, a2, (a0)
+; RV64IA-WMO-NEXT:    bnez a4, .LBB33_1
+; RV64IA-WMO-NEXT:  .LBB33_3:
+; RV64IA-WMO-NEXT:    ret
+;
+; RV64IA-TSO-LABEL: cmpxchg_i64_release_monotonic:
+; RV64IA-TSO:       # %bb.0:
+; RV64IA-TSO-NEXT:  .LBB33_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT:    lr.d a3, (a0)
+; RV64IA-TSO-NEXT:    bne a3, a1, .LBB33_3
+; RV64IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB33_1 Depth=1
+; RV64IA-TSO-NEXT:    sc.d a4, a2, (a0)
+; RV64IA-TSO-NEXT:    bnez a4, .LBB33_1
+; RV64IA-TSO-NEXT:  .LBB33_3:
+; RV64IA-TSO-NEXT:    ret
   %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val release monotonic
   ret void
 }
@@ -2376,16 +3111,27 @@ define void @cmpxchg_i64_release_acquire(ptr %ptr, i64 %cmp, i64 %val) nounwind
 ; RV64I-NEXT:    addi sp, sp, 16
 ; RV64I-NEXT:    ret
 ;
-; RV64IA-LABEL: cmpxchg_i64_release_acquire:
-; RV64IA:       # %bb.0:
-; RV64IA-NEXT:  .LBB34_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.d.aq a3, (a0)
-; RV64IA-NEXT:    bne a3, a1, .LBB34_3
-; RV64IA-NEXT:  # %bb.2: # in Loop: Header=BB34_1 Depth=1
-; RV64IA-NEXT:    sc.d.rl a4, a2, (a0)
-; RV64IA-NEXT:    bnez a4, .LBB34_1
-; RV64IA-NEXT:  .LBB34_3:
-; RV64IA-NEXT:    ret
+; RV64IA-WMO-LABEL: cmpxchg_i64_release_acquire:
+; RV64IA-WMO:       # %bb.0:
+; RV64IA-WMO-NEXT:  .LBB34_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT:    lr.d.aq a3, (a0)
+; RV64IA-WMO-NEXT:    bne a3, a1, .LBB34_3
+; RV64IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB34_1 Depth=1
+; RV64IA-WMO-NEXT:    sc.d.rl a4, a2, (a0)
+; RV64IA-WMO-NEXT:    bnez a4, .LBB34_1
+; RV64IA-WMO-NEXT:  .LBB34_3:
+; RV64IA-WMO-NEXT:    ret
+;
+; RV64IA-TSO-LABEL: cmpxchg_i64_release_acquire:
+; RV64IA-TSO:       # %bb.0:
+; RV64IA-TSO-NEXT:  .LBB34_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT:    lr.d a3, (a0)
+; RV64IA-TSO-NEXT:    bne a3, a1, .LBB34_3
+; RV64IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB34_1 Depth=1
+; RV64IA-TSO-NEXT:    sc.d a4, a2, (a0)
+; RV64IA-TSO-NEXT:    bnez a4, .LBB34_1
+; RV64IA-TSO-NEXT:  .LBB34_3:
+; RV64IA-TSO-NEXT:    ret
   %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val release acquire
   ret void
 }
@@ -2438,16 +3184,27 @@ define void @cmpxchg_i64_acq_rel_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwin
 ; RV64I-NEXT:    addi sp, sp, 16
 ; RV64I-NEXT:    ret
 ;
-; RV64IA-LABEL: cmpxchg_i64_acq_rel_monotonic:
-; RV64IA:       # %bb.0:
-; RV64IA-NEXT:  .LBB35_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.d.aq a3, (a0)
-; RV64IA-NEXT:    bne a3, a1, .LBB35_3
-; RV64IA-NEXT:  # %bb.2: # in Loop: Header=BB35_1 Depth=1
-; RV64IA-NEXT:    sc.d.rl a4, a2, (a0)
-; RV64IA-NEXT:    bnez a4, .LBB35_1
-; RV64IA-NEXT:  .LBB35_3:
-; RV64IA-NEXT:    ret
+; RV64IA-WMO-LABEL: cmpxchg_i64_acq_rel_monotonic:
+; RV64IA-WMO:       # %bb.0:
+; RV64IA-WMO-NEXT:  .LBB35_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT:    lr.d.aq a3, (a0)
+; RV64IA-WMO-NEXT:    bne a3, a1, .LBB35_3
+; RV64IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB35_1 Depth=1
+; RV64IA-WMO-NEXT:    sc.d.rl a4, a2, (a0)
+; RV64IA-WMO-NEXT:    bnez a4, .LBB35_1
+; RV64IA-WMO-NEXT:  .LBB35_3:
+; RV64IA-WMO-NEXT:    ret
+;
+; RV64IA-TSO-LABEL: cmpxchg_i64_acq_rel_monotonic:
+; RV64IA-TSO:       # %bb.0:
+; RV64IA-TSO-NEXT:  .LBB35_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT:    lr.d a3, (a0)
+; RV64IA-TSO-NEXT:    bne a3, a1, .LBB35_3
+; RV64IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB35_1 Depth=1
+; RV64IA-TSO-NEXT:    sc.d a4, a2, (a0)
+; RV64IA-TSO-NEXT:    bnez a4, .LBB35_1
+; RV64IA-TSO-NEXT:  .LBB35_3:
+; RV64IA-TSO-NEXT:    ret
   %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val acq_rel monotonic
   ret void
 }
@@ -2500,16 +3257,27 @@ define void @cmpxchg_i64_acq_rel_acquire(ptr %ptr, i64 %cmp, i64 %val) nounwind
 ; RV64I-NEXT:    addi sp, sp, 16
 ; RV64I-NEXT:    ret
 ;
-; RV64IA-LABEL: cmpxchg_i64_acq_rel_acquire:
-; RV64IA:       # %bb.0:
-; RV64IA-NEXT:  .LBB36_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.d.aq a3, (a0)
-; RV64IA-NEXT:    bne a3, a1, .LBB36_3
-; RV64IA-NEXT:  # %bb.2: # in Loop: Header=BB36_1 Depth=1
-; RV64IA-NEXT:    sc.d.rl a4, a2, (a0)
-; RV64IA-NEXT:    bnez a4, .LBB36_1
-; RV64IA-NEXT:  .LBB36_3:
-; RV64IA-NEXT:    ret
+; RV64IA-WMO-LABEL: cmpxchg_i64_acq_rel_acquire:
+; RV64IA-WMO:       # %bb.0:
+; RV64IA-WMO-NEXT:  .LBB36_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT:    lr.d.aq a3, (a0)
+; RV64IA-WMO-NEXT:    bne a3, a1, .LBB36_3
+; RV64IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB36_1 Depth=1
+; RV64IA-WMO-NEXT:    sc.d.rl a4, a2, (a0)
+; RV64IA-WMO-NEXT:    bnez a4, .LBB36_1
+; RV64IA-WMO-NEXT:  .LBB36_3:
+; RV64IA-WMO-NEXT:    ret
+;
+; RV64IA-TSO-LABEL: cmpxchg_i64_acq_rel_acquire:
+; RV64IA-TSO:       # %bb.0:
+; RV64IA-TSO-NEXT:  .LBB36_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT:    lr.d a3, (a0)
+; RV64IA-TSO-NEXT:    bne a3, a1, .LBB36_3
+; RV64IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB36_1 Depth=1
+; RV64IA-TSO-NEXT:    sc.d a4, a2, (a0)
+; RV64IA-TSO-NEXT:    bnez a4, .LBB36_1
+; RV64IA-TSO-NEXT:  .LBB36_3:
+; RV64IA-TSO-NEXT:    ret
   %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val acq_rel acquire
   ret void
 }

diff  --git a/llvm/test/CodeGen/RISCV/atomic-load-store.ll b/llvm/test/CodeGen/RISCV/atomic-load-store.ll
index 1ca234db4c78e6..d3488ebed89f60 100644
--- a/llvm/test/CodeGen/RISCV/atomic-load-store.ll
+++ b/llvm/test/CodeGen/RISCV/atomic-load-store.ll
@@ -1010,6 +1010,7 @@ define void @atomic_store_i8_seq_cst(ptr %a, i8 %b) nounwind {
 ; RV32IA-TSO-LABEL: atomic_store_i8_seq_cst:
 ; RV32IA-TSO:       # %bb.0:
 ; RV32IA-TSO-NEXT:    sb a1, 0(a0)
+; RV32IA-TSO-NEXT:    fence rw, rw
 ; RV32IA-TSO-NEXT:    ret
 ;
 ; RV64I-LABEL: atomic_store_i8_seq_cst:
@@ -1031,6 +1032,7 @@ define void @atomic_store_i8_seq_cst(ptr %a, i8 %b) nounwind {
 ; RV64IA-TSO-LABEL: atomic_store_i8_seq_cst:
 ; RV64IA-TSO:       # %bb.0:
 ; RV64IA-TSO-NEXT:    sb a1, 0(a0)
+; RV64IA-TSO-NEXT:    fence rw, rw
 ; RV64IA-TSO-NEXT:    ret
 ;
 ; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_store_i8_seq_cst:
@@ -1043,6 +1045,7 @@ define void @atomic_store_i8_seq_cst(ptr %a, i8 %b) nounwind {
 ; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_store_i8_seq_cst:
 ; RV32IA-TSO-TRAILING-FENCE:       # %bb.0:
 ; RV32IA-TSO-TRAILING-FENCE-NEXT:    sb a1, 0(a0)
+; RV32IA-TSO-TRAILING-FENCE-NEXT:    fence rw, rw
 ; RV32IA-TSO-TRAILING-FENCE-NEXT:    ret
 ;
 ; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_store_i8_seq_cst:
@@ -1055,6 +1058,7 @@ define void @atomic_store_i8_seq_cst(ptr %a, i8 %b) nounwind {
 ; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_store_i8_seq_cst:
 ; RV64IA-TSO-TRAILING-FENCE:       # %bb.0:
 ; RV64IA-TSO-TRAILING-FENCE-NEXT:    sb a1, 0(a0)
+; RV64IA-TSO-TRAILING-FENCE-NEXT:    fence rw, rw
 ; RV64IA-TSO-TRAILING-FENCE-NEXT:    ret
   store atomic i8 %b, ptr %a seq_cst, align 1
   ret void
@@ -1216,6 +1220,7 @@ define void @atomic_store_i16_seq_cst(ptr %a, i16 %b) nounwind {
 ; RV32IA-TSO-LABEL: atomic_store_i16_seq_cst:
 ; RV32IA-TSO:       # %bb.0:
 ; RV32IA-TSO-NEXT:    sh a1, 0(a0)
+; RV32IA-TSO-NEXT:    fence rw, rw
 ; RV32IA-TSO-NEXT:    ret
 ;
 ; RV64I-LABEL: atomic_store_i16_seq_cst:
@@ -1237,6 +1242,7 @@ define void @atomic_store_i16_seq_cst(ptr %a, i16 %b) nounwind {
 ; RV64IA-TSO-LABEL: atomic_store_i16_seq_cst:
 ; RV64IA-TSO:       # %bb.0:
 ; RV64IA-TSO-NEXT:    sh a1, 0(a0)
+; RV64IA-TSO-NEXT:    fence rw, rw
 ; RV64IA-TSO-NEXT:    ret
 ;
 ; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_store_i16_seq_cst:
@@ -1249,6 +1255,7 @@ define void @atomic_store_i16_seq_cst(ptr %a, i16 %b) nounwind {
 ; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_store_i16_seq_cst:
 ; RV32IA-TSO-TRAILING-FENCE:       # %bb.0:
 ; RV32IA-TSO-TRAILING-FENCE-NEXT:    sh a1, 0(a0)
+; RV32IA-TSO-TRAILING-FENCE-NEXT:    fence rw, rw
 ; RV32IA-TSO-TRAILING-FENCE-NEXT:    ret
 ;
 ; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_store_i16_seq_cst:
@@ -1261,6 +1268,7 @@ define void @atomic_store_i16_seq_cst(ptr %a, i16 %b) nounwind {
 ; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_store_i16_seq_cst:
 ; RV64IA-TSO-TRAILING-FENCE:       # %bb.0:
 ; RV64IA-TSO-TRAILING-FENCE-NEXT:    sh a1, 0(a0)
+; RV64IA-TSO-TRAILING-FENCE-NEXT:    fence rw, rw
 ; RV64IA-TSO-TRAILING-FENCE-NEXT:    ret
   store atomic i16 %b, ptr %a seq_cst, align 2
   ret void
@@ -1422,6 +1430,7 @@ define void @atomic_store_i32_seq_cst(ptr %a, i32 %b) nounwind {
 ; RV32IA-TSO-LABEL: atomic_store_i32_seq_cst:
 ; RV32IA-TSO:       # %bb.0:
 ; RV32IA-TSO-NEXT:    sw a1, 0(a0)
+; RV32IA-TSO-NEXT:    fence rw, rw
 ; RV32IA-TSO-NEXT:    ret
 ;
 ; RV64I-LABEL: atomic_store_i32_seq_cst:
@@ -1443,6 +1452,7 @@ define void @atomic_store_i32_seq_cst(ptr %a, i32 %b) nounwind {
 ; RV64IA-TSO-LABEL: atomic_store_i32_seq_cst:
 ; RV64IA-TSO:       # %bb.0:
 ; RV64IA-TSO-NEXT:    sw a1, 0(a0)
+; RV64IA-TSO-NEXT:    fence rw, rw
 ; RV64IA-TSO-NEXT:    ret
 ;
 ; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_store_i32_seq_cst:
@@ -1455,6 +1465,7 @@ define void @atomic_store_i32_seq_cst(ptr %a, i32 %b) nounwind {
 ; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_store_i32_seq_cst:
 ; RV32IA-TSO-TRAILING-FENCE:       # %bb.0:
 ; RV32IA-TSO-TRAILING-FENCE-NEXT:    sw a1, 0(a0)
+; RV32IA-TSO-TRAILING-FENCE-NEXT:    fence rw, rw
 ; RV32IA-TSO-TRAILING-FENCE-NEXT:    ret
 ;
 ; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_store_i32_seq_cst:
@@ -1467,6 +1478,7 @@ define void @atomic_store_i32_seq_cst(ptr %a, i32 %b) nounwind {
 ; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_store_i32_seq_cst:
 ; RV64IA-TSO-TRAILING-FENCE:       # %bb.0:
 ; RV64IA-TSO-TRAILING-FENCE-NEXT:    sw a1, 0(a0)
+; RV64IA-TSO-TRAILING-FENCE-NEXT:    fence rw, rw
 ; RV64IA-TSO-TRAILING-FENCE-NEXT:    ret
   store atomic i32 %b, ptr %a seq_cst, align 4
   ret void
@@ -1646,6 +1658,7 @@ define void @atomic_store_i64_seq_cst(ptr %a, i64 %b) nounwind {
 ; RV64IA-TSO-LABEL: atomic_store_i64_seq_cst:
 ; RV64IA-TSO:       # %bb.0:
 ; RV64IA-TSO-NEXT:    sd a1, 0(a0)
+; RV64IA-TSO-NEXT:    fence rw, rw
 ; RV64IA-TSO-NEXT:    ret
 ;
 ; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_store_i64_seq_cst:
@@ -1658,6 +1671,7 @@ define void @atomic_store_i64_seq_cst(ptr %a, i64 %b) nounwind {
 ; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_store_i64_seq_cst:
 ; RV64IA-TSO-TRAILING-FENCE:       # %bb.0:
 ; RV64IA-TSO-TRAILING-FENCE-NEXT:    sd a1, 0(a0)
+; RV64IA-TSO-TRAILING-FENCE-NEXT:    fence rw, rw
 ; RV64IA-TSO-TRAILING-FENCE-NEXT:    ret
   store atomic i64 %b, ptr %a seq_cst, align 8
   ret void

diff  --git a/llvm/test/CodeGen/RISCV/atomic-rmw.ll b/llvm/test/CodeGen/RISCV/atomic-rmw.ll
index 813bbd100f8efa..c47d4484d8c0d8 100644
--- a/llvm/test/CodeGen/RISCV/atomic-rmw.ll
+++ b/llvm/test/CodeGen/RISCV/atomic-rmw.ll
@@ -2,15 +2,15 @@
 ; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
 ; RUN:   | FileCheck -check-prefix=RV32I %s
 ; RUN: llc -mtriple=riscv32 -mattr=+a -verify-machineinstrs < %s \
-; RUN:   | FileCheck -check-prefix=RV32IA %s
+; RUN:   | FileCheck -check-prefixes=RV32IA,RV32IA-WMO %s
 ; RUN: llc -mtriple=riscv32 -mattr=+a,+experimental-ztso -verify-machineinstrs < %s \
-; RUN:   | FileCheck -check-prefix=RV32IA %s
+; RUN:   | FileCheck -check-prefixes=RV32IA,RV32IA-TSO %s
 ; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
 ; RUN:   | FileCheck -check-prefix=RV64I %s
 ; RUN: llc -mtriple=riscv64 -mattr=+a -verify-machineinstrs < %s \
-; RUN:   | FileCheck -check-prefix=RV64IA %s
+; RUN:   | FileCheck -check-prefixes=RV64IA,RV64IA-WMO %s
 ; RUN: llc -mtriple=riscv64 -mattr=+a,+experimental-ztso -verify-machineinstrs < %s \
-; RUN:   | FileCheck -check-prefix=RV64IA %s
+; RUN:   | FileCheck -check-prefixes=RV64IA,RV64IA-TSO %s
 
 define i8 @atomicrmw_xchg_i8_monotonic(ptr %a, i8 %b) nounwind {
 ; RV32I-LABEL: atomicrmw_xchg_i8_monotonic:
@@ -87,25 +87,45 @@ define i8 @atomicrmw_xchg_i8_acquire(ptr %a, i8 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: atomicrmw_xchg_i8_acquire:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a2, a0, -4
-; RV32IA-NEXT:    slli a0, a0, 3
-; RV32IA-NEXT:    li a3, 255
-; RV32IA-NEXT:    sll a3, a3, a0
-; RV32IA-NEXT:    andi a1, a1, 255
-; RV32IA-NEXT:    sll a1, a1, a0
-; RV32IA-NEXT:  .LBB1_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w.aq a4, (a2)
-; RV32IA-NEXT:    mv a5, a1
-; RV32IA-NEXT:    xor a5, a4, a5
-; RV32IA-NEXT:    and a5, a5, a3
-; RV32IA-NEXT:    xor a5, a4, a5
-; RV32IA-NEXT:    sc.w a5, a5, (a2)
-; RV32IA-NEXT:    bnez a5, .LBB1_1
-; RV32IA-NEXT:  # %bb.2:
-; RV32IA-NEXT:    srl a0, a4, a0
-; RV32IA-NEXT:    ret
+; RV32IA-WMO-LABEL: atomicrmw_xchg_i8_acquire:
+; RV32IA-WMO:       # %bb.0:
+; RV32IA-WMO-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NEXT:    li a3, 255
+; RV32IA-WMO-NEXT:    sll a3, a3, a0
+; RV32IA-WMO-NEXT:    andi a1, a1, 255
+; RV32IA-WMO-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NEXT:  .LBB1_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NEXT:    lr.w.aq a4, (a2)
+; RV32IA-WMO-NEXT:    mv a5, a1
+; RV32IA-WMO-NEXT:    xor a5, a4, a5
+; RV32IA-WMO-NEXT:    and a5, a5, a3
+; RV32IA-WMO-NEXT:    xor a5, a4, a5
+; RV32IA-WMO-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-WMO-NEXT:    bnez a5, .LBB1_1
+; RV32IA-WMO-NEXT:  # %bb.2:
+; RV32IA-WMO-NEXT:    srl a0, a4, a0
+; RV32IA-WMO-NEXT:    ret
+;
+; RV32IA-TSO-LABEL: atomicrmw_xchg_i8_acquire:
+; RV32IA-TSO:       # %bb.0:
+; RV32IA-TSO-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NEXT:    li a3, 255
+; RV32IA-TSO-NEXT:    sll a3, a3, a0
+; RV32IA-TSO-NEXT:    andi a1, a1, 255
+; RV32IA-TSO-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NEXT:  .LBB1_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NEXT:    lr.w a4, (a2)
+; RV32IA-TSO-NEXT:    mv a5, a1
+; RV32IA-TSO-NEXT:    xor a5, a4, a5
+; RV32IA-TSO-NEXT:    and a5, a5, a3
+; RV32IA-TSO-NEXT:    xor a5, a4, a5
+; RV32IA-TSO-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-NEXT:    bnez a5, .LBB1_1
+; RV32IA-TSO-NEXT:  # %bb.2:
+; RV32IA-TSO-NEXT:    srl a0, a4, a0
+; RV32IA-TSO-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_xchg_i8_acquire:
 ; RV64I:       # %bb.0:
@@ -117,25 +137,45 @@ define i8 @atomicrmw_xchg_i8_acquire(ptr %a, i8 %b) nounwind {
 ; RV64I-NEXT:    addi sp, sp, 16
 ; RV64I-NEXT:    ret
 ;
-; RV64IA-LABEL: atomicrmw_xchg_i8_acquire:
-; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a2, a0, -4
-; RV64IA-NEXT:    slli a0, a0, 3
-; RV64IA-NEXT:    li a3, 255
-; RV64IA-NEXT:    sllw a3, a3, a0
-; RV64IA-NEXT:    andi a1, a1, 255
-; RV64IA-NEXT:    sllw a1, a1, a0
-; RV64IA-NEXT:  .LBB1_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w.aq a4, (a2)
-; RV64IA-NEXT:    mv a5, a1
-; RV64IA-NEXT:    xor a5, a4, a5
-; RV64IA-NEXT:    and a5, a5, a3
-; RV64IA-NEXT:    xor a5, a4, a5
-; RV64IA-NEXT:    sc.w a5, a5, (a2)
-; RV64IA-NEXT:    bnez a5, .LBB1_1
-; RV64IA-NEXT:  # %bb.2:
-; RV64IA-NEXT:    srlw a0, a4, a0
-; RV64IA-NEXT:    ret
+; RV64IA-WMO-LABEL: atomicrmw_xchg_i8_acquire:
+; RV64IA-WMO:       # %bb.0:
+; RV64IA-WMO-NEXT:    andi a2, a0, -4
+; RV64IA-WMO-NEXT:    slli a0, a0, 3
+; RV64IA-WMO-NEXT:    li a3, 255
+; RV64IA-WMO-NEXT:    sllw a3, a3, a0
+; RV64IA-WMO-NEXT:    andi a1, a1, 255
+; RV64IA-WMO-NEXT:    sllw a1, a1, a0
+; RV64IA-WMO-NEXT:  .LBB1_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT:    lr.w.aq a4, (a2)
+; RV64IA-WMO-NEXT:    mv a5, a1
+; RV64IA-WMO-NEXT:    xor a5, a4, a5
+; RV64IA-WMO-NEXT:    and a5, a5, a3
+; RV64IA-WMO-NEXT:    xor a5, a4, a5
+; RV64IA-WMO-NEXT:    sc.w a5, a5, (a2)
+; RV64IA-WMO-NEXT:    bnez a5, .LBB1_1
+; RV64IA-WMO-NEXT:  # %bb.2:
+; RV64IA-WMO-NEXT:    srlw a0, a4, a0
+; RV64IA-WMO-NEXT:    ret
+;
+; RV64IA-TSO-LABEL: atomicrmw_xchg_i8_acquire:
+; RV64IA-TSO:       # %bb.0:
+; RV64IA-TSO-NEXT:    andi a2, a0, -4
+; RV64IA-TSO-NEXT:    slli a0, a0, 3
+; RV64IA-TSO-NEXT:    li a3, 255
+; RV64IA-TSO-NEXT:    sllw a3, a3, a0
+; RV64IA-TSO-NEXT:    andi a1, a1, 255
+; RV64IA-TSO-NEXT:    sllw a1, a1, a0
+; RV64IA-TSO-NEXT:  .LBB1_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT:    lr.w a4, (a2)
+; RV64IA-TSO-NEXT:    mv a5, a1
+; RV64IA-TSO-NEXT:    xor a5, a4, a5
+; RV64IA-TSO-NEXT:    and a5, a5, a3
+; RV64IA-TSO-NEXT:    xor a5, a4, a5
+; RV64IA-TSO-NEXT:    sc.w a5, a5, (a2)
+; RV64IA-TSO-NEXT:    bnez a5, .LBB1_1
+; RV64IA-TSO-NEXT:  # %bb.2:
+; RV64IA-TSO-NEXT:    srlw a0, a4, a0
+; RV64IA-TSO-NEXT:    ret
   %1 = atomicrmw xchg ptr %a, i8 %b acquire
   ret i8 %1
 }
@@ -151,25 +191,45 @@ define i8 @atomicrmw_xchg_i8_release(ptr %a, i8 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: atomicrmw_xchg_i8_release:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a2, a0, -4
-; RV32IA-NEXT:    slli a0, a0, 3
-; RV32IA-NEXT:    li a3, 255
-; RV32IA-NEXT:    sll a3, a3, a0
-; RV32IA-NEXT:    andi a1, a1, 255
-; RV32IA-NEXT:    sll a1, a1, a0
-; RV32IA-NEXT:  .LBB2_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w a4, (a2)
-; RV32IA-NEXT:    mv a5, a1
-; RV32IA-NEXT:    xor a5, a4, a5
-; RV32IA-NEXT:    and a5, a5, a3
-; RV32IA-NEXT:    xor a5, a4, a5
-; RV32IA-NEXT:    sc.w.rl a5, a5, (a2)
-; RV32IA-NEXT:    bnez a5, .LBB2_1
-; RV32IA-NEXT:  # %bb.2:
-; RV32IA-NEXT:    srl a0, a4, a0
-; RV32IA-NEXT:    ret
+; RV32IA-WMO-LABEL: atomicrmw_xchg_i8_release:
+; RV32IA-WMO:       # %bb.0:
+; RV32IA-WMO-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NEXT:    li a3, 255
+; RV32IA-WMO-NEXT:    sll a3, a3, a0
+; RV32IA-WMO-NEXT:    andi a1, a1, 255
+; RV32IA-WMO-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NEXT:  .LBB2_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NEXT:    lr.w a4, (a2)
+; RV32IA-WMO-NEXT:    mv a5, a1
+; RV32IA-WMO-NEXT:    xor a5, a4, a5
+; RV32IA-WMO-NEXT:    and a5, a5, a3
+; RV32IA-WMO-NEXT:    xor a5, a4, a5
+; RV32IA-WMO-NEXT:    sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-NEXT:    bnez a5, .LBB2_1
+; RV32IA-WMO-NEXT:  # %bb.2:
+; RV32IA-WMO-NEXT:    srl a0, a4, a0
+; RV32IA-WMO-NEXT:    ret
+;
+; RV32IA-TSO-LABEL: atomicrmw_xchg_i8_release:
+; RV32IA-TSO:       # %bb.0:
+; RV32IA-TSO-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NEXT:    li a3, 255
+; RV32IA-TSO-NEXT:    sll a3, a3, a0
+; RV32IA-TSO-NEXT:    andi a1, a1, 255
+; RV32IA-TSO-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NEXT:  .LBB2_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NEXT:    lr.w a4, (a2)
+; RV32IA-TSO-NEXT:    mv a5, a1
+; RV32IA-TSO-NEXT:    xor a5, a4, a5
+; RV32IA-TSO-NEXT:    and a5, a5, a3
+; RV32IA-TSO-NEXT:    xor a5, a4, a5
+; RV32IA-TSO-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-NEXT:    bnez a5, .LBB2_1
+; RV32IA-TSO-NEXT:  # %bb.2:
+; RV32IA-TSO-NEXT:    srl a0, a4, a0
+; RV32IA-TSO-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_xchg_i8_release:
 ; RV64I:       # %bb.0:
@@ -181,25 +241,45 @@ define i8 @atomicrmw_xchg_i8_release(ptr %a, i8 %b) nounwind {
 ; RV64I-NEXT:    addi sp, sp, 16
 ; RV64I-NEXT:    ret
 ;
-; RV64IA-LABEL: atomicrmw_xchg_i8_release:
-; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a2, a0, -4
-; RV64IA-NEXT:    slli a0, a0, 3
-; RV64IA-NEXT:    li a3, 255
-; RV64IA-NEXT:    sllw a3, a3, a0
-; RV64IA-NEXT:    andi a1, a1, 255
-; RV64IA-NEXT:    sllw a1, a1, a0
-; RV64IA-NEXT:  .LBB2_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w a4, (a2)
-; RV64IA-NEXT:    mv a5, a1
-; RV64IA-NEXT:    xor a5, a4, a5
-; RV64IA-NEXT:    and a5, a5, a3
-; RV64IA-NEXT:    xor a5, a4, a5
-; RV64IA-NEXT:    sc.w.rl a5, a5, (a2)
-; RV64IA-NEXT:    bnez a5, .LBB2_1
-; RV64IA-NEXT:  # %bb.2:
-; RV64IA-NEXT:    srlw a0, a4, a0
-; RV64IA-NEXT:    ret
+; RV64IA-WMO-LABEL: atomicrmw_xchg_i8_release:
+; RV64IA-WMO:       # %bb.0:
+; RV64IA-WMO-NEXT:    andi a2, a0, -4
+; RV64IA-WMO-NEXT:    slli a0, a0, 3
+; RV64IA-WMO-NEXT:    li a3, 255
+; RV64IA-WMO-NEXT:    sllw a3, a3, a0
+; RV64IA-WMO-NEXT:    andi a1, a1, 255
+; RV64IA-WMO-NEXT:    sllw a1, a1, a0
+; RV64IA-WMO-NEXT:  .LBB2_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT:    lr.w a4, (a2)
+; RV64IA-WMO-NEXT:    mv a5, a1
+; RV64IA-WMO-NEXT:    xor a5, a4, a5
+; RV64IA-WMO-NEXT:    and a5, a5, a3
+; RV64IA-WMO-NEXT:    xor a5, a4, a5
+; RV64IA-WMO-NEXT:    sc.w.rl a5, a5, (a2)
+; RV64IA-WMO-NEXT:    bnez a5, .LBB2_1
+; RV64IA-WMO-NEXT:  # %bb.2:
+; RV64IA-WMO-NEXT:    srlw a0, a4, a0
+; RV64IA-WMO-NEXT:    ret
+;
+; RV64IA-TSO-LABEL: atomicrmw_xchg_i8_release:
+; RV64IA-TSO:       # %bb.0:
+; RV64IA-TSO-NEXT:    andi a2, a0, -4
+; RV64IA-TSO-NEXT:    slli a0, a0, 3
+; RV64IA-TSO-NEXT:    li a3, 255
+; RV64IA-TSO-NEXT:    sllw a3, a3, a0
+; RV64IA-TSO-NEXT:    andi a1, a1, 255
+; RV64IA-TSO-NEXT:    sllw a1, a1, a0
+; RV64IA-TSO-NEXT:  .LBB2_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT:    lr.w a4, (a2)
+; RV64IA-TSO-NEXT:    mv a5, a1
+; RV64IA-TSO-NEXT:    xor a5, a4, a5
+; RV64IA-TSO-NEXT:    and a5, a5, a3
+; RV64IA-TSO-NEXT:    xor a5, a4, a5
+; RV64IA-TSO-NEXT:    sc.w a5, a5, (a2)
+; RV64IA-TSO-NEXT:    bnez a5, .LBB2_1
+; RV64IA-TSO-NEXT:  # %bb.2:
+; RV64IA-TSO-NEXT:    srlw a0, a4, a0
+; RV64IA-TSO-NEXT:    ret
   %1 = atomicrmw xchg ptr %a, i8 %b release
   ret i8 %1
 }
@@ -215,25 +295,45 @@ define i8 @atomicrmw_xchg_i8_acq_rel(ptr %a, i8 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: atomicrmw_xchg_i8_acq_rel:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a2, a0, -4
-; RV32IA-NEXT:    slli a0, a0, 3
-; RV32IA-NEXT:    li a3, 255
-; RV32IA-NEXT:    sll a3, a3, a0
-; RV32IA-NEXT:    andi a1, a1, 255
-; RV32IA-NEXT:    sll a1, a1, a0
-; RV32IA-NEXT:  .LBB3_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w.aq a4, (a2)
-; RV32IA-NEXT:    mv a5, a1
-; RV32IA-NEXT:    xor a5, a4, a5
-; RV32IA-NEXT:    and a5, a5, a3
-; RV32IA-NEXT:    xor a5, a4, a5
-; RV32IA-NEXT:    sc.w.rl a5, a5, (a2)
-; RV32IA-NEXT:    bnez a5, .LBB3_1
-; RV32IA-NEXT:  # %bb.2:
-; RV32IA-NEXT:    srl a0, a4, a0
-; RV32IA-NEXT:    ret
+; RV32IA-WMO-LABEL: atomicrmw_xchg_i8_acq_rel:
+; RV32IA-WMO:       # %bb.0:
+; RV32IA-WMO-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NEXT:    li a3, 255
+; RV32IA-WMO-NEXT:    sll a3, a3, a0
+; RV32IA-WMO-NEXT:    andi a1, a1, 255
+; RV32IA-WMO-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NEXT:  .LBB3_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NEXT:    lr.w.aq a4, (a2)
+; RV32IA-WMO-NEXT:    mv a5, a1
+; RV32IA-WMO-NEXT:    xor a5, a4, a5
+; RV32IA-WMO-NEXT:    and a5, a5, a3
+; RV32IA-WMO-NEXT:    xor a5, a4, a5
+; RV32IA-WMO-NEXT:    sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-NEXT:    bnez a5, .LBB3_1
+; RV32IA-WMO-NEXT:  # %bb.2:
+; RV32IA-WMO-NEXT:    srl a0, a4, a0
+; RV32IA-WMO-NEXT:    ret
+;
+; RV32IA-TSO-LABEL: atomicrmw_xchg_i8_acq_rel:
+; RV32IA-TSO:       # %bb.0:
+; RV32IA-TSO-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NEXT:    li a3, 255
+; RV32IA-TSO-NEXT:    sll a3, a3, a0
+; RV32IA-TSO-NEXT:    andi a1, a1, 255
+; RV32IA-TSO-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NEXT:  .LBB3_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NEXT:    lr.w a4, (a2)
+; RV32IA-TSO-NEXT:    mv a5, a1
+; RV32IA-TSO-NEXT:    xor a5, a4, a5
+; RV32IA-TSO-NEXT:    and a5, a5, a3
+; RV32IA-TSO-NEXT:    xor a5, a4, a5
+; RV32IA-TSO-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-NEXT:    bnez a5, .LBB3_1
+; RV32IA-TSO-NEXT:  # %bb.2:
+; RV32IA-TSO-NEXT:    srl a0, a4, a0
+; RV32IA-TSO-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_xchg_i8_acq_rel:
 ; RV64I:       # %bb.0:
@@ -245,25 +345,45 @@ define i8 @atomicrmw_xchg_i8_acq_rel(ptr %a, i8 %b) nounwind {
 ; RV64I-NEXT:    addi sp, sp, 16
 ; RV64I-NEXT:    ret
 ;
-; RV64IA-LABEL: atomicrmw_xchg_i8_acq_rel:
-; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a2, a0, -4
-; RV64IA-NEXT:    slli a0, a0, 3
-; RV64IA-NEXT:    li a3, 255
-; RV64IA-NEXT:    sllw a3, a3, a0
-; RV64IA-NEXT:    andi a1, a1, 255
-; RV64IA-NEXT:    sllw a1, a1, a0
-; RV64IA-NEXT:  .LBB3_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w.aq a4, (a2)
-; RV64IA-NEXT:    mv a5, a1
-; RV64IA-NEXT:    xor a5, a4, a5
-; RV64IA-NEXT:    and a5, a5, a3
-; RV64IA-NEXT:    xor a5, a4, a5
-; RV64IA-NEXT:    sc.w.rl a5, a5, (a2)
-; RV64IA-NEXT:    bnez a5, .LBB3_1
-; RV64IA-NEXT:  # %bb.2:
-; RV64IA-NEXT:    srlw a0, a4, a0
-; RV64IA-NEXT:    ret
+; RV64IA-WMO-LABEL: atomicrmw_xchg_i8_acq_rel:
+; RV64IA-WMO:       # %bb.0:
+; RV64IA-WMO-NEXT:    andi a2, a0, -4
+; RV64IA-WMO-NEXT:    slli a0, a0, 3
+; RV64IA-WMO-NEXT:    li a3, 255
+; RV64IA-WMO-NEXT:    sllw a3, a3, a0
+; RV64IA-WMO-NEXT:    andi a1, a1, 255
+; RV64IA-WMO-NEXT:    sllw a1, a1, a0
+; RV64IA-WMO-NEXT:  .LBB3_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT:    lr.w.aq a4, (a2)
+; RV64IA-WMO-NEXT:    mv a5, a1
+; RV64IA-WMO-NEXT:    xor a5, a4, a5
+; RV64IA-WMO-NEXT:    and a5, a5, a3
+; RV64IA-WMO-NEXT:    xor a5, a4, a5
+; RV64IA-WMO-NEXT:    sc.w.rl a5, a5, (a2)
+; RV64IA-WMO-NEXT:    bnez a5, .LBB3_1
+; RV64IA-WMO-NEXT:  # %bb.2:
+; RV64IA-WMO-NEXT:    srlw a0, a4, a0
+; RV64IA-WMO-NEXT:    ret
+;
+; RV64IA-TSO-LABEL: atomicrmw_xchg_i8_acq_rel:
+; RV64IA-TSO:       # %bb.0:
+; RV64IA-TSO-NEXT:    andi a2, a0, -4
+; RV64IA-TSO-NEXT:    slli a0, a0, 3
+; RV64IA-TSO-NEXT:    li a3, 255
+; RV64IA-TSO-NEXT:    sllw a3, a3, a0
+; RV64IA-TSO-NEXT:    andi a1, a1, 255
+; RV64IA-TSO-NEXT:    sllw a1, a1, a0
+; RV64IA-TSO-NEXT:  .LBB3_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT:    lr.w a4, (a2)
+; RV64IA-TSO-NEXT:    mv a5, a1
+; RV64IA-TSO-NEXT:    xor a5, a4, a5
+; RV64IA-TSO-NEXT:    and a5, a5, a3
+; RV64IA-TSO-NEXT:    xor a5, a4, a5
+; RV64IA-TSO-NEXT:    sc.w a5, a5, (a2)
+; RV64IA-TSO-NEXT:    bnez a5, .LBB3_1
+; RV64IA-TSO-NEXT:  # %bb.2:
+; RV64IA-TSO-NEXT:    srlw a0, a4, a0
+; RV64IA-TSO-NEXT:    ret
   %1 = atomicrmw xchg ptr %a, i8 %b acq_rel
   ret i8 %1
 }
@@ -880,25 +1000,45 @@ define i8 @atomicrmw_add_i8_acquire(ptr %a, i8 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: atomicrmw_add_i8_acquire:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a2, a0, -4
-; RV32IA-NEXT:    slli a0, a0, 3
-; RV32IA-NEXT:    li a3, 255
-; RV32IA-NEXT:    sll a3, a3, a0
-; RV32IA-NEXT:    andi a1, a1, 255
-; RV32IA-NEXT:    sll a1, a1, a0
-; RV32IA-NEXT:  .LBB16_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w.aq a4, (a2)
-; RV32IA-NEXT:    add a5, a4, a1
-; RV32IA-NEXT:    xor a5, a4, a5
-; RV32IA-NEXT:    and a5, a5, a3
-; RV32IA-NEXT:    xor a5, a4, a5
-; RV32IA-NEXT:    sc.w a5, a5, (a2)
-; RV32IA-NEXT:    bnez a5, .LBB16_1
-; RV32IA-NEXT:  # %bb.2:
-; RV32IA-NEXT:    srl a0, a4, a0
-; RV32IA-NEXT:    ret
+; RV32IA-WMO-LABEL: atomicrmw_add_i8_acquire:
+; RV32IA-WMO:       # %bb.0:
+; RV32IA-WMO-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NEXT:    li a3, 255
+; RV32IA-WMO-NEXT:    sll a3, a3, a0
+; RV32IA-WMO-NEXT:    andi a1, a1, 255
+; RV32IA-WMO-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NEXT:  .LBB16_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NEXT:    lr.w.aq a4, (a2)
+; RV32IA-WMO-NEXT:    add a5, a4, a1
+; RV32IA-WMO-NEXT:    xor a5, a4, a5
+; RV32IA-WMO-NEXT:    and a5, a5, a3
+; RV32IA-WMO-NEXT:    xor a5, a4, a5
+; RV32IA-WMO-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-WMO-NEXT:    bnez a5, .LBB16_1
+; RV32IA-WMO-NEXT:  # %bb.2:
+; RV32IA-WMO-NEXT:    srl a0, a4, a0
+; RV32IA-WMO-NEXT:    ret
+;
+; RV32IA-TSO-LABEL: atomicrmw_add_i8_acquire:
+; RV32IA-TSO:       # %bb.0:
+; RV32IA-TSO-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NEXT:    li a3, 255
+; RV32IA-TSO-NEXT:    sll a3, a3, a0
+; RV32IA-TSO-NEXT:    andi a1, a1, 255
+; RV32IA-TSO-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NEXT:  .LBB16_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NEXT:    lr.w a4, (a2)
+; RV32IA-TSO-NEXT:    add a5, a4, a1
+; RV32IA-TSO-NEXT:    xor a5, a4, a5
+; RV32IA-TSO-NEXT:    and a5, a5, a3
+; RV32IA-TSO-NEXT:    xor a5, a4, a5
+; RV32IA-TSO-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-NEXT:    bnez a5, .LBB16_1
+; RV32IA-TSO-NEXT:  # %bb.2:
+; RV32IA-TSO-NEXT:    srl a0, a4, a0
+; RV32IA-TSO-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_add_i8_acquire:
 ; RV64I:       # %bb.0:
@@ -910,25 +1050,45 @@ define i8 @atomicrmw_add_i8_acquire(ptr %a, i8 %b) nounwind {
 ; RV64I-NEXT:    addi sp, sp, 16
 ; RV64I-NEXT:    ret
 ;
-; RV64IA-LABEL: atomicrmw_add_i8_acquire:
-; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a2, a0, -4
-; RV64IA-NEXT:    slli a0, a0, 3
-; RV64IA-NEXT:    li a3, 255
-; RV64IA-NEXT:    sllw a3, a3, a0
-; RV64IA-NEXT:    andi a1, a1, 255
-; RV64IA-NEXT:    sllw a1, a1, a0
-; RV64IA-NEXT:  .LBB16_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w.aq a4, (a2)
-; RV64IA-NEXT:    add a5, a4, a1
-; RV64IA-NEXT:    xor a5, a4, a5
-; RV64IA-NEXT:    and a5, a5, a3
-; RV64IA-NEXT:    xor a5, a4, a5
-; RV64IA-NEXT:    sc.w a5, a5, (a2)
-; RV64IA-NEXT:    bnez a5, .LBB16_1
-; RV64IA-NEXT:  # %bb.2:
-; RV64IA-NEXT:    srlw a0, a4, a0
-; RV64IA-NEXT:    ret
+; RV64IA-WMO-LABEL: atomicrmw_add_i8_acquire:
+; RV64IA-WMO:       # %bb.0:
+; RV64IA-WMO-NEXT:    andi a2, a0, -4
+; RV64IA-WMO-NEXT:    slli a0, a0, 3
+; RV64IA-WMO-NEXT:    li a3, 255
+; RV64IA-WMO-NEXT:    sllw a3, a3, a0
+; RV64IA-WMO-NEXT:    andi a1, a1, 255
+; RV64IA-WMO-NEXT:    sllw a1, a1, a0
+; RV64IA-WMO-NEXT:  .LBB16_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT:    lr.w.aq a4, (a2)
+; RV64IA-WMO-NEXT:    add a5, a4, a1
+; RV64IA-WMO-NEXT:    xor a5, a4, a5
+; RV64IA-WMO-NEXT:    and a5, a5, a3
+; RV64IA-WMO-NEXT:    xor a5, a4, a5
+; RV64IA-WMO-NEXT:    sc.w a5, a5, (a2)
+; RV64IA-WMO-NEXT:    bnez a5, .LBB16_1
+; RV64IA-WMO-NEXT:  # %bb.2:
+; RV64IA-WMO-NEXT:    srlw a0, a4, a0
+; RV64IA-WMO-NEXT:    ret
+;
+; RV64IA-TSO-LABEL: atomicrmw_add_i8_acquire:
+; RV64IA-TSO:       # %bb.0:
+; RV64IA-TSO-NEXT:    andi a2, a0, -4
+; RV64IA-TSO-NEXT:    slli a0, a0, 3
+; RV64IA-TSO-NEXT:    li a3, 255
+; RV64IA-TSO-NEXT:    sllw a3, a3, a0
+; RV64IA-TSO-NEXT:    andi a1, a1, 255
+; RV64IA-TSO-NEXT:    sllw a1, a1, a0
+; RV64IA-TSO-NEXT:  .LBB16_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT:    lr.w a4, (a2)
+; RV64IA-TSO-NEXT:    add a5, a4, a1
+; RV64IA-TSO-NEXT:    xor a5, a4, a5
+; RV64IA-TSO-NEXT:    and a5, a5, a3
+; RV64IA-TSO-NEXT:    xor a5, a4, a5
+; RV64IA-TSO-NEXT:    sc.w a5, a5, (a2)
+; RV64IA-TSO-NEXT:    bnez a5, .LBB16_1
+; RV64IA-TSO-NEXT:  # %bb.2:
+; RV64IA-TSO-NEXT:    srlw a0, a4, a0
+; RV64IA-TSO-NEXT:    ret
   %1 = atomicrmw add ptr %a, i8 %b acquire
   ret i8 %1
 }
@@ -944,25 +1104,45 @@ define i8 @atomicrmw_add_i8_release(ptr %a, i8 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: atomicrmw_add_i8_release:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a2, a0, -4
-; RV32IA-NEXT:    slli a0, a0, 3
-; RV32IA-NEXT:    li a3, 255
-; RV32IA-NEXT:    sll a3, a3, a0
-; RV32IA-NEXT:    andi a1, a1, 255
-; RV32IA-NEXT:    sll a1, a1, a0
-; RV32IA-NEXT:  .LBB17_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w a4, (a2)
-; RV32IA-NEXT:    add a5, a4, a1
-; RV32IA-NEXT:    xor a5, a4, a5
-; RV32IA-NEXT:    and a5, a5, a3
-; RV32IA-NEXT:    xor a5, a4, a5
-; RV32IA-NEXT:    sc.w.rl a5, a5, (a2)
-; RV32IA-NEXT:    bnez a5, .LBB17_1
-; RV32IA-NEXT:  # %bb.2:
-; RV32IA-NEXT:    srl a0, a4, a0
-; RV32IA-NEXT:    ret
+; RV32IA-WMO-LABEL: atomicrmw_add_i8_release:
+; RV32IA-WMO:       # %bb.0:
+; RV32IA-WMO-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NEXT:    li a3, 255
+; RV32IA-WMO-NEXT:    sll a3, a3, a0
+; RV32IA-WMO-NEXT:    andi a1, a1, 255
+; RV32IA-WMO-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NEXT:  .LBB17_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NEXT:    lr.w a4, (a2)
+; RV32IA-WMO-NEXT:    add a5, a4, a1
+; RV32IA-WMO-NEXT:    xor a5, a4, a5
+; RV32IA-WMO-NEXT:    and a5, a5, a3
+; RV32IA-WMO-NEXT:    xor a5, a4, a5
+; RV32IA-WMO-NEXT:    sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-NEXT:    bnez a5, .LBB17_1
+; RV32IA-WMO-NEXT:  # %bb.2:
+; RV32IA-WMO-NEXT:    srl a0, a4, a0
+; RV32IA-WMO-NEXT:    ret
+;
+; RV32IA-TSO-LABEL: atomicrmw_add_i8_release:
+; RV32IA-TSO:       # %bb.0:
+; RV32IA-TSO-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NEXT:    li a3, 255
+; RV32IA-TSO-NEXT:    sll a3, a3, a0
+; RV32IA-TSO-NEXT:    andi a1, a1, 255
+; RV32IA-TSO-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NEXT:  .LBB17_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NEXT:    lr.w a4, (a2)
+; RV32IA-TSO-NEXT:    add a5, a4, a1
+; RV32IA-TSO-NEXT:    xor a5, a4, a5
+; RV32IA-TSO-NEXT:    and a5, a5, a3
+; RV32IA-TSO-NEXT:    xor a5, a4, a5
+; RV32IA-TSO-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-NEXT:    bnez a5, .LBB17_1
+; RV32IA-TSO-NEXT:  # %bb.2:
+; RV32IA-TSO-NEXT:    srl a0, a4, a0
+; RV32IA-TSO-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_add_i8_release:
 ; RV64I:       # %bb.0:
@@ -974,25 +1154,45 @@ define i8 @atomicrmw_add_i8_release(ptr %a, i8 %b) nounwind {
 ; RV64I-NEXT:    addi sp, sp, 16
 ; RV64I-NEXT:    ret
 ;
-; RV64IA-LABEL: atomicrmw_add_i8_release:
-; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a2, a0, -4
-; RV64IA-NEXT:    slli a0, a0, 3
-; RV64IA-NEXT:    li a3, 255
-; RV64IA-NEXT:    sllw a3, a3, a0
-; RV64IA-NEXT:    andi a1, a1, 255
-; RV64IA-NEXT:    sllw a1, a1, a0
-; RV64IA-NEXT:  .LBB17_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w a4, (a2)
-; RV64IA-NEXT:    add a5, a4, a1
-; RV64IA-NEXT:    xor a5, a4, a5
-; RV64IA-NEXT:    and a5, a5, a3
-; RV64IA-NEXT:    xor a5, a4, a5
-; RV64IA-NEXT:    sc.w.rl a5, a5, (a2)
-; RV64IA-NEXT:    bnez a5, .LBB17_1
-; RV64IA-NEXT:  # %bb.2:
-; RV64IA-NEXT:    srlw a0, a4, a0
-; RV64IA-NEXT:    ret
+; RV64IA-WMO-LABEL: atomicrmw_add_i8_release:
+; RV64IA-WMO:       # %bb.0:
+; RV64IA-WMO-NEXT:    andi a2, a0, -4
+; RV64IA-WMO-NEXT:    slli a0, a0, 3
+; RV64IA-WMO-NEXT:    li a3, 255
+; RV64IA-WMO-NEXT:    sllw a3, a3, a0
+; RV64IA-WMO-NEXT:    andi a1, a1, 255
+; RV64IA-WMO-NEXT:    sllw a1, a1, a0
+; RV64IA-WMO-NEXT:  .LBB17_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT:    lr.w a4, (a2)
+; RV64IA-WMO-NEXT:    add a5, a4, a1
+; RV64IA-WMO-NEXT:    xor a5, a4, a5
+; RV64IA-WMO-NEXT:    and a5, a5, a3
+; RV64IA-WMO-NEXT:    xor a5, a4, a5
+; RV64IA-WMO-NEXT:    sc.w.rl a5, a5, (a2)
+; RV64IA-WMO-NEXT:    bnez a5, .LBB17_1
+; RV64IA-WMO-NEXT:  # %bb.2:
+; RV64IA-WMO-NEXT:    srlw a0, a4, a0
+; RV64IA-WMO-NEXT:    ret
+;
+; RV64IA-TSO-LABEL: atomicrmw_add_i8_release:
+; RV64IA-TSO:       # %bb.0:
+; RV64IA-TSO-NEXT:    andi a2, a0, -4
+; RV64IA-TSO-NEXT:    slli a0, a0, 3
+; RV64IA-TSO-NEXT:    li a3, 255
+; RV64IA-TSO-NEXT:    sllw a3, a3, a0
+; RV64IA-TSO-NEXT:    andi a1, a1, 255
+; RV64IA-TSO-NEXT:    sllw a1, a1, a0
+; RV64IA-TSO-NEXT:  .LBB17_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT:    lr.w a4, (a2)
+; RV64IA-TSO-NEXT:    add a5, a4, a1
+; RV64IA-TSO-NEXT:    xor a5, a4, a5
+; RV64IA-TSO-NEXT:    and a5, a5, a3
+; RV64IA-TSO-NEXT:    xor a5, a4, a5
+; RV64IA-TSO-NEXT:    sc.w a5, a5, (a2)
+; RV64IA-TSO-NEXT:    bnez a5, .LBB17_1
+; RV64IA-TSO-NEXT:  # %bb.2:
+; RV64IA-TSO-NEXT:    srlw a0, a4, a0
+; RV64IA-TSO-NEXT:    ret
   %1 = atomicrmw add ptr %a, i8 %b release
   ret i8 %1
 }
@@ -1008,25 +1208,45 @@ define i8 @atomicrmw_add_i8_acq_rel(ptr %a, i8 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: atomicrmw_add_i8_acq_rel:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a2, a0, -4
-; RV32IA-NEXT:    slli a0, a0, 3
-; RV32IA-NEXT:    li a3, 255
-; RV32IA-NEXT:    sll a3, a3, a0
-; RV32IA-NEXT:    andi a1, a1, 255
-; RV32IA-NEXT:    sll a1, a1, a0
-; RV32IA-NEXT:  .LBB18_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w.aq a4, (a2)
-; RV32IA-NEXT:    add a5, a4, a1
-; RV32IA-NEXT:    xor a5, a4, a5
-; RV32IA-NEXT:    and a5, a5, a3
-; RV32IA-NEXT:    xor a5, a4, a5
-; RV32IA-NEXT:    sc.w.rl a5, a5, (a2)
-; RV32IA-NEXT:    bnez a5, .LBB18_1
-; RV32IA-NEXT:  # %bb.2:
-; RV32IA-NEXT:    srl a0, a4, a0
-; RV32IA-NEXT:    ret
+; RV32IA-WMO-LABEL: atomicrmw_add_i8_acq_rel:
+; RV32IA-WMO:       # %bb.0:
+; RV32IA-WMO-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NEXT:    li a3, 255
+; RV32IA-WMO-NEXT:    sll a3, a3, a0
+; RV32IA-WMO-NEXT:    andi a1, a1, 255
+; RV32IA-WMO-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NEXT:  .LBB18_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NEXT:    lr.w.aq a4, (a2)
+; RV32IA-WMO-NEXT:    add a5, a4, a1
+; RV32IA-WMO-NEXT:    xor a5, a4, a5
+; RV32IA-WMO-NEXT:    and a5, a5, a3
+; RV32IA-WMO-NEXT:    xor a5, a4, a5
+; RV32IA-WMO-NEXT:    sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-NEXT:    bnez a5, .LBB18_1
+; RV32IA-WMO-NEXT:  # %bb.2:
+; RV32IA-WMO-NEXT:    srl a0, a4, a0
+; RV32IA-WMO-NEXT:    ret
+;
+; RV32IA-TSO-LABEL: atomicrmw_add_i8_acq_rel:
+; RV32IA-TSO:       # %bb.0:
+; RV32IA-TSO-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NEXT:    li a3, 255
+; RV32IA-TSO-NEXT:    sll a3, a3, a0
+; RV32IA-TSO-NEXT:    andi a1, a1, 255
+; RV32IA-TSO-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NEXT:  .LBB18_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NEXT:    lr.w a4, (a2)
+; RV32IA-TSO-NEXT:    add a5, a4, a1
+; RV32IA-TSO-NEXT:    xor a5, a4, a5
+; RV32IA-TSO-NEXT:    and a5, a5, a3
+; RV32IA-TSO-NEXT:    xor a5, a4, a5
+; RV32IA-TSO-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-NEXT:    bnez a5, .LBB18_1
+; RV32IA-TSO-NEXT:  # %bb.2:
+; RV32IA-TSO-NEXT:    srl a0, a4, a0
+; RV32IA-TSO-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_add_i8_acq_rel:
 ; RV64I:       # %bb.0:
@@ -1038,25 +1258,45 @@ define i8 @atomicrmw_add_i8_acq_rel(ptr %a, i8 %b) nounwind {
 ; RV64I-NEXT:    addi sp, sp, 16
 ; RV64I-NEXT:    ret
 ;
-; RV64IA-LABEL: atomicrmw_add_i8_acq_rel:
-; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a2, a0, -4
-; RV64IA-NEXT:    slli a0, a0, 3
-; RV64IA-NEXT:    li a3, 255
-; RV64IA-NEXT:    sllw a3, a3, a0
-; RV64IA-NEXT:    andi a1, a1, 255
-; RV64IA-NEXT:    sllw a1, a1, a0
-; RV64IA-NEXT:  .LBB18_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w.aq a4, (a2)
-; RV64IA-NEXT:    add a5, a4, a1
-; RV64IA-NEXT:    xor a5, a4, a5
-; RV64IA-NEXT:    and a5, a5, a3
-; RV64IA-NEXT:    xor a5, a4, a5
-; RV64IA-NEXT:    sc.w.rl a5, a5, (a2)
-; RV64IA-NEXT:    bnez a5, .LBB18_1
-; RV64IA-NEXT:  # %bb.2:
-; RV64IA-NEXT:    srlw a0, a4, a0
-; RV64IA-NEXT:    ret
+; RV64IA-WMO-LABEL: atomicrmw_add_i8_acq_rel:
+; RV64IA-WMO:       # %bb.0:
+; RV64IA-WMO-NEXT:    andi a2, a0, -4
+; RV64IA-WMO-NEXT:    slli a0, a0, 3
+; RV64IA-WMO-NEXT:    li a3, 255
+; RV64IA-WMO-NEXT:    sllw a3, a3, a0
+; RV64IA-WMO-NEXT:    andi a1, a1, 255
+; RV64IA-WMO-NEXT:    sllw a1, a1, a0
+; RV64IA-WMO-NEXT:  .LBB18_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT:    lr.w.aq a4, (a2)
+; RV64IA-WMO-NEXT:    add a5, a4, a1
+; RV64IA-WMO-NEXT:    xor a5, a4, a5
+; RV64IA-WMO-NEXT:    and a5, a5, a3
+; RV64IA-WMO-NEXT:    xor a5, a4, a5
+; RV64IA-WMO-NEXT:    sc.w.rl a5, a5, (a2)
+; RV64IA-WMO-NEXT:    bnez a5, .LBB18_1
+; RV64IA-WMO-NEXT:  # %bb.2:
+; RV64IA-WMO-NEXT:    srlw a0, a4, a0
+; RV64IA-WMO-NEXT:    ret
+;
+; RV64IA-TSO-LABEL: atomicrmw_add_i8_acq_rel:
+; RV64IA-TSO:       # %bb.0:
+; RV64IA-TSO-NEXT:    andi a2, a0, -4
+; RV64IA-TSO-NEXT:    slli a0, a0, 3
+; RV64IA-TSO-NEXT:    li a3, 255
+; RV64IA-TSO-NEXT:    sllw a3, a3, a0
+; RV64IA-TSO-NEXT:    andi a1, a1, 255
+; RV64IA-TSO-NEXT:    sllw a1, a1, a0
+; RV64IA-TSO-NEXT:  .LBB18_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT:    lr.w a4, (a2)
+; RV64IA-TSO-NEXT:    add a5, a4, a1
+; RV64IA-TSO-NEXT:    xor a5, a4, a5
+; RV64IA-TSO-NEXT:    and a5, a5, a3
+; RV64IA-TSO-NEXT:    xor a5, a4, a5
+; RV64IA-TSO-NEXT:    sc.w a5, a5, (a2)
+; RV64IA-TSO-NEXT:    bnez a5, .LBB18_1
+; RV64IA-TSO-NEXT:  # %bb.2:
+; RV64IA-TSO-NEXT:    srlw a0, a4, a0
+; RV64IA-TSO-NEXT:    ret
   %1 = atomicrmw add ptr %a, i8 %b acq_rel
   ret i8 %1
 }
@@ -1200,25 +1440,45 @@ define i8 @atomicrmw_sub_i8_acquire(ptr %a, i8 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: atomicrmw_sub_i8_acquire:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a2, a0, -4
-; RV32IA-NEXT:    slli a0, a0, 3
-; RV32IA-NEXT:    li a3, 255
-; RV32IA-NEXT:    sll a3, a3, a0
-; RV32IA-NEXT:    andi a1, a1, 255
-; RV32IA-NEXT:    sll a1, a1, a0
-; RV32IA-NEXT:  .LBB21_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w.aq a4, (a2)
-; RV32IA-NEXT:    sub a5, a4, a1
-; RV32IA-NEXT:    xor a5, a4, a5
-; RV32IA-NEXT:    and a5, a5, a3
-; RV32IA-NEXT:    xor a5, a4, a5
-; RV32IA-NEXT:    sc.w a5, a5, (a2)
-; RV32IA-NEXT:    bnez a5, .LBB21_1
-; RV32IA-NEXT:  # %bb.2:
-; RV32IA-NEXT:    srl a0, a4, a0
-; RV32IA-NEXT:    ret
+; RV32IA-WMO-LABEL: atomicrmw_sub_i8_acquire:
+; RV32IA-WMO:       # %bb.0:
+; RV32IA-WMO-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NEXT:    li a3, 255
+; RV32IA-WMO-NEXT:    sll a3, a3, a0
+; RV32IA-WMO-NEXT:    andi a1, a1, 255
+; RV32IA-WMO-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NEXT:  .LBB21_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NEXT:    lr.w.aq a4, (a2)
+; RV32IA-WMO-NEXT:    sub a5, a4, a1
+; RV32IA-WMO-NEXT:    xor a5, a4, a5
+; RV32IA-WMO-NEXT:    and a5, a5, a3
+; RV32IA-WMO-NEXT:    xor a5, a4, a5
+; RV32IA-WMO-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-WMO-NEXT:    bnez a5, .LBB21_1
+; RV32IA-WMO-NEXT:  # %bb.2:
+; RV32IA-WMO-NEXT:    srl a0, a4, a0
+; RV32IA-WMO-NEXT:    ret
+;
+; RV32IA-TSO-LABEL: atomicrmw_sub_i8_acquire:
+; RV32IA-TSO:       # %bb.0:
+; RV32IA-TSO-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NEXT:    li a3, 255
+; RV32IA-TSO-NEXT:    sll a3, a3, a0
+; RV32IA-TSO-NEXT:    andi a1, a1, 255
+; RV32IA-TSO-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NEXT:  .LBB21_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NEXT:    lr.w a4, (a2)
+; RV32IA-TSO-NEXT:    sub a5, a4, a1
+; RV32IA-TSO-NEXT:    xor a5, a4, a5
+; RV32IA-TSO-NEXT:    and a5, a5, a3
+; RV32IA-TSO-NEXT:    xor a5, a4, a5
+; RV32IA-TSO-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-NEXT:    bnez a5, .LBB21_1
+; RV32IA-TSO-NEXT:  # %bb.2:
+; RV32IA-TSO-NEXT:    srl a0, a4, a0
+; RV32IA-TSO-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_sub_i8_acquire:
 ; RV64I:       # %bb.0:
@@ -1230,25 +1490,45 @@ define i8 @atomicrmw_sub_i8_acquire(ptr %a, i8 %b) nounwind {
 ; RV64I-NEXT:    addi sp, sp, 16
 ; RV64I-NEXT:    ret
 ;
-; RV64IA-LABEL: atomicrmw_sub_i8_acquire:
-; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a2, a0, -4
-; RV64IA-NEXT:    slli a0, a0, 3
-; RV64IA-NEXT:    li a3, 255
-; RV64IA-NEXT:    sllw a3, a3, a0
-; RV64IA-NEXT:    andi a1, a1, 255
-; RV64IA-NEXT:    sllw a1, a1, a0
-; RV64IA-NEXT:  .LBB21_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w.aq a4, (a2)
-; RV64IA-NEXT:    sub a5, a4, a1
-; RV64IA-NEXT:    xor a5, a4, a5
-; RV64IA-NEXT:    and a5, a5, a3
-; RV64IA-NEXT:    xor a5, a4, a5
-; RV64IA-NEXT:    sc.w a5, a5, (a2)
-; RV64IA-NEXT:    bnez a5, .LBB21_1
-; RV64IA-NEXT:  # %bb.2:
-; RV64IA-NEXT:    srlw a0, a4, a0
-; RV64IA-NEXT:    ret
+; RV64IA-WMO-LABEL: atomicrmw_sub_i8_acquire:
+; RV64IA-WMO:       # %bb.0:
+; RV64IA-WMO-NEXT:    andi a2, a0, -4
+; RV64IA-WMO-NEXT:    slli a0, a0, 3
+; RV64IA-WMO-NEXT:    li a3, 255
+; RV64IA-WMO-NEXT:    sllw a3, a3, a0
+; RV64IA-WMO-NEXT:    andi a1, a1, 255
+; RV64IA-WMO-NEXT:    sllw a1, a1, a0
+; RV64IA-WMO-NEXT:  .LBB21_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT:    lr.w.aq a4, (a2)
+; RV64IA-WMO-NEXT:    sub a5, a4, a1
+; RV64IA-WMO-NEXT:    xor a5, a4, a5
+; RV64IA-WMO-NEXT:    and a5, a5, a3
+; RV64IA-WMO-NEXT:    xor a5, a4, a5
+; RV64IA-WMO-NEXT:    sc.w a5, a5, (a2)
+; RV64IA-WMO-NEXT:    bnez a5, .LBB21_1
+; RV64IA-WMO-NEXT:  # %bb.2:
+; RV64IA-WMO-NEXT:    srlw a0, a4, a0
+; RV64IA-WMO-NEXT:    ret
+;
+; RV64IA-TSO-LABEL: atomicrmw_sub_i8_acquire:
+; RV64IA-TSO:       # %bb.0:
+; RV64IA-TSO-NEXT:    andi a2, a0, -4
+; RV64IA-TSO-NEXT:    slli a0, a0, 3
+; RV64IA-TSO-NEXT:    li a3, 255
+; RV64IA-TSO-NEXT:    sllw a3, a3, a0
+; RV64IA-TSO-NEXT:    andi a1, a1, 255
+; RV64IA-TSO-NEXT:    sllw a1, a1, a0
+; RV64IA-TSO-NEXT:  .LBB21_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT:    lr.w a4, (a2)
+; RV64IA-TSO-NEXT:    sub a5, a4, a1
+; RV64IA-TSO-NEXT:    xor a5, a4, a5
+; RV64IA-TSO-NEXT:    and a5, a5, a3
+; RV64IA-TSO-NEXT:    xor a5, a4, a5
+; RV64IA-TSO-NEXT:    sc.w a5, a5, (a2)
+; RV64IA-TSO-NEXT:    bnez a5, .LBB21_1
+; RV64IA-TSO-NEXT:  # %bb.2:
+; RV64IA-TSO-NEXT:    srlw a0, a4, a0
+; RV64IA-TSO-NEXT:    ret
   %1 = atomicrmw sub ptr %a, i8 %b acquire
   ret i8 %1
 }
@@ -1264,25 +1544,45 @@ define i8 @atomicrmw_sub_i8_release(ptr %a, i8 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: atomicrmw_sub_i8_release:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a2, a0, -4
-; RV32IA-NEXT:    slli a0, a0, 3
-; RV32IA-NEXT:    li a3, 255
-; RV32IA-NEXT:    sll a3, a3, a0
-; RV32IA-NEXT:    andi a1, a1, 255
-; RV32IA-NEXT:    sll a1, a1, a0
-; RV32IA-NEXT:  .LBB22_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w a4, (a2)
-; RV32IA-NEXT:    sub a5, a4, a1
-; RV32IA-NEXT:    xor a5, a4, a5
-; RV32IA-NEXT:    and a5, a5, a3
-; RV32IA-NEXT:    xor a5, a4, a5
-; RV32IA-NEXT:    sc.w.rl a5, a5, (a2)
-; RV32IA-NEXT:    bnez a5, .LBB22_1
-; RV32IA-NEXT:  # %bb.2:
-; RV32IA-NEXT:    srl a0, a4, a0
-; RV32IA-NEXT:    ret
+; RV32IA-WMO-LABEL: atomicrmw_sub_i8_release:
+; RV32IA-WMO:       # %bb.0:
+; RV32IA-WMO-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NEXT:    li a3, 255
+; RV32IA-WMO-NEXT:    sll a3, a3, a0
+; RV32IA-WMO-NEXT:    andi a1, a1, 255
+; RV32IA-WMO-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NEXT:  .LBB22_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NEXT:    lr.w a4, (a2)
+; RV32IA-WMO-NEXT:    sub a5, a4, a1
+; RV32IA-WMO-NEXT:    xor a5, a4, a5
+; RV32IA-WMO-NEXT:    and a5, a5, a3
+; RV32IA-WMO-NEXT:    xor a5, a4, a5
+; RV32IA-WMO-NEXT:    sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-NEXT:    bnez a5, .LBB22_1
+; RV32IA-WMO-NEXT:  # %bb.2:
+; RV32IA-WMO-NEXT:    srl a0, a4, a0
+; RV32IA-WMO-NEXT:    ret
+;
+; RV32IA-TSO-LABEL: atomicrmw_sub_i8_release:
+; RV32IA-TSO:       # %bb.0:
+; RV32IA-TSO-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NEXT:    li a3, 255
+; RV32IA-TSO-NEXT:    sll a3, a3, a0
+; RV32IA-TSO-NEXT:    andi a1, a1, 255
+; RV32IA-TSO-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NEXT:  .LBB22_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NEXT:    lr.w a4, (a2)
+; RV32IA-TSO-NEXT:    sub a5, a4, a1
+; RV32IA-TSO-NEXT:    xor a5, a4, a5
+; RV32IA-TSO-NEXT:    and a5, a5, a3
+; RV32IA-TSO-NEXT:    xor a5, a4, a5
+; RV32IA-TSO-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-NEXT:    bnez a5, .LBB22_1
+; RV32IA-TSO-NEXT:  # %bb.2:
+; RV32IA-TSO-NEXT:    srl a0, a4, a0
+; RV32IA-TSO-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_sub_i8_release:
 ; RV64I:       # %bb.0:
@@ -1294,25 +1594,45 @@ define i8 @atomicrmw_sub_i8_release(ptr %a, i8 %b) nounwind {
 ; RV64I-NEXT:    addi sp, sp, 16
 ; RV64I-NEXT:    ret
 ;
-; RV64IA-LABEL: atomicrmw_sub_i8_release:
-; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a2, a0, -4
-; RV64IA-NEXT:    slli a0, a0, 3
-; RV64IA-NEXT:    li a3, 255
-; RV64IA-NEXT:    sllw a3, a3, a0
-; RV64IA-NEXT:    andi a1, a1, 255
-; RV64IA-NEXT:    sllw a1, a1, a0
-; RV64IA-NEXT:  .LBB22_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w a4, (a2)
-; RV64IA-NEXT:    sub a5, a4, a1
-; RV64IA-NEXT:    xor a5, a4, a5
-; RV64IA-NEXT:    and a5, a5, a3
-; RV64IA-NEXT:    xor a5, a4, a5
-; RV64IA-NEXT:    sc.w.rl a5, a5, (a2)
-; RV64IA-NEXT:    bnez a5, .LBB22_1
-; RV64IA-NEXT:  # %bb.2:
-; RV64IA-NEXT:    srlw a0, a4, a0
-; RV64IA-NEXT:    ret
+; RV64IA-WMO-LABEL: atomicrmw_sub_i8_release:
+; RV64IA-WMO:       # %bb.0:
+; RV64IA-WMO-NEXT:    andi a2, a0, -4
+; RV64IA-WMO-NEXT:    slli a0, a0, 3
+; RV64IA-WMO-NEXT:    li a3, 255
+; RV64IA-WMO-NEXT:    sllw a3, a3, a0
+; RV64IA-WMO-NEXT:    andi a1, a1, 255
+; RV64IA-WMO-NEXT:    sllw a1, a1, a0
+; RV64IA-WMO-NEXT:  .LBB22_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT:    lr.w a4, (a2)
+; RV64IA-WMO-NEXT:    sub a5, a4, a1
+; RV64IA-WMO-NEXT:    xor a5, a4, a5
+; RV64IA-WMO-NEXT:    and a5, a5, a3
+; RV64IA-WMO-NEXT:    xor a5, a4, a5
+; RV64IA-WMO-NEXT:    sc.w.rl a5, a5, (a2)
+; RV64IA-WMO-NEXT:    bnez a5, .LBB22_1
+; RV64IA-WMO-NEXT:  # %bb.2:
+; RV64IA-WMO-NEXT:    srlw a0, a4, a0
+; RV64IA-WMO-NEXT:    ret
+;
+; RV64IA-TSO-LABEL: atomicrmw_sub_i8_release:
+; RV64IA-TSO:       # %bb.0:
+; RV64IA-TSO-NEXT:    andi a2, a0, -4
+; RV64IA-TSO-NEXT:    slli a0, a0, 3
+; RV64IA-TSO-NEXT:    li a3, 255
+; RV64IA-TSO-NEXT:    sllw a3, a3, a0
+; RV64IA-TSO-NEXT:    andi a1, a1, 255
+; RV64IA-TSO-NEXT:    sllw a1, a1, a0
+; RV64IA-TSO-NEXT:  .LBB22_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT:    lr.w a4, (a2)
+; RV64IA-TSO-NEXT:    sub a5, a4, a1
+; RV64IA-TSO-NEXT:    xor a5, a4, a5
+; RV64IA-TSO-NEXT:    and a5, a5, a3
+; RV64IA-TSO-NEXT:    xor a5, a4, a5
+; RV64IA-TSO-NEXT:    sc.w a5, a5, (a2)
+; RV64IA-TSO-NEXT:    bnez a5, .LBB22_1
+; RV64IA-TSO-NEXT:  # %bb.2:
+; RV64IA-TSO-NEXT:    srlw a0, a4, a0
+; RV64IA-TSO-NEXT:    ret
   %1 = atomicrmw sub ptr %a, i8 %b release
   ret i8 %1
 }
@@ -1328,25 +1648,45 @@ define i8 @atomicrmw_sub_i8_acq_rel(ptr %a, i8 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: atomicrmw_sub_i8_acq_rel:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a2, a0, -4
-; RV32IA-NEXT:    slli a0, a0, 3
-; RV32IA-NEXT:    li a3, 255
-; RV32IA-NEXT:    sll a3, a3, a0
-; RV32IA-NEXT:    andi a1, a1, 255
-; RV32IA-NEXT:    sll a1, a1, a0
-; RV32IA-NEXT:  .LBB23_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w.aq a4, (a2)
-; RV32IA-NEXT:    sub a5, a4, a1
-; RV32IA-NEXT:    xor a5, a4, a5
-; RV32IA-NEXT:    and a5, a5, a3
-; RV32IA-NEXT:    xor a5, a4, a5
-; RV32IA-NEXT:    sc.w.rl a5, a5, (a2)
-; RV32IA-NEXT:    bnez a5, .LBB23_1
-; RV32IA-NEXT:  # %bb.2:
-; RV32IA-NEXT:    srl a0, a4, a0
-; RV32IA-NEXT:    ret
+; RV32IA-WMO-LABEL: atomicrmw_sub_i8_acq_rel:
+; RV32IA-WMO:       # %bb.0:
+; RV32IA-WMO-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NEXT:    li a3, 255
+; RV32IA-WMO-NEXT:    sll a3, a3, a0
+; RV32IA-WMO-NEXT:    andi a1, a1, 255
+; RV32IA-WMO-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NEXT:  .LBB23_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NEXT:    lr.w.aq a4, (a2)
+; RV32IA-WMO-NEXT:    sub a5, a4, a1
+; RV32IA-WMO-NEXT:    xor a5, a4, a5
+; RV32IA-WMO-NEXT:    and a5, a5, a3
+; RV32IA-WMO-NEXT:    xor a5, a4, a5
+; RV32IA-WMO-NEXT:    sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-NEXT:    bnez a5, .LBB23_1
+; RV32IA-WMO-NEXT:  # %bb.2:
+; RV32IA-WMO-NEXT:    srl a0, a4, a0
+; RV32IA-WMO-NEXT:    ret
+;
+; RV32IA-TSO-LABEL: atomicrmw_sub_i8_acq_rel:
+; RV32IA-TSO:       # %bb.0:
+; RV32IA-TSO-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NEXT:    li a3, 255
+; RV32IA-TSO-NEXT:    sll a3, a3, a0
+; RV32IA-TSO-NEXT:    andi a1, a1, 255
+; RV32IA-TSO-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NEXT:  .LBB23_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NEXT:    lr.w a4, (a2)
+; RV32IA-TSO-NEXT:    sub a5, a4, a1
+; RV32IA-TSO-NEXT:    xor a5, a4, a5
+; RV32IA-TSO-NEXT:    and a5, a5, a3
+; RV32IA-TSO-NEXT:    xor a5, a4, a5
+; RV32IA-TSO-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-NEXT:    bnez a5, .LBB23_1
+; RV32IA-TSO-NEXT:  # %bb.2:
+; RV32IA-TSO-NEXT:    srl a0, a4, a0
+; RV32IA-TSO-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_sub_i8_acq_rel:
 ; RV64I:       # %bb.0:
@@ -1358,25 +1698,45 @@ define i8 @atomicrmw_sub_i8_acq_rel(ptr %a, i8 %b) nounwind {
 ; RV64I-NEXT:    addi sp, sp, 16
 ; RV64I-NEXT:    ret
 ;
-; RV64IA-LABEL: atomicrmw_sub_i8_acq_rel:
-; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a2, a0, -4
-; RV64IA-NEXT:    slli a0, a0, 3
-; RV64IA-NEXT:    li a3, 255
-; RV64IA-NEXT:    sllw a3, a3, a0
-; RV64IA-NEXT:    andi a1, a1, 255
-; RV64IA-NEXT:    sllw a1, a1, a0
-; RV64IA-NEXT:  .LBB23_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w.aq a4, (a2)
-; RV64IA-NEXT:    sub a5, a4, a1
-; RV64IA-NEXT:    xor a5, a4, a5
-; RV64IA-NEXT:    and a5, a5, a3
-; RV64IA-NEXT:    xor a5, a4, a5
-; RV64IA-NEXT:    sc.w.rl a5, a5, (a2)
-; RV64IA-NEXT:    bnez a5, .LBB23_1
-; RV64IA-NEXT:  # %bb.2:
-; RV64IA-NEXT:    srlw a0, a4, a0
-; RV64IA-NEXT:    ret
+; RV64IA-WMO-LABEL: atomicrmw_sub_i8_acq_rel:
+; RV64IA-WMO:       # %bb.0:
+; RV64IA-WMO-NEXT:    andi a2, a0, -4
+; RV64IA-WMO-NEXT:    slli a0, a0, 3
+; RV64IA-WMO-NEXT:    li a3, 255
+; RV64IA-WMO-NEXT:    sllw a3, a3, a0
+; RV64IA-WMO-NEXT:    andi a1, a1, 255
+; RV64IA-WMO-NEXT:    sllw a1, a1, a0
+; RV64IA-WMO-NEXT:  .LBB23_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT:    lr.w.aq a4, (a2)
+; RV64IA-WMO-NEXT:    sub a5, a4, a1
+; RV64IA-WMO-NEXT:    xor a5, a4, a5
+; RV64IA-WMO-NEXT:    and a5, a5, a3
+; RV64IA-WMO-NEXT:    xor a5, a4, a5
+; RV64IA-WMO-NEXT:    sc.w.rl a5, a5, (a2)
+; RV64IA-WMO-NEXT:    bnez a5, .LBB23_1
+; RV64IA-WMO-NEXT:  # %bb.2:
+; RV64IA-WMO-NEXT:    srlw a0, a4, a0
+; RV64IA-WMO-NEXT:    ret
+;
+; RV64IA-TSO-LABEL: atomicrmw_sub_i8_acq_rel:
+; RV64IA-TSO:       # %bb.0:
+; RV64IA-TSO-NEXT:    andi a2, a0, -4
+; RV64IA-TSO-NEXT:    slli a0, a0, 3
+; RV64IA-TSO-NEXT:    li a3, 255
+; RV64IA-TSO-NEXT:    sllw a3, a3, a0
+; RV64IA-TSO-NEXT:    andi a1, a1, 255
+; RV64IA-TSO-NEXT:    sllw a1, a1, a0
+; RV64IA-TSO-NEXT:  .LBB23_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT:    lr.w a4, (a2)
+; RV64IA-TSO-NEXT:    sub a5, a4, a1
+; RV64IA-TSO-NEXT:    xor a5, a4, a5
+; RV64IA-TSO-NEXT:    and a5, a5, a3
+; RV64IA-TSO-NEXT:    xor a5, a4, a5
+; RV64IA-TSO-NEXT:    sc.w a5, a5, (a2)
+; RV64IA-TSO-NEXT:    bnez a5, .LBB23_1
+; RV64IA-TSO-NEXT:  # %bb.2:
+; RV64IA-TSO-NEXT:    srlw a0, a4, a0
+; RV64IA-TSO-NEXT:    ret
   %1 = atomicrmw sub ptr %a, i8 %b acq_rel
   ret i8 %1
 }
@@ -1782,26 +2142,47 @@ define i8 @atomicrmw_nand_i8_acquire(ptr %a, i8 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: atomicrmw_nand_i8_acquire:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a2, a0, -4
-; RV32IA-NEXT:    slli a0, a0, 3
-; RV32IA-NEXT:    li a3, 255
-; RV32IA-NEXT:    sll a3, a3, a0
-; RV32IA-NEXT:    andi a1, a1, 255
-; RV32IA-NEXT:    sll a1, a1, a0
-; RV32IA-NEXT:  .LBB31_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w.aq a4, (a2)
-; RV32IA-NEXT:    and a5, a4, a1
-; RV32IA-NEXT:    not a5, a5
-; RV32IA-NEXT:    xor a5, a4, a5
-; RV32IA-NEXT:    and a5, a5, a3
-; RV32IA-NEXT:    xor a5, a4, a5
-; RV32IA-NEXT:    sc.w a5, a5, (a2)
-; RV32IA-NEXT:    bnez a5, .LBB31_1
-; RV32IA-NEXT:  # %bb.2:
-; RV32IA-NEXT:    srl a0, a4, a0
-; RV32IA-NEXT:    ret
+; RV32IA-WMO-LABEL: atomicrmw_nand_i8_acquire:
+; RV32IA-WMO:       # %bb.0:
+; RV32IA-WMO-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NEXT:    li a3, 255
+; RV32IA-WMO-NEXT:    sll a3, a3, a0
+; RV32IA-WMO-NEXT:    andi a1, a1, 255
+; RV32IA-WMO-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NEXT:  .LBB31_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NEXT:    lr.w.aq a4, (a2)
+; RV32IA-WMO-NEXT:    and a5, a4, a1
+; RV32IA-WMO-NEXT:    not a5, a5
+; RV32IA-WMO-NEXT:    xor a5, a4, a5
+; RV32IA-WMO-NEXT:    and a5, a5, a3
+; RV32IA-WMO-NEXT:    xor a5, a4, a5
+; RV32IA-WMO-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-WMO-NEXT:    bnez a5, .LBB31_1
+; RV32IA-WMO-NEXT:  # %bb.2:
+; RV32IA-WMO-NEXT:    srl a0, a4, a0
+; RV32IA-WMO-NEXT:    ret
+;
+; RV32IA-TSO-LABEL: atomicrmw_nand_i8_acquire:
+; RV32IA-TSO:       # %bb.0:
+; RV32IA-TSO-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NEXT:    li a3, 255
+; RV32IA-TSO-NEXT:    sll a3, a3, a0
+; RV32IA-TSO-NEXT:    andi a1, a1, 255
+; RV32IA-TSO-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NEXT:  .LBB31_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NEXT:    lr.w a4, (a2)
+; RV32IA-TSO-NEXT:    and a5, a4, a1
+; RV32IA-TSO-NEXT:    not a5, a5
+; RV32IA-TSO-NEXT:    xor a5, a4, a5
+; RV32IA-TSO-NEXT:    and a5, a5, a3
+; RV32IA-TSO-NEXT:    xor a5, a4, a5
+; RV32IA-TSO-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-NEXT:    bnez a5, .LBB31_1
+; RV32IA-TSO-NEXT:  # %bb.2:
+; RV32IA-TSO-NEXT:    srl a0, a4, a0
+; RV32IA-TSO-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_nand_i8_acquire:
 ; RV64I:       # %bb.0:
@@ -1813,26 +2194,47 @@ define i8 @atomicrmw_nand_i8_acquire(ptr %a, i8 %b) nounwind {
 ; RV64I-NEXT:    addi sp, sp, 16
 ; RV64I-NEXT:    ret
 ;
-; RV64IA-LABEL: atomicrmw_nand_i8_acquire:
-; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a2, a0, -4
-; RV64IA-NEXT:    slli a0, a0, 3
-; RV64IA-NEXT:    li a3, 255
-; RV64IA-NEXT:    sllw a3, a3, a0
-; RV64IA-NEXT:    andi a1, a1, 255
-; RV64IA-NEXT:    sllw a1, a1, a0
-; RV64IA-NEXT:  .LBB31_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w.aq a4, (a2)
-; RV64IA-NEXT:    and a5, a4, a1
-; RV64IA-NEXT:    not a5, a5
-; RV64IA-NEXT:    xor a5, a4, a5
-; RV64IA-NEXT:    and a5, a5, a3
-; RV64IA-NEXT:    xor a5, a4, a5
-; RV64IA-NEXT:    sc.w a5, a5, (a2)
-; RV64IA-NEXT:    bnez a5, .LBB31_1
-; RV64IA-NEXT:  # %bb.2:
-; RV64IA-NEXT:    srlw a0, a4, a0
-; RV64IA-NEXT:    ret
+; RV64IA-WMO-LABEL: atomicrmw_nand_i8_acquire:
+; RV64IA-WMO:       # %bb.0:
+; RV64IA-WMO-NEXT:    andi a2, a0, -4
+; RV64IA-WMO-NEXT:    slli a0, a0, 3
+; RV64IA-WMO-NEXT:    li a3, 255
+; RV64IA-WMO-NEXT:    sllw a3, a3, a0
+; RV64IA-WMO-NEXT:    andi a1, a1, 255
+; RV64IA-WMO-NEXT:    sllw a1, a1, a0
+; RV64IA-WMO-NEXT:  .LBB31_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT:    lr.w.aq a4, (a2)
+; RV64IA-WMO-NEXT:    and a5, a4, a1
+; RV64IA-WMO-NEXT:    not a5, a5
+; RV64IA-WMO-NEXT:    xor a5, a4, a5
+; RV64IA-WMO-NEXT:    and a5, a5, a3
+; RV64IA-WMO-NEXT:    xor a5, a4, a5
+; RV64IA-WMO-NEXT:    sc.w a5, a5, (a2)
+; RV64IA-WMO-NEXT:    bnez a5, .LBB31_1
+; RV64IA-WMO-NEXT:  # %bb.2:
+; RV64IA-WMO-NEXT:    srlw a0, a4, a0
+; RV64IA-WMO-NEXT:    ret
+;
+; RV64IA-TSO-LABEL: atomicrmw_nand_i8_acquire:
+; RV64IA-TSO:       # %bb.0:
+; RV64IA-TSO-NEXT:    andi a2, a0, -4
+; RV64IA-TSO-NEXT:    slli a0, a0, 3
+; RV64IA-TSO-NEXT:    li a3, 255
+; RV64IA-TSO-NEXT:    sllw a3, a3, a0
+; RV64IA-TSO-NEXT:    andi a1, a1, 255
+; RV64IA-TSO-NEXT:    sllw a1, a1, a0
+; RV64IA-TSO-NEXT:  .LBB31_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT:    lr.w a4, (a2)
+; RV64IA-TSO-NEXT:    and a5, a4, a1
+; RV64IA-TSO-NEXT:    not a5, a5
+; RV64IA-TSO-NEXT:    xor a5, a4, a5
+; RV64IA-TSO-NEXT:    and a5, a5, a3
+; RV64IA-TSO-NEXT:    xor a5, a4, a5
+; RV64IA-TSO-NEXT:    sc.w a5, a5, (a2)
+; RV64IA-TSO-NEXT:    bnez a5, .LBB31_1
+; RV64IA-TSO-NEXT:  # %bb.2:
+; RV64IA-TSO-NEXT:    srlw a0, a4, a0
+; RV64IA-TSO-NEXT:    ret
   %1 = atomicrmw nand ptr %a, i8 %b acquire
   ret i8 %1
 }
@@ -1848,26 +2250,47 @@ define i8 @atomicrmw_nand_i8_release(ptr %a, i8 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: atomicrmw_nand_i8_release:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a2, a0, -4
-; RV32IA-NEXT:    slli a0, a0, 3
-; RV32IA-NEXT:    li a3, 255
-; RV32IA-NEXT:    sll a3, a3, a0
-; RV32IA-NEXT:    andi a1, a1, 255
-; RV32IA-NEXT:    sll a1, a1, a0
-; RV32IA-NEXT:  .LBB32_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w a4, (a2)
-; RV32IA-NEXT:    and a5, a4, a1
-; RV32IA-NEXT:    not a5, a5
-; RV32IA-NEXT:    xor a5, a4, a5
-; RV32IA-NEXT:    and a5, a5, a3
-; RV32IA-NEXT:    xor a5, a4, a5
-; RV32IA-NEXT:    sc.w.rl a5, a5, (a2)
-; RV32IA-NEXT:    bnez a5, .LBB32_1
-; RV32IA-NEXT:  # %bb.2:
-; RV32IA-NEXT:    srl a0, a4, a0
-; RV32IA-NEXT:    ret
+; RV32IA-WMO-LABEL: atomicrmw_nand_i8_release:
+; RV32IA-WMO:       # %bb.0:
+; RV32IA-WMO-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NEXT:    li a3, 255
+; RV32IA-WMO-NEXT:    sll a3, a3, a0
+; RV32IA-WMO-NEXT:    andi a1, a1, 255
+; RV32IA-WMO-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NEXT:  .LBB32_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NEXT:    lr.w a4, (a2)
+; RV32IA-WMO-NEXT:    and a5, a4, a1
+; RV32IA-WMO-NEXT:    not a5, a5
+; RV32IA-WMO-NEXT:    xor a5, a4, a5
+; RV32IA-WMO-NEXT:    and a5, a5, a3
+; RV32IA-WMO-NEXT:    xor a5, a4, a5
+; RV32IA-WMO-NEXT:    sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-NEXT:    bnez a5, .LBB32_1
+; RV32IA-WMO-NEXT:  # %bb.2:
+; RV32IA-WMO-NEXT:    srl a0, a4, a0
+; RV32IA-WMO-NEXT:    ret
+;
+; RV32IA-TSO-LABEL: atomicrmw_nand_i8_release:
+; RV32IA-TSO:       # %bb.0:
+; RV32IA-TSO-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NEXT:    li a3, 255
+; RV32IA-TSO-NEXT:    sll a3, a3, a0
+; RV32IA-TSO-NEXT:    andi a1, a1, 255
+; RV32IA-TSO-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NEXT:  .LBB32_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NEXT:    lr.w a4, (a2)
+; RV32IA-TSO-NEXT:    and a5, a4, a1
+; RV32IA-TSO-NEXT:    not a5, a5
+; RV32IA-TSO-NEXT:    xor a5, a4, a5
+; RV32IA-TSO-NEXT:    and a5, a5, a3
+; RV32IA-TSO-NEXT:    xor a5, a4, a5
+; RV32IA-TSO-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-NEXT:    bnez a5, .LBB32_1
+; RV32IA-TSO-NEXT:  # %bb.2:
+; RV32IA-TSO-NEXT:    srl a0, a4, a0
+; RV32IA-TSO-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_nand_i8_release:
 ; RV64I:       # %bb.0:
@@ -1879,26 +2302,47 @@ define i8 @atomicrmw_nand_i8_release(ptr %a, i8 %b) nounwind {
 ; RV64I-NEXT:    addi sp, sp, 16
 ; RV64I-NEXT:    ret
 ;
-; RV64IA-LABEL: atomicrmw_nand_i8_release:
-; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a2, a0, -4
-; RV64IA-NEXT:    slli a0, a0, 3
-; RV64IA-NEXT:    li a3, 255
-; RV64IA-NEXT:    sllw a3, a3, a0
-; RV64IA-NEXT:    andi a1, a1, 255
-; RV64IA-NEXT:    sllw a1, a1, a0
-; RV64IA-NEXT:  .LBB32_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w a4, (a2)
-; RV64IA-NEXT:    and a5, a4, a1
-; RV64IA-NEXT:    not a5, a5
-; RV64IA-NEXT:    xor a5, a4, a5
-; RV64IA-NEXT:    and a5, a5, a3
-; RV64IA-NEXT:    xor a5, a4, a5
-; RV64IA-NEXT:    sc.w.rl a5, a5, (a2)
-; RV64IA-NEXT:    bnez a5, .LBB32_1
-; RV64IA-NEXT:  # %bb.2:
-; RV64IA-NEXT:    srlw a0, a4, a0
-; RV64IA-NEXT:    ret
+; RV64IA-WMO-LABEL: atomicrmw_nand_i8_release:
+; RV64IA-WMO:       # %bb.0:
+; RV64IA-WMO-NEXT:    andi a2, a0, -4
+; RV64IA-WMO-NEXT:    slli a0, a0, 3
+; RV64IA-WMO-NEXT:    li a3, 255
+; RV64IA-WMO-NEXT:    sllw a3, a3, a0
+; RV64IA-WMO-NEXT:    andi a1, a1, 255
+; RV64IA-WMO-NEXT:    sllw a1, a1, a0
+; RV64IA-WMO-NEXT:  .LBB32_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT:    lr.w a4, (a2)
+; RV64IA-WMO-NEXT:    and a5, a4, a1
+; RV64IA-WMO-NEXT:    not a5, a5
+; RV64IA-WMO-NEXT:    xor a5, a4, a5
+; RV64IA-WMO-NEXT:    and a5, a5, a3
+; RV64IA-WMO-NEXT:    xor a5, a4, a5
+; RV64IA-WMO-NEXT:    sc.w.rl a5, a5, (a2)
+; RV64IA-WMO-NEXT:    bnez a5, .LBB32_1
+; RV64IA-WMO-NEXT:  # %bb.2:
+; RV64IA-WMO-NEXT:    srlw a0, a4, a0
+; RV64IA-WMO-NEXT:    ret
+;
+; RV64IA-TSO-LABEL: atomicrmw_nand_i8_release:
+; RV64IA-TSO:       # %bb.0:
+; RV64IA-TSO-NEXT:    andi a2, a0, -4
+; RV64IA-TSO-NEXT:    slli a0, a0, 3
+; RV64IA-TSO-NEXT:    li a3, 255
+; RV64IA-TSO-NEXT:    sllw a3, a3, a0
+; RV64IA-TSO-NEXT:    andi a1, a1, 255
+; RV64IA-TSO-NEXT:    sllw a1, a1, a0
+; RV64IA-TSO-NEXT:  .LBB32_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT:    lr.w a4, (a2)
+; RV64IA-TSO-NEXT:    and a5, a4, a1
+; RV64IA-TSO-NEXT:    not a5, a5
+; RV64IA-TSO-NEXT:    xor a5, a4, a5
+; RV64IA-TSO-NEXT:    and a5, a5, a3
+; RV64IA-TSO-NEXT:    xor a5, a4, a5
+; RV64IA-TSO-NEXT:    sc.w a5, a5, (a2)
+; RV64IA-TSO-NEXT:    bnez a5, .LBB32_1
+; RV64IA-TSO-NEXT:  # %bb.2:
+; RV64IA-TSO-NEXT:    srlw a0, a4, a0
+; RV64IA-TSO-NEXT:    ret
   %1 = atomicrmw nand ptr %a, i8 %b release
   ret i8 %1
 }
@@ -1914,26 +2358,47 @@ define i8 @atomicrmw_nand_i8_acq_rel(ptr %a, i8 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: atomicrmw_nand_i8_acq_rel:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a2, a0, -4
-; RV32IA-NEXT:    slli a0, a0, 3
-; RV32IA-NEXT:    li a3, 255
-; RV32IA-NEXT:    sll a3, a3, a0
-; RV32IA-NEXT:    andi a1, a1, 255
-; RV32IA-NEXT:    sll a1, a1, a0
-; RV32IA-NEXT:  .LBB33_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w.aq a4, (a2)
-; RV32IA-NEXT:    and a5, a4, a1
-; RV32IA-NEXT:    not a5, a5
-; RV32IA-NEXT:    xor a5, a4, a5
-; RV32IA-NEXT:    and a5, a5, a3
-; RV32IA-NEXT:    xor a5, a4, a5
-; RV32IA-NEXT:    sc.w.rl a5, a5, (a2)
-; RV32IA-NEXT:    bnez a5, .LBB33_1
-; RV32IA-NEXT:  # %bb.2:
-; RV32IA-NEXT:    srl a0, a4, a0
-; RV32IA-NEXT:    ret
+; RV32IA-WMO-LABEL: atomicrmw_nand_i8_acq_rel:
+; RV32IA-WMO:       # %bb.0:
+; RV32IA-WMO-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NEXT:    li a3, 255
+; RV32IA-WMO-NEXT:    sll a3, a3, a0
+; RV32IA-WMO-NEXT:    andi a1, a1, 255
+; RV32IA-WMO-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NEXT:  .LBB33_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NEXT:    lr.w.aq a4, (a2)
+; RV32IA-WMO-NEXT:    and a5, a4, a1
+; RV32IA-WMO-NEXT:    not a5, a5
+; RV32IA-WMO-NEXT:    xor a5, a4, a5
+; RV32IA-WMO-NEXT:    and a5, a5, a3
+; RV32IA-WMO-NEXT:    xor a5, a4, a5
+; RV32IA-WMO-NEXT:    sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-NEXT:    bnez a5, .LBB33_1
+; RV32IA-WMO-NEXT:  # %bb.2:
+; RV32IA-WMO-NEXT:    srl a0, a4, a0
+; RV32IA-WMO-NEXT:    ret
+;
+; RV32IA-TSO-LABEL: atomicrmw_nand_i8_acq_rel:
+; RV32IA-TSO:       # %bb.0:
+; RV32IA-TSO-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NEXT:    li a3, 255
+; RV32IA-TSO-NEXT:    sll a3, a3, a0
+; RV32IA-TSO-NEXT:    andi a1, a1, 255
+; RV32IA-TSO-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NEXT:  .LBB33_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NEXT:    lr.w a4, (a2)
+; RV32IA-TSO-NEXT:    and a5, a4, a1
+; RV32IA-TSO-NEXT:    not a5, a5
+; RV32IA-TSO-NEXT:    xor a5, a4, a5
+; RV32IA-TSO-NEXT:    and a5, a5, a3
+; RV32IA-TSO-NEXT:    xor a5, a4, a5
+; RV32IA-TSO-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-NEXT:    bnez a5, .LBB33_1
+; RV32IA-TSO-NEXT:  # %bb.2:
+; RV32IA-TSO-NEXT:    srl a0, a4, a0
+; RV32IA-TSO-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_nand_i8_acq_rel:
 ; RV64I:       # %bb.0:
@@ -1943,28 +2408,49 @@ define i8 @atomicrmw_nand_i8_acq_rel(ptr %a, i8 %b) nounwind {
 ; RV64I-NEXT:    call __atomic_fetch_nand_1 at plt
 ; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 16
-; RV64I-NEXT:    ret
-;
-; RV64IA-LABEL: atomicrmw_nand_i8_acq_rel:
-; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a2, a0, -4
-; RV64IA-NEXT:    slli a0, a0, 3
-; RV64IA-NEXT:    li a3, 255
-; RV64IA-NEXT:    sllw a3, a3, a0
-; RV64IA-NEXT:    andi a1, a1, 255
-; RV64IA-NEXT:    sllw a1, a1, a0
-; RV64IA-NEXT:  .LBB33_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w.aq a4, (a2)
-; RV64IA-NEXT:    and a5, a4, a1
-; RV64IA-NEXT:    not a5, a5
-; RV64IA-NEXT:    xor a5, a4, a5
-; RV64IA-NEXT:    and a5, a5, a3
-; RV64IA-NEXT:    xor a5, a4, a5
-; RV64IA-NEXT:    sc.w.rl a5, a5, (a2)
-; RV64IA-NEXT:    bnez a5, .LBB33_1
-; RV64IA-NEXT:  # %bb.2:
-; RV64IA-NEXT:    srlw a0, a4, a0
-; RV64IA-NEXT:    ret
+; RV64I-NEXT:    ret
+;
+; RV64IA-WMO-LABEL: atomicrmw_nand_i8_acq_rel:
+; RV64IA-WMO:       # %bb.0:
+; RV64IA-WMO-NEXT:    andi a2, a0, -4
+; RV64IA-WMO-NEXT:    slli a0, a0, 3
+; RV64IA-WMO-NEXT:    li a3, 255
+; RV64IA-WMO-NEXT:    sllw a3, a3, a0
+; RV64IA-WMO-NEXT:    andi a1, a1, 255
+; RV64IA-WMO-NEXT:    sllw a1, a1, a0
+; RV64IA-WMO-NEXT:  .LBB33_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT:    lr.w.aq a4, (a2)
+; RV64IA-WMO-NEXT:    and a5, a4, a1
+; RV64IA-WMO-NEXT:    not a5, a5
+; RV64IA-WMO-NEXT:    xor a5, a4, a5
+; RV64IA-WMO-NEXT:    and a5, a5, a3
+; RV64IA-WMO-NEXT:    xor a5, a4, a5
+; RV64IA-WMO-NEXT:    sc.w.rl a5, a5, (a2)
+; RV64IA-WMO-NEXT:    bnez a5, .LBB33_1
+; RV64IA-WMO-NEXT:  # %bb.2:
+; RV64IA-WMO-NEXT:    srlw a0, a4, a0
+; RV64IA-WMO-NEXT:    ret
+;
+; RV64IA-TSO-LABEL: atomicrmw_nand_i8_acq_rel:
+; RV64IA-TSO:       # %bb.0:
+; RV64IA-TSO-NEXT:    andi a2, a0, -4
+; RV64IA-TSO-NEXT:    slli a0, a0, 3
+; RV64IA-TSO-NEXT:    li a3, 255
+; RV64IA-TSO-NEXT:    sllw a3, a3, a0
+; RV64IA-TSO-NEXT:    andi a1, a1, 255
+; RV64IA-TSO-NEXT:    sllw a1, a1, a0
+; RV64IA-TSO-NEXT:  .LBB33_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT:    lr.w a4, (a2)
+; RV64IA-TSO-NEXT:    and a5, a4, a1
+; RV64IA-TSO-NEXT:    not a5, a5
+; RV64IA-TSO-NEXT:    xor a5, a4, a5
+; RV64IA-TSO-NEXT:    and a5, a5, a3
+; RV64IA-TSO-NEXT:    xor a5, a4, a5
+; RV64IA-TSO-NEXT:    sc.w a5, a5, (a2)
+; RV64IA-TSO-NEXT:    bnez a5, .LBB33_1
+; RV64IA-TSO-NEXT:  # %bb.2:
+; RV64IA-TSO-NEXT:    srlw a0, a4, a0
+; RV64IA-TSO-NEXT:    ret
   %1 = atomicrmw nand ptr %a, i8 %b acq_rel
   ret i8 %1
 }
@@ -2664,34 +3150,63 @@ define i8 @atomicrmw_max_i8_acquire(ptr %a, i8 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: atomicrmw_max_i8_acquire:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a2, a0, -4
-; RV32IA-NEXT:    slli a0, a0, 3
-; RV32IA-NEXT:    andi a3, a0, 24
-; RV32IA-NEXT:    li a4, 255
-; RV32IA-NEXT:    sll a4, a4, a0
-; RV32IA-NEXT:    slli a1, a1, 24
-; RV32IA-NEXT:    srai a1, a1, 24
-; RV32IA-NEXT:    sll a1, a1, a0
-; RV32IA-NEXT:    xori a3, a3, 24
-; RV32IA-NEXT:  .LBB46_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w.aq a5, (a2)
-; RV32IA-NEXT:    and a7, a5, a4
-; RV32IA-NEXT:    mv a6, a5
-; RV32IA-NEXT:    sll a7, a7, a3
-; RV32IA-NEXT:    sra a7, a7, a3
-; RV32IA-NEXT:    bge a7, a1, .LBB46_3
-; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB46_1 Depth=1
-; RV32IA-NEXT:    xor a6, a5, a1
-; RV32IA-NEXT:    and a6, a6, a4
-; RV32IA-NEXT:    xor a6, a5, a6
-; RV32IA-NEXT:  .LBB46_3: # in Loop: Header=BB46_1 Depth=1
-; RV32IA-NEXT:    sc.w a6, a6, (a2)
-; RV32IA-NEXT:    bnez a6, .LBB46_1
-; RV32IA-NEXT:  # %bb.4:
-; RV32IA-NEXT:    srl a0, a5, a0
-; RV32IA-NEXT:    ret
+; RV32IA-WMO-LABEL: atomicrmw_max_i8_acquire:
+; RV32IA-WMO:       # %bb.0:
+; RV32IA-WMO-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NEXT:    andi a3, a0, 24
+; RV32IA-WMO-NEXT:    li a4, 255
+; RV32IA-WMO-NEXT:    sll a4, a4, a0
+; RV32IA-WMO-NEXT:    slli a1, a1, 24
+; RV32IA-WMO-NEXT:    srai a1, a1, 24
+; RV32IA-WMO-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NEXT:    xori a3, a3, 24
+; RV32IA-WMO-NEXT:  .LBB46_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NEXT:    lr.w.aq a5, (a2)
+; RV32IA-WMO-NEXT:    and a7, a5, a4
+; RV32IA-WMO-NEXT:    mv a6, a5
+; RV32IA-WMO-NEXT:    sll a7, a7, a3
+; RV32IA-WMO-NEXT:    sra a7, a7, a3
+; RV32IA-WMO-NEXT:    bge a7, a1, .LBB46_3
+; RV32IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB46_1 Depth=1
+; RV32IA-WMO-NEXT:    xor a6, a5, a1
+; RV32IA-WMO-NEXT:    and a6, a6, a4
+; RV32IA-WMO-NEXT:    xor a6, a5, a6
+; RV32IA-WMO-NEXT:  .LBB46_3: # in Loop: Header=BB46_1 Depth=1
+; RV32IA-WMO-NEXT:    sc.w a6, a6, (a2)
+; RV32IA-WMO-NEXT:    bnez a6, .LBB46_1
+; RV32IA-WMO-NEXT:  # %bb.4:
+; RV32IA-WMO-NEXT:    srl a0, a5, a0
+; RV32IA-WMO-NEXT:    ret
+;
+; RV32IA-TSO-LABEL: atomicrmw_max_i8_acquire:
+; RV32IA-TSO:       # %bb.0:
+; RV32IA-TSO-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NEXT:    andi a3, a0, 24
+; RV32IA-TSO-NEXT:    li a4, 255
+; RV32IA-TSO-NEXT:    sll a4, a4, a0
+; RV32IA-TSO-NEXT:    slli a1, a1, 24
+; RV32IA-TSO-NEXT:    srai a1, a1, 24
+; RV32IA-TSO-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NEXT:    xori a3, a3, 24
+; RV32IA-TSO-NEXT:  .LBB46_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NEXT:    lr.w a5, (a2)
+; RV32IA-TSO-NEXT:    and a7, a5, a4
+; RV32IA-TSO-NEXT:    mv a6, a5
+; RV32IA-TSO-NEXT:    sll a7, a7, a3
+; RV32IA-TSO-NEXT:    sra a7, a7, a3
+; RV32IA-TSO-NEXT:    bge a7, a1, .LBB46_3
+; RV32IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB46_1 Depth=1
+; RV32IA-TSO-NEXT:    xor a6, a5, a1
+; RV32IA-TSO-NEXT:    and a6, a6, a4
+; RV32IA-TSO-NEXT:    xor a6, a5, a6
+; RV32IA-TSO-NEXT:  .LBB46_3: # in Loop: Header=BB46_1 Depth=1
+; RV32IA-TSO-NEXT:    sc.w a6, a6, (a2)
+; RV32IA-TSO-NEXT:    bnez a6, .LBB46_1
+; RV32IA-TSO-NEXT:  # %bb.4:
+; RV32IA-TSO-NEXT:    srl a0, a5, a0
+; RV32IA-TSO-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_max_i8_acquire:
 ; RV64I:       # %bb.0:
@@ -2735,34 +3250,63 @@ define i8 @atomicrmw_max_i8_acquire(ptr %a, i8 %b) nounwind {
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;
-; RV64IA-LABEL: atomicrmw_max_i8_acquire:
-; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a2, a0, -4
-; RV64IA-NEXT:    slli a0, a0, 3
-; RV64IA-NEXT:    andi a3, a0, 24
-; RV64IA-NEXT:    li a4, 255
-; RV64IA-NEXT:    sllw a4, a4, a0
-; RV64IA-NEXT:    slli a1, a1, 56
-; RV64IA-NEXT:    srai a1, a1, 56
-; RV64IA-NEXT:    sllw a1, a1, a0
-; RV64IA-NEXT:    xori a3, a3, 56
-; RV64IA-NEXT:  .LBB46_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w.aq a5, (a2)
-; RV64IA-NEXT:    and a7, a5, a4
-; RV64IA-NEXT:    mv a6, a5
-; RV64IA-NEXT:    sll a7, a7, a3
-; RV64IA-NEXT:    sra a7, a7, a3
-; RV64IA-NEXT:    bge a7, a1, .LBB46_3
-; RV64IA-NEXT:  # %bb.2: # in Loop: Header=BB46_1 Depth=1
-; RV64IA-NEXT:    xor a6, a5, a1
-; RV64IA-NEXT:    and a6, a6, a4
-; RV64IA-NEXT:    xor a6, a5, a6
-; RV64IA-NEXT:  .LBB46_3: # in Loop: Header=BB46_1 Depth=1
-; RV64IA-NEXT:    sc.w a6, a6, (a2)
-; RV64IA-NEXT:    bnez a6, .LBB46_1
-; RV64IA-NEXT:  # %bb.4:
-; RV64IA-NEXT:    srlw a0, a5, a0
-; RV64IA-NEXT:    ret
+; RV64IA-WMO-LABEL: atomicrmw_max_i8_acquire:
+; RV64IA-WMO:       # %bb.0:
+; RV64IA-WMO-NEXT:    andi a2, a0, -4
+; RV64IA-WMO-NEXT:    slli a0, a0, 3
+; RV64IA-WMO-NEXT:    andi a3, a0, 24
+; RV64IA-WMO-NEXT:    li a4, 255
+; RV64IA-WMO-NEXT:    sllw a4, a4, a0
+; RV64IA-WMO-NEXT:    slli a1, a1, 56
+; RV64IA-WMO-NEXT:    srai a1, a1, 56
+; RV64IA-WMO-NEXT:    sllw a1, a1, a0
+; RV64IA-WMO-NEXT:    xori a3, a3, 56
+; RV64IA-WMO-NEXT:  .LBB46_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT:    lr.w.aq a5, (a2)
+; RV64IA-WMO-NEXT:    and a7, a5, a4
+; RV64IA-WMO-NEXT:    mv a6, a5
+; RV64IA-WMO-NEXT:    sll a7, a7, a3
+; RV64IA-WMO-NEXT:    sra a7, a7, a3
+; RV64IA-WMO-NEXT:    bge a7, a1, .LBB46_3
+; RV64IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB46_1 Depth=1
+; RV64IA-WMO-NEXT:    xor a6, a5, a1
+; RV64IA-WMO-NEXT:    and a6, a6, a4
+; RV64IA-WMO-NEXT:    xor a6, a5, a6
+; RV64IA-WMO-NEXT:  .LBB46_3: # in Loop: Header=BB46_1 Depth=1
+; RV64IA-WMO-NEXT:    sc.w a6, a6, (a2)
+; RV64IA-WMO-NEXT:    bnez a6, .LBB46_1
+; RV64IA-WMO-NEXT:  # %bb.4:
+; RV64IA-WMO-NEXT:    srlw a0, a5, a0
+; RV64IA-WMO-NEXT:    ret
+;
+; RV64IA-TSO-LABEL: atomicrmw_max_i8_acquire:
+; RV64IA-TSO:       # %bb.0:
+; RV64IA-TSO-NEXT:    andi a2, a0, -4
+; RV64IA-TSO-NEXT:    slli a0, a0, 3
+; RV64IA-TSO-NEXT:    andi a3, a0, 24
+; RV64IA-TSO-NEXT:    li a4, 255
+; RV64IA-TSO-NEXT:    sllw a4, a4, a0
+; RV64IA-TSO-NEXT:    slli a1, a1, 56
+; RV64IA-TSO-NEXT:    srai a1, a1, 56
+; RV64IA-TSO-NEXT:    sllw a1, a1, a0
+; RV64IA-TSO-NEXT:    xori a3, a3, 56
+; RV64IA-TSO-NEXT:  .LBB46_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT:    lr.w a5, (a2)
+; RV64IA-TSO-NEXT:    and a7, a5, a4
+; RV64IA-TSO-NEXT:    mv a6, a5
+; RV64IA-TSO-NEXT:    sll a7, a7, a3
+; RV64IA-TSO-NEXT:    sra a7, a7, a3
+; RV64IA-TSO-NEXT:    bge a7, a1, .LBB46_3
+; RV64IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB46_1 Depth=1
+; RV64IA-TSO-NEXT:    xor a6, a5, a1
+; RV64IA-TSO-NEXT:    and a6, a6, a4
+; RV64IA-TSO-NEXT:    xor a6, a5, a6
+; RV64IA-TSO-NEXT:  .LBB46_3: # in Loop: Header=BB46_1 Depth=1
+; RV64IA-TSO-NEXT:    sc.w a6, a6, (a2)
+; RV64IA-TSO-NEXT:    bnez a6, .LBB46_1
+; RV64IA-TSO-NEXT:  # %bb.4:
+; RV64IA-TSO-NEXT:    srlw a0, a5, a0
+; RV64IA-TSO-NEXT:    ret
   %1 = atomicrmw max ptr %a, i8 %b acquire
   ret i8 %1
 }
@@ -2810,34 +3354,63 @@ define i8 @atomicrmw_max_i8_release(ptr %a, i8 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: atomicrmw_max_i8_release:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a2, a0, -4
-; RV32IA-NEXT:    slli a0, a0, 3
-; RV32IA-NEXT:    andi a3, a0, 24
-; RV32IA-NEXT:    li a4, 255
-; RV32IA-NEXT:    sll a4, a4, a0
-; RV32IA-NEXT:    slli a1, a1, 24
-; RV32IA-NEXT:    srai a1, a1, 24
-; RV32IA-NEXT:    sll a1, a1, a0
-; RV32IA-NEXT:    xori a3, a3, 24
-; RV32IA-NEXT:  .LBB47_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w a5, (a2)
-; RV32IA-NEXT:    and a7, a5, a4
-; RV32IA-NEXT:    mv a6, a5
-; RV32IA-NEXT:    sll a7, a7, a3
-; RV32IA-NEXT:    sra a7, a7, a3
-; RV32IA-NEXT:    bge a7, a1, .LBB47_3
-; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB47_1 Depth=1
-; RV32IA-NEXT:    xor a6, a5, a1
-; RV32IA-NEXT:    and a6, a6, a4
-; RV32IA-NEXT:    xor a6, a5, a6
-; RV32IA-NEXT:  .LBB47_3: # in Loop: Header=BB47_1 Depth=1
-; RV32IA-NEXT:    sc.w.rl a6, a6, (a2)
-; RV32IA-NEXT:    bnez a6, .LBB47_1
-; RV32IA-NEXT:  # %bb.4:
-; RV32IA-NEXT:    srl a0, a5, a0
-; RV32IA-NEXT:    ret
+; RV32IA-WMO-LABEL: atomicrmw_max_i8_release:
+; RV32IA-WMO:       # %bb.0:
+; RV32IA-WMO-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NEXT:    andi a3, a0, 24
+; RV32IA-WMO-NEXT:    li a4, 255
+; RV32IA-WMO-NEXT:    sll a4, a4, a0
+; RV32IA-WMO-NEXT:    slli a1, a1, 24
+; RV32IA-WMO-NEXT:    srai a1, a1, 24
+; RV32IA-WMO-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NEXT:    xori a3, a3, 24
+; RV32IA-WMO-NEXT:  .LBB47_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NEXT:    lr.w a5, (a2)
+; RV32IA-WMO-NEXT:    and a7, a5, a4
+; RV32IA-WMO-NEXT:    mv a6, a5
+; RV32IA-WMO-NEXT:    sll a7, a7, a3
+; RV32IA-WMO-NEXT:    sra a7, a7, a3
+; RV32IA-WMO-NEXT:    bge a7, a1, .LBB47_3
+; RV32IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB47_1 Depth=1
+; RV32IA-WMO-NEXT:    xor a6, a5, a1
+; RV32IA-WMO-NEXT:    and a6, a6, a4
+; RV32IA-WMO-NEXT:    xor a6, a5, a6
+; RV32IA-WMO-NEXT:  .LBB47_3: # in Loop: Header=BB47_1 Depth=1
+; RV32IA-WMO-NEXT:    sc.w.rl a6, a6, (a2)
+; RV32IA-WMO-NEXT:    bnez a6, .LBB47_1
+; RV32IA-WMO-NEXT:  # %bb.4:
+; RV32IA-WMO-NEXT:    srl a0, a5, a0
+; RV32IA-WMO-NEXT:    ret
+;
+; RV32IA-TSO-LABEL: atomicrmw_max_i8_release:
+; RV32IA-TSO:       # %bb.0:
+; RV32IA-TSO-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NEXT:    andi a3, a0, 24
+; RV32IA-TSO-NEXT:    li a4, 255
+; RV32IA-TSO-NEXT:    sll a4, a4, a0
+; RV32IA-TSO-NEXT:    slli a1, a1, 24
+; RV32IA-TSO-NEXT:    srai a1, a1, 24
+; RV32IA-TSO-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NEXT:    xori a3, a3, 24
+; RV32IA-TSO-NEXT:  .LBB47_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NEXT:    lr.w a5, (a2)
+; RV32IA-TSO-NEXT:    and a7, a5, a4
+; RV32IA-TSO-NEXT:    mv a6, a5
+; RV32IA-TSO-NEXT:    sll a7, a7, a3
+; RV32IA-TSO-NEXT:    sra a7, a7, a3
+; RV32IA-TSO-NEXT:    bge a7, a1, .LBB47_3
+; RV32IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB47_1 Depth=1
+; RV32IA-TSO-NEXT:    xor a6, a5, a1
+; RV32IA-TSO-NEXT:    and a6, a6, a4
+; RV32IA-TSO-NEXT:    xor a6, a5, a6
+; RV32IA-TSO-NEXT:  .LBB47_3: # in Loop: Header=BB47_1 Depth=1
+; RV32IA-TSO-NEXT:    sc.w a6, a6, (a2)
+; RV32IA-TSO-NEXT:    bnez a6, .LBB47_1
+; RV32IA-TSO-NEXT:  # %bb.4:
+; RV32IA-TSO-NEXT:    srl a0, a5, a0
+; RV32IA-TSO-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_max_i8_release:
 ; RV64I:       # %bb.0:
@@ -2881,34 +3454,63 @@ define i8 @atomicrmw_max_i8_release(ptr %a, i8 %b) nounwind {
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;
-; RV64IA-LABEL: atomicrmw_max_i8_release:
-; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a2, a0, -4
-; RV64IA-NEXT:    slli a0, a0, 3
-; RV64IA-NEXT:    andi a3, a0, 24
-; RV64IA-NEXT:    li a4, 255
-; RV64IA-NEXT:    sllw a4, a4, a0
-; RV64IA-NEXT:    slli a1, a1, 56
-; RV64IA-NEXT:    srai a1, a1, 56
-; RV64IA-NEXT:    sllw a1, a1, a0
-; RV64IA-NEXT:    xori a3, a3, 56
-; RV64IA-NEXT:  .LBB47_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w a5, (a2)
-; RV64IA-NEXT:    and a7, a5, a4
-; RV64IA-NEXT:    mv a6, a5
-; RV64IA-NEXT:    sll a7, a7, a3
-; RV64IA-NEXT:    sra a7, a7, a3
-; RV64IA-NEXT:    bge a7, a1, .LBB47_3
-; RV64IA-NEXT:  # %bb.2: # in Loop: Header=BB47_1 Depth=1
-; RV64IA-NEXT:    xor a6, a5, a1
-; RV64IA-NEXT:    and a6, a6, a4
-; RV64IA-NEXT:    xor a6, a5, a6
-; RV64IA-NEXT:  .LBB47_3: # in Loop: Header=BB47_1 Depth=1
-; RV64IA-NEXT:    sc.w.rl a6, a6, (a2)
-; RV64IA-NEXT:    bnez a6, .LBB47_1
-; RV64IA-NEXT:  # %bb.4:
-; RV64IA-NEXT:    srlw a0, a5, a0
-; RV64IA-NEXT:    ret
+; RV64IA-WMO-LABEL: atomicrmw_max_i8_release:
+; RV64IA-WMO:       # %bb.0:
+; RV64IA-WMO-NEXT:    andi a2, a0, -4
+; RV64IA-WMO-NEXT:    slli a0, a0, 3
+; RV64IA-WMO-NEXT:    andi a3, a0, 24
+; RV64IA-WMO-NEXT:    li a4, 255
+; RV64IA-WMO-NEXT:    sllw a4, a4, a0
+; RV64IA-WMO-NEXT:    slli a1, a1, 56
+; RV64IA-WMO-NEXT:    srai a1, a1, 56
+; RV64IA-WMO-NEXT:    sllw a1, a1, a0
+; RV64IA-WMO-NEXT:    xori a3, a3, 56
+; RV64IA-WMO-NEXT:  .LBB47_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT:    lr.w a5, (a2)
+; RV64IA-WMO-NEXT:    and a7, a5, a4
+; RV64IA-WMO-NEXT:    mv a6, a5
+; RV64IA-WMO-NEXT:    sll a7, a7, a3
+; RV64IA-WMO-NEXT:    sra a7, a7, a3
+; RV64IA-WMO-NEXT:    bge a7, a1, .LBB47_3
+; RV64IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB47_1 Depth=1
+; RV64IA-WMO-NEXT:    xor a6, a5, a1
+; RV64IA-WMO-NEXT:    and a6, a6, a4
+; RV64IA-WMO-NEXT:    xor a6, a5, a6
+; RV64IA-WMO-NEXT:  .LBB47_3: # in Loop: Header=BB47_1 Depth=1
+; RV64IA-WMO-NEXT:    sc.w.rl a6, a6, (a2)
+; RV64IA-WMO-NEXT:    bnez a6, .LBB47_1
+; RV64IA-WMO-NEXT:  # %bb.4:
+; RV64IA-WMO-NEXT:    srlw a0, a5, a0
+; RV64IA-WMO-NEXT:    ret
+;
+; RV64IA-TSO-LABEL: atomicrmw_max_i8_release:
+; RV64IA-TSO:       # %bb.0:
+; RV64IA-TSO-NEXT:    andi a2, a0, -4
+; RV64IA-TSO-NEXT:    slli a0, a0, 3
+; RV64IA-TSO-NEXT:    andi a3, a0, 24
+; RV64IA-TSO-NEXT:    li a4, 255
+; RV64IA-TSO-NEXT:    sllw a4, a4, a0
+; RV64IA-TSO-NEXT:    slli a1, a1, 56
+; RV64IA-TSO-NEXT:    srai a1, a1, 56
+; RV64IA-TSO-NEXT:    sllw a1, a1, a0
+; RV64IA-TSO-NEXT:    xori a3, a3, 56
+; RV64IA-TSO-NEXT:  .LBB47_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT:    lr.w a5, (a2)
+; RV64IA-TSO-NEXT:    and a7, a5, a4
+; RV64IA-TSO-NEXT:    mv a6, a5
+; RV64IA-TSO-NEXT:    sll a7, a7, a3
+; RV64IA-TSO-NEXT:    sra a7, a7, a3
+; RV64IA-TSO-NEXT:    bge a7, a1, .LBB47_3
+; RV64IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB47_1 Depth=1
+; RV64IA-TSO-NEXT:    xor a6, a5, a1
+; RV64IA-TSO-NEXT:    and a6, a6, a4
+; RV64IA-TSO-NEXT:    xor a6, a5, a6
+; RV64IA-TSO-NEXT:  .LBB47_3: # in Loop: Header=BB47_1 Depth=1
+; RV64IA-TSO-NEXT:    sc.w a6, a6, (a2)
+; RV64IA-TSO-NEXT:    bnez a6, .LBB47_1
+; RV64IA-TSO-NEXT:  # %bb.4:
+; RV64IA-TSO-NEXT:    srlw a0, a5, a0
+; RV64IA-TSO-NEXT:    ret
   %1 = atomicrmw max ptr %a, i8 %b release
   ret i8 %1
 }
@@ -2956,34 +3558,63 @@ define i8 @atomicrmw_max_i8_acq_rel(ptr %a, i8 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: atomicrmw_max_i8_acq_rel:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a2, a0, -4
-; RV32IA-NEXT:    slli a0, a0, 3
-; RV32IA-NEXT:    andi a3, a0, 24
-; RV32IA-NEXT:    li a4, 255
-; RV32IA-NEXT:    sll a4, a4, a0
-; RV32IA-NEXT:    slli a1, a1, 24
-; RV32IA-NEXT:    srai a1, a1, 24
-; RV32IA-NEXT:    sll a1, a1, a0
-; RV32IA-NEXT:    xori a3, a3, 24
-; RV32IA-NEXT:  .LBB48_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w.aq a5, (a2)
-; RV32IA-NEXT:    and a7, a5, a4
-; RV32IA-NEXT:    mv a6, a5
-; RV32IA-NEXT:    sll a7, a7, a3
-; RV32IA-NEXT:    sra a7, a7, a3
-; RV32IA-NEXT:    bge a7, a1, .LBB48_3
-; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB48_1 Depth=1
-; RV32IA-NEXT:    xor a6, a5, a1
-; RV32IA-NEXT:    and a6, a6, a4
-; RV32IA-NEXT:    xor a6, a5, a6
-; RV32IA-NEXT:  .LBB48_3: # in Loop: Header=BB48_1 Depth=1
-; RV32IA-NEXT:    sc.w.rl a6, a6, (a2)
-; RV32IA-NEXT:    bnez a6, .LBB48_1
-; RV32IA-NEXT:  # %bb.4:
-; RV32IA-NEXT:    srl a0, a5, a0
-; RV32IA-NEXT:    ret
+; RV32IA-WMO-LABEL: atomicrmw_max_i8_acq_rel:
+; RV32IA-WMO:       # %bb.0:
+; RV32IA-WMO-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NEXT:    andi a3, a0, 24
+; RV32IA-WMO-NEXT:    li a4, 255
+; RV32IA-WMO-NEXT:    sll a4, a4, a0
+; RV32IA-WMO-NEXT:    slli a1, a1, 24
+; RV32IA-WMO-NEXT:    srai a1, a1, 24
+; RV32IA-WMO-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NEXT:    xori a3, a3, 24
+; RV32IA-WMO-NEXT:  .LBB48_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NEXT:    lr.w.aq a5, (a2)
+; RV32IA-WMO-NEXT:    and a7, a5, a4
+; RV32IA-WMO-NEXT:    mv a6, a5
+; RV32IA-WMO-NEXT:    sll a7, a7, a3
+; RV32IA-WMO-NEXT:    sra a7, a7, a3
+; RV32IA-WMO-NEXT:    bge a7, a1, .LBB48_3
+; RV32IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB48_1 Depth=1
+; RV32IA-WMO-NEXT:    xor a6, a5, a1
+; RV32IA-WMO-NEXT:    and a6, a6, a4
+; RV32IA-WMO-NEXT:    xor a6, a5, a6
+; RV32IA-WMO-NEXT:  .LBB48_3: # in Loop: Header=BB48_1 Depth=1
+; RV32IA-WMO-NEXT:    sc.w.rl a6, a6, (a2)
+; RV32IA-WMO-NEXT:    bnez a6, .LBB48_1
+; RV32IA-WMO-NEXT:  # %bb.4:
+; RV32IA-WMO-NEXT:    srl a0, a5, a0
+; RV32IA-WMO-NEXT:    ret
+;
+; RV32IA-TSO-LABEL: atomicrmw_max_i8_acq_rel:
+; RV32IA-TSO:       # %bb.0:
+; RV32IA-TSO-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NEXT:    andi a3, a0, 24
+; RV32IA-TSO-NEXT:    li a4, 255
+; RV32IA-TSO-NEXT:    sll a4, a4, a0
+; RV32IA-TSO-NEXT:    slli a1, a1, 24
+; RV32IA-TSO-NEXT:    srai a1, a1, 24
+; RV32IA-TSO-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NEXT:    xori a3, a3, 24
+; RV32IA-TSO-NEXT:  .LBB48_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NEXT:    lr.w a5, (a2)
+; RV32IA-TSO-NEXT:    and a7, a5, a4
+; RV32IA-TSO-NEXT:    mv a6, a5
+; RV32IA-TSO-NEXT:    sll a7, a7, a3
+; RV32IA-TSO-NEXT:    sra a7, a7, a3
+; RV32IA-TSO-NEXT:    bge a7, a1, .LBB48_3
+; RV32IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB48_1 Depth=1
+; RV32IA-TSO-NEXT:    xor a6, a5, a1
+; RV32IA-TSO-NEXT:    and a6, a6, a4
+; RV32IA-TSO-NEXT:    xor a6, a5, a6
+; RV32IA-TSO-NEXT:  .LBB48_3: # in Loop: Header=BB48_1 Depth=1
+; RV32IA-TSO-NEXT:    sc.w a6, a6, (a2)
+; RV32IA-TSO-NEXT:    bnez a6, .LBB48_1
+; RV32IA-TSO-NEXT:  # %bb.4:
+; RV32IA-TSO-NEXT:    srl a0, a5, a0
+; RV32IA-TSO-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_max_i8_acq_rel:
 ; RV64I:       # %bb.0:
@@ -3027,34 +3658,63 @@ define i8 @atomicrmw_max_i8_acq_rel(ptr %a, i8 %b) nounwind {
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;
-; RV64IA-LABEL: atomicrmw_max_i8_acq_rel:
-; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a2, a0, -4
-; RV64IA-NEXT:    slli a0, a0, 3
-; RV64IA-NEXT:    andi a3, a0, 24
-; RV64IA-NEXT:    li a4, 255
-; RV64IA-NEXT:    sllw a4, a4, a0
-; RV64IA-NEXT:    slli a1, a1, 56
-; RV64IA-NEXT:    srai a1, a1, 56
-; RV64IA-NEXT:    sllw a1, a1, a0
-; RV64IA-NEXT:    xori a3, a3, 56
-; RV64IA-NEXT:  .LBB48_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w.aq a5, (a2)
-; RV64IA-NEXT:    and a7, a5, a4
-; RV64IA-NEXT:    mv a6, a5
-; RV64IA-NEXT:    sll a7, a7, a3
-; RV64IA-NEXT:    sra a7, a7, a3
-; RV64IA-NEXT:    bge a7, a1, .LBB48_3
-; RV64IA-NEXT:  # %bb.2: # in Loop: Header=BB48_1 Depth=1
-; RV64IA-NEXT:    xor a6, a5, a1
-; RV64IA-NEXT:    and a6, a6, a4
-; RV64IA-NEXT:    xor a6, a5, a6
-; RV64IA-NEXT:  .LBB48_3: # in Loop: Header=BB48_1 Depth=1
-; RV64IA-NEXT:    sc.w.rl a6, a6, (a2)
-; RV64IA-NEXT:    bnez a6, .LBB48_1
-; RV64IA-NEXT:  # %bb.4:
-; RV64IA-NEXT:    srlw a0, a5, a0
-; RV64IA-NEXT:    ret
+; RV64IA-WMO-LABEL: atomicrmw_max_i8_acq_rel:
+; RV64IA-WMO:       # %bb.0:
+; RV64IA-WMO-NEXT:    andi a2, a0, -4
+; RV64IA-WMO-NEXT:    slli a0, a0, 3
+; RV64IA-WMO-NEXT:    andi a3, a0, 24
+; RV64IA-WMO-NEXT:    li a4, 255
+; RV64IA-WMO-NEXT:    sllw a4, a4, a0
+; RV64IA-WMO-NEXT:    slli a1, a1, 56
+; RV64IA-WMO-NEXT:    srai a1, a1, 56
+; RV64IA-WMO-NEXT:    sllw a1, a1, a0
+; RV64IA-WMO-NEXT:    xori a3, a3, 56
+; RV64IA-WMO-NEXT:  .LBB48_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT:    lr.w.aq a5, (a2)
+; RV64IA-WMO-NEXT:    and a7, a5, a4
+; RV64IA-WMO-NEXT:    mv a6, a5
+; RV64IA-WMO-NEXT:    sll a7, a7, a3
+; RV64IA-WMO-NEXT:    sra a7, a7, a3
+; RV64IA-WMO-NEXT:    bge a7, a1, .LBB48_3
+; RV64IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB48_1 Depth=1
+; RV64IA-WMO-NEXT:    xor a6, a5, a1
+; RV64IA-WMO-NEXT:    and a6, a6, a4
+; RV64IA-WMO-NEXT:    xor a6, a5, a6
+; RV64IA-WMO-NEXT:  .LBB48_3: # in Loop: Header=BB48_1 Depth=1
+; RV64IA-WMO-NEXT:    sc.w.rl a6, a6, (a2)
+; RV64IA-WMO-NEXT:    bnez a6, .LBB48_1
+; RV64IA-WMO-NEXT:  # %bb.4:
+; RV64IA-WMO-NEXT:    srlw a0, a5, a0
+; RV64IA-WMO-NEXT:    ret
+;
+; RV64IA-TSO-LABEL: atomicrmw_max_i8_acq_rel:
+; RV64IA-TSO:       # %bb.0:
+; RV64IA-TSO-NEXT:    andi a2, a0, -4
+; RV64IA-TSO-NEXT:    slli a0, a0, 3
+; RV64IA-TSO-NEXT:    andi a3, a0, 24
+; RV64IA-TSO-NEXT:    li a4, 255
+; RV64IA-TSO-NEXT:    sllw a4, a4, a0
+; RV64IA-TSO-NEXT:    slli a1, a1, 56
+; RV64IA-TSO-NEXT:    srai a1, a1, 56
+; RV64IA-TSO-NEXT:    sllw a1, a1, a0
+; RV64IA-TSO-NEXT:    xori a3, a3, 56
+; RV64IA-TSO-NEXT:  .LBB48_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT:    lr.w a5, (a2)
+; RV64IA-TSO-NEXT:    and a7, a5, a4
+; RV64IA-TSO-NEXT:    mv a6, a5
+; RV64IA-TSO-NEXT:    sll a7, a7, a3
+; RV64IA-TSO-NEXT:    sra a7, a7, a3
+; RV64IA-TSO-NEXT:    bge a7, a1, .LBB48_3
+; RV64IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB48_1 Depth=1
+; RV64IA-TSO-NEXT:    xor a6, a5, a1
+; RV64IA-TSO-NEXT:    and a6, a6, a4
+; RV64IA-TSO-NEXT:    xor a6, a5, a6
+; RV64IA-TSO-NEXT:  .LBB48_3: # in Loop: Header=BB48_1 Depth=1
+; RV64IA-TSO-NEXT:    sc.w a6, a6, (a2)
+; RV64IA-TSO-NEXT:    bnez a6, .LBB48_1
+; RV64IA-TSO-NEXT:  # %bb.4:
+; RV64IA-TSO-NEXT:    srlw a0, a5, a0
+; RV64IA-TSO-NEXT:    ret
   %1 = atomicrmw max ptr %a, i8 %b acq_rel
   ret i8 %1
 }
@@ -3394,34 +4054,63 @@ define i8 @atomicrmw_min_i8_acquire(ptr %a, i8 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: atomicrmw_min_i8_acquire:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a2, a0, -4
-; RV32IA-NEXT:    slli a0, a0, 3
-; RV32IA-NEXT:    andi a3, a0, 24
-; RV32IA-NEXT:    li a4, 255
-; RV32IA-NEXT:    sll a4, a4, a0
-; RV32IA-NEXT:    slli a1, a1, 24
-; RV32IA-NEXT:    srai a1, a1, 24
-; RV32IA-NEXT:    sll a1, a1, a0
-; RV32IA-NEXT:    xori a3, a3, 24
-; RV32IA-NEXT:  .LBB51_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w.aq a5, (a2)
-; RV32IA-NEXT:    and a7, a5, a4
-; RV32IA-NEXT:    mv a6, a5
-; RV32IA-NEXT:    sll a7, a7, a3
-; RV32IA-NEXT:    sra a7, a7, a3
-; RV32IA-NEXT:    bge a1, a7, .LBB51_3
-; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB51_1 Depth=1
-; RV32IA-NEXT:    xor a6, a5, a1
-; RV32IA-NEXT:    and a6, a6, a4
-; RV32IA-NEXT:    xor a6, a5, a6
-; RV32IA-NEXT:  .LBB51_3: # in Loop: Header=BB51_1 Depth=1
-; RV32IA-NEXT:    sc.w a6, a6, (a2)
-; RV32IA-NEXT:    bnez a6, .LBB51_1
-; RV32IA-NEXT:  # %bb.4:
-; RV32IA-NEXT:    srl a0, a5, a0
-; RV32IA-NEXT:    ret
+; RV32IA-WMO-LABEL: atomicrmw_min_i8_acquire:
+; RV32IA-WMO:       # %bb.0:
+; RV32IA-WMO-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NEXT:    andi a3, a0, 24
+; RV32IA-WMO-NEXT:    li a4, 255
+; RV32IA-WMO-NEXT:    sll a4, a4, a0
+; RV32IA-WMO-NEXT:    slli a1, a1, 24
+; RV32IA-WMO-NEXT:    srai a1, a1, 24
+; RV32IA-WMO-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NEXT:    xori a3, a3, 24
+; RV32IA-WMO-NEXT:  .LBB51_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NEXT:    lr.w.aq a5, (a2)
+; RV32IA-WMO-NEXT:    and a7, a5, a4
+; RV32IA-WMO-NEXT:    mv a6, a5
+; RV32IA-WMO-NEXT:    sll a7, a7, a3
+; RV32IA-WMO-NEXT:    sra a7, a7, a3
+; RV32IA-WMO-NEXT:    bge a1, a7, .LBB51_3
+; RV32IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB51_1 Depth=1
+; RV32IA-WMO-NEXT:    xor a6, a5, a1
+; RV32IA-WMO-NEXT:    and a6, a6, a4
+; RV32IA-WMO-NEXT:    xor a6, a5, a6
+; RV32IA-WMO-NEXT:  .LBB51_3: # in Loop: Header=BB51_1 Depth=1
+; RV32IA-WMO-NEXT:    sc.w a6, a6, (a2)
+; RV32IA-WMO-NEXT:    bnez a6, .LBB51_1
+; RV32IA-WMO-NEXT:  # %bb.4:
+; RV32IA-WMO-NEXT:    srl a0, a5, a0
+; RV32IA-WMO-NEXT:    ret
+;
+; RV32IA-TSO-LABEL: atomicrmw_min_i8_acquire:
+; RV32IA-TSO:       # %bb.0:
+; RV32IA-TSO-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NEXT:    andi a3, a0, 24
+; RV32IA-TSO-NEXT:    li a4, 255
+; RV32IA-TSO-NEXT:    sll a4, a4, a0
+; RV32IA-TSO-NEXT:    slli a1, a1, 24
+; RV32IA-TSO-NEXT:    srai a1, a1, 24
+; RV32IA-TSO-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NEXT:    xori a3, a3, 24
+; RV32IA-TSO-NEXT:  .LBB51_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NEXT:    lr.w a5, (a2)
+; RV32IA-TSO-NEXT:    and a7, a5, a4
+; RV32IA-TSO-NEXT:    mv a6, a5
+; RV32IA-TSO-NEXT:    sll a7, a7, a3
+; RV32IA-TSO-NEXT:    sra a7, a7, a3
+; RV32IA-TSO-NEXT:    bge a1, a7, .LBB51_3
+; RV32IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB51_1 Depth=1
+; RV32IA-TSO-NEXT:    xor a6, a5, a1
+; RV32IA-TSO-NEXT:    and a6, a6, a4
+; RV32IA-TSO-NEXT:    xor a6, a5, a6
+; RV32IA-TSO-NEXT:  .LBB51_3: # in Loop: Header=BB51_1 Depth=1
+; RV32IA-TSO-NEXT:    sc.w a6, a6, (a2)
+; RV32IA-TSO-NEXT:    bnez a6, .LBB51_1
+; RV32IA-TSO-NEXT:  # %bb.4:
+; RV32IA-TSO-NEXT:    srl a0, a5, a0
+; RV32IA-TSO-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_min_i8_acquire:
 ; RV64I:       # %bb.0:
@@ -3465,34 +4154,63 @@ define i8 @atomicrmw_min_i8_acquire(ptr %a, i8 %b) nounwind {
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;
-; RV64IA-LABEL: atomicrmw_min_i8_acquire:
-; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a2, a0, -4
-; RV64IA-NEXT:    slli a0, a0, 3
-; RV64IA-NEXT:    andi a3, a0, 24
-; RV64IA-NEXT:    li a4, 255
-; RV64IA-NEXT:    sllw a4, a4, a0
-; RV64IA-NEXT:    slli a1, a1, 56
-; RV64IA-NEXT:    srai a1, a1, 56
-; RV64IA-NEXT:    sllw a1, a1, a0
-; RV64IA-NEXT:    xori a3, a3, 56
-; RV64IA-NEXT:  .LBB51_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w.aq a5, (a2)
-; RV64IA-NEXT:    and a7, a5, a4
-; RV64IA-NEXT:    mv a6, a5
-; RV64IA-NEXT:    sll a7, a7, a3
-; RV64IA-NEXT:    sra a7, a7, a3
-; RV64IA-NEXT:    bge a1, a7, .LBB51_3
-; RV64IA-NEXT:  # %bb.2: # in Loop: Header=BB51_1 Depth=1
-; RV64IA-NEXT:    xor a6, a5, a1
-; RV64IA-NEXT:    and a6, a6, a4
-; RV64IA-NEXT:    xor a6, a5, a6
-; RV64IA-NEXT:  .LBB51_3: # in Loop: Header=BB51_1 Depth=1
-; RV64IA-NEXT:    sc.w a6, a6, (a2)
-; RV64IA-NEXT:    bnez a6, .LBB51_1
-; RV64IA-NEXT:  # %bb.4:
-; RV64IA-NEXT:    srlw a0, a5, a0
-; RV64IA-NEXT:    ret
+; RV64IA-WMO-LABEL: atomicrmw_min_i8_acquire:
+; RV64IA-WMO:       # %bb.0:
+; RV64IA-WMO-NEXT:    andi a2, a0, -4
+; RV64IA-WMO-NEXT:    slli a0, a0, 3
+; RV64IA-WMO-NEXT:    andi a3, a0, 24
+; RV64IA-WMO-NEXT:    li a4, 255
+; RV64IA-WMO-NEXT:    sllw a4, a4, a0
+; RV64IA-WMO-NEXT:    slli a1, a1, 56
+; RV64IA-WMO-NEXT:    srai a1, a1, 56
+; RV64IA-WMO-NEXT:    sllw a1, a1, a0
+; RV64IA-WMO-NEXT:    xori a3, a3, 56
+; RV64IA-WMO-NEXT:  .LBB51_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT:    lr.w.aq a5, (a2)
+; RV64IA-WMO-NEXT:    and a7, a5, a4
+; RV64IA-WMO-NEXT:    mv a6, a5
+; RV64IA-WMO-NEXT:    sll a7, a7, a3
+; RV64IA-WMO-NEXT:    sra a7, a7, a3
+; RV64IA-WMO-NEXT:    bge a1, a7, .LBB51_3
+; RV64IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB51_1 Depth=1
+; RV64IA-WMO-NEXT:    xor a6, a5, a1
+; RV64IA-WMO-NEXT:    and a6, a6, a4
+; RV64IA-WMO-NEXT:    xor a6, a5, a6
+; RV64IA-WMO-NEXT:  .LBB51_3: # in Loop: Header=BB51_1 Depth=1
+; RV64IA-WMO-NEXT:    sc.w a6, a6, (a2)
+; RV64IA-WMO-NEXT:    bnez a6, .LBB51_1
+; RV64IA-WMO-NEXT:  # %bb.4:
+; RV64IA-WMO-NEXT:    srlw a0, a5, a0
+; RV64IA-WMO-NEXT:    ret
+;
+; RV64IA-TSO-LABEL: atomicrmw_min_i8_acquire:
+; RV64IA-TSO:       # %bb.0:
+; RV64IA-TSO-NEXT:    andi a2, a0, -4
+; RV64IA-TSO-NEXT:    slli a0, a0, 3
+; RV64IA-TSO-NEXT:    andi a3, a0, 24
+; RV64IA-TSO-NEXT:    li a4, 255
+; RV64IA-TSO-NEXT:    sllw a4, a4, a0
+; RV64IA-TSO-NEXT:    slli a1, a1, 56
+; RV64IA-TSO-NEXT:    srai a1, a1, 56
+; RV64IA-TSO-NEXT:    sllw a1, a1, a0
+; RV64IA-TSO-NEXT:    xori a3, a3, 56
+; RV64IA-TSO-NEXT:  .LBB51_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT:    lr.w a5, (a2)
+; RV64IA-TSO-NEXT:    and a7, a5, a4
+; RV64IA-TSO-NEXT:    mv a6, a5
+; RV64IA-TSO-NEXT:    sll a7, a7, a3
+; RV64IA-TSO-NEXT:    sra a7, a7, a3
+; RV64IA-TSO-NEXT:    bge a1, a7, .LBB51_3
+; RV64IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB51_1 Depth=1
+; RV64IA-TSO-NEXT:    xor a6, a5, a1
+; RV64IA-TSO-NEXT:    and a6, a6, a4
+; RV64IA-TSO-NEXT:    xor a6, a5, a6
+; RV64IA-TSO-NEXT:  .LBB51_3: # in Loop: Header=BB51_1 Depth=1
+; RV64IA-TSO-NEXT:    sc.w a6, a6, (a2)
+; RV64IA-TSO-NEXT:    bnez a6, .LBB51_1
+; RV64IA-TSO-NEXT:  # %bb.4:
+; RV64IA-TSO-NEXT:    srlw a0, a5, a0
+; RV64IA-TSO-NEXT:    ret
   %1 = atomicrmw min ptr %a, i8 %b acquire
   ret i8 %1
 }
@@ -3540,34 +4258,63 @@ define i8 @atomicrmw_min_i8_release(ptr %a, i8 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: atomicrmw_min_i8_release:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a2, a0, -4
-; RV32IA-NEXT:    slli a0, a0, 3
-; RV32IA-NEXT:    andi a3, a0, 24
-; RV32IA-NEXT:    li a4, 255
-; RV32IA-NEXT:    sll a4, a4, a0
-; RV32IA-NEXT:    slli a1, a1, 24
-; RV32IA-NEXT:    srai a1, a1, 24
-; RV32IA-NEXT:    sll a1, a1, a0
-; RV32IA-NEXT:    xori a3, a3, 24
-; RV32IA-NEXT:  .LBB52_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w a5, (a2)
-; RV32IA-NEXT:    and a7, a5, a4
-; RV32IA-NEXT:    mv a6, a5
-; RV32IA-NEXT:    sll a7, a7, a3
-; RV32IA-NEXT:    sra a7, a7, a3
-; RV32IA-NEXT:    bge a1, a7, .LBB52_3
-; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB52_1 Depth=1
-; RV32IA-NEXT:    xor a6, a5, a1
-; RV32IA-NEXT:    and a6, a6, a4
-; RV32IA-NEXT:    xor a6, a5, a6
-; RV32IA-NEXT:  .LBB52_3: # in Loop: Header=BB52_1 Depth=1
-; RV32IA-NEXT:    sc.w.rl a6, a6, (a2)
-; RV32IA-NEXT:    bnez a6, .LBB52_1
-; RV32IA-NEXT:  # %bb.4:
-; RV32IA-NEXT:    srl a0, a5, a0
-; RV32IA-NEXT:    ret
+; RV32IA-WMO-LABEL: atomicrmw_min_i8_release:
+; RV32IA-WMO:       # %bb.0:
+; RV32IA-WMO-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NEXT:    andi a3, a0, 24
+; RV32IA-WMO-NEXT:    li a4, 255
+; RV32IA-WMO-NEXT:    sll a4, a4, a0
+; RV32IA-WMO-NEXT:    slli a1, a1, 24
+; RV32IA-WMO-NEXT:    srai a1, a1, 24
+; RV32IA-WMO-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NEXT:    xori a3, a3, 24
+; RV32IA-WMO-NEXT:  .LBB52_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NEXT:    lr.w a5, (a2)
+; RV32IA-WMO-NEXT:    and a7, a5, a4
+; RV32IA-WMO-NEXT:    mv a6, a5
+; RV32IA-WMO-NEXT:    sll a7, a7, a3
+; RV32IA-WMO-NEXT:    sra a7, a7, a3
+; RV32IA-WMO-NEXT:    bge a1, a7, .LBB52_3
+; RV32IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB52_1 Depth=1
+; RV32IA-WMO-NEXT:    xor a6, a5, a1
+; RV32IA-WMO-NEXT:    and a6, a6, a4
+; RV32IA-WMO-NEXT:    xor a6, a5, a6
+; RV32IA-WMO-NEXT:  .LBB52_3: # in Loop: Header=BB52_1 Depth=1
+; RV32IA-WMO-NEXT:    sc.w.rl a6, a6, (a2)
+; RV32IA-WMO-NEXT:    bnez a6, .LBB52_1
+; RV32IA-WMO-NEXT:  # %bb.4:
+; RV32IA-WMO-NEXT:    srl a0, a5, a0
+; RV32IA-WMO-NEXT:    ret
+;
+; RV32IA-TSO-LABEL: atomicrmw_min_i8_release:
+; RV32IA-TSO:       # %bb.0:
+; RV32IA-TSO-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NEXT:    andi a3, a0, 24
+; RV32IA-TSO-NEXT:    li a4, 255
+; RV32IA-TSO-NEXT:    sll a4, a4, a0
+; RV32IA-TSO-NEXT:    slli a1, a1, 24
+; RV32IA-TSO-NEXT:    srai a1, a1, 24
+; RV32IA-TSO-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NEXT:    xori a3, a3, 24
+; RV32IA-TSO-NEXT:  .LBB52_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NEXT:    lr.w a5, (a2)
+; RV32IA-TSO-NEXT:    and a7, a5, a4
+; RV32IA-TSO-NEXT:    mv a6, a5
+; RV32IA-TSO-NEXT:    sll a7, a7, a3
+; RV32IA-TSO-NEXT:    sra a7, a7, a3
+; RV32IA-TSO-NEXT:    bge a1, a7, .LBB52_3
+; RV32IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB52_1 Depth=1
+; RV32IA-TSO-NEXT:    xor a6, a5, a1
+; RV32IA-TSO-NEXT:    and a6, a6, a4
+; RV32IA-TSO-NEXT:    xor a6, a5, a6
+; RV32IA-TSO-NEXT:  .LBB52_3: # in Loop: Header=BB52_1 Depth=1
+; RV32IA-TSO-NEXT:    sc.w a6, a6, (a2)
+; RV32IA-TSO-NEXT:    bnez a6, .LBB52_1
+; RV32IA-TSO-NEXT:  # %bb.4:
+; RV32IA-TSO-NEXT:    srl a0, a5, a0
+; RV32IA-TSO-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_min_i8_release:
 ; RV64I:       # %bb.0:
@@ -3611,34 +4358,63 @@ define i8 @atomicrmw_min_i8_release(ptr %a, i8 %b) nounwind {
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;
-; RV64IA-LABEL: atomicrmw_min_i8_release:
-; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a2, a0, -4
-; RV64IA-NEXT:    slli a0, a0, 3
-; RV64IA-NEXT:    andi a3, a0, 24
-; RV64IA-NEXT:    li a4, 255
-; RV64IA-NEXT:    sllw a4, a4, a0
-; RV64IA-NEXT:    slli a1, a1, 56
-; RV64IA-NEXT:    srai a1, a1, 56
-; RV64IA-NEXT:    sllw a1, a1, a0
-; RV64IA-NEXT:    xori a3, a3, 56
-; RV64IA-NEXT:  .LBB52_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w a5, (a2)
-; RV64IA-NEXT:    and a7, a5, a4
-; RV64IA-NEXT:    mv a6, a5
-; RV64IA-NEXT:    sll a7, a7, a3
-; RV64IA-NEXT:    sra a7, a7, a3
-; RV64IA-NEXT:    bge a1, a7, .LBB52_3
-; RV64IA-NEXT:  # %bb.2: # in Loop: Header=BB52_1 Depth=1
-; RV64IA-NEXT:    xor a6, a5, a1
-; RV64IA-NEXT:    and a6, a6, a4
-; RV64IA-NEXT:    xor a6, a5, a6
-; RV64IA-NEXT:  .LBB52_3: # in Loop: Header=BB52_1 Depth=1
-; RV64IA-NEXT:    sc.w.rl a6, a6, (a2)
-; RV64IA-NEXT:    bnez a6, .LBB52_1
-; RV64IA-NEXT:  # %bb.4:
-; RV64IA-NEXT:    srlw a0, a5, a0
-; RV64IA-NEXT:    ret
+; RV64IA-WMO-LABEL: atomicrmw_min_i8_release:
+; RV64IA-WMO:       # %bb.0:
+; RV64IA-WMO-NEXT:    andi a2, a0, -4
+; RV64IA-WMO-NEXT:    slli a0, a0, 3
+; RV64IA-WMO-NEXT:    andi a3, a0, 24
+; RV64IA-WMO-NEXT:    li a4, 255
+; RV64IA-WMO-NEXT:    sllw a4, a4, a0
+; RV64IA-WMO-NEXT:    slli a1, a1, 56
+; RV64IA-WMO-NEXT:    srai a1, a1, 56
+; RV64IA-WMO-NEXT:    sllw a1, a1, a0
+; RV64IA-WMO-NEXT:    xori a3, a3, 56
+; RV64IA-WMO-NEXT:  .LBB52_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT:    lr.w a5, (a2)
+; RV64IA-WMO-NEXT:    and a7, a5, a4
+; RV64IA-WMO-NEXT:    mv a6, a5
+; RV64IA-WMO-NEXT:    sll a7, a7, a3
+; RV64IA-WMO-NEXT:    sra a7, a7, a3
+; RV64IA-WMO-NEXT:    bge a1, a7, .LBB52_3
+; RV64IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB52_1 Depth=1
+; RV64IA-WMO-NEXT:    xor a6, a5, a1
+; RV64IA-WMO-NEXT:    and a6, a6, a4
+; RV64IA-WMO-NEXT:    xor a6, a5, a6
+; RV64IA-WMO-NEXT:  .LBB52_3: # in Loop: Header=BB52_1 Depth=1
+; RV64IA-WMO-NEXT:    sc.w.rl a6, a6, (a2)
+; RV64IA-WMO-NEXT:    bnez a6, .LBB52_1
+; RV64IA-WMO-NEXT:  # %bb.4:
+; RV64IA-WMO-NEXT:    srlw a0, a5, a0
+; RV64IA-WMO-NEXT:    ret
+;
+; RV64IA-TSO-LABEL: atomicrmw_min_i8_release:
+; RV64IA-TSO:       # %bb.0:
+; RV64IA-TSO-NEXT:    andi a2, a0, -4
+; RV64IA-TSO-NEXT:    slli a0, a0, 3
+; RV64IA-TSO-NEXT:    andi a3, a0, 24
+; RV64IA-TSO-NEXT:    li a4, 255
+; RV64IA-TSO-NEXT:    sllw a4, a4, a0
+; RV64IA-TSO-NEXT:    slli a1, a1, 56
+; RV64IA-TSO-NEXT:    srai a1, a1, 56
+; RV64IA-TSO-NEXT:    sllw a1, a1, a0
+; RV64IA-TSO-NEXT:    xori a3, a3, 56
+; RV64IA-TSO-NEXT:  .LBB52_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT:    lr.w a5, (a2)
+; RV64IA-TSO-NEXT:    and a7, a5, a4
+; RV64IA-TSO-NEXT:    mv a6, a5
+; RV64IA-TSO-NEXT:    sll a7, a7, a3
+; RV64IA-TSO-NEXT:    sra a7, a7, a3
+; RV64IA-TSO-NEXT:    bge a1, a7, .LBB52_3
+; RV64IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB52_1 Depth=1
+; RV64IA-TSO-NEXT:    xor a6, a5, a1
+; RV64IA-TSO-NEXT:    and a6, a6, a4
+; RV64IA-TSO-NEXT:    xor a6, a5, a6
+; RV64IA-TSO-NEXT:  .LBB52_3: # in Loop: Header=BB52_1 Depth=1
+; RV64IA-TSO-NEXT:    sc.w a6, a6, (a2)
+; RV64IA-TSO-NEXT:    bnez a6, .LBB52_1
+; RV64IA-TSO-NEXT:  # %bb.4:
+; RV64IA-TSO-NEXT:    srlw a0, a5, a0
+; RV64IA-TSO-NEXT:    ret
   %1 = atomicrmw min ptr %a, i8 %b release
   ret i8 %1
 }
@@ -3667,53 +4443,82 @@ define i8 @atomicrmw_min_i8_acq_rel(ptr %a, i8 %b) nounwind {
 ; RV32I-NEXT:    call __atomic_compare_exchange_1 at plt
 ; RV32I-NEXT:    lbu a3, 15(sp)
 ; RV32I-NEXT:    bnez a0, .LBB53_4
-; RV32I-NEXT:  .LBB53_2: # %atomicrmw.start
-; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT:    slli a0, a3, 24
-; RV32I-NEXT:    srai a0, a0, 24
-; RV32I-NEXT:    mv a2, a3
-; RV32I-NEXT:    bge s2, a0, .LBB53_1
-; RV32I-NEXT:  # %bb.3: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB53_2 Depth=1
-; RV32I-NEXT:    mv a2, s1
-; RV32I-NEXT:    j .LBB53_1
-; RV32I-NEXT:  .LBB53_4: # %atomicrmw.end
-; RV32I-NEXT:    mv a0, a3
-; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    addi sp, sp, 32
-; RV32I-NEXT:    ret
-;
-; RV32IA-LABEL: atomicrmw_min_i8_acq_rel:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a2, a0, -4
-; RV32IA-NEXT:    slli a0, a0, 3
-; RV32IA-NEXT:    andi a3, a0, 24
-; RV32IA-NEXT:    li a4, 255
-; RV32IA-NEXT:    sll a4, a4, a0
-; RV32IA-NEXT:    slli a1, a1, 24
-; RV32IA-NEXT:    srai a1, a1, 24
-; RV32IA-NEXT:    sll a1, a1, a0
-; RV32IA-NEXT:    xori a3, a3, 24
-; RV32IA-NEXT:  .LBB53_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w.aq a5, (a2)
-; RV32IA-NEXT:    and a7, a5, a4
-; RV32IA-NEXT:    mv a6, a5
-; RV32IA-NEXT:    sll a7, a7, a3
-; RV32IA-NEXT:    sra a7, a7, a3
-; RV32IA-NEXT:    bge a1, a7, .LBB53_3
-; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB53_1 Depth=1
-; RV32IA-NEXT:    xor a6, a5, a1
-; RV32IA-NEXT:    and a6, a6, a4
-; RV32IA-NEXT:    xor a6, a5, a6
-; RV32IA-NEXT:  .LBB53_3: # in Loop: Header=BB53_1 Depth=1
-; RV32IA-NEXT:    sc.w.rl a6, a6, (a2)
-; RV32IA-NEXT:    bnez a6, .LBB53_1
-; RV32IA-NEXT:  # %bb.4:
-; RV32IA-NEXT:    srl a0, a5, a0
-; RV32IA-NEXT:    ret
+; RV32I-NEXT:  .LBB53_2: # %atomicrmw.start
+; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32I-NEXT:    slli a0, a3, 24
+; RV32I-NEXT:    srai a0, a0, 24
+; RV32I-NEXT:    mv a2, a3
+; RV32I-NEXT:    bge s2, a0, .LBB53_1
+; RV32I-NEXT:  # %bb.3: # %atomicrmw.start
+; RV32I-NEXT:    # in Loop: Header=BB53_2 Depth=1
+; RV32I-NEXT:    mv a2, s1
+; RV32I-NEXT:    j .LBB53_1
+; RV32I-NEXT:  .LBB53_4: # %atomicrmw.end
+; RV32I-NEXT:    mv a0, a3
+; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 32
+; RV32I-NEXT:    ret
+;
+; RV32IA-WMO-LABEL: atomicrmw_min_i8_acq_rel:
+; RV32IA-WMO:       # %bb.0:
+; RV32IA-WMO-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NEXT:    andi a3, a0, 24
+; RV32IA-WMO-NEXT:    li a4, 255
+; RV32IA-WMO-NEXT:    sll a4, a4, a0
+; RV32IA-WMO-NEXT:    slli a1, a1, 24
+; RV32IA-WMO-NEXT:    srai a1, a1, 24
+; RV32IA-WMO-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NEXT:    xori a3, a3, 24
+; RV32IA-WMO-NEXT:  .LBB53_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NEXT:    lr.w.aq a5, (a2)
+; RV32IA-WMO-NEXT:    and a7, a5, a4
+; RV32IA-WMO-NEXT:    mv a6, a5
+; RV32IA-WMO-NEXT:    sll a7, a7, a3
+; RV32IA-WMO-NEXT:    sra a7, a7, a3
+; RV32IA-WMO-NEXT:    bge a1, a7, .LBB53_3
+; RV32IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB53_1 Depth=1
+; RV32IA-WMO-NEXT:    xor a6, a5, a1
+; RV32IA-WMO-NEXT:    and a6, a6, a4
+; RV32IA-WMO-NEXT:    xor a6, a5, a6
+; RV32IA-WMO-NEXT:  .LBB53_3: # in Loop: Header=BB53_1 Depth=1
+; RV32IA-WMO-NEXT:    sc.w.rl a6, a6, (a2)
+; RV32IA-WMO-NEXT:    bnez a6, .LBB53_1
+; RV32IA-WMO-NEXT:  # %bb.4:
+; RV32IA-WMO-NEXT:    srl a0, a5, a0
+; RV32IA-WMO-NEXT:    ret
+;
+; RV32IA-TSO-LABEL: atomicrmw_min_i8_acq_rel:
+; RV32IA-TSO:       # %bb.0:
+; RV32IA-TSO-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NEXT:    andi a3, a0, 24
+; RV32IA-TSO-NEXT:    li a4, 255
+; RV32IA-TSO-NEXT:    sll a4, a4, a0
+; RV32IA-TSO-NEXT:    slli a1, a1, 24
+; RV32IA-TSO-NEXT:    srai a1, a1, 24
+; RV32IA-TSO-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NEXT:    xori a3, a3, 24
+; RV32IA-TSO-NEXT:  .LBB53_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NEXT:    lr.w a5, (a2)
+; RV32IA-TSO-NEXT:    and a7, a5, a4
+; RV32IA-TSO-NEXT:    mv a6, a5
+; RV32IA-TSO-NEXT:    sll a7, a7, a3
+; RV32IA-TSO-NEXT:    sra a7, a7, a3
+; RV32IA-TSO-NEXT:    bge a1, a7, .LBB53_3
+; RV32IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB53_1 Depth=1
+; RV32IA-TSO-NEXT:    xor a6, a5, a1
+; RV32IA-TSO-NEXT:    and a6, a6, a4
+; RV32IA-TSO-NEXT:    xor a6, a5, a6
+; RV32IA-TSO-NEXT:  .LBB53_3: # in Loop: Header=BB53_1 Depth=1
+; RV32IA-TSO-NEXT:    sc.w a6, a6, (a2)
+; RV32IA-TSO-NEXT:    bnez a6, .LBB53_1
+; RV32IA-TSO-NEXT:  # %bb.4:
+; RV32IA-TSO-NEXT:    srl a0, a5, a0
+; RV32IA-TSO-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_min_i8_acq_rel:
 ; RV64I:       # %bb.0:
@@ -3757,34 +4562,63 @@ define i8 @atomicrmw_min_i8_acq_rel(ptr %a, i8 %b) nounwind {
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;
-; RV64IA-LABEL: atomicrmw_min_i8_acq_rel:
-; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a2, a0, -4
-; RV64IA-NEXT:    slli a0, a0, 3
-; RV64IA-NEXT:    andi a3, a0, 24
-; RV64IA-NEXT:    li a4, 255
-; RV64IA-NEXT:    sllw a4, a4, a0
-; RV64IA-NEXT:    slli a1, a1, 56
-; RV64IA-NEXT:    srai a1, a1, 56
-; RV64IA-NEXT:    sllw a1, a1, a0
-; RV64IA-NEXT:    xori a3, a3, 56
-; RV64IA-NEXT:  .LBB53_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w.aq a5, (a2)
-; RV64IA-NEXT:    and a7, a5, a4
-; RV64IA-NEXT:    mv a6, a5
-; RV64IA-NEXT:    sll a7, a7, a3
-; RV64IA-NEXT:    sra a7, a7, a3
-; RV64IA-NEXT:    bge a1, a7, .LBB53_3
-; RV64IA-NEXT:  # %bb.2: # in Loop: Header=BB53_1 Depth=1
-; RV64IA-NEXT:    xor a6, a5, a1
-; RV64IA-NEXT:    and a6, a6, a4
-; RV64IA-NEXT:    xor a6, a5, a6
-; RV64IA-NEXT:  .LBB53_3: # in Loop: Header=BB53_1 Depth=1
-; RV64IA-NEXT:    sc.w.rl a6, a6, (a2)
-; RV64IA-NEXT:    bnez a6, .LBB53_1
-; RV64IA-NEXT:  # %bb.4:
-; RV64IA-NEXT:    srlw a0, a5, a0
-; RV64IA-NEXT:    ret
+; RV64IA-WMO-LABEL: atomicrmw_min_i8_acq_rel:
+; RV64IA-WMO:       # %bb.0:
+; RV64IA-WMO-NEXT:    andi a2, a0, -4
+; RV64IA-WMO-NEXT:    slli a0, a0, 3
+; RV64IA-WMO-NEXT:    andi a3, a0, 24
+; RV64IA-WMO-NEXT:    li a4, 255
+; RV64IA-WMO-NEXT:    sllw a4, a4, a0
+; RV64IA-WMO-NEXT:    slli a1, a1, 56
+; RV64IA-WMO-NEXT:    srai a1, a1, 56
+; RV64IA-WMO-NEXT:    sllw a1, a1, a0
+; RV64IA-WMO-NEXT:    xori a3, a3, 56
+; RV64IA-WMO-NEXT:  .LBB53_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT:    lr.w.aq a5, (a2)
+; RV64IA-WMO-NEXT:    and a7, a5, a4
+; RV64IA-WMO-NEXT:    mv a6, a5
+; RV64IA-WMO-NEXT:    sll a7, a7, a3
+; RV64IA-WMO-NEXT:    sra a7, a7, a3
+; RV64IA-WMO-NEXT:    bge a1, a7, .LBB53_3
+; RV64IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB53_1 Depth=1
+; RV64IA-WMO-NEXT:    xor a6, a5, a1
+; RV64IA-WMO-NEXT:    and a6, a6, a4
+; RV64IA-WMO-NEXT:    xor a6, a5, a6
+; RV64IA-WMO-NEXT:  .LBB53_3: # in Loop: Header=BB53_1 Depth=1
+; RV64IA-WMO-NEXT:    sc.w.rl a6, a6, (a2)
+; RV64IA-WMO-NEXT:    bnez a6, .LBB53_1
+; RV64IA-WMO-NEXT:  # %bb.4:
+; RV64IA-WMO-NEXT:    srlw a0, a5, a0
+; RV64IA-WMO-NEXT:    ret
+;
+; RV64IA-TSO-LABEL: atomicrmw_min_i8_acq_rel:
+; RV64IA-TSO:       # %bb.0:
+; RV64IA-TSO-NEXT:    andi a2, a0, -4
+; RV64IA-TSO-NEXT:    slli a0, a0, 3
+; RV64IA-TSO-NEXT:    andi a3, a0, 24
+; RV64IA-TSO-NEXT:    li a4, 255
+; RV64IA-TSO-NEXT:    sllw a4, a4, a0
+; RV64IA-TSO-NEXT:    slli a1, a1, 56
+; RV64IA-TSO-NEXT:    srai a1, a1, 56
+; RV64IA-TSO-NEXT:    sllw a1, a1, a0
+; RV64IA-TSO-NEXT:    xori a3, a3, 56
+; RV64IA-TSO-NEXT:  .LBB53_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT:    lr.w a5, (a2)
+; RV64IA-TSO-NEXT:    and a7, a5, a4
+; RV64IA-TSO-NEXT:    mv a6, a5
+; RV64IA-TSO-NEXT:    sll a7, a7, a3
+; RV64IA-TSO-NEXT:    sra a7, a7, a3
+; RV64IA-TSO-NEXT:    bge a1, a7, .LBB53_3
+; RV64IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB53_1 Depth=1
+; RV64IA-TSO-NEXT:    xor a6, a5, a1
+; RV64IA-TSO-NEXT:    and a6, a6, a4
+; RV64IA-TSO-NEXT:    xor a6, a5, a6
+; RV64IA-TSO-NEXT:  .LBB53_3: # in Loop: Header=BB53_1 Depth=1
+; RV64IA-TSO-NEXT:    sc.w a6, a6, (a2)
+; RV64IA-TSO-NEXT:    bnez a6, .LBB53_1
+; RV64IA-TSO-NEXT:  # %bb.4:
+; RV64IA-TSO-NEXT:    srlw a0, a5, a0
+; RV64IA-TSO-NEXT:    ret
   %1 = atomicrmw min ptr %a, i8 %b acq_rel
   ret i8 %1
 }
@@ -4108,29 +4942,53 @@ define i8 @atomicrmw_umax_i8_acquire(ptr %a, i8 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: atomicrmw_umax_i8_acquire:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a2, a0, -4
-; RV32IA-NEXT:    slli a0, a0, 3
-; RV32IA-NEXT:    li a3, 255
-; RV32IA-NEXT:    sll a3, a3, a0
-; RV32IA-NEXT:    andi a1, a1, 255
-; RV32IA-NEXT:    sll a1, a1, a0
-; RV32IA-NEXT:  .LBB56_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w.aq a4, (a2)
-; RV32IA-NEXT:    and a6, a4, a3
-; RV32IA-NEXT:    mv a5, a4
-; RV32IA-NEXT:    bgeu a6, a1, .LBB56_3
-; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB56_1 Depth=1
-; RV32IA-NEXT:    xor a5, a4, a1
-; RV32IA-NEXT:    and a5, a5, a3
-; RV32IA-NEXT:    xor a5, a4, a5
-; RV32IA-NEXT:  .LBB56_3: # in Loop: Header=BB56_1 Depth=1
-; RV32IA-NEXT:    sc.w a5, a5, (a2)
-; RV32IA-NEXT:    bnez a5, .LBB56_1
-; RV32IA-NEXT:  # %bb.4:
-; RV32IA-NEXT:    srl a0, a4, a0
-; RV32IA-NEXT:    ret
+; RV32IA-WMO-LABEL: atomicrmw_umax_i8_acquire:
+; RV32IA-WMO:       # %bb.0:
+; RV32IA-WMO-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NEXT:    li a3, 255
+; RV32IA-WMO-NEXT:    sll a3, a3, a0
+; RV32IA-WMO-NEXT:    andi a1, a1, 255
+; RV32IA-WMO-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NEXT:  .LBB56_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NEXT:    lr.w.aq a4, (a2)
+; RV32IA-WMO-NEXT:    and a6, a4, a3
+; RV32IA-WMO-NEXT:    mv a5, a4
+; RV32IA-WMO-NEXT:    bgeu a6, a1, .LBB56_3
+; RV32IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB56_1 Depth=1
+; RV32IA-WMO-NEXT:    xor a5, a4, a1
+; RV32IA-WMO-NEXT:    and a5, a5, a3
+; RV32IA-WMO-NEXT:    xor a5, a4, a5
+; RV32IA-WMO-NEXT:  .LBB56_3: # in Loop: Header=BB56_1 Depth=1
+; RV32IA-WMO-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-WMO-NEXT:    bnez a5, .LBB56_1
+; RV32IA-WMO-NEXT:  # %bb.4:
+; RV32IA-WMO-NEXT:    srl a0, a4, a0
+; RV32IA-WMO-NEXT:    ret
+;
+; RV32IA-TSO-LABEL: atomicrmw_umax_i8_acquire:
+; RV32IA-TSO:       # %bb.0:
+; RV32IA-TSO-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NEXT:    li a3, 255
+; RV32IA-TSO-NEXT:    sll a3, a3, a0
+; RV32IA-TSO-NEXT:    andi a1, a1, 255
+; RV32IA-TSO-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NEXT:  .LBB56_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NEXT:    lr.w a4, (a2)
+; RV32IA-TSO-NEXT:    and a6, a4, a3
+; RV32IA-TSO-NEXT:    mv a5, a4
+; RV32IA-TSO-NEXT:    bgeu a6, a1, .LBB56_3
+; RV32IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB56_1 Depth=1
+; RV32IA-TSO-NEXT:    xor a5, a4, a1
+; RV32IA-TSO-NEXT:    and a5, a5, a3
+; RV32IA-TSO-NEXT:    xor a5, a4, a5
+; RV32IA-TSO-NEXT:  .LBB56_3: # in Loop: Header=BB56_1 Depth=1
+; RV32IA-TSO-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-NEXT:    bnez a5, .LBB56_1
+; RV32IA-TSO-NEXT:  # %bb.4:
+; RV32IA-TSO-NEXT:    srl a0, a4, a0
+; RV32IA-TSO-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_umax_i8_acquire:
 ; RV64I:       # %bb.0:
@@ -4172,29 +5030,53 @@ define i8 @atomicrmw_umax_i8_acquire(ptr %a, i8 %b) nounwind {
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;
-; RV64IA-LABEL: atomicrmw_umax_i8_acquire:
-; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a2, a0, -4
-; RV64IA-NEXT:    slli a0, a0, 3
-; RV64IA-NEXT:    li a3, 255
-; RV64IA-NEXT:    sllw a3, a3, a0
-; RV64IA-NEXT:    andi a1, a1, 255
-; RV64IA-NEXT:    sllw a1, a1, a0
-; RV64IA-NEXT:  .LBB56_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w.aq a4, (a2)
-; RV64IA-NEXT:    and a6, a4, a3
-; RV64IA-NEXT:    mv a5, a4
-; RV64IA-NEXT:    bgeu a6, a1, .LBB56_3
-; RV64IA-NEXT:  # %bb.2: # in Loop: Header=BB56_1 Depth=1
-; RV64IA-NEXT:    xor a5, a4, a1
-; RV64IA-NEXT:    and a5, a5, a3
-; RV64IA-NEXT:    xor a5, a4, a5
-; RV64IA-NEXT:  .LBB56_3: # in Loop: Header=BB56_1 Depth=1
-; RV64IA-NEXT:    sc.w a5, a5, (a2)
-; RV64IA-NEXT:    bnez a5, .LBB56_1
-; RV64IA-NEXT:  # %bb.4:
-; RV64IA-NEXT:    srlw a0, a4, a0
-; RV64IA-NEXT:    ret
+; RV64IA-WMO-LABEL: atomicrmw_umax_i8_acquire:
+; RV64IA-WMO:       # %bb.0:
+; RV64IA-WMO-NEXT:    andi a2, a0, -4
+; RV64IA-WMO-NEXT:    slli a0, a0, 3
+; RV64IA-WMO-NEXT:    li a3, 255
+; RV64IA-WMO-NEXT:    sllw a3, a3, a0
+; RV64IA-WMO-NEXT:    andi a1, a1, 255
+; RV64IA-WMO-NEXT:    sllw a1, a1, a0
+; RV64IA-WMO-NEXT:  .LBB56_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT:    lr.w.aq a4, (a2)
+; RV64IA-WMO-NEXT:    and a6, a4, a3
+; RV64IA-WMO-NEXT:    mv a5, a4
+; RV64IA-WMO-NEXT:    bgeu a6, a1, .LBB56_3
+; RV64IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB56_1 Depth=1
+; RV64IA-WMO-NEXT:    xor a5, a4, a1
+; RV64IA-WMO-NEXT:    and a5, a5, a3
+; RV64IA-WMO-NEXT:    xor a5, a4, a5
+; RV64IA-WMO-NEXT:  .LBB56_3: # in Loop: Header=BB56_1 Depth=1
+; RV64IA-WMO-NEXT:    sc.w a5, a5, (a2)
+; RV64IA-WMO-NEXT:    bnez a5, .LBB56_1
+; RV64IA-WMO-NEXT:  # %bb.4:
+; RV64IA-WMO-NEXT:    srlw a0, a4, a0
+; RV64IA-WMO-NEXT:    ret
+;
+; RV64IA-TSO-LABEL: atomicrmw_umax_i8_acquire:
+; RV64IA-TSO:       # %bb.0:
+; RV64IA-TSO-NEXT:    andi a2, a0, -4
+; RV64IA-TSO-NEXT:    slli a0, a0, 3
+; RV64IA-TSO-NEXT:    li a3, 255
+; RV64IA-TSO-NEXT:    sllw a3, a3, a0
+; RV64IA-TSO-NEXT:    andi a1, a1, 255
+; RV64IA-TSO-NEXT:    sllw a1, a1, a0
+; RV64IA-TSO-NEXT:  .LBB56_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT:    lr.w a4, (a2)
+; RV64IA-TSO-NEXT:    and a6, a4, a3
+; RV64IA-TSO-NEXT:    mv a5, a4
+; RV64IA-TSO-NEXT:    bgeu a6, a1, .LBB56_3
+; RV64IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB56_1 Depth=1
+; RV64IA-TSO-NEXT:    xor a5, a4, a1
+; RV64IA-TSO-NEXT:    and a5, a5, a3
+; RV64IA-TSO-NEXT:    xor a5, a4, a5
+; RV64IA-TSO-NEXT:  .LBB56_3: # in Loop: Header=BB56_1 Depth=1
+; RV64IA-TSO-NEXT:    sc.w a5, a5, (a2)
+; RV64IA-TSO-NEXT:    bnez a5, .LBB56_1
+; RV64IA-TSO-NEXT:  # %bb.4:
+; RV64IA-TSO-NEXT:    srlw a0, a4, a0
+; RV64IA-TSO-NEXT:    ret
   %1 = atomicrmw umax ptr %a, i8 %b acquire
   ret i8 %1
 }
@@ -4240,29 +5122,53 @@ define i8 @atomicrmw_umax_i8_release(ptr %a, i8 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: atomicrmw_umax_i8_release:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a2, a0, -4
-; RV32IA-NEXT:    slli a0, a0, 3
-; RV32IA-NEXT:    li a3, 255
-; RV32IA-NEXT:    sll a3, a3, a0
-; RV32IA-NEXT:    andi a1, a1, 255
-; RV32IA-NEXT:    sll a1, a1, a0
-; RV32IA-NEXT:  .LBB57_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w a4, (a2)
-; RV32IA-NEXT:    and a6, a4, a3
-; RV32IA-NEXT:    mv a5, a4
-; RV32IA-NEXT:    bgeu a6, a1, .LBB57_3
-; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB57_1 Depth=1
-; RV32IA-NEXT:    xor a5, a4, a1
-; RV32IA-NEXT:    and a5, a5, a3
-; RV32IA-NEXT:    xor a5, a4, a5
-; RV32IA-NEXT:  .LBB57_3: # in Loop: Header=BB57_1 Depth=1
-; RV32IA-NEXT:    sc.w.rl a5, a5, (a2)
-; RV32IA-NEXT:    bnez a5, .LBB57_1
-; RV32IA-NEXT:  # %bb.4:
-; RV32IA-NEXT:    srl a0, a4, a0
-; RV32IA-NEXT:    ret
+; RV32IA-WMO-LABEL: atomicrmw_umax_i8_release:
+; RV32IA-WMO:       # %bb.0:
+; RV32IA-WMO-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NEXT:    li a3, 255
+; RV32IA-WMO-NEXT:    sll a3, a3, a0
+; RV32IA-WMO-NEXT:    andi a1, a1, 255
+; RV32IA-WMO-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NEXT:  .LBB57_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NEXT:    lr.w a4, (a2)
+; RV32IA-WMO-NEXT:    and a6, a4, a3
+; RV32IA-WMO-NEXT:    mv a5, a4
+; RV32IA-WMO-NEXT:    bgeu a6, a1, .LBB57_3
+; RV32IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB57_1 Depth=1
+; RV32IA-WMO-NEXT:    xor a5, a4, a1
+; RV32IA-WMO-NEXT:    and a5, a5, a3
+; RV32IA-WMO-NEXT:    xor a5, a4, a5
+; RV32IA-WMO-NEXT:  .LBB57_3: # in Loop: Header=BB57_1 Depth=1
+; RV32IA-WMO-NEXT:    sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-NEXT:    bnez a5, .LBB57_1
+; RV32IA-WMO-NEXT:  # %bb.4:
+; RV32IA-WMO-NEXT:    srl a0, a4, a0
+; RV32IA-WMO-NEXT:    ret
+;
+; RV32IA-TSO-LABEL: atomicrmw_umax_i8_release:
+; RV32IA-TSO:       # %bb.0:
+; RV32IA-TSO-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NEXT:    li a3, 255
+; RV32IA-TSO-NEXT:    sll a3, a3, a0
+; RV32IA-TSO-NEXT:    andi a1, a1, 255
+; RV32IA-TSO-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NEXT:  .LBB57_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NEXT:    lr.w a4, (a2)
+; RV32IA-TSO-NEXT:    and a6, a4, a3
+; RV32IA-TSO-NEXT:    mv a5, a4
+; RV32IA-TSO-NEXT:    bgeu a6, a1, .LBB57_3
+; RV32IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB57_1 Depth=1
+; RV32IA-TSO-NEXT:    xor a5, a4, a1
+; RV32IA-TSO-NEXT:    and a5, a5, a3
+; RV32IA-TSO-NEXT:    xor a5, a4, a5
+; RV32IA-TSO-NEXT:  .LBB57_3: # in Loop: Header=BB57_1 Depth=1
+; RV32IA-TSO-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-NEXT:    bnez a5, .LBB57_1
+; RV32IA-TSO-NEXT:  # %bb.4:
+; RV32IA-TSO-NEXT:    srl a0, a4, a0
+; RV32IA-TSO-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_umax_i8_release:
 ; RV64I:       # %bb.0:
@@ -4304,29 +5210,53 @@ define i8 @atomicrmw_umax_i8_release(ptr %a, i8 %b) nounwind {
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;
-; RV64IA-LABEL: atomicrmw_umax_i8_release:
-; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a2, a0, -4
-; RV64IA-NEXT:    slli a0, a0, 3
-; RV64IA-NEXT:    li a3, 255
-; RV64IA-NEXT:    sllw a3, a3, a0
-; RV64IA-NEXT:    andi a1, a1, 255
-; RV64IA-NEXT:    sllw a1, a1, a0
-; RV64IA-NEXT:  .LBB57_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w a4, (a2)
-; RV64IA-NEXT:    and a6, a4, a3
-; RV64IA-NEXT:    mv a5, a4
-; RV64IA-NEXT:    bgeu a6, a1, .LBB57_3
-; RV64IA-NEXT:  # %bb.2: # in Loop: Header=BB57_1 Depth=1
-; RV64IA-NEXT:    xor a5, a4, a1
-; RV64IA-NEXT:    and a5, a5, a3
-; RV64IA-NEXT:    xor a5, a4, a5
-; RV64IA-NEXT:  .LBB57_3: # in Loop: Header=BB57_1 Depth=1
-; RV64IA-NEXT:    sc.w.rl a5, a5, (a2)
-; RV64IA-NEXT:    bnez a5, .LBB57_1
-; RV64IA-NEXT:  # %bb.4:
-; RV64IA-NEXT:    srlw a0, a4, a0
-; RV64IA-NEXT:    ret
+; RV64IA-WMO-LABEL: atomicrmw_umax_i8_release:
+; RV64IA-WMO:       # %bb.0:
+; RV64IA-WMO-NEXT:    andi a2, a0, -4
+; RV64IA-WMO-NEXT:    slli a0, a0, 3
+; RV64IA-WMO-NEXT:    li a3, 255
+; RV64IA-WMO-NEXT:    sllw a3, a3, a0
+; RV64IA-WMO-NEXT:    andi a1, a1, 255
+; RV64IA-WMO-NEXT:    sllw a1, a1, a0
+; RV64IA-WMO-NEXT:  .LBB57_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT:    lr.w a4, (a2)
+; RV64IA-WMO-NEXT:    and a6, a4, a3
+; RV64IA-WMO-NEXT:    mv a5, a4
+; RV64IA-WMO-NEXT:    bgeu a6, a1, .LBB57_3
+; RV64IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB57_1 Depth=1
+; RV64IA-WMO-NEXT:    xor a5, a4, a1
+; RV64IA-WMO-NEXT:    and a5, a5, a3
+; RV64IA-WMO-NEXT:    xor a5, a4, a5
+; RV64IA-WMO-NEXT:  .LBB57_3: # in Loop: Header=BB57_1 Depth=1
+; RV64IA-WMO-NEXT:    sc.w.rl a5, a5, (a2)
+; RV64IA-WMO-NEXT:    bnez a5, .LBB57_1
+; RV64IA-WMO-NEXT:  # %bb.4:
+; RV64IA-WMO-NEXT:    srlw a0, a4, a0
+; RV64IA-WMO-NEXT:    ret
+;
+; RV64IA-TSO-LABEL: atomicrmw_umax_i8_release:
+; RV64IA-TSO:       # %bb.0:
+; RV64IA-TSO-NEXT:    andi a2, a0, -4
+; RV64IA-TSO-NEXT:    slli a0, a0, 3
+; RV64IA-TSO-NEXT:    li a3, 255
+; RV64IA-TSO-NEXT:    sllw a3, a3, a0
+; RV64IA-TSO-NEXT:    andi a1, a1, 255
+; RV64IA-TSO-NEXT:    sllw a1, a1, a0
+; RV64IA-TSO-NEXT:  .LBB57_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT:    lr.w a4, (a2)
+; RV64IA-TSO-NEXT:    and a6, a4, a3
+; RV64IA-TSO-NEXT:    mv a5, a4
+; RV64IA-TSO-NEXT:    bgeu a6, a1, .LBB57_3
+; RV64IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB57_1 Depth=1
+; RV64IA-TSO-NEXT:    xor a5, a4, a1
+; RV64IA-TSO-NEXT:    and a5, a5, a3
+; RV64IA-TSO-NEXT:    xor a5, a4, a5
+; RV64IA-TSO-NEXT:  .LBB57_3: # in Loop: Header=BB57_1 Depth=1
+; RV64IA-TSO-NEXT:    sc.w a5, a5, (a2)
+; RV64IA-TSO-NEXT:    bnez a5, .LBB57_1
+; RV64IA-TSO-NEXT:  # %bb.4:
+; RV64IA-TSO-NEXT:    srlw a0, a4, a0
+; RV64IA-TSO-NEXT:    ret
   %1 = atomicrmw umax ptr %a, i8 %b release
   ret i8 %1
 }
@@ -4372,29 +5302,53 @@ define i8 @atomicrmw_umax_i8_acq_rel(ptr %a, i8 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: atomicrmw_umax_i8_acq_rel:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a2, a0, -4
-; RV32IA-NEXT:    slli a0, a0, 3
-; RV32IA-NEXT:    li a3, 255
-; RV32IA-NEXT:    sll a3, a3, a0
-; RV32IA-NEXT:    andi a1, a1, 255
-; RV32IA-NEXT:    sll a1, a1, a0
-; RV32IA-NEXT:  .LBB58_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w.aq a4, (a2)
-; RV32IA-NEXT:    and a6, a4, a3
-; RV32IA-NEXT:    mv a5, a4
-; RV32IA-NEXT:    bgeu a6, a1, .LBB58_3
-; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB58_1 Depth=1
-; RV32IA-NEXT:    xor a5, a4, a1
-; RV32IA-NEXT:    and a5, a5, a3
-; RV32IA-NEXT:    xor a5, a4, a5
-; RV32IA-NEXT:  .LBB58_3: # in Loop: Header=BB58_1 Depth=1
-; RV32IA-NEXT:    sc.w.rl a5, a5, (a2)
-; RV32IA-NEXT:    bnez a5, .LBB58_1
-; RV32IA-NEXT:  # %bb.4:
-; RV32IA-NEXT:    srl a0, a4, a0
-; RV32IA-NEXT:    ret
+; RV32IA-WMO-LABEL: atomicrmw_umax_i8_acq_rel:
+; RV32IA-WMO:       # %bb.0:
+; RV32IA-WMO-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NEXT:    li a3, 255
+; RV32IA-WMO-NEXT:    sll a3, a3, a0
+; RV32IA-WMO-NEXT:    andi a1, a1, 255
+; RV32IA-WMO-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NEXT:  .LBB58_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NEXT:    lr.w.aq a4, (a2)
+; RV32IA-WMO-NEXT:    and a6, a4, a3
+; RV32IA-WMO-NEXT:    mv a5, a4
+; RV32IA-WMO-NEXT:    bgeu a6, a1, .LBB58_3
+; RV32IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB58_1 Depth=1
+; RV32IA-WMO-NEXT:    xor a5, a4, a1
+; RV32IA-WMO-NEXT:    and a5, a5, a3
+; RV32IA-WMO-NEXT:    xor a5, a4, a5
+; RV32IA-WMO-NEXT:  .LBB58_3: # in Loop: Header=BB58_1 Depth=1
+; RV32IA-WMO-NEXT:    sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-NEXT:    bnez a5, .LBB58_1
+; RV32IA-WMO-NEXT:  # %bb.4:
+; RV32IA-WMO-NEXT:    srl a0, a4, a0
+; RV32IA-WMO-NEXT:    ret
+;
+; RV32IA-TSO-LABEL: atomicrmw_umax_i8_acq_rel:
+; RV32IA-TSO:       # %bb.0:
+; RV32IA-TSO-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NEXT:    li a3, 255
+; RV32IA-TSO-NEXT:    sll a3, a3, a0
+; RV32IA-TSO-NEXT:    andi a1, a1, 255
+; RV32IA-TSO-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NEXT:  .LBB58_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NEXT:    lr.w a4, (a2)
+; RV32IA-TSO-NEXT:    and a6, a4, a3
+; RV32IA-TSO-NEXT:    mv a5, a4
+; RV32IA-TSO-NEXT:    bgeu a6, a1, .LBB58_3
+; RV32IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB58_1 Depth=1
+; RV32IA-TSO-NEXT:    xor a5, a4, a1
+; RV32IA-TSO-NEXT:    and a5, a5, a3
+; RV32IA-TSO-NEXT:    xor a5, a4, a5
+; RV32IA-TSO-NEXT:  .LBB58_3: # in Loop: Header=BB58_1 Depth=1
+; RV32IA-TSO-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-NEXT:    bnez a5, .LBB58_1
+; RV32IA-TSO-NEXT:  # %bb.4:
+; RV32IA-TSO-NEXT:    srl a0, a4, a0
+; RV32IA-TSO-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_umax_i8_acq_rel:
 ; RV64I:       # %bb.0:
@@ -4436,29 +5390,53 @@ define i8 @atomicrmw_umax_i8_acq_rel(ptr %a, i8 %b) nounwind {
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;
-; RV64IA-LABEL: atomicrmw_umax_i8_acq_rel:
-; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a2, a0, -4
-; RV64IA-NEXT:    slli a0, a0, 3
-; RV64IA-NEXT:    li a3, 255
-; RV64IA-NEXT:    sllw a3, a3, a0
-; RV64IA-NEXT:    andi a1, a1, 255
-; RV64IA-NEXT:    sllw a1, a1, a0
-; RV64IA-NEXT:  .LBB58_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w.aq a4, (a2)
-; RV64IA-NEXT:    and a6, a4, a3
-; RV64IA-NEXT:    mv a5, a4
-; RV64IA-NEXT:    bgeu a6, a1, .LBB58_3
-; RV64IA-NEXT:  # %bb.2: # in Loop: Header=BB58_1 Depth=1
-; RV64IA-NEXT:    xor a5, a4, a1
-; RV64IA-NEXT:    and a5, a5, a3
-; RV64IA-NEXT:    xor a5, a4, a5
-; RV64IA-NEXT:  .LBB58_3: # in Loop: Header=BB58_1 Depth=1
-; RV64IA-NEXT:    sc.w.rl a5, a5, (a2)
-; RV64IA-NEXT:    bnez a5, .LBB58_1
-; RV64IA-NEXT:  # %bb.4:
-; RV64IA-NEXT:    srlw a0, a4, a0
-; RV64IA-NEXT:    ret
+; RV64IA-WMO-LABEL: atomicrmw_umax_i8_acq_rel:
+; RV64IA-WMO:       # %bb.0:
+; RV64IA-WMO-NEXT:    andi a2, a0, -4
+; RV64IA-WMO-NEXT:    slli a0, a0, 3
+; RV64IA-WMO-NEXT:    li a3, 255
+; RV64IA-WMO-NEXT:    sllw a3, a3, a0
+; RV64IA-WMO-NEXT:    andi a1, a1, 255
+; RV64IA-WMO-NEXT:    sllw a1, a1, a0
+; RV64IA-WMO-NEXT:  .LBB58_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT:    lr.w.aq a4, (a2)
+; RV64IA-WMO-NEXT:    and a6, a4, a3
+; RV64IA-WMO-NEXT:    mv a5, a4
+; RV64IA-WMO-NEXT:    bgeu a6, a1, .LBB58_3
+; RV64IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB58_1 Depth=1
+; RV64IA-WMO-NEXT:    xor a5, a4, a1
+; RV64IA-WMO-NEXT:    and a5, a5, a3
+; RV64IA-WMO-NEXT:    xor a5, a4, a5
+; RV64IA-WMO-NEXT:  .LBB58_3: # in Loop: Header=BB58_1 Depth=1
+; RV64IA-WMO-NEXT:    sc.w.rl a5, a5, (a2)
+; RV64IA-WMO-NEXT:    bnez a5, .LBB58_1
+; RV64IA-WMO-NEXT:  # %bb.4:
+; RV64IA-WMO-NEXT:    srlw a0, a4, a0
+; RV64IA-WMO-NEXT:    ret
+;
+; RV64IA-TSO-LABEL: atomicrmw_umax_i8_acq_rel:
+; RV64IA-TSO:       # %bb.0:
+; RV64IA-TSO-NEXT:    andi a2, a0, -4
+; RV64IA-TSO-NEXT:    slli a0, a0, 3
+; RV64IA-TSO-NEXT:    li a3, 255
+; RV64IA-TSO-NEXT:    sllw a3, a3, a0
+; RV64IA-TSO-NEXT:    andi a1, a1, 255
+; RV64IA-TSO-NEXT:    sllw a1, a1, a0
+; RV64IA-TSO-NEXT:  .LBB58_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT:    lr.w a4, (a2)
+; RV64IA-TSO-NEXT:    and a6, a4, a3
+; RV64IA-TSO-NEXT:    mv a5, a4
+; RV64IA-TSO-NEXT:    bgeu a6, a1, .LBB58_3
+; RV64IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB58_1 Depth=1
+; RV64IA-TSO-NEXT:    xor a5, a4, a1
+; RV64IA-TSO-NEXT:    and a5, a5, a3
+; RV64IA-TSO-NEXT:    xor a5, a4, a5
+; RV64IA-TSO-NEXT:  .LBB58_3: # in Loop: Header=BB58_1 Depth=1
+; RV64IA-TSO-NEXT:    sc.w a5, a5, (a2)
+; RV64IA-TSO-NEXT:    bnez a5, .LBB58_1
+; RV64IA-TSO-NEXT:  # %bb.4:
+; RV64IA-TSO-NEXT:    srlw a0, a4, a0
+; RV64IA-TSO-NEXT:    ret
   %1 = atomicrmw umax ptr %a, i8 %b acq_rel
   ret i8 %1
 }
@@ -4768,29 +5746,53 @@ define i8 @atomicrmw_umin_i8_acquire(ptr %a, i8 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: atomicrmw_umin_i8_acquire:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a2, a0, -4
-; RV32IA-NEXT:    slli a0, a0, 3
-; RV32IA-NEXT:    li a3, 255
-; RV32IA-NEXT:    sll a3, a3, a0
-; RV32IA-NEXT:    andi a1, a1, 255
-; RV32IA-NEXT:    sll a1, a1, a0
-; RV32IA-NEXT:  .LBB61_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w.aq a4, (a2)
-; RV32IA-NEXT:    and a6, a4, a3
-; RV32IA-NEXT:    mv a5, a4
-; RV32IA-NEXT:    bgeu a1, a6, .LBB61_3
-; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB61_1 Depth=1
-; RV32IA-NEXT:    xor a5, a4, a1
-; RV32IA-NEXT:    and a5, a5, a3
-; RV32IA-NEXT:    xor a5, a4, a5
-; RV32IA-NEXT:  .LBB61_3: # in Loop: Header=BB61_1 Depth=1
-; RV32IA-NEXT:    sc.w a5, a5, (a2)
-; RV32IA-NEXT:    bnez a5, .LBB61_1
-; RV32IA-NEXT:  # %bb.4:
-; RV32IA-NEXT:    srl a0, a4, a0
-; RV32IA-NEXT:    ret
+; RV32IA-WMO-LABEL: atomicrmw_umin_i8_acquire:
+; RV32IA-WMO:       # %bb.0:
+; RV32IA-WMO-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NEXT:    li a3, 255
+; RV32IA-WMO-NEXT:    sll a3, a3, a0
+; RV32IA-WMO-NEXT:    andi a1, a1, 255
+; RV32IA-WMO-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NEXT:  .LBB61_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NEXT:    lr.w.aq a4, (a2)
+; RV32IA-WMO-NEXT:    and a6, a4, a3
+; RV32IA-WMO-NEXT:    mv a5, a4
+; RV32IA-WMO-NEXT:    bgeu a1, a6, .LBB61_3
+; RV32IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB61_1 Depth=1
+; RV32IA-WMO-NEXT:    xor a5, a4, a1
+; RV32IA-WMO-NEXT:    and a5, a5, a3
+; RV32IA-WMO-NEXT:    xor a5, a4, a5
+; RV32IA-WMO-NEXT:  .LBB61_3: # in Loop: Header=BB61_1 Depth=1
+; RV32IA-WMO-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-WMO-NEXT:    bnez a5, .LBB61_1
+; RV32IA-WMO-NEXT:  # %bb.4:
+; RV32IA-WMO-NEXT:    srl a0, a4, a0
+; RV32IA-WMO-NEXT:    ret
+;
+; RV32IA-TSO-LABEL: atomicrmw_umin_i8_acquire:
+; RV32IA-TSO:       # %bb.0:
+; RV32IA-TSO-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NEXT:    li a3, 255
+; RV32IA-TSO-NEXT:    sll a3, a3, a0
+; RV32IA-TSO-NEXT:    andi a1, a1, 255
+; RV32IA-TSO-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NEXT:  .LBB61_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NEXT:    lr.w a4, (a2)
+; RV32IA-TSO-NEXT:    and a6, a4, a3
+; RV32IA-TSO-NEXT:    mv a5, a4
+; RV32IA-TSO-NEXT:    bgeu a1, a6, .LBB61_3
+; RV32IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB61_1 Depth=1
+; RV32IA-TSO-NEXT:    xor a5, a4, a1
+; RV32IA-TSO-NEXT:    and a5, a5, a3
+; RV32IA-TSO-NEXT:    xor a5, a4, a5
+; RV32IA-TSO-NEXT:  .LBB61_3: # in Loop: Header=BB61_1 Depth=1
+; RV32IA-TSO-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-NEXT:    bnez a5, .LBB61_1
+; RV32IA-TSO-NEXT:  # %bb.4:
+; RV32IA-TSO-NEXT:    srl a0, a4, a0
+; RV32IA-TSO-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_umin_i8_acquire:
 ; RV64I:       # %bb.0:
@@ -4832,29 +5834,53 @@ define i8 @atomicrmw_umin_i8_acquire(ptr %a, i8 %b) nounwind {
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;
-; RV64IA-LABEL: atomicrmw_umin_i8_acquire:
-; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a2, a0, -4
-; RV64IA-NEXT:    slli a0, a0, 3
-; RV64IA-NEXT:    li a3, 255
-; RV64IA-NEXT:    sllw a3, a3, a0
-; RV64IA-NEXT:    andi a1, a1, 255
-; RV64IA-NEXT:    sllw a1, a1, a0
-; RV64IA-NEXT:  .LBB61_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w.aq a4, (a2)
-; RV64IA-NEXT:    and a6, a4, a3
-; RV64IA-NEXT:    mv a5, a4
-; RV64IA-NEXT:    bgeu a1, a6, .LBB61_3
-; RV64IA-NEXT:  # %bb.2: # in Loop: Header=BB61_1 Depth=1
-; RV64IA-NEXT:    xor a5, a4, a1
-; RV64IA-NEXT:    and a5, a5, a3
-; RV64IA-NEXT:    xor a5, a4, a5
-; RV64IA-NEXT:  .LBB61_3: # in Loop: Header=BB61_1 Depth=1
-; RV64IA-NEXT:    sc.w a5, a5, (a2)
-; RV64IA-NEXT:    bnez a5, .LBB61_1
-; RV64IA-NEXT:  # %bb.4:
-; RV64IA-NEXT:    srlw a0, a4, a0
-; RV64IA-NEXT:    ret
+; RV64IA-WMO-LABEL: atomicrmw_umin_i8_acquire:
+; RV64IA-WMO:       # %bb.0:
+; RV64IA-WMO-NEXT:    andi a2, a0, -4
+; RV64IA-WMO-NEXT:    slli a0, a0, 3
+; RV64IA-WMO-NEXT:    li a3, 255
+; RV64IA-WMO-NEXT:    sllw a3, a3, a0
+; RV64IA-WMO-NEXT:    andi a1, a1, 255
+; RV64IA-WMO-NEXT:    sllw a1, a1, a0
+; RV64IA-WMO-NEXT:  .LBB61_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT:    lr.w.aq a4, (a2)
+; RV64IA-WMO-NEXT:    and a6, a4, a3
+; RV64IA-WMO-NEXT:    mv a5, a4
+; RV64IA-WMO-NEXT:    bgeu a1, a6, .LBB61_3
+; RV64IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB61_1 Depth=1
+; RV64IA-WMO-NEXT:    xor a5, a4, a1
+; RV64IA-WMO-NEXT:    and a5, a5, a3
+; RV64IA-WMO-NEXT:    xor a5, a4, a5
+; RV64IA-WMO-NEXT:  .LBB61_3: # in Loop: Header=BB61_1 Depth=1
+; RV64IA-WMO-NEXT:    sc.w a5, a5, (a2)
+; RV64IA-WMO-NEXT:    bnez a5, .LBB61_1
+; RV64IA-WMO-NEXT:  # %bb.4:
+; RV64IA-WMO-NEXT:    srlw a0, a4, a0
+; RV64IA-WMO-NEXT:    ret
+;
+; RV64IA-TSO-LABEL: atomicrmw_umin_i8_acquire:
+; RV64IA-TSO:       # %bb.0:
+; RV64IA-TSO-NEXT:    andi a2, a0, -4
+; RV64IA-TSO-NEXT:    slli a0, a0, 3
+; RV64IA-TSO-NEXT:    li a3, 255
+; RV64IA-TSO-NEXT:    sllw a3, a3, a0
+; RV64IA-TSO-NEXT:    andi a1, a1, 255
+; RV64IA-TSO-NEXT:    sllw a1, a1, a0
+; RV64IA-TSO-NEXT:  .LBB61_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT:    lr.w a4, (a2)
+; RV64IA-TSO-NEXT:    and a6, a4, a3
+; RV64IA-TSO-NEXT:    mv a5, a4
+; RV64IA-TSO-NEXT:    bgeu a1, a6, .LBB61_3
+; RV64IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB61_1 Depth=1
+; RV64IA-TSO-NEXT:    xor a5, a4, a1
+; RV64IA-TSO-NEXT:    and a5, a5, a3
+; RV64IA-TSO-NEXT:    xor a5, a4, a5
+; RV64IA-TSO-NEXT:  .LBB61_3: # in Loop: Header=BB61_1 Depth=1
+; RV64IA-TSO-NEXT:    sc.w a5, a5, (a2)
+; RV64IA-TSO-NEXT:    bnez a5, .LBB61_1
+; RV64IA-TSO-NEXT:  # %bb.4:
+; RV64IA-TSO-NEXT:    srlw a0, a4, a0
+; RV64IA-TSO-NEXT:    ret
   %1 = atomicrmw umin ptr %a, i8 %b acquire
   ret i8 %1
 }
@@ -4900,29 +5926,53 @@ define i8 @atomicrmw_umin_i8_release(ptr %a, i8 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: atomicrmw_umin_i8_release:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a2, a0, -4
-; RV32IA-NEXT:    slli a0, a0, 3
-; RV32IA-NEXT:    li a3, 255
-; RV32IA-NEXT:    sll a3, a3, a0
-; RV32IA-NEXT:    andi a1, a1, 255
-; RV32IA-NEXT:    sll a1, a1, a0
-; RV32IA-NEXT:  .LBB62_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w a4, (a2)
-; RV32IA-NEXT:    and a6, a4, a3
-; RV32IA-NEXT:    mv a5, a4
-; RV32IA-NEXT:    bgeu a1, a6, .LBB62_3
-; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB62_1 Depth=1
-; RV32IA-NEXT:    xor a5, a4, a1
-; RV32IA-NEXT:    and a5, a5, a3
-; RV32IA-NEXT:    xor a5, a4, a5
-; RV32IA-NEXT:  .LBB62_3: # in Loop: Header=BB62_1 Depth=1
-; RV32IA-NEXT:    sc.w.rl a5, a5, (a2)
-; RV32IA-NEXT:    bnez a5, .LBB62_1
-; RV32IA-NEXT:  # %bb.4:
-; RV32IA-NEXT:    srl a0, a4, a0
-; RV32IA-NEXT:    ret
+; RV32IA-WMO-LABEL: atomicrmw_umin_i8_release:
+; RV32IA-WMO:       # %bb.0:
+; RV32IA-WMO-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NEXT:    li a3, 255
+; RV32IA-WMO-NEXT:    sll a3, a3, a0
+; RV32IA-WMO-NEXT:    andi a1, a1, 255
+; RV32IA-WMO-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NEXT:  .LBB62_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NEXT:    lr.w a4, (a2)
+; RV32IA-WMO-NEXT:    and a6, a4, a3
+; RV32IA-WMO-NEXT:    mv a5, a4
+; RV32IA-WMO-NEXT:    bgeu a1, a6, .LBB62_3
+; RV32IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB62_1 Depth=1
+; RV32IA-WMO-NEXT:    xor a5, a4, a1
+; RV32IA-WMO-NEXT:    and a5, a5, a3
+; RV32IA-WMO-NEXT:    xor a5, a4, a5
+; RV32IA-WMO-NEXT:  .LBB62_3: # in Loop: Header=BB62_1 Depth=1
+; RV32IA-WMO-NEXT:    sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-NEXT:    bnez a5, .LBB62_1
+; RV32IA-WMO-NEXT:  # %bb.4:
+; RV32IA-WMO-NEXT:    srl a0, a4, a0
+; RV32IA-WMO-NEXT:    ret
+;
+; RV32IA-TSO-LABEL: atomicrmw_umin_i8_release:
+; RV32IA-TSO:       # %bb.0:
+; RV32IA-TSO-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NEXT:    li a3, 255
+; RV32IA-TSO-NEXT:    sll a3, a3, a0
+; RV32IA-TSO-NEXT:    andi a1, a1, 255
+; RV32IA-TSO-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NEXT:  .LBB62_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NEXT:    lr.w a4, (a2)
+; RV32IA-TSO-NEXT:    and a6, a4, a3
+; RV32IA-TSO-NEXT:    mv a5, a4
+; RV32IA-TSO-NEXT:    bgeu a1, a6, .LBB62_3
+; RV32IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB62_1 Depth=1
+; RV32IA-TSO-NEXT:    xor a5, a4, a1
+; RV32IA-TSO-NEXT:    and a5, a5, a3
+; RV32IA-TSO-NEXT:    xor a5, a4, a5
+; RV32IA-TSO-NEXT:  .LBB62_3: # in Loop: Header=BB62_1 Depth=1
+; RV32IA-TSO-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-NEXT:    bnez a5, .LBB62_1
+; RV32IA-TSO-NEXT:  # %bb.4:
+; RV32IA-TSO-NEXT:    srl a0, a4, a0
+; RV32IA-TSO-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_umin_i8_release:
 ; RV64I:       # %bb.0:
@@ -4964,29 +6014,53 @@ define i8 @atomicrmw_umin_i8_release(ptr %a, i8 %b) nounwind {
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;
-; RV64IA-LABEL: atomicrmw_umin_i8_release:
-; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a2, a0, -4
-; RV64IA-NEXT:    slli a0, a0, 3
-; RV64IA-NEXT:    li a3, 255
-; RV64IA-NEXT:    sllw a3, a3, a0
-; RV64IA-NEXT:    andi a1, a1, 255
-; RV64IA-NEXT:    sllw a1, a1, a0
-; RV64IA-NEXT:  .LBB62_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w a4, (a2)
-; RV64IA-NEXT:    and a6, a4, a3
-; RV64IA-NEXT:    mv a5, a4
-; RV64IA-NEXT:    bgeu a1, a6, .LBB62_3
-; RV64IA-NEXT:  # %bb.2: # in Loop: Header=BB62_1 Depth=1
-; RV64IA-NEXT:    xor a5, a4, a1
-; RV64IA-NEXT:    and a5, a5, a3
-; RV64IA-NEXT:    xor a5, a4, a5
-; RV64IA-NEXT:  .LBB62_3: # in Loop: Header=BB62_1 Depth=1
-; RV64IA-NEXT:    sc.w.rl a5, a5, (a2)
-; RV64IA-NEXT:    bnez a5, .LBB62_1
-; RV64IA-NEXT:  # %bb.4:
-; RV64IA-NEXT:    srlw a0, a4, a0
-; RV64IA-NEXT:    ret
+; RV64IA-WMO-LABEL: atomicrmw_umin_i8_release:
+; RV64IA-WMO:       # %bb.0:
+; RV64IA-WMO-NEXT:    andi a2, a0, -4
+; RV64IA-WMO-NEXT:    slli a0, a0, 3
+; RV64IA-WMO-NEXT:    li a3, 255
+; RV64IA-WMO-NEXT:    sllw a3, a3, a0
+; RV64IA-WMO-NEXT:    andi a1, a1, 255
+; RV64IA-WMO-NEXT:    sllw a1, a1, a0
+; RV64IA-WMO-NEXT:  .LBB62_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT:    lr.w a4, (a2)
+; RV64IA-WMO-NEXT:    and a6, a4, a3
+; RV64IA-WMO-NEXT:    mv a5, a4
+; RV64IA-WMO-NEXT:    bgeu a1, a6, .LBB62_3
+; RV64IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB62_1 Depth=1
+; RV64IA-WMO-NEXT:    xor a5, a4, a1
+; RV64IA-WMO-NEXT:    and a5, a5, a3
+; RV64IA-WMO-NEXT:    xor a5, a4, a5
+; RV64IA-WMO-NEXT:  .LBB62_3: # in Loop: Header=BB62_1 Depth=1
+; RV64IA-WMO-NEXT:    sc.w.rl a5, a5, (a2)
+; RV64IA-WMO-NEXT:    bnez a5, .LBB62_1
+; RV64IA-WMO-NEXT:  # %bb.4:
+; RV64IA-WMO-NEXT:    srlw a0, a4, a0
+; RV64IA-WMO-NEXT:    ret
+;
+; RV64IA-TSO-LABEL: atomicrmw_umin_i8_release:
+; RV64IA-TSO:       # %bb.0:
+; RV64IA-TSO-NEXT:    andi a2, a0, -4
+; RV64IA-TSO-NEXT:    slli a0, a0, 3
+; RV64IA-TSO-NEXT:    li a3, 255
+; RV64IA-TSO-NEXT:    sllw a3, a3, a0
+; RV64IA-TSO-NEXT:    andi a1, a1, 255
+; RV64IA-TSO-NEXT:    sllw a1, a1, a0
+; RV64IA-TSO-NEXT:  .LBB62_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT:    lr.w a4, (a2)
+; RV64IA-TSO-NEXT:    and a6, a4, a3
+; RV64IA-TSO-NEXT:    mv a5, a4
+; RV64IA-TSO-NEXT:    bgeu a1, a6, .LBB62_3
+; RV64IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB62_1 Depth=1
+; RV64IA-TSO-NEXT:    xor a5, a4, a1
+; RV64IA-TSO-NEXT:    and a5, a5, a3
+; RV64IA-TSO-NEXT:    xor a5, a4, a5
+; RV64IA-TSO-NEXT:  .LBB62_3: # in Loop: Header=BB62_1 Depth=1
+; RV64IA-TSO-NEXT:    sc.w a5, a5, (a2)
+; RV64IA-TSO-NEXT:    bnez a5, .LBB62_1
+; RV64IA-TSO-NEXT:  # %bb.4:
+; RV64IA-TSO-NEXT:    srlw a0, a4, a0
+; RV64IA-TSO-NEXT:    ret
   %1 = atomicrmw umin ptr %a, i8 %b release
   ret i8 %1
 }
@@ -5032,29 +6106,53 @@ define i8 @atomicrmw_umin_i8_acq_rel(ptr %a, i8 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: atomicrmw_umin_i8_acq_rel:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a2, a0, -4
-; RV32IA-NEXT:    slli a0, a0, 3
-; RV32IA-NEXT:    li a3, 255
-; RV32IA-NEXT:    sll a3, a3, a0
-; RV32IA-NEXT:    andi a1, a1, 255
-; RV32IA-NEXT:    sll a1, a1, a0
-; RV32IA-NEXT:  .LBB63_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w.aq a4, (a2)
-; RV32IA-NEXT:    and a6, a4, a3
-; RV32IA-NEXT:    mv a5, a4
-; RV32IA-NEXT:    bgeu a1, a6, .LBB63_3
-; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB63_1 Depth=1
-; RV32IA-NEXT:    xor a5, a4, a1
-; RV32IA-NEXT:    and a5, a5, a3
-; RV32IA-NEXT:    xor a5, a4, a5
-; RV32IA-NEXT:  .LBB63_3: # in Loop: Header=BB63_1 Depth=1
-; RV32IA-NEXT:    sc.w.rl a5, a5, (a2)
-; RV32IA-NEXT:    bnez a5, .LBB63_1
-; RV32IA-NEXT:  # %bb.4:
-; RV32IA-NEXT:    srl a0, a4, a0
-; RV32IA-NEXT:    ret
+; RV32IA-WMO-LABEL: atomicrmw_umin_i8_acq_rel:
+; RV32IA-WMO:       # %bb.0:
+; RV32IA-WMO-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NEXT:    li a3, 255
+; RV32IA-WMO-NEXT:    sll a3, a3, a0
+; RV32IA-WMO-NEXT:    andi a1, a1, 255
+; RV32IA-WMO-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NEXT:  .LBB63_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NEXT:    lr.w.aq a4, (a2)
+; RV32IA-WMO-NEXT:    and a6, a4, a3
+; RV32IA-WMO-NEXT:    mv a5, a4
+; RV32IA-WMO-NEXT:    bgeu a1, a6, .LBB63_3
+; RV32IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB63_1 Depth=1
+; RV32IA-WMO-NEXT:    xor a5, a4, a1
+; RV32IA-WMO-NEXT:    and a5, a5, a3
+; RV32IA-WMO-NEXT:    xor a5, a4, a5
+; RV32IA-WMO-NEXT:  .LBB63_3: # in Loop: Header=BB63_1 Depth=1
+; RV32IA-WMO-NEXT:    sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-NEXT:    bnez a5, .LBB63_1
+; RV32IA-WMO-NEXT:  # %bb.4:
+; RV32IA-WMO-NEXT:    srl a0, a4, a0
+; RV32IA-WMO-NEXT:    ret
+;
+; RV32IA-TSO-LABEL: atomicrmw_umin_i8_acq_rel:
+; RV32IA-TSO:       # %bb.0:
+; RV32IA-TSO-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NEXT:    li a3, 255
+; RV32IA-TSO-NEXT:    sll a3, a3, a0
+; RV32IA-TSO-NEXT:    andi a1, a1, 255
+; RV32IA-TSO-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NEXT:  .LBB63_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NEXT:    lr.w a4, (a2)
+; RV32IA-TSO-NEXT:    and a6, a4, a3
+; RV32IA-TSO-NEXT:    mv a5, a4
+; RV32IA-TSO-NEXT:    bgeu a1, a6, .LBB63_3
+; RV32IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB63_1 Depth=1
+; RV32IA-TSO-NEXT:    xor a5, a4, a1
+; RV32IA-TSO-NEXT:    and a5, a5, a3
+; RV32IA-TSO-NEXT:    xor a5, a4, a5
+; RV32IA-TSO-NEXT:  .LBB63_3: # in Loop: Header=BB63_1 Depth=1
+; RV32IA-TSO-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-NEXT:    bnez a5, .LBB63_1
+; RV32IA-TSO-NEXT:  # %bb.4:
+; RV32IA-TSO-NEXT:    srl a0, a4, a0
+; RV32IA-TSO-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_umin_i8_acq_rel:
 ; RV64I:       # %bb.0:
@@ -5096,29 +6194,53 @@ define i8 @atomicrmw_umin_i8_acq_rel(ptr %a, i8 %b) nounwind {
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;
-; RV64IA-LABEL: atomicrmw_umin_i8_acq_rel:
-; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a2, a0, -4
-; RV64IA-NEXT:    slli a0, a0, 3
-; RV64IA-NEXT:    li a3, 255
-; RV64IA-NEXT:    sllw a3, a3, a0
-; RV64IA-NEXT:    andi a1, a1, 255
-; RV64IA-NEXT:    sllw a1, a1, a0
-; RV64IA-NEXT:  .LBB63_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w.aq a4, (a2)
-; RV64IA-NEXT:    and a6, a4, a3
-; RV64IA-NEXT:    mv a5, a4
-; RV64IA-NEXT:    bgeu a1, a6, .LBB63_3
-; RV64IA-NEXT:  # %bb.2: # in Loop: Header=BB63_1 Depth=1
-; RV64IA-NEXT:    xor a5, a4, a1
-; RV64IA-NEXT:    and a5, a5, a3
-; RV64IA-NEXT:    xor a5, a4, a5
-; RV64IA-NEXT:  .LBB63_3: # in Loop: Header=BB63_1 Depth=1
-; RV64IA-NEXT:    sc.w.rl a5, a5, (a2)
-; RV64IA-NEXT:    bnez a5, .LBB63_1
-; RV64IA-NEXT:  # %bb.4:
-; RV64IA-NEXT:    srlw a0, a4, a0
-; RV64IA-NEXT:    ret
+; RV64IA-WMO-LABEL: atomicrmw_umin_i8_acq_rel:
+; RV64IA-WMO:       # %bb.0:
+; RV64IA-WMO-NEXT:    andi a2, a0, -4
+; RV64IA-WMO-NEXT:    slli a0, a0, 3
+; RV64IA-WMO-NEXT:    li a3, 255
+; RV64IA-WMO-NEXT:    sllw a3, a3, a0
+; RV64IA-WMO-NEXT:    andi a1, a1, 255
+; RV64IA-WMO-NEXT:    sllw a1, a1, a0
+; RV64IA-WMO-NEXT:  .LBB63_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT:    lr.w.aq a4, (a2)
+; RV64IA-WMO-NEXT:    and a6, a4, a3
+; RV64IA-WMO-NEXT:    mv a5, a4
+; RV64IA-WMO-NEXT:    bgeu a1, a6, .LBB63_3
+; RV64IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB63_1 Depth=1
+; RV64IA-WMO-NEXT:    xor a5, a4, a1
+; RV64IA-WMO-NEXT:    and a5, a5, a3
+; RV64IA-WMO-NEXT:    xor a5, a4, a5
+; RV64IA-WMO-NEXT:  .LBB63_3: # in Loop: Header=BB63_1 Depth=1
+; RV64IA-WMO-NEXT:    sc.w.rl a5, a5, (a2)
+; RV64IA-WMO-NEXT:    bnez a5, .LBB63_1
+; RV64IA-WMO-NEXT:  # %bb.4:
+; RV64IA-WMO-NEXT:    srlw a0, a4, a0
+; RV64IA-WMO-NEXT:    ret
+;
+; RV64IA-TSO-LABEL: atomicrmw_umin_i8_acq_rel:
+; RV64IA-TSO:       # %bb.0:
+; RV64IA-TSO-NEXT:    andi a2, a0, -4
+; RV64IA-TSO-NEXT:    slli a0, a0, 3
+; RV64IA-TSO-NEXT:    li a3, 255
+; RV64IA-TSO-NEXT:    sllw a3, a3, a0
+; RV64IA-TSO-NEXT:    andi a1, a1, 255
+; RV64IA-TSO-NEXT:    sllw a1, a1, a0
+; RV64IA-TSO-NEXT:  .LBB63_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT:    lr.w a4, (a2)
+; RV64IA-TSO-NEXT:    and a6, a4, a3
+; RV64IA-TSO-NEXT:    mv a5, a4
+; RV64IA-TSO-NEXT:    bgeu a1, a6, .LBB63_3
+; RV64IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB63_1 Depth=1
+; RV64IA-TSO-NEXT:    xor a5, a4, a1
+; RV64IA-TSO-NEXT:    and a5, a5, a3
+; RV64IA-TSO-NEXT:    xor a5, a4, a5
+; RV64IA-TSO-NEXT:  .LBB63_3: # in Loop: Header=BB63_1 Depth=1
+; RV64IA-TSO-NEXT:    sc.w a5, a5, (a2)
+; RV64IA-TSO-NEXT:    bnez a5, .LBB63_1
+; RV64IA-TSO-NEXT:  # %bb.4:
+; RV64IA-TSO-NEXT:    srlw a0, a4, a0
+; RV64IA-TSO-NEXT:    ret
   %1 = atomicrmw umin ptr %a, i8 %b acq_rel
   ret i8 %1
 }
@@ -5328,30 +6450,51 @@ define i16 @atomicrmw_xchg_i16_acquire(ptr %a, i16 %b) nounwind {
 ; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    li a2, 2
 ; RV32I-NEXT:    call __atomic_exchange_2 at plt
-; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    addi sp, sp, 16
-; RV32I-NEXT:    ret
-;
-; RV32IA-LABEL: atomicrmw_xchg_i16_acquire:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a2, a0, -4
-; RV32IA-NEXT:    slli a0, a0, 3
-; RV32IA-NEXT:    lui a3, 16
-; RV32IA-NEXT:    addi a3, a3, -1
-; RV32IA-NEXT:    sll a4, a3, a0
-; RV32IA-NEXT:    and a1, a1, a3
-; RV32IA-NEXT:    sll a1, a1, a0
-; RV32IA-NEXT:  .LBB66_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w.aq a3, (a2)
-; RV32IA-NEXT:    mv a5, a1
-; RV32IA-NEXT:    xor a5, a3, a5
-; RV32IA-NEXT:    and a5, a5, a4
-; RV32IA-NEXT:    xor a5, a3, a5
-; RV32IA-NEXT:    sc.w a5, a5, (a2)
-; RV32IA-NEXT:    bnez a5, .LBB66_1
-; RV32IA-NEXT:  # %bb.2:
-; RV32IA-NEXT:    srl a0, a3, a0
-; RV32IA-NEXT:    ret
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV32IA-WMO-LABEL: atomicrmw_xchg_i16_acquire:
+; RV32IA-WMO:       # %bb.0:
+; RV32IA-WMO-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NEXT:    lui a3, 16
+; RV32IA-WMO-NEXT:    addi a3, a3, -1
+; RV32IA-WMO-NEXT:    sll a4, a3, a0
+; RV32IA-WMO-NEXT:    and a1, a1, a3
+; RV32IA-WMO-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NEXT:  .LBB66_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NEXT:    lr.w.aq a3, (a2)
+; RV32IA-WMO-NEXT:    mv a5, a1
+; RV32IA-WMO-NEXT:    xor a5, a3, a5
+; RV32IA-WMO-NEXT:    and a5, a5, a4
+; RV32IA-WMO-NEXT:    xor a5, a3, a5
+; RV32IA-WMO-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-WMO-NEXT:    bnez a5, .LBB66_1
+; RV32IA-WMO-NEXT:  # %bb.2:
+; RV32IA-WMO-NEXT:    srl a0, a3, a0
+; RV32IA-WMO-NEXT:    ret
+;
+; RV32IA-TSO-LABEL: atomicrmw_xchg_i16_acquire:
+; RV32IA-TSO:       # %bb.0:
+; RV32IA-TSO-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NEXT:    lui a3, 16
+; RV32IA-TSO-NEXT:    addi a3, a3, -1
+; RV32IA-TSO-NEXT:    sll a4, a3, a0
+; RV32IA-TSO-NEXT:    and a1, a1, a3
+; RV32IA-TSO-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NEXT:  .LBB66_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NEXT:    lr.w a3, (a2)
+; RV32IA-TSO-NEXT:    mv a5, a1
+; RV32IA-TSO-NEXT:    xor a5, a3, a5
+; RV32IA-TSO-NEXT:    and a5, a5, a4
+; RV32IA-TSO-NEXT:    xor a5, a3, a5
+; RV32IA-TSO-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-NEXT:    bnez a5, .LBB66_1
+; RV32IA-TSO-NEXT:  # %bb.2:
+; RV32IA-TSO-NEXT:    srl a0, a3, a0
+; RV32IA-TSO-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_xchg_i16_acquire:
 ; RV64I:       # %bb.0:
@@ -5363,26 +6506,47 @@ define i16 @atomicrmw_xchg_i16_acquire(ptr %a, i16 %b) nounwind {
 ; RV64I-NEXT:    addi sp, sp, 16
 ; RV64I-NEXT:    ret
 ;
-; RV64IA-LABEL: atomicrmw_xchg_i16_acquire:
-; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a2, a0, -4
-; RV64IA-NEXT:    slli a0, a0, 3
-; RV64IA-NEXT:    lui a3, 16
-; RV64IA-NEXT:    addiw a3, a3, -1
-; RV64IA-NEXT:    sllw a4, a3, a0
-; RV64IA-NEXT:    and a1, a1, a3
-; RV64IA-NEXT:    sllw a1, a1, a0
-; RV64IA-NEXT:  .LBB66_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w.aq a3, (a2)
-; RV64IA-NEXT:    mv a5, a1
-; RV64IA-NEXT:    xor a5, a3, a5
-; RV64IA-NEXT:    and a5, a5, a4
-; RV64IA-NEXT:    xor a5, a3, a5
-; RV64IA-NEXT:    sc.w a5, a5, (a2)
-; RV64IA-NEXT:    bnez a5, .LBB66_1
-; RV64IA-NEXT:  # %bb.2:
-; RV64IA-NEXT:    srlw a0, a3, a0
-; RV64IA-NEXT:    ret
+; RV64IA-WMO-LABEL: atomicrmw_xchg_i16_acquire:
+; RV64IA-WMO:       # %bb.0:
+; RV64IA-WMO-NEXT:    andi a2, a0, -4
+; RV64IA-WMO-NEXT:    slli a0, a0, 3
+; RV64IA-WMO-NEXT:    lui a3, 16
+; RV64IA-WMO-NEXT:    addiw a3, a3, -1
+; RV64IA-WMO-NEXT:    sllw a4, a3, a0
+; RV64IA-WMO-NEXT:    and a1, a1, a3
+; RV64IA-WMO-NEXT:    sllw a1, a1, a0
+; RV64IA-WMO-NEXT:  .LBB66_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT:    lr.w.aq a3, (a2)
+; RV64IA-WMO-NEXT:    mv a5, a1
+; RV64IA-WMO-NEXT:    xor a5, a3, a5
+; RV64IA-WMO-NEXT:    and a5, a5, a4
+; RV64IA-WMO-NEXT:    xor a5, a3, a5
+; RV64IA-WMO-NEXT:    sc.w a5, a5, (a2)
+; RV64IA-WMO-NEXT:    bnez a5, .LBB66_1
+; RV64IA-WMO-NEXT:  # %bb.2:
+; RV64IA-WMO-NEXT:    srlw a0, a3, a0
+; RV64IA-WMO-NEXT:    ret
+;
+; RV64IA-TSO-LABEL: atomicrmw_xchg_i16_acquire:
+; RV64IA-TSO:       # %bb.0:
+; RV64IA-TSO-NEXT:    andi a2, a0, -4
+; RV64IA-TSO-NEXT:    slli a0, a0, 3
+; RV64IA-TSO-NEXT:    lui a3, 16
+; RV64IA-TSO-NEXT:    addiw a3, a3, -1
+; RV64IA-TSO-NEXT:    sllw a4, a3, a0
+; RV64IA-TSO-NEXT:    and a1, a1, a3
+; RV64IA-TSO-NEXT:    sllw a1, a1, a0
+; RV64IA-TSO-NEXT:  .LBB66_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT:    lr.w a3, (a2)
+; RV64IA-TSO-NEXT:    mv a5, a1
+; RV64IA-TSO-NEXT:    xor a5, a3, a5
+; RV64IA-TSO-NEXT:    and a5, a5, a4
+; RV64IA-TSO-NEXT:    xor a5, a3, a5
+; RV64IA-TSO-NEXT:    sc.w a5, a5, (a2)
+; RV64IA-TSO-NEXT:    bnez a5, .LBB66_1
+; RV64IA-TSO-NEXT:  # %bb.2:
+; RV64IA-TSO-NEXT:    srlw a0, a3, a0
+; RV64IA-TSO-NEXT:    ret
   %1 = atomicrmw xchg ptr %a, i16 %b acquire
   ret i16 %1
 }
@@ -5398,26 +6562,47 @@ define i16 @atomicrmw_xchg_i16_release(ptr %a, i16 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: atomicrmw_xchg_i16_release:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a2, a0, -4
-; RV32IA-NEXT:    slli a0, a0, 3
-; RV32IA-NEXT:    lui a3, 16
-; RV32IA-NEXT:    addi a3, a3, -1
-; RV32IA-NEXT:    sll a4, a3, a0
-; RV32IA-NEXT:    and a1, a1, a3
-; RV32IA-NEXT:    sll a1, a1, a0
-; RV32IA-NEXT:  .LBB67_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w a3, (a2)
-; RV32IA-NEXT:    mv a5, a1
-; RV32IA-NEXT:    xor a5, a3, a5
-; RV32IA-NEXT:    and a5, a5, a4
-; RV32IA-NEXT:    xor a5, a3, a5
-; RV32IA-NEXT:    sc.w.rl a5, a5, (a2)
-; RV32IA-NEXT:    bnez a5, .LBB67_1
-; RV32IA-NEXT:  # %bb.2:
-; RV32IA-NEXT:    srl a0, a3, a0
-; RV32IA-NEXT:    ret
+; RV32IA-WMO-LABEL: atomicrmw_xchg_i16_release:
+; RV32IA-WMO:       # %bb.0:
+; RV32IA-WMO-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NEXT:    lui a3, 16
+; RV32IA-WMO-NEXT:    addi a3, a3, -1
+; RV32IA-WMO-NEXT:    sll a4, a3, a0
+; RV32IA-WMO-NEXT:    and a1, a1, a3
+; RV32IA-WMO-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NEXT:  .LBB67_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NEXT:    lr.w a3, (a2)
+; RV32IA-WMO-NEXT:    mv a5, a1
+; RV32IA-WMO-NEXT:    xor a5, a3, a5
+; RV32IA-WMO-NEXT:    and a5, a5, a4
+; RV32IA-WMO-NEXT:    xor a5, a3, a5
+; RV32IA-WMO-NEXT:    sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-NEXT:    bnez a5, .LBB67_1
+; RV32IA-WMO-NEXT:  # %bb.2:
+; RV32IA-WMO-NEXT:    srl a0, a3, a0
+; RV32IA-WMO-NEXT:    ret
+;
+; RV32IA-TSO-LABEL: atomicrmw_xchg_i16_release:
+; RV32IA-TSO:       # %bb.0:
+; RV32IA-TSO-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NEXT:    lui a3, 16
+; RV32IA-TSO-NEXT:    addi a3, a3, -1
+; RV32IA-TSO-NEXT:    sll a4, a3, a0
+; RV32IA-TSO-NEXT:    and a1, a1, a3
+; RV32IA-TSO-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NEXT:  .LBB67_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NEXT:    lr.w a3, (a2)
+; RV32IA-TSO-NEXT:    mv a5, a1
+; RV32IA-TSO-NEXT:    xor a5, a3, a5
+; RV32IA-TSO-NEXT:    and a5, a5, a4
+; RV32IA-TSO-NEXT:    xor a5, a3, a5
+; RV32IA-TSO-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-NEXT:    bnez a5, .LBB67_1
+; RV32IA-TSO-NEXT:  # %bb.2:
+; RV32IA-TSO-NEXT:    srl a0, a3, a0
+; RV32IA-TSO-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_xchg_i16_release:
 ; RV64I:       # %bb.0:
@@ -5429,26 +6614,47 @@ define i16 @atomicrmw_xchg_i16_release(ptr %a, i16 %b) nounwind {
 ; RV64I-NEXT:    addi sp, sp, 16
 ; RV64I-NEXT:    ret
 ;
-; RV64IA-LABEL: atomicrmw_xchg_i16_release:
-; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a2, a0, -4
-; RV64IA-NEXT:    slli a0, a0, 3
-; RV64IA-NEXT:    lui a3, 16
-; RV64IA-NEXT:    addiw a3, a3, -1
-; RV64IA-NEXT:    sllw a4, a3, a0
-; RV64IA-NEXT:    and a1, a1, a3
-; RV64IA-NEXT:    sllw a1, a1, a0
-; RV64IA-NEXT:  .LBB67_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w a3, (a2)
-; RV64IA-NEXT:    mv a5, a1
-; RV64IA-NEXT:    xor a5, a3, a5
-; RV64IA-NEXT:    and a5, a5, a4
-; RV64IA-NEXT:    xor a5, a3, a5
-; RV64IA-NEXT:    sc.w.rl a5, a5, (a2)
-; RV64IA-NEXT:    bnez a5, .LBB67_1
-; RV64IA-NEXT:  # %bb.2:
-; RV64IA-NEXT:    srlw a0, a3, a0
-; RV64IA-NEXT:    ret
+; RV64IA-WMO-LABEL: atomicrmw_xchg_i16_release:
+; RV64IA-WMO:       # %bb.0:
+; RV64IA-WMO-NEXT:    andi a2, a0, -4
+; RV64IA-WMO-NEXT:    slli a0, a0, 3
+; RV64IA-WMO-NEXT:    lui a3, 16
+; RV64IA-WMO-NEXT:    addiw a3, a3, -1
+; RV64IA-WMO-NEXT:    sllw a4, a3, a0
+; RV64IA-WMO-NEXT:    and a1, a1, a3
+; RV64IA-WMO-NEXT:    sllw a1, a1, a0
+; RV64IA-WMO-NEXT:  .LBB67_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT:    lr.w a3, (a2)
+; RV64IA-WMO-NEXT:    mv a5, a1
+; RV64IA-WMO-NEXT:    xor a5, a3, a5
+; RV64IA-WMO-NEXT:    and a5, a5, a4
+; RV64IA-WMO-NEXT:    xor a5, a3, a5
+; RV64IA-WMO-NEXT:    sc.w.rl a5, a5, (a2)
+; RV64IA-WMO-NEXT:    bnez a5, .LBB67_1
+; RV64IA-WMO-NEXT:  # %bb.2:
+; RV64IA-WMO-NEXT:    srlw a0, a3, a0
+; RV64IA-WMO-NEXT:    ret
+;
+; RV64IA-TSO-LABEL: atomicrmw_xchg_i16_release:
+; RV64IA-TSO:       # %bb.0:
+; RV64IA-TSO-NEXT:    andi a2, a0, -4
+; RV64IA-TSO-NEXT:    slli a0, a0, 3
+; RV64IA-TSO-NEXT:    lui a3, 16
+; RV64IA-TSO-NEXT:    addiw a3, a3, -1
+; RV64IA-TSO-NEXT:    sllw a4, a3, a0
+; RV64IA-TSO-NEXT:    and a1, a1, a3
+; RV64IA-TSO-NEXT:    sllw a1, a1, a0
+; RV64IA-TSO-NEXT:  .LBB67_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT:    lr.w a3, (a2)
+; RV64IA-TSO-NEXT:    mv a5, a1
+; RV64IA-TSO-NEXT:    xor a5, a3, a5
+; RV64IA-TSO-NEXT:    and a5, a5, a4
+; RV64IA-TSO-NEXT:    xor a5, a3, a5
+; RV64IA-TSO-NEXT:    sc.w a5, a5, (a2)
+; RV64IA-TSO-NEXT:    bnez a5, .LBB67_1
+; RV64IA-TSO-NEXT:  # %bb.2:
+; RV64IA-TSO-NEXT:    srlw a0, a3, a0
+; RV64IA-TSO-NEXT:    ret
   %1 = atomicrmw xchg ptr %a, i16 %b release
   ret i16 %1
 }
@@ -5464,26 +6670,47 @@ define i16 @atomicrmw_xchg_i16_acq_rel(ptr %a, i16 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: atomicrmw_xchg_i16_acq_rel:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a2, a0, -4
-; RV32IA-NEXT:    slli a0, a0, 3
-; RV32IA-NEXT:    lui a3, 16
-; RV32IA-NEXT:    addi a3, a3, -1
-; RV32IA-NEXT:    sll a4, a3, a0
-; RV32IA-NEXT:    and a1, a1, a3
-; RV32IA-NEXT:    sll a1, a1, a0
-; RV32IA-NEXT:  .LBB68_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w.aq a3, (a2)
-; RV32IA-NEXT:    mv a5, a1
-; RV32IA-NEXT:    xor a5, a3, a5
-; RV32IA-NEXT:    and a5, a5, a4
-; RV32IA-NEXT:    xor a5, a3, a5
-; RV32IA-NEXT:    sc.w.rl a5, a5, (a2)
-; RV32IA-NEXT:    bnez a5, .LBB68_1
-; RV32IA-NEXT:  # %bb.2:
-; RV32IA-NEXT:    srl a0, a3, a0
-; RV32IA-NEXT:    ret
+; RV32IA-WMO-LABEL: atomicrmw_xchg_i16_acq_rel:
+; RV32IA-WMO:       # %bb.0:
+; RV32IA-WMO-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NEXT:    lui a3, 16
+; RV32IA-WMO-NEXT:    addi a3, a3, -1
+; RV32IA-WMO-NEXT:    sll a4, a3, a0
+; RV32IA-WMO-NEXT:    and a1, a1, a3
+; RV32IA-WMO-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NEXT:  .LBB68_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NEXT:    lr.w.aq a3, (a2)
+; RV32IA-WMO-NEXT:    mv a5, a1
+; RV32IA-WMO-NEXT:    xor a5, a3, a5
+; RV32IA-WMO-NEXT:    and a5, a5, a4
+; RV32IA-WMO-NEXT:    xor a5, a3, a5
+; RV32IA-WMO-NEXT:    sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-NEXT:    bnez a5, .LBB68_1
+; RV32IA-WMO-NEXT:  # %bb.2:
+; RV32IA-WMO-NEXT:    srl a0, a3, a0
+; RV32IA-WMO-NEXT:    ret
+;
+; RV32IA-TSO-LABEL: atomicrmw_xchg_i16_acq_rel:
+; RV32IA-TSO:       # %bb.0:
+; RV32IA-TSO-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NEXT:    lui a3, 16
+; RV32IA-TSO-NEXT:    addi a3, a3, -1
+; RV32IA-TSO-NEXT:    sll a4, a3, a0
+; RV32IA-TSO-NEXT:    and a1, a1, a3
+; RV32IA-TSO-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NEXT:  .LBB68_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NEXT:    lr.w a3, (a2)
+; RV32IA-TSO-NEXT:    mv a5, a1
+; RV32IA-TSO-NEXT:    xor a5, a3, a5
+; RV32IA-TSO-NEXT:    and a5, a5, a4
+; RV32IA-TSO-NEXT:    xor a5, a3, a5
+; RV32IA-TSO-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-NEXT:    bnez a5, .LBB68_1
+; RV32IA-TSO-NEXT:  # %bb.2:
+; RV32IA-TSO-NEXT:    srl a0, a3, a0
+; RV32IA-TSO-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_xchg_i16_acq_rel:
 ; RV64I:       # %bb.0:
@@ -5495,26 +6722,47 @@ define i16 @atomicrmw_xchg_i16_acq_rel(ptr %a, i16 %b) nounwind {
 ; RV64I-NEXT:    addi sp, sp, 16
 ; RV64I-NEXT:    ret
 ;
-; RV64IA-LABEL: atomicrmw_xchg_i16_acq_rel:
-; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a2, a0, -4
-; RV64IA-NEXT:    slli a0, a0, 3
-; RV64IA-NEXT:    lui a3, 16
-; RV64IA-NEXT:    addiw a3, a3, -1
-; RV64IA-NEXT:    sllw a4, a3, a0
-; RV64IA-NEXT:    and a1, a1, a3
-; RV64IA-NEXT:    sllw a1, a1, a0
-; RV64IA-NEXT:  .LBB68_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w.aq a3, (a2)
-; RV64IA-NEXT:    mv a5, a1
-; RV64IA-NEXT:    xor a5, a3, a5
-; RV64IA-NEXT:    and a5, a5, a4
-; RV64IA-NEXT:    xor a5, a3, a5
-; RV64IA-NEXT:    sc.w.rl a5, a5, (a2)
-; RV64IA-NEXT:    bnez a5, .LBB68_1
-; RV64IA-NEXT:  # %bb.2:
-; RV64IA-NEXT:    srlw a0, a3, a0
-; RV64IA-NEXT:    ret
+; RV64IA-WMO-LABEL: atomicrmw_xchg_i16_acq_rel:
+; RV64IA-WMO:       # %bb.0:
+; RV64IA-WMO-NEXT:    andi a2, a0, -4
+; RV64IA-WMO-NEXT:    slli a0, a0, 3
+; RV64IA-WMO-NEXT:    lui a3, 16
+; RV64IA-WMO-NEXT:    addiw a3, a3, -1
+; RV64IA-WMO-NEXT:    sllw a4, a3, a0
+; RV64IA-WMO-NEXT:    and a1, a1, a3
+; RV64IA-WMO-NEXT:    sllw a1, a1, a0
+; RV64IA-WMO-NEXT:  .LBB68_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT:    lr.w.aq a3, (a2)
+; RV64IA-WMO-NEXT:    mv a5, a1
+; RV64IA-WMO-NEXT:    xor a5, a3, a5
+; RV64IA-WMO-NEXT:    and a5, a5, a4
+; RV64IA-WMO-NEXT:    xor a5, a3, a5
+; RV64IA-WMO-NEXT:    sc.w.rl a5, a5, (a2)
+; RV64IA-WMO-NEXT:    bnez a5, .LBB68_1
+; RV64IA-WMO-NEXT:  # %bb.2:
+; RV64IA-WMO-NEXT:    srlw a0, a3, a0
+; RV64IA-WMO-NEXT:    ret
+;
+; RV64IA-TSO-LABEL: atomicrmw_xchg_i16_acq_rel:
+; RV64IA-TSO:       # %bb.0:
+; RV64IA-TSO-NEXT:    andi a2, a0, -4
+; RV64IA-TSO-NEXT:    slli a0, a0, 3
+; RV64IA-TSO-NEXT:    lui a3, 16
+; RV64IA-TSO-NEXT:    addiw a3, a3, -1
+; RV64IA-TSO-NEXT:    sllw a4, a3, a0
+; RV64IA-TSO-NEXT:    and a1, a1, a3
+; RV64IA-TSO-NEXT:    sllw a1, a1, a0
+; RV64IA-TSO-NEXT:  .LBB68_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT:    lr.w a3, (a2)
+; RV64IA-TSO-NEXT:    mv a5, a1
+; RV64IA-TSO-NEXT:    xor a5, a3, a5
+; RV64IA-TSO-NEXT:    and a5, a5, a4
+; RV64IA-TSO-NEXT:    xor a5, a3, a5
+; RV64IA-TSO-NEXT:    sc.w a5, a5, (a2)
+; RV64IA-TSO-NEXT:    bnez a5, .LBB68_1
+; RV64IA-TSO-NEXT:  # %bb.2:
+; RV64IA-TSO-NEXT:    srlw a0, a3, a0
+; RV64IA-TSO-NEXT:    ret
   %1 = atomicrmw xchg ptr %a, i16 %b acq_rel
   ret i16 %1
 }
@@ -6165,26 +7413,47 @@ define i16 @atomicrmw_add_i16_acquire(ptr %a, i16 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: atomicrmw_add_i16_acquire:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a2, a0, -4
-; RV32IA-NEXT:    slli a0, a0, 3
-; RV32IA-NEXT:    lui a3, 16
-; RV32IA-NEXT:    addi a3, a3, -1
-; RV32IA-NEXT:    sll a4, a3, a0
-; RV32IA-NEXT:    and a1, a1, a3
-; RV32IA-NEXT:    sll a1, a1, a0
-; RV32IA-NEXT:  .LBB81_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w.aq a3, (a2)
-; RV32IA-NEXT:    add a5, a3, a1
-; RV32IA-NEXT:    xor a5, a3, a5
-; RV32IA-NEXT:    and a5, a5, a4
-; RV32IA-NEXT:    xor a5, a3, a5
-; RV32IA-NEXT:    sc.w a5, a5, (a2)
-; RV32IA-NEXT:    bnez a5, .LBB81_1
-; RV32IA-NEXT:  # %bb.2:
-; RV32IA-NEXT:    srl a0, a3, a0
-; RV32IA-NEXT:    ret
+; RV32IA-WMO-LABEL: atomicrmw_add_i16_acquire:
+; RV32IA-WMO:       # %bb.0:
+; RV32IA-WMO-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NEXT:    lui a3, 16
+; RV32IA-WMO-NEXT:    addi a3, a3, -1
+; RV32IA-WMO-NEXT:    sll a4, a3, a0
+; RV32IA-WMO-NEXT:    and a1, a1, a3
+; RV32IA-WMO-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NEXT:  .LBB81_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NEXT:    lr.w.aq a3, (a2)
+; RV32IA-WMO-NEXT:    add a5, a3, a1
+; RV32IA-WMO-NEXT:    xor a5, a3, a5
+; RV32IA-WMO-NEXT:    and a5, a5, a4
+; RV32IA-WMO-NEXT:    xor a5, a3, a5
+; RV32IA-WMO-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-WMO-NEXT:    bnez a5, .LBB81_1
+; RV32IA-WMO-NEXT:  # %bb.2:
+; RV32IA-WMO-NEXT:    srl a0, a3, a0
+; RV32IA-WMO-NEXT:    ret
+;
+; RV32IA-TSO-LABEL: atomicrmw_add_i16_acquire:
+; RV32IA-TSO:       # %bb.0:
+; RV32IA-TSO-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NEXT:    lui a3, 16
+; RV32IA-TSO-NEXT:    addi a3, a3, -1
+; RV32IA-TSO-NEXT:    sll a4, a3, a0
+; RV32IA-TSO-NEXT:    and a1, a1, a3
+; RV32IA-TSO-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NEXT:  .LBB81_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NEXT:    lr.w a3, (a2)
+; RV32IA-TSO-NEXT:    add a5, a3, a1
+; RV32IA-TSO-NEXT:    xor a5, a3, a5
+; RV32IA-TSO-NEXT:    and a5, a5, a4
+; RV32IA-TSO-NEXT:    xor a5, a3, a5
+; RV32IA-TSO-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-NEXT:    bnez a5, .LBB81_1
+; RV32IA-TSO-NEXT:  # %bb.2:
+; RV32IA-TSO-NEXT:    srl a0, a3, a0
+; RV32IA-TSO-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_add_i16_acquire:
 ; RV64I:       # %bb.0:
@@ -6196,26 +7465,47 @@ define i16 @atomicrmw_add_i16_acquire(ptr %a, i16 %b) nounwind {
 ; RV64I-NEXT:    addi sp, sp, 16
 ; RV64I-NEXT:    ret
 ;
-; RV64IA-LABEL: atomicrmw_add_i16_acquire:
-; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a2, a0, -4
-; RV64IA-NEXT:    slli a0, a0, 3
-; RV64IA-NEXT:    lui a3, 16
-; RV64IA-NEXT:    addiw a3, a3, -1
-; RV64IA-NEXT:    sllw a4, a3, a0
-; RV64IA-NEXT:    and a1, a1, a3
-; RV64IA-NEXT:    sllw a1, a1, a0
-; RV64IA-NEXT:  .LBB81_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w.aq a3, (a2)
-; RV64IA-NEXT:    add a5, a3, a1
-; RV64IA-NEXT:    xor a5, a3, a5
-; RV64IA-NEXT:    and a5, a5, a4
-; RV64IA-NEXT:    xor a5, a3, a5
-; RV64IA-NEXT:    sc.w a5, a5, (a2)
-; RV64IA-NEXT:    bnez a5, .LBB81_1
-; RV64IA-NEXT:  # %bb.2:
-; RV64IA-NEXT:    srlw a0, a3, a0
-; RV64IA-NEXT:    ret
+; RV64IA-WMO-LABEL: atomicrmw_add_i16_acquire:
+; RV64IA-WMO:       # %bb.0:
+; RV64IA-WMO-NEXT:    andi a2, a0, -4
+; RV64IA-WMO-NEXT:    slli a0, a0, 3
+; RV64IA-WMO-NEXT:    lui a3, 16
+; RV64IA-WMO-NEXT:    addiw a3, a3, -1
+; RV64IA-WMO-NEXT:    sllw a4, a3, a0
+; RV64IA-WMO-NEXT:    and a1, a1, a3
+; RV64IA-WMO-NEXT:    sllw a1, a1, a0
+; RV64IA-WMO-NEXT:  .LBB81_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT:    lr.w.aq a3, (a2)
+; RV64IA-WMO-NEXT:    add a5, a3, a1
+; RV64IA-WMO-NEXT:    xor a5, a3, a5
+; RV64IA-WMO-NEXT:    and a5, a5, a4
+; RV64IA-WMO-NEXT:    xor a5, a3, a5
+; RV64IA-WMO-NEXT:    sc.w a5, a5, (a2)
+; RV64IA-WMO-NEXT:    bnez a5, .LBB81_1
+; RV64IA-WMO-NEXT:  # %bb.2:
+; RV64IA-WMO-NEXT:    srlw a0, a3, a0
+; RV64IA-WMO-NEXT:    ret
+;
+; RV64IA-TSO-LABEL: atomicrmw_add_i16_acquire:
+; RV64IA-TSO:       # %bb.0:
+; RV64IA-TSO-NEXT:    andi a2, a0, -4
+; RV64IA-TSO-NEXT:    slli a0, a0, 3
+; RV64IA-TSO-NEXT:    lui a3, 16
+; RV64IA-TSO-NEXT:    addiw a3, a3, -1
+; RV64IA-TSO-NEXT:    sllw a4, a3, a0
+; RV64IA-TSO-NEXT:    and a1, a1, a3
+; RV64IA-TSO-NEXT:    sllw a1, a1, a0
+; RV64IA-TSO-NEXT:  .LBB81_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT:    lr.w a3, (a2)
+; RV64IA-TSO-NEXT:    add a5, a3, a1
+; RV64IA-TSO-NEXT:    xor a5, a3, a5
+; RV64IA-TSO-NEXT:    and a5, a5, a4
+; RV64IA-TSO-NEXT:    xor a5, a3, a5
+; RV64IA-TSO-NEXT:    sc.w a5, a5, (a2)
+; RV64IA-TSO-NEXT:    bnez a5, .LBB81_1
+; RV64IA-TSO-NEXT:  # %bb.2:
+; RV64IA-TSO-NEXT:    srlw a0, a3, a0
+; RV64IA-TSO-NEXT:    ret
   %1 = atomicrmw add ptr %a, i16 %b acquire
   ret i16 %1
 }
@@ -6231,26 +7521,47 @@ define i16 @atomicrmw_add_i16_release(ptr %a, i16 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: atomicrmw_add_i16_release:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a2, a0, -4
-; RV32IA-NEXT:    slli a0, a0, 3
-; RV32IA-NEXT:    lui a3, 16
-; RV32IA-NEXT:    addi a3, a3, -1
-; RV32IA-NEXT:    sll a4, a3, a0
-; RV32IA-NEXT:    and a1, a1, a3
-; RV32IA-NEXT:    sll a1, a1, a0
-; RV32IA-NEXT:  .LBB82_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w a3, (a2)
-; RV32IA-NEXT:    add a5, a3, a1
-; RV32IA-NEXT:    xor a5, a3, a5
-; RV32IA-NEXT:    and a5, a5, a4
-; RV32IA-NEXT:    xor a5, a3, a5
-; RV32IA-NEXT:    sc.w.rl a5, a5, (a2)
-; RV32IA-NEXT:    bnez a5, .LBB82_1
-; RV32IA-NEXT:  # %bb.2:
-; RV32IA-NEXT:    srl a0, a3, a0
-; RV32IA-NEXT:    ret
+; RV32IA-WMO-LABEL: atomicrmw_add_i16_release:
+; RV32IA-WMO:       # %bb.0:
+; RV32IA-WMO-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NEXT:    lui a3, 16
+; RV32IA-WMO-NEXT:    addi a3, a3, -1
+; RV32IA-WMO-NEXT:    sll a4, a3, a0
+; RV32IA-WMO-NEXT:    and a1, a1, a3
+; RV32IA-WMO-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NEXT:  .LBB82_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NEXT:    lr.w a3, (a2)
+; RV32IA-WMO-NEXT:    add a5, a3, a1
+; RV32IA-WMO-NEXT:    xor a5, a3, a5
+; RV32IA-WMO-NEXT:    and a5, a5, a4
+; RV32IA-WMO-NEXT:    xor a5, a3, a5
+; RV32IA-WMO-NEXT:    sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-NEXT:    bnez a5, .LBB82_1
+; RV32IA-WMO-NEXT:  # %bb.2:
+; RV32IA-WMO-NEXT:    srl a0, a3, a0
+; RV32IA-WMO-NEXT:    ret
+;
+; RV32IA-TSO-LABEL: atomicrmw_add_i16_release:
+; RV32IA-TSO:       # %bb.0:
+; RV32IA-TSO-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NEXT:    lui a3, 16
+; RV32IA-TSO-NEXT:    addi a3, a3, -1
+; RV32IA-TSO-NEXT:    sll a4, a3, a0
+; RV32IA-TSO-NEXT:    and a1, a1, a3
+; RV32IA-TSO-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NEXT:  .LBB82_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NEXT:    lr.w a3, (a2)
+; RV32IA-TSO-NEXT:    add a5, a3, a1
+; RV32IA-TSO-NEXT:    xor a5, a3, a5
+; RV32IA-TSO-NEXT:    and a5, a5, a4
+; RV32IA-TSO-NEXT:    xor a5, a3, a5
+; RV32IA-TSO-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-NEXT:    bnez a5, .LBB82_1
+; RV32IA-TSO-NEXT:  # %bb.2:
+; RV32IA-TSO-NEXT:    srl a0, a3, a0
+; RV32IA-TSO-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_add_i16_release:
 ; RV64I:       # %bb.0:
@@ -6262,26 +7573,47 @@ define i16 @atomicrmw_add_i16_release(ptr %a, i16 %b) nounwind {
 ; RV64I-NEXT:    addi sp, sp, 16
 ; RV64I-NEXT:    ret
 ;
-; RV64IA-LABEL: atomicrmw_add_i16_release:
-; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a2, a0, -4
-; RV64IA-NEXT:    slli a0, a0, 3
-; RV64IA-NEXT:    lui a3, 16
-; RV64IA-NEXT:    addiw a3, a3, -1
-; RV64IA-NEXT:    sllw a4, a3, a0
-; RV64IA-NEXT:    and a1, a1, a3
-; RV64IA-NEXT:    sllw a1, a1, a0
-; RV64IA-NEXT:  .LBB82_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w a3, (a2)
-; RV64IA-NEXT:    add a5, a3, a1
-; RV64IA-NEXT:    xor a5, a3, a5
-; RV64IA-NEXT:    and a5, a5, a4
-; RV64IA-NEXT:    xor a5, a3, a5
-; RV64IA-NEXT:    sc.w.rl a5, a5, (a2)
-; RV64IA-NEXT:    bnez a5, .LBB82_1
-; RV64IA-NEXT:  # %bb.2:
-; RV64IA-NEXT:    srlw a0, a3, a0
-; RV64IA-NEXT:    ret
+; RV64IA-WMO-LABEL: atomicrmw_add_i16_release:
+; RV64IA-WMO:       # %bb.0:
+; RV64IA-WMO-NEXT:    andi a2, a0, -4
+; RV64IA-WMO-NEXT:    slli a0, a0, 3
+; RV64IA-WMO-NEXT:    lui a3, 16
+; RV64IA-WMO-NEXT:    addiw a3, a3, -1
+; RV64IA-WMO-NEXT:    sllw a4, a3, a0
+; RV64IA-WMO-NEXT:    and a1, a1, a3
+; RV64IA-WMO-NEXT:    sllw a1, a1, a0
+; RV64IA-WMO-NEXT:  .LBB82_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT:    lr.w a3, (a2)
+; RV64IA-WMO-NEXT:    add a5, a3, a1
+; RV64IA-WMO-NEXT:    xor a5, a3, a5
+; RV64IA-WMO-NEXT:    and a5, a5, a4
+; RV64IA-WMO-NEXT:    xor a5, a3, a5
+; RV64IA-WMO-NEXT:    sc.w.rl a5, a5, (a2)
+; RV64IA-WMO-NEXT:    bnez a5, .LBB82_1
+; RV64IA-WMO-NEXT:  # %bb.2:
+; RV64IA-WMO-NEXT:    srlw a0, a3, a0
+; RV64IA-WMO-NEXT:    ret
+;
+; RV64IA-TSO-LABEL: atomicrmw_add_i16_release:
+; RV64IA-TSO:       # %bb.0:
+; RV64IA-TSO-NEXT:    andi a2, a0, -4
+; RV64IA-TSO-NEXT:    slli a0, a0, 3
+; RV64IA-TSO-NEXT:    lui a3, 16
+; RV64IA-TSO-NEXT:    addiw a3, a3, -1
+; RV64IA-TSO-NEXT:    sllw a4, a3, a0
+; RV64IA-TSO-NEXT:    and a1, a1, a3
+; RV64IA-TSO-NEXT:    sllw a1, a1, a0
+; RV64IA-TSO-NEXT:  .LBB82_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT:    lr.w a3, (a2)
+; RV64IA-TSO-NEXT:    add a5, a3, a1
+; RV64IA-TSO-NEXT:    xor a5, a3, a5
+; RV64IA-TSO-NEXT:    and a5, a5, a4
+; RV64IA-TSO-NEXT:    xor a5, a3, a5
+; RV64IA-TSO-NEXT:    sc.w a5, a5, (a2)
+; RV64IA-TSO-NEXT:    bnez a5, .LBB82_1
+; RV64IA-TSO-NEXT:  # %bb.2:
+; RV64IA-TSO-NEXT:    srlw a0, a3, a0
+; RV64IA-TSO-NEXT:    ret
   %1 = atomicrmw add ptr %a, i16 %b release
   ret i16 %1
 }
@@ -6297,26 +7629,47 @@ define i16 @atomicrmw_add_i16_acq_rel(ptr %a, i16 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: atomicrmw_add_i16_acq_rel:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a2, a0, -4
-; RV32IA-NEXT:    slli a0, a0, 3
-; RV32IA-NEXT:    lui a3, 16
-; RV32IA-NEXT:    addi a3, a3, -1
-; RV32IA-NEXT:    sll a4, a3, a0
-; RV32IA-NEXT:    and a1, a1, a3
-; RV32IA-NEXT:    sll a1, a1, a0
-; RV32IA-NEXT:  .LBB83_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w.aq a3, (a2)
-; RV32IA-NEXT:    add a5, a3, a1
-; RV32IA-NEXT:    xor a5, a3, a5
-; RV32IA-NEXT:    and a5, a5, a4
-; RV32IA-NEXT:    xor a5, a3, a5
-; RV32IA-NEXT:    sc.w.rl a5, a5, (a2)
-; RV32IA-NEXT:    bnez a5, .LBB83_1
-; RV32IA-NEXT:  # %bb.2:
-; RV32IA-NEXT:    srl a0, a3, a0
-; RV32IA-NEXT:    ret
+; RV32IA-WMO-LABEL: atomicrmw_add_i16_acq_rel:
+; RV32IA-WMO:       # %bb.0:
+; RV32IA-WMO-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NEXT:    lui a3, 16
+; RV32IA-WMO-NEXT:    addi a3, a3, -1
+; RV32IA-WMO-NEXT:    sll a4, a3, a0
+; RV32IA-WMO-NEXT:    and a1, a1, a3
+; RV32IA-WMO-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NEXT:  .LBB83_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NEXT:    lr.w.aq a3, (a2)
+; RV32IA-WMO-NEXT:    add a5, a3, a1
+; RV32IA-WMO-NEXT:    xor a5, a3, a5
+; RV32IA-WMO-NEXT:    and a5, a5, a4
+; RV32IA-WMO-NEXT:    xor a5, a3, a5
+; RV32IA-WMO-NEXT:    sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-NEXT:    bnez a5, .LBB83_1
+; RV32IA-WMO-NEXT:  # %bb.2:
+; RV32IA-WMO-NEXT:    srl a0, a3, a0
+; RV32IA-WMO-NEXT:    ret
+;
+; RV32IA-TSO-LABEL: atomicrmw_add_i16_acq_rel:
+; RV32IA-TSO:       # %bb.0:
+; RV32IA-TSO-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NEXT:    lui a3, 16
+; RV32IA-TSO-NEXT:    addi a3, a3, -1
+; RV32IA-TSO-NEXT:    sll a4, a3, a0
+; RV32IA-TSO-NEXT:    and a1, a1, a3
+; RV32IA-TSO-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NEXT:  .LBB83_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NEXT:    lr.w a3, (a2)
+; RV32IA-TSO-NEXT:    add a5, a3, a1
+; RV32IA-TSO-NEXT:    xor a5, a3, a5
+; RV32IA-TSO-NEXT:    and a5, a5, a4
+; RV32IA-TSO-NEXT:    xor a5, a3, a5
+; RV32IA-TSO-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-NEXT:    bnez a5, .LBB83_1
+; RV32IA-TSO-NEXT:  # %bb.2:
+; RV32IA-TSO-NEXT:    srl a0, a3, a0
+; RV32IA-TSO-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_add_i16_acq_rel:
 ; RV64I:       # %bb.0:
@@ -6328,26 +7681,47 @@ define i16 @atomicrmw_add_i16_acq_rel(ptr %a, i16 %b) nounwind {
 ; RV64I-NEXT:    addi sp, sp, 16
 ; RV64I-NEXT:    ret
 ;
-; RV64IA-LABEL: atomicrmw_add_i16_acq_rel:
-; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a2, a0, -4
-; RV64IA-NEXT:    slli a0, a0, 3
-; RV64IA-NEXT:    lui a3, 16
-; RV64IA-NEXT:    addiw a3, a3, -1
-; RV64IA-NEXT:    sllw a4, a3, a0
-; RV64IA-NEXT:    and a1, a1, a3
-; RV64IA-NEXT:    sllw a1, a1, a0
-; RV64IA-NEXT:  .LBB83_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w.aq a3, (a2)
-; RV64IA-NEXT:    add a5, a3, a1
-; RV64IA-NEXT:    xor a5, a3, a5
-; RV64IA-NEXT:    and a5, a5, a4
-; RV64IA-NEXT:    xor a5, a3, a5
-; RV64IA-NEXT:    sc.w.rl a5, a5, (a2)
-; RV64IA-NEXT:    bnez a5, .LBB83_1
-; RV64IA-NEXT:  # %bb.2:
-; RV64IA-NEXT:    srlw a0, a3, a0
-; RV64IA-NEXT:    ret
+; RV64IA-WMO-LABEL: atomicrmw_add_i16_acq_rel:
+; RV64IA-WMO:       # %bb.0:
+; RV64IA-WMO-NEXT:    andi a2, a0, -4
+; RV64IA-WMO-NEXT:    slli a0, a0, 3
+; RV64IA-WMO-NEXT:    lui a3, 16
+; RV64IA-WMO-NEXT:    addiw a3, a3, -1
+; RV64IA-WMO-NEXT:    sllw a4, a3, a0
+; RV64IA-WMO-NEXT:    and a1, a1, a3
+; RV64IA-WMO-NEXT:    sllw a1, a1, a0
+; RV64IA-WMO-NEXT:  .LBB83_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT:    lr.w.aq a3, (a2)
+; RV64IA-WMO-NEXT:    add a5, a3, a1
+; RV64IA-WMO-NEXT:    xor a5, a3, a5
+; RV64IA-WMO-NEXT:    and a5, a5, a4
+; RV64IA-WMO-NEXT:    xor a5, a3, a5
+; RV64IA-WMO-NEXT:    sc.w.rl a5, a5, (a2)
+; RV64IA-WMO-NEXT:    bnez a5, .LBB83_1
+; RV64IA-WMO-NEXT:  # %bb.2:
+; RV64IA-WMO-NEXT:    srlw a0, a3, a0
+; RV64IA-WMO-NEXT:    ret
+;
+; RV64IA-TSO-LABEL: atomicrmw_add_i16_acq_rel:
+; RV64IA-TSO:       # %bb.0:
+; RV64IA-TSO-NEXT:    andi a2, a0, -4
+; RV64IA-TSO-NEXT:    slli a0, a0, 3
+; RV64IA-TSO-NEXT:    lui a3, 16
+; RV64IA-TSO-NEXT:    addiw a3, a3, -1
+; RV64IA-TSO-NEXT:    sllw a4, a3, a0
+; RV64IA-TSO-NEXT:    and a1, a1, a3
+; RV64IA-TSO-NEXT:    sllw a1, a1, a0
+; RV64IA-TSO-NEXT:  .LBB83_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT:    lr.w a3, (a2)
+; RV64IA-TSO-NEXT:    add a5, a3, a1
+; RV64IA-TSO-NEXT:    xor a5, a3, a5
+; RV64IA-TSO-NEXT:    and a5, a5, a4
+; RV64IA-TSO-NEXT:    xor a5, a3, a5
+; RV64IA-TSO-NEXT:    sc.w a5, a5, (a2)
+; RV64IA-TSO-NEXT:    bnez a5, .LBB83_1
+; RV64IA-TSO-NEXT:  # %bb.2:
+; RV64IA-TSO-NEXT:    srlw a0, a3, a0
+; RV64IA-TSO-NEXT:    ret
   %1 = atomicrmw add ptr %a, i16 %b acq_rel
   ret i16 %1
 }
@@ -6495,26 +7869,47 @@ define i16 @atomicrmw_sub_i16_acquire(ptr %a, i16 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: atomicrmw_sub_i16_acquire:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a2, a0, -4
-; RV32IA-NEXT:    slli a0, a0, 3
-; RV32IA-NEXT:    lui a3, 16
-; RV32IA-NEXT:    addi a3, a3, -1
-; RV32IA-NEXT:    sll a4, a3, a0
-; RV32IA-NEXT:    and a1, a1, a3
-; RV32IA-NEXT:    sll a1, a1, a0
-; RV32IA-NEXT:  .LBB86_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w.aq a3, (a2)
-; RV32IA-NEXT:    sub a5, a3, a1
-; RV32IA-NEXT:    xor a5, a3, a5
-; RV32IA-NEXT:    and a5, a5, a4
-; RV32IA-NEXT:    xor a5, a3, a5
-; RV32IA-NEXT:    sc.w a5, a5, (a2)
-; RV32IA-NEXT:    bnez a5, .LBB86_1
-; RV32IA-NEXT:  # %bb.2:
-; RV32IA-NEXT:    srl a0, a3, a0
-; RV32IA-NEXT:    ret
+; RV32IA-WMO-LABEL: atomicrmw_sub_i16_acquire:
+; RV32IA-WMO:       # %bb.0:
+; RV32IA-WMO-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NEXT:    lui a3, 16
+; RV32IA-WMO-NEXT:    addi a3, a3, -1
+; RV32IA-WMO-NEXT:    sll a4, a3, a0
+; RV32IA-WMO-NEXT:    and a1, a1, a3
+; RV32IA-WMO-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NEXT:  .LBB86_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NEXT:    lr.w.aq a3, (a2)
+; RV32IA-WMO-NEXT:    sub a5, a3, a1
+; RV32IA-WMO-NEXT:    xor a5, a3, a5
+; RV32IA-WMO-NEXT:    and a5, a5, a4
+; RV32IA-WMO-NEXT:    xor a5, a3, a5
+; RV32IA-WMO-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-WMO-NEXT:    bnez a5, .LBB86_1
+; RV32IA-WMO-NEXT:  # %bb.2:
+; RV32IA-WMO-NEXT:    srl a0, a3, a0
+; RV32IA-WMO-NEXT:    ret
+;
+; RV32IA-TSO-LABEL: atomicrmw_sub_i16_acquire:
+; RV32IA-TSO:       # %bb.0:
+; RV32IA-TSO-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NEXT:    lui a3, 16
+; RV32IA-TSO-NEXT:    addi a3, a3, -1
+; RV32IA-TSO-NEXT:    sll a4, a3, a0
+; RV32IA-TSO-NEXT:    and a1, a1, a3
+; RV32IA-TSO-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NEXT:  .LBB86_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NEXT:    lr.w a3, (a2)
+; RV32IA-TSO-NEXT:    sub a5, a3, a1
+; RV32IA-TSO-NEXT:    xor a5, a3, a5
+; RV32IA-TSO-NEXT:    and a5, a5, a4
+; RV32IA-TSO-NEXT:    xor a5, a3, a5
+; RV32IA-TSO-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-NEXT:    bnez a5, .LBB86_1
+; RV32IA-TSO-NEXT:  # %bb.2:
+; RV32IA-TSO-NEXT:    srl a0, a3, a0
+; RV32IA-TSO-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_sub_i16_acquire:
 ; RV64I:       # %bb.0:
@@ -6526,26 +7921,47 @@ define i16 @atomicrmw_sub_i16_acquire(ptr %a, i16 %b) nounwind {
 ; RV64I-NEXT:    addi sp, sp, 16
 ; RV64I-NEXT:    ret
 ;
-; RV64IA-LABEL: atomicrmw_sub_i16_acquire:
-; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a2, a0, -4
-; RV64IA-NEXT:    slli a0, a0, 3
-; RV64IA-NEXT:    lui a3, 16
-; RV64IA-NEXT:    addiw a3, a3, -1
-; RV64IA-NEXT:    sllw a4, a3, a0
-; RV64IA-NEXT:    and a1, a1, a3
-; RV64IA-NEXT:    sllw a1, a1, a0
-; RV64IA-NEXT:  .LBB86_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w.aq a3, (a2)
-; RV64IA-NEXT:    sub a5, a3, a1
-; RV64IA-NEXT:    xor a5, a3, a5
-; RV64IA-NEXT:    and a5, a5, a4
-; RV64IA-NEXT:    xor a5, a3, a5
-; RV64IA-NEXT:    sc.w a5, a5, (a2)
-; RV64IA-NEXT:    bnez a5, .LBB86_1
-; RV64IA-NEXT:  # %bb.2:
-; RV64IA-NEXT:    srlw a0, a3, a0
-; RV64IA-NEXT:    ret
+; RV64IA-WMO-LABEL: atomicrmw_sub_i16_acquire:
+; RV64IA-WMO:       # %bb.0:
+; RV64IA-WMO-NEXT:    andi a2, a0, -4
+; RV64IA-WMO-NEXT:    slli a0, a0, 3
+; RV64IA-WMO-NEXT:    lui a3, 16
+; RV64IA-WMO-NEXT:    addiw a3, a3, -1
+; RV64IA-WMO-NEXT:    sllw a4, a3, a0
+; RV64IA-WMO-NEXT:    and a1, a1, a3
+; RV64IA-WMO-NEXT:    sllw a1, a1, a0
+; RV64IA-WMO-NEXT:  .LBB86_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT:    lr.w.aq a3, (a2)
+; RV64IA-WMO-NEXT:    sub a5, a3, a1
+; RV64IA-WMO-NEXT:    xor a5, a3, a5
+; RV64IA-WMO-NEXT:    and a5, a5, a4
+; RV64IA-WMO-NEXT:    xor a5, a3, a5
+; RV64IA-WMO-NEXT:    sc.w a5, a5, (a2)
+; RV64IA-WMO-NEXT:    bnez a5, .LBB86_1
+; RV64IA-WMO-NEXT:  # %bb.2:
+; RV64IA-WMO-NEXT:    srlw a0, a3, a0
+; RV64IA-WMO-NEXT:    ret
+;
+; RV64IA-TSO-LABEL: atomicrmw_sub_i16_acquire:
+; RV64IA-TSO:       # %bb.0:
+; RV64IA-TSO-NEXT:    andi a2, a0, -4
+; RV64IA-TSO-NEXT:    slli a0, a0, 3
+; RV64IA-TSO-NEXT:    lui a3, 16
+; RV64IA-TSO-NEXT:    addiw a3, a3, -1
+; RV64IA-TSO-NEXT:    sllw a4, a3, a0
+; RV64IA-TSO-NEXT:    and a1, a1, a3
+; RV64IA-TSO-NEXT:    sllw a1, a1, a0
+; RV64IA-TSO-NEXT:  .LBB86_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT:    lr.w a3, (a2)
+; RV64IA-TSO-NEXT:    sub a5, a3, a1
+; RV64IA-TSO-NEXT:    xor a5, a3, a5
+; RV64IA-TSO-NEXT:    and a5, a5, a4
+; RV64IA-TSO-NEXT:    xor a5, a3, a5
+; RV64IA-TSO-NEXT:    sc.w a5, a5, (a2)
+; RV64IA-TSO-NEXT:    bnez a5, .LBB86_1
+; RV64IA-TSO-NEXT:  # %bb.2:
+; RV64IA-TSO-NEXT:    srlw a0, a3, a0
+; RV64IA-TSO-NEXT:    ret
   %1 = atomicrmw sub ptr %a, i16 %b acquire
   ret i16 %1
 }
@@ -6561,57 +7977,99 @@ define i16 @atomicrmw_sub_i16_release(ptr %a, i16 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: atomicrmw_sub_i16_release:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a2, a0, -4
-; RV32IA-NEXT:    slli a0, a0, 3
-; RV32IA-NEXT:    lui a3, 16
-; RV32IA-NEXT:    addi a3, a3, -1
-; RV32IA-NEXT:    sll a4, a3, a0
-; RV32IA-NEXT:    and a1, a1, a3
-; RV32IA-NEXT:    sll a1, a1, a0
-; RV32IA-NEXT:  .LBB87_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w a3, (a2)
-; RV32IA-NEXT:    sub a5, a3, a1
-; RV32IA-NEXT:    xor a5, a3, a5
-; RV32IA-NEXT:    and a5, a5, a4
-; RV32IA-NEXT:    xor a5, a3, a5
-; RV32IA-NEXT:    sc.w.rl a5, a5, (a2)
-; RV32IA-NEXT:    bnez a5, .LBB87_1
-; RV32IA-NEXT:  # %bb.2:
-; RV32IA-NEXT:    srl a0, a3, a0
-; RV32IA-NEXT:    ret
+; RV32IA-WMO-LABEL: atomicrmw_sub_i16_release:
+; RV32IA-WMO:       # %bb.0:
+; RV32IA-WMO-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NEXT:    lui a3, 16
+; RV32IA-WMO-NEXT:    addi a3, a3, -1
+; RV32IA-WMO-NEXT:    sll a4, a3, a0
+; RV32IA-WMO-NEXT:    and a1, a1, a3
+; RV32IA-WMO-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NEXT:  .LBB87_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NEXT:    lr.w a3, (a2)
+; RV32IA-WMO-NEXT:    sub a5, a3, a1
+; RV32IA-WMO-NEXT:    xor a5, a3, a5
+; RV32IA-WMO-NEXT:    and a5, a5, a4
+; RV32IA-WMO-NEXT:    xor a5, a3, a5
+; RV32IA-WMO-NEXT:    sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-NEXT:    bnez a5, .LBB87_1
+; RV32IA-WMO-NEXT:  # %bb.2:
+; RV32IA-WMO-NEXT:    srl a0, a3, a0
+; RV32IA-WMO-NEXT:    ret
+;
+; RV32IA-TSO-LABEL: atomicrmw_sub_i16_release:
+; RV32IA-TSO:       # %bb.0:
+; RV32IA-TSO-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NEXT:    lui a3, 16
+; RV32IA-TSO-NEXT:    addi a3, a3, -1
+; RV32IA-TSO-NEXT:    sll a4, a3, a0
+; RV32IA-TSO-NEXT:    and a1, a1, a3
+; RV32IA-TSO-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NEXT:  .LBB87_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NEXT:    lr.w a3, (a2)
+; RV32IA-TSO-NEXT:    sub a5, a3, a1
+; RV32IA-TSO-NEXT:    xor a5, a3, a5
+; RV32IA-TSO-NEXT:    and a5, a5, a4
+; RV32IA-TSO-NEXT:    xor a5, a3, a5
+; RV32IA-TSO-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-NEXT:    bnez a5, .LBB87_1
+; RV32IA-TSO-NEXT:  # %bb.2:
+; RV32IA-TSO-NEXT:    srl a0, a3, a0
+; RV32IA-TSO-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_sub_i16_release:
 ; RV64I:       # %bb.0:
 ; RV64I-NEXT:    addi sp, sp, -16
 ; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    li a2, 3
-; RV64I-NEXT:    call __atomic_fetch_sub_2 at plt
-; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    addi sp, sp, 16
-; RV64I-NEXT:    ret
-;
-; RV64IA-LABEL: atomicrmw_sub_i16_release:
-; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a2, a0, -4
-; RV64IA-NEXT:    slli a0, a0, 3
-; RV64IA-NEXT:    lui a3, 16
-; RV64IA-NEXT:    addiw a3, a3, -1
-; RV64IA-NEXT:    sllw a4, a3, a0
-; RV64IA-NEXT:    and a1, a1, a3
-; RV64IA-NEXT:    sllw a1, a1, a0
-; RV64IA-NEXT:  .LBB87_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w a3, (a2)
-; RV64IA-NEXT:    sub a5, a3, a1
-; RV64IA-NEXT:    xor a5, a3, a5
-; RV64IA-NEXT:    and a5, a5, a4
-; RV64IA-NEXT:    xor a5, a3, a5
-; RV64IA-NEXT:    sc.w.rl a5, a5, (a2)
-; RV64IA-NEXT:    bnez a5, .LBB87_1
-; RV64IA-NEXT:  # %bb.2:
-; RV64IA-NEXT:    srlw a0, a3, a0
-; RV64IA-NEXT:    ret
+; RV64I-NEXT:    li a2, 3
+; RV64I-NEXT:    call __atomic_fetch_sub_2 at plt
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64IA-WMO-LABEL: atomicrmw_sub_i16_release:
+; RV64IA-WMO:       # %bb.0:
+; RV64IA-WMO-NEXT:    andi a2, a0, -4
+; RV64IA-WMO-NEXT:    slli a0, a0, 3
+; RV64IA-WMO-NEXT:    lui a3, 16
+; RV64IA-WMO-NEXT:    addiw a3, a3, -1
+; RV64IA-WMO-NEXT:    sllw a4, a3, a0
+; RV64IA-WMO-NEXT:    and a1, a1, a3
+; RV64IA-WMO-NEXT:    sllw a1, a1, a0
+; RV64IA-WMO-NEXT:  .LBB87_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT:    lr.w a3, (a2)
+; RV64IA-WMO-NEXT:    sub a5, a3, a1
+; RV64IA-WMO-NEXT:    xor a5, a3, a5
+; RV64IA-WMO-NEXT:    and a5, a5, a4
+; RV64IA-WMO-NEXT:    xor a5, a3, a5
+; RV64IA-WMO-NEXT:    sc.w.rl a5, a5, (a2)
+; RV64IA-WMO-NEXT:    bnez a5, .LBB87_1
+; RV64IA-WMO-NEXT:  # %bb.2:
+; RV64IA-WMO-NEXT:    srlw a0, a3, a0
+; RV64IA-WMO-NEXT:    ret
+;
+; RV64IA-TSO-LABEL: atomicrmw_sub_i16_release:
+; RV64IA-TSO:       # %bb.0:
+; RV64IA-TSO-NEXT:    andi a2, a0, -4
+; RV64IA-TSO-NEXT:    slli a0, a0, 3
+; RV64IA-TSO-NEXT:    lui a3, 16
+; RV64IA-TSO-NEXT:    addiw a3, a3, -1
+; RV64IA-TSO-NEXT:    sllw a4, a3, a0
+; RV64IA-TSO-NEXT:    and a1, a1, a3
+; RV64IA-TSO-NEXT:    sllw a1, a1, a0
+; RV64IA-TSO-NEXT:  .LBB87_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT:    lr.w a3, (a2)
+; RV64IA-TSO-NEXT:    sub a5, a3, a1
+; RV64IA-TSO-NEXT:    xor a5, a3, a5
+; RV64IA-TSO-NEXT:    and a5, a5, a4
+; RV64IA-TSO-NEXT:    xor a5, a3, a5
+; RV64IA-TSO-NEXT:    sc.w a5, a5, (a2)
+; RV64IA-TSO-NEXT:    bnez a5, .LBB87_1
+; RV64IA-TSO-NEXT:  # %bb.2:
+; RV64IA-TSO-NEXT:    srlw a0, a3, a0
+; RV64IA-TSO-NEXT:    ret
   %1 = atomicrmw sub ptr %a, i16 %b release
   ret i16 %1
 }
@@ -6627,26 +8085,47 @@ define i16 @atomicrmw_sub_i16_acq_rel(ptr %a, i16 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: atomicrmw_sub_i16_acq_rel:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a2, a0, -4
-; RV32IA-NEXT:    slli a0, a0, 3
-; RV32IA-NEXT:    lui a3, 16
-; RV32IA-NEXT:    addi a3, a3, -1
-; RV32IA-NEXT:    sll a4, a3, a0
-; RV32IA-NEXT:    and a1, a1, a3
-; RV32IA-NEXT:    sll a1, a1, a0
-; RV32IA-NEXT:  .LBB88_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w.aq a3, (a2)
-; RV32IA-NEXT:    sub a5, a3, a1
-; RV32IA-NEXT:    xor a5, a3, a5
-; RV32IA-NEXT:    and a5, a5, a4
-; RV32IA-NEXT:    xor a5, a3, a5
-; RV32IA-NEXT:    sc.w.rl a5, a5, (a2)
-; RV32IA-NEXT:    bnez a5, .LBB88_1
-; RV32IA-NEXT:  # %bb.2:
-; RV32IA-NEXT:    srl a0, a3, a0
-; RV32IA-NEXT:    ret
+; RV32IA-WMO-LABEL: atomicrmw_sub_i16_acq_rel:
+; RV32IA-WMO:       # %bb.0:
+; RV32IA-WMO-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NEXT:    lui a3, 16
+; RV32IA-WMO-NEXT:    addi a3, a3, -1
+; RV32IA-WMO-NEXT:    sll a4, a3, a0
+; RV32IA-WMO-NEXT:    and a1, a1, a3
+; RV32IA-WMO-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NEXT:  .LBB88_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NEXT:    lr.w.aq a3, (a2)
+; RV32IA-WMO-NEXT:    sub a5, a3, a1
+; RV32IA-WMO-NEXT:    xor a5, a3, a5
+; RV32IA-WMO-NEXT:    and a5, a5, a4
+; RV32IA-WMO-NEXT:    xor a5, a3, a5
+; RV32IA-WMO-NEXT:    sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-NEXT:    bnez a5, .LBB88_1
+; RV32IA-WMO-NEXT:  # %bb.2:
+; RV32IA-WMO-NEXT:    srl a0, a3, a0
+; RV32IA-WMO-NEXT:    ret
+;
+; RV32IA-TSO-LABEL: atomicrmw_sub_i16_acq_rel:
+; RV32IA-TSO:       # %bb.0:
+; RV32IA-TSO-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NEXT:    lui a3, 16
+; RV32IA-TSO-NEXT:    addi a3, a3, -1
+; RV32IA-TSO-NEXT:    sll a4, a3, a0
+; RV32IA-TSO-NEXT:    and a1, a1, a3
+; RV32IA-TSO-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NEXT:  .LBB88_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NEXT:    lr.w a3, (a2)
+; RV32IA-TSO-NEXT:    sub a5, a3, a1
+; RV32IA-TSO-NEXT:    xor a5, a3, a5
+; RV32IA-TSO-NEXT:    and a5, a5, a4
+; RV32IA-TSO-NEXT:    xor a5, a3, a5
+; RV32IA-TSO-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-NEXT:    bnez a5, .LBB88_1
+; RV32IA-TSO-NEXT:  # %bb.2:
+; RV32IA-TSO-NEXT:    srl a0, a3, a0
+; RV32IA-TSO-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_sub_i16_acq_rel:
 ; RV64I:       # %bb.0:
@@ -6658,26 +8137,47 @@ define i16 @atomicrmw_sub_i16_acq_rel(ptr %a, i16 %b) nounwind {
 ; RV64I-NEXT:    addi sp, sp, 16
 ; RV64I-NEXT:    ret
 ;
-; RV64IA-LABEL: atomicrmw_sub_i16_acq_rel:
-; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a2, a0, -4
-; RV64IA-NEXT:    slli a0, a0, 3
-; RV64IA-NEXT:    lui a3, 16
-; RV64IA-NEXT:    addiw a3, a3, -1
-; RV64IA-NEXT:    sllw a4, a3, a0
-; RV64IA-NEXT:    and a1, a1, a3
-; RV64IA-NEXT:    sllw a1, a1, a0
-; RV64IA-NEXT:  .LBB88_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w.aq a3, (a2)
-; RV64IA-NEXT:    sub a5, a3, a1
-; RV64IA-NEXT:    xor a5, a3, a5
-; RV64IA-NEXT:    and a5, a5, a4
-; RV64IA-NEXT:    xor a5, a3, a5
-; RV64IA-NEXT:    sc.w.rl a5, a5, (a2)
-; RV64IA-NEXT:    bnez a5, .LBB88_1
-; RV64IA-NEXT:  # %bb.2:
-; RV64IA-NEXT:    srlw a0, a3, a0
-; RV64IA-NEXT:    ret
+; RV64IA-WMO-LABEL: atomicrmw_sub_i16_acq_rel:
+; RV64IA-WMO:       # %bb.0:
+; RV64IA-WMO-NEXT:    andi a2, a0, -4
+; RV64IA-WMO-NEXT:    slli a0, a0, 3
+; RV64IA-WMO-NEXT:    lui a3, 16
+; RV64IA-WMO-NEXT:    addiw a3, a3, -1
+; RV64IA-WMO-NEXT:    sllw a4, a3, a0
+; RV64IA-WMO-NEXT:    and a1, a1, a3
+; RV64IA-WMO-NEXT:    sllw a1, a1, a0
+; RV64IA-WMO-NEXT:  .LBB88_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT:    lr.w.aq a3, (a2)
+; RV64IA-WMO-NEXT:    sub a5, a3, a1
+; RV64IA-WMO-NEXT:    xor a5, a3, a5
+; RV64IA-WMO-NEXT:    and a5, a5, a4
+; RV64IA-WMO-NEXT:    xor a5, a3, a5
+; RV64IA-WMO-NEXT:    sc.w.rl a5, a5, (a2)
+; RV64IA-WMO-NEXT:    bnez a5, .LBB88_1
+; RV64IA-WMO-NEXT:  # %bb.2:
+; RV64IA-WMO-NEXT:    srlw a0, a3, a0
+; RV64IA-WMO-NEXT:    ret
+;
+; RV64IA-TSO-LABEL: atomicrmw_sub_i16_acq_rel:
+; RV64IA-TSO:       # %bb.0:
+; RV64IA-TSO-NEXT:    andi a2, a0, -4
+; RV64IA-TSO-NEXT:    slli a0, a0, 3
+; RV64IA-TSO-NEXT:    lui a3, 16
+; RV64IA-TSO-NEXT:    addiw a3, a3, -1
+; RV64IA-TSO-NEXT:    sllw a4, a3, a0
+; RV64IA-TSO-NEXT:    and a1, a1, a3
+; RV64IA-TSO-NEXT:    sllw a1, a1, a0
+; RV64IA-TSO-NEXT:  .LBB88_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT:    lr.w a3, (a2)
+; RV64IA-TSO-NEXT:    sub a5, a3, a1
+; RV64IA-TSO-NEXT:    xor a5, a3, a5
+; RV64IA-TSO-NEXT:    and a5, a5, a4
+; RV64IA-TSO-NEXT:    xor a5, a3, a5
+; RV64IA-TSO-NEXT:    sc.w a5, a5, (a2)
+; RV64IA-TSO-NEXT:    bnez a5, .LBB88_1
+; RV64IA-TSO-NEXT:  # %bb.2:
+; RV64IA-TSO-NEXT:    srlw a0, a3, a0
+; RV64IA-TSO-NEXT:    ret
   %1 = atomicrmw sub ptr %a, i16 %b acq_rel
   ret i16 %1
 }
@@ -7097,27 +8597,49 @@ define i16 @atomicrmw_nand_i16_acquire(ptr %a, i16 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: atomicrmw_nand_i16_acquire:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a2, a0, -4
-; RV32IA-NEXT:    slli a0, a0, 3
-; RV32IA-NEXT:    lui a3, 16
-; RV32IA-NEXT:    addi a3, a3, -1
-; RV32IA-NEXT:    sll a4, a3, a0
-; RV32IA-NEXT:    and a1, a1, a3
-; RV32IA-NEXT:    sll a1, a1, a0
-; RV32IA-NEXT:  .LBB96_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w.aq a3, (a2)
-; RV32IA-NEXT:    and a5, a3, a1
-; RV32IA-NEXT:    not a5, a5
-; RV32IA-NEXT:    xor a5, a3, a5
-; RV32IA-NEXT:    and a5, a5, a4
-; RV32IA-NEXT:    xor a5, a3, a5
-; RV32IA-NEXT:    sc.w a5, a5, (a2)
-; RV32IA-NEXT:    bnez a5, .LBB96_1
-; RV32IA-NEXT:  # %bb.2:
-; RV32IA-NEXT:    srl a0, a3, a0
-; RV32IA-NEXT:    ret
+; RV32IA-WMO-LABEL: atomicrmw_nand_i16_acquire:
+; RV32IA-WMO:       # %bb.0:
+; RV32IA-WMO-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NEXT:    lui a3, 16
+; RV32IA-WMO-NEXT:    addi a3, a3, -1
+; RV32IA-WMO-NEXT:    sll a4, a3, a0
+; RV32IA-WMO-NEXT:    and a1, a1, a3
+; RV32IA-WMO-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NEXT:  .LBB96_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NEXT:    lr.w.aq a3, (a2)
+; RV32IA-WMO-NEXT:    and a5, a3, a1
+; RV32IA-WMO-NEXT:    not a5, a5
+; RV32IA-WMO-NEXT:    xor a5, a3, a5
+; RV32IA-WMO-NEXT:    and a5, a5, a4
+; RV32IA-WMO-NEXT:    xor a5, a3, a5
+; RV32IA-WMO-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-WMO-NEXT:    bnez a5, .LBB96_1
+; RV32IA-WMO-NEXT:  # %bb.2:
+; RV32IA-WMO-NEXT:    srl a0, a3, a0
+; RV32IA-WMO-NEXT:    ret
+;
+; RV32IA-TSO-LABEL: atomicrmw_nand_i16_acquire:
+; RV32IA-TSO:       # %bb.0:
+; RV32IA-TSO-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NEXT:    lui a3, 16
+; RV32IA-TSO-NEXT:    addi a3, a3, -1
+; RV32IA-TSO-NEXT:    sll a4, a3, a0
+; RV32IA-TSO-NEXT:    and a1, a1, a3
+; RV32IA-TSO-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NEXT:  .LBB96_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NEXT:    lr.w a3, (a2)
+; RV32IA-TSO-NEXT:    and a5, a3, a1
+; RV32IA-TSO-NEXT:    not a5, a5
+; RV32IA-TSO-NEXT:    xor a5, a3, a5
+; RV32IA-TSO-NEXT:    and a5, a5, a4
+; RV32IA-TSO-NEXT:    xor a5, a3, a5
+; RV32IA-TSO-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-NEXT:    bnez a5, .LBB96_1
+; RV32IA-TSO-NEXT:  # %bb.2:
+; RV32IA-TSO-NEXT:    srl a0, a3, a0
+; RV32IA-TSO-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_nand_i16_acquire:
 ; RV64I:       # %bb.0:
@@ -7129,27 +8651,49 @@ define i16 @atomicrmw_nand_i16_acquire(ptr %a, i16 %b) nounwind {
 ; RV64I-NEXT:    addi sp, sp, 16
 ; RV64I-NEXT:    ret
 ;
-; RV64IA-LABEL: atomicrmw_nand_i16_acquire:
-; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a2, a0, -4
-; RV64IA-NEXT:    slli a0, a0, 3
-; RV64IA-NEXT:    lui a3, 16
-; RV64IA-NEXT:    addiw a3, a3, -1
-; RV64IA-NEXT:    sllw a4, a3, a0
-; RV64IA-NEXT:    and a1, a1, a3
-; RV64IA-NEXT:    sllw a1, a1, a0
-; RV64IA-NEXT:  .LBB96_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w.aq a3, (a2)
-; RV64IA-NEXT:    and a5, a3, a1
-; RV64IA-NEXT:    not a5, a5
-; RV64IA-NEXT:    xor a5, a3, a5
-; RV64IA-NEXT:    and a5, a5, a4
-; RV64IA-NEXT:    xor a5, a3, a5
-; RV64IA-NEXT:    sc.w a5, a5, (a2)
-; RV64IA-NEXT:    bnez a5, .LBB96_1
-; RV64IA-NEXT:  # %bb.2:
-; RV64IA-NEXT:    srlw a0, a3, a0
-; RV64IA-NEXT:    ret
+; RV64IA-WMO-LABEL: atomicrmw_nand_i16_acquire:
+; RV64IA-WMO:       # %bb.0:
+; RV64IA-WMO-NEXT:    andi a2, a0, -4
+; RV64IA-WMO-NEXT:    slli a0, a0, 3
+; RV64IA-WMO-NEXT:    lui a3, 16
+; RV64IA-WMO-NEXT:    addiw a3, a3, -1
+; RV64IA-WMO-NEXT:    sllw a4, a3, a0
+; RV64IA-WMO-NEXT:    and a1, a1, a3
+; RV64IA-WMO-NEXT:    sllw a1, a1, a0
+; RV64IA-WMO-NEXT:  .LBB96_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT:    lr.w.aq a3, (a2)
+; RV64IA-WMO-NEXT:    and a5, a3, a1
+; RV64IA-WMO-NEXT:    not a5, a5
+; RV64IA-WMO-NEXT:    xor a5, a3, a5
+; RV64IA-WMO-NEXT:    and a5, a5, a4
+; RV64IA-WMO-NEXT:    xor a5, a3, a5
+; RV64IA-WMO-NEXT:    sc.w a5, a5, (a2)
+; RV64IA-WMO-NEXT:    bnez a5, .LBB96_1
+; RV64IA-WMO-NEXT:  # %bb.2:
+; RV64IA-WMO-NEXT:    srlw a0, a3, a0
+; RV64IA-WMO-NEXT:    ret
+;
+; RV64IA-TSO-LABEL: atomicrmw_nand_i16_acquire:
+; RV64IA-TSO:       # %bb.0:
+; RV64IA-TSO-NEXT:    andi a2, a0, -4
+; RV64IA-TSO-NEXT:    slli a0, a0, 3
+; RV64IA-TSO-NEXT:    lui a3, 16
+; RV64IA-TSO-NEXT:    addiw a3, a3, -1
+; RV64IA-TSO-NEXT:    sllw a4, a3, a0
+; RV64IA-TSO-NEXT:    and a1, a1, a3
+; RV64IA-TSO-NEXT:    sllw a1, a1, a0
+; RV64IA-TSO-NEXT:  .LBB96_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT:    lr.w a3, (a2)
+; RV64IA-TSO-NEXT:    and a5, a3, a1
+; RV64IA-TSO-NEXT:    not a5, a5
+; RV64IA-TSO-NEXT:    xor a5, a3, a5
+; RV64IA-TSO-NEXT:    and a5, a5, a4
+; RV64IA-TSO-NEXT:    xor a5, a3, a5
+; RV64IA-TSO-NEXT:    sc.w a5, a5, (a2)
+; RV64IA-TSO-NEXT:    bnez a5, .LBB96_1
+; RV64IA-TSO-NEXT:  # %bb.2:
+; RV64IA-TSO-NEXT:    srlw a0, a3, a0
+; RV64IA-TSO-NEXT:    ret
   %1 = atomicrmw nand ptr %a, i16 %b acquire
   ret i16 %1
 }
@@ -7165,27 +8709,49 @@ define i16 @atomicrmw_nand_i16_release(ptr %a, i16 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: atomicrmw_nand_i16_release:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a2, a0, -4
-; RV32IA-NEXT:    slli a0, a0, 3
-; RV32IA-NEXT:    lui a3, 16
-; RV32IA-NEXT:    addi a3, a3, -1
-; RV32IA-NEXT:    sll a4, a3, a0
-; RV32IA-NEXT:    and a1, a1, a3
-; RV32IA-NEXT:    sll a1, a1, a0
-; RV32IA-NEXT:  .LBB97_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w a3, (a2)
-; RV32IA-NEXT:    and a5, a3, a1
-; RV32IA-NEXT:    not a5, a5
-; RV32IA-NEXT:    xor a5, a3, a5
-; RV32IA-NEXT:    and a5, a5, a4
-; RV32IA-NEXT:    xor a5, a3, a5
-; RV32IA-NEXT:    sc.w.rl a5, a5, (a2)
-; RV32IA-NEXT:    bnez a5, .LBB97_1
-; RV32IA-NEXT:  # %bb.2:
-; RV32IA-NEXT:    srl a0, a3, a0
-; RV32IA-NEXT:    ret
+; RV32IA-WMO-LABEL: atomicrmw_nand_i16_release:
+; RV32IA-WMO:       # %bb.0:
+; RV32IA-WMO-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NEXT:    lui a3, 16
+; RV32IA-WMO-NEXT:    addi a3, a3, -1
+; RV32IA-WMO-NEXT:    sll a4, a3, a0
+; RV32IA-WMO-NEXT:    and a1, a1, a3
+; RV32IA-WMO-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NEXT:  .LBB97_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NEXT:    lr.w a3, (a2)
+; RV32IA-WMO-NEXT:    and a5, a3, a1
+; RV32IA-WMO-NEXT:    not a5, a5
+; RV32IA-WMO-NEXT:    xor a5, a3, a5
+; RV32IA-WMO-NEXT:    and a5, a5, a4
+; RV32IA-WMO-NEXT:    xor a5, a3, a5
+; RV32IA-WMO-NEXT:    sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-NEXT:    bnez a5, .LBB97_1
+; RV32IA-WMO-NEXT:  # %bb.2:
+; RV32IA-WMO-NEXT:    srl a0, a3, a0
+; RV32IA-WMO-NEXT:    ret
+;
+; RV32IA-TSO-LABEL: atomicrmw_nand_i16_release:
+; RV32IA-TSO:       # %bb.0:
+; RV32IA-TSO-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NEXT:    lui a3, 16
+; RV32IA-TSO-NEXT:    addi a3, a3, -1
+; RV32IA-TSO-NEXT:    sll a4, a3, a0
+; RV32IA-TSO-NEXT:    and a1, a1, a3
+; RV32IA-TSO-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NEXT:  .LBB97_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NEXT:    lr.w a3, (a2)
+; RV32IA-TSO-NEXT:    and a5, a3, a1
+; RV32IA-TSO-NEXT:    not a5, a5
+; RV32IA-TSO-NEXT:    xor a5, a3, a5
+; RV32IA-TSO-NEXT:    and a5, a5, a4
+; RV32IA-TSO-NEXT:    xor a5, a3, a5
+; RV32IA-TSO-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-NEXT:    bnez a5, .LBB97_1
+; RV32IA-TSO-NEXT:  # %bb.2:
+; RV32IA-TSO-NEXT:    srl a0, a3, a0
+; RV32IA-TSO-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_nand_i16_release:
 ; RV64I:       # %bb.0:
@@ -7197,27 +8763,49 @@ define i16 @atomicrmw_nand_i16_release(ptr %a, i16 %b) nounwind {
 ; RV64I-NEXT:    addi sp, sp, 16
 ; RV64I-NEXT:    ret
 ;
-; RV64IA-LABEL: atomicrmw_nand_i16_release:
-; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a2, a0, -4
-; RV64IA-NEXT:    slli a0, a0, 3
-; RV64IA-NEXT:    lui a3, 16
-; RV64IA-NEXT:    addiw a3, a3, -1
-; RV64IA-NEXT:    sllw a4, a3, a0
-; RV64IA-NEXT:    and a1, a1, a3
-; RV64IA-NEXT:    sllw a1, a1, a0
-; RV64IA-NEXT:  .LBB97_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w a3, (a2)
-; RV64IA-NEXT:    and a5, a3, a1
-; RV64IA-NEXT:    not a5, a5
-; RV64IA-NEXT:    xor a5, a3, a5
-; RV64IA-NEXT:    and a5, a5, a4
-; RV64IA-NEXT:    xor a5, a3, a5
-; RV64IA-NEXT:    sc.w.rl a5, a5, (a2)
-; RV64IA-NEXT:    bnez a5, .LBB97_1
-; RV64IA-NEXT:  # %bb.2:
-; RV64IA-NEXT:    srlw a0, a3, a0
-; RV64IA-NEXT:    ret
+; RV64IA-WMO-LABEL: atomicrmw_nand_i16_release:
+; RV64IA-WMO:       # %bb.0:
+; RV64IA-WMO-NEXT:    andi a2, a0, -4
+; RV64IA-WMO-NEXT:    slli a0, a0, 3
+; RV64IA-WMO-NEXT:    lui a3, 16
+; RV64IA-WMO-NEXT:    addiw a3, a3, -1
+; RV64IA-WMO-NEXT:    sllw a4, a3, a0
+; RV64IA-WMO-NEXT:    and a1, a1, a3
+; RV64IA-WMO-NEXT:    sllw a1, a1, a0
+; RV64IA-WMO-NEXT:  .LBB97_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT:    lr.w a3, (a2)
+; RV64IA-WMO-NEXT:    and a5, a3, a1
+; RV64IA-WMO-NEXT:    not a5, a5
+; RV64IA-WMO-NEXT:    xor a5, a3, a5
+; RV64IA-WMO-NEXT:    and a5, a5, a4
+; RV64IA-WMO-NEXT:    xor a5, a3, a5
+; RV64IA-WMO-NEXT:    sc.w.rl a5, a5, (a2)
+; RV64IA-WMO-NEXT:    bnez a5, .LBB97_1
+; RV64IA-WMO-NEXT:  # %bb.2:
+; RV64IA-WMO-NEXT:    srlw a0, a3, a0
+; RV64IA-WMO-NEXT:    ret
+;
+; RV64IA-TSO-LABEL: atomicrmw_nand_i16_release:
+; RV64IA-TSO:       # %bb.0:
+; RV64IA-TSO-NEXT:    andi a2, a0, -4
+; RV64IA-TSO-NEXT:    slli a0, a0, 3
+; RV64IA-TSO-NEXT:    lui a3, 16
+; RV64IA-TSO-NEXT:    addiw a3, a3, -1
+; RV64IA-TSO-NEXT:    sllw a4, a3, a0
+; RV64IA-TSO-NEXT:    and a1, a1, a3
+; RV64IA-TSO-NEXT:    sllw a1, a1, a0
+; RV64IA-TSO-NEXT:  .LBB97_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT:    lr.w a3, (a2)
+; RV64IA-TSO-NEXT:    and a5, a3, a1
+; RV64IA-TSO-NEXT:    not a5, a5
+; RV64IA-TSO-NEXT:    xor a5, a3, a5
+; RV64IA-TSO-NEXT:    and a5, a5, a4
+; RV64IA-TSO-NEXT:    xor a5, a3, a5
+; RV64IA-TSO-NEXT:    sc.w a5, a5, (a2)
+; RV64IA-TSO-NEXT:    bnez a5, .LBB97_1
+; RV64IA-TSO-NEXT:  # %bb.2:
+; RV64IA-TSO-NEXT:    srlw a0, a3, a0
+; RV64IA-TSO-NEXT:    ret
   %1 = atomicrmw nand ptr %a, i16 %b release
   ret i16 %1
 }
@@ -7233,27 +8821,49 @@ define i16 @atomicrmw_nand_i16_acq_rel(ptr %a, i16 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: atomicrmw_nand_i16_acq_rel:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a2, a0, -4
-; RV32IA-NEXT:    slli a0, a0, 3
-; RV32IA-NEXT:    lui a3, 16
-; RV32IA-NEXT:    addi a3, a3, -1
-; RV32IA-NEXT:    sll a4, a3, a0
-; RV32IA-NEXT:    and a1, a1, a3
-; RV32IA-NEXT:    sll a1, a1, a0
-; RV32IA-NEXT:  .LBB98_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w.aq a3, (a2)
-; RV32IA-NEXT:    and a5, a3, a1
-; RV32IA-NEXT:    not a5, a5
-; RV32IA-NEXT:    xor a5, a3, a5
-; RV32IA-NEXT:    and a5, a5, a4
-; RV32IA-NEXT:    xor a5, a3, a5
-; RV32IA-NEXT:    sc.w.rl a5, a5, (a2)
-; RV32IA-NEXT:    bnez a5, .LBB98_1
-; RV32IA-NEXT:  # %bb.2:
-; RV32IA-NEXT:    srl a0, a3, a0
-; RV32IA-NEXT:    ret
+; RV32IA-WMO-LABEL: atomicrmw_nand_i16_acq_rel:
+; RV32IA-WMO:       # %bb.0:
+; RV32IA-WMO-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NEXT:    lui a3, 16
+; RV32IA-WMO-NEXT:    addi a3, a3, -1
+; RV32IA-WMO-NEXT:    sll a4, a3, a0
+; RV32IA-WMO-NEXT:    and a1, a1, a3
+; RV32IA-WMO-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NEXT:  .LBB98_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NEXT:    lr.w.aq a3, (a2)
+; RV32IA-WMO-NEXT:    and a5, a3, a1
+; RV32IA-WMO-NEXT:    not a5, a5
+; RV32IA-WMO-NEXT:    xor a5, a3, a5
+; RV32IA-WMO-NEXT:    and a5, a5, a4
+; RV32IA-WMO-NEXT:    xor a5, a3, a5
+; RV32IA-WMO-NEXT:    sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-NEXT:    bnez a5, .LBB98_1
+; RV32IA-WMO-NEXT:  # %bb.2:
+; RV32IA-WMO-NEXT:    srl a0, a3, a0
+; RV32IA-WMO-NEXT:    ret
+;
+; RV32IA-TSO-LABEL: atomicrmw_nand_i16_acq_rel:
+; RV32IA-TSO:       # %bb.0:
+; RV32IA-TSO-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NEXT:    lui a3, 16
+; RV32IA-TSO-NEXT:    addi a3, a3, -1
+; RV32IA-TSO-NEXT:    sll a4, a3, a0
+; RV32IA-TSO-NEXT:    and a1, a1, a3
+; RV32IA-TSO-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NEXT:  .LBB98_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NEXT:    lr.w a3, (a2)
+; RV32IA-TSO-NEXT:    and a5, a3, a1
+; RV32IA-TSO-NEXT:    not a5, a5
+; RV32IA-TSO-NEXT:    xor a5, a3, a5
+; RV32IA-TSO-NEXT:    and a5, a5, a4
+; RV32IA-TSO-NEXT:    xor a5, a3, a5
+; RV32IA-TSO-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-NEXT:    bnez a5, .LBB98_1
+; RV32IA-TSO-NEXT:  # %bb.2:
+; RV32IA-TSO-NEXT:    srl a0, a3, a0
+; RV32IA-TSO-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_nand_i16_acq_rel:
 ; RV64I:       # %bb.0:
@@ -7265,27 +8875,49 @@ define i16 @atomicrmw_nand_i16_acq_rel(ptr %a, i16 %b) nounwind {
 ; RV64I-NEXT:    addi sp, sp, 16
 ; RV64I-NEXT:    ret
 ;
-; RV64IA-LABEL: atomicrmw_nand_i16_acq_rel:
-; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a2, a0, -4
-; RV64IA-NEXT:    slli a0, a0, 3
-; RV64IA-NEXT:    lui a3, 16
-; RV64IA-NEXT:    addiw a3, a3, -1
-; RV64IA-NEXT:    sllw a4, a3, a0
-; RV64IA-NEXT:    and a1, a1, a3
-; RV64IA-NEXT:    sllw a1, a1, a0
-; RV64IA-NEXT:  .LBB98_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w.aq a3, (a2)
-; RV64IA-NEXT:    and a5, a3, a1
-; RV64IA-NEXT:    not a5, a5
-; RV64IA-NEXT:    xor a5, a3, a5
-; RV64IA-NEXT:    and a5, a5, a4
-; RV64IA-NEXT:    xor a5, a3, a5
-; RV64IA-NEXT:    sc.w.rl a5, a5, (a2)
-; RV64IA-NEXT:    bnez a5, .LBB98_1
-; RV64IA-NEXT:  # %bb.2:
-; RV64IA-NEXT:    srlw a0, a3, a0
-; RV64IA-NEXT:    ret
+; RV64IA-WMO-LABEL: atomicrmw_nand_i16_acq_rel:
+; RV64IA-WMO:       # %bb.0:
+; RV64IA-WMO-NEXT:    andi a2, a0, -4
+; RV64IA-WMO-NEXT:    slli a0, a0, 3
+; RV64IA-WMO-NEXT:    lui a3, 16
+; RV64IA-WMO-NEXT:    addiw a3, a3, -1
+; RV64IA-WMO-NEXT:    sllw a4, a3, a0
+; RV64IA-WMO-NEXT:    and a1, a1, a3
+; RV64IA-WMO-NEXT:    sllw a1, a1, a0
+; RV64IA-WMO-NEXT:  .LBB98_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT:    lr.w.aq a3, (a2)
+; RV64IA-WMO-NEXT:    and a5, a3, a1
+; RV64IA-WMO-NEXT:    not a5, a5
+; RV64IA-WMO-NEXT:    xor a5, a3, a5
+; RV64IA-WMO-NEXT:    and a5, a5, a4
+; RV64IA-WMO-NEXT:    xor a5, a3, a5
+; RV64IA-WMO-NEXT:    sc.w.rl a5, a5, (a2)
+; RV64IA-WMO-NEXT:    bnez a5, .LBB98_1
+; RV64IA-WMO-NEXT:  # %bb.2:
+; RV64IA-WMO-NEXT:    srlw a0, a3, a0
+; RV64IA-WMO-NEXT:    ret
+;
+; RV64IA-TSO-LABEL: atomicrmw_nand_i16_acq_rel:
+; RV64IA-TSO:       # %bb.0:
+; RV64IA-TSO-NEXT:    andi a2, a0, -4
+; RV64IA-TSO-NEXT:    slli a0, a0, 3
+; RV64IA-TSO-NEXT:    lui a3, 16
+; RV64IA-TSO-NEXT:    addiw a3, a3, -1
+; RV64IA-TSO-NEXT:    sllw a4, a3, a0
+; RV64IA-TSO-NEXT:    and a1, a1, a3
+; RV64IA-TSO-NEXT:    sllw a1, a1, a0
+; RV64IA-TSO-NEXT:  .LBB98_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT:    lr.w a3, (a2)
+; RV64IA-TSO-NEXT:    and a5, a3, a1
+; RV64IA-TSO-NEXT:    not a5, a5
+; RV64IA-TSO-NEXT:    xor a5, a3, a5
+; RV64IA-TSO-NEXT:    and a5, a5, a4
+; RV64IA-TSO-NEXT:    xor a5, a3, a5
+; RV64IA-TSO-NEXT:    sc.w a5, a5, (a2)
+; RV64IA-TSO-NEXT:    bnez a5, .LBB98_1
+; RV64IA-TSO-NEXT:  # %bb.2:
+; RV64IA-TSO-NEXT:    srlw a0, a3, a0
+; RV64IA-TSO-NEXT:    ret
   %1 = atomicrmw nand ptr %a, i16 %b acq_rel
   ret i16 %1
 }
@@ -8011,36 +9643,67 @@ define i16 @atomicrmw_max_i16_acquire(ptr %a, i16 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: atomicrmw_max_i16_acquire:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a2, a0, -4
-; RV32IA-NEXT:    slli a0, a0, 3
-; RV32IA-NEXT:    andi a3, a0, 24
-; RV32IA-NEXT:    lui a4, 16
-; RV32IA-NEXT:    addi a4, a4, -1
-; RV32IA-NEXT:    sll a4, a4, a0
-; RV32IA-NEXT:    slli a1, a1, 16
-; RV32IA-NEXT:    srai a1, a1, 16
-; RV32IA-NEXT:    sll a1, a1, a0
-; RV32IA-NEXT:    li a5, 16
-; RV32IA-NEXT:    sub a5, a5, a3
-; RV32IA-NEXT:  .LBB111_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w.aq a3, (a2)
-; RV32IA-NEXT:    and a7, a3, a4
-; RV32IA-NEXT:    mv a6, a3
-; RV32IA-NEXT:    sll a7, a7, a5
-; RV32IA-NEXT:    sra a7, a7, a5
-; RV32IA-NEXT:    bge a7, a1, .LBB111_3
-; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB111_1 Depth=1
-; RV32IA-NEXT:    xor a6, a3, a1
-; RV32IA-NEXT:    and a6, a6, a4
-; RV32IA-NEXT:    xor a6, a3, a6
-; RV32IA-NEXT:  .LBB111_3: # in Loop: Header=BB111_1 Depth=1
-; RV32IA-NEXT:    sc.w a6, a6, (a2)
-; RV32IA-NEXT:    bnez a6, .LBB111_1
-; RV32IA-NEXT:  # %bb.4:
-; RV32IA-NEXT:    srl a0, a3, a0
-; RV32IA-NEXT:    ret
+; RV32IA-WMO-LABEL: atomicrmw_max_i16_acquire:
+; RV32IA-WMO:       # %bb.0:
+; RV32IA-WMO-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NEXT:    andi a3, a0, 24
+; RV32IA-WMO-NEXT:    lui a4, 16
+; RV32IA-WMO-NEXT:    addi a4, a4, -1
+; RV32IA-WMO-NEXT:    sll a4, a4, a0
+; RV32IA-WMO-NEXT:    slli a1, a1, 16
+; RV32IA-WMO-NEXT:    srai a1, a1, 16
+; RV32IA-WMO-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NEXT:    li a5, 16
+; RV32IA-WMO-NEXT:    sub a5, a5, a3
+; RV32IA-WMO-NEXT:  .LBB111_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NEXT:    lr.w.aq a3, (a2)
+; RV32IA-WMO-NEXT:    and a7, a3, a4
+; RV32IA-WMO-NEXT:    mv a6, a3
+; RV32IA-WMO-NEXT:    sll a7, a7, a5
+; RV32IA-WMO-NEXT:    sra a7, a7, a5
+; RV32IA-WMO-NEXT:    bge a7, a1, .LBB111_3
+; RV32IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB111_1 Depth=1
+; RV32IA-WMO-NEXT:    xor a6, a3, a1
+; RV32IA-WMO-NEXT:    and a6, a6, a4
+; RV32IA-WMO-NEXT:    xor a6, a3, a6
+; RV32IA-WMO-NEXT:  .LBB111_3: # in Loop: Header=BB111_1 Depth=1
+; RV32IA-WMO-NEXT:    sc.w a6, a6, (a2)
+; RV32IA-WMO-NEXT:    bnez a6, .LBB111_1
+; RV32IA-WMO-NEXT:  # %bb.4:
+; RV32IA-WMO-NEXT:    srl a0, a3, a0
+; RV32IA-WMO-NEXT:    ret
+;
+; RV32IA-TSO-LABEL: atomicrmw_max_i16_acquire:
+; RV32IA-TSO:       # %bb.0:
+; RV32IA-TSO-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NEXT:    andi a3, a0, 24
+; RV32IA-TSO-NEXT:    lui a4, 16
+; RV32IA-TSO-NEXT:    addi a4, a4, -1
+; RV32IA-TSO-NEXT:    sll a4, a4, a0
+; RV32IA-TSO-NEXT:    slli a1, a1, 16
+; RV32IA-TSO-NEXT:    srai a1, a1, 16
+; RV32IA-TSO-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NEXT:    li a5, 16
+; RV32IA-TSO-NEXT:    sub a5, a5, a3
+; RV32IA-TSO-NEXT:  .LBB111_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NEXT:    lr.w a3, (a2)
+; RV32IA-TSO-NEXT:    and a7, a3, a4
+; RV32IA-TSO-NEXT:    mv a6, a3
+; RV32IA-TSO-NEXT:    sll a7, a7, a5
+; RV32IA-TSO-NEXT:    sra a7, a7, a5
+; RV32IA-TSO-NEXT:    bge a7, a1, .LBB111_3
+; RV32IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB111_1 Depth=1
+; RV32IA-TSO-NEXT:    xor a6, a3, a1
+; RV32IA-TSO-NEXT:    and a6, a6, a4
+; RV32IA-TSO-NEXT:    xor a6, a3, a6
+; RV32IA-TSO-NEXT:  .LBB111_3: # in Loop: Header=BB111_1 Depth=1
+; RV32IA-TSO-NEXT:    sc.w a6, a6, (a2)
+; RV32IA-TSO-NEXT:    bnez a6, .LBB111_1
+; RV32IA-TSO-NEXT:  # %bb.4:
+; RV32IA-TSO-NEXT:    srl a0, a3, a0
+; RV32IA-TSO-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_max_i16_acquire:
 ; RV64I:       # %bb.0:
@@ -8084,36 +9747,67 @@ define i16 @atomicrmw_max_i16_acquire(ptr %a, i16 %b) nounwind {
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;
-; RV64IA-LABEL: atomicrmw_max_i16_acquire:
-; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a2, a0, -4
-; RV64IA-NEXT:    slli a0, a0, 3
-; RV64IA-NEXT:    andi a3, a0, 24
-; RV64IA-NEXT:    lui a4, 16
-; RV64IA-NEXT:    addiw a4, a4, -1
-; RV64IA-NEXT:    sllw a4, a4, a0
-; RV64IA-NEXT:    slli a1, a1, 48
-; RV64IA-NEXT:    srai a1, a1, 48
-; RV64IA-NEXT:    sllw a1, a1, a0
-; RV64IA-NEXT:    li a5, 48
-; RV64IA-NEXT:    sub a5, a5, a3
-; RV64IA-NEXT:  .LBB111_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w.aq a3, (a2)
-; RV64IA-NEXT:    and a7, a3, a4
-; RV64IA-NEXT:    mv a6, a3
-; RV64IA-NEXT:    sll a7, a7, a5
-; RV64IA-NEXT:    sra a7, a7, a5
-; RV64IA-NEXT:    bge a7, a1, .LBB111_3
-; RV64IA-NEXT:  # %bb.2: # in Loop: Header=BB111_1 Depth=1
-; RV64IA-NEXT:    xor a6, a3, a1
-; RV64IA-NEXT:    and a6, a6, a4
-; RV64IA-NEXT:    xor a6, a3, a6
-; RV64IA-NEXT:  .LBB111_3: # in Loop: Header=BB111_1 Depth=1
-; RV64IA-NEXT:    sc.w a6, a6, (a2)
-; RV64IA-NEXT:    bnez a6, .LBB111_1
-; RV64IA-NEXT:  # %bb.4:
-; RV64IA-NEXT:    srlw a0, a3, a0
-; RV64IA-NEXT:    ret
+; RV64IA-WMO-LABEL: atomicrmw_max_i16_acquire:
+; RV64IA-WMO:       # %bb.0:
+; RV64IA-WMO-NEXT:    andi a2, a0, -4
+; RV64IA-WMO-NEXT:    slli a0, a0, 3
+; RV64IA-WMO-NEXT:    andi a3, a0, 24
+; RV64IA-WMO-NEXT:    lui a4, 16
+; RV64IA-WMO-NEXT:    addiw a4, a4, -1
+; RV64IA-WMO-NEXT:    sllw a4, a4, a0
+; RV64IA-WMO-NEXT:    slli a1, a1, 48
+; RV64IA-WMO-NEXT:    srai a1, a1, 48
+; RV64IA-WMO-NEXT:    sllw a1, a1, a0
+; RV64IA-WMO-NEXT:    li a5, 48
+; RV64IA-WMO-NEXT:    sub a5, a5, a3
+; RV64IA-WMO-NEXT:  .LBB111_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT:    lr.w.aq a3, (a2)
+; RV64IA-WMO-NEXT:    and a7, a3, a4
+; RV64IA-WMO-NEXT:    mv a6, a3
+; RV64IA-WMO-NEXT:    sll a7, a7, a5
+; RV64IA-WMO-NEXT:    sra a7, a7, a5
+; RV64IA-WMO-NEXT:    bge a7, a1, .LBB111_3
+; RV64IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB111_1 Depth=1
+; RV64IA-WMO-NEXT:    xor a6, a3, a1
+; RV64IA-WMO-NEXT:    and a6, a6, a4
+; RV64IA-WMO-NEXT:    xor a6, a3, a6
+; RV64IA-WMO-NEXT:  .LBB111_3: # in Loop: Header=BB111_1 Depth=1
+; RV64IA-WMO-NEXT:    sc.w a6, a6, (a2)
+; RV64IA-WMO-NEXT:    bnez a6, .LBB111_1
+; RV64IA-WMO-NEXT:  # %bb.4:
+; RV64IA-WMO-NEXT:    srlw a0, a3, a0
+; RV64IA-WMO-NEXT:    ret
+;
+; RV64IA-TSO-LABEL: atomicrmw_max_i16_acquire:
+; RV64IA-TSO:       # %bb.0:
+; RV64IA-TSO-NEXT:    andi a2, a0, -4
+; RV64IA-TSO-NEXT:    slli a0, a0, 3
+; RV64IA-TSO-NEXT:    andi a3, a0, 24
+; RV64IA-TSO-NEXT:    lui a4, 16
+; RV64IA-TSO-NEXT:    addiw a4, a4, -1
+; RV64IA-TSO-NEXT:    sllw a4, a4, a0
+; RV64IA-TSO-NEXT:    slli a1, a1, 48
+; RV64IA-TSO-NEXT:    srai a1, a1, 48
+; RV64IA-TSO-NEXT:    sllw a1, a1, a0
+; RV64IA-TSO-NEXT:    li a5, 48
+; RV64IA-TSO-NEXT:    sub a5, a5, a3
+; RV64IA-TSO-NEXT:  .LBB111_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT:    lr.w a3, (a2)
+; RV64IA-TSO-NEXT:    and a7, a3, a4
+; RV64IA-TSO-NEXT:    mv a6, a3
+; RV64IA-TSO-NEXT:    sll a7, a7, a5
+; RV64IA-TSO-NEXT:    sra a7, a7, a5
+; RV64IA-TSO-NEXT:    bge a7, a1, .LBB111_3
+; RV64IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB111_1 Depth=1
+; RV64IA-TSO-NEXT:    xor a6, a3, a1
+; RV64IA-TSO-NEXT:    and a6, a6, a4
+; RV64IA-TSO-NEXT:    xor a6, a3, a6
+; RV64IA-TSO-NEXT:  .LBB111_3: # in Loop: Header=BB111_1 Depth=1
+; RV64IA-TSO-NEXT:    sc.w a6, a6, (a2)
+; RV64IA-TSO-NEXT:    bnez a6, .LBB111_1
+; RV64IA-TSO-NEXT:  # %bb.4:
+; RV64IA-TSO-NEXT:    srlw a0, a3, a0
+; RV64IA-TSO-NEXT:    ret
   %1 = atomicrmw max ptr %a, i16 %b acquire
   ret i16 %1
 }
@@ -8161,36 +9855,67 @@ define i16 @atomicrmw_max_i16_release(ptr %a, i16 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: atomicrmw_max_i16_release:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a2, a0, -4
-; RV32IA-NEXT:    slli a0, a0, 3
-; RV32IA-NEXT:    andi a3, a0, 24
-; RV32IA-NEXT:    lui a4, 16
-; RV32IA-NEXT:    addi a4, a4, -1
-; RV32IA-NEXT:    sll a4, a4, a0
-; RV32IA-NEXT:    slli a1, a1, 16
-; RV32IA-NEXT:    srai a1, a1, 16
-; RV32IA-NEXT:    sll a1, a1, a0
-; RV32IA-NEXT:    li a5, 16
-; RV32IA-NEXT:    sub a5, a5, a3
-; RV32IA-NEXT:  .LBB112_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w a3, (a2)
-; RV32IA-NEXT:    and a7, a3, a4
-; RV32IA-NEXT:    mv a6, a3
-; RV32IA-NEXT:    sll a7, a7, a5
-; RV32IA-NEXT:    sra a7, a7, a5
-; RV32IA-NEXT:    bge a7, a1, .LBB112_3
-; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB112_1 Depth=1
-; RV32IA-NEXT:    xor a6, a3, a1
-; RV32IA-NEXT:    and a6, a6, a4
-; RV32IA-NEXT:    xor a6, a3, a6
-; RV32IA-NEXT:  .LBB112_3: # in Loop: Header=BB112_1 Depth=1
-; RV32IA-NEXT:    sc.w.rl a6, a6, (a2)
-; RV32IA-NEXT:    bnez a6, .LBB112_1
-; RV32IA-NEXT:  # %bb.4:
-; RV32IA-NEXT:    srl a0, a3, a0
-; RV32IA-NEXT:    ret
+; RV32IA-WMO-LABEL: atomicrmw_max_i16_release:
+; RV32IA-WMO:       # %bb.0:
+; RV32IA-WMO-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NEXT:    andi a3, a0, 24
+; RV32IA-WMO-NEXT:    lui a4, 16
+; RV32IA-WMO-NEXT:    addi a4, a4, -1
+; RV32IA-WMO-NEXT:    sll a4, a4, a0
+; RV32IA-WMO-NEXT:    slli a1, a1, 16
+; RV32IA-WMO-NEXT:    srai a1, a1, 16
+; RV32IA-WMO-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NEXT:    li a5, 16
+; RV32IA-WMO-NEXT:    sub a5, a5, a3
+; RV32IA-WMO-NEXT:  .LBB112_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NEXT:    lr.w a3, (a2)
+; RV32IA-WMO-NEXT:    and a7, a3, a4
+; RV32IA-WMO-NEXT:    mv a6, a3
+; RV32IA-WMO-NEXT:    sll a7, a7, a5
+; RV32IA-WMO-NEXT:    sra a7, a7, a5
+; RV32IA-WMO-NEXT:    bge a7, a1, .LBB112_3
+; RV32IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB112_1 Depth=1
+; RV32IA-WMO-NEXT:    xor a6, a3, a1
+; RV32IA-WMO-NEXT:    and a6, a6, a4
+; RV32IA-WMO-NEXT:    xor a6, a3, a6
+; RV32IA-WMO-NEXT:  .LBB112_3: # in Loop: Header=BB112_1 Depth=1
+; RV32IA-WMO-NEXT:    sc.w.rl a6, a6, (a2)
+; RV32IA-WMO-NEXT:    bnez a6, .LBB112_1
+; RV32IA-WMO-NEXT:  # %bb.4:
+; RV32IA-WMO-NEXT:    srl a0, a3, a0
+; RV32IA-WMO-NEXT:    ret
+;
+; RV32IA-TSO-LABEL: atomicrmw_max_i16_release:
+; RV32IA-TSO:       # %bb.0:
+; RV32IA-TSO-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NEXT:    andi a3, a0, 24
+; RV32IA-TSO-NEXT:    lui a4, 16
+; RV32IA-TSO-NEXT:    addi a4, a4, -1
+; RV32IA-TSO-NEXT:    sll a4, a4, a0
+; RV32IA-TSO-NEXT:    slli a1, a1, 16
+; RV32IA-TSO-NEXT:    srai a1, a1, 16
+; RV32IA-TSO-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NEXT:    li a5, 16
+; RV32IA-TSO-NEXT:    sub a5, a5, a3
+; RV32IA-TSO-NEXT:  .LBB112_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NEXT:    lr.w a3, (a2)
+; RV32IA-TSO-NEXT:    and a7, a3, a4
+; RV32IA-TSO-NEXT:    mv a6, a3
+; RV32IA-TSO-NEXT:    sll a7, a7, a5
+; RV32IA-TSO-NEXT:    sra a7, a7, a5
+; RV32IA-TSO-NEXT:    bge a7, a1, .LBB112_3
+; RV32IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB112_1 Depth=1
+; RV32IA-TSO-NEXT:    xor a6, a3, a1
+; RV32IA-TSO-NEXT:    and a6, a6, a4
+; RV32IA-TSO-NEXT:    xor a6, a3, a6
+; RV32IA-TSO-NEXT:  .LBB112_3: # in Loop: Header=BB112_1 Depth=1
+; RV32IA-TSO-NEXT:    sc.w a6, a6, (a2)
+; RV32IA-TSO-NEXT:    bnez a6, .LBB112_1
+; RV32IA-TSO-NEXT:  # %bb.4:
+; RV32IA-TSO-NEXT:    srl a0, a3, a0
+; RV32IA-TSO-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_max_i16_release:
 ; RV64I:       # %bb.0:
@@ -8214,56 +9939,87 @@ define i16 @atomicrmw_max_i16_release(ptr %a, i16 %b) nounwind {
 ; RV64I-NEXT:    li a4, 0
 ; RV64I-NEXT:    call __atomic_compare_exchange_2 at plt
 ; RV64I-NEXT:    lh a3, 14(sp)
-; RV64I-NEXT:    bnez a0, .LBB112_4
-; RV64I-NEXT:  .LBB112_2: # %atomicrmw.start
-; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT:    slli a0, a3, 48
-; RV64I-NEXT:    srai a0, a0, 48
-; RV64I-NEXT:    mv a2, a3
-; RV64I-NEXT:    blt s2, a0, .LBB112_1
-; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB112_2 Depth=1
-; RV64I-NEXT:    mv a2, s1
-; RV64I-NEXT:    j .LBB112_1
-; RV64I-NEXT:  .LBB112_4: # %atomicrmw.end
-; RV64I-NEXT:    mv a0, a3
-; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    addi sp, sp, 48
-; RV64I-NEXT:    ret
-;
-; RV64IA-LABEL: atomicrmw_max_i16_release:
-; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a2, a0, -4
-; RV64IA-NEXT:    slli a0, a0, 3
-; RV64IA-NEXT:    andi a3, a0, 24
-; RV64IA-NEXT:    lui a4, 16
-; RV64IA-NEXT:    addiw a4, a4, -1
-; RV64IA-NEXT:    sllw a4, a4, a0
-; RV64IA-NEXT:    slli a1, a1, 48
-; RV64IA-NEXT:    srai a1, a1, 48
-; RV64IA-NEXT:    sllw a1, a1, a0
-; RV64IA-NEXT:    li a5, 48
-; RV64IA-NEXT:    sub a5, a5, a3
-; RV64IA-NEXT:  .LBB112_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w a3, (a2)
-; RV64IA-NEXT:    and a7, a3, a4
-; RV64IA-NEXT:    mv a6, a3
-; RV64IA-NEXT:    sll a7, a7, a5
-; RV64IA-NEXT:    sra a7, a7, a5
-; RV64IA-NEXT:    bge a7, a1, .LBB112_3
-; RV64IA-NEXT:  # %bb.2: # in Loop: Header=BB112_1 Depth=1
-; RV64IA-NEXT:    xor a6, a3, a1
-; RV64IA-NEXT:    and a6, a6, a4
-; RV64IA-NEXT:    xor a6, a3, a6
-; RV64IA-NEXT:  .LBB112_3: # in Loop: Header=BB112_1 Depth=1
-; RV64IA-NEXT:    sc.w.rl a6, a6, (a2)
-; RV64IA-NEXT:    bnez a6, .LBB112_1
-; RV64IA-NEXT:  # %bb.4:
-; RV64IA-NEXT:    srlw a0, a3, a0
-; RV64IA-NEXT:    ret
+; RV64I-NEXT:    bnez a0, .LBB112_4
+; RV64I-NEXT:  .LBB112_2: # %atomicrmw.start
+; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV64I-NEXT:    slli a0, a3, 48
+; RV64I-NEXT:    srai a0, a0, 48
+; RV64I-NEXT:    mv a2, a3
+; RV64I-NEXT:    blt s2, a0, .LBB112_1
+; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
+; RV64I-NEXT:    # in Loop: Header=BB112_2 Depth=1
+; RV64I-NEXT:    mv a2, s1
+; RV64I-NEXT:    j .LBB112_1
+; RV64I-NEXT:  .LBB112_4: # %atomicrmw.end
+; RV64I-NEXT:    mv a0, a3
+; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 48
+; RV64I-NEXT:    ret
+;
+; RV64IA-WMO-LABEL: atomicrmw_max_i16_release:
+; RV64IA-WMO:       # %bb.0:
+; RV64IA-WMO-NEXT:    andi a2, a0, -4
+; RV64IA-WMO-NEXT:    slli a0, a0, 3
+; RV64IA-WMO-NEXT:    andi a3, a0, 24
+; RV64IA-WMO-NEXT:    lui a4, 16
+; RV64IA-WMO-NEXT:    addiw a4, a4, -1
+; RV64IA-WMO-NEXT:    sllw a4, a4, a0
+; RV64IA-WMO-NEXT:    slli a1, a1, 48
+; RV64IA-WMO-NEXT:    srai a1, a1, 48
+; RV64IA-WMO-NEXT:    sllw a1, a1, a0
+; RV64IA-WMO-NEXT:    li a5, 48
+; RV64IA-WMO-NEXT:    sub a5, a5, a3
+; RV64IA-WMO-NEXT:  .LBB112_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT:    lr.w a3, (a2)
+; RV64IA-WMO-NEXT:    and a7, a3, a4
+; RV64IA-WMO-NEXT:    mv a6, a3
+; RV64IA-WMO-NEXT:    sll a7, a7, a5
+; RV64IA-WMO-NEXT:    sra a7, a7, a5
+; RV64IA-WMO-NEXT:    bge a7, a1, .LBB112_3
+; RV64IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB112_1 Depth=1
+; RV64IA-WMO-NEXT:    xor a6, a3, a1
+; RV64IA-WMO-NEXT:    and a6, a6, a4
+; RV64IA-WMO-NEXT:    xor a6, a3, a6
+; RV64IA-WMO-NEXT:  .LBB112_3: # in Loop: Header=BB112_1 Depth=1
+; RV64IA-WMO-NEXT:    sc.w.rl a6, a6, (a2)
+; RV64IA-WMO-NEXT:    bnez a6, .LBB112_1
+; RV64IA-WMO-NEXT:  # %bb.4:
+; RV64IA-WMO-NEXT:    srlw a0, a3, a0
+; RV64IA-WMO-NEXT:    ret
+;
+; RV64IA-TSO-LABEL: atomicrmw_max_i16_release:
+; RV64IA-TSO:       # %bb.0:
+; RV64IA-TSO-NEXT:    andi a2, a0, -4
+; RV64IA-TSO-NEXT:    slli a0, a0, 3
+; RV64IA-TSO-NEXT:    andi a3, a0, 24
+; RV64IA-TSO-NEXT:    lui a4, 16
+; RV64IA-TSO-NEXT:    addiw a4, a4, -1
+; RV64IA-TSO-NEXT:    sllw a4, a4, a0
+; RV64IA-TSO-NEXT:    slli a1, a1, 48
+; RV64IA-TSO-NEXT:    srai a1, a1, 48
+; RV64IA-TSO-NEXT:    sllw a1, a1, a0
+; RV64IA-TSO-NEXT:    li a5, 48
+; RV64IA-TSO-NEXT:    sub a5, a5, a3
+; RV64IA-TSO-NEXT:  .LBB112_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT:    lr.w a3, (a2)
+; RV64IA-TSO-NEXT:    and a7, a3, a4
+; RV64IA-TSO-NEXT:    mv a6, a3
+; RV64IA-TSO-NEXT:    sll a7, a7, a5
+; RV64IA-TSO-NEXT:    sra a7, a7, a5
+; RV64IA-TSO-NEXT:    bge a7, a1, .LBB112_3
+; RV64IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB112_1 Depth=1
+; RV64IA-TSO-NEXT:    xor a6, a3, a1
+; RV64IA-TSO-NEXT:    and a6, a6, a4
+; RV64IA-TSO-NEXT:    xor a6, a3, a6
+; RV64IA-TSO-NEXT:  .LBB112_3: # in Loop: Header=BB112_1 Depth=1
+; RV64IA-TSO-NEXT:    sc.w a6, a6, (a2)
+; RV64IA-TSO-NEXT:    bnez a6, .LBB112_1
+; RV64IA-TSO-NEXT:  # %bb.4:
+; RV64IA-TSO-NEXT:    srlw a0, a3, a0
+; RV64IA-TSO-NEXT:    ret
   %1 = atomicrmw max ptr %a, i16 %b release
   ret i16 %1
 }
@@ -8311,36 +10067,67 @@ define i16 @atomicrmw_max_i16_acq_rel(ptr %a, i16 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: atomicrmw_max_i16_acq_rel:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a2, a0, -4
-; RV32IA-NEXT:    slli a0, a0, 3
-; RV32IA-NEXT:    andi a3, a0, 24
-; RV32IA-NEXT:    lui a4, 16
-; RV32IA-NEXT:    addi a4, a4, -1
-; RV32IA-NEXT:    sll a4, a4, a0
-; RV32IA-NEXT:    slli a1, a1, 16
-; RV32IA-NEXT:    srai a1, a1, 16
-; RV32IA-NEXT:    sll a1, a1, a0
-; RV32IA-NEXT:    li a5, 16
-; RV32IA-NEXT:    sub a5, a5, a3
-; RV32IA-NEXT:  .LBB113_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w.aq a3, (a2)
-; RV32IA-NEXT:    and a7, a3, a4
-; RV32IA-NEXT:    mv a6, a3
-; RV32IA-NEXT:    sll a7, a7, a5
-; RV32IA-NEXT:    sra a7, a7, a5
-; RV32IA-NEXT:    bge a7, a1, .LBB113_3
-; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB113_1 Depth=1
-; RV32IA-NEXT:    xor a6, a3, a1
-; RV32IA-NEXT:    and a6, a6, a4
-; RV32IA-NEXT:    xor a6, a3, a6
-; RV32IA-NEXT:  .LBB113_3: # in Loop: Header=BB113_1 Depth=1
-; RV32IA-NEXT:    sc.w.rl a6, a6, (a2)
-; RV32IA-NEXT:    bnez a6, .LBB113_1
-; RV32IA-NEXT:  # %bb.4:
-; RV32IA-NEXT:    srl a0, a3, a0
-; RV32IA-NEXT:    ret
+; RV32IA-WMO-LABEL: atomicrmw_max_i16_acq_rel:
+; RV32IA-WMO:       # %bb.0:
+; RV32IA-WMO-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NEXT:    andi a3, a0, 24
+; RV32IA-WMO-NEXT:    lui a4, 16
+; RV32IA-WMO-NEXT:    addi a4, a4, -1
+; RV32IA-WMO-NEXT:    sll a4, a4, a0
+; RV32IA-WMO-NEXT:    slli a1, a1, 16
+; RV32IA-WMO-NEXT:    srai a1, a1, 16
+; RV32IA-WMO-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NEXT:    li a5, 16
+; RV32IA-WMO-NEXT:    sub a5, a5, a3
+; RV32IA-WMO-NEXT:  .LBB113_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NEXT:    lr.w.aq a3, (a2)
+; RV32IA-WMO-NEXT:    and a7, a3, a4
+; RV32IA-WMO-NEXT:    mv a6, a3
+; RV32IA-WMO-NEXT:    sll a7, a7, a5
+; RV32IA-WMO-NEXT:    sra a7, a7, a5
+; RV32IA-WMO-NEXT:    bge a7, a1, .LBB113_3
+; RV32IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB113_1 Depth=1
+; RV32IA-WMO-NEXT:    xor a6, a3, a1
+; RV32IA-WMO-NEXT:    and a6, a6, a4
+; RV32IA-WMO-NEXT:    xor a6, a3, a6
+; RV32IA-WMO-NEXT:  .LBB113_3: # in Loop: Header=BB113_1 Depth=1
+; RV32IA-WMO-NEXT:    sc.w.rl a6, a6, (a2)
+; RV32IA-WMO-NEXT:    bnez a6, .LBB113_1
+; RV32IA-WMO-NEXT:  # %bb.4:
+; RV32IA-WMO-NEXT:    srl a0, a3, a0
+; RV32IA-WMO-NEXT:    ret
+;
+; RV32IA-TSO-LABEL: atomicrmw_max_i16_acq_rel:
+; RV32IA-TSO:       # %bb.0:
+; RV32IA-TSO-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NEXT:    andi a3, a0, 24
+; RV32IA-TSO-NEXT:    lui a4, 16
+; RV32IA-TSO-NEXT:    addi a4, a4, -1
+; RV32IA-TSO-NEXT:    sll a4, a4, a0
+; RV32IA-TSO-NEXT:    slli a1, a1, 16
+; RV32IA-TSO-NEXT:    srai a1, a1, 16
+; RV32IA-TSO-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NEXT:    li a5, 16
+; RV32IA-TSO-NEXT:    sub a5, a5, a3
+; RV32IA-TSO-NEXT:  .LBB113_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NEXT:    lr.w a3, (a2)
+; RV32IA-TSO-NEXT:    and a7, a3, a4
+; RV32IA-TSO-NEXT:    mv a6, a3
+; RV32IA-TSO-NEXT:    sll a7, a7, a5
+; RV32IA-TSO-NEXT:    sra a7, a7, a5
+; RV32IA-TSO-NEXT:    bge a7, a1, .LBB113_3
+; RV32IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB113_1 Depth=1
+; RV32IA-TSO-NEXT:    xor a6, a3, a1
+; RV32IA-TSO-NEXT:    and a6, a6, a4
+; RV32IA-TSO-NEXT:    xor a6, a3, a6
+; RV32IA-TSO-NEXT:  .LBB113_3: # in Loop: Header=BB113_1 Depth=1
+; RV32IA-TSO-NEXT:    sc.w a6, a6, (a2)
+; RV32IA-TSO-NEXT:    bnez a6, .LBB113_1
+; RV32IA-TSO-NEXT:  # %bb.4:
+; RV32IA-TSO-NEXT:    srl a0, a3, a0
+; RV32IA-TSO-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_max_i16_acq_rel:
 ; RV64I:       # %bb.0:
@@ -8384,36 +10171,67 @@ define i16 @atomicrmw_max_i16_acq_rel(ptr %a, i16 %b) nounwind {
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;
-; RV64IA-LABEL: atomicrmw_max_i16_acq_rel:
-; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a2, a0, -4
-; RV64IA-NEXT:    slli a0, a0, 3
-; RV64IA-NEXT:    andi a3, a0, 24
-; RV64IA-NEXT:    lui a4, 16
-; RV64IA-NEXT:    addiw a4, a4, -1
-; RV64IA-NEXT:    sllw a4, a4, a0
-; RV64IA-NEXT:    slli a1, a1, 48
-; RV64IA-NEXT:    srai a1, a1, 48
-; RV64IA-NEXT:    sllw a1, a1, a0
-; RV64IA-NEXT:    li a5, 48
-; RV64IA-NEXT:    sub a5, a5, a3
-; RV64IA-NEXT:  .LBB113_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w.aq a3, (a2)
-; RV64IA-NEXT:    and a7, a3, a4
-; RV64IA-NEXT:    mv a6, a3
-; RV64IA-NEXT:    sll a7, a7, a5
-; RV64IA-NEXT:    sra a7, a7, a5
-; RV64IA-NEXT:    bge a7, a1, .LBB113_3
-; RV64IA-NEXT:  # %bb.2: # in Loop: Header=BB113_1 Depth=1
-; RV64IA-NEXT:    xor a6, a3, a1
-; RV64IA-NEXT:    and a6, a6, a4
-; RV64IA-NEXT:    xor a6, a3, a6
-; RV64IA-NEXT:  .LBB113_3: # in Loop: Header=BB113_1 Depth=1
-; RV64IA-NEXT:    sc.w.rl a6, a6, (a2)
-; RV64IA-NEXT:    bnez a6, .LBB113_1
-; RV64IA-NEXT:  # %bb.4:
-; RV64IA-NEXT:    srlw a0, a3, a0
-; RV64IA-NEXT:    ret
+; RV64IA-WMO-LABEL: atomicrmw_max_i16_acq_rel:
+; RV64IA-WMO:       # %bb.0:
+; RV64IA-WMO-NEXT:    andi a2, a0, -4
+; RV64IA-WMO-NEXT:    slli a0, a0, 3
+; RV64IA-WMO-NEXT:    andi a3, a0, 24
+; RV64IA-WMO-NEXT:    lui a4, 16
+; RV64IA-WMO-NEXT:    addiw a4, a4, -1
+; RV64IA-WMO-NEXT:    sllw a4, a4, a0
+; RV64IA-WMO-NEXT:    slli a1, a1, 48
+; RV64IA-WMO-NEXT:    srai a1, a1, 48
+; RV64IA-WMO-NEXT:    sllw a1, a1, a0
+; RV64IA-WMO-NEXT:    li a5, 48
+; RV64IA-WMO-NEXT:    sub a5, a5, a3
+; RV64IA-WMO-NEXT:  .LBB113_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT:    lr.w.aq a3, (a2)
+; RV64IA-WMO-NEXT:    and a7, a3, a4
+; RV64IA-WMO-NEXT:    mv a6, a3
+; RV64IA-WMO-NEXT:    sll a7, a7, a5
+; RV64IA-WMO-NEXT:    sra a7, a7, a5
+; RV64IA-WMO-NEXT:    bge a7, a1, .LBB113_3
+; RV64IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB113_1 Depth=1
+; RV64IA-WMO-NEXT:    xor a6, a3, a1
+; RV64IA-WMO-NEXT:    and a6, a6, a4
+; RV64IA-WMO-NEXT:    xor a6, a3, a6
+; RV64IA-WMO-NEXT:  .LBB113_3: # in Loop: Header=BB113_1 Depth=1
+; RV64IA-WMO-NEXT:    sc.w.rl a6, a6, (a2)
+; RV64IA-WMO-NEXT:    bnez a6, .LBB113_1
+; RV64IA-WMO-NEXT:  # %bb.4:
+; RV64IA-WMO-NEXT:    srlw a0, a3, a0
+; RV64IA-WMO-NEXT:    ret
+;
+; RV64IA-TSO-LABEL: atomicrmw_max_i16_acq_rel:
+; RV64IA-TSO:       # %bb.0:
+; RV64IA-TSO-NEXT:    andi a2, a0, -4
+; RV64IA-TSO-NEXT:    slli a0, a0, 3
+; RV64IA-TSO-NEXT:    andi a3, a0, 24
+; RV64IA-TSO-NEXT:    lui a4, 16
+; RV64IA-TSO-NEXT:    addiw a4, a4, -1
+; RV64IA-TSO-NEXT:    sllw a4, a4, a0
+; RV64IA-TSO-NEXT:    slli a1, a1, 48
+; RV64IA-TSO-NEXT:    srai a1, a1, 48
+; RV64IA-TSO-NEXT:    sllw a1, a1, a0
+; RV64IA-TSO-NEXT:    li a5, 48
+; RV64IA-TSO-NEXT:    sub a5, a5, a3
+; RV64IA-TSO-NEXT:  .LBB113_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT:    lr.w a3, (a2)
+; RV64IA-TSO-NEXT:    and a7, a3, a4
+; RV64IA-TSO-NEXT:    mv a6, a3
+; RV64IA-TSO-NEXT:    sll a7, a7, a5
+; RV64IA-TSO-NEXT:    sra a7, a7, a5
+; RV64IA-TSO-NEXT:    bge a7, a1, .LBB113_3
+; RV64IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB113_1 Depth=1
+; RV64IA-TSO-NEXT:    xor a6, a3, a1
+; RV64IA-TSO-NEXT:    and a6, a6, a4
+; RV64IA-TSO-NEXT:    xor a6, a3, a6
+; RV64IA-TSO-NEXT:  .LBB113_3: # in Loop: Header=BB113_1 Depth=1
+; RV64IA-TSO-NEXT:    sc.w a6, a6, (a2)
+; RV64IA-TSO-NEXT:    bnez a6, .LBB113_1
+; RV64IA-TSO-NEXT:  # %bb.4:
+; RV64IA-TSO-NEXT:    srlw a0, a3, a0
+; RV64IA-TSO-NEXT:    ret
   %1 = atomicrmw max ptr %a, i16 %b acq_rel
   ret i16 %1
 }
@@ -8761,36 +10579,67 @@ define i16 @atomicrmw_min_i16_acquire(ptr %a, i16 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: atomicrmw_min_i16_acquire:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a2, a0, -4
-; RV32IA-NEXT:    slli a0, a0, 3
-; RV32IA-NEXT:    andi a3, a0, 24
-; RV32IA-NEXT:    lui a4, 16
-; RV32IA-NEXT:    addi a4, a4, -1
-; RV32IA-NEXT:    sll a4, a4, a0
-; RV32IA-NEXT:    slli a1, a1, 16
-; RV32IA-NEXT:    srai a1, a1, 16
-; RV32IA-NEXT:    sll a1, a1, a0
-; RV32IA-NEXT:    li a5, 16
-; RV32IA-NEXT:    sub a5, a5, a3
-; RV32IA-NEXT:  .LBB116_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w.aq a3, (a2)
-; RV32IA-NEXT:    and a7, a3, a4
-; RV32IA-NEXT:    mv a6, a3
-; RV32IA-NEXT:    sll a7, a7, a5
-; RV32IA-NEXT:    sra a7, a7, a5
-; RV32IA-NEXT:    bge a1, a7, .LBB116_3
-; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB116_1 Depth=1
-; RV32IA-NEXT:    xor a6, a3, a1
-; RV32IA-NEXT:    and a6, a6, a4
-; RV32IA-NEXT:    xor a6, a3, a6
-; RV32IA-NEXT:  .LBB116_3: # in Loop: Header=BB116_1 Depth=1
-; RV32IA-NEXT:    sc.w a6, a6, (a2)
-; RV32IA-NEXT:    bnez a6, .LBB116_1
-; RV32IA-NEXT:  # %bb.4:
-; RV32IA-NEXT:    srl a0, a3, a0
-; RV32IA-NEXT:    ret
+; RV32IA-WMO-LABEL: atomicrmw_min_i16_acquire:
+; RV32IA-WMO:       # %bb.0:
+; RV32IA-WMO-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NEXT:    andi a3, a0, 24
+; RV32IA-WMO-NEXT:    lui a4, 16
+; RV32IA-WMO-NEXT:    addi a4, a4, -1
+; RV32IA-WMO-NEXT:    sll a4, a4, a0
+; RV32IA-WMO-NEXT:    slli a1, a1, 16
+; RV32IA-WMO-NEXT:    srai a1, a1, 16
+; RV32IA-WMO-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NEXT:    li a5, 16
+; RV32IA-WMO-NEXT:    sub a5, a5, a3
+; RV32IA-WMO-NEXT:  .LBB116_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NEXT:    lr.w.aq a3, (a2)
+; RV32IA-WMO-NEXT:    and a7, a3, a4
+; RV32IA-WMO-NEXT:    mv a6, a3
+; RV32IA-WMO-NEXT:    sll a7, a7, a5
+; RV32IA-WMO-NEXT:    sra a7, a7, a5
+; RV32IA-WMO-NEXT:    bge a1, a7, .LBB116_3
+; RV32IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB116_1 Depth=1
+; RV32IA-WMO-NEXT:    xor a6, a3, a1
+; RV32IA-WMO-NEXT:    and a6, a6, a4
+; RV32IA-WMO-NEXT:    xor a6, a3, a6
+; RV32IA-WMO-NEXT:  .LBB116_3: # in Loop: Header=BB116_1 Depth=1
+; RV32IA-WMO-NEXT:    sc.w a6, a6, (a2)
+; RV32IA-WMO-NEXT:    bnez a6, .LBB116_1
+; RV32IA-WMO-NEXT:  # %bb.4:
+; RV32IA-WMO-NEXT:    srl a0, a3, a0
+; RV32IA-WMO-NEXT:    ret
+;
+; RV32IA-TSO-LABEL: atomicrmw_min_i16_acquire:
+; RV32IA-TSO:       # %bb.0:
+; RV32IA-TSO-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NEXT:    andi a3, a0, 24
+; RV32IA-TSO-NEXT:    lui a4, 16
+; RV32IA-TSO-NEXT:    addi a4, a4, -1
+; RV32IA-TSO-NEXT:    sll a4, a4, a0
+; RV32IA-TSO-NEXT:    slli a1, a1, 16
+; RV32IA-TSO-NEXT:    srai a1, a1, 16
+; RV32IA-TSO-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NEXT:    li a5, 16
+; RV32IA-TSO-NEXT:    sub a5, a5, a3
+; RV32IA-TSO-NEXT:  .LBB116_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NEXT:    lr.w a3, (a2)
+; RV32IA-TSO-NEXT:    and a7, a3, a4
+; RV32IA-TSO-NEXT:    mv a6, a3
+; RV32IA-TSO-NEXT:    sll a7, a7, a5
+; RV32IA-TSO-NEXT:    sra a7, a7, a5
+; RV32IA-TSO-NEXT:    bge a1, a7, .LBB116_3
+; RV32IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB116_1 Depth=1
+; RV32IA-TSO-NEXT:    xor a6, a3, a1
+; RV32IA-TSO-NEXT:    and a6, a6, a4
+; RV32IA-TSO-NEXT:    xor a6, a3, a6
+; RV32IA-TSO-NEXT:  .LBB116_3: # in Loop: Header=BB116_1 Depth=1
+; RV32IA-TSO-NEXT:    sc.w a6, a6, (a2)
+; RV32IA-TSO-NEXT:    bnez a6, .LBB116_1
+; RV32IA-TSO-NEXT:  # %bb.4:
+; RV32IA-TSO-NEXT:    srl a0, a3, a0
+; RV32IA-TSO-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_min_i16_acquire:
 ; RV64I:       # %bb.0:
@@ -8834,36 +10683,67 @@ define i16 @atomicrmw_min_i16_acquire(ptr %a, i16 %b) nounwind {
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;
-; RV64IA-LABEL: atomicrmw_min_i16_acquire:
-; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a2, a0, -4
-; RV64IA-NEXT:    slli a0, a0, 3
-; RV64IA-NEXT:    andi a3, a0, 24
-; RV64IA-NEXT:    lui a4, 16
-; RV64IA-NEXT:    addiw a4, a4, -1
-; RV64IA-NEXT:    sllw a4, a4, a0
-; RV64IA-NEXT:    slli a1, a1, 48
-; RV64IA-NEXT:    srai a1, a1, 48
-; RV64IA-NEXT:    sllw a1, a1, a0
-; RV64IA-NEXT:    li a5, 48
-; RV64IA-NEXT:    sub a5, a5, a3
-; RV64IA-NEXT:  .LBB116_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w.aq a3, (a2)
-; RV64IA-NEXT:    and a7, a3, a4
-; RV64IA-NEXT:    mv a6, a3
-; RV64IA-NEXT:    sll a7, a7, a5
-; RV64IA-NEXT:    sra a7, a7, a5
-; RV64IA-NEXT:    bge a1, a7, .LBB116_3
-; RV64IA-NEXT:  # %bb.2: # in Loop: Header=BB116_1 Depth=1
-; RV64IA-NEXT:    xor a6, a3, a1
-; RV64IA-NEXT:    and a6, a6, a4
-; RV64IA-NEXT:    xor a6, a3, a6
-; RV64IA-NEXT:  .LBB116_3: # in Loop: Header=BB116_1 Depth=1
-; RV64IA-NEXT:    sc.w a6, a6, (a2)
-; RV64IA-NEXT:    bnez a6, .LBB116_1
-; RV64IA-NEXT:  # %bb.4:
-; RV64IA-NEXT:    srlw a0, a3, a0
-; RV64IA-NEXT:    ret
+; RV64IA-WMO-LABEL: atomicrmw_min_i16_acquire:
+; RV64IA-WMO:       # %bb.0:
+; RV64IA-WMO-NEXT:    andi a2, a0, -4
+; RV64IA-WMO-NEXT:    slli a0, a0, 3
+; RV64IA-WMO-NEXT:    andi a3, a0, 24
+; RV64IA-WMO-NEXT:    lui a4, 16
+; RV64IA-WMO-NEXT:    addiw a4, a4, -1
+; RV64IA-WMO-NEXT:    sllw a4, a4, a0
+; RV64IA-WMO-NEXT:    slli a1, a1, 48
+; RV64IA-WMO-NEXT:    srai a1, a1, 48
+; RV64IA-WMO-NEXT:    sllw a1, a1, a0
+; RV64IA-WMO-NEXT:    li a5, 48
+; RV64IA-WMO-NEXT:    sub a5, a5, a3
+; RV64IA-WMO-NEXT:  .LBB116_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT:    lr.w.aq a3, (a2)
+; RV64IA-WMO-NEXT:    and a7, a3, a4
+; RV64IA-WMO-NEXT:    mv a6, a3
+; RV64IA-WMO-NEXT:    sll a7, a7, a5
+; RV64IA-WMO-NEXT:    sra a7, a7, a5
+; RV64IA-WMO-NEXT:    bge a1, a7, .LBB116_3
+; RV64IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB116_1 Depth=1
+; RV64IA-WMO-NEXT:    xor a6, a3, a1
+; RV64IA-WMO-NEXT:    and a6, a6, a4
+; RV64IA-WMO-NEXT:    xor a6, a3, a6
+; RV64IA-WMO-NEXT:  .LBB116_3: # in Loop: Header=BB116_1 Depth=1
+; RV64IA-WMO-NEXT:    sc.w a6, a6, (a2)
+; RV64IA-WMO-NEXT:    bnez a6, .LBB116_1
+; RV64IA-WMO-NEXT:  # %bb.4:
+; RV64IA-WMO-NEXT:    srlw a0, a3, a0
+; RV64IA-WMO-NEXT:    ret
+;
+; RV64IA-TSO-LABEL: atomicrmw_min_i16_acquire:
+; RV64IA-TSO:       # %bb.0:
+; RV64IA-TSO-NEXT:    andi a2, a0, -4
+; RV64IA-TSO-NEXT:    slli a0, a0, 3
+; RV64IA-TSO-NEXT:    andi a3, a0, 24
+; RV64IA-TSO-NEXT:    lui a4, 16
+; RV64IA-TSO-NEXT:    addiw a4, a4, -1
+; RV64IA-TSO-NEXT:    sllw a4, a4, a0
+; RV64IA-TSO-NEXT:    slli a1, a1, 48
+; RV64IA-TSO-NEXT:    srai a1, a1, 48
+; RV64IA-TSO-NEXT:    sllw a1, a1, a0
+; RV64IA-TSO-NEXT:    li a5, 48
+; RV64IA-TSO-NEXT:    sub a5, a5, a3
+; RV64IA-TSO-NEXT:  .LBB116_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT:    lr.w a3, (a2)
+; RV64IA-TSO-NEXT:    and a7, a3, a4
+; RV64IA-TSO-NEXT:    mv a6, a3
+; RV64IA-TSO-NEXT:    sll a7, a7, a5
+; RV64IA-TSO-NEXT:    sra a7, a7, a5
+; RV64IA-TSO-NEXT:    bge a1, a7, .LBB116_3
+; RV64IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB116_1 Depth=1
+; RV64IA-TSO-NEXT:    xor a6, a3, a1
+; RV64IA-TSO-NEXT:    and a6, a6, a4
+; RV64IA-TSO-NEXT:    xor a6, a3, a6
+; RV64IA-TSO-NEXT:  .LBB116_3: # in Loop: Header=BB116_1 Depth=1
+; RV64IA-TSO-NEXT:    sc.w a6, a6, (a2)
+; RV64IA-TSO-NEXT:    bnez a6, .LBB116_1
+; RV64IA-TSO-NEXT:  # %bb.4:
+; RV64IA-TSO-NEXT:    srlw a0, a3, a0
+; RV64IA-TSO-NEXT:    ret
   %1 = atomicrmw min ptr %a, i16 %b acquire
   ret i16 %1
 }
@@ -8911,36 +10791,67 @@ define i16 @atomicrmw_min_i16_release(ptr %a, i16 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: atomicrmw_min_i16_release:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a2, a0, -4
-; RV32IA-NEXT:    slli a0, a0, 3
-; RV32IA-NEXT:    andi a3, a0, 24
-; RV32IA-NEXT:    lui a4, 16
-; RV32IA-NEXT:    addi a4, a4, -1
-; RV32IA-NEXT:    sll a4, a4, a0
-; RV32IA-NEXT:    slli a1, a1, 16
-; RV32IA-NEXT:    srai a1, a1, 16
-; RV32IA-NEXT:    sll a1, a1, a0
-; RV32IA-NEXT:    li a5, 16
-; RV32IA-NEXT:    sub a5, a5, a3
-; RV32IA-NEXT:  .LBB117_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w a3, (a2)
-; RV32IA-NEXT:    and a7, a3, a4
-; RV32IA-NEXT:    mv a6, a3
-; RV32IA-NEXT:    sll a7, a7, a5
-; RV32IA-NEXT:    sra a7, a7, a5
-; RV32IA-NEXT:    bge a1, a7, .LBB117_3
-; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB117_1 Depth=1
-; RV32IA-NEXT:    xor a6, a3, a1
-; RV32IA-NEXT:    and a6, a6, a4
-; RV32IA-NEXT:    xor a6, a3, a6
-; RV32IA-NEXT:  .LBB117_3: # in Loop: Header=BB117_1 Depth=1
-; RV32IA-NEXT:    sc.w.rl a6, a6, (a2)
-; RV32IA-NEXT:    bnez a6, .LBB117_1
-; RV32IA-NEXT:  # %bb.4:
-; RV32IA-NEXT:    srl a0, a3, a0
-; RV32IA-NEXT:    ret
+; RV32IA-WMO-LABEL: atomicrmw_min_i16_release:
+; RV32IA-WMO:       # %bb.0:
+; RV32IA-WMO-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NEXT:    andi a3, a0, 24
+; RV32IA-WMO-NEXT:    lui a4, 16
+; RV32IA-WMO-NEXT:    addi a4, a4, -1
+; RV32IA-WMO-NEXT:    sll a4, a4, a0
+; RV32IA-WMO-NEXT:    slli a1, a1, 16
+; RV32IA-WMO-NEXT:    srai a1, a1, 16
+; RV32IA-WMO-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NEXT:    li a5, 16
+; RV32IA-WMO-NEXT:    sub a5, a5, a3
+; RV32IA-WMO-NEXT:  .LBB117_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NEXT:    lr.w a3, (a2)
+; RV32IA-WMO-NEXT:    and a7, a3, a4
+; RV32IA-WMO-NEXT:    mv a6, a3
+; RV32IA-WMO-NEXT:    sll a7, a7, a5
+; RV32IA-WMO-NEXT:    sra a7, a7, a5
+; RV32IA-WMO-NEXT:    bge a1, a7, .LBB117_3
+; RV32IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB117_1 Depth=1
+; RV32IA-WMO-NEXT:    xor a6, a3, a1
+; RV32IA-WMO-NEXT:    and a6, a6, a4
+; RV32IA-WMO-NEXT:    xor a6, a3, a6
+; RV32IA-WMO-NEXT:  .LBB117_3: # in Loop: Header=BB117_1 Depth=1
+; RV32IA-WMO-NEXT:    sc.w.rl a6, a6, (a2)
+; RV32IA-WMO-NEXT:    bnez a6, .LBB117_1
+; RV32IA-WMO-NEXT:  # %bb.4:
+; RV32IA-WMO-NEXT:    srl a0, a3, a0
+; RV32IA-WMO-NEXT:    ret
+;
+; RV32IA-TSO-LABEL: atomicrmw_min_i16_release:
+; RV32IA-TSO:       # %bb.0:
+; RV32IA-TSO-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NEXT:    andi a3, a0, 24
+; RV32IA-TSO-NEXT:    lui a4, 16
+; RV32IA-TSO-NEXT:    addi a4, a4, -1
+; RV32IA-TSO-NEXT:    sll a4, a4, a0
+; RV32IA-TSO-NEXT:    slli a1, a1, 16
+; RV32IA-TSO-NEXT:    srai a1, a1, 16
+; RV32IA-TSO-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NEXT:    li a5, 16
+; RV32IA-TSO-NEXT:    sub a5, a5, a3
+; RV32IA-TSO-NEXT:  .LBB117_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NEXT:    lr.w a3, (a2)
+; RV32IA-TSO-NEXT:    and a7, a3, a4
+; RV32IA-TSO-NEXT:    mv a6, a3
+; RV32IA-TSO-NEXT:    sll a7, a7, a5
+; RV32IA-TSO-NEXT:    sra a7, a7, a5
+; RV32IA-TSO-NEXT:    bge a1, a7, .LBB117_3
+; RV32IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB117_1 Depth=1
+; RV32IA-TSO-NEXT:    xor a6, a3, a1
+; RV32IA-TSO-NEXT:    and a6, a6, a4
+; RV32IA-TSO-NEXT:    xor a6, a3, a6
+; RV32IA-TSO-NEXT:  .LBB117_3: # in Loop: Header=BB117_1 Depth=1
+; RV32IA-TSO-NEXT:    sc.w a6, a6, (a2)
+; RV32IA-TSO-NEXT:    bnez a6, .LBB117_1
+; RV32IA-TSO-NEXT:  # %bb.4:
+; RV32IA-TSO-NEXT:    srl a0, a3, a0
+; RV32IA-TSO-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_min_i16_release:
 ; RV64I:       # %bb.0:
@@ -8984,36 +10895,67 @@ define i16 @atomicrmw_min_i16_release(ptr %a, i16 %b) nounwind {
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;
-; RV64IA-LABEL: atomicrmw_min_i16_release:
-; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a2, a0, -4
-; RV64IA-NEXT:    slli a0, a0, 3
-; RV64IA-NEXT:    andi a3, a0, 24
-; RV64IA-NEXT:    lui a4, 16
-; RV64IA-NEXT:    addiw a4, a4, -1
-; RV64IA-NEXT:    sllw a4, a4, a0
-; RV64IA-NEXT:    slli a1, a1, 48
-; RV64IA-NEXT:    srai a1, a1, 48
-; RV64IA-NEXT:    sllw a1, a1, a0
-; RV64IA-NEXT:    li a5, 48
-; RV64IA-NEXT:    sub a5, a5, a3
-; RV64IA-NEXT:  .LBB117_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w a3, (a2)
-; RV64IA-NEXT:    and a7, a3, a4
-; RV64IA-NEXT:    mv a6, a3
-; RV64IA-NEXT:    sll a7, a7, a5
-; RV64IA-NEXT:    sra a7, a7, a5
-; RV64IA-NEXT:    bge a1, a7, .LBB117_3
-; RV64IA-NEXT:  # %bb.2: # in Loop: Header=BB117_1 Depth=1
-; RV64IA-NEXT:    xor a6, a3, a1
-; RV64IA-NEXT:    and a6, a6, a4
-; RV64IA-NEXT:    xor a6, a3, a6
-; RV64IA-NEXT:  .LBB117_3: # in Loop: Header=BB117_1 Depth=1
-; RV64IA-NEXT:    sc.w.rl a6, a6, (a2)
-; RV64IA-NEXT:    bnez a6, .LBB117_1
-; RV64IA-NEXT:  # %bb.4:
-; RV64IA-NEXT:    srlw a0, a3, a0
-; RV64IA-NEXT:    ret
+; RV64IA-WMO-LABEL: atomicrmw_min_i16_release:
+; RV64IA-WMO:       # %bb.0:
+; RV64IA-WMO-NEXT:    andi a2, a0, -4
+; RV64IA-WMO-NEXT:    slli a0, a0, 3
+; RV64IA-WMO-NEXT:    andi a3, a0, 24
+; RV64IA-WMO-NEXT:    lui a4, 16
+; RV64IA-WMO-NEXT:    addiw a4, a4, -1
+; RV64IA-WMO-NEXT:    sllw a4, a4, a0
+; RV64IA-WMO-NEXT:    slli a1, a1, 48
+; RV64IA-WMO-NEXT:    srai a1, a1, 48
+; RV64IA-WMO-NEXT:    sllw a1, a1, a0
+; RV64IA-WMO-NEXT:    li a5, 48
+; RV64IA-WMO-NEXT:    sub a5, a5, a3
+; RV64IA-WMO-NEXT:  .LBB117_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT:    lr.w a3, (a2)
+; RV64IA-WMO-NEXT:    and a7, a3, a4
+; RV64IA-WMO-NEXT:    mv a6, a3
+; RV64IA-WMO-NEXT:    sll a7, a7, a5
+; RV64IA-WMO-NEXT:    sra a7, a7, a5
+; RV64IA-WMO-NEXT:    bge a1, a7, .LBB117_3
+; RV64IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB117_1 Depth=1
+; RV64IA-WMO-NEXT:    xor a6, a3, a1
+; RV64IA-WMO-NEXT:    and a6, a6, a4
+; RV64IA-WMO-NEXT:    xor a6, a3, a6
+; RV64IA-WMO-NEXT:  .LBB117_3: # in Loop: Header=BB117_1 Depth=1
+; RV64IA-WMO-NEXT:    sc.w.rl a6, a6, (a2)
+; RV64IA-WMO-NEXT:    bnez a6, .LBB117_1
+; RV64IA-WMO-NEXT:  # %bb.4:
+; RV64IA-WMO-NEXT:    srlw a0, a3, a0
+; RV64IA-WMO-NEXT:    ret
+;
+; RV64IA-TSO-LABEL: atomicrmw_min_i16_release:
+; RV64IA-TSO:       # %bb.0:
+; RV64IA-TSO-NEXT:    andi a2, a0, -4
+; RV64IA-TSO-NEXT:    slli a0, a0, 3
+; RV64IA-TSO-NEXT:    andi a3, a0, 24
+; RV64IA-TSO-NEXT:    lui a4, 16
+; RV64IA-TSO-NEXT:    addiw a4, a4, -1
+; RV64IA-TSO-NEXT:    sllw a4, a4, a0
+; RV64IA-TSO-NEXT:    slli a1, a1, 48
+; RV64IA-TSO-NEXT:    srai a1, a1, 48
+; RV64IA-TSO-NEXT:    sllw a1, a1, a0
+; RV64IA-TSO-NEXT:    li a5, 48
+; RV64IA-TSO-NEXT:    sub a5, a5, a3
+; RV64IA-TSO-NEXT:  .LBB117_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT:    lr.w a3, (a2)
+; RV64IA-TSO-NEXT:    and a7, a3, a4
+; RV64IA-TSO-NEXT:    mv a6, a3
+; RV64IA-TSO-NEXT:    sll a7, a7, a5
+; RV64IA-TSO-NEXT:    sra a7, a7, a5
+; RV64IA-TSO-NEXT:    bge a1, a7, .LBB117_3
+; RV64IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB117_1 Depth=1
+; RV64IA-TSO-NEXT:    xor a6, a3, a1
+; RV64IA-TSO-NEXT:    and a6, a6, a4
+; RV64IA-TSO-NEXT:    xor a6, a3, a6
+; RV64IA-TSO-NEXT:  .LBB117_3: # in Loop: Header=BB117_1 Depth=1
+; RV64IA-TSO-NEXT:    sc.w a6, a6, (a2)
+; RV64IA-TSO-NEXT:    bnez a6, .LBB117_1
+; RV64IA-TSO-NEXT:  # %bb.4:
+; RV64IA-TSO-NEXT:    srlw a0, a3, a0
+; RV64IA-TSO-NEXT:    ret
   %1 = atomicrmw min ptr %a, i16 %b release
   ret i16 %1
 }
@@ -9061,36 +11003,67 @@ define i16 @atomicrmw_min_i16_acq_rel(ptr %a, i16 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: atomicrmw_min_i16_acq_rel:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a2, a0, -4
-; RV32IA-NEXT:    slli a0, a0, 3
-; RV32IA-NEXT:    andi a3, a0, 24
-; RV32IA-NEXT:    lui a4, 16
-; RV32IA-NEXT:    addi a4, a4, -1
-; RV32IA-NEXT:    sll a4, a4, a0
-; RV32IA-NEXT:    slli a1, a1, 16
-; RV32IA-NEXT:    srai a1, a1, 16
-; RV32IA-NEXT:    sll a1, a1, a0
-; RV32IA-NEXT:    li a5, 16
-; RV32IA-NEXT:    sub a5, a5, a3
-; RV32IA-NEXT:  .LBB118_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w.aq a3, (a2)
-; RV32IA-NEXT:    and a7, a3, a4
-; RV32IA-NEXT:    mv a6, a3
-; RV32IA-NEXT:    sll a7, a7, a5
-; RV32IA-NEXT:    sra a7, a7, a5
-; RV32IA-NEXT:    bge a1, a7, .LBB118_3
-; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB118_1 Depth=1
-; RV32IA-NEXT:    xor a6, a3, a1
-; RV32IA-NEXT:    and a6, a6, a4
-; RV32IA-NEXT:    xor a6, a3, a6
-; RV32IA-NEXT:  .LBB118_3: # in Loop: Header=BB118_1 Depth=1
-; RV32IA-NEXT:    sc.w.rl a6, a6, (a2)
-; RV32IA-NEXT:    bnez a6, .LBB118_1
-; RV32IA-NEXT:  # %bb.4:
-; RV32IA-NEXT:    srl a0, a3, a0
-; RV32IA-NEXT:    ret
+; RV32IA-WMO-LABEL: atomicrmw_min_i16_acq_rel:
+; RV32IA-WMO:       # %bb.0:
+; RV32IA-WMO-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NEXT:    andi a3, a0, 24
+; RV32IA-WMO-NEXT:    lui a4, 16
+; RV32IA-WMO-NEXT:    addi a4, a4, -1
+; RV32IA-WMO-NEXT:    sll a4, a4, a0
+; RV32IA-WMO-NEXT:    slli a1, a1, 16
+; RV32IA-WMO-NEXT:    srai a1, a1, 16
+; RV32IA-WMO-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NEXT:    li a5, 16
+; RV32IA-WMO-NEXT:    sub a5, a5, a3
+; RV32IA-WMO-NEXT:  .LBB118_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NEXT:    lr.w.aq a3, (a2)
+; RV32IA-WMO-NEXT:    and a7, a3, a4
+; RV32IA-WMO-NEXT:    mv a6, a3
+; RV32IA-WMO-NEXT:    sll a7, a7, a5
+; RV32IA-WMO-NEXT:    sra a7, a7, a5
+; RV32IA-WMO-NEXT:    bge a1, a7, .LBB118_3
+; RV32IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB118_1 Depth=1
+; RV32IA-WMO-NEXT:    xor a6, a3, a1
+; RV32IA-WMO-NEXT:    and a6, a6, a4
+; RV32IA-WMO-NEXT:    xor a6, a3, a6
+; RV32IA-WMO-NEXT:  .LBB118_3: # in Loop: Header=BB118_1 Depth=1
+; RV32IA-WMO-NEXT:    sc.w.rl a6, a6, (a2)
+; RV32IA-WMO-NEXT:    bnez a6, .LBB118_1
+; RV32IA-WMO-NEXT:  # %bb.4:
+; RV32IA-WMO-NEXT:    srl a0, a3, a0
+; RV32IA-WMO-NEXT:    ret
+;
+; RV32IA-TSO-LABEL: atomicrmw_min_i16_acq_rel:
+; RV32IA-TSO:       # %bb.0:
+; RV32IA-TSO-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NEXT:    andi a3, a0, 24
+; RV32IA-TSO-NEXT:    lui a4, 16
+; RV32IA-TSO-NEXT:    addi a4, a4, -1
+; RV32IA-TSO-NEXT:    sll a4, a4, a0
+; RV32IA-TSO-NEXT:    slli a1, a1, 16
+; RV32IA-TSO-NEXT:    srai a1, a1, 16
+; RV32IA-TSO-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NEXT:    li a5, 16
+; RV32IA-TSO-NEXT:    sub a5, a5, a3
+; RV32IA-TSO-NEXT:  .LBB118_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NEXT:    lr.w a3, (a2)
+; RV32IA-TSO-NEXT:    and a7, a3, a4
+; RV32IA-TSO-NEXT:    mv a6, a3
+; RV32IA-TSO-NEXT:    sll a7, a7, a5
+; RV32IA-TSO-NEXT:    sra a7, a7, a5
+; RV32IA-TSO-NEXT:    bge a1, a7, .LBB118_3
+; RV32IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB118_1 Depth=1
+; RV32IA-TSO-NEXT:    xor a6, a3, a1
+; RV32IA-TSO-NEXT:    and a6, a6, a4
+; RV32IA-TSO-NEXT:    xor a6, a3, a6
+; RV32IA-TSO-NEXT:  .LBB118_3: # in Loop: Header=BB118_1 Depth=1
+; RV32IA-TSO-NEXT:    sc.w a6, a6, (a2)
+; RV32IA-TSO-NEXT:    bnez a6, .LBB118_1
+; RV32IA-TSO-NEXT:  # %bb.4:
+; RV32IA-TSO-NEXT:    srl a0, a3, a0
+; RV32IA-TSO-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_min_i16_acq_rel:
 ; RV64I:       # %bb.0:
@@ -9134,36 +11107,67 @@ define i16 @atomicrmw_min_i16_acq_rel(ptr %a, i16 %b) nounwind {
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;
-; RV64IA-LABEL: atomicrmw_min_i16_acq_rel:
-; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a2, a0, -4
-; RV64IA-NEXT:    slli a0, a0, 3
-; RV64IA-NEXT:    andi a3, a0, 24
-; RV64IA-NEXT:    lui a4, 16
-; RV64IA-NEXT:    addiw a4, a4, -1
-; RV64IA-NEXT:    sllw a4, a4, a0
-; RV64IA-NEXT:    slli a1, a1, 48
-; RV64IA-NEXT:    srai a1, a1, 48
-; RV64IA-NEXT:    sllw a1, a1, a0
-; RV64IA-NEXT:    li a5, 48
-; RV64IA-NEXT:    sub a5, a5, a3
-; RV64IA-NEXT:  .LBB118_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w.aq a3, (a2)
-; RV64IA-NEXT:    and a7, a3, a4
-; RV64IA-NEXT:    mv a6, a3
-; RV64IA-NEXT:    sll a7, a7, a5
-; RV64IA-NEXT:    sra a7, a7, a5
-; RV64IA-NEXT:    bge a1, a7, .LBB118_3
-; RV64IA-NEXT:  # %bb.2: # in Loop: Header=BB118_1 Depth=1
-; RV64IA-NEXT:    xor a6, a3, a1
-; RV64IA-NEXT:    and a6, a6, a4
-; RV64IA-NEXT:    xor a6, a3, a6
-; RV64IA-NEXT:  .LBB118_3: # in Loop: Header=BB118_1 Depth=1
-; RV64IA-NEXT:    sc.w.rl a6, a6, (a2)
-; RV64IA-NEXT:    bnez a6, .LBB118_1
-; RV64IA-NEXT:  # %bb.4:
-; RV64IA-NEXT:    srlw a0, a3, a0
-; RV64IA-NEXT:    ret
+; RV64IA-WMO-LABEL: atomicrmw_min_i16_acq_rel:
+; RV64IA-WMO:       # %bb.0:
+; RV64IA-WMO-NEXT:    andi a2, a0, -4
+; RV64IA-WMO-NEXT:    slli a0, a0, 3
+; RV64IA-WMO-NEXT:    andi a3, a0, 24
+; RV64IA-WMO-NEXT:    lui a4, 16
+; RV64IA-WMO-NEXT:    addiw a4, a4, -1
+; RV64IA-WMO-NEXT:    sllw a4, a4, a0
+; RV64IA-WMO-NEXT:    slli a1, a1, 48
+; RV64IA-WMO-NEXT:    srai a1, a1, 48
+; RV64IA-WMO-NEXT:    sllw a1, a1, a0
+; RV64IA-WMO-NEXT:    li a5, 48
+; RV64IA-WMO-NEXT:    sub a5, a5, a3
+; RV64IA-WMO-NEXT:  .LBB118_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT:    lr.w.aq a3, (a2)
+; RV64IA-WMO-NEXT:    and a7, a3, a4
+; RV64IA-WMO-NEXT:    mv a6, a3
+; RV64IA-WMO-NEXT:    sll a7, a7, a5
+; RV64IA-WMO-NEXT:    sra a7, a7, a5
+; RV64IA-WMO-NEXT:    bge a1, a7, .LBB118_3
+; RV64IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB118_1 Depth=1
+; RV64IA-WMO-NEXT:    xor a6, a3, a1
+; RV64IA-WMO-NEXT:    and a6, a6, a4
+; RV64IA-WMO-NEXT:    xor a6, a3, a6
+; RV64IA-WMO-NEXT:  .LBB118_3: # in Loop: Header=BB118_1 Depth=1
+; RV64IA-WMO-NEXT:    sc.w.rl a6, a6, (a2)
+; RV64IA-WMO-NEXT:    bnez a6, .LBB118_1
+; RV64IA-WMO-NEXT:  # %bb.4:
+; RV64IA-WMO-NEXT:    srlw a0, a3, a0
+; RV64IA-WMO-NEXT:    ret
+;
+; RV64IA-TSO-LABEL: atomicrmw_min_i16_acq_rel:
+; RV64IA-TSO:       # %bb.0:
+; RV64IA-TSO-NEXT:    andi a2, a0, -4
+; RV64IA-TSO-NEXT:    slli a0, a0, 3
+; RV64IA-TSO-NEXT:    andi a3, a0, 24
+; RV64IA-TSO-NEXT:    lui a4, 16
+; RV64IA-TSO-NEXT:    addiw a4, a4, -1
+; RV64IA-TSO-NEXT:    sllw a4, a4, a0
+; RV64IA-TSO-NEXT:    slli a1, a1, 48
+; RV64IA-TSO-NEXT:    srai a1, a1, 48
+; RV64IA-TSO-NEXT:    sllw a1, a1, a0
+; RV64IA-TSO-NEXT:    li a5, 48
+; RV64IA-TSO-NEXT:    sub a5, a5, a3
+; RV64IA-TSO-NEXT:  .LBB118_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT:    lr.w a3, (a2)
+; RV64IA-TSO-NEXT:    and a7, a3, a4
+; RV64IA-TSO-NEXT:    mv a6, a3
+; RV64IA-TSO-NEXT:    sll a7, a7, a5
+; RV64IA-TSO-NEXT:    sra a7, a7, a5
+; RV64IA-TSO-NEXT:    bge a1, a7, .LBB118_3
+; RV64IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB118_1 Depth=1
+; RV64IA-TSO-NEXT:    xor a6, a3, a1
+; RV64IA-TSO-NEXT:    and a6, a6, a4
+; RV64IA-TSO-NEXT:    xor a6, a3, a6
+; RV64IA-TSO-NEXT:  .LBB118_3: # in Loop: Header=BB118_1 Depth=1
+; RV64IA-TSO-NEXT:    sc.w a6, a6, (a2)
+; RV64IA-TSO-NEXT:    bnez a6, .LBB118_1
+; RV64IA-TSO-NEXT:  # %bb.4:
+; RV64IA-TSO-NEXT:    srlw a0, a3, a0
+; RV64IA-TSO-NEXT:    ret
   %1 = atomicrmw min ptr %a, i16 %b acq_rel
   ret i16 %1
 }
@@ -9505,30 +11509,55 @@ define i16 @atomicrmw_umax_i16_acquire(ptr %a, i16 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: atomicrmw_umax_i16_acquire:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a2, a0, -4
-; RV32IA-NEXT:    slli a0, a0, 3
-; RV32IA-NEXT:    lui a3, 16
-; RV32IA-NEXT:    addi a3, a3, -1
-; RV32IA-NEXT:    sll a4, a3, a0
-; RV32IA-NEXT:    and a1, a1, a3
-; RV32IA-NEXT:    sll a1, a1, a0
-; RV32IA-NEXT:  .LBB121_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w.aq a3, (a2)
-; RV32IA-NEXT:    and a6, a3, a4
-; RV32IA-NEXT:    mv a5, a3
-; RV32IA-NEXT:    bgeu a6, a1, .LBB121_3
-; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB121_1 Depth=1
-; RV32IA-NEXT:    xor a5, a3, a1
-; RV32IA-NEXT:    and a5, a5, a4
-; RV32IA-NEXT:    xor a5, a3, a5
-; RV32IA-NEXT:  .LBB121_3: # in Loop: Header=BB121_1 Depth=1
-; RV32IA-NEXT:    sc.w a5, a5, (a2)
-; RV32IA-NEXT:    bnez a5, .LBB121_1
-; RV32IA-NEXT:  # %bb.4:
-; RV32IA-NEXT:    srl a0, a3, a0
-; RV32IA-NEXT:    ret
+; RV32IA-WMO-LABEL: atomicrmw_umax_i16_acquire:
+; RV32IA-WMO:       # %bb.0:
+; RV32IA-WMO-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NEXT:    lui a3, 16
+; RV32IA-WMO-NEXT:    addi a3, a3, -1
+; RV32IA-WMO-NEXT:    sll a4, a3, a0
+; RV32IA-WMO-NEXT:    and a1, a1, a3
+; RV32IA-WMO-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NEXT:  .LBB121_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NEXT:    lr.w.aq a3, (a2)
+; RV32IA-WMO-NEXT:    and a6, a3, a4
+; RV32IA-WMO-NEXT:    mv a5, a3
+; RV32IA-WMO-NEXT:    bgeu a6, a1, .LBB121_3
+; RV32IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB121_1 Depth=1
+; RV32IA-WMO-NEXT:    xor a5, a3, a1
+; RV32IA-WMO-NEXT:    and a5, a5, a4
+; RV32IA-WMO-NEXT:    xor a5, a3, a5
+; RV32IA-WMO-NEXT:  .LBB121_3: # in Loop: Header=BB121_1 Depth=1
+; RV32IA-WMO-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-WMO-NEXT:    bnez a5, .LBB121_1
+; RV32IA-WMO-NEXT:  # %bb.4:
+; RV32IA-WMO-NEXT:    srl a0, a3, a0
+; RV32IA-WMO-NEXT:    ret
+;
+; RV32IA-TSO-LABEL: atomicrmw_umax_i16_acquire:
+; RV32IA-TSO:       # %bb.0:
+; RV32IA-TSO-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NEXT:    lui a3, 16
+; RV32IA-TSO-NEXT:    addi a3, a3, -1
+; RV32IA-TSO-NEXT:    sll a4, a3, a0
+; RV32IA-TSO-NEXT:    and a1, a1, a3
+; RV32IA-TSO-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NEXT:  .LBB121_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NEXT:    lr.w a3, (a2)
+; RV32IA-TSO-NEXT:    and a6, a3, a4
+; RV32IA-TSO-NEXT:    mv a5, a3
+; RV32IA-TSO-NEXT:    bgeu a6, a1, .LBB121_3
+; RV32IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB121_1 Depth=1
+; RV32IA-TSO-NEXT:    xor a5, a3, a1
+; RV32IA-TSO-NEXT:    and a5, a5, a4
+; RV32IA-TSO-NEXT:    xor a5, a3, a5
+; RV32IA-TSO-NEXT:  .LBB121_3: # in Loop: Header=BB121_1 Depth=1
+; RV32IA-TSO-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-NEXT:    bnez a5, .LBB121_1
+; RV32IA-TSO-NEXT:  # %bb.4:
+; RV32IA-TSO-NEXT:    srl a0, a3, a0
+; RV32IA-TSO-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_umax_i16_acquire:
 ; RV64I:       # %bb.0:
@@ -9558,46 +11587,71 @@ define i16 @atomicrmw_umax_i16_acquire(ptr %a, i16 %b) nounwind {
 ; RV64I-NEXT:  .LBB121_2: # %atomicrmw.start
 ; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
 ; RV64I-NEXT:    and a0, a1, s2
-; RV64I-NEXT:    mv a2, a1
-; RV64I-NEXT:    bltu s3, a0, .LBB121_1
-; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB121_2 Depth=1
-; RV64I-NEXT:    mv a2, s0
-; RV64I-NEXT:    j .LBB121_1
-; RV64I-NEXT:  .LBB121_4: # %atomicrmw.end
-; RV64I-NEXT:    mv a0, a1
-; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    addi sp, sp, 48
-; RV64I-NEXT:    ret
-;
-; RV64IA-LABEL: atomicrmw_umax_i16_acquire:
-; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a2, a0, -4
-; RV64IA-NEXT:    slli a0, a0, 3
-; RV64IA-NEXT:    lui a3, 16
-; RV64IA-NEXT:    addiw a3, a3, -1
-; RV64IA-NEXT:    sllw a4, a3, a0
-; RV64IA-NEXT:    and a1, a1, a3
-; RV64IA-NEXT:    sllw a1, a1, a0
-; RV64IA-NEXT:  .LBB121_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w.aq a3, (a2)
-; RV64IA-NEXT:    and a6, a3, a4
-; RV64IA-NEXT:    mv a5, a3
-; RV64IA-NEXT:    bgeu a6, a1, .LBB121_3
-; RV64IA-NEXT:  # %bb.2: # in Loop: Header=BB121_1 Depth=1
-; RV64IA-NEXT:    xor a5, a3, a1
-; RV64IA-NEXT:    and a5, a5, a4
-; RV64IA-NEXT:    xor a5, a3, a5
-; RV64IA-NEXT:  .LBB121_3: # in Loop: Header=BB121_1 Depth=1
-; RV64IA-NEXT:    sc.w a5, a5, (a2)
-; RV64IA-NEXT:    bnez a5, .LBB121_1
-; RV64IA-NEXT:  # %bb.4:
-; RV64IA-NEXT:    srlw a0, a3, a0
-; RV64IA-NEXT:    ret
+; RV64I-NEXT:    mv a2, a1
+; RV64I-NEXT:    bltu s3, a0, .LBB121_1
+; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
+; RV64I-NEXT:    # in Loop: Header=BB121_2 Depth=1
+; RV64I-NEXT:    mv a2, s0
+; RV64I-NEXT:    j .LBB121_1
+; RV64I-NEXT:  .LBB121_4: # %atomicrmw.end
+; RV64I-NEXT:    mv a0, a1
+; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 48
+; RV64I-NEXT:    ret
+;
+; RV64IA-WMO-LABEL: atomicrmw_umax_i16_acquire:
+; RV64IA-WMO:       # %bb.0:
+; RV64IA-WMO-NEXT:    andi a2, a0, -4
+; RV64IA-WMO-NEXT:    slli a0, a0, 3
+; RV64IA-WMO-NEXT:    lui a3, 16
+; RV64IA-WMO-NEXT:    addiw a3, a3, -1
+; RV64IA-WMO-NEXT:    sllw a4, a3, a0
+; RV64IA-WMO-NEXT:    and a1, a1, a3
+; RV64IA-WMO-NEXT:    sllw a1, a1, a0
+; RV64IA-WMO-NEXT:  .LBB121_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT:    lr.w.aq a3, (a2)
+; RV64IA-WMO-NEXT:    and a6, a3, a4
+; RV64IA-WMO-NEXT:    mv a5, a3
+; RV64IA-WMO-NEXT:    bgeu a6, a1, .LBB121_3
+; RV64IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB121_1 Depth=1
+; RV64IA-WMO-NEXT:    xor a5, a3, a1
+; RV64IA-WMO-NEXT:    and a5, a5, a4
+; RV64IA-WMO-NEXT:    xor a5, a3, a5
+; RV64IA-WMO-NEXT:  .LBB121_3: # in Loop: Header=BB121_1 Depth=1
+; RV64IA-WMO-NEXT:    sc.w a5, a5, (a2)
+; RV64IA-WMO-NEXT:    bnez a5, .LBB121_1
+; RV64IA-WMO-NEXT:  # %bb.4:
+; RV64IA-WMO-NEXT:    srlw a0, a3, a0
+; RV64IA-WMO-NEXT:    ret
+;
+; RV64IA-TSO-LABEL: atomicrmw_umax_i16_acquire:
+; RV64IA-TSO:       # %bb.0:
+; RV64IA-TSO-NEXT:    andi a2, a0, -4
+; RV64IA-TSO-NEXT:    slli a0, a0, 3
+; RV64IA-TSO-NEXT:    lui a3, 16
+; RV64IA-TSO-NEXT:    addiw a3, a3, -1
+; RV64IA-TSO-NEXT:    sllw a4, a3, a0
+; RV64IA-TSO-NEXT:    and a1, a1, a3
+; RV64IA-TSO-NEXT:    sllw a1, a1, a0
+; RV64IA-TSO-NEXT:  .LBB121_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT:    lr.w a3, (a2)
+; RV64IA-TSO-NEXT:    and a6, a3, a4
+; RV64IA-TSO-NEXT:    mv a5, a3
+; RV64IA-TSO-NEXT:    bgeu a6, a1, .LBB121_3
+; RV64IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB121_1 Depth=1
+; RV64IA-TSO-NEXT:    xor a5, a3, a1
+; RV64IA-TSO-NEXT:    and a5, a5, a4
+; RV64IA-TSO-NEXT:    xor a5, a3, a5
+; RV64IA-TSO-NEXT:  .LBB121_3: # in Loop: Header=BB121_1 Depth=1
+; RV64IA-TSO-NEXT:    sc.w a5, a5, (a2)
+; RV64IA-TSO-NEXT:    bnez a5, .LBB121_1
+; RV64IA-TSO-NEXT:  # %bb.4:
+; RV64IA-TSO-NEXT:    srlw a0, a3, a0
+; RV64IA-TSO-NEXT:    ret
   %1 = atomicrmw umax ptr %a, i16 %b acquire
   ret i16 %1
 }
@@ -9647,30 +11701,55 @@ define i16 @atomicrmw_umax_i16_release(ptr %a, i16 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: atomicrmw_umax_i16_release:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a2, a0, -4
-; RV32IA-NEXT:    slli a0, a0, 3
-; RV32IA-NEXT:    lui a3, 16
-; RV32IA-NEXT:    addi a3, a3, -1
-; RV32IA-NEXT:    sll a4, a3, a0
-; RV32IA-NEXT:    and a1, a1, a3
-; RV32IA-NEXT:    sll a1, a1, a0
-; RV32IA-NEXT:  .LBB122_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w a3, (a2)
-; RV32IA-NEXT:    and a6, a3, a4
-; RV32IA-NEXT:    mv a5, a3
-; RV32IA-NEXT:    bgeu a6, a1, .LBB122_3
-; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB122_1 Depth=1
-; RV32IA-NEXT:    xor a5, a3, a1
-; RV32IA-NEXT:    and a5, a5, a4
-; RV32IA-NEXT:    xor a5, a3, a5
-; RV32IA-NEXT:  .LBB122_3: # in Loop: Header=BB122_1 Depth=1
-; RV32IA-NEXT:    sc.w.rl a5, a5, (a2)
-; RV32IA-NEXT:    bnez a5, .LBB122_1
-; RV32IA-NEXT:  # %bb.4:
-; RV32IA-NEXT:    srl a0, a3, a0
-; RV32IA-NEXT:    ret
+; RV32IA-WMO-LABEL: atomicrmw_umax_i16_release:
+; RV32IA-WMO:       # %bb.0:
+; RV32IA-WMO-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NEXT:    lui a3, 16
+; RV32IA-WMO-NEXT:    addi a3, a3, -1
+; RV32IA-WMO-NEXT:    sll a4, a3, a0
+; RV32IA-WMO-NEXT:    and a1, a1, a3
+; RV32IA-WMO-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NEXT:  .LBB122_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NEXT:    lr.w a3, (a2)
+; RV32IA-WMO-NEXT:    and a6, a3, a4
+; RV32IA-WMO-NEXT:    mv a5, a3
+; RV32IA-WMO-NEXT:    bgeu a6, a1, .LBB122_3
+; RV32IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB122_1 Depth=1
+; RV32IA-WMO-NEXT:    xor a5, a3, a1
+; RV32IA-WMO-NEXT:    and a5, a5, a4
+; RV32IA-WMO-NEXT:    xor a5, a3, a5
+; RV32IA-WMO-NEXT:  .LBB122_3: # in Loop: Header=BB122_1 Depth=1
+; RV32IA-WMO-NEXT:    sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-NEXT:    bnez a5, .LBB122_1
+; RV32IA-WMO-NEXT:  # %bb.4:
+; RV32IA-WMO-NEXT:    srl a0, a3, a0
+; RV32IA-WMO-NEXT:    ret
+;
+; RV32IA-TSO-LABEL: atomicrmw_umax_i16_release:
+; RV32IA-TSO:       # %bb.0:
+; RV32IA-TSO-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NEXT:    lui a3, 16
+; RV32IA-TSO-NEXT:    addi a3, a3, -1
+; RV32IA-TSO-NEXT:    sll a4, a3, a0
+; RV32IA-TSO-NEXT:    and a1, a1, a3
+; RV32IA-TSO-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NEXT:  .LBB122_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NEXT:    lr.w a3, (a2)
+; RV32IA-TSO-NEXT:    and a6, a3, a4
+; RV32IA-TSO-NEXT:    mv a5, a3
+; RV32IA-TSO-NEXT:    bgeu a6, a1, .LBB122_3
+; RV32IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB122_1 Depth=1
+; RV32IA-TSO-NEXT:    xor a5, a3, a1
+; RV32IA-TSO-NEXT:    and a5, a5, a4
+; RV32IA-TSO-NEXT:    xor a5, a3, a5
+; RV32IA-TSO-NEXT:  .LBB122_3: # in Loop: Header=BB122_1 Depth=1
+; RV32IA-TSO-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-NEXT:    bnez a5, .LBB122_1
+; RV32IA-TSO-NEXT:  # %bb.4:
+; RV32IA-TSO-NEXT:    srl a0, a3, a0
+; RV32IA-TSO-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_umax_i16_release:
 ; RV64I:       # %bb.0:
@@ -9716,30 +11795,55 @@ define i16 @atomicrmw_umax_i16_release(ptr %a, i16 %b) nounwind {
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;
-; RV64IA-LABEL: atomicrmw_umax_i16_release:
-; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a2, a0, -4
-; RV64IA-NEXT:    slli a0, a0, 3
-; RV64IA-NEXT:    lui a3, 16
-; RV64IA-NEXT:    addiw a3, a3, -1
-; RV64IA-NEXT:    sllw a4, a3, a0
-; RV64IA-NEXT:    and a1, a1, a3
-; RV64IA-NEXT:    sllw a1, a1, a0
-; RV64IA-NEXT:  .LBB122_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w a3, (a2)
-; RV64IA-NEXT:    and a6, a3, a4
-; RV64IA-NEXT:    mv a5, a3
-; RV64IA-NEXT:    bgeu a6, a1, .LBB122_3
-; RV64IA-NEXT:  # %bb.2: # in Loop: Header=BB122_1 Depth=1
-; RV64IA-NEXT:    xor a5, a3, a1
-; RV64IA-NEXT:    and a5, a5, a4
-; RV64IA-NEXT:    xor a5, a3, a5
-; RV64IA-NEXT:  .LBB122_3: # in Loop: Header=BB122_1 Depth=1
-; RV64IA-NEXT:    sc.w.rl a5, a5, (a2)
-; RV64IA-NEXT:    bnez a5, .LBB122_1
-; RV64IA-NEXT:  # %bb.4:
-; RV64IA-NEXT:    srlw a0, a3, a0
-; RV64IA-NEXT:    ret
+; RV64IA-WMO-LABEL: atomicrmw_umax_i16_release:
+; RV64IA-WMO:       # %bb.0:
+; RV64IA-WMO-NEXT:    andi a2, a0, -4
+; RV64IA-WMO-NEXT:    slli a0, a0, 3
+; RV64IA-WMO-NEXT:    lui a3, 16
+; RV64IA-WMO-NEXT:    addiw a3, a3, -1
+; RV64IA-WMO-NEXT:    sllw a4, a3, a0
+; RV64IA-WMO-NEXT:    and a1, a1, a3
+; RV64IA-WMO-NEXT:    sllw a1, a1, a0
+; RV64IA-WMO-NEXT:  .LBB122_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT:    lr.w a3, (a2)
+; RV64IA-WMO-NEXT:    and a6, a3, a4
+; RV64IA-WMO-NEXT:    mv a5, a3
+; RV64IA-WMO-NEXT:    bgeu a6, a1, .LBB122_3
+; RV64IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB122_1 Depth=1
+; RV64IA-WMO-NEXT:    xor a5, a3, a1
+; RV64IA-WMO-NEXT:    and a5, a5, a4
+; RV64IA-WMO-NEXT:    xor a5, a3, a5
+; RV64IA-WMO-NEXT:  .LBB122_3: # in Loop: Header=BB122_1 Depth=1
+; RV64IA-WMO-NEXT:    sc.w.rl a5, a5, (a2)
+; RV64IA-WMO-NEXT:    bnez a5, .LBB122_1
+; RV64IA-WMO-NEXT:  # %bb.4:
+; RV64IA-WMO-NEXT:    srlw a0, a3, a0
+; RV64IA-WMO-NEXT:    ret
+;
+; RV64IA-TSO-LABEL: atomicrmw_umax_i16_release:
+; RV64IA-TSO:       # %bb.0:
+; RV64IA-TSO-NEXT:    andi a2, a0, -4
+; RV64IA-TSO-NEXT:    slli a0, a0, 3
+; RV64IA-TSO-NEXT:    lui a3, 16
+; RV64IA-TSO-NEXT:    addiw a3, a3, -1
+; RV64IA-TSO-NEXT:    sllw a4, a3, a0
+; RV64IA-TSO-NEXT:    and a1, a1, a3
+; RV64IA-TSO-NEXT:    sllw a1, a1, a0
+; RV64IA-TSO-NEXT:  .LBB122_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT:    lr.w a3, (a2)
+; RV64IA-TSO-NEXT:    and a6, a3, a4
+; RV64IA-TSO-NEXT:    mv a5, a3
+; RV64IA-TSO-NEXT:    bgeu a6, a1, .LBB122_3
+; RV64IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB122_1 Depth=1
+; RV64IA-TSO-NEXT:    xor a5, a3, a1
+; RV64IA-TSO-NEXT:    and a5, a5, a4
+; RV64IA-TSO-NEXT:    xor a5, a3, a5
+; RV64IA-TSO-NEXT:  .LBB122_3: # in Loop: Header=BB122_1 Depth=1
+; RV64IA-TSO-NEXT:    sc.w a5, a5, (a2)
+; RV64IA-TSO-NEXT:    bnez a5, .LBB122_1
+; RV64IA-TSO-NEXT:  # %bb.4:
+; RV64IA-TSO-NEXT:    srlw a0, a3, a0
+; RV64IA-TSO-NEXT:    ret
   %1 = atomicrmw umax ptr %a, i16 %b release
   ret i16 %1
 }
@@ -9789,30 +11893,55 @@ define i16 @atomicrmw_umax_i16_acq_rel(ptr %a, i16 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: atomicrmw_umax_i16_acq_rel:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a2, a0, -4
-; RV32IA-NEXT:    slli a0, a0, 3
-; RV32IA-NEXT:    lui a3, 16
-; RV32IA-NEXT:    addi a3, a3, -1
-; RV32IA-NEXT:    sll a4, a3, a0
-; RV32IA-NEXT:    and a1, a1, a3
-; RV32IA-NEXT:    sll a1, a1, a0
-; RV32IA-NEXT:  .LBB123_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w.aq a3, (a2)
-; RV32IA-NEXT:    and a6, a3, a4
-; RV32IA-NEXT:    mv a5, a3
-; RV32IA-NEXT:    bgeu a6, a1, .LBB123_3
-; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB123_1 Depth=1
-; RV32IA-NEXT:    xor a5, a3, a1
-; RV32IA-NEXT:    and a5, a5, a4
-; RV32IA-NEXT:    xor a5, a3, a5
-; RV32IA-NEXT:  .LBB123_3: # in Loop: Header=BB123_1 Depth=1
-; RV32IA-NEXT:    sc.w.rl a5, a5, (a2)
-; RV32IA-NEXT:    bnez a5, .LBB123_1
-; RV32IA-NEXT:  # %bb.4:
-; RV32IA-NEXT:    srl a0, a3, a0
-; RV32IA-NEXT:    ret
+; RV32IA-WMO-LABEL: atomicrmw_umax_i16_acq_rel:
+; RV32IA-WMO:       # %bb.0:
+; RV32IA-WMO-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NEXT:    lui a3, 16
+; RV32IA-WMO-NEXT:    addi a3, a3, -1
+; RV32IA-WMO-NEXT:    sll a4, a3, a0
+; RV32IA-WMO-NEXT:    and a1, a1, a3
+; RV32IA-WMO-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NEXT:  .LBB123_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NEXT:    lr.w.aq a3, (a2)
+; RV32IA-WMO-NEXT:    and a6, a3, a4
+; RV32IA-WMO-NEXT:    mv a5, a3
+; RV32IA-WMO-NEXT:    bgeu a6, a1, .LBB123_3
+; RV32IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB123_1 Depth=1
+; RV32IA-WMO-NEXT:    xor a5, a3, a1
+; RV32IA-WMO-NEXT:    and a5, a5, a4
+; RV32IA-WMO-NEXT:    xor a5, a3, a5
+; RV32IA-WMO-NEXT:  .LBB123_3: # in Loop: Header=BB123_1 Depth=1
+; RV32IA-WMO-NEXT:    sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-NEXT:    bnez a5, .LBB123_1
+; RV32IA-WMO-NEXT:  # %bb.4:
+; RV32IA-WMO-NEXT:    srl a0, a3, a0
+; RV32IA-WMO-NEXT:    ret
+;
+; RV32IA-TSO-LABEL: atomicrmw_umax_i16_acq_rel:
+; RV32IA-TSO:       # %bb.0:
+; RV32IA-TSO-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NEXT:    lui a3, 16
+; RV32IA-TSO-NEXT:    addi a3, a3, -1
+; RV32IA-TSO-NEXT:    sll a4, a3, a0
+; RV32IA-TSO-NEXT:    and a1, a1, a3
+; RV32IA-TSO-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NEXT:  .LBB123_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NEXT:    lr.w a3, (a2)
+; RV32IA-TSO-NEXT:    and a6, a3, a4
+; RV32IA-TSO-NEXT:    mv a5, a3
+; RV32IA-TSO-NEXT:    bgeu a6, a1, .LBB123_3
+; RV32IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB123_1 Depth=1
+; RV32IA-TSO-NEXT:    xor a5, a3, a1
+; RV32IA-TSO-NEXT:    and a5, a5, a4
+; RV32IA-TSO-NEXT:    xor a5, a3, a5
+; RV32IA-TSO-NEXT:  .LBB123_3: # in Loop: Header=BB123_1 Depth=1
+; RV32IA-TSO-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-NEXT:    bnez a5, .LBB123_1
+; RV32IA-TSO-NEXT:  # %bb.4:
+; RV32IA-TSO-NEXT:    srl a0, a3, a0
+; RV32IA-TSO-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_umax_i16_acq_rel:
 ; RV64I:       # %bb.0:
@@ -9858,30 +11987,55 @@ define i16 @atomicrmw_umax_i16_acq_rel(ptr %a, i16 %b) nounwind {
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;
-; RV64IA-LABEL: atomicrmw_umax_i16_acq_rel:
-; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a2, a0, -4
-; RV64IA-NEXT:    slli a0, a0, 3
-; RV64IA-NEXT:    lui a3, 16
-; RV64IA-NEXT:    addiw a3, a3, -1
-; RV64IA-NEXT:    sllw a4, a3, a0
-; RV64IA-NEXT:    and a1, a1, a3
-; RV64IA-NEXT:    sllw a1, a1, a0
-; RV64IA-NEXT:  .LBB123_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w.aq a3, (a2)
-; RV64IA-NEXT:    and a6, a3, a4
-; RV64IA-NEXT:    mv a5, a3
-; RV64IA-NEXT:    bgeu a6, a1, .LBB123_3
-; RV64IA-NEXT:  # %bb.2: # in Loop: Header=BB123_1 Depth=1
-; RV64IA-NEXT:    xor a5, a3, a1
-; RV64IA-NEXT:    and a5, a5, a4
-; RV64IA-NEXT:    xor a5, a3, a5
-; RV64IA-NEXT:  .LBB123_3: # in Loop: Header=BB123_1 Depth=1
-; RV64IA-NEXT:    sc.w.rl a5, a5, (a2)
-; RV64IA-NEXT:    bnez a5, .LBB123_1
-; RV64IA-NEXT:  # %bb.4:
-; RV64IA-NEXT:    srlw a0, a3, a0
-; RV64IA-NEXT:    ret
+; RV64IA-WMO-LABEL: atomicrmw_umax_i16_acq_rel:
+; RV64IA-WMO:       # %bb.0:
+; RV64IA-WMO-NEXT:    andi a2, a0, -4
+; RV64IA-WMO-NEXT:    slli a0, a0, 3
+; RV64IA-WMO-NEXT:    lui a3, 16
+; RV64IA-WMO-NEXT:    addiw a3, a3, -1
+; RV64IA-WMO-NEXT:    sllw a4, a3, a0
+; RV64IA-WMO-NEXT:    and a1, a1, a3
+; RV64IA-WMO-NEXT:    sllw a1, a1, a0
+; RV64IA-WMO-NEXT:  .LBB123_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT:    lr.w.aq a3, (a2)
+; RV64IA-WMO-NEXT:    and a6, a3, a4
+; RV64IA-WMO-NEXT:    mv a5, a3
+; RV64IA-WMO-NEXT:    bgeu a6, a1, .LBB123_3
+; RV64IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB123_1 Depth=1
+; RV64IA-WMO-NEXT:    xor a5, a3, a1
+; RV64IA-WMO-NEXT:    and a5, a5, a4
+; RV64IA-WMO-NEXT:    xor a5, a3, a5
+; RV64IA-WMO-NEXT:  .LBB123_3: # in Loop: Header=BB123_1 Depth=1
+; RV64IA-WMO-NEXT:    sc.w.rl a5, a5, (a2)
+; RV64IA-WMO-NEXT:    bnez a5, .LBB123_1
+; RV64IA-WMO-NEXT:  # %bb.4:
+; RV64IA-WMO-NEXT:    srlw a0, a3, a0
+; RV64IA-WMO-NEXT:    ret
+;
+; RV64IA-TSO-LABEL: atomicrmw_umax_i16_acq_rel:
+; RV64IA-TSO:       # %bb.0:
+; RV64IA-TSO-NEXT:    andi a2, a0, -4
+; RV64IA-TSO-NEXT:    slli a0, a0, 3
+; RV64IA-TSO-NEXT:    lui a3, 16
+; RV64IA-TSO-NEXT:    addiw a3, a3, -1
+; RV64IA-TSO-NEXT:    sllw a4, a3, a0
+; RV64IA-TSO-NEXT:    and a1, a1, a3
+; RV64IA-TSO-NEXT:    sllw a1, a1, a0
+; RV64IA-TSO-NEXT:  .LBB123_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT:    lr.w a3, (a2)
+; RV64IA-TSO-NEXT:    and a6, a3, a4
+; RV64IA-TSO-NEXT:    mv a5, a3
+; RV64IA-TSO-NEXT:    bgeu a6, a1, .LBB123_3
+; RV64IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB123_1 Depth=1
+; RV64IA-TSO-NEXT:    xor a5, a3, a1
+; RV64IA-TSO-NEXT:    and a5, a5, a4
+; RV64IA-TSO-NEXT:    xor a5, a3, a5
+; RV64IA-TSO-NEXT:  .LBB123_3: # in Loop: Header=BB123_1 Depth=1
+; RV64IA-TSO-NEXT:    sc.w a5, a5, (a2)
+; RV64IA-TSO-NEXT:    bnez a5, .LBB123_1
+; RV64IA-TSO-NEXT:  # %bb.4:
+; RV64IA-TSO-NEXT:    srlw a0, a3, a0
+; RV64IA-TSO-NEXT:    ret
   %1 = atomicrmw umax ptr %a, i16 %b acq_rel
   ret i16 %1
 }
@@ -10215,30 +12369,55 @@ define i16 @atomicrmw_umin_i16_acquire(ptr %a, i16 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: atomicrmw_umin_i16_acquire:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a2, a0, -4
-; RV32IA-NEXT:    slli a0, a0, 3
-; RV32IA-NEXT:    lui a3, 16
-; RV32IA-NEXT:    addi a3, a3, -1
-; RV32IA-NEXT:    sll a4, a3, a0
-; RV32IA-NEXT:    and a1, a1, a3
-; RV32IA-NEXT:    sll a1, a1, a0
-; RV32IA-NEXT:  .LBB126_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w.aq a3, (a2)
-; RV32IA-NEXT:    and a6, a3, a4
-; RV32IA-NEXT:    mv a5, a3
-; RV32IA-NEXT:    bgeu a1, a6, .LBB126_3
-; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB126_1 Depth=1
-; RV32IA-NEXT:    xor a5, a3, a1
-; RV32IA-NEXT:    and a5, a5, a4
-; RV32IA-NEXT:    xor a5, a3, a5
-; RV32IA-NEXT:  .LBB126_3: # in Loop: Header=BB126_1 Depth=1
-; RV32IA-NEXT:    sc.w a5, a5, (a2)
-; RV32IA-NEXT:    bnez a5, .LBB126_1
-; RV32IA-NEXT:  # %bb.4:
-; RV32IA-NEXT:    srl a0, a3, a0
-; RV32IA-NEXT:    ret
+; RV32IA-WMO-LABEL: atomicrmw_umin_i16_acquire:
+; RV32IA-WMO:       # %bb.0:
+; RV32IA-WMO-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NEXT:    lui a3, 16
+; RV32IA-WMO-NEXT:    addi a3, a3, -1
+; RV32IA-WMO-NEXT:    sll a4, a3, a0
+; RV32IA-WMO-NEXT:    and a1, a1, a3
+; RV32IA-WMO-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NEXT:  .LBB126_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NEXT:    lr.w.aq a3, (a2)
+; RV32IA-WMO-NEXT:    and a6, a3, a4
+; RV32IA-WMO-NEXT:    mv a5, a3
+; RV32IA-WMO-NEXT:    bgeu a1, a6, .LBB126_3
+; RV32IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB126_1 Depth=1
+; RV32IA-WMO-NEXT:    xor a5, a3, a1
+; RV32IA-WMO-NEXT:    and a5, a5, a4
+; RV32IA-WMO-NEXT:    xor a5, a3, a5
+; RV32IA-WMO-NEXT:  .LBB126_3: # in Loop: Header=BB126_1 Depth=1
+; RV32IA-WMO-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-WMO-NEXT:    bnez a5, .LBB126_1
+; RV32IA-WMO-NEXT:  # %bb.4:
+; RV32IA-WMO-NEXT:    srl a0, a3, a0
+; RV32IA-WMO-NEXT:    ret
+;
+; RV32IA-TSO-LABEL: atomicrmw_umin_i16_acquire:
+; RV32IA-TSO:       # %bb.0:
+; RV32IA-TSO-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NEXT:    lui a3, 16
+; RV32IA-TSO-NEXT:    addi a3, a3, -1
+; RV32IA-TSO-NEXT:    sll a4, a3, a0
+; RV32IA-TSO-NEXT:    and a1, a1, a3
+; RV32IA-TSO-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NEXT:  .LBB126_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NEXT:    lr.w a3, (a2)
+; RV32IA-TSO-NEXT:    and a6, a3, a4
+; RV32IA-TSO-NEXT:    mv a5, a3
+; RV32IA-TSO-NEXT:    bgeu a1, a6, .LBB126_3
+; RV32IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB126_1 Depth=1
+; RV32IA-TSO-NEXT:    xor a5, a3, a1
+; RV32IA-TSO-NEXT:    and a5, a5, a4
+; RV32IA-TSO-NEXT:    xor a5, a3, a5
+; RV32IA-TSO-NEXT:  .LBB126_3: # in Loop: Header=BB126_1 Depth=1
+; RV32IA-TSO-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-NEXT:    bnez a5, .LBB126_1
+; RV32IA-TSO-NEXT:  # %bb.4:
+; RV32IA-TSO-NEXT:    srl a0, a3, a0
+; RV32IA-TSO-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_umin_i16_acquire:
 ; RV64I:       # %bb.0:
@@ -10284,30 +12463,55 @@ define i16 @atomicrmw_umin_i16_acquire(ptr %a, i16 %b) nounwind {
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;
-; RV64IA-LABEL: atomicrmw_umin_i16_acquire:
-; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a2, a0, -4
-; RV64IA-NEXT:    slli a0, a0, 3
-; RV64IA-NEXT:    lui a3, 16
-; RV64IA-NEXT:    addiw a3, a3, -1
-; RV64IA-NEXT:    sllw a4, a3, a0
-; RV64IA-NEXT:    and a1, a1, a3
-; RV64IA-NEXT:    sllw a1, a1, a0
-; RV64IA-NEXT:  .LBB126_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w.aq a3, (a2)
-; RV64IA-NEXT:    and a6, a3, a4
-; RV64IA-NEXT:    mv a5, a3
-; RV64IA-NEXT:    bgeu a1, a6, .LBB126_3
-; RV64IA-NEXT:  # %bb.2: # in Loop: Header=BB126_1 Depth=1
-; RV64IA-NEXT:    xor a5, a3, a1
-; RV64IA-NEXT:    and a5, a5, a4
-; RV64IA-NEXT:    xor a5, a3, a5
-; RV64IA-NEXT:  .LBB126_3: # in Loop: Header=BB126_1 Depth=1
-; RV64IA-NEXT:    sc.w a5, a5, (a2)
-; RV64IA-NEXT:    bnez a5, .LBB126_1
-; RV64IA-NEXT:  # %bb.4:
-; RV64IA-NEXT:    srlw a0, a3, a0
-; RV64IA-NEXT:    ret
+; RV64IA-WMO-LABEL: atomicrmw_umin_i16_acquire:
+; RV64IA-WMO:       # %bb.0:
+; RV64IA-WMO-NEXT:    andi a2, a0, -4
+; RV64IA-WMO-NEXT:    slli a0, a0, 3
+; RV64IA-WMO-NEXT:    lui a3, 16
+; RV64IA-WMO-NEXT:    addiw a3, a3, -1
+; RV64IA-WMO-NEXT:    sllw a4, a3, a0
+; RV64IA-WMO-NEXT:    and a1, a1, a3
+; RV64IA-WMO-NEXT:    sllw a1, a1, a0
+; RV64IA-WMO-NEXT:  .LBB126_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT:    lr.w.aq a3, (a2)
+; RV64IA-WMO-NEXT:    and a6, a3, a4
+; RV64IA-WMO-NEXT:    mv a5, a3
+; RV64IA-WMO-NEXT:    bgeu a1, a6, .LBB126_3
+; RV64IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB126_1 Depth=1
+; RV64IA-WMO-NEXT:    xor a5, a3, a1
+; RV64IA-WMO-NEXT:    and a5, a5, a4
+; RV64IA-WMO-NEXT:    xor a5, a3, a5
+; RV64IA-WMO-NEXT:  .LBB126_3: # in Loop: Header=BB126_1 Depth=1
+; RV64IA-WMO-NEXT:    sc.w a5, a5, (a2)
+; RV64IA-WMO-NEXT:    bnez a5, .LBB126_1
+; RV64IA-WMO-NEXT:  # %bb.4:
+; RV64IA-WMO-NEXT:    srlw a0, a3, a0
+; RV64IA-WMO-NEXT:    ret
+;
+; RV64IA-TSO-LABEL: atomicrmw_umin_i16_acquire:
+; RV64IA-TSO:       # %bb.0:
+; RV64IA-TSO-NEXT:    andi a2, a0, -4
+; RV64IA-TSO-NEXT:    slli a0, a0, 3
+; RV64IA-TSO-NEXT:    lui a3, 16
+; RV64IA-TSO-NEXT:    addiw a3, a3, -1
+; RV64IA-TSO-NEXT:    sllw a4, a3, a0
+; RV64IA-TSO-NEXT:    and a1, a1, a3
+; RV64IA-TSO-NEXT:    sllw a1, a1, a0
+; RV64IA-TSO-NEXT:  .LBB126_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT:    lr.w a3, (a2)
+; RV64IA-TSO-NEXT:    and a6, a3, a4
+; RV64IA-TSO-NEXT:    mv a5, a3
+; RV64IA-TSO-NEXT:    bgeu a1, a6, .LBB126_3
+; RV64IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB126_1 Depth=1
+; RV64IA-TSO-NEXT:    xor a5, a3, a1
+; RV64IA-TSO-NEXT:    and a5, a5, a4
+; RV64IA-TSO-NEXT:    xor a5, a3, a5
+; RV64IA-TSO-NEXT:  .LBB126_3: # in Loop: Header=BB126_1 Depth=1
+; RV64IA-TSO-NEXT:    sc.w a5, a5, (a2)
+; RV64IA-TSO-NEXT:    bnez a5, .LBB126_1
+; RV64IA-TSO-NEXT:  # %bb.4:
+; RV64IA-TSO-NEXT:    srlw a0, a3, a0
+; RV64IA-TSO-NEXT:    ret
   %1 = atomicrmw umin ptr %a, i16 %b acquire
   ret i16 %1
 }
@@ -10357,30 +12561,55 @@ define i16 @atomicrmw_umin_i16_release(ptr %a, i16 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: atomicrmw_umin_i16_release:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a2, a0, -4
-; RV32IA-NEXT:    slli a0, a0, 3
-; RV32IA-NEXT:    lui a3, 16
-; RV32IA-NEXT:    addi a3, a3, -1
-; RV32IA-NEXT:    sll a4, a3, a0
-; RV32IA-NEXT:    and a1, a1, a3
-; RV32IA-NEXT:    sll a1, a1, a0
-; RV32IA-NEXT:  .LBB127_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w a3, (a2)
-; RV32IA-NEXT:    and a6, a3, a4
-; RV32IA-NEXT:    mv a5, a3
-; RV32IA-NEXT:    bgeu a1, a6, .LBB127_3
-; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB127_1 Depth=1
-; RV32IA-NEXT:    xor a5, a3, a1
-; RV32IA-NEXT:    and a5, a5, a4
-; RV32IA-NEXT:    xor a5, a3, a5
-; RV32IA-NEXT:  .LBB127_3: # in Loop: Header=BB127_1 Depth=1
-; RV32IA-NEXT:    sc.w.rl a5, a5, (a2)
-; RV32IA-NEXT:    bnez a5, .LBB127_1
-; RV32IA-NEXT:  # %bb.4:
-; RV32IA-NEXT:    srl a0, a3, a0
-; RV32IA-NEXT:    ret
+; RV32IA-WMO-LABEL: atomicrmw_umin_i16_release:
+; RV32IA-WMO:       # %bb.0:
+; RV32IA-WMO-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NEXT:    lui a3, 16
+; RV32IA-WMO-NEXT:    addi a3, a3, -1
+; RV32IA-WMO-NEXT:    sll a4, a3, a0
+; RV32IA-WMO-NEXT:    and a1, a1, a3
+; RV32IA-WMO-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NEXT:  .LBB127_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NEXT:    lr.w a3, (a2)
+; RV32IA-WMO-NEXT:    and a6, a3, a4
+; RV32IA-WMO-NEXT:    mv a5, a3
+; RV32IA-WMO-NEXT:    bgeu a1, a6, .LBB127_3
+; RV32IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB127_1 Depth=1
+; RV32IA-WMO-NEXT:    xor a5, a3, a1
+; RV32IA-WMO-NEXT:    and a5, a5, a4
+; RV32IA-WMO-NEXT:    xor a5, a3, a5
+; RV32IA-WMO-NEXT:  .LBB127_3: # in Loop: Header=BB127_1 Depth=1
+; RV32IA-WMO-NEXT:    sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-NEXT:    bnez a5, .LBB127_1
+; RV32IA-WMO-NEXT:  # %bb.4:
+; RV32IA-WMO-NEXT:    srl a0, a3, a0
+; RV32IA-WMO-NEXT:    ret
+;
+; RV32IA-TSO-LABEL: atomicrmw_umin_i16_release:
+; RV32IA-TSO:       # %bb.0:
+; RV32IA-TSO-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NEXT:    lui a3, 16
+; RV32IA-TSO-NEXT:    addi a3, a3, -1
+; RV32IA-TSO-NEXT:    sll a4, a3, a0
+; RV32IA-TSO-NEXT:    and a1, a1, a3
+; RV32IA-TSO-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NEXT:  .LBB127_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NEXT:    lr.w a3, (a2)
+; RV32IA-TSO-NEXT:    and a6, a3, a4
+; RV32IA-TSO-NEXT:    mv a5, a3
+; RV32IA-TSO-NEXT:    bgeu a1, a6, .LBB127_3
+; RV32IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB127_1 Depth=1
+; RV32IA-TSO-NEXT:    xor a5, a3, a1
+; RV32IA-TSO-NEXT:    and a5, a5, a4
+; RV32IA-TSO-NEXT:    xor a5, a3, a5
+; RV32IA-TSO-NEXT:  .LBB127_3: # in Loop: Header=BB127_1 Depth=1
+; RV32IA-TSO-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-NEXT:    bnez a5, .LBB127_1
+; RV32IA-TSO-NEXT:  # %bb.4:
+; RV32IA-TSO-NEXT:    srl a0, a3, a0
+; RV32IA-TSO-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_umin_i16_release:
 ; RV64I:       # %bb.0:
@@ -10426,30 +12655,55 @@ define i16 @atomicrmw_umin_i16_release(ptr %a, i16 %b) nounwind {
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;
-; RV64IA-LABEL: atomicrmw_umin_i16_release:
-; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a2, a0, -4
-; RV64IA-NEXT:    slli a0, a0, 3
-; RV64IA-NEXT:    lui a3, 16
-; RV64IA-NEXT:    addiw a3, a3, -1
-; RV64IA-NEXT:    sllw a4, a3, a0
-; RV64IA-NEXT:    and a1, a1, a3
-; RV64IA-NEXT:    sllw a1, a1, a0
-; RV64IA-NEXT:  .LBB127_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w a3, (a2)
-; RV64IA-NEXT:    and a6, a3, a4
-; RV64IA-NEXT:    mv a5, a3
-; RV64IA-NEXT:    bgeu a1, a6, .LBB127_3
-; RV64IA-NEXT:  # %bb.2: # in Loop: Header=BB127_1 Depth=1
-; RV64IA-NEXT:    xor a5, a3, a1
-; RV64IA-NEXT:    and a5, a5, a4
-; RV64IA-NEXT:    xor a5, a3, a5
-; RV64IA-NEXT:  .LBB127_3: # in Loop: Header=BB127_1 Depth=1
-; RV64IA-NEXT:    sc.w.rl a5, a5, (a2)
-; RV64IA-NEXT:    bnez a5, .LBB127_1
-; RV64IA-NEXT:  # %bb.4:
-; RV64IA-NEXT:    srlw a0, a3, a0
-; RV64IA-NEXT:    ret
+; RV64IA-WMO-LABEL: atomicrmw_umin_i16_release:
+; RV64IA-WMO:       # %bb.0:
+; RV64IA-WMO-NEXT:    andi a2, a0, -4
+; RV64IA-WMO-NEXT:    slli a0, a0, 3
+; RV64IA-WMO-NEXT:    lui a3, 16
+; RV64IA-WMO-NEXT:    addiw a3, a3, -1
+; RV64IA-WMO-NEXT:    sllw a4, a3, a0
+; RV64IA-WMO-NEXT:    and a1, a1, a3
+; RV64IA-WMO-NEXT:    sllw a1, a1, a0
+; RV64IA-WMO-NEXT:  .LBB127_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT:    lr.w a3, (a2)
+; RV64IA-WMO-NEXT:    and a6, a3, a4
+; RV64IA-WMO-NEXT:    mv a5, a3
+; RV64IA-WMO-NEXT:    bgeu a1, a6, .LBB127_3
+; RV64IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB127_1 Depth=1
+; RV64IA-WMO-NEXT:    xor a5, a3, a1
+; RV64IA-WMO-NEXT:    and a5, a5, a4
+; RV64IA-WMO-NEXT:    xor a5, a3, a5
+; RV64IA-WMO-NEXT:  .LBB127_3: # in Loop: Header=BB127_1 Depth=1
+; RV64IA-WMO-NEXT:    sc.w.rl a5, a5, (a2)
+; RV64IA-WMO-NEXT:    bnez a5, .LBB127_1
+; RV64IA-WMO-NEXT:  # %bb.4:
+; RV64IA-WMO-NEXT:    srlw a0, a3, a0
+; RV64IA-WMO-NEXT:    ret
+;
+; RV64IA-TSO-LABEL: atomicrmw_umin_i16_release:
+; RV64IA-TSO:       # %bb.0:
+; RV64IA-TSO-NEXT:    andi a2, a0, -4
+; RV64IA-TSO-NEXT:    slli a0, a0, 3
+; RV64IA-TSO-NEXT:    lui a3, 16
+; RV64IA-TSO-NEXT:    addiw a3, a3, -1
+; RV64IA-TSO-NEXT:    sllw a4, a3, a0
+; RV64IA-TSO-NEXT:    and a1, a1, a3
+; RV64IA-TSO-NEXT:    sllw a1, a1, a0
+; RV64IA-TSO-NEXT:  .LBB127_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT:    lr.w a3, (a2)
+; RV64IA-TSO-NEXT:    and a6, a3, a4
+; RV64IA-TSO-NEXT:    mv a5, a3
+; RV64IA-TSO-NEXT:    bgeu a1, a6, .LBB127_3
+; RV64IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB127_1 Depth=1
+; RV64IA-TSO-NEXT:    xor a5, a3, a1
+; RV64IA-TSO-NEXT:    and a5, a5, a4
+; RV64IA-TSO-NEXT:    xor a5, a3, a5
+; RV64IA-TSO-NEXT:  .LBB127_3: # in Loop: Header=BB127_1 Depth=1
+; RV64IA-TSO-NEXT:    sc.w a5, a5, (a2)
+; RV64IA-TSO-NEXT:    bnez a5, .LBB127_1
+; RV64IA-TSO-NEXT:  # %bb.4:
+; RV64IA-TSO-NEXT:    srlw a0, a3, a0
+; RV64IA-TSO-NEXT:    ret
   %1 = atomicrmw umin ptr %a, i16 %b release
   ret i16 %1
 }
@@ -10499,30 +12753,55 @@ define i16 @atomicrmw_umin_i16_acq_rel(ptr %a, i16 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: atomicrmw_umin_i16_acq_rel:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a2, a0, -4
-; RV32IA-NEXT:    slli a0, a0, 3
-; RV32IA-NEXT:    lui a3, 16
-; RV32IA-NEXT:    addi a3, a3, -1
-; RV32IA-NEXT:    sll a4, a3, a0
-; RV32IA-NEXT:    and a1, a1, a3
-; RV32IA-NEXT:    sll a1, a1, a0
-; RV32IA-NEXT:  .LBB128_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w.aq a3, (a2)
-; RV32IA-NEXT:    and a6, a3, a4
-; RV32IA-NEXT:    mv a5, a3
-; RV32IA-NEXT:    bgeu a1, a6, .LBB128_3
-; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB128_1 Depth=1
-; RV32IA-NEXT:    xor a5, a3, a1
-; RV32IA-NEXT:    and a5, a5, a4
-; RV32IA-NEXT:    xor a5, a3, a5
-; RV32IA-NEXT:  .LBB128_3: # in Loop: Header=BB128_1 Depth=1
-; RV32IA-NEXT:    sc.w.rl a5, a5, (a2)
-; RV32IA-NEXT:    bnez a5, .LBB128_1
-; RV32IA-NEXT:  # %bb.4:
-; RV32IA-NEXT:    srl a0, a3, a0
-; RV32IA-NEXT:    ret
+; RV32IA-WMO-LABEL: atomicrmw_umin_i16_acq_rel:
+; RV32IA-WMO:       # %bb.0:
+; RV32IA-WMO-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NEXT:    lui a3, 16
+; RV32IA-WMO-NEXT:    addi a3, a3, -1
+; RV32IA-WMO-NEXT:    sll a4, a3, a0
+; RV32IA-WMO-NEXT:    and a1, a1, a3
+; RV32IA-WMO-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NEXT:  .LBB128_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NEXT:    lr.w.aq a3, (a2)
+; RV32IA-WMO-NEXT:    and a6, a3, a4
+; RV32IA-WMO-NEXT:    mv a5, a3
+; RV32IA-WMO-NEXT:    bgeu a1, a6, .LBB128_3
+; RV32IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB128_1 Depth=1
+; RV32IA-WMO-NEXT:    xor a5, a3, a1
+; RV32IA-WMO-NEXT:    and a5, a5, a4
+; RV32IA-WMO-NEXT:    xor a5, a3, a5
+; RV32IA-WMO-NEXT:  .LBB128_3: # in Loop: Header=BB128_1 Depth=1
+; RV32IA-WMO-NEXT:    sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-NEXT:    bnez a5, .LBB128_1
+; RV32IA-WMO-NEXT:  # %bb.4:
+; RV32IA-WMO-NEXT:    srl a0, a3, a0
+; RV32IA-WMO-NEXT:    ret
+;
+; RV32IA-TSO-LABEL: atomicrmw_umin_i16_acq_rel:
+; RV32IA-TSO:       # %bb.0:
+; RV32IA-TSO-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NEXT:    lui a3, 16
+; RV32IA-TSO-NEXT:    addi a3, a3, -1
+; RV32IA-TSO-NEXT:    sll a4, a3, a0
+; RV32IA-TSO-NEXT:    and a1, a1, a3
+; RV32IA-TSO-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NEXT:  .LBB128_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NEXT:    lr.w a3, (a2)
+; RV32IA-TSO-NEXT:    and a6, a3, a4
+; RV32IA-TSO-NEXT:    mv a5, a3
+; RV32IA-TSO-NEXT:    bgeu a1, a6, .LBB128_3
+; RV32IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB128_1 Depth=1
+; RV32IA-TSO-NEXT:    xor a5, a3, a1
+; RV32IA-TSO-NEXT:    and a5, a5, a4
+; RV32IA-TSO-NEXT:    xor a5, a3, a5
+; RV32IA-TSO-NEXT:  .LBB128_3: # in Loop: Header=BB128_1 Depth=1
+; RV32IA-TSO-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-NEXT:    bnez a5, .LBB128_1
+; RV32IA-TSO-NEXT:  # %bb.4:
+; RV32IA-TSO-NEXT:    srl a0, a3, a0
+; RV32IA-TSO-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_umin_i16_acq_rel:
 ; RV64I:       # %bb.0:
@@ -10568,30 +12847,55 @@ define i16 @atomicrmw_umin_i16_acq_rel(ptr %a, i16 %b) nounwind {
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;
-; RV64IA-LABEL: atomicrmw_umin_i16_acq_rel:
-; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a2, a0, -4
-; RV64IA-NEXT:    slli a0, a0, 3
-; RV64IA-NEXT:    lui a3, 16
-; RV64IA-NEXT:    addiw a3, a3, -1
-; RV64IA-NEXT:    sllw a4, a3, a0
-; RV64IA-NEXT:    and a1, a1, a3
-; RV64IA-NEXT:    sllw a1, a1, a0
-; RV64IA-NEXT:  .LBB128_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w.aq a3, (a2)
-; RV64IA-NEXT:    and a6, a3, a4
-; RV64IA-NEXT:    mv a5, a3
-; RV64IA-NEXT:    bgeu a1, a6, .LBB128_3
-; RV64IA-NEXT:  # %bb.2: # in Loop: Header=BB128_1 Depth=1
-; RV64IA-NEXT:    xor a5, a3, a1
-; RV64IA-NEXT:    and a5, a5, a4
-; RV64IA-NEXT:    xor a5, a3, a5
-; RV64IA-NEXT:  .LBB128_3: # in Loop: Header=BB128_1 Depth=1
-; RV64IA-NEXT:    sc.w.rl a5, a5, (a2)
-; RV64IA-NEXT:    bnez a5, .LBB128_1
-; RV64IA-NEXT:  # %bb.4:
-; RV64IA-NEXT:    srlw a0, a3, a0
-; RV64IA-NEXT:    ret
+; RV64IA-WMO-LABEL: atomicrmw_umin_i16_acq_rel:
+; RV64IA-WMO:       # %bb.0:
+; RV64IA-WMO-NEXT:    andi a2, a0, -4
+; RV64IA-WMO-NEXT:    slli a0, a0, 3
+; RV64IA-WMO-NEXT:    lui a3, 16
+; RV64IA-WMO-NEXT:    addiw a3, a3, -1
+; RV64IA-WMO-NEXT:    sllw a4, a3, a0
+; RV64IA-WMO-NEXT:    and a1, a1, a3
+; RV64IA-WMO-NEXT:    sllw a1, a1, a0
+; RV64IA-WMO-NEXT:  .LBB128_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT:    lr.w.aq a3, (a2)
+; RV64IA-WMO-NEXT:    and a6, a3, a4
+; RV64IA-WMO-NEXT:    mv a5, a3
+; RV64IA-WMO-NEXT:    bgeu a1, a6, .LBB128_3
+; RV64IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB128_1 Depth=1
+; RV64IA-WMO-NEXT:    xor a5, a3, a1
+; RV64IA-WMO-NEXT:    and a5, a5, a4
+; RV64IA-WMO-NEXT:    xor a5, a3, a5
+; RV64IA-WMO-NEXT:  .LBB128_3: # in Loop: Header=BB128_1 Depth=1
+; RV64IA-WMO-NEXT:    sc.w.rl a5, a5, (a2)
+; RV64IA-WMO-NEXT:    bnez a5, .LBB128_1
+; RV64IA-WMO-NEXT:  # %bb.4:
+; RV64IA-WMO-NEXT:    srlw a0, a3, a0
+; RV64IA-WMO-NEXT:    ret
+;
+; RV64IA-TSO-LABEL: atomicrmw_umin_i16_acq_rel:
+; RV64IA-TSO:       # %bb.0:
+; RV64IA-TSO-NEXT:    andi a2, a0, -4
+; RV64IA-TSO-NEXT:    slli a0, a0, 3
+; RV64IA-TSO-NEXT:    lui a3, 16
+; RV64IA-TSO-NEXT:    addiw a3, a3, -1
+; RV64IA-TSO-NEXT:    sllw a4, a3, a0
+; RV64IA-TSO-NEXT:    and a1, a1, a3
+; RV64IA-TSO-NEXT:    sllw a1, a1, a0
+; RV64IA-TSO-NEXT:  .LBB128_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT:    lr.w a3, (a2)
+; RV64IA-TSO-NEXT:    and a6, a3, a4
+; RV64IA-TSO-NEXT:    mv a5, a3
+; RV64IA-TSO-NEXT:    bgeu a1, a6, .LBB128_3
+; RV64IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB128_1 Depth=1
+; RV64IA-TSO-NEXT:    xor a5, a3, a1
+; RV64IA-TSO-NEXT:    and a5, a5, a4
+; RV64IA-TSO-NEXT:    xor a5, a3, a5
+; RV64IA-TSO-NEXT:  .LBB128_3: # in Loop: Header=BB128_1 Depth=1
+; RV64IA-TSO-NEXT:    sc.w a5, a5, (a2)
+; RV64IA-TSO-NEXT:    bnez a5, .LBB128_1
+; RV64IA-TSO-NEXT:  # %bb.4:
+; RV64IA-TSO-NEXT:    srlw a0, a3, a0
+; RV64IA-TSO-NEXT:    ret
   %1 = atomicrmw umin ptr %a, i16 %b acq_rel
   ret i16 %1
 }
@@ -11487,17 +13791,29 @@ define i32 @atomicrmw_nand_i32_acquire(ptr %a, i32 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: atomicrmw_nand_i32_acquire:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:  .LBB151_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w.aq a2, (a0)
-; RV32IA-NEXT:    and a3, a2, a1
-; RV32IA-NEXT:    not a3, a3
-; RV32IA-NEXT:    sc.w a3, a3, (a0)
-; RV32IA-NEXT:    bnez a3, .LBB151_1
-; RV32IA-NEXT:  # %bb.2:
-; RV32IA-NEXT:    mv a0, a2
-; RV32IA-NEXT:    ret
+; RV32IA-WMO-LABEL: atomicrmw_nand_i32_acquire:
+; RV32IA-WMO:       # %bb.0:
+; RV32IA-WMO-NEXT:  .LBB151_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NEXT:    lr.w.aq a2, (a0)
+; RV32IA-WMO-NEXT:    and a3, a2, a1
+; RV32IA-WMO-NEXT:    not a3, a3
+; RV32IA-WMO-NEXT:    sc.w a3, a3, (a0)
+; RV32IA-WMO-NEXT:    bnez a3, .LBB151_1
+; RV32IA-WMO-NEXT:  # %bb.2:
+; RV32IA-WMO-NEXT:    mv a0, a2
+; RV32IA-WMO-NEXT:    ret
+;
+; RV32IA-TSO-LABEL: atomicrmw_nand_i32_acquire:
+; RV32IA-TSO:       # %bb.0:
+; RV32IA-TSO-NEXT:  .LBB151_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NEXT:    lr.w a2, (a0)
+; RV32IA-TSO-NEXT:    and a3, a2, a1
+; RV32IA-TSO-NEXT:    not a3, a3
+; RV32IA-TSO-NEXT:    sc.w a3, a3, (a0)
+; RV32IA-TSO-NEXT:    bnez a3, .LBB151_1
+; RV32IA-TSO-NEXT:  # %bb.2:
+; RV32IA-TSO-NEXT:    mv a0, a2
+; RV32IA-TSO-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_nand_i32_acquire:
 ; RV64I:       # %bb.0:
@@ -11509,17 +13825,29 @@ define i32 @atomicrmw_nand_i32_acquire(ptr %a, i32 %b) nounwind {
 ; RV64I-NEXT:    addi sp, sp, 16
 ; RV64I-NEXT:    ret
 ;
-; RV64IA-LABEL: atomicrmw_nand_i32_acquire:
-; RV64IA:       # %bb.0:
-; RV64IA-NEXT:  .LBB151_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w.aq a2, (a0)
-; RV64IA-NEXT:    and a3, a2, a1
-; RV64IA-NEXT:    not a3, a3
-; RV64IA-NEXT:    sc.w a3, a3, (a0)
-; RV64IA-NEXT:    bnez a3, .LBB151_1
-; RV64IA-NEXT:  # %bb.2:
-; RV64IA-NEXT:    mv a0, a2
-; RV64IA-NEXT:    ret
+; RV64IA-WMO-LABEL: atomicrmw_nand_i32_acquire:
+; RV64IA-WMO:       # %bb.0:
+; RV64IA-WMO-NEXT:  .LBB151_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT:    lr.w.aq a2, (a0)
+; RV64IA-WMO-NEXT:    and a3, a2, a1
+; RV64IA-WMO-NEXT:    not a3, a3
+; RV64IA-WMO-NEXT:    sc.w a3, a3, (a0)
+; RV64IA-WMO-NEXT:    bnez a3, .LBB151_1
+; RV64IA-WMO-NEXT:  # %bb.2:
+; RV64IA-WMO-NEXT:    mv a0, a2
+; RV64IA-WMO-NEXT:    ret
+;
+; RV64IA-TSO-LABEL: atomicrmw_nand_i32_acquire:
+; RV64IA-TSO:       # %bb.0:
+; RV64IA-TSO-NEXT:  .LBB151_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT:    lr.w a2, (a0)
+; RV64IA-TSO-NEXT:    and a3, a2, a1
+; RV64IA-TSO-NEXT:    not a3, a3
+; RV64IA-TSO-NEXT:    sc.w a3, a3, (a0)
+; RV64IA-TSO-NEXT:    bnez a3, .LBB151_1
+; RV64IA-TSO-NEXT:  # %bb.2:
+; RV64IA-TSO-NEXT:    mv a0, a2
+; RV64IA-TSO-NEXT:    ret
   %1 = atomicrmw nand ptr %a, i32 %b acquire
   ret i32 %1
 }
@@ -11535,17 +13863,29 @@ define i32 @atomicrmw_nand_i32_release(ptr %a, i32 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: atomicrmw_nand_i32_release:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:  .LBB152_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w a2, (a0)
-; RV32IA-NEXT:    and a3, a2, a1
-; RV32IA-NEXT:    not a3, a3
-; RV32IA-NEXT:    sc.w.rl a3, a3, (a0)
-; RV32IA-NEXT:    bnez a3, .LBB152_1
-; RV32IA-NEXT:  # %bb.2:
-; RV32IA-NEXT:    mv a0, a2
-; RV32IA-NEXT:    ret
+; RV32IA-WMO-LABEL: atomicrmw_nand_i32_release:
+; RV32IA-WMO:       # %bb.0:
+; RV32IA-WMO-NEXT:  .LBB152_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NEXT:    lr.w a2, (a0)
+; RV32IA-WMO-NEXT:    and a3, a2, a1
+; RV32IA-WMO-NEXT:    not a3, a3
+; RV32IA-WMO-NEXT:    sc.w.rl a3, a3, (a0)
+; RV32IA-WMO-NEXT:    bnez a3, .LBB152_1
+; RV32IA-WMO-NEXT:  # %bb.2:
+; RV32IA-WMO-NEXT:    mv a0, a2
+; RV32IA-WMO-NEXT:    ret
+;
+; RV32IA-TSO-LABEL: atomicrmw_nand_i32_release:
+; RV32IA-TSO:       # %bb.0:
+; RV32IA-TSO-NEXT:  .LBB152_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NEXT:    lr.w a2, (a0)
+; RV32IA-TSO-NEXT:    and a3, a2, a1
+; RV32IA-TSO-NEXT:    not a3, a3
+; RV32IA-TSO-NEXT:    sc.w a3, a3, (a0)
+; RV32IA-TSO-NEXT:    bnez a3, .LBB152_1
+; RV32IA-TSO-NEXT:  # %bb.2:
+; RV32IA-TSO-NEXT:    mv a0, a2
+; RV32IA-TSO-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_nand_i32_release:
 ; RV64I:       # %bb.0:
@@ -11557,17 +13897,29 @@ define i32 @atomicrmw_nand_i32_release(ptr %a, i32 %b) nounwind {
 ; RV64I-NEXT:    addi sp, sp, 16
 ; RV64I-NEXT:    ret
 ;
-; RV64IA-LABEL: atomicrmw_nand_i32_release:
-; RV64IA:       # %bb.0:
-; RV64IA-NEXT:  .LBB152_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w a2, (a0)
-; RV64IA-NEXT:    and a3, a2, a1
-; RV64IA-NEXT:    not a3, a3
-; RV64IA-NEXT:    sc.w.rl a3, a3, (a0)
-; RV64IA-NEXT:    bnez a3, .LBB152_1
-; RV64IA-NEXT:  # %bb.2:
-; RV64IA-NEXT:    mv a0, a2
-; RV64IA-NEXT:    ret
+; RV64IA-WMO-LABEL: atomicrmw_nand_i32_release:
+; RV64IA-WMO:       # %bb.0:
+; RV64IA-WMO-NEXT:  .LBB152_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT:    lr.w a2, (a0)
+; RV64IA-WMO-NEXT:    and a3, a2, a1
+; RV64IA-WMO-NEXT:    not a3, a3
+; RV64IA-WMO-NEXT:    sc.w.rl a3, a3, (a0)
+; RV64IA-WMO-NEXT:    bnez a3, .LBB152_1
+; RV64IA-WMO-NEXT:  # %bb.2:
+; RV64IA-WMO-NEXT:    mv a0, a2
+; RV64IA-WMO-NEXT:    ret
+;
+; RV64IA-TSO-LABEL: atomicrmw_nand_i32_release:
+; RV64IA-TSO:       # %bb.0:
+; RV64IA-TSO-NEXT:  .LBB152_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT:    lr.w a2, (a0)
+; RV64IA-TSO-NEXT:    and a3, a2, a1
+; RV64IA-TSO-NEXT:    not a3, a3
+; RV64IA-TSO-NEXT:    sc.w a3, a3, (a0)
+; RV64IA-TSO-NEXT:    bnez a3, .LBB152_1
+; RV64IA-TSO-NEXT:  # %bb.2:
+; RV64IA-TSO-NEXT:    mv a0, a2
+; RV64IA-TSO-NEXT:    ret
   %1 = atomicrmw nand ptr %a, i32 %b release
   ret i32 %1
 }
@@ -11583,17 +13935,29 @@ define i32 @atomicrmw_nand_i32_acq_rel(ptr %a, i32 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: atomicrmw_nand_i32_acq_rel:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:  .LBB153_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w.aq a2, (a0)
-; RV32IA-NEXT:    and a3, a2, a1
-; RV32IA-NEXT:    not a3, a3
-; RV32IA-NEXT:    sc.w.rl a3, a3, (a0)
-; RV32IA-NEXT:    bnez a3, .LBB153_1
-; RV32IA-NEXT:  # %bb.2:
-; RV32IA-NEXT:    mv a0, a2
-; RV32IA-NEXT:    ret
+; RV32IA-WMO-LABEL: atomicrmw_nand_i32_acq_rel:
+; RV32IA-WMO:       # %bb.0:
+; RV32IA-WMO-NEXT:  .LBB153_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NEXT:    lr.w.aq a2, (a0)
+; RV32IA-WMO-NEXT:    and a3, a2, a1
+; RV32IA-WMO-NEXT:    not a3, a3
+; RV32IA-WMO-NEXT:    sc.w.rl a3, a3, (a0)
+; RV32IA-WMO-NEXT:    bnez a3, .LBB153_1
+; RV32IA-WMO-NEXT:  # %bb.2:
+; RV32IA-WMO-NEXT:    mv a0, a2
+; RV32IA-WMO-NEXT:    ret
+;
+; RV32IA-TSO-LABEL: atomicrmw_nand_i32_acq_rel:
+; RV32IA-TSO:       # %bb.0:
+; RV32IA-TSO-NEXT:  .LBB153_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NEXT:    lr.w a2, (a0)
+; RV32IA-TSO-NEXT:    and a3, a2, a1
+; RV32IA-TSO-NEXT:    not a3, a3
+; RV32IA-TSO-NEXT:    sc.w a3, a3, (a0)
+; RV32IA-TSO-NEXT:    bnez a3, .LBB153_1
+; RV32IA-TSO-NEXT:  # %bb.2:
+; RV32IA-TSO-NEXT:    mv a0, a2
+; RV32IA-TSO-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_nand_i32_acq_rel:
 ; RV64I:       # %bb.0:
@@ -11605,17 +13969,29 @@ define i32 @atomicrmw_nand_i32_acq_rel(ptr %a, i32 %b) nounwind {
 ; RV64I-NEXT:    addi sp, sp, 16
 ; RV64I-NEXT:    ret
 ;
-; RV64IA-LABEL: atomicrmw_nand_i32_acq_rel:
-; RV64IA:       # %bb.0:
-; RV64IA-NEXT:  .LBB153_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w.aq a2, (a0)
-; RV64IA-NEXT:    and a3, a2, a1
-; RV64IA-NEXT:    not a3, a3
-; RV64IA-NEXT:    sc.w.rl a3, a3, (a0)
-; RV64IA-NEXT:    bnez a3, .LBB153_1
-; RV64IA-NEXT:  # %bb.2:
-; RV64IA-NEXT:    mv a0, a2
-; RV64IA-NEXT:    ret
+; RV64IA-WMO-LABEL: atomicrmw_nand_i32_acq_rel:
+; RV64IA-WMO:       # %bb.0:
+; RV64IA-WMO-NEXT:  .LBB153_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT:    lr.w.aq a2, (a0)
+; RV64IA-WMO-NEXT:    and a3, a2, a1
+; RV64IA-WMO-NEXT:    not a3, a3
+; RV64IA-WMO-NEXT:    sc.w.rl a3, a3, (a0)
+; RV64IA-WMO-NEXT:    bnez a3, .LBB153_1
+; RV64IA-WMO-NEXT:  # %bb.2:
+; RV64IA-WMO-NEXT:    mv a0, a2
+; RV64IA-WMO-NEXT:    ret
+;
+; RV64IA-TSO-LABEL: atomicrmw_nand_i32_acq_rel:
+; RV64IA-TSO:       # %bb.0:
+; RV64IA-TSO-NEXT:  .LBB153_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT:    lr.w a2, (a0)
+; RV64IA-TSO-NEXT:    and a3, a2, a1
+; RV64IA-TSO-NEXT:    not a3, a3
+; RV64IA-TSO-NEXT:    sc.w a3, a3, (a0)
+; RV64IA-TSO-NEXT:    bnez a3, .LBB153_1
+; RV64IA-TSO-NEXT:  # %bb.2:
+; RV64IA-TSO-NEXT:    mv a0, a2
+; RV64IA-TSO-NEXT:    ret
   %1 = atomicrmw nand ptr %a, i32 %b acq_rel
   ret i32 %1
 }
@@ -14650,17 +17026,29 @@ define i64 @atomicrmw_nand_i64_acquire(ptr %a, i64 %b) nounwind {
 ; RV64I-NEXT:    addi sp, sp, 16
 ; RV64I-NEXT:    ret
 ;
-; RV64IA-LABEL: atomicrmw_nand_i64_acquire:
-; RV64IA:       # %bb.0:
-; RV64IA-NEXT:  .LBB206_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.d.aq a2, (a0)
-; RV64IA-NEXT:    and a3, a2, a1
-; RV64IA-NEXT:    not a3, a3
-; RV64IA-NEXT:    sc.d a3, a3, (a0)
-; RV64IA-NEXT:    bnez a3, .LBB206_1
-; RV64IA-NEXT:  # %bb.2:
-; RV64IA-NEXT:    mv a0, a2
-; RV64IA-NEXT:    ret
+; RV64IA-WMO-LABEL: atomicrmw_nand_i64_acquire:
+; RV64IA-WMO:       # %bb.0:
+; RV64IA-WMO-NEXT:  .LBB206_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT:    lr.d.aq a2, (a0)
+; RV64IA-WMO-NEXT:    and a3, a2, a1
+; RV64IA-WMO-NEXT:    not a3, a3
+; RV64IA-WMO-NEXT:    sc.d a3, a3, (a0)
+; RV64IA-WMO-NEXT:    bnez a3, .LBB206_1
+; RV64IA-WMO-NEXT:  # %bb.2:
+; RV64IA-WMO-NEXT:    mv a0, a2
+; RV64IA-WMO-NEXT:    ret
+;
+; RV64IA-TSO-LABEL: atomicrmw_nand_i64_acquire:
+; RV64IA-TSO:       # %bb.0:
+; RV64IA-TSO-NEXT:  .LBB206_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT:    lr.d a2, (a0)
+; RV64IA-TSO-NEXT:    and a3, a2, a1
+; RV64IA-TSO-NEXT:    not a3, a3
+; RV64IA-TSO-NEXT:    sc.d a3, a3, (a0)
+; RV64IA-TSO-NEXT:    bnez a3, .LBB206_1
+; RV64IA-TSO-NEXT:  # %bb.2:
+; RV64IA-TSO-NEXT:    mv a0, a2
+; RV64IA-TSO-NEXT:    ret
   %1 = atomicrmw nand ptr %a, i64 %b acquire
   ret i64 %1
 }
@@ -14696,17 +17084,29 @@ define i64 @atomicrmw_nand_i64_release(ptr %a, i64 %b) nounwind {
 ; RV64I-NEXT:    addi sp, sp, 16
 ; RV64I-NEXT:    ret
 ;
-; RV64IA-LABEL: atomicrmw_nand_i64_release:
-; RV64IA:       # %bb.0:
-; RV64IA-NEXT:  .LBB207_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.d a2, (a0)
-; RV64IA-NEXT:    and a3, a2, a1
-; RV64IA-NEXT:    not a3, a3
-; RV64IA-NEXT:    sc.d.rl a3, a3, (a0)
-; RV64IA-NEXT:    bnez a3, .LBB207_1
-; RV64IA-NEXT:  # %bb.2:
-; RV64IA-NEXT:    mv a0, a2
-; RV64IA-NEXT:    ret
+; RV64IA-WMO-LABEL: atomicrmw_nand_i64_release:
+; RV64IA-WMO:       # %bb.0:
+; RV64IA-WMO-NEXT:  .LBB207_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT:    lr.d a2, (a0)
+; RV64IA-WMO-NEXT:    and a3, a2, a1
+; RV64IA-WMO-NEXT:    not a3, a3
+; RV64IA-WMO-NEXT:    sc.d.rl a3, a3, (a0)
+; RV64IA-WMO-NEXT:    bnez a3, .LBB207_1
+; RV64IA-WMO-NEXT:  # %bb.2:
+; RV64IA-WMO-NEXT:    mv a0, a2
+; RV64IA-WMO-NEXT:    ret
+;
+; RV64IA-TSO-LABEL: atomicrmw_nand_i64_release:
+; RV64IA-TSO:       # %bb.0:
+; RV64IA-TSO-NEXT:  .LBB207_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT:    lr.d a2, (a0)
+; RV64IA-TSO-NEXT:    and a3, a2, a1
+; RV64IA-TSO-NEXT:    not a3, a3
+; RV64IA-TSO-NEXT:    sc.d a3, a3, (a0)
+; RV64IA-TSO-NEXT:    bnez a3, .LBB207_1
+; RV64IA-TSO-NEXT:  # %bb.2:
+; RV64IA-TSO-NEXT:    mv a0, a2
+; RV64IA-TSO-NEXT:    ret
   %1 = atomicrmw nand ptr %a, i64 %b release
   ret i64 %1
 }
@@ -14742,17 +17142,29 @@ define i64 @atomicrmw_nand_i64_acq_rel(ptr %a, i64 %b) nounwind {
 ; RV64I-NEXT:    addi sp, sp, 16
 ; RV64I-NEXT:    ret
 ;
-; RV64IA-LABEL: atomicrmw_nand_i64_acq_rel:
-; RV64IA:       # %bb.0:
-; RV64IA-NEXT:  .LBB208_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.d.aq a2, (a0)
-; RV64IA-NEXT:    and a3, a2, a1
-; RV64IA-NEXT:    not a3, a3
-; RV64IA-NEXT:    sc.d.rl a3, a3, (a0)
-; RV64IA-NEXT:    bnez a3, .LBB208_1
-; RV64IA-NEXT:  # %bb.2:
-; RV64IA-NEXT:    mv a0, a2
-; RV64IA-NEXT:    ret
+; RV64IA-WMO-LABEL: atomicrmw_nand_i64_acq_rel:
+; RV64IA-WMO:       # %bb.0:
+; RV64IA-WMO-NEXT:  .LBB208_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT:    lr.d.aq a2, (a0)
+; RV64IA-WMO-NEXT:    and a3, a2, a1
+; RV64IA-WMO-NEXT:    not a3, a3
+; RV64IA-WMO-NEXT:    sc.d.rl a3, a3, (a0)
+; RV64IA-WMO-NEXT:    bnez a3, .LBB208_1
+; RV64IA-WMO-NEXT:  # %bb.2:
+; RV64IA-WMO-NEXT:    mv a0, a2
+; RV64IA-WMO-NEXT:    ret
+;
+; RV64IA-TSO-LABEL: atomicrmw_nand_i64_acq_rel:
+; RV64IA-TSO:       # %bb.0:
+; RV64IA-TSO-NEXT:  .LBB208_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT:    lr.d a2, (a0)
+; RV64IA-TSO-NEXT:    and a3, a2, a1
+; RV64IA-TSO-NEXT:    not a3, a3
+; RV64IA-TSO-NEXT:    sc.d a3, a3, (a0)
+; RV64IA-TSO-NEXT:    bnez a3, .LBB208_1
+; RV64IA-TSO-NEXT:  # %bb.2:
+; RV64IA-TSO-NEXT:    mv a0, a2
+; RV64IA-TSO-NEXT:    ret
   %1 = atomicrmw nand ptr %a, i64 %b acq_rel
   ret i64 %1
 }


        


More information about the llvm-commits mailing list