[llvm] [RISCV] Initial ISel support for the experimental zacas extension (PR #67918)

Yingwei Zheng via llvm-commits llvm-commits at lists.llvm.org
Sun Oct 1 07:44:43 PDT 2023


https://github.com/dtcxzyw created https://github.com/llvm/llvm-project/pull/67918

This patch implements the v1.0-rc3 draft extension based on AArch64's version.

It introduces two register classes (GPRPI64/GPRPI128) and some pseudo instructions for correct register allocation.
These pseudo instructions will be expanded in the `RISCVExpandAtomicPseudoInsts` pass.

Migrated from https://reviews.llvm.org/D158956.


>From d874e3e400a437697b97fe186fce06c2b45f1e30 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Sun, 1 Oct 2023 22:42:12 +0800
Subject: [PATCH] [RISCV] Initial ISel support for the experimental zacas
 extension

---
 .../RISCV/RISCVExpandAtomicPseudoInsts.cpp    |  145 +
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp   |   71 +-
 llvm/lib/Target/RISCV/RISCVInstrInfoA.td      |   22 +
 llvm/lib/Target/RISCV/RISCVRegisterInfo.td    |   20 +
 .../RISCV/atomic-cmpxchg-branch-on-result.ll  |  362 +-
 .../test/CodeGen/RISCV/atomic-cmpxchg-flag.ll |   10 +
 llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll     | 3720 +++++++++++++++--
 7 files changed, 4007 insertions(+), 343 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp b/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp
index bb772fc5da92244..43abdc2b06f8af4 100644
--- a/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp
+++ b/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp
@@ -59,6 +59,9 @@ class RISCVExpandAtomicPseudo : public MachineFunctionPass {
   bool expandAtomicCmpXchg(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MBBI, bool IsMasked,
                            int Width, MachineBasicBlock::iterator &NextMBBI);
+  bool expandAMOCAS(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+                    bool IsPaired, int Width,
+                    MachineBasicBlock::iterator &NextMBBI);
 #ifndef NDEBUG
   unsigned getInstSizeInBytes(const MachineFunction &MF) const {
     unsigned Size = 0;
@@ -145,6 +148,14 @@ bool RISCVExpandAtomicPseudo::expandMI(MachineBasicBlock &MBB,
     return expandAtomicCmpXchg(MBB, MBBI, false, 64, NextMBBI);
   case RISCV::PseudoMaskedCmpXchg32:
     return expandAtomicCmpXchg(MBB, MBBI, true, 32, NextMBBI);
+  case RISCV::PseudoAMOCAS_W:
+    return expandAMOCAS(MBB, MBBI, false, 32, NextMBBI);
+  case RISCV::PseudoAMOCAS_D_64:
+    return expandAMOCAS(MBB, MBBI, false, 64, NextMBBI);
+  case RISCV::PseudoAMOCAS_D_32:
+    return expandAMOCAS(MBB, MBBI, true, 64, NextMBBI);
+  case RISCV::PseudoAMOCAS_Q:
+    return expandAMOCAS(MBB, MBBI, true, 128, NextMBBI);
   }
 
   return false;
@@ -256,6 +267,74 @@ static unsigned getSCForRMW(AtomicOrdering Ordering, int Width,
   llvm_unreachable("Unexpected SC width\n");
 }
 
+static unsigned getAMOCASForRMW32(AtomicOrdering Ordering,
+                                  const RISCVSubtarget *Subtarget) {
+  if (Subtarget->hasStdExtZtso())
+    return RISCV::AMOCAS_W;
+  switch (Ordering) {
+  default:
+    llvm_unreachable("Unexpected AtomicOrdering");
+  case AtomicOrdering::Monotonic:
+    return RISCV::AMOCAS_W;
+  case AtomicOrdering::Acquire:
+    return RISCV::AMOCAS_W_AQ;
+  case AtomicOrdering::Release:
+    return RISCV::AMOCAS_W_RL;
+  case AtomicOrdering::AcquireRelease:
+  case AtomicOrdering::SequentiallyConsistent:
+    return RISCV::AMOCAS_W_AQ_RL;
+  }
+}
+
+static unsigned getAMOCASForRMW64(AtomicOrdering Ordering,
+                                  const RISCVSubtarget *Subtarget) {
+  if (Subtarget->hasStdExtZtso())
+    return RISCV::AMOCAS_D;
+  switch (Ordering) {
+  default:
+    llvm_unreachable("Unexpected AtomicOrdering");
+  case AtomicOrdering::Monotonic:
+    return RISCV::AMOCAS_D;
+  case AtomicOrdering::Acquire:
+    return RISCV::AMOCAS_D_AQ;
+  case AtomicOrdering::Release:
+    return RISCV::AMOCAS_D_RL;
+  case AtomicOrdering::AcquireRelease:
+  case AtomicOrdering::SequentiallyConsistent:
+    return RISCV::AMOCAS_D_AQ_RL;
+  }
+}
+
+static unsigned getAMOCASForRMW128(AtomicOrdering Ordering,
+                                   const RISCVSubtarget *Subtarget) {
+  if (Subtarget->hasStdExtZtso())
+    return RISCV::AMOCAS_Q;
+  switch (Ordering) {
+  default:
+    llvm_unreachable("Unexpected AtomicOrdering");
+  case AtomicOrdering::Monotonic:
+    return RISCV::AMOCAS_Q;
+  case AtomicOrdering::Acquire:
+    return RISCV::AMOCAS_Q_AQ;
+  case AtomicOrdering::Release:
+    return RISCV::AMOCAS_Q_RL;
+  case AtomicOrdering::AcquireRelease:
+  case AtomicOrdering::SequentiallyConsistent:
+    return RISCV::AMOCAS_Q_AQ_RL;
+  }
+}
+
+static unsigned getAMOCASForRMW(AtomicOrdering Ordering, int Width,
+                                const RISCVSubtarget *Subtarget) {
+  if (Width == 32)
+    return getAMOCASForRMW32(Ordering, Subtarget);
+  if (Width == 64)
+    return getAMOCASForRMW64(Ordering, Subtarget);
+  if (Width == 128)
+    return getAMOCASForRMW128(Ordering, Subtarget);
+  llvm_unreachable("Unexpected AMOCAS width\n");
+}
+
 static void doAtomicBinOpExpansion(const RISCVInstrInfo *TII, MachineInstr &MI,
                                    DebugLoc DL, MachineBasicBlock *ThisMBB,
                                    MachineBasicBlock *LoopMBB,
@@ -728,6 +807,72 @@ bool RISCVExpandAtomicPseudo::expandAtomicCmpXchg(
   return true;
 }
 
+static Register getGPRPairEvenReg(Register PairedReg) {
+  switch (PairedReg) {
+  case RISCV::X0_PD:
+    return RISCV::X0;
+  case RISCV::X2_PD:
+    return RISCV::X2;
+  case RISCV::X4_PD:
+    return RISCV::X4;
+  case RISCV::X6_PD:
+    return RISCV::X6;
+  case RISCV::X8_PD:
+    return RISCV::X8;
+  case RISCV::X10_PD:
+    return RISCV::X10;
+  case RISCV::X12_PD:
+    return RISCV::X12;
+  case RISCV::X14_PD:
+    return RISCV::X14;
+  case RISCV::X16_PD:
+    return RISCV::X16;
+  case RISCV::X18_PD:
+    return RISCV::X18;
+  case RISCV::X20_PD:
+    return RISCV::X20;
+  case RISCV::X22_PD:
+    return RISCV::X22;
+  case RISCV::X24_PD:
+    return RISCV::X24;
+  case RISCV::X26_PD:
+    return RISCV::X26;
+  case RISCV::X28_PD:
+    return RISCV::X28;
+  case RISCV::X30_PD:
+    return RISCV::X30;
+  default:
+    llvm_unreachable("Unexpected GPR pair");
+  }
+}
+
+bool RISCVExpandAtomicPseudo::expandAMOCAS(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, bool IsPaired,
+    int Width, MachineBasicBlock::iterator &NextMBBI) {
+  MachineInstr &MI = *MBBI;
+  DebugLoc DL = MI.getDebugLoc();
+
+  Register DestReg = MI.getOperand(0).getReg();
+  if (IsPaired)
+    DestReg = getGPRPairEvenReg(DestReg);
+  Register AddrReg = MI.getOperand(1).getReg();
+  Register NewValReg = MI.getOperand(3).getReg();
+  if (IsPaired)
+    NewValReg = getGPRPairEvenReg(NewValReg);
+  AtomicOrdering Ordering =
+      static_cast<AtomicOrdering>(MI.getOperand(4).getImm());
+
+  MachineInstr *NewMI =
+      BuildMI(MBB, MBBI, DL, TII->get(getAMOCASForRMW(Ordering, Width, STI)))
+          .addReg(DestReg)
+          .addReg(AddrReg)
+          .addReg(NewValReg);
+  NewMI->getOperand(0).setIsDef(true);
+
+  MI.eraseFromParent();
+  return true;
+}
+
 } // end of anonymous namespace
 
 INITIALIZE_PASS(RISCVExpandAtomicPseudo, "riscv-expand-atomic-pseudo",
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 5e3975df1c4425d..de640deda82e92b 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -552,7 +552,10 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
   }
 
   if (Subtarget.hasStdExtA()) {
-    setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
+    if (Subtarget.hasStdExtZacas())
+      setMaxAtomicSizeInBitsSupported(Subtarget.getXLen() * 2);
+    else
+      setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
     setMinCmpXchgSizeInBits(32);
   } else if (Subtarget.hasForcedAtomics()) {
     setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
@@ -1249,6 +1252,12 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
         XLenVT, LibCall);
   }
 
+  // Set atomic_cmp_swap operations to expand to AMOCAS.D (RV32) and AMOCAS.Q
+  // (RV64).
+  if (Subtarget.hasStdExtZacas())
+    setOperationAction(ISD::ATOMIC_CMP_SWAP,
+                       Subtarget.is64Bit() ? MVT::i128 : MVT::i64, Custom);
+
   if (Subtarget.hasVendorXTHeadMemIdx()) {
     for (unsigned im = (unsigned)ISD::PRE_INC; im != (unsigned)ISD::POST_DEC;
          ++im) {
@@ -10451,6 +10460,63 @@ static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
   return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
 }
 
+// Create an even/odd pair of X registers holding integer value V.
+static SDValue createGPRPairNode(SelectionDAG &DAG, SDValue V, MVT VT,
+                                 MVT SubRegVT) {
+  SDLoc DL(V.getNode());
+  SDValue VLo = DAG.getAnyExtOrTrunc(V, DL, SubRegVT);
+  SDValue VHi = DAG.getAnyExtOrTrunc(
+      DAG.getNode(
+          ISD::SRL, DL, VT, V,
+          DAG.getConstant(SubRegVT == MVT::i64 ? 64 : 32, DL, SubRegVT)),
+      DL, SubRegVT);
+  SDValue RegClass = DAG.getTargetConstant(
+      VT == MVT::i128 ? RISCV::GPRPI128RegClassID : RISCV::GPRPI64RegClassID,
+      DL, MVT::i32);
+  SDValue SubReg0 = DAG.getTargetConstant(RISCV::sub_32, DL, MVT::i32);
+  SDValue SubReg1 = DAG.getTargetConstant(RISCV::sub_32_hi, DL, MVT::i32);
+  const SDValue Ops[] = {RegClass, VLo, SubReg0, VHi, SubReg1};
+  return SDValue(
+      DAG.getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops), 0);
+}
+
+static void ReplaceCMP_SWAP_2XLenResults(SDNode *N,
+                                         SmallVectorImpl<SDValue> &Results,
+                                         SelectionDAG &DAG,
+                                         const RISCVSubtarget &Subtarget) {
+  MVT VT = N->getSimpleValueType(0);
+  assert(N->getValueType(0) == (Subtarget.is64Bit() ? MVT::i128 : MVT::i64) &&
+         "AtomicCmpSwap on types less than 2*XLen should be legal");
+  assert(Subtarget.hasStdExtZacas());
+  MVT SubRegVT = (VT == MVT::i64 ? MVT::i32 : MVT::i64);
+
+  SDLoc DL(N);
+  MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
+  AtomicOrdering Ordering = MemOp->getMergedOrdering();
+  SDValue Ops[] = {
+      N->getOperand(1),                                       // Ptr
+      createGPRPairNode(DAG, N->getOperand(2), VT, SubRegVT), // Compare value
+      createGPRPairNode(DAG, N->getOperand(3), VT, SubRegVT), // Store value
+      DAG.getTargetConstant(static_cast<unsigned>(Ordering), DL,
+                            MVT::i32), // Ordering
+      N->getOperand(0),                // Chain in
+  };
+
+  unsigned Opcode =
+      (VT == MVT::i64 ? RISCV::PseudoAMOCAS_D_32 : RISCV::PseudoAMOCAS_Q);
+  MachineSDNode *CmpSwap = DAG.getMachineNode(
+      Opcode, DL, DAG.getVTList(MVT::Untyped, MVT::Other), Ops);
+  DAG.setNodeMemRefs(CmpSwap, {MemOp});
+
+  unsigned SubReg1 = RISCV::sub_32, SubReg2 = RISCV::sub_32_hi;
+  SDValue Lo =
+      DAG.getTargetExtractSubreg(SubReg1, DL, SubRegVT, SDValue(CmpSwap, 0));
+  SDValue Hi =
+      DAG.getTargetExtractSubreg(SubReg2, DL, SubRegVT, SDValue(CmpSwap, 0));
+  Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, VT, Lo, Hi));
+  Results.push_back(SDValue(CmpSwap, 1));
+}
+
 void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
                                              SmallVectorImpl<SDValue> &Results,
                                              SelectionDAG &DAG) const {
@@ -10458,6 +10524,9 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
   switch (N->getOpcode()) {
   default:
     llvm_unreachable("Don't know how to custom type legalize this operation!");
+  case ISD::ATOMIC_CMP_SWAP:
+    ReplaceCMP_SWAP_2XLenResults(N, Results, DAG, Subtarget);
+    break;
   case ISD::STRICT_FP_TO_SINT:
   case ISD::STRICT_FP_TO_UINT:
   case ISD::FP_TO_SINT:
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoA.td b/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
index c43af14bb7f7005..d175ae49f1919ce 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
@@ -295,6 +295,28 @@ multiclass PseudoCmpXchgPat<string Op, Pseudo CmpXchgInst,
             (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 7)>;
 }
 
+let Predicates = [HasStdExtZacas] in {
+class PseudoAMOCAS<RegisterClass RC = GPR>
+    : Pseudo<(outs RC:$res),
+             (ins GPR:$addr, RC:$cmpval, RC:$newval, ixlenimm:$ordering), []> {
+  let Constraints = "@earlyclobber $res, $res = $cmpval";
+  let mayLoad = 1;
+  let mayStore = 1;
+  let hasSideEffects = 0;
+}
+def PseudoAMOCAS_W: PseudoAMOCAS;
+defm : PseudoCmpXchgPat<"atomic_cmp_swap_32", PseudoAMOCAS_W>;
+
+let Predicates = [HasStdExtZacas, IsRV32] in {
+  def PseudoAMOCAS_D_32: PseudoAMOCAS<GPRPI64>;
+}
+let Predicates = [HasStdExtZacas, IsRV64] in {
+  def PseudoAMOCAS_D_64: PseudoAMOCAS;
+  defm : PseudoCmpXchgPat<"atomic_cmp_swap_64", PseudoAMOCAS_D_64>;
+  def PseudoAMOCAS_Q: PseudoAMOCAS<GPRPI128>;
+}
+}
+
 def PseudoCmpXchg32 : PseudoCmpXchg;
 defm : PseudoCmpXchgPat<"atomic_cmp_swap_32", PseudoCmpXchg32>;
 
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
index ab0d354967b34c7..9c3c8b85782f613 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
@@ -578,6 +578,26 @@ def GPRPF64 : RegisterClass<"RISCV", [f64], 64, (add
     X0_PD, X2_PD, X4_PD
 )>;
 
+let RegInfos = RegInfoByHwMode<[RV32], [RegInfo<32, 32, 32>]> in
+def GPRPI64 : RegisterClass<"RISCV", [i64], 32, (add
+    X10_PD, X12_PD, X14_PD, X16_PD,
+    X6_PD,
+    X28_PD, X30_PD,
+    X8_PD,
+    X18_PD, X20_PD, X22_PD, X24_PD, X26_PD,
+    X0_PD, X2_PD, X4_PD
+)>;
+
+let RegInfos = RegInfoByHwMode<[RV64], [RegInfo<64, 64, 64>]> in
+def GPRPI128 : RegisterClass<"RISCV", [i128], 64, (add
+    X10_PD, X12_PD, X14_PD, X16_PD,
+    X6_PD,
+    X28_PD, X30_PD,
+    X8_PD,
+    X18_PD, X20_PD, X22_PD, X24_PD, X26_PD,
+    X0_PD, X2_PD, X4_PD
+)>;
+
 // The register class is added for inline assembly for vector mask types.
 def VM : VReg<VMaskVTs,
            (add (sequence "V%u", 8, 31),
diff --git a/llvm/test/CodeGen/RISCV/atomic-cmpxchg-branch-on-result.ll b/llvm/test/CodeGen/RISCV/atomic-cmpxchg-branch-on-result.ll
index 651f58d324422f2..08c9aa685479dfa 100644
--- a/llvm/test/CodeGen/RISCV/atomic-cmpxchg-branch-on-result.ll
+++ b/llvm/test/CodeGen/RISCV/atomic-cmpxchg-branch-on-result.ll
@@ -1,30 +1,72 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+a -verify-machineinstrs < %s \
-; RUN:   | FileCheck -check-prefixes=CHECK,RV32IA %s
+; RUN:   | FileCheck -check-prefixes=RV32IA %s
+; RUN: llc -mtriple=riscv32 -mattr=+a,+experimental-zacas -verify-machineinstrs < %s \
+; RUN:   | FileCheck -check-prefixes=RV32IA-ZACAS %s
 ; RUN: llc -mtriple=riscv64 -mattr=+a -verify-machineinstrs < %s \
-; RUN:   | FileCheck -check-prefixes=CHECK,RV64IA %s
+; RUN:   | FileCheck -check-prefixes=RV64IA %s
+; RUN: llc -mtriple=riscv64 -mattr=+a,+experimental-zacas -verify-machineinstrs < %s \
+; RUN:   | FileCheck -check-prefixes=RV64IA-ZACAS %s
 
 ; Test cmpxchg followed by a branch on the cmpxchg success value to see if the
 ; branch is folded into the cmpxchg expansion.
 
 define void @cmpxchg_and_branch1(ptr %ptr, i32 signext %cmp, i32 signext %val) nounwind {
-; CHECK-LABEL: cmpxchg_and_branch1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:  .LBB0_1: # %do_cmpxchg
-; CHECK-NEXT:    # =>This Loop Header: Depth=1
-; CHECK-NEXT:    # Child Loop BB0_3 Depth 2
-; CHECK-NEXT:  .LBB0_3: # %do_cmpxchg
-; CHECK-NEXT:    # Parent Loop BB0_1 Depth=1
-; CHECK-NEXT:    # => This Inner Loop Header: Depth=2
-; CHECK-NEXT:    lr.w.aqrl a3, (a0)
-; CHECK-NEXT:    bne a3, a1, .LBB0_1
-; CHECK-NEXT:  # %bb.4: # %do_cmpxchg
-; CHECK-NEXT:    # in Loop: Header=BB0_3 Depth=2
-; CHECK-NEXT:    sc.w.rl a4, a2, (a0)
-; CHECK-NEXT:    bnez a4, .LBB0_3
-; CHECK-NEXT:  # %bb.5: # %do_cmpxchg
-; CHECK-NEXT:  # %bb.2: # %exit
-; CHECK-NEXT:    ret
+; RV32IA-LABEL: cmpxchg_and_branch1:
+; RV32IA:       # %bb.0: # %entry
+; RV32IA-NEXT:  .LBB0_1: # %do_cmpxchg
+; RV32IA-NEXT:    # =>This Loop Header: Depth=1
+; RV32IA-NEXT:    # Child Loop BB0_3 Depth 2
+; RV32IA-NEXT:  .LBB0_3: # %do_cmpxchg
+; RV32IA-NEXT:    # Parent Loop BB0_1 Depth=1
+; RV32IA-NEXT:    # => This Inner Loop Header: Depth=2
+; RV32IA-NEXT:    lr.w.aqrl a3, (a0)
+; RV32IA-NEXT:    bne a3, a1, .LBB0_1
+; RV32IA-NEXT:  # %bb.4: # %do_cmpxchg
+; RV32IA-NEXT:    # in Loop: Header=BB0_3 Depth=2
+; RV32IA-NEXT:    sc.w.rl a4, a2, (a0)
+; RV32IA-NEXT:    bnez a4, .LBB0_3
+; RV32IA-NEXT:  # %bb.5: # %do_cmpxchg
+; RV32IA-NEXT:  # %bb.2: # %exit
+; RV32IA-NEXT:    ret
+;
+; RV32IA-ZACAS-LABEL: cmpxchg_and_branch1:
+; RV32IA-ZACAS:       # %bb.0: # %entry
+; RV32IA-ZACAS-NEXT:  .LBB0_1: # %do_cmpxchg
+; RV32IA-ZACAS-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT:    mv a3, a1
+; RV32IA-ZACAS-NEXT:    amocas.w.aqrl a3, a2, (a0)
+; RV32IA-ZACAS-NEXT:    bne a3, a1, .LBB0_1
+; RV32IA-ZACAS-NEXT:  # %bb.2: # %exit
+; RV32IA-ZACAS-NEXT:    ret
+;
+; RV64IA-LABEL: cmpxchg_and_branch1:
+; RV64IA:       # %bb.0: # %entry
+; RV64IA-NEXT:  .LBB0_1: # %do_cmpxchg
+; RV64IA-NEXT:    # =>This Loop Header: Depth=1
+; RV64IA-NEXT:    # Child Loop BB0_3 Depth 2
+; RV64IA-NEXT:  .LBB0_3: # %do_cmpxchg
+; RV64IA-NEXT:    # Parent Loop BB0_1 Depth=1
+; RV64IA-NEXT:    # => This Inner Loop Header: Depth=2
+; RV64IA-NEXT:    lr.w.aqrl a3, (a0)
+; RV64IA-NEXT:    bne a3, a1, .LBB0_1
+; RV64IA-NEXT:  # %bb.4: # %do_cmpxchg
+; RV64IA-NEXT:    # in Loop: Header=BB0_3 Depth=2
+; RV64IA-NEXT:    sc.w.rl a4, a2, (a0)
+; RV64IA-NEXT:    bnez a4, .LBB0_3
+; RV64IA-NEXT:  # %bb.5: # %do_cmpxchg
+; RV64IA-NEXT:  # %bb.2: # %exit
+; RV64IA-NEXT:    ret
+;
+; RV64IA-ZACAS-LABEL: cmpxchg_and_branch1:
+; RV64IA-ZACAS:       # %bb.0: # %entry
+; RV64IA-ZACAS-NEXT:  .LBB0_1: # %do_cmpxchg
+; RV64IA-ZACAS-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV64IA-ZACAS-NEXT:    mv a3, a1
+; RV64IA-ZACAS-NEXT:    amocas.w.aqrl a3, a2, (a0)
+; RV64IA-ZACAS-NEXT:    bne a3, a1, .LBB0_1
+; RV64IA-ZACAS-NEXT:  # %bb.2: # %exit
+; RV64IA-ZACAS-NEXT:    ret
 entry:
   br label %do_cmpxchg
 do_cmpxchg:
@@ -36,25 +78,65 @@ exit:
 }
 
 define void @cmpxchg_and_branch2(ptr %ptr, i32 signext %cmp, i32 signext %val) nounwind {
-; CHECK-LABEL: cmpxchg_and_branch2:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:  .LBB1_1: # %do_cmpxchg
-; CHECK-NEXT:    # =>This Loop Header: Depth=1
-; CHECK-NEXT:    # Child Loop BB1_3 Depth 2
-; CHECK-NEXT:  .LBB1_3: # %do_cmpxchg
-; CHECK-NEXT:    # Parent Loop BB1_1 Depth=1
-; CHECK-NEXT:    # => This Inner Loop Header: Depth=2
-; CHECK-NEXT:    lr.w.aqrl a3, (a0)
-; CHECK-NEXT:    bne a3, a1, .LBB1_5
-; CHECK-NEXT:  # %bb.4: # %do_cmpxchg
-; CHECK-NEXT:    # in Loop: Header=BB1_3 Depth=2
-; CHECK-NEXT:    sc.w.rl a4, a2, (a0)
-; CHECK-NEXT:    bnez a4, .LBB1_3
-; CHECK-NEXT:  .LBB1_5: # %do_cmpxchg
-; CHECK-NEXT:    # in Loop: Header=BB1_1 Depth=1
-; CHECK-NEXT:    beq a3, a1, .LBB1_1
-; CHECK-NEXT:  # %bb.2: # %exit
-; CHECK-NEXT:    ret
+; RV32IA-LABEL: cmpxchg_and_branch2:
+; RV32IA:       # %bb.0: # %entry
+; RV32IA-NEXT:  .LBB1_1: # %do_cmpxchg
+; RV32IA-NEXT:    # =>This Loop Header: Depth=1
+; RV32IA-NEXT:    # Child Loop BB1_3 Depth 2
+; RV32IA-NEXT:  .LBB1_3: # %do_cmpxchg
+; RV32IA-NEXT:    # Parent Loop BB1_1 Depth=1
+; RV32IA-NEXT:    # => This Inner Loop Header: Depth=2
+; RV32IA-NEXT:    lr.w.aqrl a3, (a0)
+; RV32IA-NEXT:    bne a3, a1, .LBB1_5
+; RV32IA-NEXT:  # %bb.4: # %do_cmpxchg
+; RV32IA-NEXT:    # in Loop: Header=BB1_3 Depth=2
+; RV32IA-NEXT:    sc.w.rl a4, a2, (a0)
+; RV32IA-NEXT:    bnez a4, .LBB1_3
+; RV32IA-NEXT:  .LBB1_5: # %do_cmpxchg
+; RV32IA-NEXT:    # in Loop: Header=BB1_1 Depth=1
+; RV32IA-NEXT:    beq a3, a1, .LBB1_1
+; RV32IA-NEXT:  # %bb.2: # %exit
+; RV32IA-NEXT:    ret
+;
+; RV32IA-ZACAS-LABEL: cmpxchg_and_branch2:
+; RV32IA-ZACAS:       # %bb.0: # %entry
+; RV32IA-ZACAS-NEXT:  .LBB1_1: # %do_cmpxchg
+; RV32IA-ZACAS-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT:    mv a3, a1
+; RV32IA-ZACAS-NEXT:    amocas.w.aqrl a3, a2, (a0)
+; RV32IA-ZACAS-NEXT:    beq a3, a1, .LBB1_1
+; RV32IA-ZACAS-NEXT:  # %bb.2: # %exit
+; RV32IA-ZACAS-NEXT:    ret
+;
+; RV64IA-LABEL: cmpxchg_and_branch2:
+; RV64IA:       # %bb.0: # %entry
+; RV64IA-NEXT:  .LBB1_1: # %do_cmpxchg
+; RV64IA-NEXT:    # =>This Loop Header: Depth=1
+; RV64IA-NEXT:    # Child Loop BB1_3 Depth 2
+; RV64IA-NEXT:  .LBB1_3: # %do_cmpxchg
+; RV64IA-NEXT:    # Parent Loop BB1_1 Depth=1
+; RV64IA-NEXT:    # => This Inner Loop Header: Depth=2
+; RV64IA-NEXT:    lr.w.aqrl a3, (a0)
+; RV64IA-NEXT:    bne a3, a1, .LBB1_5
+; RV64IA-NEXT:  # %bb.4: # %do_cmpxchg
+; RV64IA-NEXT:    # in Loop: Header=BB1_3 Depth=2
+; RV64IA-NEXT:    sc.w.rl a4, a2, (a0)
+; RV64IA-NEXT:    bnez a4, .LBB1_3
+; RV64IA-NEXT:  .LBB1_5: # %do_cmpxchg
+; RV64IA-NEXT:    # in Loop: Header=BB1_1 Depth=1
+; RV64IA-NEXT:    beq a3, a1, .LBB1_1
+; RV64IA-NEXT:  # %bb.2: # %exit
+; RV64IA-NEXT:    ret
+;
+; RV64IA-ZACAS-LABEL: cmpxchg_and_branch2:
+; RV64IA-ZACAS:       # %bb.0: # %entry
+; RV64IA-ZACAS-NEXT:  .LBB1_1: # %do_cmpxchg
+; RV64IA-ZACAS-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV64IA-ZACAS-NEXT:    mv a3, a1
+; RV64IA-ZACAS-NEXT:    amocas.w.aqrl a3, a2, (a0)
+; RV64IA-ZACAS-NEXT:    beq a3, a1, .LBB1_1
+; RV64IA-ZACAS-NEXT:  # %bb.2: # %exit
+; RV64IA-ZACAS-NEXT:    ret
 entry:
   br label %do_cmpxchg
 do_cmpxchg:
@@ -96,6 +178,36 @@ define void @cmpxchg_masked_and_branch1(ptr %ptr, i8 signext %cmp, i8 signext %v
 ; RV32IA-NEXT:  # %bb.2: # %exit
 ; RV32IA-NEXT:    ret
 ;
+; RV32IA-ZACAS-LABEL: cmpxchg_masked_and_branch1:
+; RV32IA-ZACAS:       # %bb.0: # %entry
+; RV32IA-ZACAS-NEXT:    andi a3, a0, -4
+; RV32IA-ZACAS-NEXT:    slli a4, a0, 3
+; RV32IA-ZACAS-NEXT:    li a0, 255
+; RV32IA-ZACAS-NEXT:    sll a0, a0, a4
+; RV32IA-ZACAS-NEXT:    andi a1, a1, 255
+; RV32IA-ZACAS-NEXT:    sll a1, a1, a4
+; RV32IA-ZACAS-NEXT:    andi a2, a2, 255
+; RV32IA-ZACAS-NEXT:    sll a2, a2, a4
+; RV32IA-ZACAS-NEXT:  .LBB2_1: # %do_cmpxchg
+; RV32IA-ZACAS-NEXT:    # =>This Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT:    # Child Loop BB2_3 Depth 2
+; RV32IA-ZACAS-NEXT:  .LBB2_3: # %do_cmpxchg
+; RV32IA-ZACAS-NEXT:    # Parent Loop BB2_1 Depth=1
+; RV32IA-ZACAS-NEXT:    # => This Inner Loop Header: Depth=2
+; RV32IA-ZACAS-NEXT:    lr.w.aqrl a4, (a3)
+; RV32IA-ZACAS-NEXT:    and a5, a4, a0
+; RV32IA-ZACAS-NEXT:    bne a5, a1, .LBB2_1
+; RV32IA-ZACAS-NEXT:  # %bb.4: # %do_cmpxchg
+; RV32IA-ZACAS-NEXT:    # in Loop: Header=BB2_3 Depth=2
+; RV32IA-ZACAS-NEXT:    xor a5, a4, a2
+; RV32IA-ZACAS-NEXT:    and a5, a5, a0
+; RV32IA-ZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-ZACAS-NEXT:    sc.w.rl a5, a5, (a3)
+; RV32IA-ZACAS-NEXT:    bnez a5, .LBB2_3
+; RV32IA-ZACAS-NEXT:  # %bb.5: # %do_cmpxchg
+; RV32IA-ZACAS-NEXT:  # %bb.2: # %exit
+; RV32IA-ZACAS-NEXT:    ret
+;
 ; RV64IA-LABEL: cmpxchg_masked_and_branch1:
 ; RV64IA:       # %bb.0: # %entry
 ; RV64IA-NEXT:    andi a3, a0, -4
@@ -125,6 +237,36 @@ define void @cmpxchg_masked_and_branch1(ptr %ptr, i8 signext %cmp, i8 signext %v
 ; RV64IA-NEXT:  # %bb.5: # %do_cmpxchg
 ; RV64IA-NEXT:  # %bb.2: # %exit
 ; RV64IA-NEXT:    ret
+;
+; RV64IA-ZACAS-LABEL: cmpxchg_masked_and_branch1:
+; RV64IA-ZACAS:       # %bb.0: # %entry
+; RV64IA-ZACAS-NEXT:    andi a3, a0, -4
+; RV64IA-ZACAS-NEXT:    slli a4, a0, 3
+; RV64IA-ZACAS-NEXT:    li a0, 255
+; RV64IA-ZACAS-NEXT:    sllw a0, a0, a4
+; RV64IA-ZACAS-NEXT:    andi a1, a1, 255
+; RV64IA-ZACAS-NEXT:    sllw a1, a1, a4
+; RV64IA-ZACAS-NEXT:    andi a2, a2, 255
+; RV64IA-ZACAS-NEXT:    sllw a2, a2, a4
+; RV64IA-ZACAS-NEXT:  .LBB2_1: # %do_cmpxchg
+; RV64IA-ZACAS-NEXT:    # =>This Loop Header: Depth=1
+; RV64IA-ZACAS-NEXT:    # Child Loop BB2_3 Depth 2
+; RV64IA-ZACAS-NEXT:  .LBB2_3: # %do_cmpxchg
+; RV64IA-ZACAS-NEXT:    # Parent Loop BB2_1 Depth=1
+; RV64IA-ZACAS-NEXT:    # => This Inner Loop Header: Depth=2
+; RV64IA-ZACAS-NEXT:    lr.w.aqrl a4, (a3)
+; RV64IA-ZACAS-NEXT:    and a5, a4, a0
+; RV64IA-ZACAS-NEXT:    bne a5, a1, .LBB2_1
+; RV64IA-ZACAS-NEXT:  # %bb.4: # %do_cmpxchg
+; RV64IA-ZACAS-NEXT:    # in Loop: Header=BB2_3 Depth=2
+; RV64IA-ZACAS-NEXT:    xor a5, a4, a2
+; RV64IA-ZACAS-NEXT:    and a5, a5, a0
+; RV64IA-ZACAS-NEXT:    xor a5, a4, a5
+; RV64IA-ZACAS-NEXT:    sc.w.rl a5, a5, (a3)
+; RV64IA-ZACAS-NEXT:    bnez a5, .LBB2_3
+; RV64IA-ZACAS-NEXT:  # %bb.5: # %do_cmpxchg
+; RV64IA-ZACAS-NEXT:  # %bb.2: # %exit
+; RV64IA-ZACAS-NEXT:    ret
 entry:
   br label %do_cmpxchg
 do_cmpxchg:
@@ -169,6 +311,39 @@ define void @cmpxchg_masked_and_branch2(ptr %ptr, i8 signext %cmp, i8 signext %v
 ; RV32IA-NEXT:  # %bb.2: # %exit
 ; RV32IA-NEXT:    ret
 ;
+; RV32IA-ZACAS-LABEL: cmpxchg_masked_and_branch2:
+; RV32IA-ZACAS:       # %bb.0: # %entry
+; RV32IA-ZACAS-NEXT:    andi a3, a0, -4
+; RV32IA-ZACAS-NEXT:    slli a4, a0, 3
+; RV32IA-ZACAS-NEXT:    li a0, 255
+; RV32IA-ZACAS-NEXT:    sll a0, a0, a4
+; RV32IA-ZACAS-NEXT:    andi a1, a1, 255
+; RV32IA-ZACAS-NEXT:    sll a1, a1, a4
+; RV32IA-ZACAS-NEXT:    andi a2, a2, 255
+; RV32IA-ZACAS-NEXT:    sll a2, a2, a4
+; RV32IA-ZACAS-NEXT:  .LBB3_1: # %do_cmpxchg
+; RV32IA-ZACAS-NEXT:    # =>This Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT:    # Child Loop BB3_3 Depth 2
+; RV32IA-ZACAS-NEXT:  .LBB3_3: # %do_cmpxchg
+; RV32IA-ZACAS-NEXT:    # Parent Loop BB3_1 Depth=1
+; RV32IA-ZACAS-NEXT:    # => This Inner Loop Header: Depth=2
+; RV32IA-ZACAS-NEXT:    lr.w.aqrl a4, (a3)
+; RV32IA-ZACAS-NEXT:    and a5, a4, a0
+; RV32IA-ZACAS-NEXT:    bne a5, a1, .LBB3_5
+; RV32IA-ZACAS-NEXT:  # %bb.4: # %do_cmpxchg
+; RV32IA-ZACAS-NEXT:    # in Loop: Header=BB3_3 Depth=2
+; RV32IA-ZACAS-NEXT:    xor a5, a4, a2
+; RV32IA-ZACAS-NEXT:    and a5, a5, a0
+; RV32IA-ZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-ZACAS-NEXT:    sc.w.rl a5, a5, (a3)
+; RV32IA-ZACAS-NEXT:    bnez a5, .LBB3_3
+; RV32IA-ZACAS-NEXT:  .LBB3_5: # %do_cmpxchg
+; RV32IA-ZACAS-NEXT:    # in Loop: Header=BB3_1 Depth=1
+; RV32IA-ZACAS-NEXT:    and a4, a4, a0
+; RV32IA-ZACAS-NEXT:    beq a1, a4, .LBB3_1
+; RV32IA-ZACAS-NEXT:  # %bb.2: # %exit
+; RV32IA-ZACAS-NEXT:    ret
+;
 ; RV64IA-LABEL: cmpxchg_masked_and_branch2:
 ; RV64IA:       # %bb.0: # %entry
 ; RV64IA-NEXT:    andi a3, a0, -4
@@ -201,6 +376,39 @@ define void @cmpxchg_masked_and_branch2(ptr %ptr, i8 signext %cmp, i8 signext %v
 ; RV64IA-NEXT:    beq a1, a4, .LBB3_1
 ; RV64IA-NEXT:  # %bb.2: # %exit
 ; RV64IA-NEXT:    ret
+;
+; RV64IA-ZACAS-LABEL: cmpxchg_masked_and_branch2:
+; RV64IA-ZACAS:       # %bb.0: # %entry
+; RV64IA-ZACAS-NEXT:    andi a3, a0, -4
+; RV64IA-ZACAS-NEXT:    slli a4, a0, 3
+; RV64IA-ZACAS-NEXT:    li a0, 255
+; RV64IA-ZACAS-NEXT:    sllw a0, a0, a4
+; RV64IA-ZACAS-NEXT:    andi a1, a1, 255
+; RV64IA-ZACAS-NEXT:    sllw a1, a1, a4
+; RV64IA-ZACAS-NEXT:    andi a2, a2, 255
+; RV64IA-ZACAS-NEXT:    sllw a2, a2, a4
+; RV64IA-ZACAS-NEXT:  .LBB3_1: # %do_cmpxchg
+; RV64IA-ZACAS-NEXT:    # =>This Loop Header: Depth=1
+; RV64IA-ZACAS-NEXT:    # Child Loop BB3_3 Depth 2
+; RV64IA-ZACAS-NEXT:  .LBB3_3: # %do_cmpxchg
+; RV64IA-ZACAS-NEXT:    # Parent Loop BB3_1 Depth=1
+; RV64IA-ZACAS-NEXT:    # => This Inner Loop Header: Depth=2
+; RV64IA-ZACAS-NEXT:    lr.w.aqrl a4, (a3)
+; RV64IA-ZACAS-NEXT:    and a5, a4, a0
+; RV64IA-ZACAS-NEXT:    bne a5, a1, .LBB3_5
+; RV64IA-ZACAS-NEXT:  # %bb.4: # %do_cmpxchg
+; RV64IA-ZACAS-NEXT:    # in Loop: Header=BB3_3 Depth=2
+; RV64IA-ZACAS-NEXT:    xor a5, a4, a2
+; RV64IA-ZACAS-NEXT:    and a5, a5, a0
+; RV64IA-ZACAS-NEXT:    xor a5, a4, a5
+; RV64IA-ZACAS-NEXT:    sc.w.rl a5, a5, (a3)
+; RV64IA-ZACAS-NEXT:    bnez a5, .LBB3_3
+; RV64IA-ZACAS-NEXT:  .LBB3_5: # %do_cmpxchg
+; RV64IA-ZACAS-NEXT:    # in Loop: Header=BB3_1 Depth=1
+; RV64IA-ZACAS-NEXT:    and a4, a4, a0
+; RV64IA-ZACAS-NEXT:    beq a1, a4, .LBB3_1
+; RV64IA-ZACAS-NEXT:  # %bb.2: # %exit
+; RV64IA-ZACAS-NEXT:    ret
 entry:
   br label %do_cmpxchg
 do_cmpxchg:
@@ -212,25 +420,65 @@ exit:
 }
 
 define void @cmpxchg_and_irrelevant_branch(ptr %ptr, i32 signext %cmp, i32 signext %val, i1 zeroext %bool) nounwind {
-; CHECK-LABEL: cmpxchg_and_irrelevant_branch:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:  .LBB4_1: # %do_cmpxchg
-; CHECK-NEXT:    # =>This Loop Header: Depth=1
-; CHECK-NEXT:    # Child Loop BB4_3 Depth 2
-; CHECK-NEXT:  .LBB4_3: # %do_cmpxchg
-; CHECK-NEXT:    # Parent Loop BB4_1 Depth=1
-; CHECK-NEXT:    # => This Inner Loop Header: Depth=2
-; CHECK-NEXT:    lr.w.aqrl a4, (a0)
-; CHECK-NEXT:    bne a4, a1, .LBB4_5
-; CHECK-NEXT:  # %bb.4: # %do_cmpxchg
-; CHECK-NEXT:    # in Loop: Header=BB4_3 Depth=2
-; CHECK-NEXT:    sc.w.rl a5, a2, (a0)
-; CHECK-NEXT:    bnez a5, .LBB4_3
-; CHECK-NEXT:  .LBB4_5: # %do_cmpxchg
-; CHECK-NEXT:    # in Loop: Header=BB4_1 Depth=1
-; CHECK-NEXT:    beqz a3, .LBB4_1
-; CHECK-NEXT:  # %bb.2: # %exit
-; CHECK-NEXT:    ret
+; RV32IA-LABEL: cmpxchg_and_irrelevant_branch:
+; RV32IA:       # %bb.0: # %entry
+; RV32IA-NEXT:  .LBB4_1: # %do_cmpxchg
+; RV32IA-NEXT:    # =>This Loop Header: Depth=1
+; RV32IA-NEXT:    # Child Loop BB4_3 Depth 2
+; RV32IA-NEXT:  .LBB4_3: # %do_cmpxchg
+; RV32IA-NEXT:    # Parent Loop BB4_1 Depth=1
+; RV32IA-NEXT:    # => This Inner Loop Header: Depth=2
+; RV32IA-NEXT:    lr.w.aqrl a4, (a0)
+; RV32IA-NEXT:    bne a4, a1, .LBB4_5
+; RV32IA-NEXT:  # %bb.4: # %do_cmpxchg
+; RV32IA-NEXT:    # in Loop: Header=BB4_3 Depth=2
+; RV32IA-NEXT:    sc.w.rl a5, a2, (a0)
+; RV32IA-NEXT:    bnez a5, .LBB4_3
+; RV32IA-NEXT:  .LBB4_5: # %do_cmpxchg
+; RV32IA-NEXT:    # in Loop: Header=BB4_1 Depth=1
+; RV32IA-NEXT:    beqz a3, .LBB4_1
+; RV32IA-NEXT:  # %bb.2: # %exit
+; RV32IA-NEXT:    ret
+;
+; RV32IA-ZACAS-LABEL: cmpxchg_and_irrelevant_branch:
+; RV32IA-ZACAS:       # %bb.0: # %entry
+; RV32IA-ZACAS-NEXT:  .LBB4_1: # %do_cmpxchg
+; RV32IA-ZACAS-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT:    mv a4, a1
+; RV32IA-ZACAS-NEXT:    amocas.w.aqrl a4, a2, (a0)
+; RV32IA-ZACAS-NEXT:    beqz a3, .LBB4_1
+; RV32IA-ZACAS-NEXT:  # %bb.2: # %exit
+; RV32IA-ZACAS-NEXT:    ret
+;
+; RV64IA-LABEL: cmpxchg_and_irrelevant_branch:
+; RV64IA:       # %bb.0: # %entry
+; RV64IA-NEXT:  .LBB4_1: # %do_cmpxchg
+; RV64IA-NEXT:    # =>This Loop Header: Depth=1
+; RV64IA-NEXT:    # Child Loop BB4_3 Depth 2
+; RV64IA-NEXT:  .LBB4_3: # %do_cmpxchg
+; RV64IA-NEXT:    # Parent Loop BB4_1 Depth=1
+; RV64IA-NEXT:    # => This Inner Loop Header: Depth=2
+; RV64IA-NEXT:    lr.w.aqrl a4, (a0)
+; RV64IA-NEXT:    bne a4, a1, .LBB4_5
+; RV64IA-NEXT:  # %bb.4: # %do_cmpxchg
+; RV64IA-NEXT:    # in Loop: Header=BB4_3 Depth=2
+; RV64IA-NEXT:    sc.w.rl a5, a2, (a0)
+; RV64IA-NEXT:    bnez a5, .LBB4_3
+; RV64IA-NEXT:  .LBB4_5: # %do_cmpxchg
+; RV64IA-NEXT:    # in Loop: Header=BB4_1 Depth=1
+; RV64IA-NEXT:    beqz a3, .LBB4_1
+; RV64IA-NEXT:  # %bb.2: # %exit
+; RV64IA-NEXT:    ret
+;
+; RV64IA-ZACAS-LABEL: cmpxchg_and_irrelevant_branch:
+; RV64IA-ZACAS:       # %bb.0: # %entry
+; RV64IA-ZACAS-NEXT:  .LBB4_1: # %do_cmpxchg
+; RV64IA-ZACAS-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV64IA-ZACAS-NEXT:    mv a4, a1
+; RV64IA-ZACAS-NEXT:    amocas.w.aqrl a4, a2, (a0)
+; RV64IA-ZACAS-NEXT:    beqz a3, .LBB4_1
+; RV64IA-ZACAS-NEXT:  # %bb.2: # %exit
+; RV64IA-ZACAS-NEXT:    ret
 entry:
   br label %do_cmpxchg
 do_cmpxchg:
diff --git a/llvm/test/CodeGen/RISCV/atomic-cmpxchg-flag.ll b/llvm/test/CodeGen/RISCV/atomic-cmpxchg-flag.ll
index f25571b5cf25310..4c1ac38be0630e8 100644
--- a/llvm/test/CodeGen/RISCV/atomic-cmpxchg-flag.ll
+++ b/llvm/test/CodeGen/RISCV/atomic-cmpxchg-flag.ll
@@ -1,6 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv64 -mattr=+a -verify-machineinstrs < %s \
 ; RUN:   | FileCheck -check-prefix=RV64IA %s
+; RUN: llc -mtriple=riscv64 -mattr=+a,+experimental-zacas -verify-machineinstrs < %s \
+; RUN:   | FileCheck -check-prefix=RV64IA-ZACAS %s
 
 ; This test ensures that the output of the 'lr.w' instruction is sign-extended.
 ; Previously, the default zero-extension was being used and 'cmp' parameter
@@ -21,6 +23,14 @@ define i1 @cmpxchg_i32_seq_cst_seq_cst(ptr %ptr, i32 signext %cmp,
 ; RV64IA-NEXT:    xor a1, a3, a1
 ; RV64IA-NEXT:    seqz a0, a1
 ; RV64IA-NEXT:    ret
+;
+; RV64IA-ZACAS-LABEL: cmpxchg_i32_seq_cst_seq_cst:
+; RV64IA-ZACAS:       # %bb.0: # %entry
+; RV64IA-ZACAS-NEXT:    mv a3, a1
+; RV64IA-ZACAS-NEXT:    amocas.w.aqrl a3, a2, (a0)
+; RV64IA-ZACAS-NEXT:    xor a1, a3, a1
+; RV64IA-ZACAS-NEXT:    seqz a0, a1
+; RV64IA-ZACAS-NEXT:    ret
         i32 signext %val) nounwind {
 entry:
   %0 = cmpxchg ptr %ptr, i32 %cmp, i32 %val seq_cst seq_cst
diff --git a/llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll b/llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll
index f900b5161f75128..faeb3512c317e1b 100644
--- a/llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll
+++ b/llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll
@@ -3,12 +3,16 @@
 ; RUN:   | FileCheck -check-prefix=RV32I %s
 ; RUN: llc -mtriple=riscv32 -mattr=+a -verify-machineinstrs < %s \
 ; RUN:   | FileCheck -check-prefixes=RV32IA,RV32IA-WMO %s
+; RUN: llc -mtriple=riscv32 -mattr=+a,+experimental-zacas -verify-machineinstrs < %s \
+; RUN:   | FileCheck -check-prefixes=RV32IA,RV32IA-ZACAS %s
 ; RUN: llc -mtriple=riscv32 -mattr=+a,+experimental-ztso -verify-machineinstrs < %s \
 ; RUN:   | FileCheck -check-prefixes=RV32IA,RV32IA-TSO %s
 ; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
 ; RUN:   | FileCheck -check-prefix=RV64I %s
 ; RUN: llc -mtriple=riscv64 -mattr=+a -verify-machineinstrs < %s \
 ; RUN:   | FileCheck -check-prefixes=RV64IA,RV64IA-WMO %s
+; RUN: llc -mtriple=riscv64 -mattr=+a,+experimental-zacas -verify-machineinstrs < %s \
+; RUN:   | FileCheck -check-prefixes=RV64IA,RV64IA-ZACAS %s
 ; RUN: llc -mtriple=riscv64 -mattr=+a,+experimental-ztso -verify-machineinstrs < %s \
 ; RUN:   | FileCheck -check-prefixes=RV64IA,RV64IA-TSO %s
 
@@ -125,6 +129,29 @@ define void @cmpxchg_i8_acquire_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind {
 ; RV32IA-WMO-NEXT:  .LBB1_3:
 ; RV32IA-WMO-NEXT:    ret
 ;
+; RV32IA-ZACAS-LABEL: cmpxchg_i8_acquire_monotonic:
+; RV32IA-ZACAS:       # %bb.0:
+; RV32IA-ZACAS-NEXT:    andi a3, a0, -4
+; RV32IA-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-ZACAS-NEXT:    li a4, 255
+; RV32IA-ZACAS-NEXT:    sll a4, a4, a0
+; RV32IA-ZACAS-NEXT:    andi a1, a1, 255
+; RV32IA-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-ZACAS-NEXT:    andi a2, a2, 255
+; RV32IA-ZACAS-NEXT:    sll a0, a2, a0
+; RV32IA-ZACAS-NEXT:  .LBB1_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT:    lr.w.aq a2, (a3)
+; RV32IA-ZACAS-NEXT:    and a5, a2, a4
+; RV32IA-ZACAS-NEXT:    bne a5, a1, .LBB1_3
+; RV32IA-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB1_1 Depth=1
+; RV32IA-ZACAS-NEXT:    xor a5, a2, a0
+; RV32IA-ZACAS-NEXT:    and a5, a5, a4
+; RV32IA-ZACAS-NEXT:    xor a5, a2, a5
+; RV32IA-ZACAS-NEXT:    sc.w a5, a5, (a3)
+; RV32IA-ZACAS-NEXT:    bnez a5, .LBB1_1
+; RV32IA-ZACAS-NEXT:  .LBB1_3:
+; RV32IA-ZACAS-NEXT:    ret
+;
 ; RV32IA-TSO-LABEL: cmpxchg_i8_acquire_monotonic:
 ; RV32IA-TSO:       # %bb.0:
 ; RV32IA-TSO-NEXT:    andi a3, a0, -4
@@ -184,6 +211,29 @@ define void @cmpxchg_i8_acquire_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind {
 ; RV64IA-WMO-NEXT:  .LBB1_3:
 ; RV64IA-WMO-NEXT:    ret
 ;
+; RV64IA-ZACAS-LABEL: cmpxchg_i8_acquire_monotonic:
+; RV64IA-ZACAS:       # %bb.0:
+; RV64IA-ZACAS-NEXT:    andi a3, a0, -4
+; RV64IA-ZACAS-NEXT:    slli a0, a0, 3
+; RV64IA-ZACAS-NEXT:    li a4, 255
+; RV64IA-ZACAS-NEXT:    sllw a4, a4, a0
+; RV64IA-ZACAS-NEXT:    andi a1, a1, 255
+; RV64IA-ZACAS-NEXT:    sllw a1, a1, a0
+; RV64IA-ZACAS-NEXT:    andi a2, a2, 255
+; RV64IA-ZACAS-NEXT:    sllw a0, a2, a0
+; RV64IA-ZACAS-NEXT:  .LBB1_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-ZACAS-NEXT:    lr.w.aq a2, (a3)
+; RV64IA-ZACAS-NEXT:    and a5, a2, a4
+; RV64IA-ZACAS-NEXT:    bne a5, a1, .LBB1_3
+; RV64IA-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB1_1 Depth=1
+; RV64IA-ZACAS-NEXT:    xor a5, a2, a0
+; RV64IA-ZACAS-NEXT:    and a5, a5, a4
+; RV64IA-ZACAS-NEXT:    xor a5, a2, a5
+; RV64IA-ZACAS-NEXT:    sc.w a5, a5, (a3)
+; RV64IA-ZACAS-NEXT:    bnez a5, .LBB1_1
+; RV64IA-ZACAS-NEXT:  .LBB1_3:
+; RV64IA-ZACAS-NEXT:    ret
+;
 ; RV64IA-TSO-LABEL: cmpxchg_i8_acquire_monotonic:
 ; RV64IA-TSO:       # %bb.0:
 ; RV64IA-TSO-NEXT:    andi a3, a0, -4
@@ -206,6 +256,50 @@ define void @cmpxchg_i8_acquire_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind {
 ; RV64IA-TSO-NEXT:    bnez a5, .LBB1_1
 ; RV64IA-TSO-NEXT:  .LBB1_3:
 ; RV64IA-TSO-NEXT:    ret
+; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i8_acquire_monotonic:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    andi a3, a0, -4
+; RV32IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT:    li a4, 255
+; RV32IA-WMO-ZACAS-NEXT:    sll a4, a4, a0
+; RV32IA-WMO-ZACAS-NEXT:    andi a1, a1, 255
+; RV32IA-WMO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:    andi a2, a2, 255
+; RV32IA-WMO-ZACAS-NEXT:    sll a0, a2, a0
+; RV32IA-WMO-ZACAS-NEXT:  .LBB1_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    lr.w.aq a2, (a3)
+; RV32IA-WMO-ZACAS-NEXT:    and a5, a2, a4
+; RV32IA-WMO-ZACAS-NEXT:    bne a5, a1, .LBB1_3
+; RV32IA-WMO-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB1_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    xor a5, a2, a0
+; RV32IA-WMO-ZACAS-NEXT:    and a5, a5, a4
+; RV32IA-WMO-ZACAS-NEXT:    xor a5, a2, a5
+; RV32IA-WMO-ZACAS-NEXT:    sc.w a5, a5, (a3)
+; RV32IA-WMO-ZACAS-NEXT:    bnez a5, .LBB1_1
+; RV32IA-WMO-ZACAS-NEXT:  .LBB1_3:
+; RV32IA-WMO-ZACAS-NEXT:    ret
+; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i8_acquire_monotonic:
+; RV64IA-WMO-ZACAS:       # %bb.0:
+; RV64IA-WMO-ZACAS-NEXT:    andi a3, a0, -4
+; RV64IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV64IA-WMO-ZACAS-NEXT:    li a4, 255
+; RV64IA-WMO-ZACAS-NEXT:    sllw a4, a4, a0
+; RV64IA-WMO-ZACAS-NEXT:    andi a1, a1, 255
+; RV64IA-WMO-ZACAS-NEXT:    sllw a1, a1, a0
+; RV64IA-WMO-ZACAS-NEXT:    andi a2, a2, 255
+; RV64IA-WMO-ZACAS-NEXT:    sllw a0, a2, a0
+; RV64IA-WMO-ZACAS-NEXT:  .LBB1_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-ZACAS-NEXT:    lr.w.aq a2, (a3)
+; RV64IA-WMO-ZACAS-NEXT:    and a5, a2, a4
+; RV64IA-WMO-ZACAS-NEXT:    bne a5, a1, .LBB1_3
+; RV64IA-WMO-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB1_1 Depth=1
+; RV64IA-WMO-ZACAS-NEXT:    xor a5, a2, a0
+; RV64IA-WMO-ZACAS-NEXT:    and a5, a5, a4
+; RV64IA-WMO-ZACAS-NEXT:    xor a5, a2, a5
+; RV64IA-WMO-ZACAS-NEXT:    sc.w a5, a5, (a3)
+; RV64IA-WMO-ZACAS-NEXT:    bnez a5, .LBB1_1
+; RV64IA-WMO-ZACAS-NEXT:  .LBB1_3:
+; RV64IA-WMO-ZACAS-NEXT:    ret
   %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val acquire monotonic
   ret void
 }
@@ -247,6 +341,29 @@ define void @cmpxchg_i8_acquire_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind {
 ; RV32IA-WMO-NEXT:  .LBB2_3:
 ; RV32IA-WMO-NEXT:    ret
 ;
+; RV32IA-ZACAS-LABEL: cmpxchg_i8_acquire_acquire:
+; RV32IA-ZACAS:       # %bb.0:
+; RV32IA-ZACAS-NEXT:    andi a3, a0, -4
+; RV32IA-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-ZACAS-NEXT:    li a4, 255
+; RV32IA-ZACAS-NEXT:    sll a4, a4, a0
+; RV32IA-ZACAS-NEXT:    andi a1, a1, 255
+; RV32IA-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-ZACAS-NEXT:    andi a2, a2, 255
+; RV32IA-ZACAS-NEXT:    sll a0, a2, a0
+; RV32IA-ZACAS-NEXT:  .LBB2_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT:    lr.w.aq a2, (a3)
+; RV32IA-ZACAS-NEXT:    and a5, a2, a4
+; RV32IA-ZACAS-NEXT:    bne a5, a1, .LBB2_3
+; RV32IA-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB2_1 Depth=1
+; RV32IA-ZACAS-NEXT:    xor a5, a2, a0
+; RV32IA-ZACAS-NEXT:    and a5, a5, a4
+; RV32IA-ZACAS-NEXT:    xor a5, a2, a5
+; RV32IA-ZACAS-NEXT:    sc.w a5, a5, (a3)
+; RV32IA-ZACAS-NEXT:    bnez a5, .LBB2_1
+; RV32IA-ZACAS-NEXT:  .LBB2_3:
+; RV32IA-ZACAS-NEXT:    ret
+;
 ; RV32IA-TSO-LABEL: cmpxchg_i8_acquire_acquire:
 ; RV32IA-TSO:       # %bb.0:
 ; RV32IA-TSO-NEXT:    andi a3, a0, -4
@@ -306,6 +423,29 @@ define void @cmpxchg_i8_acquire_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind {
 ; RV64IA-WMO-NEXT:  .LBB2_3:
 ; RV64IA-WMO-NEXT:    ret
 ;
+; RV64IA-ZACAS-LABEL: cmpxchg_i8_acquire_acquire:
+; RV64IA-ZACAS:       # %bb.0:
+; RV64IA-ZACAS-NEXT:    andi a3, a0, -4
+; RV64IA-ZACAS-NEXT:    slli a0, a0, 3
+; RV64IA-ZACAS-NEXT:    li a4, 255
+; RV64IA-ZACAS-NEXT:    sllw a4, a4, a0
+; RV64IA-ZACAS-NEXT:    andi a1, a1, 255
+; RV64IA-ZACAS-NEXT:    sllw a1, a1, a0
+; RV64IA-ZACAS-NEXT:    andi a2, a2, 255
+; RV64IA-ZACAS-NEXT:    sllw a0, a2, a0
+; RV64IA-ZACAS-NEXT:  .LBB2_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-ZACAS-NEXT:    lr.w.aq a2, (a3)
+; RV64IA-ZACAS-NEXT:    and a5, a2, a4
+; RV64IA-ZACAS-NEXT:    bne a5, a1, .LBB2_3
+; RV64IA-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB2_1 Depth=1
+; RV64IA-ZACAS-NEXT:    xor a5, a2, a0
+; RV64IA-ZACAS-NEXT:    and a5, a5, a4
+; RV64IA-ZACAS-NEXT:    xor a5, a2, a5
+; RV64IA-ZACAS-NEXT:    sc.w a5, a5, (a3)
+; RV64IA-ZACAS-NEXT:    bnez a5, .LBB2_1
+; RV64IA-ZACAS-NEXT:  .LBB2_3:
+; RV64IA-ZACAS-NEXT:    ret
+;
 ; RV64IA-TSO-LABEL: cmpxchg_i8_acquire_acquire:
 ; RV64IA-TSO:       # %bb.0:
 ; RV64IA-TSO-NEXT:    andi a3, a0, -4
@@ -328,6 +468,50 @@ define void @cmpxchg_i8_acquire_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind {
 ; RV64IA-TSO-NEXT:    bnez a5, .LBB2_1
 ; RV64IA-TSO-NEXT:  .LBB2_3:
 ; RV64IA-TSO-NEXT:    ret
+; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i8_acquire_acquire:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    andi a3, a0, -4
+; RV32IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT:    li a4, 255
+; RV32IA-WMO-ZACAS-NEXT:    sll a4, a4, a0
+; RV32IA-WMO-ZACAS-NEXT:    andi a1, a1, 255
+; RV32IA-WMO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:    andi a2, a2, 255
+; RV32IA-WMO-ZACAS-NEXT:    sll a0, a2, a0
+; RV32IA-WMO-ZACAS-NEXT:  .LBB2_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    lr.w.aq a2, (a3)
+; RV32IA-WMO-ZACAS-NEXT:    and a5, a2, a4
+; RV32IA-WMO-ZACAS-NEXT:    bne a5, a1, .LBB2_3
+; RV32IA-WMO-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB2_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    xor a5, a2, a0
+; RV32IA-WMO-ZACAS-NEXT:    and a5, a5, a4
+; RV32IA-WMO-ZACAS-NEXT:    xor a5, a2, a5
+; RV32IA-WMO-ZACAS-NEXT:    sc.w a5, a5, (a3)
+; RV32IA-WMO-ZACAS-NEXT:    bnez a5, .LBB2_1
+; RV32IA-WMO-ZACAS-NEXT:  .LBB2_3:
+; RV32IA-WMO-ZACAS-NEXT:    ret
+; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i8_acquire_acquire:
+; RV64IA-WMO-ZACAS:       # %bb.0:
+; RV64IA-WMO-ZACAS-NEXT:    andi a3, a0, -4
+; RV64IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV64IA-WMO-ZACAS-NEXT:    li a4, 255
+; RV64IA-WMO-ZACAS-NEXT:    sllw a4, a4, a0
+; RV64IA-WMO-ZACAS-NEXT:    andi a1, a1, 255
+; RV64IA-WMO-ZACAS-NEXT:    sllw a1, a1, a0
+; RV64IA-WMO-ZACAS-NEXT:    andi a2, a2, 255
+; RV64IA-WMO-ZACAS-NEXT:    sllw a0, a2, a0
+; RV64IA-WMO-ZACAS-NEXT:  .LBB2_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-ZACAS-NEXT:    lr.w.aq a2, (a3)
+; RV64IA-WMO-ZACAS-NEXT:    and a5, a2, a4
+; RV64IA-WMO-ZACAS-NEXT:    bne a5, a1, .LBB2_3
+; RV64IA-WMO-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB2_1 Depth=1
+; RV64IA-WMO-ZACAS-NEXT:    xor a5, a2, a0
+; RV64IA-WMO-ZACAS-NEXT:    and a5, a5, a4
+; RV64IA-WMO-ZACAS-NEXT:    xor a5, a2, a5
+; RV64IA-WMO-ZACAS-NEXT:    sc.w a5, a5, (a3)
+; RV64IA-WMO-ZACAS-NEXT:    bnez a5, .LBB2_1
+; RV64IA-WMO-ZACAS-NEXT:  .LBB2_3:
+; RV64IA-WMO-ZACAS-NEXT:    ret
   %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val acquire acquire
   ret void
 }
@@ -369,6 +553,29 @@ define void @cmpxchg_i8_release_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind {
 ; RV32IA-WMO-NEXT:  .LBB3_3:
 ; RV32IA-WMO-NEXT:    ret
 ;
+; RV32IA-ZACAS-LABEL: cmpxchg_i8_release_monotonic:
+; RV32IA-ZACAS:       # %bb.0:
+; RV32IA-ZACAS-NEXT:    andi a3, a0, -4
+; RV32IA-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-ZACAS-NEXT:    li a4, 255
+; RV32IA-ZACAS-NEXT:    sll a4, a4, a0
+; RV32IA-ZACAS-NEXT:    andi a1, a1, 255
+; RV32IA-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-ZACAS-NEXT:    andi a2, a2, 255
+; RV32IA-ZACAS-NEXT:    sll a0, a2, a0
+; RV32IA-ZACAS-NEXT:  .LBB3_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT:    lr.w a2, (a3)
+; RV32IA-ZACAS-NEXT:    and a5, a2, a4
+; RV32IA-ZACAS-NEXT:    bne a5, a1, .LBB3_3
+; RV32IA-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB3_1 Depth=1
+; RV32IA-ZACAS-NEXT:    xor a5, a2, a0
+; RV32IA-ZACAS-NEXT:    and a5, a5, a4
+; RV32IA-ZACAS-NEXT:    xor a5, a2, a5
+; RV32IA-ZACAS-NEXT:    sc.w.rl a5, a5, (a3)
+; RV32IA-ZACAS-NEXT:    bnez a5, .LBB3_1
+; RV32IA-ZACAS-NEXT:  .LBB3_3:
+; RV32IA-ZACAS-NEXT:    ret
+;
 ; RV32IA-TSO-LABEL: cmpxchg_i8_release_monotonic:
 ; RV32IA-TSO:       # %bb.0:
 ; RV32IA-TSO-NEXT:    andi a3, a0, -4
@@ -428,6 +635,29 @@ define void @cmpxchg_i8_release_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind {
 ; RV64IA-WMO-NEXT:  .LBB3_3:
 ; RV64IA-WMO-NEXT:    ret
 ;
+; RV64IA-ZACAS-LABEL: cmpxchg_i8_release_monotonic:
+; RV64IA-ZACAS:       # %bb.0:
+; RV64IA-ZACAS-NEXT:    andi a3, a0, -4
+; RV64IA-ZACAS-NEXT:    slli a0, a0, 3
+; RV64IA-ZACAS-NEXT:    li a4, 255
+; RV64IA-ZACAS-NEXT:    sllw a4, a4, a0
+; RV64IA-ZACAS-NEXT:    andi a1, a1, 255
+; RV64IA-ZACAS-NEXT:    sllw a1, a1, a0
+; RV64IA-ZACAS-NEXT:    andi a2, a2, 255
+; RV64IA-ZACAS-NEXT:    sllw a0, a2, a0
+; RV64IA-ZACAS-NEXT:  .LBB3_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-ZACAS-NEXT:    lr.w a2, (a3)
+; RV64IA-ZACAS-NEXT:    and a5, a2, a4
+; RV64IA-ZACAS-NEXT:    bne a5, a1, .LBB3_3
+; RV64IA-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB3_1 Depth=1
+; RV64IA-ZACAS-NEXT:    xor a5, a2, a0
+; RV64IA-ZACAS-NEXT:    and a5, a5, a4
+; RV64IA-ZACAS-NEXT:    xor a5, a2, a5
+; RV64IA-ZACAS-NEXT:    sc.w.rl a5, a5, (a3)
+; RV64IA-ZACAS-NEXT:    bnez a5, .LBB3_1
+; RV64IA-ZACAS-NEXT:  .LBB3_3:
+; RV64IA-ZACAS-NEXT:    ret
+;
 ; RV64IA-TSO-LABEL: cmpxchg_i8_release_monotonic:
 ; RV64IA-TSO:       # %bb.0:
 ; RV64IA-TSO-NEXT:    andi a3, a0, -4
@@ -450,6 +680,50 @@ define void @cmpxchg_i8_release_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind {
 ; RV64IA-TSO-NEXT:    bnez a5, .LBB3_1
 ; RV64IA-TSO-NEXT:  .LBB3_3:
 ; RV64IA-TSO-NEXT:    ret
+; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i8_release_monotonic:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    andi a3, a0, -4
+; RV32IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT:    li a4, 255
+; RV32IA-WMO-ZACAS-NEXT:    sll a4, a4, a0
+; RV32IA-WMO-ZACAS-NEXT:    andi a1, a1, 255
+; RV32IA-WMO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:    andi a2, a2, 255
+; RV32IA-WMO-ZACAS-NEXT:    sll a0, a2, a0
+; RV32IA-WMO-ZACAS-NEXT:  .LBB3_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    lr.w a2, (a3)
+; RV32IA-WMO-ZACAS-NEXT:    and a5, a2, a4
+; RV32IA-WMO-ZACAS-NEXT:    bne a5, a1, .LBB3_3
+; RV32IA-WMO-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB3_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    xor a5, a2, a0
+; RV32IA-WMO-ZACAS-NEXT:    and a5, a5, a4
+; RV32IA-WMO-ZACAS-NEXT:    xor a5, a2, a5
+; RV32IA-WMO-ZACAS-NEXT:    sc.w.rl a5, a5, (a3)
+; RV32IA-WMO-ZACAS-NEXT:    bnez a5, .LBB3_1
+; RV32IA-WMO-ZACAS-NEXT:  .LBB3_3:
+; RV32IA-WMO-ZACAS-NEXT:    ret
+; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i8_release_monotonic:
+; RV64IA-WMO-ZACAS:       # %bb.0:
+; RV64IA-WMO-ZACAS-NEXT:    andi a3, a0, -4
+; RV64IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV64IA-WMO-ZACAS-NEXT:    li a4, 255
+; RV64IA-WMO-ZACAS-NEXT:    sllw a4, a4, a0
+; RV64IA-WMO-ZACAS-NEXT:    andi a1, a1, 255
+; RV64IA-WMO-ZACAS-NEXT:    sllw a1, a1, a0
+; RV64IA-WMO-ZACAS-NEXT:    andi a2, a2, 255
+; RV64IA-WMO-ZACAS-NEXT:    sllw a0, a2, a0
+; RV64IA-WMO-ZACAS-NEXT:  .LBB3_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-ZACAS-NEXT:    lr.w a2, (a3)
+; RV64IA-WMO-ZACAS-NEXT:    and a5, a2, a4
+; RV64IA-WMO-ZACAS-NEXT:    bne a5, a1, .LBB3_3
+; RV64IA-WMO-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB3_1 Depth=1
+; RV64IA-WMO-ZACAS-NEXT:    xor a5, a2, a0
+; RV64IA-WMO-ZACAS-NEXT:    and a5, a5, a4
+; RV64IA-WMO-ZACAS-NEXT:    xor a5, a2, a5
+; RV64IA-WMO-ZACAS-NEXT:    sc.w.rl a5, a5, (a3)
+; RV64IA-WMO-ZACAS-NEXT:    bnez a5, .LBB3_1
+; RV64IA-WMO-ZACAS-NEXT:  .LBB3_3:
+; RV64IA-WMO-ZACAS-NEXT:    ret
   %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val release monotonic
   ret void
 }
@@ -491,6 +765,29 @@ define void @cmpxchg_i8_release_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind {
 ; RV32IA-WMO-NEXT:  .LBB4_3:
 ; RV32IA-WMO-NEXT:    ret
 ;
+; RV32IA-ZACAS-LABEL: cmpxchg_i8_release_acquire:
+; RV32IA-ZACAS:       # %bb.0:
+; RV32IA-ZACAS-NEXT:    andi a3, a0, -4
+; RV32IA-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-ZACAS-NEXT:    li a4, 255
+; RV32IA-ZACAS-NEXT:    sll a4, a4, a0
+; RV32IA-ZACAS-NEXT:    andi a1, a1, 255
+; RV32IA-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-ZACAS-NEXT:    andi a2, a2, 255
+; RV32IA-ZACAS-NEXT:    sll a0, a2, a0
+; RV32IA-ZACAS-NEXT:  .LBB4_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT:    lr.w.aq a2, (a3)
+; RV32IA-ZACAS-NEXT:    and a5, a2, a4
+; RV32IA-ZACAS-NEXT:    bne a5, a1, .LBB4_3
+; RV32IA-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB4_1 Depth=1
+; RV32IA-ZACAS-NEXT:    xor a5, a2, a0
+; RV32IA-ZACAS-NEXT:    and a5, a5, a4
+; RV32IA-ZACAS-NEXT:    xor a5, a2, a5
+; RV32IA-ZACAS-NEXT:    sc.w.rl a5, a5, (a3)
+; RV32IA-ZACAS-NEXT:    bnez a5, .LBB4_1
+; RV32IA-ZACAS-NEXT:  .LBB4_3:
+; RV32IA-ZACAS-NEXT:    ret
+;
 ; RV32IA-TSO-LABEL: cmpxchg_i8_release_acquire:
 ; RV32IA-TSO:       # %bb.0:
 ; RV32IA-TSO-NEXT:    andi a3, a0, -4
@@ -550,6 +847,29 @@ define void @cmpxchg_i8_release_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind {
 ; RV64IA-WMO-NEXT:  .LBB4_3:
 ; RV64IA-WMO-NEXT:    ret
 ;
+; RV64IA-ZACAS-LABEL: cmpxchg_i8_release_acquire:
+; RV64IA-ZACAS:       # %bb.0:
+; RV64IA-ZACAS-NEXT:    andi a3, a0, -4
+; RV64IA-ZACAS-NEXT:    slli a0, a0, 3
+; RV64IA-ZACAS-NEXT:    li a4, 255
+; RV64IA-ZACAS-NEXT:    sllw a4, a4, a0
+; RV64IA-ZACAS-NEXT:    andi a1, a1, 255
+; RV64IA-ZACAS-NEXT:    sllw a1, a1, a0
+; RV64IA-ZACAS-NEXT:    andi a2, a2, 255
+; RV64IA-ZACAS-NEXT:    sllw a0, a2, a0
+; RV64IA-ZACAS-NEXT:  .LBB4_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-ZACAS-NEXT:    lr.w.aq a2, (a3)
+; RV64IA-ZACAS-NEXT:    and a5, a2, a4
+; RV64IA-ZACAS-NEXT:    bne a5, a1, .LBB4_3
+; RV64IA-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB4_1 Depth=1
+; RV64IA-ZACAS-NEXT:    xor a5, a2, a0
+; RV64IA-ZACAS-NEXT:    and a5, a5, a4
+; RV64IA-ZACAS-NEXT:    xor a5, a2, a5
+; RV64IA-ZACAS-NEXT:    sc.w.rl a5, a5, (a3)
+; RV64IA-ZACAS-NEXT:    bnez a5, .LBB4_1
+; RV64IA-ZACAS-NEXT:  .LBB4_3:
+; RV64IA-ZACAS-NEXT:    ret
+;
 ; RV64IA-TSO-LABEL: cmpxchg_i8_release_acquire:
 ; RV64IA-TSO:       # %bb.0:
 ; RV64IA-TSO-NEXT:    andi a3, a0, -4
@@ -572,6 +892,50 @@ define void @cmpxchg_i8_release_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind {
 ; RV64IA-TSO-NEXT:    bnez a5, .LBB4_1
 ; RV64IA-TSO-NEXT:  .LBB4_3:
 ; RV64IA-TSO-NEXT:    ret
+; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i8_release_acquire:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    andi a3, a0, -4
+; RV32IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT:    li a4, 255
+; RV32IA-WMO-ZACAS-NEXT:    sll a4, a4, a0
+; RV32IA-WMO-ZACAS-NEXT:    andi a1, a1, 255
+; RV32IA-WMO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:    andi a2, a2, 255
+; RV32IA-WMO-ZACAS-NEXT:    sll a0, a2, a0
+; RV32IA-WMO-ZACAS-NEXT:  .LBB4_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    lr.w.aq a2, (a3)
+; RV32IA-WMO-ZACAS-NEXT:    and a5, a2, a4
+; RV32IA-WMO-ZACAS-NEXT:    bne a5, a1, .LBB4_3
+; RV32IA-WMO-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB4_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    xor a5, a2, a0
+; RV32IA-WMO-ZACAS-NEXT:    and a5, a5, a4
+; RV32IA-WMO-ZACAS-NEXT:    xor a5, a2, a5
+; RV32IA-WMO-ZACAS-NEXT:    sc.w.rl a5, a5, (a3)
+; RV32IA-WMO-ZACAS-NEXT:    bnez a5, .LBB4_1
+; RV32IA-WMO-ZACAS-NEXT:  .LBB4_3:
+; RV32IA-WMO-ZACAS-NEXT:    ret
+; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i8_release_acquire:
+; RV64IA-WMO-ZACAS:       # %bb.0:
+; RV64IA-WMO-ZACAS-NEXT:    andi a3, a0, -4
+; RV64IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV64IA-WMO-ZACAS-NEXT:    li a4, 255
+; RV64IA-WMO-ZACAS-NEXT:    sllw a4, a4, a0
+; RV64IA-WMO-ZACAS-NEXT:    andi a1, a1, 255
+; RV64IA-WMO-ZACAS-NEXT:    sllw a1, a1, a0
+; RV64IA-WMO-ZACAS-NEXT:    andi a2, a2, 255
+; RV64IA-WMO-ZACAS-NEXT:    sllw a0, a2, a0
+; RV64IA-WMO-ZACAS-NEXT:  .LBB4_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-ZACAS-NEXT:    lr.w.aq a2, (a3)
+; RV64IA-WMO-ZACAS-NEXT:    and a5, a2, a4
+; RV64IA-WMO-ZACAS-NEXT:    bne a5, a1, .LBB4_3
+; RV64IA-WMO-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB4_1 Depth=1
+; RV64IA-WMO-ZACAS-NEXT:    xor a5, a2, a0
+; RV64IA-WMO-ZACAS-NEXT:    and a5, a5, a4
+; RV64IA-WMO-ZACAS-NEXT:    xor a5, a2, a5
+; RV64IA-WMO-ZACAS-NEXT:    sc.w.rl a5, a5, (a3)
+; RV64IA-WMO-ZACAS-NEXT:    bnez a5, .LBB4_1
+; RV64IA-WMO-ZACAS-NEXT:  .LBB4_3:
+; RV64IA-WMO-ZACAS-NEXT:    ret
   %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val release acquire
   ret void
 }
@@ -613,6 +977,29 @@ define void @cmpxchg_i8_acq_rel_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind {
 ; RV32IA-WMO-NEXT:  .LBB5_3:
 ; RV32IA-WMO-NEXT:    ret
 ;
+; RV32IA-ZACAS-LABEL: cmpxchg_i8_acq_rel_monotonic:
+; RV32IA-ZACAS:       # %bb.0:
+; RV32IA-ZACAS-NEXT:    andi a3, a0, -4
+; RV32IA-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-ZACAS-NEXT:    li a4, 255
+; RV32IA-ZACAS-NEXT:    sll a4, a4, a0
+; RV32IA-ZACAS-NEXT:    andi a1, a1, 255
+; RV32IA-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-ZACAS-NEXT:    andi a2, a2, 255
+; RV32IA-ZACAS-NEXT:    sll a0, a2, a0
+; RV32IA-ZACAS-NEXT:  .LBB5_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT:    lr.w.aq a2, (a3)
+; RV32IA-ZACAS-NEXT:    and a5, a2, a4
+; RV32IA-ZACAS-NEXT:    bne a5, a1, .LBB5_3
+; RV32IA-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB5_1 Depth=1
+; RV32IA-ZACAS-NEXT:    xor a5, a2, a0
+; RV32IA-ZACAS-NEXT:    and a5, a5, a4
+; RV32IA-ZACAS-NEXT:    xor a5, a2, a5
+; RV32IA-ZACAS-NEXT:    sc.w.rl a5, a5, (a3)
+; RV32IA-ZACAS-NEXT:    bnez a5, .LBB5_1
+; RV32IA-ZACAS-NEXT:  .LBB5_3:
+; RV32IA-ZACAS-NEXT:    ret
+;
 ; RV32IA-TSO-LABEL: cmpxchg_i8_acq_rel_monotonic:
 ; RV32IA-TSO:       # %bb.0:
 ; RV32IA-TSO-NEXT:    andi a3, a0, -4
@@ -672,6 +1059,29 @@ define void @cmpxchg_i8_acq_rel_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind {
 ; RV64IA-WMO-NEXT:  .LBB5_3:
 ; RV64IA-WMO-NEXT:    ret
 ;
+; RV64IA-ZACAS-LABEL: cmpxchg_i8_acq_rel_monotonic:
+; RV64IA-ZACAS:       # %bb.0:
+; RV64IA-ZACAS-NEXT:    andi a3, a0, -4
+; RV64IA-ZACAS-NEXT:    slli a0, a0, 3
+; RV64IA-ZACAS-NEXT:    li a4, 255
+; RV64IA-ZACAS-NEXT:    sllw a4, a4, a0
+; RV64IA-ZACAS-NEXT:    andi a1, a1, 255
+; RV64IA-ZACAS-NEXT:    sllw a1, a1, a0
+; RV64IA-ZACAS-NEXT:    andi a2, a2, 255
+; RV64IA-ZACAS-NEXT:    sllw a0, a2, a0
+; RV64IA-ZACAS-NEXT:  .LBB5_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-ZACAS-NEXT:    lr.w.aq a2, (a3)
+; RV64IA-ZACAS-NEXT:    and a5, a2, a4
+; RV64IA-ZACAS-NEXT:    bne a5, a1, .LBB5_3
+; RV64IA-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB5_1 Depth=1
+; RV64IA-ZACAS-NEXT:    xor a5, a2, a0
+; RV64IA-ZACAS-NEXT:    and a5, a5, a4
+; RV64IA-ZACAS-NEXT:    xor a5, a2, a5
+; RV64IA-ZACAS-NEXT:    sc.w.rl a5, a5, (a3)
+; RV64IA-ZACAS-NEXT:    bnez a5, .LBB5_1
+; RV64IA-ZACAS-NEXT:  .LBB5_3:
+; RV64IA-ZACAS-NEXT:    ret
+;
 ; RV64IA-TSO-LABEL: cmpxchg_i8_acq_rel_monotonic:
 ; RV64IA-TSO:       # %bb.0:
 ; RV64IA-TSO-NEXT:    andi a3, a0, -4
@@ -694,6 +1104,50 @@ define void @cmpxchg_i8_acq_rel_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind {
 ; RV64IA-TSO-NEXT:    bnez a5, .LBB5_1
 ; RV64IA-TSO-NEXT:  .LBB5_3:
 ; RV64IA-TSO-NEXT:    ret
+; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i8_acq_rel_monotonic:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    andi a3, a0, -4
+; RV32IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT:    li a4, 255
+; RV32IA-WMO-ZACAS-NEXT:    sll a4, a4, a0
+; RV32IA-WMO-ZACAS-NEXT:    andi a1, a1, 255
+; RV32IA-WMO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:    andi a2, a2, 255
+; RV32IA-WMO-ZACAS-NEXT:    sll a0, a2, a0
+; RV32IA-WMO-ZACAS-NEXT:  .LBB5_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    lr.w.aq a2, (a3)
+; RV32IA-WMO-ZACAS-NEXT:    and a5, a2, a4
+; RV32IA-WMO-ZACAS-NEXT:    bne a5, a1, .LBB5_3
+; RV32IA-WMO-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB5_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    xor a5, a2, a0
+; RV32IA-WMO-ZACAS-NEXT:    and a5, a5, a4
+; RV32IA-WMO-ZACAS-NEXT:    xor a5, a2, a5
+; RV32IA-WMO-ZACAS-NEXT:    sc.w.rl a5, a5, (a3)
+; RV32IA-WMO-ZACAS-NEXT:    bnez a5, .LBB5_1
+; RV32IA-WMO-ZACAS-NEXT:  .LBB5_3:
+; RV32IA-WMO-ZACAS-NEXT:    ret
+; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i8_acq_rel_monotonic:
+; RV64IA-WMO-ZACAS:       # %bb.0:
+; RV64IA-WMO-ZACAS-NEXT:    andi a3, a0, -4
+; RV64IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV64IA-WMO-ZACAS-NEXT:    li a4, 255
+; RV64IA-WMO-ZACAS-NEXT:    sllw a4, a4, a0
+; RV64IA-WMO-ZACAS-NEXT:    andi a1, a1, 255
+; RV64IA-WMO-ZACAS-NEXT:    sllw a1, a1, a0
+; RV64IA-WMO-ZACAS-NEXT:    andi a2, a2, 255
+; RV64IA-WMO-ZACAS-NEXT:    sllw a0, a2, a0
+; RV64IA-WMO-ZACAS-NEXT:  .LBB5_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-ZACAS-NEXT:    lr.w.aq a2, (a3)
+; RV64IA-WMO-ZACAS-NEXT:    and a5, a2, a4
+; RV64IA-WMO-ZACAS-NEXT:    bne a5, a1, .LBB5_3
+; RV64IA-WMO-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB5_1 Depth=1
+; RV64IA-WMO-ZACAS-NEXT:    xor a5, a2, a0
+; RV64IA-WMO-ZACAS-NEXT:    and a5, a5, a4
+; RV64IA-WMO-ZACAS-NEXT:    xor a5, a2, a5
+; RV64IA-WMO-ZACAS-NEXT:    sc.w.rl a5, a5, (a3)
+; RV64IA-WMO-ZACAS-NEXT:    bnez a5, .LBB5_1
+; RV64IA-WMO-ZACAS-NEXT:  .LBB5_3:
+; RV64IA-WMO-ZACAS-NEXT:    ret
   %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val acq_rel monotonic
   ret void
 }
@@ -735,6 +1189,29 @@ define void @cmpxchg_i8_acq_rel_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind {
 ; RV32IA-WMO-NEXT:  .LBB6_3:
 ; RV32IA-WMO-NEXT:    ret
 ;
+; RV32IA-ZACAS-LABEL: cmpxchg_i8_acq_rel_acquire:
+; RV32IA-ZACAS:       # %bb.0:
+; RV32IA-ZACAS-NEXT:    andi a3, a0, -4
+; RV32IA-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-ZACAS-NEXT:    li a4, 255
+; RV32IA-ZACAS-NEXT:    sll a4, a4, a0
+; RV32IA-ZACAS-NEXT:    andi a1, a1, 255
+; RV32IA-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-ZACAS-NEXT:    andi a2, a2, 255
+; RV32IA-ZACAS-NEXT:    sll a0, a2, a0
+; RV32IA-ZACAS-NEXT:  .LBB6_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT:    lr.w.aq a2, (a3)
+; RV32IA-ZACAS-NEXT:    and a5, a2, a4
+; RV32IA-ZACAS-NEXT:    bne a5, a1, .LBB6_3
+; RV32IA-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB6_1 Depth=1
+; RV32IA-ZACAS-NEXT:    xor a5, a2, a0
+; RV32IA-ZACAS-NEXT:    and a5, a5, a4
+; RV32IA-ZACAS-NEXT:    xor a5, a2, a5
+; RV32IA-ZACAS-NEXT:    sc.w.rl a5, a5, (a3)
+; RV32IA-ZACAS-NEXT:    bnez a5, .LBB6_1
+; RV32IA-ZACAS-NEXT:  .LBB6_3:
+; RV32IA-ZACAS-NEXT:    ret
+;
 ; RV32IA-TSO-LABEL: cmpxchg_i8_acq_rel_acquire:
 ; RV32IA-TSO:       # %bb.0:
 ; RV32IA-TSO-NEXT:    andi a3, a0, -4
@@ -794,6 +1271,29 @@ define void @cmpxchg_i8_acq_rel_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind {
 ; RV64IA-WMO-NEXT:  .LBB6_3:
 ; RV64IA-WMO-NEXT:    ret
 ;
+; RV64IA-ZACAS-LABEL: cmpxchg_i8_acq_rel_acquire:
+; RV64IA-ZACAS:       # %bb.0:
+; RV64IA-ZACAS-NEXT:    andi a3, a0, -4
+; RV64IA-ZACAS-NEXT:    slli a0, a0, 3
+; RV64IA-ZACAS-NEXT:    li a4, 255
+; RV64IA-ZACAS-NEXT:    sllw a4, a4, a0
+; RV64IA-ZACAS-NEXT:    andi a1, a1, 255
+; RV64IA-ZACAS-NEXT:    sllw a1, a1, a0
+; RV64IA-ZACAS-NEXT:    andi a2, a2, 255
+; RV64IA-ZACAS-NEXT:    sllw a0, a2, a0
+; RV64IA-ZACAS-NEXT:  .LBB6_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-ZACAS-NEXT:    lr.w.aq a2, (a3)
+; RV64IA-ZACAS-NEXT:    and a5, a2, a4
+; RV64IA-ZACAS-NEXT:    bne a5, a1, .LBB6_3
+; RV64IA-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB6_1 Depth=1
+; RV64IA-ZACAS-NEXT:    xor a5, a2, a0
+; RV64IA-ZACAS-NEXT:    and a5, a5, a4
+; RV64IA-ZACAS-NEXT:    xor a5, a2, a5
+; RV64IA-ZACAS-NEXT:    sc.w.rl a5, a5, (a3)
+; RV64IA-ZACAS-NEXT:    bnez a5, .LBB6_1
+; RV64IA-ZACAS-NEXT:  .LBB6_3:
+; RV64IA-ZACAS-NEXT:    ret
+;
 ; RV64IA-TSO-LABEL: cmpxchg_i8_acq_rel_acquire:
 ; RV64IA-TSO:       # %bb.0:
 ; RV64IA-TSO-NEXT:    andi a3, a0, -4
@@ -816,6 +1316,50 @@ define void @cmpxchg_i8_acq_rel_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind {
 ; RV64IA-TSO-NEXT:    bnez a5, .LBB6_1
 ; RV64IA-TSO-NEXT:  .LBB6_3:
 ; RV64IA-TSO-NEXT:    ret
+; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i8_acq_rel_acquire:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    andi a3, a0, -4
+; RV32IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT:    li a4, 255
+; RV32IA-WMO-ZACAS-NEXT:    sll a4, a4, a0
+; RV32IA-WMO-ZACAS-NEXT:    andi a1, a1, 255
+; RV32IA-WMO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:    andi a2, a2, 255
+; RV32IA-WMO-ZACAS-NEXT:    sll a0, a2, a0
+; RV32IA-WMO-ZACAS-NEXT:  .LBB6_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    lr.w.aq a2, (a3)
+; RV32IA-WMO-ZACAS-NEXT:    and a5, a2, a4
+; RV32IA-WMO-ZACAS-NEXT:    bne a5, a1, .LBB6_3
+; RV32IA-WMO-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB6_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    xor a5, a2, a0
+; RV32IA-WMO-ZACAS-NEXT:    and a5, a5, a4
+; RV32IA-WMO-ZACAS-NEXT:    xor a5, a2, a5
+; RV32IA-WMO-ZACAS-NEXT:    sc.w.rl a5, a5, (a3)
+; RV32IA-WMO-ZACAS-NEXT:    bnez a5, .LBB6_1
+; RV32IA-WMO-ZACAS-NEXT:  .LBB6_3:
+; RV32IA-WMO-ZACAS-NEXT:    ret
+; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i8_acq_rel_acquire:
+; RV64IA-WMO-ZACAS:       # %bb.0:
+; RV64IA-WMO-ZACAS-NEXT:    andi a3, a0, -4
+; RV64IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV64IA-WMO-ZACAS-NEXT:    li a4, 255
+; RV64IA-WMO-ZACAS-NEXT:    sllw a4, a4, a0
+; RV64IA-WMO-ZACAS-NEXT:    andi a1, a1, 255
+; RV64IA-WMO-ZACAS-NEXT:    sllw a1, a1, a0
+; RV64IA-WMO-ZACAS-NEXT:    andi a2, a2, 255
+; RV64IA-WMO-ZACAS-NEXT:    sllw a0, a2, a0
+; RV64IA-WMO-ZACAS-NEXT:  .LBB6_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-ZACAS-NEXT:    lr.w.aq a2, (a3)
+; RV64IA-WMO-ZACAS-NEXT:    and a5, a2, a4
+; RV64IA-WMO-ZACAS-NEXT:    bne a5, a1, .LBB6_3
+; RV64IA-WMO-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB6_1 Depth=1
+; RV64IA-WMO-ZACAS-NEXT:    xor a5, a2, a0
+; RV64IA-WMO-ZACAS-NEXT:    and a5, a5, a4
+; RV64IA-WMO-ZACAS-NEXT:    xor a5, a2, a5
+; RV64IA-WMO-ZACAS-NEXT:    sc.w.rl a5, a5, (a3)
+; RV64IA-WMO-ZACAS-NEXT:    bnez a5, .LBB6_1
+; RV64IA-WMO-ZACAS-NEXT:  .LBB6_3:
+; RV64IA-WMO-ZACAS-NEXT:    ret
   %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val acq_rel acquire
   ret void
 }
@@ -1164,6 +1708,30 @@ define void @cmpxchg_i16_acquire_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin
 ; RV32IA-WMO-NEXT:  .LBB11_3:
 ; RV32IA-WMO-NEXT:    ret
 ;
+; RV32IA-ZACAS-LABEL: cmpxchg_i16_acquire_monotonic:
+; RV32IA-ZACAS:       # %bb.0:
+; RV32IA-ZACAS-NEXT:    andi a3, a0, -4
+; RV32IA-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-ZACAS-NEXT:    lui a4, 16
+; RV32IA-ZACAS-NEXT:    addi a4, a4, -1
+; RV32IA-ZACAS-NEXT:    sll a5, a4, a0
+; RV32IA-ZACAS-NEXT:    and a1, a1, a4
+; RV32IA-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-ZACAS-NEXT:    and a2, a2, a4
+; RV32IA-ZACAS-NEXT:    sll a0, a2, a0
+; RV32IA-ZACAS-NEXT:  .LBB11_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT:    lr.w.aq a2, (a3)
+; RV32IA-ZACAS-NEXT:    and a4, a2, a5
+; RV32IA-ZACAS-NEXT:    bne a4, a1, .LBB11_3
+; RV32IA-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB11_1 Depth=1
+; RV32IA-ZACAS-NEXT:    xor a4, a2, a0
+; RV32IA-ZACAS-NEXT:    and a4, a4, a5
+; RV32IA-ZACAS-NEXT:    xor a4, a2, a4
+; RV32IA-ZACAS-NEXT:    sc.w a4, a4, (a3)
+; RV32IA-ZACAS-NEXT:    bnez a4, .LBB11_1
+; RV32IA-ZACAS-NEXT:  .LBB11_3:
+; RV32IA-ZACAS-NEXT:    ret
+;
 ; RV32IA-TSO-LABEL: cmpxchg_i16_acquire_monotonic:
 ; RV32IA-TSO:       # %bb.0:
 ; RV32IA-TSO-NEXT:    andi a3, a0, -4
@@ -1225,6 +1793,30 @@ define void @cmpxchg_i16_acquire_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin
 ; RV64IA-WMO-NEXT:  .LBB11_3:
 ; RV64IA-WMO-NEXT:    ret
 ;
+; RV64IA-ZACAS-LABEL: cmpxchg_i16_acquire_monotonic:
+; RV64IA-ZACAS:       # %bb.0:
+; RV64IA-ZACAS-NEXT:    andi a3, a0, -4
+; RV64IA-ZACAS-NEXT:    slli a0, a0, 3
+; RV64IA-ZACAS-NEXT:    lui a4, 16
+; RV64IA-ZACAS-NEXT:    addiw a4, a4, -1
+; RV64IA-ZACAS-NEXT:    sllw a5, a4, a0
+; RV64IA-ZACAS-NEXT:    and a1, a1, a4
+; RV64IA-ZACAS-NEXT:    sllw a1, a1, a0
+; RV64IA-ZACAS-NEXT:    and a2, a2, a4
+; RV64IA-ZACAS-NEXT:    sllw a0, a2, a0
+; RV64IA-ZACAS-NEXT:  .LBB11_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-ZACAS-NEXT:    lr.w.aq a2, (a3)
+; RV64IA-ZACAS-NEXT:    and a4, a2, a5
+; RV64IA-ZACAS-NEXT:    bne a4, a1, .LBB11_3
+; RV64IA-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB11_1 Depth=1
+; RV64IA-ZACAS-NEXT:    xor a4, a2, a0
+; RV64IA-ZACAS-NEXT:    and a4, a4, a5
+; RV64IA-ZACAS-NEXT:    xor a4, a2, a4
+; RV64IA-ZACAS-NEXT:    sc.w a4, a4, (a3)
+; RV64IA-ZACAS-NEXT:    bnez a4, .LBB11_1
+; RV64IA-ZACAS-NEXT:  .LBB11_3:
+; RV64IA-ZACAS-NEXT:    ret
+;
 ; RV64IA-TSO-LABEL: cmpxchg_i16_acquire_monotonic:
 ; RV64IA-TSO:       # %bb.0:
 ; RV64IA-TSO-NEXT:    andi a3, a0, -4
@@ -1248,6 +1840,52 @@ define void @cmpxchg_i16_acquire_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin
 ; RV64IA-TSO-NEXT:    bnez a4, .LBB11_1
 ; RV64IA-TSO-NEXT:  .LBB11_3:
 ; RV64IA-TSO-NEXT:    ret
+; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i16_acquire_monotonic:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    andi a3, a0, -4
+; RV32IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT:    lui a4, 16
+; RV32IA-WMO-ZACAS-NEXT:    addi a4, a4, -1
+; RV32IA-WMO-ZACAS-NEXT:    sll a5, a4, a0
+; RV32IA-WMO-ZACAS-NEXT:    and a1, a1, a4
+; RV32IA-WMO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:    and a2, a2, a4
+; RV32IA-WMO-ZACAS-NEXT:    sll a0, a2, a0
+; RV32IA-WMO-ZACAS-NEXT:  .LBB11_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    lr.w.aq a2, (a3)
+; RV32IA-WMO-ZACAS-NEXT:    and a4, a2, a5
+; RV32IA-WMO-ZACAS-NEXT:    bne a4, a1, .LBB11_3
+; RV32IA-WMO-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB11_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    xor a4, a2, a0
+; RV32IA-WMO-ZACAS-NEXT:    and a4, a4, a5
+; RV32IA-WMO-ZACAS-NEXT:    xor a4, a2, a4
+; RV32IA-WMO-ZACAS-NEXT:    sc.w a4, a4, (a3)
+; RV32IA-WMO-ZACAS-NEXT:    bnez a4, .LBB11_1
+; RV32IA-WMO-ZACAS-NEXT:  .LBB11_3:
+; RV32IA-WMO-ZACAS-NEXT:    ret
+; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i16_acquire_monotonic:
+; RV64IA-WMO-ZACAS:       # %bb.0:
+; RV64IA-WMO-ZACAS-NEXT:    andi a3, a0, -4
+; RV64IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV64IA-WMO-ZACAS-NEXT:    lui a4, 16
+; RV64IA-WMO-ZACAS-NEXT:    addiw a4, a4, -1
+; RV64IA-WMO-ZACAS-NEXT:    sllw a5, a4, a0
+; RV64IA-WMO-ZACAS-NEXT:    and a1, a1, a4
+; RV64IA-WMO-ZACAS-NEXT:    sllw a1, a1, a0
+; RV64IA-WMO-ZACAS-NEXT:    and a2, a2, a4
+; RV64IA-WMO-ZACAS-NEXT:    sllw a0, a2, a0
+; RV64IA-WMO-ZACAS-NEXT:  .LBB11_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-ZACAS-NEXT:    lr.w.aq a2, (a3)
+; RV64IA-WMO-ZACAS-NEXT:    and a4, a2, a5
+; RV64IA-WMO-ZACAS-NEXT:    bne a4, a1, .LBB11_3
+; RV64IA-WMO-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB11_1 Depth=1
+; RV64IA-WMO-ZACAS-NEXT:    xor a4, a2, a0
+; RV64IA-WMO-ZACAS-NEXT:    and a4, a4, a5
+; RV64IA-WMO-ZACAS-NEXT:    xor a4, a2, a4
+; RV64IA-WMO-ZACAS-NEXT:    sc.w a4, a4, (a3)
+; RV64IA-WMO-ZACAS-NEXT:    bnez a4, .LBB11_1
+; RV64IA-WMO-ZACAS-NEXT:  .LBB11_3:
+; RV64IA-WMO-ZACAS-NEXT:    ret
   %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val acquire monotonic
   ret void
 }
@@ -1290,6 +1928,30 @@ define void @cmpxchg_i16_acquire_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind
 ; RV32IA-WMO-NEXT:  .LBB12_3:
 ; RV32IA-WMO-NEXT:    ret
 ;
+; RV32IA-ZACAS-LABEL: cmpxchg_i16_acquire_acquire:
+; RV32IA-ZACAS:       # %bb.0:
+; RV32IA-ZACAS-NEXT:    andi a3, a0, -4
+; RV32IA-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-ZACAS-NEXT:    lui a4, 16
+; RV32IA-ZACAS-NEXT:    addi a4, a4, -1
+; RV32IA-ZACAS-NEXT:    sll a5, a4, a0
+; RV32IA-ZACAS-NEXT:    and a1, a1, a4
+; RV32IA-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-ZACAS-NEXT:    and a2, a2, a4
+; RV32IA-ZACAS-NEXT:    sll a0, a2, a0
+; RV32IA-ZACAS-NEXT:  .LBB12_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT:    lr.w.aq a2, (a3)
+; RV32IA-ZACAS-NEXT:    and a4, a2, a5
+; RV32IA-ZACAS-NEXT:    bne a4, a1, .LBB12_3
+; RV32IA-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB12_1 Depth=1
+; RV32IA-ZACAS-NEXT:    xor a4, a2, a0
+; RV32IA-ZACAS-NEXT:    and a4, a4, a5
+; RV32IA-ZACAS-NEXT:    xor a4, a2, a4
+; RV32IA-ZACAS-NEXT:    sc.w a4, a4, (a3)
+; RV32IA-ZACAS-NEXT:    bnez a4, .LBB12_1
+; RV32IA-ZACAS-NEXT:  .LBB12_3:
+; RV32IA-ZACAS-NEXT:    ret
+;
 ; RV32IA-TSO-LABEL: cmpxchg_i16_acquire_acquire:
 ; RV32IA-TSO:       # %bb.0:
 ; RV32IA-TSO-NEXT:    andi a3, a0, -4
@@ -1351,6 +2013,30 @@ define void @cmpxchg_i16_acquire_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind
 ; RV64IA-WMO-NEXT:  .LBB12_3:
 ; RV64IA-WMO-NEXT:    ret
 ;
+; RV64IA-ZACAS-LABEL: cmpxchg_i16_acquire_acquire:
+; RV64IA-ZACAS:       # %bb.0:
+; RV64IA-ZACAS-NEXT:    andi a3, a0, -4
+; RV64IA-ZACAS-NEXT:    slli a0, a0, 3
+; RV64IA-ZACAS-NEXT:    lui a4, 16
+; RV64IA-ZACAS-NEXT:    addiw a4, a4, -1
+; RV64IA-ZACAS-NEXT:    sllw a5, a4, a0
+; RV64IA-ZACAS-NEXT:    and a1, a1, a4
+; RV64IA-ZACAS-NEXT:    sllw a1, a1, a0
+; RV64IA-ZACAS-NEXT:    and a2, a2, a4
+; RV64IA-ZACAS-NEXT:    sllw a0, a2, a0
+; RV64IA-ZACAS-NEXT:  .LBB12_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-ZACAS-NEXT:    lr.w.aq a2, (a3)
+; RV64IA-ZACAS-NEXT:    and a4, a2, a5
+; RV64IA-ZACAS-NEXT:    bne a4, a1, .LBB12_3
+; RV64IA-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB12_1 Depth=1
+; RV64IA-ZACAS-NEXT:    xor a4, a2, a0
+; RV64IA-ZACAS-NEXT:    and a4, a4, a5
+; RV64IA-ZACAS-NEXT:    xor a4, a2, a4
+; RV64IA-ZACAS-NEXT:    sc.w a4, a4, (a3)
+; RV64IA-ZACAS-NEXT:    bnez a4, .LBB12_1
+; RV64IA-ZACAS-NEXT:  .LBB12_3:
+; RV64IA-ZACAS-NEXT:    ret
+;
 ; RV64IA-TSO-LABEL: cmpxchg_i16_acquire_acquire:
 ; RV64IA-TSO:       # %bb.0:
 ; RV64IA-TSO-NEXT:    andi a3, a0, -4
@@ -1374,6 +2060,52 @@ define void @cmpxchg_i16_acquire_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind
 ; RV64IA-TSO-NEXT:    bnez a4, .LBB12_1
 ; RV64IA-TSO-NEXT:  .LBB12_3:
 ; RV64IA-TSO-NEXT:    ret
+; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i16_acquire_acquire:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    andi a3, a0, -4
+; RV32IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT:    lui a4, 16
+; RV32IA-WMO-ZACAS-NEXT:    addi a4, a4, -1
+; RV32IA-WMO-ZACAS-NEXT:    sll a5, a4, a0
+; RV32IA-WMO-ZACAS-NEXT:    and a1, a1, a4
+; RV32IA-WMO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:    and a2, a2, a4
+; RV32IA-WMO-ZACAS-NEXT:    sll a0, a2, a0
+; RV32IA-WMO-ZACAS-NEXT:  .LBB12_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    lr.w.aq a2, (a3)
+; RV32IA-WMO-ZACAS-NEXT:    and a4, a2, a5
+; RV32IA-WMO-ZACAS-NEXT:    bne a4, a1, .LBB12_3
+; RV32IA-WMO-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB12_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    xor a4, a2, a0
+; RV32IA-WMO-ZACAS-NEXT:    and a4, a4, a5
+; RV32IA-WMO-ZACAS-NEXT:    xor a4, a2, a4
+; RV32IA-WMO-ZACAS-NEXT:    sc.w a4, a4, (a3)
+; RV32IA-WMO-ZACAS-NEXT:    bnez a4, .LBB12_1
+; RV32IA-WMO-ZACAS-NEXT:  .LBB12_3:
+; RV32IA-WMO-ZACAS-NEXT:    ret
+; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i16_acquire_acquire:
+; RV64IA-WMO-ZACAS:       # %bb.0:
+; RV64IA-WMO-ZACAS-NEXT:    andi a3, a0, -4
+; RV64IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV64IA-WMO-ZACAS-NEXT:    lui a4, 16
+; RV64IA-WMO-ZACAS-NEXT:    addiw a4, a4, -1
+; RV64IA-WMO-ZACAS-NEXT:    sllw a5, a4, a0
+; RV64IA-WMO-ZACAS-NEXT:    and a1, a1, a4
+; RV64IA-WMO-ZACAS-NEXT:    sllw a1, a1, a0
+; RV64IA-WMO-ZACAS-NEXT:    and a2, a2, a4
+; RV64IA-WMO-ZACAS-NEXT:    sllw a0, a2, a0
+; RV64IA-WMO-ZACAS-NEXT:  .LBB12_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-ZACAS-NEXT:    lr.w.aq a2, (a3)
+; RV64IA-WMO-ZACAS-NEXT:    and a4, a2, a5
+; RV64IA-WMO-ZACAS-NEXT:    bne a4, a1, .LBB12_3
+; RV64IA-WMO-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB12_1 Depth=1
+; RV64IA-WMO-ZACAS-NEXT:    xor a4, a2, a0
+; RV64IA-WMO-ZACAS-NEXT:    and a4, a4, a5
+; RV64IA-WMO-ZACAS-NEXT:    xor a4, a2, a4
+; RV64IA-WMO-ZACAS-NEXT:    sc.w a4, a4, (a3)
+; RV64IA-WMO-ZACAS-NEXT:    bnez a4, .LBB12_1
+; RV64IA-WMO-ZACAS-NEXT:  .LBB12_3:
+; RV64IA-WMO-ZACAS-NEXT:    ret
   %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val acquire acquire
   ret void
 }
@@ -1416,6 +2148,30 @@ define void @cmpxchg_i16_release_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin
 ; RV32IA-WMO-NEXT:  .LBB13_3:
 ; RV32IA-WMO-NEXT:    ret
 ;
+; RV32IA-ZACAS-LABEL: cmpxchg_i16_release_monotonic:
+; RV32IA-ZACAS:       # %bb.0:
+; RV32IA-ZACAS-NEXT:    andi a3, a0, -4
+; RV32IA-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-ZACAS-NEXT:    lui a4, 16
+; RV32IA-ZACAS-NEXT:    addi a4, a4, -1
+; RV32IA-ZACAS-NEXT:    sll a5, a4, a0
+; RV32IA-ZACAS-NEXT:    and a1, a1, a4
+; RV32IA-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-ZACAS-NEXT:    and a2, a2, a4
+; RV32IA-ZACAS-NEXT:    sll a0, a2, a0
+; RV32IA-ZACAS-NEXT:  .LBB13_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT:    lr.w a2, (a3)
+; RV32IA-ZACAS-NEXT:    and a4, a2, a5
+; RV32IA-ZACAS-NEXT:    bne a4, a1, .LBB13_3
+; RV32IA-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB13_1 Depth=1
+; RV32IA-ZACAS-NEXT:    xor a4, a2, a0
+; RV32IA-ZACAS-NEXT:    and a4, a4, a5
+; RV32IA-ZACAS-NEXT:    xor a4, a2, a4
+; RV32IA-ZACAS-NEXT:    sc.w.rl a4, a4, (a3)
+; RV32IA-ZACAS-NEXT:    bnez a4, .LBB13_1
+; RV32IA-ZACAS-NEXT:  .LBB13_3:
+; RV32IA-ZACAS-NEXT:    ret
+;
 ; RV32IA-TSO-LABEL: cmpxchg_i16_release_monotonic:
 ; RV32IA-TSO:       # %bb.0:
 ; RV32IA-TSO-NEXT:    andi a3, a0, -4
@@ -1477,6 +2233,30 @@ define void @cmpxchg_i16_release_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin
 ; RV64IA-WMO-NEXT:  .LBB13_3:
 ; RV64IA-WMO-NEXT:    ret
 ;
+; RV64IA-ZACAS-LABEL: cmpxchg_i16_release_monotonic:
+; RV64IA-ZACAS:       # %bb.0:
+; RV64IA-ZACAS-NEXT:    andi a3, a0, -4
+; RV64IA-ZACAS-NEXT:    slli a0, a0, 3
+; RV64IA-ZACAS-NEXT:    lui a4, 16
+; RV64IA-ZACAS-NEXT:    addiw a4, a4, -1
+; RV64IA-ZACAS-NEXT:    sllw a5, a4, a0
+; RV64IA-ZACAS-NEXT:    and a1, a1, a4
+; RV64IA-ZACAS-NEXT:    sllw a1, a1, a0
+; RV64IA-ZACAS-NEXT:    and a2, a2, a4
+; RV64IA-ZACAS-NEXT:    sllw a0, a2, a0
+; RV64IA-ZACAS-NEXT:  .LBB13_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-ZACAS-NEXT:    lr.w a2, (a3)
+; RV64IA-ZACAS-NEXT:    and a4, a2, a5
+; RV64IA-ZACAS-NEXT:    bne a4, a1, .LBB13_3
+; RV64IA-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB13_1 Depth=1
+; RV64IA-ZACAS-NEXT:    xor a4, a2, a0
+; RV64IA-ZACAS-NEXT:    and a4, a4, a5
+; RV64IA-ZACAS-NEXT:    xor a4, a2, a4
+; RV64IA-ZACAS-NEXT:    sc.w.rl a4, a4, (a3)
+; RV64IA-ZACAS-NEXT:    bnez a4, .LBB13_1
+; RV64IA-ZACAS-NEXT:  .LBB13_3:
+; RV64IA-ZACAS-NEXT:    ret
+;
 ; RV64IA-TSO-LABEL: cmpxchg_i16_release_monotonic:
 ; RV64IA-TSO:       # %bb.0:
 ; RV64IA-TSO-NEXT:    andi a3, a0, -4
@@ -1500,6 +2280,52 @@ define void @cmpxchg_i16_release_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin
 ; RV64IA-TSO-NEXT:    bnez a4, .LBB13_1
 ; RV64IA-TSO-NEXT:  .LBB13_3:
 ; RV64IA-TSO-NEXT:    ret
+; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i16_release_monotonic:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    andi a3, a0, -4
+; RV32IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT:    lui a4, 16
+; RV32IA-WMO-ZACAS-NEXT:    addi a4, a4, -1
+; RV32IA-WMO-ZACAS-NEXT:    sll a5, a4, a0
+; RV32IA-WMO-ZACAS-NEXT:    and a1, a1, a4
+; RV32IA-WMO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:    and a2, a2, a4
+; RV32IA-WMO-ZACAS-NEXT:    sll a0, a2, a0
+; RV32IA-WMO-ZACAS-NEXT:  .LBB13_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    lr.w a2, (a3)
+; RV32IA-WMO-ZACAS-NEXT:    and a4, a2, a5
+; RV32IA-WMO-ZACAS-NEXT:    bne a4, a1, .LBB13_3
+; RV32IA-WMO-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB13_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    xor a4, a2, a0
+; RV32IA-WMO-ZACAS-NEXT:    and a4, a4, a5
+; RV32IA-WMO-ZACAS-NEXT:    xor a4, a2, a4
+; RV32IA-WMO-ZACAS-NEXT:    sc.w.rl a4, a4, (a3)
+; RV32IA-WMO-ZACAS-NEXT:    bnez a4, .LBB13_1
+; RV32IA-WMO-ZACAS-NEXT:  .LBB13_3:
+; RV32IA-WMO-ZACAS-NEXT:    ret
+; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i16_release_monotonic:
+; RV64IA-WMO-ZACAS:       # %bb.0:
+; RV64IA-WMO-ZACAS-NEXT:    andi a3, a0, -4
+; RV64IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV64IA-WMO-ZACAS-NEXT:    lui a4, 16
+; RV64IA-WMO-ZACAS-NEXT:    addiw a4, a4, -1
+; RV64IA-WMO-ZACAS-NEXT:    sllw a5, a4, a0
+; RV64IA-WMO-ZACAS-NEXT:    and a1, a1, a4
+; RV64IA-WMO-ZACAS-NEXT:    sllw a1, a1, a0
+; RV64IA-WMO-ZACAS-NEXT:    and a2, a2, a4
+; RV64IA-WMO-ZACAS-NEXT:    sllw a0, a2, a0
+; RV64IA-WMO-ZACAS-NEXT:  .LBB13_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-ZACAS-NEXT:    lr.w a2, (a3)
+; RV64IA-WMO-ZACAS-NEXT:    and a4, a2, a5
+; RV64IA-WMO-ZACAS-NEXT:    bne a4, a1, .LBB13_3
+; RV64IA-WMO-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB13_1 Depth=1
+; RV64IA-WMO-ZACAS-NEXT:    xor a4, a2, a0
+; RV64IA-WMO-ZACAS-NEXT:    and a4, a4, a5
+; RV64IA-WMO-ZACAS-NEXT:    xor a4, a2, a4
+; RV64IA-WMO-ZACAS-NEXT:    sc.w.rl a4, a4, (a3)
+; RV64IA-WMO-ZACAS-NEXT:    bnez a4, .LBB13_1
+; RV64IA-WMO-ZACAS-NEXT:  .LBB13_3:
+; RV64IA-WMO-ZACAS-NEXT:    ret
   %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val release monotonic
   ret void
 }
@@ -1542,6 +2368,30 @@ define void @cmpxchg_i16_release_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind
 ; RV32IA-WMO-NEXT:  .LBB14_3:
 ; RV32IA-WMO-NEXT:    ret
 ;
+; RV32IA-ZACAS-LABEL: cmpxchg_i16_release_acquire:
+; RV32IA-ZACAS:       # %bb.0:
+; RV32IA-ZACAS-NEXT:    andi a3, a0, -4
+; RV32IA-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-ZACAS-NEXT:    lui a4, 16
+; RV32IA-ZACAS-NEXT:    addi a4, a4, -1
+; RV32IA-ZACAS-NEXT:    sll a5, a4, a0
+; RV32IA-ZACAS-NEXT:    and a1, a1, a4
+; RV32IA-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-ZACAS-NEXT:    and a2, a2, a4
+; RV32IA-ZACAS-NEXT:    sll a0, a2, a0
+; RV32IA-ZACAS-NEXT:  .LBB14_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT:    lr.w.aq a2, (a3)
+; RV32IA-ZACAS-NEXT:    and a4, a2, a5
+; RV32IA-ZACAS-NEXT:    bne a4, a1, .LBB14_3
+; RV32IA-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB14_1 Depth=1
+; RV32IA-ZACAS-NEXT:    xor a4, a2, a0
+; RV32IA-ZACAS-NEXT:    and a4, a4, a5
+; RV32IA-ZACAS-NEXT:    xor a4, a2, a4
+; RV32IA-ZACAS-NEXT:    sc.w.rl a4, a4, (a3)
+; RV32IA-ZACAS-NEXT:    bnez a4, .LBB14_1
+; RV32IA-ZACAS-NEXT:  .LBB14_3:
+; RV32IA-ZACAS-NEXT:    ret
+;
 ; RV32IA-TSO-LABEL: cmpxchg_i16_release_acquire:
 ; RV32IA-TSO:       # %bb.0:
 ; RV32IA-TSO-NEXT:    andi a3, a0, -4
@@ -1603,6 +2453,30 @@ define void @cmpxchg_i16_release_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind
 ; RV64IA-WMO-NEXT:  .LBB14_3:
 ; RV64IA-WMO-NEXT:    ret
 ;
+; RV64IA-ZACAS-LABEL: cmpxchg_i16_release_acquire:
+; RV64IA-ZACAS:       # %bb.0:
+; RV64IA-ZACAS-NEXT:    andi a3, a0, -4
+; RV64IA-ZACAS-NEXT:    slli a0, a0, 3
+; RV64IA-ZACAS-NEXT:    lui a4, 16
+; RV64IA-ZACAS-NEXT:    addiw a4, a4, -1
+; RV64IA-ZACAS-NEXT:    sllw a5, a4, a0
+; RV64IA-ZACAS-NEXT:    and a1, a1, a4
+; RV64IA-ZACAS-NEXT:    sllw a1, a1, a0
+; RV64IA-ZACAS-NEXT:    and a2, a2, a4
+; RV64IA-ZACAS-NEXT:    sllw a0, a2, a0
+; RV64IA-ZACAS-NEXT:  .LBB14_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-ZACAS-NEXT:    lr.w.aq a2, (a3)
+; RV64IA-ZACAS-NEXT:    and a4, a2, a5
+; RV64IA-ZACAS-NEXT:    bne a4, a1, .LBB14_3
+; RV64IA-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB14_1 Depth=1
+; RV64IA-ZACAS-NEXT:    xor a4, a2, a0
+; RV64IA-ZACAS-NEXT:    and a4, a4, a5
+; RV64IA-ZACAS-NEXT:    xor a4, a2, a4
+; RV64IA-ZACAS-NEXT:    sc.w.rl a4, a4, (a3)
+; RV64IA-ZACAS-NEXT:    bnez a4, .LBB14_1
+; RV64IA-ZACAS-NEXT:  .LBB14_3:
+; RV64IA-ZACAS-NEXT:    ret
+;
 ; RV64IA-TSO-LABEL: cmpxchg_i16_release_acquire:
 ; RV64IA-TSO:       # %bb.0:
 ; RV64IA-TSO-NEXT:    andi a3, a0, -4
@@ -1626,6 +2500,52 @@ define void @cmpxchg_i16_release_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind
 ; RV64IA-TSO-NEXT:    bnez a4, .LBB14_1
 ; RV64IA-TSO-NEXT:  .LBB14_3:
 ; RV64IA-TSO-NEXT:    ret
+; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i16_release_acquire:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    andi a3, a0, -4
+; RV32IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT:    lui a4, 16
+; RV32IA-WMO-ZACAS-NEXT:    addi a4, a4, -1
+; RV32IA-WMO-ZACAS-NEXT:    sll a5, a4, a0
+; RV32IA-WMO-ZACAS-NEXT:    and a1, a1, a4
+; RV32IA-WMO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:    and a2, a2, a4
+; RV32IA-WMO-ZACAS-NEXT:    sll a0, a2, a0
+; RV32IA-WMO-ZACAS-NEXT:  .LBB14_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    lr.w.aq a2, (a3)
+; RV32IA-WMO-ZACAS-NEXT:    and a4, a2, a5
+; RV32IA-WMO-ZACAS-NEXT:    bne a4, a1, .LBB14_3
+; RV32IA-WMO-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB14_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    xor a4, a2, a0
+; RV32IA-WMO-ZACAS-NEXT:    and a4, a4, a5
+; RV32IA-WMO-ZACAS-NEXT:    xor a4, a2, a4
+; RV32IA-WMO-ZACAS-NEXT:    sc.w.rl a4, a4, (a3)
+; RV32IA-WMO-ZACAS-NEXT:    bnez a4, .LBB14_1
+; RV32IA-WMO-ZACAS-NEXT:  .LBB14_3:
+; RV32IA-WMO-ZACAS-NEXT:    ret
+; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i16_release_acquire:
+; RV64IA-WMO-ZACAS:       # %bb.0:
+; RV64IA-WMO-ZACAS-NEXT:    andi a3, a0, -4
+; RV64IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV64IA-WMO-ZACAS-NEXT:    lui a4, 16
+; RV64IA-WMO-ZACAS-NEXT:    addiw a4, a4, -1
+; RV64IA-WMO-ZACAS-NEXT:    sllw a5, a4, a0
+; RV64IA-WMO-ZACAS-NEXT:    and a1, a1, a4
+; RV64IA-WMO-ZACAS-NEXT:    sllw a1, a1, a0
+; RV64IA-WMO-ZACAS-NEXT:    and a2, a2, a4
+; RV64IA-WMO-ZACAS-NEXT:    sllw a0, a2, a0
+; RV64IA-WMO-ZACAS-NEXT:  .LBB14_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-ZACAS-NEXT:    lr.w.aq a2, (a3)
+; RV64IA-WMO-ZACAS-NEXT:    and a4, a2, a5
+; RV64IA-WMO-ZACAS-NEXT:    bne a4, a1, .LBB14_3
+; RV64IA-WMO-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB14_1 Depth=1
+; RV64IA-WMO-ZACAS-NEXT:    xor a4, a2, a0
+; RV64IA-WMO-ZACAS-NEXT:    and a4, a4, a5
+; RV64IA-WMO-ZACAS-NEXT:    xor a4, a2, a4
+; RV64IA-WMO-ZACAS-NEXT:    sc.w.rl a4, a4, (a3)
+; RV64IA-WMO-ZACAS-NEXT:    bnez a4, .LBB14_1
+; RV64IA-WMO-ZACAS-NEXT:  .LBB14_3:
+; RV64IA-WMO-ZACAS-NEXT:    ret
   %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val release acquire
   ret void
 }
@@ -1668,6 +2588,30 @@ define void @cmpxchg_i16_acq_rel_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin
 ; RV32IA-WMO-NEXT:  .LBB15_3:
 ; RV32IA-WMO-NEXT:    ret
 ;
+; RV32IA-ZACAS-LABEL: cmpxchg_i16_acq_rel_monotonic:
+; RV32IA-ZACAS:       # %bb.0:
+; RV32IA-ZACAS-NEXT:    andi a3, a0, -4
+; RV32IA-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-ZACAS-NEXT:    lui a4, 16
+; RV32IA-ZACAS-NEXT:    addi a4, a4, -1
+; RV32IA-ZACAS-NEXT:    sll a5, a4, a0
+; RV32IA-ZACAS-NEXT:    and a1, a1, a4
+; RV32IA-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-ZACAS-NEXT:    and a2, a2, a4
+; RV32IA-ZACAS-NEXT:    sll a0, a2, a0
+; RV32IA-ZACAS-NEXT:  .LBB15_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT:    lr.w.aq a2, (a3)
+; RV32IA-ZACAS-NEXT:    and a4, a2, a5
+; RV32IA-ZACAS-NEXT:    bne a4, a1, .LBB15_3
+; RV32IA-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB15_1 Depth=1
+; RV32IA-ZACAS-NEXT:    xor a4, a2, a0
+; RV32IA-ZACAS-NEXT:    and a4, a4, a5
+; RV32IA-ZACAS-NEXT:    xor a4, a2, a4
+; RV32IA-ZACAS-NEXT:    sc.w.rl a4, a4, (a3)
+; RV32IA-ZACAS-NEXT:    bnez a4, .LBB15_1
+; RV32IA-ZACAS-NEXT:  .LBB15_3:
+; RV32IA-ZACAS-NEXT:    ret
+;
 ; RV32IA-TSO-LABEL: cmpxchg_i16_acq_rel_monotonic:
 ; RV32IA-TSO:       # %bb.0:
 ; RV32IA-TSO-NEXT:    andi a3, a0, -4
@@ -1729,6 +2673,30 @@ define void @cmpxchg_i16_acq_rel_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin
 ; RV64IA-WMO-NEXT:  .LBB15_3:
 ; RV64IA-WMO-NEXT:    ret
 ;
+; RV64IA-ZACAS-LABEL: cmpxchg_i16_acq_rel_monotonic:
+; RV64IA-ZACAS:       # %bb.0:
+; RV64IA-ZACAS-NEXT:    andi a3, a0, -4
+; RV64IA-ZACAS-NEXT:    slli a0, a0, 3
+; RV64IA-ZACAS-NEXT:    lui a4, 16
+; RV64IA-ZACAS-NEXT:    addiw a4, a4, -1
+; RV64IA-ZACAS-NEXT:    sllw a5, a4, a0
+; RV64IA-ZACAS-NEXT:    and a1, a1, a4
+; RV64IA-ZACAS-NEXT:    sllw a1, a1, a0
+; RV64IA-ZACAS-NEXT:    and a2, a2, a4
+; RV64IA-ZACAS-NEXT:    sllw a0, a2, a0
+; RV64IA-ZACAS-NEXT:  .LBB15_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-ZACAS-NEXT:    lr.w.aq a2, (a3)
+; RV64IA-ZACAS-NEXT:    and a4, a2, a5
+; RV64IA-ZACAS-NEXT:    bne a4, a1, .LBB15_3
+; RV64IA-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB15_1 Depth=1
+; RV64IA-ZACAS-NEXT:    xor a4, a2, a0
+; RV64IA-ZACAS-NEXT:    and a4, a4, a5
+; RV64IA-ZACAS-NEXT:    xor a4, a2, a4
+; RV64IA-ZACAS-NEXT:    sc.w.rl a4, a4, (a3)
+; RV64IA-ZACAS-NEXT:    bnez a4, .LBB15_1
+; RV64IA-ZACAS-NEXT:  .LBB15_3:
+; RV64IA-ZACAS-NEXT:    ret
+;
 ; RV64IA-TSO-LABEL: cmpxchg_i16_acq_rel_monotonic:
 ; RV64IA-TSO:       # %bb.0:
 ; RV64IA-TSO-NEXT:    andi a3, a0, -4
@@ -1752,6 +2720,52 @@ define void @cmpxchg_i16_acq_rel_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin
 ; RV64IA-TSO-NEXT:    bnez a4, .LBB15_1
 ; RV64IA-TSO-NEXT:  .LBB15_3:
 ; RV64IA-TSO-NEXT:    ret
+; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i16_acq_rel_monotonic:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    andi a3, a0, -4
+; RV32IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT:    lui a4, 16
+; RV32IA-WMO-ZACAS-NEXT:    addi a4, a4, -1
+; RV32IA-WMO-ZACAS-NEXT:    sll a5, a4, a0
+; RV32IA-WMO-ZACAS-NEXT:    and a1, a1, a4
+; RV32IA-WMO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:    and a2, a2, a4
+; RV32IA-WMO-ZACAS-NEXT:    sll a0, a2, a0
+; RV32IA-WMO-ZACAS-NEXT:  .LBB15_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    lr.w.aq a2, (a3)
+; RV32IA-WMO-ZACAS-NEXT:    and a4, a2, a5
+; RV32IA-WMO-ZACAS-NEXT:    bne a4, a1, .LBB15_3
+; RV32IA-WMO-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB15_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    xor a4, a2, a0
+; RV32IA-WMO-ZACAS-NEXT:    and a4, a4, a5
+; RV32IA-WMO-ZACAS-NEXT:    xor a4, a2, a4
+; RV32IA-WMO-ZACAS-NEXT:    sc.w.rl a4, a4, (a3)
+; RV32IA-WMO-ZACAS-NEXT:    bnez a4, .LBB15_1
+; RV32IA-WMO-ZACAS-NEXT:  .LBB15_3:
+; RV32IA-WMO-ZACAS-NEXT:    ret
+; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i16_acq_rel_monotonic:
+; RV64IA-WMO-ZACAS:       # %bb.0:
+; RV64IA-WMO-ZACAS-NEXT:    andi a3, a0, -4
+; RV64IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV64IA-WMO-ZACAS-NEXT:    lui a4, 16
+; RV64IA-WMO-ZACAS-NEXT:    addiw a4, a4, -1
+; RV64IA-WMO-ZACAS-NEXT:    sllw a5, a4, a0
+; RV64IA-WMO-ZACAS-NEXT:    and a1, a1, a4
+; RV64IA-WMO-ZACAS-NEXT:    sllw a1, a1, a0
+; RV64IA-WMO-ZACAS-NEXT:    and a2, a2, a4
+; RV64IA-WMO-ZACAS-NEXT:    sllw a0, a2, a0
+; RV64IA-WMO-ZACAS-NEXT:  .LBB15_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-ZACAS-NEXT:    lr.w.aq a2, (a3)
+; RV64IA-WMO-ZACAS-NEXT:    and a4, a2, a5
+; RV64IA-WMO-ZACAS-NEXT:    bne a4, a1, .LBB15_3
+; RV64IA-WMO-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB15_1 Depth=1
+; RV64IA-WMO-ZACAS-NEXT:    xor a4, a2, a0
+; RV64IA-WMO-ZACAS-NEXT:    and a4, a4, a5
+; RV64IA-WMO-ZACAS-NEXT:    xor a4, a2, a4
+; RV64IA-WMO-ZACAS-NEXT:    sc.w.rl a4, a4, (a3)
+; RV64IA-WMO-ZACAS-NEXT:    bnez a4, .LBB15_1
+; RV64IA-WMO-ZACAS-NEXT:  .LBB15_3:
+; RV64IA-WMO-ZACAS-NEXT:    ret
   %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val acq_rel monotonic
   ret void
 }
@@ -1794,6 +2808,30 @@ define void @cmpxchg_i16_acq_rel_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind
 ; RV32IA-WMO-NEXT:  .LBB16_3:
 ; RV32IA-WMO-NEXT:    ret
 ;
+; RV32IA-ZACAS-LABEL: cmpxchg_i16_acq_rel_acquire:
+; RV32IA-ZACAS:       # %bb.0:
+; RV32IA-ZACAS-NEXT:    andi a3, a0, -4
+; RV32IA-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-ZACAS-NEXT:    lui a4, 16
+; RV32IA-ZACAS-NEXT:    addi a4, a4, -1
+; RV32IA-ZACAS-NEXT:    sll a5, a4, a0
+; RV32IA-ZACAS-NEXT:    and a1, a1, a4
+; RV32IA-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-ZACAS-NEXT:    and a2, a2, a4
+; RV32IA-ZACAS-NEXT:    sll a0, a2, a0
+; RV32IA-ZACAS-NEXT:  .LBB16_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT:    lr.w.aq a2, (a3)
+; RV32IA-ZACAS-NEXT:    and a4, a2, a5
+; RV32IA-ZACAS-NEXT:    bne a4, a1, .LBB16_3
+; RV32IA-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB16_1 Depth=1
+; RV32IA-ZACAS-NEXT:    xor a4, a2, a0
+; RV32IA-ZACAS-NEXT:    and a4, a4, a5
+; RV32IA-ZACAS-NEXT:    xor a4, a2, a4
+; RV32IA-ZACAS-NEXT:    sc.w.rl a4, a4, (a3)
+; RV32IA-ZACAS-NEXT:    bnez a4, .LBB16_1
+; RV32IA-ZACAS-NEXT:  .LBB16_3:
+; RV32IA-ZACAS-NEXT:    ret
+;
 ; RV32IA-TSO-LABEL: cmpxchg_i16_acq_rel_acquire:
 ; RV32IA-TSO:       # %bb.0:
 ; RV32IA-TSO-NEXT:    andi a3, a0, -4
@@ -1855,6 +2893,30 @@ define void @cmpxchg_i16_acq_rel_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind
 ; RV64IA-WMO-NEXT:  .LBB16_3:
 ; RV64IA-WMO-NEXT:    ret
 ;
+; RV64IA-ZACAS-LABEL: cmpxchg_i16_acq_rel_acquire:
+; RV64IA-ZACAS:       # %bb.0:
+; RV64IA-ZACAS-NEXT:    andi a3, a0, -4
+; RV64IA-ZACAS-NEXT:    slli a0, a0, 3
+; RV64IA-ZACAS-NEXT:    lui a4, 16
+; RV64IA-ZACAS-NEXT:    addiw a4, a4, -1
+; RV64IA-ZACAS-NEXT:    sllw a5, a4, a0
+; RV64IA-ZACAS-NEXT:    and a1, a1, a4
+; RV64IA-ZACAS-NEXT:    sllw a1, a1, a0
+; RV64IA-ZACAS-NEXT:    and a2, a2, a4
+; RV64IA-ZACAS-NEXT:    sllw a0, a2, a0
+; RV64IA-ZACAS-NEXT:  .LBB16_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-ZACAS-NEXT:    lr.w.aq a2, (a3)
+; RV64IA-ZACAS-NEXT:    and a4, a2, a5
+; RV64IA-ZACAS-NEXT:    bne a4, a1, .LBB16_3
+; RV64IA-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB16_1 Depth=1
+; RV64IA-ZACAS-NEXT:    xor a4, a2, a0
+; RV64IA-ZACAS-NEXT:    and a4, a4, a5
+; RV64IA-ZACAS-NEXT:    xor a4, a2, a4
+; RV64IA-ZACAS-NEXT:    sc.w.rl a4, a4, (a3)
+; RV64IA-ZACAS-NEXT:    bnez a4, .LBB16_1
+; RV64IA-ZACAS-NEXT:  .LBB16_3:
+; RV64IA-ZACAS-NEXT:    ret
+;
 ; RV64IA-TSO-LABEL: cmpxchg_i16_acq_rel_acquire:
 ; RV64IA-TSO:       # %bb.0:
 ; RV64IA-TSO-NEXT:    andi a3, a0, -4
@@ -1878,6 +2940,52 @@ define void @cmpxchg_i16_acq_rel_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind
 ; RV64IA-TSO-NEXT:    bnez a4, .LBB16_1
 ; RV64IA-TSO-NEXT:  .LBB16_3:
 ; RV64IA-TSO-NEXT:    ret
+; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i16_acq_rel_acquire:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    andi a3, a0, -4
+; RV32IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT:    lui a4, 16
+; RV32IA-WMO-ZACAS-NEXT:    addi a4, a4, -1
+; RV32IA-WMO-ZACAS-NEXT:    sll a5, a4, a0
+; RV32IA-WMO-ZACAS-NEXT:    and a1, a1, a4
+; RV32IA-WMO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:    and a2, a2, a4
+; RV32IA-WMO-ZACAS-NEXT:    sll a0, a2, a0
+; RV32IA-WMO-ZACAS-NEXT:  .LBB16_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    lr.w.aq a2, (a3)
+; RV32IA-WMO-ZACAS-NEXT:    and a4, a2, a5
+; RV32IA-WMO-ZACAS-NEXT:    bne a4, a1, .LBB16_3
+; RV32IA-WMO-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB16_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    xor a4, a2, a0
+; RV32IA-WMO-ZACAS-NEXT:    and a4, a4, a5
+; RV32IA-WMO-ZACAS-NEXT:    xor a4, a2, a4
+; RV32IA-WMO-ZACAS-NEXT:    sc.w.rl a4, a4, (a3)
+; RV32IA-WMO-ZACAS-NEXT:    bnez a4, .LBB16_1
+; RV32IA-WMO-ZACAS-NEXT:  .LBB16_3:
+; RV32IA-WMO-ZACAS-NEXT:    ret
+; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i16_acq_rel_acquire:
+; RV64IA-WMO-ZACAS:       # %bb.0:
+; RV64IA-WMO-ZACAS-NEXT:    andi a3, a0, -4
+; RV64IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV64IA-WMO-ZACAS-NEXT:    lui a4, 16
+; RV64IA-WMO-ZACAS-NEXT:    addiw a4, a4, -1
+; RV64IA-WMO-ZACAS-NEXT:    sllw a5, a4, a0
+; RV64IA-WMO-ZACAS-NEXT:    and a1, a1, a4
+; RV64IA-WMO-ZACAS-NEXT:    sllw a1, a1, a0
+; RV64IA-WMO-ZACAS-NEXT:    and a2, a2, a4
+; RV64IA-WMO-ZACAS-NEXT:    sllw a0, a2, a0
+; RV64IA-WMO-ZACAS-NEXT:  .LBB16_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-ZACAS-NEXT:    lr.w.aq a2, (a3)
+; RV64IA-WMO-ZACAS-NEXT:    and a4, a2, a5
+; RV64IA-WMO-ZACAS-NEXT:    bne a4, a1, .LBB16_3
+; RV64IA-WMO-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB16_1 Depth=1
+; RV64IA-WMO-ZACAS-NEXT:    xor a4, a2, a0
+; RV64IA-WMO-ZACAS-NEXT:    and a4, a4, a5
+; RV64IA-WMO-ZACAS-NEXT:    xor a4, a2, a4
+; RV64IA-WMO-ZACAS-NEXT:    sc.w.rl a4, a4, (a3)
+; RV64IA-WMO-ZACAS-NEXT:    bnez a4, .LBB16_1
+; RV64IA-WMO-ZACAS-NEXT:  .LBB16_3:
+; RV64IA-WMO-ZACAS-NEXT:    ret
   %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val acq_rel acquire
   ret void
 }
@@ -2130,16 +3238,32 @@ define void @cmpxchg_i32_monotonic_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounw
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: cmpxchg_i32_monotonic_monotonic:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:  .LBB20_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w a3, (a0)
-; RV32IA-NEXT:    bne a3, a1, .LBB20_3
-; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB20_1 Depth=1
-; RV32IA-NEXT:    sc.w a4, a2, (a0)
-; RV32IA-NEXT:    bnez a4, .LBB20_1
-; RV32IA-NEXT:  .LBB20_3:
-; RV32IA-NEXT:    ret
+; RV32IA-WMO-LABEL: cmpxchg_i32_monotonic_monotonic:
+; RV32IA-WMO:       # %bb.0:
+; RV32IA-WMO-NEXT:  .LBB20_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NEXT:    lr.w a3, (a0)
+; RV32IA-WMO-NEXT:    bne a3, a1, .LBB20_3
+; RV32IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB20_1 Depth=1
+; RV32IA-WMO-NEXT:    sc.w a4, a2, (a0)
+; RV32IA-WMO-NEXT:    bnez a4, .LBB20_1
+; RV32IA-WMO-NEXT:  .LBB20_3:
+; RV32IA-WMO-NEXT:    ret
+;
+; RV32IA-ZACAS-LABEL: cmpxchg_i32_monotonic_monotonic:
+; RV32IA-ZACAS:       # %bb.0:
+; RV32IA-ZACAS-NEXT:    amocas.w a1, a2, (a0)
+; RV32IA-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-LABEL: cmpxchg_i32_monotonic_monotonic:
+; RV32IA-TSO:       # %bb.0:
+; RV32IA-TSO-NEXT:  .LBB20_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NEXT:    lr.w a3, (a0)
+; RV32IA-TSO-NEXT:    bne a3, a1, .LBB20_3
+; RV32IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB20_1 Depth=1
+; RV32IA-TSO-NEXT:    sc.w a4, a2, (a0)
+; RV32IA-TSO-NEXT:    bnez a4, .LBB20_1
+; RV32IA-TSO-NEXT:  .LBB20_3:
+; RV32IA-TSO-NEXT:    ret
 ;
 ; RV64I-LABEL: cmpxchg_i32_monotonic_monotonic:
 ; RV64I:       # %bb.0:
@@ -2154,17 +3278,44 @@ define void @cmpxchg_i32_monotonic_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounw
 ; RV64I-NEXT:    addi sp, sp, 16
 ; RV64I-NEXT:    ret
 ;
-; RV64IA-LABEL: cmpxchg_i32_monotonic_monotonic:
-; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    sext.w a1, a1
-; RV64IA-NEXT:  .LBB20_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w a3, (a0)
-; RV64IA-NEXT:    bne a3, a1, .LBB20_3
-; RV64IA-NEXT:  # %bb.2: # in Loop: Header=BB20_1 Depth=1
-; RV64IA-NEXT:    sc.w a4, a2, (a0)
-; RV64IA-NEXT:    bnez a4, .LBB20_1
-; RV64IA-NEXT:  .LBB20_3:
-; RV64IA-NEXT:    ret
+; RV64IA-WMO-LABEL: cmpxchg_i32_monotonic_monotonic:
+; RV64IA-WMO:       # %bb.0:
+; RV64IA-WMO-NEXT:    sext.w a1, a1
+; RV64IA-WMO-NEXT:  .LBB20_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT:    lr.w a3, (a0)
+; RV64IA-WMO-NEXT:    bne a3, a1, .LBB20_3
+; RV64IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB20_1 Depth=1
+; RV64IA-WMO-NEXT:    sc.w a4, a2, (a0)
+; RV64IA-WMO-NEXT:    bnez a4, .LBB20_1
+; RV64IA-WMO-NEXT:  .LBB20_3:
+; RV64IA-WMO-NEXT:    ret
+;
+; RV64IA-ZACAS-LABEL: cmpxchg_i32_monotonic_monotonic:
+; RV64IA-ZACAS:       # %bb.0:
+; RV64IA-ZACAS-NEXT:    sext.w a1, a1
+; RV64IA-ZACAS-NEXT:    amocas.w a1, a2, (a0)
+; RV64IA-ZACAS-NEXT:    ret
+;
+; RV64IA-TSO-LABEL: cmpxchg_i32_monotonic_monotonic:
+; RV64IA-TSO:       # %bb.0:
+; RV64IA-TSO-NEXT:    sext.w a1, a1
+; RV64IA-TSO-NEXT:  .LBB20_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT:    lr.w a3, (a0)
+; RV64IA-TSO-NEXT:    bne a3, a1, .LBB20_3
+; RV64IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB20_1 Depth=1
+; RV64IA-TSO-NEXT:    sc.w a4, a2, (a0)
+; RV64IA-TSO-NEXT:    bnez a4, .LBB20_1
+; RV64IA-TSO-NEXT:  .LBB20_3:
+; RV64IA-TSO-NEXT:    ret
+; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i32_monotonic_monotonic:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    amocas.w a1, a2, (a0)
+; RV32IA-WMO-ZACAS-NEXT:    ret
+; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i32_monotonic_monotonic:
+; RV64IA-WMO-ZACAS:       # %bb.0:
+; RV64IA-WMO-ZACAS-NEXT:    sext.w a1, a1
+; RV64IA-WMO-ZACAS-NEXT:    amocas.w a1, a2, (a0)
+; RV64IA-WMO-ZACAS-NEXT:    ret
   %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val monotonic monotonic
   ret void
 }
@@ -2194,6 +3345,11 @@ define void @cmpxchg_i32_acquire_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwin
 ; RV32IA-WMO-NEXT:  .LBB21_3:
 ; RV32IA-WMO-NEXT:    ret
 ;
+; RV32IA-ZACAS-LABEL: cmpxchg_i32_acquire_monotonic:
+; RV32IA-ZACAS:       # %bb.0:
+; RV32IA-ZACAS-NEXT:    amocas.w.aq a1, a2, (a0)
+; RV32IA-ZACAS-NEXT:    ret
+;
 ; RV32IA-TSO-LABEL: cmpxchg_i32_acquire_monotonic:
 ; RV32IA-TSO:       # %bb.0:
 ; RV32IA-TSO-NEXT:  .LBB21_1: # =>This Inner Loop Header: Depth=1
@@ -2230,6 +3386,12 @@ define void @cmpxchg_i32_acquire_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwin
 ; RV64IA-WMO-NEXT:  .LBB21_3:
 ; RV64IA-WMO-NEXT:    ret
 ;
+; RV64IA-ZACAS-LABEL: cmpxchg_i32_acquire_monotonic:
+; RV64IA-ZACAS:       # %bb.0:
+; RV64IA-ZACAS-NEXT:    sext.w a1, a1
+; RV64IA-ZACAS-NEXT:    amocas.w.aq a1, a2, (a0)
+; RV64IA-ZACAS-NEXT:    ret
+;
 ; RV64IA-TSO-LABEL: cmpxchg_i32_acquire_monotonic:
 ; RV64IA-TSO:       # %bb.0:
 ; RV64IA-TSO-NEXT:    sext.w a1, a1
@@ -2241,6 +3403,15 @@ define void @cmpxchg_i32_acquire_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwin
 ; RV64IA-TSO-NEXT:    bnez a4, .LBB21_1
 ; RV64IA-TSO-NEXT:  .LBB21_3:
 ; RV64IA-TSO-NEXT:    ret
+; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i32_acquire_monotonic:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    amocas.w.aq a1, a2, (a0)
+; RV32IA-WMO-ZACAS-NEXT:    ret
+; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i32_acquire_monotonic:
+; RV64IA-WMO-ZACAS:       # %bb.0:
+; RV64IA-WMO-ZACAS-NEXT:    sext.w a1, a1
+; RV64IA-WMO-ZACAS-NEXT:    amocas.w.aq a1, a2, (a0)
+; RV64IA-WMO-ZACAS-NEXT:    ret
   %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val acquire monotonic
   ret void
 }
@@ -2270,6 +3441,11 @@ define void @cmpxchg_i32_acquire_acquire(ptr %ptr, i32 %cmp, i32 %val) nounwind
 ; RV32IA-WMO-NEXT:  .LBB22_3:
 ; RV32IA-WMO-NEXT:    ret
 ;
+; RV32IA-ZACAS-LABEL: cmpxchg_i32_acquire_acquire:
+; RV32IA-ZACAS:       # %bb.0:
+; RV32IA-ZACAS-NEXT:    amocas.w.aq a1, a2, (a0)
+; RV32IA-ZACAS-NEXT:    ret
+;
 ; RV32IA-TSO-LABEL: cmpxchg_i32_acquire_acquire:
 ; RV32IA-TSO:       # %bb.0:
 ; RV32IA-TSO-NEXT:  .LBB22_1: # =>This Inner Loop Header: Depth=1
@@ -2306,6 +3482,12 @@ define void @cmpxchg_i32_acquire_acquire(ptr %ptr, i32 %cmp, i32 %val) nounwind
 ; RV64IA-WMO-NEXT:  .LBB22_3:
 ; RV64IA-WMO-NEXT:    ret
 ;
+; RV64IA-ZACAS-LABEL: cmpxchg_i32_acquire_acquire:
+; RV64IA-ZACAS:       # %bb.0:
+; RV64IA-ZACAS-NEXT:    sext.w a1, a1
+; RV64IA-ZACAS-NEXT:    amocas.w.aq a1, a2, (a0)
+; RV64IA-ZACAS-NEXT:    ret
+;
 ; RV64IA-TSO-LABEL: cmpxchg_i32_acquire_acquire:
 ; RV64IA-TSO:       # %bb.0:
 ; RV64IA-TSO-NEXT:    sext.w a1, a1
@@ -2317,6 +3499,15 @@ define void @cmpxchg_i32_acquire_acquire(ptr %ptr, i32 %cmp, i32 %val) nounwind
 ; RV64IA-TSO-NEXT:    bnez a4, .LBB22_1
 ; RV64IA-TSO-NEXT:  .LBB22_3:
 ; RV64IA-TSO-NEXT:    ret
+; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i32_acquire_acquire:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    amocas.w.aq a1, a2, (a0)
+; RV32IA-WMO-ZACAS-NEXT:    ret
+; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i32_acquire_acquire:
+; RV64IA-WMO-ZACAS:       # %bb.0:
+; RV64IA-WMO-ZACAS-NEXT:    sext.w a1, a1
+; RV64IA-WMO-ZACAS-NEXT:    amocas.w.aq a1, a2, (a0)
+; RV64IA-WMO-ZACAS-NEXT:    ret
   %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val acquire acquire
   ret void
 }
@@ -2346,6 +3537,11 @@ define void @cmpxchg_i32_release_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwin
 ; RV32IA-WMO-NEXT:  .LBB23_3:
 ; RV32IA-WMO-NEXT:    ret
 ;
+; RV32IA-ZACAS-LABEL: cmpxchg_i32_release_monotonic:
+; RV32IA-ZACAS:       # %bb.0:
+; RV32IA-ZACAS-NEXT:    amocas.w.rl a1, a2, (a0)
+; RV32IA-ZACAS-NEXT:    ret
+;
 ; RV32IA-TSO-LABEL: cmpxchg_i32_release_monotonic:
 ; RV32IA-TSO:       # %bb.0:
 ; RV32IA-TSO-NEXT:  .LBB23_1: # =>This Inner Loop Header: Depth=1
@@ -2382,6 +3578,12 @@ define void @cmpxchg_i32_release_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwin
 ; RV64IA-WMO-NEXT:  .LBB23_3:
 ; RV64IA-WMO-NEXT:    ret
 ;
+; RV64IA-ZACAS-LABEL: cmpxchg_i32_release_monotonic:
+; RV64IA-ZACAS:       # %bb.0:
+; RV64IA-ZACAS-NEXT:    sext.w a1, a1
+; RV64IA-ZACAS-NEXT:    amocas.w.rl a1, a2, (a0)
+; RV64IA-ZACAS-NEXT:    ret
+;
 ; RV64IA-TSO-LABEL: cmpxchg_i32_release_monotonic:
 ; RV64IA-TSO:       # %bb.0:
 ; RV64IA-TSO-NEXT:    sext.w a1, a1
@@ -2393,6 +3595,15 @@ define void @cmpxchg_i32_release_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwin
 ; RV64IA-TSO-NEXT:    bnez a4, .LBB23_1
 ; RV64IA-TSO-NEXT:  .LBB23_3:
 ; RV64IA-TSO-NEXT:    ret
+; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i32_release_monotonic:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    amocas.w.rl a1, a2, (a0)
+; RV32IA-WMO-ZACAS-NEXT:    ret
+; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i32_release_monotonic:
+; RV64IA-WMO-ZACAS:       # %bb.0:
+; RV64IA-WMO-ZACAS-NEXT:    sext.w a1, a1
+; RV64IA-WMO-ZACAS-NEXT:    amocas.w.rl a1, a2, (a0)
+; RV64IA-WMO-ZACAS-NEXT:    ret
   %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val release monotonic
   ret void
 }
@@ -2422,6 +3633,11 @@ define void @cmpxchg_i32_release_acquire(ptr %ptr, i32 %cmp, i32 %val) nounwind
 ; RV32IA-WMO-NEXT:  .LBB24_3:
 ; RV32IA-WMO-NEXT:    ret
 ;
+; RV32IA-ZACAS-LABEL: cmpxchg_i32_release_acquire:
+; RV32IA-ZACAS:       # %bb.0:
+; RV32IA-ZACAS-NEXT:    amocas.w.aqrl a1, a2, (a0)
+; RV32IA-ZACAS-NEXT:    ret
+;
 ; RV32IA-TSO-LABEL: cmpxchg_i32_release_acquire:
 ; RV32IA-TSO:       # %bb.0:
 ; RV32IA-TSO-NEXT:  .LBB24_1: # =>This Inner Loop Header: Depth=1
@@ -2458,6 +3674,12 @@ define void @cmpxchg_i32_release_acquire(ptr %ptr, i32 %cmp, i32 %val) nounwind
 ; RV64IA-WMO-NEXT:  .LBB24_3:
 ; RV64IA-WMO-NEXT:    ret
 ;
+; RV64IA-ZACAS-LABEL: cmpxchg_i32_release_acquire:
+; RV64IA-ZACAS:       # %bb.0:
+; RV64IA-ZACAS-NEXT:    sext.w a1, a1
+; RV64IA-ZACAS-NEXT:    amocas.w.aqrl a1, a2, (a0)
+; RV64IA-ZACAS-NEXT:    ret
+;
 ; RV64IA-TSO-LABEL: cmpxchg_i32_release_acquire:
 ; RV64IA-TSO:       # %bb.0:
 ; RV64IA-TSO-NEXT:    sext.w a1, a1
@@ -2469,6 +3691,15 @@ define void @cmpxchg_i32_release_acquire(ptr %ptr, i32 %cmp, i32 %val) nounwind
 ; RV64IA-TSO-NEXT:    bnez a4, .LBB24_1
 ; RV64IA-TSO-NEXT:  .LBB24_3:
 ; RV64IA-TSO-NEXT:    ret
+; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i32_release_acquire:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    amocas.w.aqrl a1, a2, (a0)
+; RV32IA-WMO-ZACAS-NEXT:    ret
+; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i32_release_acquire:
+; RV64IA-WMO-ZACAS:       # %bb.0:
+; RV64IA-WMO-ZACAS-NEXT:    sext.w a1, a1
+; RV64IA-WMO-ZACAS-NEXT:    amocas.w.aqrl a1, a2, (a0)
+; RV64IA-WMO-ZACAS-NEXT:    ret
   %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val release acquire
   ret void
 }
@@ -2498,6 +3729,11 @@ define void @cmpxchg_i32_acq_rel_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwin
 ; RV32IA-WMO-NEXT:  .LBB25_3:
 ; RV32IA-WMO-NEXT:    ret
 ;
+; RV32IA-ZACAS-LABEL: cmpxchg_i32_acq_rel_monotonic:
+; RV32IA-ZACAS:       # %bb.0:
+; RV32IA-ZACAS-NEXT:    amocas.w.aqrl a1, a2, (a0)
+; RV32IA-ZACAS-NEXT:    ret
+;
 ; RV32IA-TSO-LABEL: cmpxchg_i32_acq_rel_monotonic:
 ; RV32IA-TSO:       # %bb.0:
 ; RV32IA-TSO-NEXT:  .LBB25_1: # =>This Inner Loop Header: Depth=1
@@ -2534,6 +3770,12 @@ define void @cmpxchg_i32_acq_rel_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwin
 ; RV64IA-WMO-NEXT:  .LBB25_3:
 ; RV64IA-WMO-NEXT:    ret
 ;
+; RV64IA-ZACAS-LABEL: cmpxchg_i32_acq_rel_monotonic:
+; RV64IA-ZACAS:       # %bb.0:
+; RV64IA-ZACAS-NEXT:    sext.w a1, a1
+; RV64IA-ZACAS-NEXT:    amocas.w.aqrl a1, a2, (a0)
+; RV64IA-ZACAS-NEXT:    ret
+;
 ; RV64IA-TSO-LABEL: cmpxchg_i32_acq_rel_monotonic:
 ; RV64IA-TSO:       # %bb.0:
 ; RV64IA-TSO-NEXT:    sext.w a1, a1
@@ -2545,6 +3787,15 @@ define void @cmpxchg_i32_acq_rel_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwin
 ; RV64IA-TSO-NEXT:    bnez a4, .LBB25_1
 ; RV64IA-TSO-NEXT:  .LBB25_3:
 ; RV64IA-TSO-NEXT:    ret
+; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i32_acq_rel_monotonic:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    amocas.w.aqrl a1, a2, (a0)
+; RV32IA-WMO-ZACAS-NEXT:    ret
+; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i32_acq_rel_monotonic:
+; RV64IA-WMO-ZACAS:       # %bb.0:
+; RV64IA-WMO-ZACAS-NEXT:    sext.w a1, a1
+; RV64IA-WMO-ZACAS-NEXT:    amocas.w.aqrl a1, a2, (a0)
+; RV64IA-WMO-ZACAS-NEXT:    ret
   %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val acq_rel monotonic
   ret void
 }
@@ -2574,6 +3825,11 @@ define void @cmpxchg_i32_acq_rel_acquire(ptr %ptr, i32 %cmp, i32 %val) nounwind
 ; RV32IA-WMO-NEXT:  .LBB26_3:
 ; RV32IA-WMO-NEXT:    ret
 ;
+; RV32IA-ZACAS-LABEL: cmpxchg_i32_acq_rel_acquire:
+; RV32IA-ZACAS:       # %bb.0:
+; RV32IA-ZACAS-NEXT:    amocas.w.aqrl a1, a2, (a0)
+; RV32IA-ZACAS-NEXT:    ret
+;
 ; RV32IA-TSO-LABEL: cmpxchg_i32_acq_rel_acquire:
 ; RV32IA-TSO:       # %bb.0:
 ; RV32IA-TSO-NEXT:  .LBB26_1: # =>This Inner Loop Header: Depth=1
@@ -2610,6 +3866,12 @@ define void @cmpxchg_i32_acq_rel_acquire(ptr %ptr, i32 %cmp, i32 %val) nounwind
 ; RV64IA-WMO-NEXT:  .LBB26_3:
 ; RV64IA-WMO-NEXT:    ret
 ;
+; RV64IA-ZACAS-LABEL: cmpxchg_i32_acq_rel_acquire:
+; RV64IA-ZACAS:       # %bb.0:
+; RV64IA-ZACAS-NEXT:    sext.w a1, a1
+; RV64IA-ZACAS-NEXT:    amocas.w.aqrl a1, a2, (a0)
+; RV64IA-ZACAS-NEXT:    ret
+;
 ; RV64IA-TSO-LABEL: cmpxchg_i32_acq_rel_acquire:
 ; RV64IA-TSO:       # %bb.0:
 ; RV64IA-TSO-NEXT:    sext.w a1, a1
@@ -2621,6 +3883,15 @@ define void @cmpxchg_i32_acq_rel_acquire(ptr %ptr, i32 %cmp, i32 %val) nounwind
 ; RV64IA-TSO-NEXT:    bnez a4, .LBB26_1
 ; RV64IA-TSO-NEXT:  .LBB26_3:
 ; RV64IA-TSO-NEXT:    ret
+; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i32_acq_rel_acquire:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    amocas.w.aqrl a1, a2, (a0)
+; RV32IA-WMO-ZACAS-NEXT:    ret
+; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i32_acq_rel_acquire:
+; RV64IA-WMO-ZACAS:       # %bb.0:
+; RV64IA-WMO-ZACAS-NEXT:    sext.w a1, a1
+; RV64IA-WMO-ZACAS-NEXT:    amocas.w.aqrl a1, a2, (a0)
+; RV64IA-WMO-ZACAS-NEXT:    ret
   %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val acq_rel acquire
   ret void
 }
@@ -2639,16 +3910,32 @@ define void @cmpxchg_i32_seq_cst_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwin
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: cmpxchg_i32_seq_cst_monotonic:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:  .LBB27_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w.aqrl a3, (a0)
-; RV32IA-NEXT:    bne a3, a1, .LBB27_3
-; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB27_1 Depth=1
-; RV32IA-NEXT:    sc.w.rl a4, a2, (a0)
-; RV32IA-NEXT:    bnez a4, .LBB27_1
-; RV32IA-NEXT:  .LBB27_3:
-; RV32IA-NEXT:    ret
+; RV32IA-WMO-LABEL: cmpxchg_i32_seq_cst_monotonic:
+; RV32IA-WMO:       # %bb.0:
+; RV32IA-WMO-NEXT:  .LBB27_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NEXT:    lr.w.aqrl a3, (a0)
+; RV32IA-WMO-NEXT:    bne a3, a1, .LBB27_3
+; RV32IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB27_1 Depth=1
+; RV32IA-WMO-NEXT:    sc.w.rl a4, a2, (a0)
+; RV32IA-WMO-NEXT:    bnez a4, .LBB27_1
+; RV32IA-WMO-NEXT:  .LBB27_3:
+; RV32IA-WMO-NEXT:    ret
+;
+; RV32IA-ZACAS-LABEL: cmpxchg_i32_seq_cst_monotonic:
+; RV32IA-ZACAS:       # %bb.0:
+; RV32IA-ZACAS-NEXT:    amocas.w.aqrl a1, a2, (a0)
+; RV32IA-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-LABEL: cmpxchg_i32_seq_cst_monotonic:
+; RV32IA-TSO:       # %bb.0:
+; RV32IA-TSO-NEXT:  .LBB27_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NEXT:    lr.w.aqrl a3, (a0)
+; RV32IA-TSO-NEXT:    bne a3, a1, .LBB27_3
+; RV32IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB27_1 Depth=1
+; RV32IA-TSO-NEXT:    sc.w.rl a4, a2, (a0)
+; RV32IA-TSO-NEXT:    bnez a4, .LBB27_1
+; RV32IA-TSO-NEXT:  .LBB27_3:
+; RV32IA-TSO-NEXT:    ret
 ;
 ; RV64I-LABEL: cmpxchg_i32_seq_cst_monotonic:
 ; RV64I:       # %bb.0:
@@ -2663,17 +3950,44 @@ define void @cmpxchg_i32_seq_cst_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwin
 ; RV64I-NEXT:    addi sp, sp, 16
 ; RV64I-NEXT:    ret
 ;
-; RV64IA-LABEL: cmpxchg_i32_seq_cst_monotonic:
-; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    sext.w a1, a1
-; RV64IA-NEXT:  .LBB27_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w.aqrl a3, (a0)
-; RV64IA-NEXT:    bne a3, a1, .LBB27_3
-; RV64IA-NEXT:  # %bb.2: # in Loop: Header=BB27_1 Depth=1
-; RV64IA-NEXT:    sc.w.rl a4, a2, (a0)
-; RV64IA-NEXT:    bnez a4, .LBB27_1
-; RV64IA-NEXT:  .LBB27_3:
-; RV64IA-NEXT:    ret
+; RV64IA-WMO-LABEL: cmpxchg_i32_seq_cst_monotonic:
+; RV64IA-WMO:       # %bb.0:
+; RV64IA-WMO-NEXT:    sext.w a1, a1
+; RV64IA-WMO-NEXT:  .LBB27_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT:    lr.w.aqrl a3, (a0)
+; RV64IA-WMO-NEXT:    bne a3, a1, .LBB27_3
+; RV64IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB27_1 Depth=1
+; RV64IA-WMO-NEXT:    sc.w.rl a4, a2, (a0)
+; RV64IA-WMO-NEXT:    bnez a4, .LBB27_1
+; RV64IA-WMO-NEXT:  .LBB27_3:
+; RV64IA-WMO-NEXT:    ret
+;
+; RV64IA-ZACAS-LABEL: cmpxchg_i32_seq_cst_monotonic:
+; RV64IA-ZACAS:       # %bb.0:
+; RV64IA-ZACAS-NEXT:    sext.w a1, a1
+; RV64IA-ZACAS-NEXT:    amocas.w.aqrl a1, a2, (a0)
+; RV64IA-ZACAS-NEXT:    ret
+;
+; RV64IA-TSO-LABEL: cmpxchg_i32_seq_cst_monotonic:
+; RV64IA-TSO:       # %bb.0:
+; RV64IA-TSO-NEXT:    sext.w a1, a1
+; RV64IA-TSO-NEXT:  .LBB27_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT:    lr.w.aqrl a3, (a0)
+; RV64IA-TSO-NEXT:    bne a3, a1, .LBB27_3
+; RV64IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB27_1 Depth=1
+; RV64IA-TSO-NEXT:    sc.w.rl a4, a2, (a0)
+; RV64IA-TSO-NEXT:    bnez a4, .LBB27_1
+; RV64IA-TSO-NEXT:  .LBB27_3:
+; RV64IA-TSO-NEXT:    ret
+; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i32_seq_cst_monotonic:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    amocas.w.aqrl a1, a2, (a0)
+; RV32IA-WMO-ZACAS-NEXT:    ret
+; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i32_seq_cst_monotonic:
+; RV64IA-WMO-ZACAS:       # %bb.0:
+; RV64IA-WMO-ZACAS-NEXT:    sext.w a1, a1
+; RV64IA-WMO-ZACAS-NEXT:    amocas.w.aqrl a1, a2, (a0)
+; RV64IA-WMO-ZACAS-NEXT:    ret
   %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val seq_cst monotonic
   ret void
 }
@@ -2692,16 +4006,32 @@ define void @cmpxchg_i32_seq_cst_acquire(ptr %ptr, i32 %cmp, i32 %val) nounwind
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: cmpxchg_i32_seq_cst_acquire:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:  .LBB28_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w.aqrl a3, (a0)
-; RV32IA-NEXT:    bne a3, a1, .LBB28_3
-; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB28_1 Depth=1
-; RV32IA-NEXT:    sc.w.rl a4, a2, (a0)
-; RV32IA-NEXT:    bnez a4, .LBB28_1
-; RV32IA-NEXT:  .LBB28_3:
-; RV32IA-NEXT:    ret
+; RV32IA-WMO-LABEL: cmpxchg_i32_seq_cst_acquire:
+; RV32IA-WMO:       # %bb.0:
+; RV32IA-WMO-NEXT:  .LBB28_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NEXT:    lr.w.aqrl a3, (a0)
+; RV32IA-WMO-NEXT:    bne a3, a1, .LBB28_3
+; RV32IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB28_1 Depth=1
+; RV32IA-WMO-NEXT:    sc.w.rl a4, a2, (a0)
+; RV32IA-WMO-NEXT:    bnez a4, .LBB28_1
+; RV32IA-WMO-NEXT:  .LBB28_3:
+; RV32IA-WMO-NEXT:    ret
+;
+; RV32IA-ZACAS-LABEL: cmpxchg_i32_seq_cst_acquire:
+; RV32IA-ZACAS:       # %bb.0:
+; RV32IA-ZACAS-NEXT:    amocas.w.aqrl a1, a2, (a0)
+; RV32IA-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-LABEL: cmpxchg_i32_seq_cst_acquire:
+; RV32IA-TSO:       # %bb.0:
+; RV32IA-TSO-NEXT:  .LBB28_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NEXT:    lr.w.aqrl a3, (a0)
+; RV32IA-TSO-NEXT:    bne a3, a1, .LBB28_3
+; RV32IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB28_1 Depth=1
+; RV32IA-TSO-NEXT:    sc.w.rl a4, a2, (a0)
+; RV32IA-TSO-NEXT:    bnez a4, .LBB28_1
+; RV32IA-TSO-NEXT:  .LBB28_3:
+; RV32IA-TSO-NEXT:    ret
 ;
 ; RV64I-LABEL: cmpxchg_i32_seq_cst_acquire:
 ; RV64I:       # %bb.0:
@@ -2716,17 +4046,44 @@ define void @cmpxchg_i32_seq_cst_acquire(ptr %ptr, i32 %cmp, i32 %val) nounwind
 ; RV64I-NEXT:    addi sp, sp, 16
 ; RV64I-NEXT:    ret
 ;
-; RV64IA-LABEL: cmpxchg_i32_seq_cst_acquire:
-; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    sext.w a1, a1
-; RV64IA-NEXT:  .LBB28_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w.aqrl a3, (a0)
-; RV64IA-NEXT:    bne a3, a1, .LBB28_3
-; RV64IA-NEXT:  # %bb.2: # in Loop: Header=BB28_1 Depth=1
-; RV64IA-NEXT:    sc.w.rl a4, a2, (a0)
-; RV64IA-NEXT:    bnez a4, .LBB28_1
-; RV64IA-NEXT:  .LBB28_3:
-; RV64IA-NEXT:    ret
+; RV64IA-WMO-LABEL: cmpxchg_i32_seq_cst_acquire:
+; RV64IA-WMO:       # %bb.0:
+; RV64IA-WMO-NEXT:    sext.w a1, a1
+; RV64IA-WMO-NEXT:  .LBB28_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT:    lr.w.aqrl a3, (a0)
+; RV64IA-WMO-NEXT:    bne a3, a1, .LBB28_3
+; RV64IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB28_1 Depth=1
+; RV64IA-WMO-NEXT:    sc.w.rl a4, a2, (a0)
+; RV64IA-WMO-NEXT:    bnez a4, .LBB28_1
+; RV64IA-WMO-NEXT:  .LBB28_3:
+; RV64IA-WMO-NEXT:    ret
+;
+; RV64IA-ZACAS-LABEL: cmpxchg_i32_seq_cst_acquire:
+; RV64IA-ZACAS:       # %bb.0:
+; RV64IA-ZACAS-NEXT:    sext.w a1, a1
+; RV64IA-ZACAS-NEXT:    amocas.w.aqrl a1, a2, (a0)
+; RV64IA-ZACAS-NEXT:    ret
+;
+; RV64IA-TSO-LABEL: cmpxchg_i32_seq_cst_acquire:
+; RV64IA-TSO:       # %bb.0:
+; RV64IA-TSO-NEXT:    sext.w a1, a1
+; RV64IA-TSO-NEXT:  .LBB28_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT:    lr.w.aqrl a3, (a0)
+; RV64IA-TSO-NEXT:    bne a3, a1, .LBB28_3
+; RV64IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB28_1 Depth=1
+; RV64IA-TSO-NEXT:    sc.w.rl a4, a2, (a0)
+; RV64IA-TSO-NEXT:    bnez a4, .LBB28_1
+; RV64IA-TSO-NEXT:  .LBB28_3:
+; RV64IA-TSO-NEXT:    ret
+; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i32_seq_cst_acquire:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    amocas.w.aqrl a1, a2, (a0)
+; RV32IA-WMO-ZACAS-NEXT:    ret
+; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i32_seq_cst_acquire:
+; RV64IA-WMO-ZACAS:       # %bb.0:
+; RV64IA-WMO-ZACAS-NEXT:    sext.w a1, a1
+; RV64IA-WMO-ZACAS-NEXT:    amocas.w.aqrl a1, a2, (a0)
+; RV64IA-WMO-ZACAS-NEXT:    ret
   %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val seq_cst acquire
   ret void
 }
@@ -2745,16 +4102,32 @@ define void @cmpxchg_i32_seq_cst_seq_cst(ptr %ptr, i32 %cmp, i32 %val) nounwind
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: cmpxchg_i32_seq_cst_seq_cst:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:  .LBB29_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w.aqrl a3, (a0)
-; RV32IA-NEXT:    bne a3, a1, .LBB29_3
-; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB29_1 Depth=1
-; RV32IA-NEXT:    sc.w.rl a4, a2, (a0)
-; RV32IA-NEXT:    bnez a4, .LBB29_1
-; RV32IA-NEXT:  .LBB29_3:
-; RV32IA-NEXT:    ret
+; RV32IA-WMO-LABEL: cmpxchg_i32_seq_cst_seq_cst:
+; RV32IA-WMO:       # %bb.0:
+; RV32IA-WMO-NEXT:  .LBB29_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NEXT:    lr.w.aqrl a3, (a0)
+; RV32IA-WMO-NEXT:    bne a3, a1, .LBB29_3
+; RV32IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB29_1 Depth=1
+; RV32IA-WMO-NEXT:    sc.w.rl a4, a2, (a0)
+; RV32IA-WMO-NEXT:    bnez a4, .LBB29_1
+; RV32IA-WMO-NEXT:  .LBB29_3:
+; RV32IA-WMO-NEXT:    ret
+;
+; RV32IA-ZACAS-LABEL: cmpxchg_i32_seq_cst_seq_cst:
+; RV32IA-ZACAS:       # %bb.0:
+; RV32IA-ZACAS-NEXT:    amocas.w.aqrl a1, a2, (a0)
+; RV32IA-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-LABEL: cmpxchg_i32_seq_cst_seq_cst:
+; RV32IA-TSO:       # %bb.0:
+; RV32IA-TSO-NEXT:  .LBB29_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NEXT:    lr.w.aqrl a3, (a0)
+; RV32IA-TSO-NEXT:    bne a3, a1, .LBB29_3
+; RV32IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB29_1 Depth=1
+; RV32IA-TSO-NEXT:    sc.w.rl a4, a2, (a0)
+; RV32IA-TSO-NEXT:    bnez a4, .LBB29_1
+; RV32IA-TSO-NEXT:  .LBB29_3:
+; RV32IA-TSO-NEXT:    ret
 ;
 ; RV64I-LABEL: cmpxchg_i32_seq_cst_seq_cst:
 ; RV64I:       # %bb.0:
@@ -2769,17 +4142,44 @@ define void @cmpxchg_i32_seq_cst_seq_cst(ptr %ptr, i32 %cmp, i32 %val) nounwind
 ; RV64I-NEXT:    addi sp, sp, 16
 ; RV64I-NEXT:    ret
 ;
-; RV64IA-LABEL: cmpxchg_i32_seq_cst_seq_cst:
-; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    sext.w a1, a1
-; RV64IA-NEXT:  .LBB29_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w.aqrl a3, (a0)
-; RV64IA-NEXT:    bne a3, a1, .LBB29_3
-; RV64IA-NEXT:  # %bb.2: # in Loop: Header=BB29_1 Depth=1
-; RV64IA-NEXT:    sc.w.rl a4, a2, (a0)
-; RV64IA-NEXT:    bnez a4, .LBB29_1
-; RV64IA-NEXT:  .LBB29_3:
-; RV64IA-NEXT:    ret
+; RV64IA-WMO-LABEL: cmpxchg_i32_seq_cst_seq_cst:
+; RV64IA-WMO:       # %bb.0:
+; RV64IA-WMO-NEXT:    sext.w a1, a1
+; RV64IA-WMO-NEXT:  .LBB29_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT:    lr.w.aqrl a3, (a0)
+; RV64IA-WMO-NEXT:    bne a3, a1, .LBB29_3
+; RV64IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB29_1 Depth=1
+; RV64IA-WMO-NEXT:    sc.w.rl a4, a2, (a0)
+; RV64IA-WMO-NEXT:    bnez a4, .LBB29_1
+; RV64IA-WMO-NEXT:  .LBB29_3:
+; RV64IA-WMO-NEXT:    ret
+;
+; RV64IA-ZACAS-LABEL: cmpxchg_i32_seq_cst_seq_cst:
+; RV64IA-ZACAS:       # %bb.0:
+; RV64IA-ZACAS-NEXT:    sext.w a1, a1
+; RV64IA-ZACAS-NEXT:    amocas.w.aqrl a1, a2, (a0)
+; RV64IA-ZACAS-NEXT:    ret
+;
+; RV64IA-TSO-LABEL: cmpxchg_i32_seq_cst_seq_cst:
+; RV64IA-TSO:       # %bb.0:
+; RV64IA-TSO-NEXT:    sext.w a1, a1
+; RV64IA-TSO-NEXT:  .LBB29_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT:    lr.w.aqrl a3, (a0)
+; RV64IA-TSO-NEXT:    bne a3, a1, .LBB29_3
+; RV64IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB29_1 Depth=1
+; RV64IA-TSO-NEXT:    sc.w.rl a4, a2, (a0)
+; RV64IA-TSO-NEXT:    bnez a4, .LBB29_1
+; RV64IA-TSO-NEXT:  .LBB29_3:
+; RV64IA-TSO-NEXT:    ret
+; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i32_seq_cst_seq_cst:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    amocas.w.aqrl a1, a2, (a0)
+; RV32IA-WMO-ZACAS-NEXT:    ret
+; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i32_seq_cst_seq_cst:
+; RV64IA-WMO-ZACAS:       # %bb.0:
+; RV64IA-WMO-ZACAS-NEXT:    sext.w a1, a1
+; RV64IA-WMO-ZACAS-NEXT:    amocas.w.aqrl a1, a2, (a0)
+; RV64IA-WMO-ZACAS-NEXT:    ret
   %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val seq_cst seq_cst
   ret void
 }
@@ -2801,21 +4201,46 @@ define void @cmpxchg_i64_monotonic_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounw
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: cmpxchg_i64_monotonic_monotonic:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    addi sp, sp, -16
-; RV32IA-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IA-NEXT:    sw a2, 4(sp)
-; RV32IA-NEXT:    sw a1, 0(sp)
-; RV32IA-NEXT:    mv a1, sp
-; RV32IA-NEXT:    mv a2, a3
-; RV32IA-NEXT:    mv a3, a4
-; RV32IA-NEXT:    li a4, 0
-; RV32IA-NEXT:    li a5, 0
-; RV32IA-NEXT:    call __atomic_compare_exchange_8 at plt
-; RV32IA-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
-; RV32IA-NEXT:    addi sp, sp, 16
-; RV32IA-NEXT:    ret
+; RV32IA-WMO-LABEL: cmpxchg_i64_monotonic_monotonic:
+; RV32IA-WMO:       # %bb.0:
+; RV32IA-WMO-NEXT:    addi sp, sp, -16
+; RV32IA-WMO-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IA-WMO-NEXT:    sw a2, 4(sp)
+; RV32IA-WMO-NEXT:    sw a1, 0(sp)
+; RV32IA-WMO-NEXT:    mv a1, sp
+; RV32IA-WMO-NEXT:    mv a2, a3
+; RV32IA-WMO-NEXT:    mv a3, a4
+; RV32IA-WMO-NEXT:    li a4, 0
+; RV32IA-WMO-NEXT:    li a5, 0
+; RV32IA-WMO-NEXT:    call __atomic_compare_exchange_8 at plt
+; RV32IA-WMO-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IA-WMO-NEXT:    addi sp, sp, 16
+; RV32IA-WMO-NEXT:    ret
+;
+; RV32IA-ZACAS-LABEL: cmpxchg_i64_monotonic_monotonic:
+; RV32IA-ZACAS:       # %bb.0:
+; RV32IA-ZACAS-NEXT:    mv a5, a4
+; RV32IA-ZACAS-NEXT:    mv a7, a2
+; RV32IA-ZACAS-NEXT:    mv a4, a3
+; RV32IA-ZACAS-NEXT:    mv a6, a1
+; RV32IA-ZACAS-NEXT:    amocas.d a6, a4, (a0)
+; RV32IA-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-LABEL: cmpxchg_i64_monotonic_monotonic:
+; RV32IA-TSO:       # %bb.0:
+; RV32IA-TSO-NEXT:    addi sp, sp, -16
+; RV32IA-TSO-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IA-TSO-NEXT:    sw a2, 4(sp)
+; RV32IA-TSO-NEXT:    sw a1, 0(sp)
+; RV32IA-TSO-NEXT:    mv a1, sp
+; RV32IA-TSO-NEXT:    mv a2, a3
+; RV32IA-TSO-NEXT:    mv a3, a4
+; RV32IA-TSO-NEXT:    li a4, 0
+; RV32IA-TSO-NEXT:    li a5, 0
+; RV32IA-TSO-NEXT:    call __atomic_compare_exchange_8 at plt
+; RV32IA-TSO-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IA-TSO-NEXT:    addi sp, sp, 16
+; RV32IA-TSO-NEXT:    ret
 ;
 ; RV64I-LABEL: cmpxchg_i64_monotonic_monotonic:
 ; RV64I:       # %bb.0:
@@ -2830,16 +4255,44 @@ define void @cmpxchg_i64_monotonic_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounw
 ; RV64I-NEXT:    addi sp, sp, 16
 ; RV64I-NEXT:    ret
 ;
-; RV64IA-LABEL: cmpxchg_i64_monotonic_monotonic:
-; RV64IA:       # %bb.0:
-; RV64IA-NEXT:  .LBB30_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.d a3, (a0)
-; RV64IA-NEXT:    bne a3, a1, .LBB30_3
-; RV64IA-NEXT:  # %bb.2: # in Loop: Header=BB30_1 Depth=1
-; RV64IA-NEXT:    sc.d a4, a2, (a0)
-; RV64IA-NEXT:    bnez a4, .LBB30_1
-; RV64IA-NEXT:  .LBB30_3:
-; RV64IA-NEXT:    ret
+; RV64IA-WMO-LABEL: cmpxchg_i64_monotonic_monotonic:
+; RV64IA-WMO:       # %bb.0:
+; RV64IA-WMO-NEXT:  .LBB30_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT:    lr.d a3, (a0)
+; RV64IA-WMO-NEXT:    bne a3, a1, .LBB30_3
+; RV64IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB30_1 Depth=1
+; RV64IA-WMO-NEXT:    sc.d a4, a2, (a0)
+; RV64IA-WMO-NEXT:    bnez a4, .LBB30_1
+; RV64IA-WMO-NEXT:  .LBB30_3:
+; RV64IA-WMO-NEXT:    ret
+;
+; RV64IA-ZACAS-LABEL: cmpxchg_i64_monotonic_monotonic:
+; RV64IA-ZACAS:       # %bb.0:
+; RV64IA-ZACAS-NEXT:    amocas.d a1, a2, (a0)
+; RV64IA-ZACAS-NEXT:    ret
+;
+; RV64IA-TSO-LABEL: cmpxchg_i64_monotonic_monotonic:
+; RV64IA-TSO:       # %bb.0:
+; RV64IA-TSO-NEXT:  .LBB30_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT:    lr.d a3, (a0)
+; RV64IA-TSO-NEXT:    bne a3, a1, .LBB30_3
+; RV64IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB30_1 Depth=1
+; RV64IA-TSO-NEXT:    sc.d a4, a2, (a0)
+; RV64IA-TSO-NEXT:    bnez a4, .LBB30_1
+; RV64IA-TSO-NEXT:  .LBB30_3:
+; RV64IA-TSO-NEXT:    ret
+; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i64_monotonic_monotonic:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    mv a5, a4
+; RV32IA-WMO-ZACAS-NEXT:    mv a7, a2
+; RV32IA-WMO-ZACAS-NEXT:    mv a4, a3
+; RV32IA-WMO-ZACAS-NEXT:    mv a6, a1
+; RV32IA-WMO-ZACAS-NEXT:    amocas.d a6, a4, (a0)
+; RV32IA-WMO-ZACAS-NEXT:    ret
+; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i64_monotonic_monotonic:
+; RV64IA-WMO-ZACAS:       # %bb.0:
+; RV64IA-WMO-ZACAS-NEXT:    amocas.d a1, a2, (a0)
+; RV64IA-WMO-ZACAS-NEXT:    ret
   %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val monotonic monotonic
   ret void
 }
@@ -2862,22 +4315,48 @@ define void @cmpxchg_i64_acquire_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwin
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: cmpxchg_i64_acquire_monotonic:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    addi sp, sp, -16
-; RV32IA-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IA-NEXT:    mv a5, a4
-; RV32IA-NEXT:    sw a2, 4(sp)
-; RV32IA-NEXT:    sw a1, 0(sp)
-; RV32IA-NEXT:    mv a1, sp
-; RV32IA-NEXT:    li a4, 2
-; RV32IA-NEXT:    mv a2, a3
-; RV32IA-NEXT:    mv a3, a5
-; RV32IA-NEXT:    li a5, 0
-; RV32IA-NEXT:    call __atomic_compare_exchange_8 at plt
-; RV32IA-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
-; RV32IA-NEXT:    addi sp, sp, 16
-; RV32IA-NEXT:    ret
+; RV32IA-WMO-LABEL: cmpxchg_i64_acquire_monotonic:
+; RV32IA-WMO:       # %bb.0:
+; RV32IA-WMO-NEXT:    addi sp, sp, -16
+; RV32IA-WMO-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IA-WMO-NEXT:    mv a5, a4
+; RV32IA-WMO-NEXT:    sw a2, 4(sp)
+; RV32IA-WMO-NEXT:    sw a1, 0(sp)
+; RV32IA-WMO-NEXT:    mv a1, sp
+; RV32IA-WMO-NEXT:    li a4, 2
+; RV32IA-WMO-NEXT:    mv a2, a3
+; RV32IA-WMO-NEXT:    mv a3, a5
+; RV32IA-WMO-NEXT:    li a5, 0
+; RV32IA-WMO-NEXT:    call __atomic_compare_exchange_8 at plt
+; RV32IA-WMO-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IA-WMO-NEXT:    addi sp, sp, 16
+; RV32IA-WMO-NEXT:    ret
+;
+; RV32IA-ZACAS-LABEL: cmpxchg_i64_acquire_monotonic:
+; RV32IA-ZACAS:       # %bb.0:
+; RV32IA-ZACAS-NEXT:    mv a5, a4
+; RV32IA-ZACAS-NEXT:    mv a7, a2
+; RV32IA-ZACAS-NEXT:    mv a4, a3
+; RV32IA-ZACAS-NEXT:    mv a6, a1
+; RV32IA-ZACAS-NEXT:    amocas.d.aq a6, a4, (a0)
+; RV32IA-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-LABEL: cmpxchg_i64_acquire_monotonic:
+; RV32IA-TSO:       # %bb.0:
+; RV32IA-TSO-NEXT:    addi sp, sp, -16
+; RV32IA-TSO-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IA-TSO-NEXT:    mv a5, a4
+; RV32IA-TSO-NEXT:    sw a2, 4(sp)
+; RV32IA-TSO-NEXT:    sw a1, 0(sp)
+; RV32IA-TSO-NEXT:    mv a1, sp
+; RV32IA-TSO-NEXT:    li a4, 2
+; RV32IA-TSO-NEXT:    mv a2, a3
+; RV32IA-TSO-NEXT:    mv a3, a5
+; RV32IA-TSO-NEXT:    li a5, 0
+; RV32IA-TSO-NEXT:    call __atomic_compare_exchange_8 at plt
+; RV32IA-TSO-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IA-TSO-NEXT:    addi sp, sp, 16
+; RV32IA-TSO-NEXT:    ret
 ;
 ; RV64I-LABEL: cmpxchg_i64_acquire_monotonic:
 ; RV64I:       # %bb.0:
@@ -2903,6 +4382,11 @@ define void @cmpxchg_i64_acquire_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwin
 ; RV64IA-WMO-NEXT:  .LBB31_3:
 ; RV64IA-WMO-NEXT:    ret
 ;
+; RV64IA-ZACAS-LABEL: cmpxchg_i64_acquire_monotonic:
+; RV64IA-ZACAS:       # %bb.0:
+; RV64IA-ZACAS-NEXT:    amocas.d.aq a1, a2, (a0)
+; RV64IA-ZACAS-NEXT:    ret
+;
 ; RV64IA-TSO-LABEL: cmpxchg_i64_acquire_monotonic:
 ; RV64IA-TSO:       # %bb.0:
 ; RV64IA-TSO-NEXT:  .LBB31_1: # =>This Inner Loop Header: Depth=1
@@ -2913,6 +4397,18 @@ define void @cmpxchg_i64_acquire_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwin
 ; RV64IA-TSO-NEXT:    bnez a4, .LBB31_1
 ; RV64IA-TSO-NEXT:  .LBB31_3:
 ; RV64IA-TSO-NEXT:    ret
+; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i64_acquire_monotonic:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    mv a5, a4
+; RV32IA-WMO-ZACAS-NEXT:    mv a7, a2
+; RV32IA-WMO-ZACAS-NEXT:    mv a4, a3
+; RV32IA-WMO-ZACAS-NEXT:    mv a6, a1
+; RV32IA-WMO-ZACAS-NEXT:    amocas.d.aq a6, a4, (a0)
+; RV32IA-WMO-ZACAS-NEXT:    ret
+; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i64_acquire_monotonic:
+; RV64IA-WMO-ZACAS:       # %bb.0:
+; RV64IA-WMO-ZACAS-NEXT:    amocas.d.aq a1, a2, (a0)
+; RV64IA-WMO-ZACAS-NEXT:    ret
   %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val acquire monotonic
   ret void
 }
@@ -2935,22 +4431,48 @@ define void @cmpxchg_i64_acquire_acquire(ptr %ptr, i64 %cmp, i64 %val) nounwind
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: cmpxchg_i64_acquire_acquire:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    addi sp, sp, -16
-; RV32IA-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IA-NEXT:    mv a6, a4
-; RV32IA-NEXT:    sw a2, 4(sp)
-; RV32IA-NEXT:    sw a1, 0(sp)
-; RV32IA-NEXT:    mv a1, sp
-; RV32IA-NEXT:    li a4, 2
-; RV32IA-NEXT:    li a5, 2
-; RV32IA-NEXT:    mv a2, a3
-; RV32IA-NEXT:    mv a3, a6
-; RV32IA-NEXT:    call __atomic_compare_exchange_8 at plt
-; RV32IA-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
-; RV32IA-NEXT:    addi sp, sp, 16
-; RV32IA-NEXT:    ret
+; RV32IA-WMO-LABEL: cmpxchg_i64_acquire_acquire:
+; RV32IA-WMO:       # %bb.0:
+; RV32IA-WMO-NEXT:    addi sp, sp, -16
+; RV32IA-WMO-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IA-WMO-NEXT:    mv a6, a4
+; RV32IA-WMO-NEXT:    sw a2, 4(sp)
+; RV32IA-WMO-NEXT:    sw a1, 0(sp)
+; RV32IA-WMO-NEXT:    mv a1, sp
+; RV32IA-WMO-NEXT:    li a4, 2
+; RV32IA-WMO-NEXT:    li a5, 2
+; RV32IA-WMO-NEXT:    mv a2, a3
+; RV32IA-WMO-NEXT:    mv a3, a6
+; RV32IA-WMO-NEXT:    call __atomic_compare_exchange_8 at plt
+; RV32IA-WMO-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IA-WMO-NEXT:    addi sp, sp, 16
+; RV32IA-WMO-NEXT:    ret
+;
+; RV32IA-ZACAS-LABEL: cmpxchg_i64_acquire_acquire:
+; RV32IA-ZACAS:       # %bb.0:
+; RV32IA-ZACAS-NEXT:    mv a5, a4
+; RV32IA-ZACAS-NEXT:    mv a7, a2
+; RV32IA-ZACAS-NEXT:    mv a4, a3
+; RV32IA-ZACAS-NEXT:    mv a6, a1
+; RV32IA-ZACAS-NEXT:    amocas.d.aq a6, a4, (a0)
+; RV32IA-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-LABEL: cmpxchg_i64_acquire_acquire:
+; RV32IA-TSO:       # %bb.0:
+; RV32IA-TSO-NEXT:    addi sp, sp, -16
+; RV32IA-TSO-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IA-TSO-NEXT:    mv a6, a4
+; RV32IA-TSO-NEXT:    sw a2, 4(sp)
+; RV32IA-TSO-NEXT:    sw a1, 0(sp)
+; RV32IA-TSO-NEXT:    mv a1, sp
+; RV32IA-TSO-NEXT:    li a4, 2
+; RV32IA-TSO-NEXT:    li a5, 2
+; RV32IA-TSO-NEXT:    mv a2, a3
+; RV32IA-TSO-NEXT:    mv a3, a6
+; RV32IA-TSO-NEXT:    call __atomic_compare_exchange_8 at plt
+; RV32IA-TSO-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IA-TSO-NEXT:    addi sp, sp, 16
+; RV32IA-TSO-NEXT:    ret
 ;
 ; RV64I-LABEL: cmpxchg_i64_acquire_acquire:
 ; RV64I:       # %bb.0:
@@ -2976,6 +4498,11 @@ define void @cmpxchg_i64_acquire_acquire(ptr %ptr, i64 %cmp, i64 %val) nounwind
 ; RV64IA-WMO-NEXT:  .LBB32_3:
 ; RV64IA-WMO-NEXT:    ret
 ;
+; RV64IA-ZACAS-LABEL: cmpxchg_i64_acquire_acquire:
+; RV64IA-ZACAS:       # %bb.0:
+; RV64IA-ZACAS-NEXT:    amocas.d.aq a1, a2, (a0)
+; RV64IA-ZACAS-NEXT:    ret
+;
 ; RV64IA-TSO-LABEL: cmpxchg_i64_acquire_acquire:
 ; RV64IA-TSO:       # %bb.0:
 ; RV64IA-TSO-NEXT:  .LBB32_1: # =>This Inner Loop Header: Depth=1
@@ -2986,6 +4513,18 @@ define void @cmpxchg_i64_acquire_acquire(ptr %ptr, i64 %cmp, i64 %val) nounwind
 ; RV64IA-TSO-NEXT:    bnez a4, .LBB32_1
 ; RV64IA-TSO-NEXT:  .LBB32_3:
 ; RV64IA-TSO-NEXT:    ret
+; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i64_acquire_acquire:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    mv a5, a4
+; RV32IA-WMO-ZACAS-NEXT:    mv a7, a2
+; RV32IA-WMO-ZACAS-NEXT:    mv a4, a3
+; RV32IA-WMO-ZACAS-NEXT:    mv a6, a1
+; RV32IA-WMO-ZACAS-NEXT:    amocas.d.aq a6, a4, (a0)
+; RV32IA-WMO-ZACAS-NEXT:    ret
+; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i64_acquire_acquire:
+; RV64IA-WMO-ZACAS:       # %bb.0:
+; RV64IA-WMO-ZACAS-NEXT:    amocas.d.aq a1, a2, (a0)
+; RV64IA-WMO-ZACAS-NEXT:    ret
   %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val acquire acquire
   ret void
 }
@@ -3008,22 +4547,48 @@ define void @cmpxchg_i64_release_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwin
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: cmpxchg_i64_release_monotonic:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    addi sp, sp, -16
-; RV32IA-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IA-NEXT:    mv a5, a4
-; RV32IA-NEXT:    sw a2, 4(sp)
-; RV32IA-NEXT:    sw a1, 0(sp)
-; RV32IA-NEXT:    mv a1, sp
-; RV32IA-NEXT:    li a4, 3
-; RV32IA-NEXT:    mv a2, a3
-; RV32IA-NEXT:    mv a3, a5
-; RV32IA-NEXT:    li a5, 0
-; RV32IA-NEXT:    call __atomic_compare_exchange_8 at plt
-; RV32IA-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
-; RV32IA-NEXT:    addi sp, sp, 16
-; RV32IA-NEXT:    ret
+; RV32IA-WMO-LABEL: cmpxchg_i64_release_monotonic:
+; RV32IA-WMO:       # %bb.0:
+; RV32IA-WMO-NEXT:    addi sp, sp, -16
+; RV32IA-WMO-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IA-WMO-NEXT:    mv a5, a4
+; RV32IA-WMO-NEXT:    sw a2, 4(sp)
+; RV32IA-WMO-NEXT:    sw a1, 0(sp)
+; RV32IA-WMO-NEXT:    mv a1, sp
+; RV32IA-WMO-NEXT:    li a4, 3
+; RV32IA-WMO-NEXT:    mv a2, a3
+; RV32IA-WMO-NEXT:    mv a3, a5
+; RV32IA-WMO-NEXT:    li a5, 0
+; RV32IA-WMO-NEXT:    call __atomic_compare_exchange_8 at plt
+; RV32IA-WMO-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IA-WMO-NEXT:    addi sp, sp, 16
+; RV32IA-WMO-NEXT:    ret
+;
+; RV32IA-ZACAS-LABEL: cmpxchg_i64_release_monotonic:
+; RV32IA-ZACAS:       # %bb.0:
+; RV32IA-ZACAS-NEXT:    mv a5, a4
+; RV32IA-ZACAS-NEXT:    mv a7, a2
+; RV32IA-ZACAS-NEXT:    mv a4, a3
+; RV32IA-ZACAS-NEXT:    mv a6, a1
+; RV32IA-ZACAS-NEXT:    amocas.d.rl a6, a4, (a0)
+; RV32IA-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-LABEL: cmpxchg_i64_release_monotonic:
+; RV32IA-TSO:       # %bb.0:
+; RV32IA-TSO-NEXT:    addi sp, sp, -16
+; RV32IA-TSO-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IA-TSO-NEXT:    mv a5, a4
+; RV32IA-TSO-NEXT:    sw a2, 4(sp)
+; RV32IA-TSO-NEXT:    sw a1, 0(sp)
+; RV32IA-TSO-NEXT:    mv a1, sp
+; RV32IA-TSO-NEXT:    li a4, 3
+; RV32IA-TSO-NEXT:    mv a2, a3
+; RV32IA-TSO-NEXT:    mv a3, a5
+; RV32IA-TSO-NEXT:    li a5, 0
+; RV32IA-TSO-NEXT:    call __atomic_compare_exchange_8 at plt
+; RV32IA-TSO-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IA-TSO-NEXT:    addi sp, sp, 16
+; RV32IA-TSO-NEXT:    ret
 ;
 ; RV64I-LABEL: cmpxchg_i64_release_monotonic:
 ; RV64I:       # %bb.0:
@@ -3049,6 +4614,11 @@ define void @cmpxchg_i64_release_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwin
 ; RV64IA-WMO-NEXT:  .LBB33_3:
 ; RV64IA-WMO-NEXT:    ret
 ;
+; RV64IA-ZACAS-LABEL: cmpxchg_i64_release_monotonic:
+; RV64IA-ZACAS:       # %bb.0:
+; RV64IA-ZACAS-NEXT:    amocas.d.rl a1, a2, (a0)
+; RV64IA-ZACAS-NEXT:    ret
+;
 ; RV64IA-TSO-LABEL: cmpxchg_i64_release_monotonic:
 ; RV64IA-TSO:       # %bb.0:
 ; RV64IA-TSO-NEXT:  .LBB33_1: # =>This Inner Loop Header: Depth=1
@@ -3059,6 +4629,18 @@ define void @cmpxchg_i64_release_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwin
 ; RV64IA-TSO-NEXT:    bnez a4, .LBB33_1
 ; RV64IA-TSO-NEXT:  .LBB33_3:
 ; RV64IA-TSO-NEXT:    ret
+; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i64_release_monotonic:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    mv a5, a4
+; RV32IA-WMO-ZACAS-NEXT:    mv a7, a2
+; RV32IA-WMO-ZACAS-NEXT:    mv a4, a3
+; RV32IA-WMO-ZACAS-NEXT:    mv a6, a1
+; RV32IA-WMO-ZACAS-NEXT:    amocas.d.rl a6, a4, (a0)
+; RV32IA-WMO-ZACAS-NEXT:    ret
+; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i64_release_monotonic:
+; RV64IA-WMO-ZACAS:       # %bb.0:
+; RV64IA-WMO-ZACAS-NEXT:    amocas.d.rl a1, a2, (a0)
+; RV64IA-WMO-ZACAS-NEXT:    ret
   %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val release monotonic
   ret void
 }
@@ -3081,22 +4663,48 @@ define void @cmpxchg_i64_release_acquire(ptr %ptr, i64 %cmp, i64 %val) nounwind
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: cmpxchg_i64_release_acquire:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    addi sp, sp, -16
-; RV32IA-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IA-NEXT:    mv a6, a4
-; RV32IA-NEXT:    sw a2, 4(sp)
-; RV32IA-NEXT:    sw a1, 0(sp)
-; RV32IA-NEXT:    mv a1, sp
-; RV32IA-NEXT:    li a4, 3
-; RV32IA-NEXT:    li a5, 2
-; RV32IA-NEXT:    mv a2, a3
-; RV32IA-NEXT:    mv a3, a6
-; RV32IA-NEXT:    call __atomic_compare_exchange_8 at plt
-; RV32IA-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
-; RV32IA-NEXT:    addi sp, sp, 16
-; RV32IA-NEXT:    ret
+; RV32IA-WMO-LABEL: cmpxchg_i64_release_acquire:
+; RV32IA-WMO:       # %bb.0:
+; RV32IA-WMO-NEXT:    addi sp, sp, -16
+; RV32IA-WMO-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IA-WMO-NEXT:    mv a6, a4
+; RV32IA-WMO-NEXT:    sw a2, 4(sp)
+; RV32IA-WMO-NEXT:    sw a1, 0(sp)
+; RV32IA-WMO-NEXT:    mv a1, sp
+; RV32IA-WMO-NEXT:    li a4, 3
+; RV32IA-WMO-NEXT:    li a5, 2
+; RV32IA-WMO-NEXT:    mv a2, a3
+; RV32IA-WMO-NEXT:    mv a3, a6
+; RV32IA-WMO-NEXT:    call __atomic_compare_exchange_8 at plt
+; RV32IA-WMO-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IA-WMO-NEXT:    addi sp, sp, 16
+; RV32IA-WMO-NEXT:    ret
+;
+; RV32IA-ZACAS-LABEL: cmpxchg_i64_release_acquire:
+; RV32IA-ZACAS:       # %bb.0:
+; RV32IA-ZACAS-NEXT:    mv a5, a4
+; RV32IA-ZACAS-NEXT:    mv a7, a2
+; RV32IA-ZACAS-NEXT:    mv a4, a3
+; RV32IA-ZACAS-NEXT:    mv a6, a1
+; RV32IA-ZACAS-NEXT:    amocas.d.aqrl a6, a4, (a0)
+; RV32IA-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-LABEL: cmpxchg_i64_release_acquire:
+; RV32IA-TSO:       # %bb.0:
+; RV32IA-TSO-NEXT:    addi sp, sp, -16
+; RV32IA-TSO-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IA-TSO-NEXT:    mv a6, a4
+; RV32IA-TSO-NEXT:    sw a2, 4(sp)
+; RV32IA-TSO-NEXT:    sw a1, 0(sp)
+; RV32IA-TSO-NEXT:    mv a1, sp
+; RV32IA-TSO-NEXT:    li a4, 3
+; RV32IA-TSO-NEXT:    li a5, 2
+; RV32IA-TSO-NEXT:    mv a2, a3
+; RV32IA-TSO-NEXT:    mv a3, a6
+; RV32IA-TSO-NEXT:    call __atomic_compare_exchange_8 at plt
+; RV32IA-TSO-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IA-TSO-NEXT:    addi sp, sp, 16
+; RV32IA-TSO-NEXT:    ret
 ;
 ; RV64I-LABEL: cmpxchg_i64_release_acquire:
 ; RV64I:       # %bb.0:
@@ -3122,6 +4730,11 @@ define void @cmpxchg_i64_release_acquire(ptr %ptr, i64 %cmp, i64 %val) nounwind
 ; RV64IA-WMO-NEXT:  .LBB34_3:
 ; RV64IA-WMO-NEXT:    ret
 ;
+; RV64IA-ZACAS-LABEL: cmpxchg_i64_release_acquire:
+; RV64IA-ZACAS:       # %bb.0:
+; RV64IA-ZACAS-NEXT:    amocas.d.aqrl a1, a2, (a0)
+; RV64IA-ZACAS-NEXT:    ret
+;
 ; RV64IA-TSO-LABEL: cmpxchg_i64_release_acquire:
 ; RV64IA-TSO:       # %bb.0:
 ; RV64IA-TSO-NEXT:  .LBB34_1: # =>This Inner Loop Header: Depth=1
@@ -3132,6 +4745,18 @@ define void @cmpxchg_i64_release_acquire(ptr %ptr, i64 %cmp, i64 %val) nounwind
 ; RV64IA-TSO-NEXT:    bnez a4, .LBB34_1
 ; RV64IA-TSO-NEXT:  .LBB34_3:
 ; RV64IA-TSO-NEXT:    ret
+; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i64_release_acquire:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    mv a5, a4
+; RV32IA-WMO-ZACAS-NEXT:    mv a7, a2
+; RV32IA-WMO-ZACAS-NEXT:    mv a4, a3
+; RV32IA-WMO-ZACAS-NEXT:    mv a6, a1
+; RV32IA-WMO-ZACAS-NEXT:    amocas.d.aqrl a6, a4, (a0)
+; RV32IA-WMO-ZACAS-NEXT:    ret
+; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i64_release_acquire:
+; RV64IA-WMO-ZACAS:       # %bb.0:
+; RV64IA-WMO-ZACAS-NEXT:    amocas.d.aqrl a1, a2, (a0)
+; RV64IA-WMO-ZACAS-NEXT:    ret
   %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val release acquire
   ret void
 }
@@ -3154,22 +4779,48 @@ define void @cmpxchg_i64_acq_rel_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwin
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: cmpxchg_i64_acq_rel_monotonic:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    addi sp, sp, -16
-; RV32IA-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IA-NEXT:    mv a5, a4
-; RV32IA-NEXT:    sw a2, 4(sp)
-; RV32IA-NEXT:    sw a1, 0(sp)
-; RV32IA-NEXT:    mv a1, sp
-; RV32IA-NEXT:    li a4, 4
-; RV32IA-NEXT:    mv a2, a3
-; RV32IA-NEXT:    mv a3, a5
-; RV32IA-NEXT:    li a5, 0
-; RV32IA-NEXT:    call __atomic_compare_exchange_8 at plt
-; RV32IA-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
-; RV32IA-NEXT:    addi sp, sp, 16
-; RV32IA-NEXT:    ret
+; RV32IA-WMO-LABEL: cmpxchg_i64_acq_rel_monotonic:
+; RV32IA-WMO:       # %bb.0:
+; RV32IA-WMO-NEXT:    addi sp, sp, -16
+; RV32IA-WMO-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IA-WMO-NEXT:    mv a5, a4
+; RV32IA-WMO-NEXT:    sw a2, 4(sp)
+; RV32IA-WMO-NEXT:    sw a1, 0(sp)
+; RV32IA-WMO-NEXT:    mv a1, sp
+; RV32IA-WMO-NEXT:    li a4, 4
+; RV32IA-WMO-NEXT:    mv a2, a3
+; RV32IA-WMO-NEXT:    mv a3, a5
+; RV32IA-WMO-NEXT:    li a5, 0
+; RV32IA-WMO-NEXT:    call __atomic_compare_exchange_8 at plt
+; RV32IA-WMO-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IA-WMO-NEXT:    addi sp, sp, 16
+; RV32IA-WMO-NEXT:    ret
+;
+; RV32IA-ZACAS-LABEL: cmpxchg_i64_acq_rel_monotonic:
+; RV32IA-ZACAS:       # %bb.0:
+; RV32IA-ZACAS-NEXT:    mv a5, a4
+; RV32IA-ZACAS-NEXT:    mv a7, a2
+; RV32IA-ZACAS-NEXT:    mv a4, a3
+; RV32IA-ZACAS-NEXT:    mv a6, a1
+; RV32IA-ZACAS-NEXT:    amocas.d.aqrl a6, a4, (a0)
+; RV32IA-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-LABEL: cmpxchg_i64_acq_rel_monotonic:
+; RV32IA-TSO:       # %bb.0:
+; RV32IA-TSO-NEXT:    addi sp, sp, -16
+; RV32IA-TSO-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IA-TSO-NEXT:    mv a5, a4
+; RV32IA-TSO-NEXT:    sw a2, 4(sp)
+; RV32IA-TSO-NEXT:    sw a1, 0(sp)
+; RV32IA-TSO-NEXT:    mv a1, sp
+; RV32IA-TSO-NEXT:    li a4, 4
+; RV32IA-TSO-NEXT:    mv a2, a3
+; RV32IA-TSO-NEXT:    mv a3, a5
+; RV32IA-TSO-NEXT:    li a5, 0
+; RV32IA-TSO-NEXT:    call __atomic_compare_exchange_8 at plt
+; RV32IA-TSO-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IA-TSO-NEXT:    addi sp, sp, 16
+; RV32IA-TSO-NEXT:    ret
 ;
 ; RV64I-LABEL: cmpxchg_i64_acq_rel_monotonic:
 ; RV64I:       # %bb.0:
@@ -3195,6 +4846,11 @@ define void @cmpxchg_i64_acq_rel_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwin
 ; RV64IA-WMO-NEXT:  .LBB35_3:
 ; RV64IA-WMO-NEXT:    ret
 ;
+; RV64IA-ZACAS-LABEL: cmpxchg_i64_acq_rel_monotonic:
+; RV64IA-ZACAS:       # %bb.0:
+; RV64IA-ZACAS-NEXT:    amocas.d.aqrl a1, a2, (a0)
+; RV64IA-ZACAS-NEXT:    ret
+;
 ; RV64IA-TSO-LABEL: cmpxchg_i64_acq_rel_monotonic:
 ; RV64IA-TSO:       # %bb.0:
 ; RV64IA-TSO-NEXT:  .LBB35_1: # =>This Inner Loop Header: Depth=1
@@ -3205,6 +4861,18 @@ define void @cmpxchg_i64_acq_rel_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwin
 ; RV64IA-TSO-NEXT:    bnez a4, .LBB35_1
 ; RV64IA-TSO-NEXT:  .LBB35_3:
 ; RV64IA-TSO-NEXT:    ret
+; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i64_acq_rel_monotonic:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    mv a5, a4
+; RV32IA-WMO-ZACAS-NEXT:    mv a7, a2
+; RV32IA-WMO-ZACAS-NEXT:    mv a4, a3
+; RV32IA-WMO-ZACAS-NEXT:    mv a6, a1
+; RV32IA-WMO-ZACAS-NEXT:    amocas.d.aqrl a6, a4, (a0)
+; RV32IA-WMO-ZACAS-NEXT:    ret
+; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i64_acq_rel_monotonic:
+; RV64IA-WMO-ZACAS:       # %bb.0:
+; RV64IA-WMO-ZACAS-NEXT:    amocas.d.aqrl a1, a2, (a0)
+; RV64IA-WMO-ZACAS-NEXT:    ret
   %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val acq_rel monotonic
   ret void
 }
@@ -3227,22 +4895,48 @@ define void @cmpxchg_i64_acq_rel_acquire(ptr %ptr, i64 %cmp, i64 %val) nounwind
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: cmpxchg_i64_acq_rel_acquire:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    addi sp, sp, -16
-; RV32IA-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IA-NEXT:    mv a6, a4
-; RV32IA-NEXT:    sw a2, 4(sp)
-; RV32IA-NEXT:    sw a1, 0(sp)
-; RV32IA-NEXT:    mv a1, sp
-; RV32IA-NEXT:    li a4, 4
-; RV32IA-NEXT:    li a5, 2
-; RV32IA-NEXT:    mv a2, a3
-; RV32IA-NEXT:    mv a3, a6
-; RV32IA-NEXT:    call __atomic_compare_exchange_8 at plt
-; RV32IA-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
-; RV32IA-NEXT:    addi sp, sp, 16
-; RV32IA-NEXT:    ret
+; RV32IA-WMO-LABEL: cmpxchg_i64_acq_rel_acquire:
+; RV32IA-WMO:       # %bb.0:
+; RV32IA-WMO-NEXT:    addi sp, sp, -16
+; RV32IA-WMO-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IA-WMO-NEXT:    mv a6, a4
+; RV32IA-WMO-NEXT:    sw a2, 4(sp)
+; RV32IA-WMO-NEXT:    sw a1, 0(sp)
+; RV32IA-WMO-NEXT:    mv a1, sp
+; RV32IA-WMO-NEXT:    li a4, 4
+; RV32IA-WMO-NEXT:    li a5, 2
+; RV32IA-WMO-NEXT:    mv a2, a3
+; RV32IA-WMO-NEXT:    mv a3, a6
+; RV32IA-WMO-NEXT:    call __atomic_compare_exchange_8 at plt
+; RV32IA-WMO-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IA-WMO-NEXT:    addi sp, sp, 16
+; RV32IA-WMO-NEXT:    ret
+;
+; RV32IA-ZACAS-LABEL: cmpxchg_i64_acq_rel_acquire:
+; RV32IA-ZACAS:       # %bb.0:
+; RV32IA-ZACAS-NEXT:    mv a5, a4
+; RV32IA-ZACAS-NEXT:    mv a7, a2
+; RV32IA-ZACAS-NEXT:    mv a4, a3
+; RV32IA-ZACAS-NEXT:    mv a6, a1
+; RV32IA-ZACAS-NEXT:    amocas.d.aqrl a6, a4, (a0)
+; RV32IA-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-LABEL: cmpxchg_i64_acq_rel_acquire:
+; RV32IA-TSO:       # %bb.0:
+; RV32IA-TSO-NEXT:    addi sp, sp, -16
+; RV32IA-TSO-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IA-TSO-NEXT:    mv a6, a4
+; RV32IA-TSO-NEXT:    sw a2, 4(sp)
+; RV32IA-TSO-NEXT:    sw a1, 0(sp)
+; RV32IA-TSO-NEXT:    mv a1, sp
+; RV32IA-TSO-NEXT:    li a4, 4
+; RV32IA-TSO-NEXT:    li a5, 2
+; RV32IA-TSO-NEXT:    mv a2, a3
+; RV32IA-TSO-NEXT:    mv a3, a6
+; RV32IA-TSO-NEXT:    call __atomic_compare_exchange_8 at plt
+; RV32IA-TSO-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IA-TSO-NEXT:    addi sp, sp, 16
+; RV32IA-TSO-NEXT:    ret
 ;
 ; RV64I-LABEL: cmpxchg_i64_acq_rel_acquire:
 ; RV64I:       # %bb.0:
@@ -3268,6 +4962,11 @@ define void @cmpxchg_i64_acq_rel_acquire(ptr %ptr, i64 %cmp, i64 %val) nounwind
 ; RV64IA-WMO-NEXT:  .LBB36_3:
 ; RV64IA-WMO-NEXT:    ret
 ;
+; RV64IA-ZACAS-LABEL: cmpxchg_i64_acq_rel_acquire:
+; RV64IA-ZACAS:       # %bb.0:
+; RV64IA-ZACAS-NEXT:    amocas.d.aqrl a1, a2, (a0)
+; RV64IA-ZACAS-NEXT:    ret
+;
 ; RV64IA-TSO-LABEL: cmpxchg_i64_acq_rel_acquire:
 ; RV64IA-TSO:       # %bb.0:
 ; RV64IA-TSO-NEXT:  .LBB36_1: # =>This Inner Loop Header: Depth=1
@@ -3278,6 +4977,18 @@ define void @cmpxchg_i64_acq_rel_acquire(ptr %ptr, i64 %cmp, i64 %val) nounwind
 ; RV64IA-TSO-NEXT:    bnez a4, .LBB36_1
 ; RV64IA-TSO-NEXT:  .LBB36_3:
 ; RV64IA-TSO-NEXT:    ret
+; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i64_acq_rel_acquire:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    mv a5, a4
+; RV32IA-WMO-ZACAS-NEXT:    mv a7, a2
+; RV32IA-WMO-ZACAS-NEXT:    mv a4, a3
+; RV32IA-WMO-ZACAS-NEXT:    mv a6, a1
+; RV32IA-WMO-ZACAS-NEXT:    amocas.d.aqrl a6, a4, (a0)
+; RV32IA-WMO-ZACAS-NEXT:    ret
+; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i64_acq_rel_acquire:
+; RV64IA-WMO-ZACAS:       # %bb.0:
+; RV64IA-WMO-ZACAS-NEXT:    amocas.d.aqrl a1, a2, (a0)
+; RV64IA-WMO-ZACAS-NEXT:    ret
   %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val acq_rel acquire
   ret void
 }
@@ -3300,22 +5011,48 @@ define void @cmpxchg_i64_seq_cst_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwin
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: cmpxchg_i64_seq_cst_monotonic:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    addi sp, sp, -16
-; RV32IA-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IA-NEXT:    mv a5, a4
-; RV32IA-NEXT:    sw a2, 4(sp)
-; RV32IA-NEXT:    sw a1, 0(sp)
-; RV32IA-NEXT:    mv a1, sp
-; RV32IA-NEXT:    li a4, 5
-; RV32IA-NEXT:    mv a2, a3
-; RV32IA-NEXT:    mv a3, a5
-; RV32IA-NEXT:    li a5, 0
-; RV32IA-NEXT:    call __atomic_compare_exchange_8 at plt
-; RV32IA-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
-; RV32IA-NEXT:    addi sp, sp, 16
-; RV32IA-NEXT:    ret
+; RV32IA-WMO-LABEL: cmpxchg_i64_seq_cst_monotonic:
+; RV32IA-WMO:       # %bb.0:
+; RV32IA-WMO-NEXT:    addi sp, sp, -16
+; RV32IA-WMO-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IA-WMO-NEXT:    mv a5, a4
+; RV32IA-WMO-NEXT:    sw a2, 4(sp)
+; RV32IA-WMO-NEXT:    sw a1, 0(sp)
+; RV32IA-WMO-NEXT:    mv a1, sp
+; RV32IA-WMO-NEXT:    li a4, 5
+; RV32IA-WMO-NEXT:    mv a2, a3
+; RV32IA-WMO-NEXT:    mv a3, a5
+; RV32IA-WMO-NEXT:    li a5, 0
+; RV32IA-WMO-NEXT:    call __atomic_compare_exchange_8 at plt
+; RV32IA-WMO-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IA-WMO-NEXT:    addi sp, sp, 16
+; RV32IA-WMO-NEXT:    ret
+;
+; RV32IA-ZACAS-LABEL: cmpxchg_i64_seq_cst_monotonic:
+; RV32IA-ZACAS:       # %bb.0:
+; RV32IA-ZACAS-NEXT:    mv a5, a4
+; RV32IA-ZACAS-NEXT:    mv a7, a2
+; RV32IA-ZACAS-NEXT:    mv a4, a3
+; RV32IA-ZACAS-NEXT:    mv a6, a1
+; RV32IA-ZACAS-NEXT:    amocas.d.aqrl a6, a4, (a0)
+; RV32IA-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-LABEL: cmpxchg_i64_seq_cst_monotonic:
+; RV32IA-TSO:       # %bb.0:
+; RV32IA-TSO-NEXT:    addi sp, sp, -16
+; RV32IA-TSO-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IA-TSO-NEXT:    mv a5, a4
+; RV32IA-TSO-NEXT:    sw a2, 4(sp)
+; RV32IA-TSO-NEXT:    sw a1, 0(sp)
+; RV32IA-TSO-NEXT:    mv a1, sp
+; RV32IA-TSO-NEXT:    li a4, 5
+; RV32IA-TSO-NEXT:    mv a2, a3
+; RV32IA-TSO-NEXT:    mv a3, a5
+; RV32IA-TSO-NEXT:    li a5, 0
+; RV32IA-TSO-NEXT:    call __atomic_compare_exchange_8 at plt
+; RV32IA-TSO-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IA-TSO-NEXT:    addi sp, sp, 16
+; RV32IA-TSO-NEXT:    ret
 ;
 ; RV64I-LABEL: cmpxchg_i64_seq_cst_monotonic:
 ; RV64I:       # %bb.0:
@@ -3330,16 +5067,44 @@ define void @cmpxchg_i64_seq_cst_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwin
 ; RV64I-NEXT:    addi sp, sp, 16
 ; RV64I-NEXT:    ret
 ;
-; RV64IA-LABEL: cmpxchg_i64_seq_cst_monotonic:
-; RV64IA:       # %bb.0:
-; RV64IA-NEXT:  .LBB37_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.d.aqrl a3, (a0)
-; RV64IA-NEXT:    bne a3, a1, .LBB37_3
-; RV64IA-NEXT:  # %bb.2: # in Loop: Header=BB37_1 Depth=1
-; RV64IA-NEXT:    sc.d.rl a4, a2, (a0)
-; RV64IA-NEXT:    bnez a4, .LBB37_1
-; RV64IA-NEXT:  .LBB37_3:
-; RV64IA-NEXT:    ret
+; RV64IA-WMO-LABEL: cmpxchg_i64_seq_cst_monotonic:
+; RV64IA-WMO:       # %bb.0:
+; RV64IA-WMO-NEXT:  .LBB37_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT:    lr.d.aqrl a3, (a0)
+; RV64IA-WMO-NEXT:    bne a3, a1, .LBB37_3
+; RV64IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB37_1 Depth=1
+; RV64IA-WMO-NEXT:    sc.d.rl a4, a2, (a0)
+; RV64IA-WMO-NEXT:    bnez a4, .LBB37_1
+; RV64IA-WMO-NEXT:  .LBB37_3:
+; RV64IA-WMO-NEXT:    ret
+;
+; RV64IA-ZACAS-LABEL: cmpxchg_i64_seq_cst_monotonic:
+; RV64IA-ZACAS:       # %bb.0:
+; RV64IA-ZACAS-NEXT:    amocas.d.aqrl a1, a2, (a0)
+; RV64IA-ZACAS-NEXT:    ret
+;
+; RV64IA-TSO-LABEL: cmpxchg_i64_seq_cst_monotonic:
+; RV64IA-TSO:       # %bb.0:
+; RV64IA-TSO-NEXT:  .LBB37_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT:    lr.d.aqrl a3, (a0)
+; RV64IA-TSO-NEXT:    bne a3, a1, .LBB37_3
+; RV64IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB37_1 Depth=1
+; RV64IA-TSO-NEXT:    sc.d.rl a4, a2, (a0)
+; RV64IA-TSO-NEXT:    bnez a4, .LBB37_1
+; RV64IA-TSO-NEXT:  .LBB37_3:
+; RV64IA-TSO-NEXT:    ret
+; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i64_seq_cst_monotonic:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    mv a5, a4
+; RV32IA-WMO-ZACAS-NEXT:    mv a7, a2
+; RV32IA-WMO-ZACAS-NEXT:    mv a4, a3
+; RV32IA-WMO-ZACAS-NEXT:    mv a6, a1
+; RV32IA-WMO-ZACAS-NEXT:    amocas.d.aqrl a6, a4, (a0)
+; RV32IA-WMO-ZACAS-NEXT:    ret
+; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i64_seq_cst_monotonic:
+; RV64IA-WMO-ZACAS:       # %bb.0:
+; RV64IA-WMO-ZACAS-NEXT:    amocas.d.aqrl a1, a2, (a0)
+; RV64IA-WMO-ZACAS-NEXT:    ret
   %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val seq_cst monotonic
   ret void
 }
@@ -3362,22 +5127,48 @@ define void @cmpxchg_i64_seq_cst_acquire(ptr %ptr, i64 %cmp, i64 %val) nounwind
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: cmpxchg_i64_seq_cst_acquire:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    addi sp, sp, -16
-; RV32IA-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IA-NEXT:    mv a6, a4
-; RV32IA-NEXT:    sw a2, 4(sp)
-; RV32IA-NEXT:    sw a1, 0(sp)
-; RV32IA-NEXT:    mv a1, sp
-; RV32IA-NEXT:    li a4, 5
-; RV32IA-NEXT:    li a5, 2
-; RV32IA-NEXT:    mv a2, a3
-; RV32IA-NEXT:    mv a3, a6
-; RV32IA-NEXT:    call __atomic_compare_exchange_8 at plt
-; RV32IA-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
-; RV32IA-NEXT:    addi sp, sp, 16
-; RV32IA-NEXT:    ret
+; RV32IA-WMO-LABEL: cmpxchg_i64_seq_cst_acquire:
+; RV32IA-WMO:       # %bb.0:
+; RV32IA-WMO-NEXT:    addi sp, sp, -16
+; RV32IA-WMO-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IA-WMO-NEXT:    mv a6, a4
+; RV32IA-WMO-NEXT:    sw a2, 4(sp)
+; RV32IA-WMO-NEXT:    sw a1, 0(sp)
+; RV32IA-WMO-NEXT:    mv a1, sp
+; RV32IA-WMO-NEXT:    li a4, 5
+; RV32IA-WMO-NEXT:    li a5, 2
+; RV32IA-WMO-NEXT:    mv a2, a3
+; RV32IA-WMO-NEXT:    mv a3, a6
+; RV32IA-WMO-NEXT:    call __atomic_compare_exchange_8 at plt
+; RV32IA-WMO-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IA-WMO-NEXT:    addi sp, sp, 16
+; RV32IA-WMO-NEXT:    ret
+;
+; RV32IA-ZACAS-LABEL: cmpxchg_i64_seq_cst_acquire:
+; RV32IA-ZACAS:       # %bb.0:
+; RV32IA-ZACAS-NEXT:    mv a5, a4
+; RV32IA-ZACAS-NEXT:    mv a7, a2
+; RV32IA-ZACAS-NEXT:    mv a4, a3
+; RV32IA-ZACAS-NEXT:    mv a6, a1
+; RV32IA-ZACAS-NEXT:    amocas.d.aqrl a6, a4, (a0)
+; RV32IA-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-LABEL: cmpxchg_i64_seq_cst_acquire:
+; RV32IA-TSO:       # %bb.0:
+; RV32IA-TSO-NEXT:    addi sp, sp, -16
+; RV32IA-TSO-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IA-TSO-NEXT:    mv a6, a4
+; RV32IA-TSO-NEXT:    sw a2, 4(sp)
+; RV32IA-TSO-NEXT:    sw a1, 0(sp)
+; RV32IA-TSO-NEXT:    mv a1, sp
+; RV32IA-TSO-NEXT:    li a4, 5
+; RV32IA-TSO-NEXT:    li a5, 2
+; RV32IA-TSO-NEXT:    mv a2, a3
+; RV32IA-TSO-NEXT:    mv a3, a6
+; RV32IA-TSO-NEXT:    call __atomic_compare_exchange_8 at plt
+; RV32IA-TSO-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IA-TSO-NEXT:    addi sp, sp, 16
+; RV32IA-TSO-NEXT:    ret
 ;
 ; RV64I-LABEL: cmpxchg_i64_seq_cst_acquire:
 ; RV64I:       # %bb.0:
@@ -3392,16 +5183,44 @@ define void @cmpxchg_i64_seq_cst_acquire(ptr %ptr, i64 %cmp, i64 %val) nounwind
 ; RV64I-NEXT:    addi sp, sp, 16
 ; RV64I-NEXT:    ret
 ;
-; RV64IA-LABEL: cmpxchg_i64_seq_cst_acquire:
-; RV64IA:       # %bb.0:
-; RV64IA-NEXT:  .LBB38_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.d.aqrl a3, (a0)
-; RV64IA-NEXT:    bne a3, a1, .LBB38_3
-; RV64IA-NEXT:  # %bb.2: # in Loop: Header=BB38_1 Depth=1
-; RV64IA-NEXT:    sc.d.rl a4, a2, (a0)
-; RV64IA-NEXT:    bnez a4, .LBB38_1
-; RV64IA-NEXT:  .LBB38_3:
-; RV64IA-NEXT:    ret
+; RV64IA-WMO-LABEL: cmpxchg_i64_seq_cst_acquire:
+; RV64IA-WMO:       # %bb.0:
+; RV64IA-WMO-NEXT:  .LBB38_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT:    lr.d.aqrl a3, (a0)
+; RV64IA-WMO-NEXT:    bne a3, a1, .LBB38_3
+; RV64IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB38_1 Depth=1
+; RV64IA-WMO-NEXT:    sc.d.rl a4, a2, (a0)
+; RV64IA-WMO-NEXT:    bnez a4, .LBB38_1
+; RV64IA-WMO-NEXT:  .LBB38_3:
+; RV64IA-WMO-NEXT:    ret
+;
+; RV64IA-ZACAS-LABEL: cmpxchg_i64_seq_cst_acquire:
+; RV64IA-ZACAS:       # %bb.0:
+; RV64IA-ZACAS-NEXT:    amocas.d.aqrl a1, a2, (a0)
+; RV64IA-ZACAS-NEXT:    ret
+;
+; RV64IA-TSO-LABEL: cmpxchg_i64_seq_cst_acquire:
+; RV64IA-TSO:       # %bb.0:
+; RV64IA-TSO-NEXT:  .LBB38_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT:    lr.d.aqrl a3, (a0)
+; RV64IA-TSO-NEXT:    bne a3, a1, .LBB38_3
+; RV64IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB38_1 Depth=1
+; RV64IA-TSO-NEXT:    sc.d.rl a4, a2, (a0)
+; RV64IA-TSO-NEXT:    bnez a4, .LBB38_1
+; RV64IA-TSO-NEXT:  .LBB38_3:
+; RV64IA-TSO-NEXT:    ret
+; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i64_seq_cst_acquire:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    mv a5, a4
+; RV32IA-WMO-ZACAS-NEXT:    mv a7, a2
+; RV32IA-WMO-ZACAS-NEXT:    mv a4, a3
+; RV32IA-WMO-ZACAS-NEXT:    mv a6, a1
+; RV32IA-WMO-ZACAS-NEXT:    amocas.d.aqrl a6, a4, (a0)
+; RV32IA-WMO-ZACAS-NEXT:    ret
+; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i64_seq_cst_acquire:
+; RV64IA-WMO-ZACAS:       # %bb.0:
+; RV64IA-WMO-ZACAS-NEXT:    amocas.d.aqrl a1, a2, (a0)
+; RV64IA-WMO-ZACAS-NEXT:    ret
   %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val seq_cst acquire
   ret void
 }
@@ -3424,25 +5243,51 @@ define void @cmpxchg_i64_seq_cst_seq_cst(ptr %ptr, i64 %cmp, i64 %val) nounwind
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: cmpxchg_i64_seq_cst_seq_cst:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    addi sp, sp, -16
-; RV32IA-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IA-NEXT:    mv a6, a4
-; RV32IA-NEXT:    sw a2, 4(sp)
-; RV32IA-NEXT:    sw a1, 0(sp)
-; RV32IA-NEXT:    mv a1, sp
-; RV32IA-NEXT:    li a4, 5
-; RV32IA-NEXT:    li a5, 5
-; RV32IA-NEXT:    mv a2, a3
-; RV32IA-NEXT:    mv a3, a6
-; RV32IA-NEXT:    call __atomic_compare_exchange_8 at plt
-; RV32IA-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
-; RV32IA-NEXT:    addi sp, sp, 16
-; RV32IA-NEXT:    ret
+; RV32IA-WMO-LABEL: cmpxchg_i64_seq_cst_seq_cst:
+; RV32IA-WMO:       # %bb.0:
+; RV32IA-WMO-NEXT:    addi sp, sp, -16
+; RV32IA-WMO-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IA-WMO-NEXT:    mv a6, a4
+; RV32IA-WMO-NEXT:    sw a2, 4(sp)
+; RV32IA-WMO-NEXT:    sw a1, 0(sp)
+; RV32IA-WMO-NEXT:    mv a1, sp
+; RV32IA-WMO-NEXT:    li a4, 5
+; RV32IA-WMO-NEXT:    li a5, 5
+; RV32IA-WMO-NEXT:    mv a2, a3
+; RV32IA-WMO-NEXT:    mv a3, a6
+; RV32IA-WMO-NEXT:    call __atomic_compare_exchange_8 at plt
+; RV32IA-WMO-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IA-WMO-NEXT:    addi sp, sp, 16
+; RV32IA-WMO-NEXT:    ret
 ;
-; RV64I-LABEL: cmpxchg_i64_seq_cst_seq_cst:
-; RV64I:       # %bb.0:
+; RV32IA-ZACAS-LABEL: cmpxchg_i64_seq_cst_seq_cst:
+; RV32IA-ZACAS:       # %bb.0:
+; RV32IA-ZACAS-NEXT:    mv a5, a4
+; RV32IA-ZACAS-NEXT:    mv a7, a2
+; RV32IA-ZACAS-NEXT:    mv a4, a3
+; RV32IA-ZACAS-NEXT:    mv a6, a1
+; RV32IA-ZACAS-NEXT:    amocas.d.aqrl a6, a4, (a0)
+; RV32IA-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-LABEL: cmpxchg_i64_seq_cst_seq_cst:
+; RV32IA-TSO:       # %bb.0:
+; RV32IA-TSO-NEXT:    addi sp, sp, -16
+; RV32IA-TSO-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IA-TSO-NEXT:    mv a6, a4
+; RV32IA-TSO-NEXT:    sw a2, 4(sp)
+; RV32IA-TSO-NEXT:    sw a1, 0(sp)
+; RV32IA-TSO-NEXT:    mv a1, sp
+; RV32IA-TSO-NEXT:    li a4, 5
+; RV32IA-TSO-NEXT:    li a5, 5
+; RV32IA-TSO-NEXT:    mv a2, a3
+; RV32IA-TSO-NEXT:    mv a3, a6
+; RV32IA-TSO-NEXT:    call __atomic_compare_exchange_8 at plt
+; RV32IA-TSO-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IA-TSO-NEXT:    addi sp, sp, 16
+; RV32IA-TSO-NEXT:    ret
+;
+; RV64I-LABEL: cmpxchg_i64_seq_cst_seq_cst:
+; RV64I:       # %bb.0:
 ; RV64I-NEXT:    addi sp, sp, -16
 ; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd a1, 0(sp)
@@ -3454,16 +5299,1321 @@ define void @cmpxchg_i64_seq_cst_seq_cst(ptr %ptr, i64 %cmp, i64 %val) nounwind
 ; RV64I-NEXT:    addi sp, sp, 16
 ; RV64I-NEXT:    ret
 ;
-; RV64IA-LABEL: cmpxchg_i64_seq_cst_seq_cst:
-; RV64IA:       # %bb.0:
-; RV64IA-NEXT:  .LBB39_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.d.aqrl a3, (a0)
-; RV64IA-NEXT:    bne a3, a1, .LBB39_3
-; RV64IA-NEXT:  # %bb.2: # in Loop: Header=BB39_1 Depth=1
-; RV64IA-NEXT:    sc.d.rl a4, a2, (a0)
-; RV64IA-NEXT:    bnez a4, .LBB39_1
-; RV64IA-NEXT:  .LBB39_3:
-; RV64IA-NEXT:    ret
+; RV64IA-WMO-LABEL: cmpxchg_i64_seq_cst_seq_cst:
+; RV64IA-WMO:       # %bb.0:
+; RV64IA-WMO-NEXT:  .LBB39_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT:    lr.d.aqrl a3, (a0)
+; RV64IA-WMO-NEXT:    bne a3, a1, .LBB39_3
+; RV64IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB39_1 Depth=1
+; RV64IA-WMO-NEXT:    sc.d.rl a4, a2, (a0)
+; RV64IA-WMO-NEXT:    bnez a4, .LBB39_1
+; RV64IA-WMO-NEXT:  .LBB39_3:
+; RV64IA-WMO-NEXT:    ret
+;
+; RV64IA-ZACAS-LABEL: cmpxchg_i64_seq_cst_seq_cst:
+; RV64IA-ZACAS:       # %bb.0:
+; RV64IA-ZACAS-NEXT:    amocas.d.aqrl a1, a2, (a0)
+; RV64IA-ZACAS-NEXT:    ret
+;
+; RV64IA-TSO-LABEL: cmpxchg_i64_seq_cst_seq_cst:
+; RV64IA-TSO:       # %bb.0:
+; RV64IA-TSO-NEXT:  .LBB39_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT:    lr.d.aqrl a3, (a0)
+; RV64IA-TSO-NEXT:    bne a3, a1, .LBB39_3
+; RV64IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB39_1 Depth=1
+; RV64IA-TSO-NEXT:    sc.d.rl a4, a2, (a0)
+; RV64IA-TSO-NEXT:    bnez a4, .LBB39_1
+; RV64IA-TSO-NEXT:  .LBB39_3:
+; RV64IA-TSO-NEXT:    ret
+; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i64_seq_cst_seq_cst:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    mv a5, a4
+; RV32IA-WMO-ZACAS-NEXT:    mv a7, a2
+; RV32IA-WMO-ZACAS-NEXT:    mv a4, a3
+; RV32IA-WMO-ZACAS-NEXT:    mv a6, a1
+; RV32IA-WMO-ZACAS-NEXT:    amocas.d.aqrl a6, a4, (a0)
+; RV32IA-WMO-ZACAS-NEXT:    ret
+; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i64_seq_cst_seq_cst:
+; RV64IA-WMO-ZACAS:       # %bb.0:
+; RV64IA-WMO-ZACAS-NEXT:    amocas.d.aqrl a1, a2, (a0)
+; RV64IA-WMO-ZACAS-NEXT:    ret
   %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val seq_cst seq_cst
   ret void
 }
+
+define void @cmpxchg_i128_monotonic_monotonic(ptr %ptr, i128 %cmp, i128 %val) nounwind {
+; RV32I-LABEL: cmpxchg_i128_monotonic_monotonic:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -48
+; RV32I-NEXT:    sw ra, 44(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    mv a4, a0
+; RV32I-NEXT:    lw a0, 0(a2)
+; RV32I-NEXT:    lw a3, 4(a2)
+; RV32I-NEXT:    lw a5, 8(a2)
+; RV32I-NEXT:    lw a2, 12(a2)
+; RV32I-NEXT:    lw a6, 12(a1)
+; RV32I-NEXT:    lw a7, 8(a1)
+; RV32I-NEXT:    lw t0, 4(a1)
+; RV32I-NEXT:    lw a1, 0(a1)
+; RV32I-NEXT:    sw a6, 36(sp)
+; RV32I-NEXT:    sw a7, 32(sp)
+; RV32I-NEXT:    sw t0, 28(sp)
+; RV32I-NEXT:    sw a1, 24(sp)
+; RV32I-NEXT:    sw a2, 20(sp)
+; RV32I-NEXT:    sw a5, 16(sp)
+; RV32I-NEXT:    sw a3, 12(sp)
+; RV32I-NEXT:    sw a0, 8(sp)
+; RV32I-NEXT:    li a0, 16
+; RV32I-NEXT:    addi a2, sp, 24
+; RV32I-NEXT:    addi a3, sp, 8
+; RV32I-NEXT:    mv a1, a4
+; RV32I-NEXT:    li a4, 0
+; RV32I-NEXT:    li a5, 0
+; RV32I-NEXT:    call __atomic_compare_exchange at plt
+; RV32I-NEXT:    lw ra, 44(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 48
+; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: cmpxchg_i128_monotonic_monotonic:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    addi sp, sp, -48
+; RV32IA-NEXT:    sw ra, 44(sp) # 4-byte Folded Spill
+; RV32IA-NEXT:    mv a4, a0
+; RV32IA-NEXT:    lw a0, 0(a2)
+; RV32IA-NEXT:    lw a3, 4(a2)
+; RV32IA-NEXT:    lw a5, 8(a2)
+; RV32IA-NEXT:    lw a2, 12(a2)
+; RV32IA-NEXT:    lw a6, 12(a1)
+; RV32IA-NEXT:    lw a7, 8(a1)
+; RV32IA-NEXT:    lw t0, 4(a1)
+; RV32IA-NEXT:    lw a1, 0(a1)
+; RV32IA-NEXT:    sw a6, 36(sp)
+; RV32IA-NEXT:    sw a7, 32(sp)
+; RV32IA-NEXT:    sw t0, 28(sp)
+; RV32IA-NEXT:    sw a1, 24(sp)
+; RV32IA-NEXT:    sw a2, 20(sp)
+; RV32IA-NEXT:    sw a5, 16(sp)
+; RV32IA-NEXT:    sw a3, 12(sp)
+; RV32IA-NEXT:    sw a0, 8(sp)
+; RV32IA-NEXT:    li a0, 16
+; RV32IA-NEXT:    addi a2, sp, 24
+; RV32IA-NEXT:    addi a3, sp, 8
+; RV32IA-NEXT:    mv a1, a4
+; RV32IA-NEXT:    li a4, 0
+; RV32IA-NEXT:    li a5, 0
+; RV32IA-NEXT:    call __atomic_compare_exchange at plt
+; RV32IA-NEXT:    lw ra, 44(sp) # 4-byte Folded Reload
+; RV32IA-NEXT:    addi sp, sp, 48
+; RV32IA-NEXT:    ret
+;
+; RV64I-LABEL: cmpxchg_i128_monotonic_monotonic:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -32
+; RV64I-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd a2, 8(sp)
+; RV64I-NEXT:    sd a1, 0(sp)
+; RV64I-NEXT:    mv a1, sp
+; RV64I-NEXT:    mv a2, a3
+; RV64I-NEXT:    mv a3, a4
+; RV64I-NEXT:    li a4, 0
+; RV64I-NEXT:    li a5, 0
+; RV64I-NEXT:    call __atomic_compare_exchange_16 at plt
+; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 32
+; RV64I-NEXT:    ret
+;
+; RV64IA-WMO-LABEL: cmpxchg_i128_monotonic_monotonic:
+; RV64IA-WMO:       # %bb.0:
+; RV64IA-WMO-NEXT:    addi sp, sp, -32
+; RV64IA-WMO-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; RV64IA-WMO-NEXT:    sd a2, 8(sp)
+; RV64IA-WMO-NEXT:    sd a1, 0(sp)
+; RV64IA-WMO-NEXT:    mv a1, sp
+; RV64IA-WMO-NEXT:    mv a2, a3
+; RV64IA-WMO-NEXT:    mv a3, a4
+; RV64IA-WMO-NEXT:    li a4, 0
+; RV64IA-WMO-NEXT:    li a5, 0
+; RV64IA-WMO-NEXT:    call __atomic_compare_exchange_16 at plt
+; RV64IA-WMO-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64IA-WMO-NEXT:    addi sp, sp, 32
+; RV64IA-WMO-NEXT:    ret
+;
+; RV64IA-ZACAS-LABEL: cmpxchg_i128_monotonic_monotonic:
+; RV64IA-ZACAS:       # %bb.0:
+; RV64IA-ZACAS-NEXT:    mv a5, a4
+; RV64IA-ZACAS-NEXT:    mv a7, a2
+; RV64IA-ZACAS-NEXT:    mv a4, a3
+; RV64IA-ZACAS-NEXT:    mv a6, a1
+; RV64IA-ZACAS-NEXT:    amocas.q a6, a4, (a0)
+; RV64IA-ZACAS-NEXT:    ret
+;
+; RV64IA-TSO-LABEL: cmpxchg_i128_monotonic_monotonic:
+; RV64IA-TSO:       # %bb.0:
+; RV64IA-TSO-NEXT:    addi sp, sp, -32
+; RV64IA-TSO-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; RV64IA-TSO-NEXT:    sd a2, 8(sp)
+; RV64IA-TSO-NEXT:    sd a1, 0(sp)
+; RV64IA-TSO-NEXT:    mv a1, sp
+; RV64IA-TSO-NEXT:    mv a2, a3
+; RV64IA-TSO-NEXT:    mv a3, a4
+; RV64IA-TSO-NEXT:    li a4, 0
+; RV64IA-TSO-NEXT:    li a5, 0
+; RV64IA-TSO-NEXT:    call __atomic_compare_exchange_16 at plt
+; RV64IA-TSO-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64IA-TSO-NEXT:    addi sp, sp, 32
+; RV64IA-TSO-NEXT:    ret
+  %res = cmpxchg ptr %ptr, i128 %cmp, i128 %val monotonic monotonic
+  ret void
+}
+
+define void @cmpxchg_i128_acquire_monotonic(ptr %ptr, i128 %cmp, i128 %val) nounwind {
+; RV32I-LABEL: cmpxchg_i128_acquire_monotonic:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -48
+; RV32I-NEXT:    sw ra, 44(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    mv a5, a0
+; RV32I-NEXT:    lw a0, 0(a2)
+; RV32I-NEXT:    lw a3, 4(a2)
+; RV32I-NEXT:    lw a4, 8(a2)
+; RV32I-NEXT:    lw a2, 12(a2)
+; RV32I-NEXT:    lw a6, 12(a1)
+; RV32I-NEXT:    lw a7, 8(a1)
+; RV32I-NEXT:    lw t0, 4(a1)
+; RV32I-NEXT:    lw a1, 0(a1)
+; RV32I-NEXT:    sw a6, 36(sp)
+; RV32I-NEXT:    sw a7, 32(sp)
+; RV32I-NEXT:    sw t0, 28(sp)
+; RV32I-NEXT:    sw a1, 24(sp)
+; RV32I-NEXT:    sw a2, 20(sp)
+; RV32I-NEXT:    sw a4, 16(sp)
+; RV32I-NEXT:    sw a3, 12(sp)
+; RV32I-NEXT:    sw a0, 8(sp)
+; RV32I-NEXT:    li a0, 16
+; RV32I-NEXT:    addi a2, sp, 24
+; RV32I-NEXT:    addi a3, sp, 8
+; RV32I-NEXT:    li a4, 2
+; RV32I-NEXT:    mv a1, a5
+; RV32I-NEXT:    li a5, 0
+; RV32I-NEXT:    call __atomic_compare_exchange at plt
+; RV32I-NEXT:    lw ra, 44(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 48
+; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: cmpxchg_i128_acquire_monotonic:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    addi sp, sp, -48
+; RV32IA-NEXT:    sw ra, 44(sp) # 4-byte Folded Spill
+; RV32IA-NEXT:    mv a5, a0
+; RV32IA-NEXT:    lw a0, 0(a2)
+; RV32IA-NEXT:    lw a3, 4(a2)
+; RV32IA-NEXT:    lw a4, 8(a2)
+; RV32IA-NEXT:    lw a2, 12(a2)
+; RV32IA-NEXT:    lw a6, 12(a1)
+; RV32IA-NEXT:    lw a7, 8(a1)
+; RV32IA-NEXT:    lw t0, 4(a1)
+; RV32IA-NEXT:    lw a1, 0(a1)
+; RV32IA-NEXT:    sw a6, 36(sp)
+; RV32IA-NEXT:    sw a7, 32(sp)
+; RV32IA-NEXT:    sw t0, 28(sp)
+; RV32IA-NEXT:    sw a1, 24(sp)
+; RV32IA-NEXT:    sw a2, 20(sp)
+; RV32IA-NEXT:    sw a4, 16(sp)
+; RV32IA-NEXT:    sw a3, 12(sp)
+; RV32IA-NEXT:    sw a0, 8(sp)
+; RV32IA-NEXT:    li a0, 16
+; RV32IA-NEXT:    addi a2, sp, 24
+; RV32IA-NEXT:    addi a3, sp, 8
+; RV32IA-NEXT:    li a4, 2
+; RV32IA-NEXT:    mv a1, a5
+; RV32IA-NEXT:    li a5, 0
+; RV32IA-NEXT:    call __atomic_compare_exchange at plt
+; RV32IA-NEXT:    lw ra, 44(sp) # 4-byte Folded Reload
+; RV32IA-NEXT:    addi sp, sp, 48
+; RV32IA-NEXT:    ret
+;
+; RV64I-LABEL: cmpxchg_i128_acquire_monotonic:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -32
+; RV64I-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    mv a5, a4
+; RV64I-NEXT:    sd a2, 8(sp)
+; RV64I-NEXT:    sd a1, 0(sp)
+; RV64I-NEXT:    mv a1, sp
+; RV64I-NEXT:    li a4, 2
+; RV64I-NEXT:    mv a2, a3
+; RV64I-NEXT:    mv a3, a5
+; RV64I-NEXT:    li a5, 0
+; RV64I-NEXT:    call __atomic_compare_exchange_16 at plt
+; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 32
+; RV64I-NEXT:    ret
+;
+; RV64IA-WMO-LABEL: cmpxchg_i128_acquire_monotonic:
+; RV64IA-WMO:       # %bb.0:
+; RV64IA-WMO-NEXT:    addi sp, sp, -32
+; RV64IA-WMO-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; RV64IA-WMO-NEXT:    mv a5, a4
+; RV64IA-WMO-NEXT:    sd a2, 8(sp)
+; RV64IA-WMO-NEXT:    sd a1, 0(sp)
+; RV64IA-WMO-NEXT:    mv a1, sp
+; RV64IA-WMO-NEXT:    li a4, 2
+; RV64IA-WMO-NEXT:    mv a2, a3
+; RV64IA-WMO-NEXT:    mv a3, a5
+; RV64IA-WMO-NEXT:    li a5, 0
+; RV64IA-WMO-NEXT:    call __atomic_compare_exchange_16 at plt
+; RV64IA-WMO-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64IA-WMO-NEXT:    addi sp, sp, 32
+; RV64IA-WMO-NEXT:    ret
+;
+; RV64IA-ZACAS-LABEL: cmpxchg_i128_acquire_monotonic:
+; RV64IA-ZACAS:       # %bb.0:
+; RV64IA-ZACAS-NEXT:    mv a5, a4
+; RV64IA-ZACAS-NEXT:    mv a7, a2
+; RV64IA-ZACAS-NEXT:    mv a4, a3
+; RV64IA-ZACAS-NEXT:    mv a6, a1
+; RV64IA-ZACAS-NEXT:    amocas.q.aq a6, a4, (a0)
+; RV64IA-ZACAS-NEXT:    ret
+;
+; RV64IA-TSO-LABEL: cmpxchg_i128_acquire_monotonic:
+; RV64IA-TSO:       # %bb.0:
+; RV64IA-TSO-NEXT:    addi sp, sp, -32
+; RV64IA-TSO-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; RV64IA-TSO-NEXT:    mv a5, a4
+; RV64IA-TSO-NEXT:    sd a2, 8(sp)
+; RV64IA-TSO-NEXT:    sd a1, 0(sp)
+; RV64IA-TSO-NEXT:    mv a1, sp
+; RV64IA-TSO-NEXT:    li a4, 2
+; RV64IA-TSO-NEXT:    mv a2, a3
+; RV64IA-TSO-NEXT:    mv a3, a5
+; RV64IA-TSO-NEXT:    li a5, 0
+; RV64IA-TSO-NEXT:    call __atomic_compare_exchange_16 at plt
+; RV64IA-TSO-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64IA-TSO-NEXT:    addi sp, sp, 32
+; RV64IA-TSO-NEXT:    ret
+  %res = cmpxchg ptr %ptr, i128 %cmp, i128 %val acquire monotonic
+  ret void
+}
+
+define void @cmpxchg_i128_acquire_acquire(ptr %ptr, i128 %cmp, i128 %val) nounwind {
+; RV32I-LABEL: cmpxchg_i128_acquire_acquire:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -48
+; RV32I-NEXT:    sw ra, 44(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    mv a6, a0
+; RV32I-NEXT:    lw a0, 0(a2)
+; RV32I-NEXT:    lw a3, 4(a2)
+; RV32I-NEXT:    lw a4, 8(a2)
+; RV32I-NEXT:    lw a2, 12(a2)
+; RV32I-NEXT:    lw a5, 12(a1)
+; RV32I-NEXT:    lw a7, 8(a1)
+; RV32I-NEXT:    lw t0, 4(a1)
+; RV32I-NEXT:    lw a1, 0(a1)
+; RV32I-NEXT:    sw a5, 36(sp)
+; RV32I-NEXT:    sw a7, 32(sp)
+; RV32I-NEXT:    sw t0, 28(sp)
+; RV32I-NEXT:    sw a1, 24(sp)
+; RV32I-NEXT:    sw a2, 20(sp)
+; RV32I-NEXT:    sw a4, 16(sp)
+; RV32I-NEXT:    sw a3, 12(sp)
+; RV32I-NEXT:    sw a0, 8(sp)
+; RV32I-NEXT:    li a0, 16
+; RV32I-NEXT:    addi a2, sp, 24
+; RV32I-NEXT:    addi a3, sp, 8
+; RV32I-NEXT:    li a4, 2
+; RV32I-NEXT:    li a5, 2
+; RV32I-NEXT:    mv a1, a6
+; RV32I-NEXT:    call __atomic_compare_exchange at plt
+; RV32I-NEXT:    lw ra, 44(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 48
+; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: cmpxchg_i128_acquire_acquire:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    addi sp, sp, -48
+; RV32IA-NEXT:    sw ra, 44(sp) # 4-byte Folded Spill
+; RV32IA-NEXT:    mv a6, a0
+; RV32IA-NEXT:    lw a0, 0(a2)
+; RV32IA-NEXT:    lw a3, 4(a2)
+; RV32IA-NEXT:    lw a4, 8(a2)
+; RV32IA-NEXT:    lw a2, 12(a2)
+; RV32IA-NEXT:    lw a5, 12(a1)
+; RV32IA-NEXT:    lw a7, 8(a1)
+; RV32IA-NEXT:    lw t0, 4(a1)
+; RV32IA-NEXT:    lw a1, 0(a1)
+; RV32IA-NEXT:    sw a5, 36(sp)
+; RV32IA-NEXT:    sw a7, 32(sp)
+; RV32IA-NEXT:    sw t0, 28(sp)
+; RV32IA-NEXT:    sw a1, 24(sp)
+; RV32IA-NEXT:    sw a2, 20(sp)
+; RV32IA-NEXT:    sw a4, 16(sp)
+; RV32IA-NEXT:    sw a3, 12(sp)
+; RV32IA-NEXT:    sw a0, 8(sp)
+; RV32IA-NEXT:    li a0, 16
+; RV32IA-NEXT:    addi a2, sp, 24
+; RV32IA-NEXT:    addi a3, sp, 8
+; RV32IA-NEXT:    li a4, 2
+; RV32IA-NEXT:    li a5, 2
+; RV32IA-NEXT:    mv a1, a6
+; RV32IA-NEXT:    call __atomic_compare_exchange at plt
+; RV32IA-NEXT:    lw ra, 44(sp) # 4-byte Folded Reload
+; RV32IA-NEXT:    addi sp, sp, 48
+; RV32IA-NEXT:    ret
+;
+; RV64I-LABEL: cmpxchg_i128_acquire_acquire:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -32
+; RV64I-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    mv a6, a4
+; RV64I-NEXT:    sd a2, 8(sp)
+; RV64I-NEXT:    sd a1, 0(sp)
+; RV64I-NEXT:    mv a1, sp
+; RV64I-NEXT:    li a4, 2
+; RV64I-NEXT:    li a5, 2
+; RV64I-NEXT:    mv a2, a3
+; RV64I-NEXT:    mv a3, a6
+; RV64I-NEXT:    call __atomic_compare_exchange_16 at plt
+; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 32
+; RV64I-NEXT:    ret
+;
+; RV64IA-WMO-LABEL: cmpxchg_i128_acquire_acquire:
+; RV64IA-WMO:       # %bb.0:
+; RV64IA-WMO-NEXT:    addi sp, sp, -32
+; RV64IA-WMO-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; RV64IA-WMO-NEXT:    mv a6, a4
+; RV64IA-WMO-NEXT:    sd a2, 8(sp)
+; RV64IA-WMO-NEXT:    sd a1, 0(sp)
+; RV64IA-WMO-NEXT:    mv a1, sp
+; RV64IA-WMO-NEXT:    li a4, 2
+; RV64IA-WMO-NEXT:    li a5, 2
+; RV64IA-WMO-NEXT:    mv a2, a3
+; RV64IA-WMO-NEXT:    mv a3, a6
+; RV64IA-WMO-NEXT:    call __atomic_compare_exchange_16 at plt
+; RV64IA-WMO-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64IA-WMO-NEXT:    addi sp, sp, 32
+; RV64IA-WMO-NEXT:    ret
+;
+; RV64IA-ZACAS-LABEL: cmpxchg_i128_acquire_acquire:
+; RV64IA-ZACAS:       # %bb.0:
+; RV64IA-ZACAS-NEXT:    mv a5, a4
+; RV64IA-ZACAS-NEXT:    mv a7, a2
+; RV64IA-ZACAS-NEXT:    mv a4, a3
+; RV64IA-ZACAS-NEXT:    mv a6, a1
+; RV64IA-ZACAS-NEXT:    amocas.q.aq a6, a4, (a0)
+; RV64IA-ZACAS-NEXT:    ret
+;
+; RV64IA-TSO-LABEL: cmpxchg_i128_acquire_acquire:
+; RV64IA-TSO:       # %bb.0:
+; RV64IA-TSO-NEXT:    addi sp, sp, -32
+; RV64IA-TSO-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; RV64IA-TSO-NEXT:    mv a6, a4
+; RV64IA-TSO-NEXT:    sd a2, 8(sp)
+; RV64IA-TSO-NEXT:    sd a1, 0(sp)
+; RV64IA-TSO-NEXT:    mv a1, sp
+; RV64IA-TSO-NEXT:    li a4, 2
+; RV64IA-TSO-NEXT:    li a5, 2
+; RV64IA-TSO-NEXT:    mv a2, a3
+; RV64IA-TSO-NEXT:    mv a3, a6
+; RV64IA-TSO-NEXT:    call __atomic_compare_exchange_16 at plt
+; RV64IA-TSO-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64IA-TSO-NEXT:    addi sp, sp, 32
+; RV64IA-TSO-NEXT:    ret
+  %res = cmpxchg ptr %ptr, i128 %cmp, i128 %val acquire acquire
+  ret void
+}
+
+define void @cmpxchg_i128_release_monotonic(ptr %ptr, i128 %cmp, i128 %val) nounwind {
+; RV32I-LABEL: cmpxchg_i128_release_monotonic:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -48
+; RV32I-NEXT:    sw ra, 44(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    mv a5, a0
+; RV32I-NEXT:    lw a0, 0(a2)
+; RV32I-NEXT:    lw a3, 4(a2)
+; RV32I-NEXT:    lw a4, 8(a2)
+; RV32I-NEXT:    lw a2, 12(a2)
+; RV32I-NEXT:    lw a6, 12(a1)
+; RV32I-NEXT:    lw a7, 8(a1)
+; RV32I-NEXT:    lw t0, 4(a1)
+; RV32I-NEXT:    lw a1, 0(a1)
+; RV32I-NEXT:    sw a6, 36(sp)
+; RV32I-NEXT:    sw a7, 32(sp)
+; RV32I-NEXT:    sw t0, 28(sp)
+; RV32I-NEXT:    sw a1, 24(sp)
+; RV32I-NEXT:    sw a2, 20(sp)
+; RV32I-NEXT:    sw a4, 16(sp)
+; RV32I-NEXT:    sw a3, 12(sp)
+; RV32I-NEXT:    sw a0, 8(sp)
+; RV32I-NEXT:    li a0, 16
+; RV32I-NEXT:    addi a2, sp, 24
+; RV32I-NEXT:    addi a3, sp, 8
+; RV32I-NEXT:    li a4, 3
+; RV32I-NEXT:    mv a1, a5
+; RV32I-NEXT:    li a5, 0
+; RV32I-NEXT:    call __atomic_compare_exchange at plt
+; RV32I-NEXT:    lw ra, 44(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 48
+; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: cmpxchg_i128_release_monotonic:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    addi sp, sp, -48
+; RV32IA-NEXT:    sw ra, 44(sp) # 4-byte Folded Spill
+; RV32IA-NEXT:    mv a5, a0
+; RV32IA-NEXT:    lw a0, 0(a2)
+; RV32IA-NEXT:    lw a3, 4(a2)
+; RV32IA-NEXT:    lw a4, 8(a2)
+; RV32IA-NEXT:    lw a2, 12(a2)
+; RV32IA-NEXT:    lw a6, 12(a1)
+; RV32IA-NEXT:    lw a7, 8(a1)
+; RV32IA-NEXT:    lw t0, 4(a1)
+; RV32IA-NEXT:    lw a1, 0(a1)
+; RV32IA-NEXT:    sw a6, 36(sp)
+; RV32IA-NEXT:    sw a7, 32(sp)
+; RV32IA-NEXT:    sw t0, 28(sp)
+; RV32IA-NEXT:    sw a1, 24(sp)
+; RV32IA-NEXT:    sw a2, 20(sp)
+; RV32IA-NEXT:    sw a4, 16(sp)
+; RV32IA-NEXT:    sw a3, 12(sp)
+; RV32IA-NEXT:    sw a0, 8(sp)
+; RV32IA-NEXT:    li a0, 16
+; RV32IA-NEXT:    addi a2, sp, 24
+; RV32IA-NEXT:    addi a3, sp, 8
+; RV32IA-NEXT:    li a4, 3
+; RV32IA-NEXT:    mv a1, a5
+; RV32IA-NEXT:    li a5, 0
+; RV32IA-NEXT:    call __atomic_compare_exchange at plt
+; RV32IA-NEXT:    lw ra, 44(sp) # 4-byte Folded Reload
+; RV32IA-NEXT:    addi sp, sp, 48
+; RV32IA-NEXT:    ret
+;
+; RV64I-LABEL: cmpxchg_i128_release_monotonic:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -32
+; RV64I-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    mv a5, a4
+; RV64I-NEXT:    sd a2, 8(sp)
+; RV64I-NEXT:    sd a1, 0(sp)
+; RV64I-NEXT:    mv a1, sp
+; RV64I-NEXT:    li a4, 3
+; RV64I-NEXT:    mv a2, a3
+; RV64I-NEXT:    mv a3, a5
+; RV64I-NEXT:    li a5, 0
+; RV64I-NEXT:    call __atomic_compare_exchange_16 at plt
+; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 32
+; RV64I-NEXT:    ret
+;
+; RV64IA-WMO-LABEL: cmpxchg_i128_release_monotonic:
+; RV64IA-WMO:       # %bb.0:
+; RV64IA-WMO-NEXT:    addi sp, sp, -32
+; RV64IA-WMO-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; RV64IA-WMO-NEXT:    mv a5, a4
+; RV64IA-WMO-NEXT:    sd a2, 8(sp)
+; RV64IA-WMO-NEXT:    sd a1, 0(sp)
+; RV64IA-WMO-NEXT:    mv a1, sp
+; RV64IA-WMO-NEXT:    li a4, 3
+; RV64IA-WMO-NEXT:    mv a2, a3
+; RV64IA-WMO-NEXT:    mv a3, a5
+; RV64IA-WMO-NEXT:    li a5, 0
+; RV64IA-WMO-NEXT:    call __atomic_compare_exchange_16 at plt
+; RV64IA-WMO-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64IA-WMO-NEXT:    addi sp, sp, 32
+; RV64IA-WMO-NEXT:    ret
+;
+; RV64IA-ZACAS-LABEL: cmpxchg_i128_release_monotonic:
+; RV64IA-ZACAS:       # %bb.0:
+; RV64IA-ZACAS-NEXT:    mv a5, a4
+; RV64IA-ZACAS-NEXT:    mv a7, a2
+; RV64IA-ZACAS-NEXT:    mv a4, a3
+; RV64IA-ZACAS-NEXT:    mv a6, a1
+; RV64IA-ZACAS-NEXT:    amocas.q.rl a6, a4, (a0)
+; RV64IA-ZACAS-NEXT:    ret
+;
+; RV64IA-TSO-LABEL: cmpxchg_i128_release_monotonic:
+; RV64IA-TSO:       # %bb.0:
+; RV64IA-TSO-NEXT:    addi sp, sp, -32
+; RV64IA-TSO-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; RV64IA-TSO-NEXT:    mv a5, a4
+; RV64IA-TSO-NEXT:    sd a2, 8(sp)
+; RV64IA-TSO-NEXT:    sd a1, 0(sp)
+; RV64IA-TSO-NEXT:    mv a1, sp
+; RV64IA-TSO-NEXT:    li a4, 3
+; RV64IA-TSO-NEXT:    mv a2, a3
+; RV64IA-TSO-NEXT:    mv a3, a5
+; RV64IA-TSO-NEXT:    li a5, 0
+; RV64IA-TSO-NEXT:    call __atomic_compare_exchange_16 at plt
+; RV64IA-TSO-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64IA-TSO-NEXT:    addi sp, sp, 32
+; RV64IA-TSO-NEXT:    ret
+  %res = cmpxchg ptr %ptr, i128 %cmp, i128 %val release monotonic
+  ret void
+}
+
+define void @cmpxchg_i128_release_acquire(ptr %ptr, i128 %cmp, i128 %val) nounwind {
+; RV32I-LABEL: cmpxchg_i128_release_acquire:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -48
+; RV32I-NEXT:    sw ra, 44(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    mv a6, a0
+; RV32I-NEXT:    lw a0, 0(a2)
+; RV32I-NEXT:    lw a3, 4(a2)
+; RV32I-NEXT:    lw a4, 8(a2)
+; RV32I-NEXT:    lw a2, 12(a2)
+; RV32I-NEXT:    lw a5, 12(a1)
+; RV32I-NEXT:    lw a7, 8(a1)
+; RV32I-NEXT:    lw t0, 4(a1)
+; RV32I-NEXT:    lw a1, 0(a1)
+; RV32I-NEXT:    sw a5, 36(sp)
+; RV32I-NEXT:    sw a7, 32(sp)
+; RV32I-NEXT:    sw t0, 28(sp)
+; RV32I-NEXT:    sw a1, 24(sp)
+; RV32I-NEXT:    sw a2, 20(sp)
+; RV32I-NEXT:    sw a4, 16(sp)
+; RV32I-NEXT:    sw a3, 12(sp)
+; RV32I-NEXT:    sw a0, 8(sp)
+; RV32I-NEXT:    li a0, 16
+; RV32I-NEXT:    addi a2, sp, 24
+; RV32I-NEXT:    addi a3, sp, 8
+; RV32I-NEXT:    li a4, 3
+; RV32I-NEXT:    li a5, 2
+; RV32I-NEXT:    mv a1, a6
+; RV32I-NEXT:    call __atomic_compare_exchange at plt
+; RV32I-NEXT:    lw ra, 44(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 48
+; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: cmpxchg_i128_release_acquire:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    addi sp, sp, -48
+; RV32IA-NEXT:    sw ra, 44(sp) # 4-byte Folded Spill
+; RV32IA-NEXT:    mv a6, a0
+; RV32IA-NEXT:    lw a0, 0(a2)
+; RV32IA-NEXT:    lw a3, 4(a2)
+; RV32IA-NEXT:    lw a4, 8(a2)
+; RV32IA-NEXT:    lw a2, 12(a2)
+; RV32IA-NEXT:    lw a5, 12(a1)
+; RV32IA-NEXT:    lw a7, 8(a1)
+; RV32IA-NEXT:    lw t0, 4(a1)
+; RV32IA-NEXT:    lw a1, 0(a1)
+; RV32IA-NEXT:    sw a5, 36(sp)
+; RV32IA-NEXT:    sw a7, 32(sp)
+; RV32IA-NEXT:    sw t0, 28(sp)
+; RV32IA-NEXT:    sw a1, 24(sp)
+; RV32IA-NEXT:    sw a2, 20(sp)
+; RV32IA-NEXT:    sw a4, 16(sp)
+; RV32IA-NEXT:    sw a3, 12(sp)
+; RV32IA-NEXT:    sw a0, 8(sp)
+; RV32IA-NEXT:    li a0, 16
+; RV32IA-NEXT:    addi a2, sp, 24
+; RV32IA-NEXT:    addi a3, sp, 8
+; RV32IA-NEXT:    li a4, 3
+; RV32IA-NEXT:    li a5, 2
+; RV32IA-NEXT:    mv a1, a6
+; RV32IA-NEXT:    call __atomic_compare_exchange at plt
+; RV32IA-NEXT:    lw ra, 44(sp) # 4-byte Folded Reload
+; RV32IA-NEXT:    addi sp, sp, 48
+; RV32IA-NEXT:    ret
+;
+; RV64I-LABEL: cmpxchg_i128_release_acquire:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -32
+; RV64I-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    mv a6, a4
+; RV64I-NEXT:    sd a2, 8(sp)
+; RV64I-NEXT:    sd a1, 0(sp)
+; RV64I-NEXT:    mv a1, sp
+; RV64I-NEXT:    li a4, 3
+; RV64I-NEXT:    li a5, 2
+; RV64I-NEXT:    mv a2, a3
+; RV64I-NEXT:    mv a3, a6
+; RV64I-NEXT:    call __atomic_compare_exchange_16 at plt
+; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 32
+; RV64I-NEXT:    ret
+;
+; RV64IA-WMO-LABEL: cmpxchg_i128_release_acquire:
+; RV64IA-WMO:       # %bb.0:
+; RV64IA-WMO-NEXT:    addi sp, sp, -32
+; RV64IA-WMO-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; RV64IA-WMO-NEXT:    mv a6, a4
+; RV64IA-WMO-NEXT:    sd a2, 8(sp)
+; RV64IA-WMO-NEXT:    sd a1, 0(sp)
+; RV64IA-WMO-NEXT:    mv a1, sp
+; RV64IA-WMO-NEXT:    li a4, 3
+; RV64IA-WMO-NEXT:    li a5, 2
+; RV64IA-WMO-NEXT:    mv a2, a3
+; RV64IA-WMO-NEXT:    mv a3, a6
+; RV64IA-WMO-NEXT:    call __atomic_compare_exchange_16 at plt
+; RV64IA-WMO-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64IA-WMO-NEXT:    addi sp, sp, 32
+; RV64IA-WMO-NEXT:    ret
+;
+; RV64IA-ZACAS-LABEL: cmpxchg_i128_release_acquire:
+; RV64IA-ZACAS:       # %bb.0:
+; RV64IA-ZACAS-NEXT:    mv a5, a4
+; RV64IA-ZACAS-NEXT:    mv a7, a2
+; RV64IA-ZACAS-NEXT:    mv a4, a3
+; RV64IA-ZACAS-NEXT:    mv a6, a1
+; RV64IA-ZACAS-NEXT:    amocas.q.aqrl a6, a4, (a0)
+; RV64IA-ZACAS-NEXT:    ret
+;
+; RV64IA-TSO-LABEL: cmpxchg_i128_release_acquire:
+; RV64IA-TSO:       # %bb.0:
+; RV64IA-TSO-NEXT:    addi sp, sp, -32
+; RV64IA-TSO-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; RV64IA-TSO-NEXT:    mv a6, a4
+; RV64IA-TSO-NEXT:    sd a2, 8(sp)
+; RV64IA-TSO-NEXT:    sd a1, 0(sp)
+; RV64IA-TSO-NEXT:    mv a1, sp
+; RV64IA-TSO-NEXT:    li a4, 3
+; RV64IA-TSO-NEXT:    li a5, 2
+; RV64IA-TSO-NEXT:    mv a2, a3
+; RV64IA-TSO-NEXT:    mv a3, a6
+; RV64IA-TSO-NEXT:    call __atomic_compare_exchange_16 at plt
+; RV64IA-TSO-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64IA-TSO-NEXT:    addi sp, sp, 32
+; RV64IA-TSO-NEXT:    ret
+  %res = cmpxchg ptr %ptr, i128 %cmp, i128 %val release acquire
+  ret void
+}
+
+define void @cmpxchg_i128_acq_rel_monotonic(ptr %ptr, i128 %cmp, i128 %val) nounwind {
+; RV32I-LABEL: cmpxchg_i128_acq_rel_monotonic:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -48
+; RV32I-NEXT:    sw ra, 44(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    mv a5, a0
+; RV32I-NEXT:    lw a0, 0(a2)
+; RV32I-NEXT:    lw a3, 4(a2)
+; RV32I-NEXT:    lw a4, 8(a2)
+; RV32I-NEXT:    lw a2, 12(a2)
+; RV32I-NEXT:    lw a6, 12(a1)
+; RV32I-NEXT:    lw a7, 8(a1)
+; RV32I-NEXT:    lw t0, 4(a1)
+; RV32I-NEXT:    lw a1, 0(a1)
+; RV32I-NEXT:    sw a6, 36(sp)
+; RV32I-NEXT:    sw a7, 32(sp)
+; RV32I-NEXT:    sw t0, 28(sp)
+; RV32I-NEXT:    sw a1, 24(sp)
+; RV32I-NEXT:    sw a2, 20(sp)
+; RV32I-NEXT:    sw a4, 16(sp)
+; RV32I-NEXT:    sw a3, 12(sp)
+; RV32I-NEXT:    sw a0, 8(sp)
+; RV32I-NEXT:    li a0, 16
+; RV32I-NEXT:    addi a2, sp, 24
+; RV32I-NEXT:    addi a3, sp, 8
+; RV32I-NEXT:    li a4, 4
+; RV32I-NEXT:    mv a1, a5
+; RV32I-NEXT:    li a5, 0
+; RV32I-NEXT:    call __atomic_compare_exchange at plt
+; RV32I-NEXT:    lw ra, 44(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 48
+; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: cmpxchg_i128_acq_rel_monotonic:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    addi sp, sp, -48
+; RV32IA-NEXT:    sw ra, 44(sp) # 4-byte Folded Spill
+; RV32IA-NEXT:    mv a5, a0
+; RV32IA-NEXT:    lw a0, 0(a2)
+; RV32IA-NEXT:    lw a3, 4(a2)
+; RV32IA-NEXT:    lw a4, 8(a2)
+; RV32IA-NEXT:    lw a2, 12(a2)
+; RV32IA-NEXT:    lw a6, 12(a1)
+; RV32IA-NEXT:    lw a7, 8(a1)
+; RV32IA-NEXT:    lw t0, 4(a1)
+; RV32IA-NEXT:    lw a1, 0(a1)
+; RV32IA-NEXT:    sw a6, 36(sp)
+; RV32IA-NEXT:    sw a7, 32(sp)
+; RV32IA-NEXT:    sw t0, 28(sp)
+; RV32IA-NEXT:    sw a1, 24(sp)
+; RV32IA-NEXT:    sw a2, 20(sp)
+; RV32IA-NEXT:    sw a4, 16(sp)
+; RV32IA-NEXT:    sw a3, 12(sp)
+; RV32IA-NEXT:    sw a0, 8(sp)
+; RV32IA-NEXT:    li a0, 16
+; RV32IA-NEXT:    addi a2, sp, 24
+; RV32IA-NEXT:    addi a3, sp, 8
+; RV32IA-NEXT:    li a4, 4
+; RV32IA-NEXT:    mv a1, a5
+; RV32IA-NEXT:    li a5, 0
+; RV32IA-NEXT:    call __atomic_compare_exchange at plt
+; RV32IA-NEXT:    lw ra, 44(sp) # 4-byte Folded Reload
+; RV32IA-NEXT:    addi sp, sp, 48
+; RV32IA-NEXT:    ret
+;
+; RV64I-LABEL: cmpxchg_i128_acq_rel_monotonic:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -32
+; RV64I-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    mv a5, a4
+; RV64I-NEXT:    sd a2, 8(sp)
+; RV64I-NEXT:    sd a1, 0(sp)
+; RV64I-NEXT:    mv a1, sp
+; RV64I-NEXT:    li a4, 4
+; RV64I-NEXT:    mv a2, a3
+; RV64I-NEXT:    mv a3, a5
+; RV64I-NEXT:    li a5, 0
+; RV64I-NEXT:    call __atomic_compare_exchange_16 at plt
+; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 32
+; RV64I-NEXT:    ret
+;
+; RV64IA-WMO-LABEL: cmpxchg_i128_acq_rel_monotonic:
+; RV64IA-WMO:       # %bb.0:
+; RV64IA-WMO-NEXT:    addi sp, sp, -32
+; RV64IA-WMO-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; RV64IA-WMO-NEXT:    mv a5, a4
+; RV64IA-WMO-NEXT:    sd a2, 8(sp)
+; RV64IA-WMO-NEXT:    sd a1, 0(sp)
+; RV64IA-WMO-NEXT:    mv a1, sp
+; RV64IA-WMO-NEXT:    li a4, 4
+; RV64IA-WMO-NEXT:    mv a2, a3
+; RV64IA-WMO-NEXT:    mv a3, a5
+; RV64IA-WMO-NEXT:    li a5, 0
+; RV64IA-WMO-NEXT:    call __atomic_compare_exchange_16 at plt
+; RV64IA-WMO-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64IA-WMO-NEXT:    addi sp, sp, 32
+; RV64IA-WMO-NEXT:    ret
+;
+; RV64IA-ZACAS-LABEL: cmpxchg_i128_acq_rel_monotonic:
+; RV64IA-ZACAS:       # %bb.0:
+; RV64IA-ZACAS-NEXT:    mv a5, a4
+; RV64IA-ZACAS-NEXT:    mv a7, a2
+; RV64IA-ZACAS-NEXT:    mv a4, a3
+; RV64IA-ZACAS-NEXT:    mv a6, a1
+; RV64IA-ZACAS-NEXT:    amocas.q.aqrl a6, a4, (a0)
+; RV64IA-ZACAS-NEXT:    ret
+;
+; RV64IA-TSO-LABEL: cmpxchg_i128_acq_rel_monotonic:
+; RV64IA-TSO:       # %bb.0:
+; RV64IA-TSO-NEXT:    addi sp, sp, -32
+; RV64IA-TSO-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; RV64IA-TSO-NEXT:    mv a5, a4
+; RV64IA-TSO-NEXT:    sd a2, 8(sp)
+; RV64IA-TSO-NEXT:    sd a1, 0(sp)
+; RV64IA-TSO-NEXT:    mv a1, sp
+; RV64IA-TSO-NEXT:    li a4, 4
+; RV64IA-TSO-NEXT:    mv a2, a3
+; RV64IA-TSO-NEXT:    mv a3, a5
+; RV64IA-TSO-NEXT:    li a5, 0
+; RV64IA-TSO-NEXT:    call __atomic_compare_exchange_16 at plt
+; RV64IA-TSO-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64IA-TSO-NEXT:    addi sp, sp, 32
+; RV64IA-TSO-NEXT:    ret
+  %res = cmpxchg ptr %ptr, i128 %cmp, i128 %val acq_rel monotonic
+  ret void
+}
+
+define void @cmpxchg_i128_acq_rel_acquire(ptr %ptr, i128 %cmp, i128 %val) nounwind {
+; RV32I-LABEL: cmpxchg_i128_acq_rel_acquire:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -48
+; RV32I-NEXT:    sw ra, 44(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    mv a6, a0
+; RV32I-NEXT:    lw a0, 0(a2)
+; RV32I-NEXT:    lw a3, 4(a2)
+; RV32I-NEXT:    lw a4, 8(a2)
+; RV32I-NEXT:    lw a2, 12(a2)
+; RV32I-NEXT:    lw a5, 12(a1)
+; RV32I-NEXT:    lw a7, 8(a1)
+; RV32I-NEXT:    lw t0, 4(a1)
+; RV32I-NEXT:    lw a1, 0(a1)
+; RV32I-NEXT:    sw a5, 36(sp)
+; RV32I-NEXT:    sw a7, 32(sp)
+; RV32I-NEXT:    sw t0, 28(sp)
+; RV32I-NEXT:    sw a1, 24(sp)
+; RV32I-NEXT:    sw a2, 20(sp)
+; RV32I-NEXT:    sw a4, 16(sp)
+; RV32I-NEXT:    sw a3, 12(sp)
+; RV32I-NEXT:    sw a0, 8(sp)
+; RV32I-NEXT:    li a0, 16
+; RV32I-NEXT:    addi a2, sp, 24
+; RV32I-NEXT:    addi a3, sp, 8
+; RV32I-NEXT:    li a4, 4
+; RV32I-NEXT:    li a5, 2
+; RV32I-NEXT:    mv a1, a6
+; RV32I-NEXT:    call __atomic_compare_exchange at plt
+; RV32I-NEXT:    lw ra, 44(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 48
+; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: cmpxchg_i128_acq_rel_acquire:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    addi sp, sp, -48
+; RV32IA-NEXT:    sw ra, 44(sp) # 4-byte Folded Spill
+; RV32IA-NEXT:    mv a6, a0
+; RV32IA-NEXT:    lw a0, 0(a2)
+; RV32IA-NEXT:    lw a3, 4(a2)
+; RV32IA-NEXT:    lw a4, 8(a2)
+; RV32IA-NEXT:    lw a2, 12(a2)
+; RV32IA-NEXT:    lw a5, 12(a1)
+; RV32IA-NEXT:    lw a7, 8(a1)
+; RV32IA-NEXT:    lw t0, 4(a1)
+; RV32IA-NEXT:    lw a1, 0(a1)
+; RV32IA-NEXT:    sw a5, 36(sp)
+; RV32IA-NEXT:    sw a7, 32(sp)
+; RV32IA-NEXT:    sw t0, 28(sp)
+; RV32IA-NEXT:    sw a1, 24(sp)
+; RV32IA-NEXT:    sw a2, 20(sp)
+; RV32IA-NEXT:    sw a4, 16(sp)
+; RV32IA-NEXT:    sw a3, 12(sp)
+; RV32IA-NEXT:    sw a0, 8(sp)
+; RV32IA-NEXT:    li a0, 16
+; RV32IA-NEXT:    addi a2, sp, 24
+; RV32IA-NEXT:    addi a3, sp, 8
+; RV32IA-NEXT:    li a4, 4
+; RV32IA-NEXT:    li a5, 2
+; RV32IA-NEXT:    mv a1, a6
+; RV32IA-NEXT:    call __atomic_compare_exchange at plt
+; RV32IA-NEXT:    lw ra, 44(sp) # 4-byte Folded Reload
+; RV32IA-NEXT:    addi sp, sp, 48
+; RV32IA-NEXT:    ret
+;
+; RV64I-LABEL: cmpxchg_i128_acq_rel_acquire:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -32
+; RV64I-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    mv a6, a4
+; RV64I-NEXT:    sd a2, 8(sp)
+; RV64I-NEXT:    sd a1, 0(sp)
+; RV64I-NEXT:    mv a1, sp
+; RV64I-NEXT:    li a4, 4
+; RV64I-NEXT:    li a5, 2
+; RV64I-NEXT:    mv a2, a3
+; RV64I-NEXT:    mv a3, a6
+; RV64I-NEXT:    call __atomic_compare_exchange_16 at plt
+; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 32
+; RV64I-NEXT:    ret
+;
+; RV64IA-WMO-LABEL: cmpxchg_i128_acq_rel_acquire:
+; RV64IA-WMO:       # %bb.0:
+; RV64IA-WMO-NEXT:    addi sp, sp, -32
+; RV64IA-WMO-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; RV64IA-WMO-NEXT:    mv a6, a4
+; RV64IA-WMO-NEXT:    sd a2, 8(sp)
+; RV64IA-WMO-NEXT:    sd a1, 0(sp)
+; RV64IA-WMO-NEXT:    mv a1, sp
+; RV64IA-WMO-NEXT:    li a4, 4
+; RV64IA-WMO-NEXT:    li a5, 2
+; RV64IA-WMO-NEXT:    mv a2, a3
+; RV64IA-WMO-NEXT:    mv a3, a6
+; RV64IA-WMO-NEXT:    call __atomic_compare_exchange_16 at plt
+; RV64IA-WMO-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64IA-WMO-NEXT:    addi sp, sp, 32
+; RV64IA-WMO-NEXT:    ret
+;
+; RV64IA-ZACAS-LABEL: cmpxchg_i128_acq_rel_acquire:
+; RV64IA-ZACAS:       # %bb.0:
+; RV64IA-ZACAS-NEXT:    mv a5, a4
+; RV64IA-ZACAS-NEXT:    mv a7, a2
+; RV64IA-ZACAS-NEXT:    mv a4, a3
+; RV64IA-ZACAS-NEXT:    mv a6, a1
+; RV64IA-ZACAS-NEXT:    amocas.q.aqrl a6, a4, (a0)
+; RV64IA-ZACAS-NEXT:    ret
+;
+; RV64IA-TSO-LABEL: cmpxchg_i128_acq_rel_acquire:
+; RV64IA-TSO:       # %bb.0:
+; RV64IA-TSO-NEXT:    addi sp, sp, -32
+; RV64IA-TSO-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; RV64IA-TSO-NEXT:    mv a6, a4
+; RV64IA-TSO-NEXT:    sd a2, 8(sp)
+; RV64IA-TSO-NEXT:    sd a1, 0(sp)
+; RV64IA-TSO-NEXT:    mv a1, sp
+; RV64IA-TSO-NEXT:    li a4, 4
+; RV64IA-TSO-NEXT:    li a5, 2
+; RV64IA-TSO-NEXT:    mv a2, a3
+; RV64IA-TSO-NEXT:    mv a3, a6
+; RV64IA-TSO-NEXT:    call __atomic_compare_exchange_16 at plt
+; RV64IA-TSO-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64IA-TSO-NEXT:    addi sp, sp, 32
+; RV64IA-TSO-NEXT:    ret
+  %res = cmpxchg ptr %ptr, i128 %cmp, i128 %val acq_rel acquire
+  ret void
+}
+
+define void @cmpxchg_i128_seq_cst_monotonic(ptr %ptr, i128 %cmp, i128 %val) nounwind {
+; RV32I-LABEL: cmpxchg_i128_seq_cst_monotonic:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -48
+; RV32I-NEXT:    sw ra, 44(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    mv a5, a0
+; RV32I-NEXT:    lw a0, 0(a2)
+; RV32I-NEXT:    lw a3, 4(a2)
+; RV32I-NEXT:    lw a4, 8(a2)
+; RV32I-NEXT:    lw a2, 12(a2)
+; RV32I-NEXT:    lw a6, 12(a1)
+; RV32I-NEXT:    lw a7, 8(a1)
+; RV32I-NEXT:    lw t0, 4(a1)
+; RV32I-NEXT:    lw a1, 0(a1)
+; RV32I-NEXT:    sw a6, 36(sp)
+; RV32I-NEXT:    sw a7, 32(sp)
+; RV32I-NEXT:    sw t0, 28(sp)
+; RV32I-NEXT:    sw a1, 24(sp)
+; RV32I-NEXT:    sw a2, 20(sp)
+; RV32I-NEXT:    sw a4, 16(sp)
+; RV32I-NEXT:    sw a3, 12(sp)
+; RV32I-NEXT:    sw a0, 8(sp)
+; RV32I-NEXT:    li a0, 16
+; RV32I-NEXT:    addi a2, sp, 24
+; RV32I-NEXT:    addi a3, sp, 8
+; RV32I-NEXT:    li a4, 5
+; RV32I-NEXT:    mv a1, a5
+; RV32I-NEXT:    li a5, 0
+; RV32I-NEXT:    call __atomic_compare_exchange at plt
+; RV32I-NEXT:    lw ra, 44(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 48
+; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: cmpxchg_i128_seq_cst_monotonic:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    addi sp, sp, -48
+; RV32IA-NEXT:    sw ra, 44(sp) # 4-byte Folded Spill
+; RV32IA-NEXT:    mv a5, a0
+; RV32IA-NEXT:    lw a0, 0(a2)
+; RV32IA-NEXT:    lw a3, 4(a2)
+; RV32IA-NEXT:    lw a4, 8(a2)
+; RV32IA-NEXT:    lw a2, 12(a2)
+; RV32IA-NEXT:    lw a6, 12(a1)
+; RV32IA-NEXT:    lw a7, 8(a1)
+; RV32IA-NEXT:    lw t0, 4(a1)
+; RV32IA-NEXT:    lw a1, 0(a1)
+; RV32IA-NEXT:    sw a6, 36(sp)
+; RV32IA-NEXT:    sw a7, 32(sp)
+; RV32IA-NEXT:    sw t0, 28(sp)
+; RV32IA-NEXT:    sw a1, 24(sp)
+; RV32IA-NEXT:    sw a2, 20(sp)
+; RV32IA-NEXT:    sw a4, 16(sp)
+; RV32IA-NEXT:    sw a3, 12(sp)
+; RV32IA-NEXT:    sw a0, 8(sp)
+; RV32IA-NEXT:    li a0, 16
+; RV32IA-NEXT:    addi a2, sp, 24
+; RV32IA-NEXT:    addi a3, sp, 8
+; RV32IA-NEXT:    li a4, 5
+; RV32IA-NEXT:    mv a1, a5
+; RV32IA-NEXT:    li a5, 0
+; RV32IA-NEXT:    call __atomic_compare_exchange at plt
+; RV32IA-NEXT:    lw ra, 44(sp) # 4-byte Folded Reload
+; RV32IA-NEXT:    addi sp, sp, 48
+; RV32IA-NEXT:    ret
+;
+; RV64I-LABEL: cmpxchg_i128_seq_cst_monotonic:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -32
+; RV64I-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    mv a5, a4
+; RV64I-NEXT:    sd a2, 8(sp)
+; RV64I-NEXT:    sd a1, 0(sp)
+; RV64I-NEXT:    mv a1, sp
+; RV64I-NEXT:    li a4, 5
+; RV64I-NEXT:    mv a2, a3
+; RV64I-NEXT:    mv a3, a5
+; RV64I-NEXT:    li a5, 0
+; RV64I-NEXT:    call __atomic_compare_exchange_16 at plt
+; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 32
+; RV64I-NEXT:    ret
+;
+; RV64IA-WMO-LABEL: cmpxchg_i128_seq_cst_monotonic:
+; RV64IA-WMO:       # %bb.0:
+; RV64IA-WMO-NEXT:    addi sp, sp, -32
+; RV64IA-WMO-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; RV64IA-WMO-NEXT:    mv a5, a4
+; RV64IA-WMO-NEXT:    sd a2, 8(sp)
+; RV64IA-WMO-NEXT:    sd a1, 0(sp)
+; RV64IA-WMO-NEXT:    mv a1, sp
+; RV64IA-WMO-NEXT:    li a4, 5
+; RV64IA-WMO-NEXT:    mv a2, a3
+; RV64IA-WMO-NEXT:    mv a3, a5
+; RV64IA-WMO-NEXT:    li a5, 0
+; RV64IA-WMO-NEXT:    call __atomic_compare_exchange_16 at plt
+; RV64IA-WMO-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64IA-WMO-NEXT:    addi sp, sp, 32
+; RV64IA-WMO-NEXT:    ret
+;
+; RV64IA-ZACAS-LABEL: cmpxchg_i128_seq_cst_monotonic:
+; RV64IA-ZACAS:       # %bb.0:
+; RV64IA-ZACAS-NEXT:    mv a5, a4
+; RV64IA-ZACAS-NEXT:    mv a7, a2
+; RV64IA-ZACAS-NEXT:    mv a4, a3
+; RV64IA-ZACAS-NEXT:    mv a6, a1
+; RV64IA-ZACAS-NEXT:    amocas.q.aqrl a6, a4, (a0)
+; RV64IA-ZACAS-NEXT:    ret
+;
+; RV64IA-TSO-LABEL: cmpxchg_i128_seq_cst_monotonic:
+; RV64IA-TSO:       # %bb.0:
+; RV64IA-TSO-NEXT:    addi sp, sp, -32
+; RV64IA-TSO-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; RV64IA-TSO-NEXT:    mv a5, a4
+; RV64IA-TSO-NEXT:    sd a2, 8(sp)
+; RV64IA-TSO-NEXT:    sd a1, 0(sp)
+; RV64IA-TSO-NEXT:    mv a1, sp
+; RV64IA-TSO-NEXT:    li a4, 5
+; RV64IA-TSO-NEXT:    mv a2, a3
+; RV64IA-TSO-NEXT:    mv a3, a5
+; RV64IA-TSO-NEXT:    li a5, 0
+; RV64IA-TSO-NEXT:    call __atomic_compare_exchange_16 at plt
+; RV64IA-TSO-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64IA-TSO-NEXT:    addi sp, sp, 32
+; RV64IA-TSO-NEXT:    ret
+  %res = cmpxchg ptr %ptr, i128 %cmp, i128 %val seq_cst monotonic
+  ret void
+}
+
+define void @cmpxchg_i128_seq_cst_acquire(ptr %ptr, i128 %cmp, i128 %val) nounwind {
+; RV32I-LABEL: cmpxchg_i128_seq_cst_acquire:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -48
+; RV32I-NEXT:    sw ra, 44(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    mv a6, a0
+; RV32I-NEXT:    lw a0, 0(a2)
+; RV32I-NEXT:    lw a3, 4(a2)
+; RV32I-NEXT:    lw a4, 8(a2)
+; RV32I-NEXT:    lw a2, 12(a2)
+; RV32I-NEXT:    lw a5, 12(a1)
+; RV32I-NEXT:    lw a7, 8(a1)
+; RV32I-NEXT:    lw t0, 4(a1)
+; RV32I-NEXT:    lw a1, 0(a1)
+; RV32I-NEXT:    sw a5, 36(sp)
+; RV32I-NEXT:    sw a7, 32(sp)
+; RV32I-NEXT:    sw t0, 28(sp)
+; RV32I-NEXT:    sw a1, 24(sp)
+; RV32I-NEXT:    sw a2, 20(sp)
+; RV32I-NEXT:    sw a4, 16(sp)
+; RV32I-NEXT:    sw a3, 12(sp)
+; RV32I-NEXT:    sw a0, 8(sp)
+; RV32I-NEXT:    li a0, 16
+; RV32I-NEXT:    addi a2, sp, 24
+; RV32I-NEXT:    addi a3, sp, 8
+; RV32I-NEXT:    li a4, 5
+; RV32I-NEXT:    li a5, 2
+; RV32I-NEXT:    mv a1, a6
+; RV32I-NEXT:    call __atomic_compare_exchange at plt
+; RV32I-NEXT:    lw ra, 44(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 48
+; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: cmpxchg_i128_seq_cst_acquire:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    addi sp, sp, -48
+; RV32IA-NEXT:    sw ra, 44(sp) # 4-byte Folded Spill
+; RV32IA-NEXT:    mv a6, a0
+; RV32IA-NEXT:    lw a0, 0(a2)
+; RV32IA-NEXT:    lw a3, 4(a2)
+; RV32IA-NEXT:    lw a4, 8(a2)
+; RV32IA-NEXT:    lw a2, 12(a2)
+; RV32IA-NEXT:    lw a5, 12(a1)
+; RV32IA-NEXT:    lw a7, 8(a1)
+; RV32IA-NEXT:    lw t0, 4(a1)
+; RV32IA-NEXT:    lw a1, 0(a1)
+; RV32IA-NEXT:    sw a5, 36(sp)
+; RV32IA-NEXT:    sw a7, 32(sp)
+; RV32IA-NEXT:    sw t0, 28(sp)
+; RV32IA-NEXT:    sw a1, 24(sp)
+; RV32IA-NEXT:    sw a2, 20(sp)
+; RV32IA-NEXT:    sw a4, 16(sp)
+; RV32IA-NEXT:    sw a3, 12(sp)
+; RV32IA-NEXT:    sw a0, 8(sp)
+; RV32IA-NEXT:    li a0, 16
+; RV32IA-NEXT:    addi a2, sp, 24
+; RV32IA-NEXT:    addi a3, sp, 8
+; RV32IA-NEXT:    li a4, 5
+; RV32IA-NEXT:    li a5, 2
+; RV32IA-NEXT:    mv a1, a6
+; RV32IA-NEXT:    call __atomic_compare_exchange at plt
+; RV32IA-NEXT:    lw ra, 44(sp) # 4-byte Folded Reload
+; RV32IA-NEXT:    addi sp, sp, 48
+; RV32IA-NEXT:    ret
+;
+; RV64I-LABEL: cmpxchg_i128_seq_cst_acquire:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -32
+; RV64I-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    mv a6, a4
+; RV64I-NEXT:    sd a2, 8(sp)
+; RV64I-NEXT:    sd a1, 0(sp)
+; RV64I-NEXT:    mv a1, sp
+; RV64I-NEXT:    li a4, 5
+; RV64I-NEXT:    li a5, 2
+; RV64I-NEXT:    mv a2, a3
+; RV64I-NEXT:    mv a3, a6
+; RV64I-NEXT:    call __atomic_compare_exchange_16 at plt
+; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 32
+; RV64I-NEXT:    ret
+;
+; RV64IA-WMO-LABEL: cmpxchg_i128_seq_cst_acquire:
+; RV64IA-WMO:       # %bb.0:
+; RV64IA-WMO-NEXT:    addi sp, sp, -32
+; RV64IA-WMO-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; RV64IA-WMO-NEXT:    mv a6, a4
+; RV64IA-WMO-NEXT:    sd a2, 8(sp)
+; RV64IA-WMO-NEXT:    sd a1, 0(sp)
+; RV64IA-WMO-NEXT:    mv a1, sp
+; RV64IA-WMO-NEXT:    li a4, 5
+; RV64IA-WMO-NEXT:    li a5, 2
+; RV64IA-WMO-NEXT:    mv a2, a3
+; RV64IA-WMO-NEXT:    mv a3, a6
+; RV64IA-WMO-NEXT:    call __atomic_compare_exchange_16 at plt
+; RV64IA-WMO-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64IA-WMO-NEXT:    addi sp, sp, 32
+; RV64IA-WMO-NEXT:    ret
+;
+; RV64IA-ZACAS-LABEL: cmpxchg_i128_seq_cst_acquire:
+; RV64IA-ZACAS:       # %bb.0:
+; RV64IA-ZACAS-NEXT:    mv a5, a4
+; RV64IA-ZACAS-NEXT:    mv a7, a2
+; RV64IA-ZACAS-NEXT:    mv a4, a3
+; RV64IA-ZACAS-NEXT:    mv a6, a1
+; RV64IA-ZACAS-NEXT:    amocas.q.aqrl a6, a4, (a0)
+; RV64IA-ZACAS-NEXT:    ret
+;
+; RV64IA-TSO-LABEL: cmpxchg_i128_seq_cst_acquire:
+; RV64IA-TSO:       # %bb.0:
+; RV64IA-TSO-NEXT:    addi sp, sp, -32
+; RV64IA-TSO-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; RV64IA-TSO-NEXT:    mv a6, a4
+; RV64IA-TSO-NEXT:    sd a2, 8(sp)
+; RV64IA-TSO-NEXT:    sd a1, 0(sp)
+; RV64IA-TSO-NEXT:    mv a1, sp
+; RV64IA-TSO-NEXT:    li a4, 5
+; RV64IA-TSO-NEXT:    li a5, 2
+; RV64IA-TSO-NEXT:    mv a2, a3
+; RV64IA-TSO-NEXT:    mv a3, a6
+; RV64IA-TSO-NEXT:    call __atomic_compare_exchange_16 at plt
+; RV64IA-TSO-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64IA-TSO-NEXT:    addi sp, sp, 32
+; RV64IA-TSO-NEXT:    ret
+  %res = cmpxchg ptr %ptr, i128 %cmp, i128 %val seq_cst acquire
+  ret void
+}
+
+define void @cmpxchg_i128_seq_cst_seq_cst(ptr %ptr, i128 %cmp, i128 %val) nounwind {
+; RV32I-LABEL: cmpxchg_i128_seq_cst_seq_cst:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -48
+; RV32I-NEXT:    sw ra, 44(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    mv a6, a0
+; RV32I-NEXT:    lw a0, 0(a2)
+; RV32I-NEXT:    lw a3, 4(a2)
+; RV32I-NEXT:    lw a4, 8(a2)
+; RV32I-NEXT:    lw a2, 12(a2)
+; RV32I-NEXT:    lw a5, 12(a1)
+; RV32I-NEXT:    lw a7, 8(a1)
+; RV32I-NEXT:    lw t0, 4(a1)
+; RV32I-NEXT:    lw a1, 0(a1)
+; RV32I-NEXT:    sw a5, 36(sp)
+; RV32I-NEXT:    sw a7, 32(sp)
+; RV32I-NEXT:    sw t0, 28(sp)
+; RV32I-NEXT:    sw a1, 24(sp)
+; RV32I-NEXT:    sw a2, 20(sp)
+; RV32I-NEXT:    sw a4, 16(sp)
+; RV32I-NEXT:    sw a3, 12(sp)
+; RV32I-NEXT:    sw a0, 8(sp)
+; RV32I-NEXT:    li a0, 16
+; RV32I-NEXT:    addi a2, sp, 24
+; RV32I-NEXT:    addi a3, sp, 8
+; RV32I-NEXT:    li a4, 5
+; RV32I-NEXT:    li a5, 5
+; RV32I-NEXT:    mv a1, a6
+; RV32I-NEXT:    call __atomic_compare_exchange at plt
+; RV32I-NEXT:    lw ra, 44(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 48
+; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: cmpxchg_i128_seq_cst_seq_cst:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    addi sp, sp, -48
+; RV32IA-NEXT:    sw ra, 44(sp) # 4-byte Folded Spill
+; RV32IA-NEXT:    mv a6, a0
+; RV32IA-NEXT:    lw a0, 0(a2)
+; RV32IA-NEXT:    lw a3, 4(a2)
+; RV32IA-NEXT:    lw a4, 8(a2)
+; RV32IA-NEXT:    lw a2, 12(a2)
+; RV32IA-NEXT:    lw a5, 12(a1)
+; RV32IA-NEXT:    lw a7, 8(a1)
+; RV32IA-NEXT:    lw t0, 4(a1)
+; RV32IA-NEXT:    lw a1, 0(a1)
+; RV32IA-NEXT:    sw a5, 36(sp)
+; RV32IA-NEXT:    sw a7, 32(sp)
+; RV32IA-NEXT:    sw t0, 28(sp)
+; RV32IA-NEXT:    sw a1, 24(sp)
+; RV32IA-NEXT:    sw a2, 20(sp)
+; RV32IA-NEXT:    sw a4, 16(sp)
+; RV32IA-NEXT:    sw a3, 12(sp)
+; RV32IA-NEXT:    sw a0, 8(sp)
+; RV32IA-NEXT:    li a0, 16
+; RV32IA-NEXT:    addi a2, sp, 24
+; RV32IA-NEXT:    addi a3, sp, 8
+; RV32IA-NEXT:    li a4, 5
+; RV32IA-NEXT:    li a5, 5
+; RV32IA-NEXT:    mv a1, a6
+; RV32IA-NEXT:    call __atomic_compare_exchange at plt
+; RV32IA-NEXT:    lw ra, 44(sp) # 4-byte Folded Reload
+; RV32IA-NEXT:    addi sp, sp, 48
+; RV32IA-NEXT:    ret
+;
+; RV64I-LABEL: cmpxchg_i128_seq_cst_seq_cst:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -32
+; RV64I-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    mv a6, a4
+; RV64I-NEXT:    sd a2, 8(sp)
+; RV64I-NEXT:    sd a1, 0(sp)
+; RV64I-NEXT:    mv a1, sp
+; RV64I-NEXT:    li a4, 5
+; RV64I-NEXT:    li a5, 5
+; RV64I-NEXT:    mv a2, a3
+; RV64I-NEXT:    mv a3, a6
+; RV64I-NEXT:    call __atomic_compare_exchange_16 at plt
+; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 32
+; RV64I-NEXT:    ret
+;
+; RV64IA-WMO-LABEL: cmpxchg_i128_seq_cst_seq_cst:
+; RV64IA-WMO:       # %bb.0:
+; RV64IA-WMO-NEXT:    addi sp, sp, -32
+; RV64IA-WMO-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; RV64IA-WMO-NEXT:    mv a6, a4
+; RV64IA-WMO-NEXT:    sd a2, 8(sp)
+; RV64IA-WMO-NEXT:    sd a1, 0(sp)
+; RV64IA-WMO-NEXT:    mv a1, sp
+; RV64IA-WMO-NEXT:    li a4, 5
+; RV64IA-WMO-NEXT:    li a5, 5
+; RV64IA-WMO-NEXT:    mv a2, a3
+; RV64IA-WMO-NEXT:    mv a3, a6
+; RV64IA-WMO-NEXT:    call __atomic_compare_exchange_16 at plt
+; RV64IA-WMO-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64IA-WMO-NEXT:    addi sp, sp, 32
+; RV64IA-WMO-NEXT:    ret
+;
+; RV64IA-ZACAS-LABEL: cmpxchg_i128_seq_cst_seq_cst:
+; RV64IA-ZACAS:       # %bb.0:
+; RV64IA-ZACAS-NEXT:    mv a5, a4
+; RV64IA-ZACAS-NEXT:    mv a7, a2
+; RV64IA-ZACAS-NEXT:    mv a4, a3
+; RV64IA-ZACAS-NEXT:    mv a6, a1
+; RV64IA-ZACAS-NEXT:    amocas.q.aqrl a6, a4, (a0)
+; RV64IA-ZACAS-NEXT:    ret
+;
+; RV64IA-TSO-LABEL: cmpxchg_i128_seq_cst_seq_cst:
+; RV64IA-TSO:       # %bb.0:
+; RV64IA-TSO-NEXT:    addi sp, sp, -32
+; RV64IA-TSO-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; RV64IA-TSO-NEXT:    mv a6, a4
+; RV64IA-TSO-NEXT:    sd a2, 8(sp)
+; RV64IA-TSO-NEXT:    sd a1, 0(sp)
+; RV64IA-TSO-NEXT:    mv a1, sp
+; RV64IA-TSO-NEXT:    li a4, 5
+; RV64IA-TSO-NEXT:    li a5, 5
+; RV64IA-TSO-NEXT:    mv a2, a3
+; RV64IA-TSO-NEXT:    mv a3, a6
+; RV64IA-TSO-NEXT:    call __atomic_compare_exchange_16 at plt
+; RV64IA-TSO-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64IA-TSO-NEXT:    addi sp, sp, 32
+; RV64IA-TSO-NEXT:    ret
+  %res = cmpxchg ptr %ptr, i128 %cmp, i128 %val seq_cst seq_cst
+  ret void
+}



More information about the llvm-commits mailing list