[llvm] [RISCV] Initial ISel support for the experimental zacas extension (PR #67918)
Yingwei Zheng via llvm-commits
llvm-commits at lists.llvm.org
Sun Oct 1 07:44:43 PDT 2023
https://github.com/dtcxzyw created https://github.com/llvm/llvm-project/pull/67918
This patch adds initial instruction selection (ISel) support for the v1.0-rc3 draft of the Zacas extension, modeled on AArch64's implementation.
It introduces two register classes (GPRPI64 and GPRPI128) and several pseudo instructions so that register pairs are allocated correctly.
These pseudo instructions will be expanded in the `RISCVExpandAtomicPseudoInsts` pass.
Migrated from https://reviews.llvm.org/D158956.
>From d874e3e400a437697b97fe186fce06c2b45f1e30 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Sun, 1 Oct 2023 22:42:12 +0800
Subject: [PATCH] [RISCV] Initial ISel support for the experimental zacas
extension
---
.../RISCV/RISCVExpandAtomicPseudoInsts.cpp | 145 +
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 71 +-
llvm/lib/Target/RISCV/RISCVInstrInfoA.td | 22 +
llvm/lib/Target/RISCV/RISCVRegisterInfo.td | 20 +
.../RISCV/atomic-cmpxchg-branch-on-result.ll | 362 +-
.../test/CodeGen/RISCV/atomic-cmpxchg-flag.ll | 10 +
llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll | 3720 +++++++++++++++--
7 files changed, 4007 insertions(+), 343 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp b/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp
index bb772fc5da92244..43abdc2b06f8af4 100644
--- a/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp
+++ b/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp
@@ -59,6 +59,9 @@ class RISCVExpandAtomicPseudo : public MachineFunctionPass {
bool expandAtomicCmpXchg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, bool IsMasked,
int Width, MachineBasicBlock::iterator &NextMBBI);
+ bool expandAMOCAS(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ bool IsPaired, int Width,
+ MachineBasicBlock::iterator &NextMBBI);
#ifndef NDEBUG
unsigned getInstSizeInBytes(const MachineFunction &MF) const {
unsigned Size = 0;
@@ -145,6 +148,14 @@ bool RISCVExpandAtomicPseudo::expandMI(MachineBasicBlock &MBB,
return expandAtomicCmpXchg(MBB, MBBI, false, 64, NextMBBI);
case RISCV::PseudoMaskedCmpXchg32:
return expandAtomicCmpXchg(MBB, MBBI, true, 32, NextMBBI);
+ case RISCV::PseudoAMOCAS_W:
+ return expandAMOCAS(MBB, MBBI, false, 32, NextMBBI);
+ case RISCV::PseudoAMOCAS_D_64:
+ return expandAMOCAS(MBB, MBBI, false, 64, NextMBBI);
+ case RISCV::PseudoAMOCAS_D_32:
+ return expandAMOCAS(MBB, MBBI, true, 64, NextMBBI);
+ case RISCV::PseudoAMOCAS_Q:
+ return expandAMOCAS(MBB, MBBI, true, 128, NextMBBI);
}
return false;
@@ -256,6 +267,74 @@ static unsigned getSCForRMW(AtomicOrdering Ordering, int Width,
llvm_unreachable("Unexpected SC width\n");
}
+// Pick the AMOCAS.W opcode variant that matches the requested atomic ordering.
+// When the subtarget has Ztso the plain opcode is returned for every ordering
+// (the ordering is not inspected in that case).
+static unsigned getAMOCASForRMW32(AtomicOrdering Ordering,
+                                  const RISCVSubtarget *Subtarget) {
+  const bool NeedsOrderingBits = !Subtarget->hasStdExtZtso();
+  if (NeedsOrderingBits) {
+    switch (Ordering) {
+    case AtomicOrdering::Monotonic:
+      // Falls out of the switch to the plain opcode below.
+      break;
+    case AtomicOrdering::Acquire:
+      return RISCV::AMOCAS_W_AQ;
+    case AtomicOrdering::Release:
+      return RISCV::AMOCAS_W_RL;
+    case AtomicOrdering::AcquireRelease:
+    case AtomicOrdering::SequentiallyConsistent:
+      return RISCV::AMOCAS_W_AQ_RL;
+    default:
+      llvm_unreachable("Unexpected AtomicOrdering");
+    }
+  }
+  return RISCV::AMOCAS_W;
+}
+
+// Pick the AMOCAS.D opcode variant that matches the requested atomic ordering.
+// When the subtarget has Ztso the plain opcode is returned for every ordering
+// (the ordering is not inspected in that case).
+static unsigned getAMOCASForRMW64(AtomicOrdering Ordering,
+                                  const RISCVSubtarget *Subtarget) {
+  const bool NeedsOrderingBits = !Subtarget->hasStdExtZtso();
+  if (NeedsOrderingBits) {
+    switch (Ordering) {
+    case AtomicOrdering::Monotonic:
+      // Falls out of the switch to the plain opcode below.
+      break;
+    case AtomicOrdering::Acquire:
+      return RISCV::AMOCAS_D_AQ;
+    case AtomicOrdering::Release:
+      return RISCV::AMOCAS_D_RL;
+    case AtomicOrdering::AcquireRelease:
+    case AtomicOrdering::SequentiallyConsistent:
+      return RISCV::AMOCAS_D_AQ_RL;
+    default:
+      llvm_unreachable("Unexpected AtomicOrdering");
+    }
+  }
+  return RISCV::AMOCAS_D;
+}
+
+// Pick the AMOCAS.Q opcode variant that matches the requested atomic ordering.
+// When the subtarget has Ztso the plain opcode is returned for every ordering
+// (the ordering is not inspected in that case).
+static unsigned getAMOCASForRMW128(AtomicOrdering Ordering,
+                                   const RISCVSubtarget *Subtarget) {
+  const bool NeedsOrderingBits = !Subtarget->hasStdExtZtso();
+  if (NeedsOrderingBits) {
+    switch (Ordering) {
+    case AtomicOrdering::Monotonic:
+      // Falls out of the switch to the plain opcode below.
+      break;
+    case AtomicOrdering::Acquire:
+      return RISCV::AMOCAS_Q_AQ;
+    case AtomicOrdering::Release:
+      return RISCV::AMOCAS_Q_RL;
+    case AtomicOrdering::AcquireRelease:
+    case AtomicOrdering::SequentiallyConsistent:
+      return RISCV::AMOCAS_Q_AQ_RL;
+    default:
+      llvm_unreachable("Unexpected AtomicOrdering");
+    }
+  }
+  return RISCV::AMOCAS_Q;
+}
+
+// Dispatch to the width-specific AMOCAS opcode selector. Only widths of 32,
+// 64 and 128 bits are handled; any other width is a programming error.
+static unsigned getAMOCASForRMW(AtomicOrdering Ordering, int Width,
+                                const RISCVSubtarget *Subtarget) {
+  switch (Width) {
+  case 32:
+    return getAMOCASForRMW32(Ordering, Subtarget);
+  case 64:
+    return getAMOCASForRMW64(Ordering, Subtarget);
+  case 128:
+    return getAMOCASForRMW128(Ordering, Subtarget);
+  default:
+    break;
+  }
+  llvm_unreachable("Unexpected AMOCAS width\n");
+}
+
static void doAtomicBinOpExpansion(const RISCVInstrInfo *TII, MachineInstr &MI,
DebugLoc DL, MachineBasicBlock *ThisMBB,
MachineBasicBlock *LoopMBB,
@@ -728,6 +807,72 @@ bool RISCVExpandAtomicPseudo::expandAtomicCmpXchg(
return true;
}
+// Translate an X<N>_PD register pair into the even-numbered GPR X<N> of the
+// same index. Passing anything other than one of the sixteen pairs listed
+// below is a programming error.
+static Register getGPRPairEvenReg(Register PairedReg) {
+  static const struct {
+    unsigned Pair;
+    unsigned Even;
+  } PairToEven[] = {
+      {RISCV::X0_PD, RISCV::X0},   {RISCV::X2_PD, RISCV::X2},
+      {RISCV::X4_PD, RISCV::X4},   {RISCV::X6_PD, RISCV::X6},
+      {RISCV::X8_PD, RISCV::X8},   {RISCV::X10_PD, RISCV::X10},
+      {RISCV::X12_PD, RISCV::X12}, {RISCV::X14_PD, RISCV::X14},
+      {RISCV::X16_PD, RISCV::X16}, {RISCV::X18_PD, RISCV::X18},
+      {RISCV::X20_PD, RISCV::X20}, {RISCV::X22_PD, RISCV::X22},
+      {RISCV::X24_PD, RISCV::X24}, {RISCV::X26_PD, RISCV::X26},
+      {RISCV::X28_PD, RISCV::X28}, {RISCV::X30_PD, RISCV::X30},
+  };
+
+  const unsigned PairId = PairedReg;
+  for (const auto &Entry : PairToEven)
+    if (Entry.Pair == PairId)
+      return Entry.Even;
+
+  llvm_unreachable("Unexpected GPR pair");
+}
+
+// Replace a PseudoAMOCAS_* instruction with the real AMOCAS instruction of the
+// given Width and the ordering encoded in the pseudo.
+//
+// Pseudo operands read here: 0 = result, 1 = address, 3 = new value,
+// 4 = atomic ordering immediate. Operand 2 (the compare value) is not read;
+// the pseudo's constraints tie it to the result operand.
+// When IsPaired is set, the result and new-value operands are register pairs
+// and are rewritten to the even GPR of each pair before being emitted.
+// Always returns true; NextMBBI is left untouched.
+bool RISCVExpandAtomicPseudo::expandAMOCAS(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, bool IsPaired,
+    int Width, MachineBasicBlock::iterator &NextMBBI) {
+  MachineInstr &MI = *MBBI;
+  DebugLoc DL = MI.getDebugLoc();
+
+  Register ResultReg = MI.getOperand(0).getReg();
+  Register StoreValReg = MI.getOperand(3).getReg();
+  if (IsPaired) {
+    ResultReg = getGPRPairEvenReg(ResultReg);
+    StoreValReg = getGPRPairEvenReg(StoreValReg);
+  }
+  Register PtrReg = MI.getOperand(1).getReg();
+
+  const auto MemOrder = static_cast<AtomicOrdering>(MI.getOperand(4).getImm());
+  const unsigned AMOCASOpc = getAMOCASForRMW(MemOrder, Width, STI);
+
+  MachineInstr *CASInstr = BuildMI(MBB, MBBI, DL, TII->get(AMOCASOpc))
+                               .addReg(ResultReg)
+                               .addReg(PtrReg)
+                               .addReg(StoreValReg);
+  // The result register was added as a plain register operand above; flag it
+  // as a definition now that the instruction is fully built.
+  CASInstr->getOperand(0).setIsDef(true);
+
+  MI.eraseFromParent();
+  return true;
+}
+
} // end of anonymous namespace
INITIALIZE_PASS(RISCVExpandAtomicPseudo, "riscv-expand-atomic-pseudo",
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 5e3975df1c4425d..de640deda82e92b 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -552,7 +552,10 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
}
if (Subtarget.hasStdExtA()) {
- setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
+ if (Subtarget.hasStdExtZacas())
+ setMaxAtomicSizeInBitsSupported(Subtarget.getXLen() * 2);
+ else
+ setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
setMinCmpXchgSizeInBits(32);
} else if (Subtarget.hasForcedAtomics()) {
setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
@@ -1249,6 +1252,12 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
XLenVT, LibCall);
}
+ // Set atomic_cmp_swap operations to expand to AMOCAS.D (RV32) and AMOCAS.Q
+ // (RV64).
+ if (Subtarget.hasStdExtZacas())
+ setOperationAction(ISD::ATOMIC_CMP_SWAP,
+ Subtarget.is64Bit() ? MVT::i128 : MVT::i64, Custom);
+
if (Subtarget.hasVendorXTHeadMemIdx()) {
for (unsigned im = (unsigned)ISD::PRE_INC; im != (unsigned)ISD::POST_DEC;
++im) {
@@ -10451,6 +10460,63 @@ static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
}
+// Build a REG_SEQUENCE that holds integer value V (of type VT) in an even/odd
+// pair of X registers. The low SubRegVT-sized half is placed in sub_32 and the
+// high half in sub_32_hi; the pair register class is GPRPI128 when VT is i128
+// and GPRPI64 otherwise.
+static SDValue createGPRPairNode(SelectionDAG &DAG, SDValue V, MVT VT,
+                                 MVT SubRegVT) {
+  SDLoc DL(V.getNode());
+
+  // Low half: V converted directly to the sub-register type.
+  SDValue LoHalf = DAG.getAnyExtOrTrunc(V, DL, SubRegVT);
+
+  // High half: shift V right by the sub-register width, then convert.
+  const unsigned HalfBits = (SubRegVT == MVT::i64) ? 64 : 32;
+  SDValue ShiftAmt = DAG.getConstant(HalfBits, DL, SubRegVT);
+  SDValue Shifted = DAG.getNode(ISD::SRL, DL, VT, V, ShiftAmt);
+  SDValue HiHalf = DAG.getAnyExtOrTrunc(Shifted, DL, SubRegVT);
+
+  const unsigned PairRCID =
+      (VT == MVT::i128) ? RISCV::GPRPI128RegClassID : RISCV::GPRPI64RegClassID;
+  SDValue RegClass = DAG.getTargetConstant(PairRCID, DL, MVT::i32);
+  SDValue LoIdx = DAG.getTargetConstant(RISCV::sub_32, DL, MVT::i32);
+  SDValue HiIdx = DAG.getTargetConstant(RISCV::sub_32_hi, DL, MVT::i32);
+
+  const SDValue Ops[] = {RegClass, LoHalf, LoIdx, HiHalf, HiIdx};
+  MachineSDNode *Seq =
+      DAG.getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
+  return SDValue(Seq, 0);
+}
+
+// Custom result legalization for an ATOMIC_CMP_SWAP whose value type is twice
+// XLen (i64 on RV32, i128 on RV64) when Zacas is available.
+//
+// The compare and store values are packed into GPR pairs, a PseudoAMOCAS_D_32
+// (i64) or PseudoAMOCAS_Q (i128) machine node is created with the original
+// memory operand attached, and two values are appended to Results: the loaded
+// value rebuilt with BUILD_PAIR from the two sub-registers, then the output
+// chain.
+static void ReplaceCMP_SWAP_2XLenResults(SDNode *N,
+                                         SmallVectorImpl<SDValue> &Results,
+                                         SelectionDAG &DAG,
+                                         const RISCVSubtarget &Subtarget) {
+  MVT VT = N->getSimpleValueType(0);
+  assert(N->getValueType(0) == (Subtarget.is64Bit() ? MVT::i128 : MVT::i64) &&
+         "AtomicCmpSwap on types less than 2*XLen should be legal");
+  assert(Subtarget.hasStdExtZacas());
+
+  const bool IsI64 = (VT == MVT::i64);
+  MVT SubRegVT = IsI64 ? MVT::i32 : MVT::i64;
+
+  SDLoc DL(N);
+  MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
+  AtomicOrdering Ordering = MemOp->getMergedOrdering();
+
+  // Node operands: 0 = chain, 1 = pointer, 2 = compare value, 3 = store value.
+  SDValue Ptr = N->getOperand(1);
+  SDValue CmpPair = createGPRPairNode(DAG, N->getOperand(2), VT, SubRegVT);
+  SDValue NewPair = createGPRPairNode(DAG, N->getOperand(3), VT, SubRegVT);
+  SDValue OrderingOp =
+      DAG.getTargetConstant(static_cast<unsigned>(Ordering), DL, MVT::i32);
+  SDValue ChainIn = N->getOperand(0);
+  SDValue Ops[] = {Ptr, CmpPair, NewPair, OrderingOp, ChainIn};
+
+  unsigned Opcode = IsI64 ? RISCV::PseudoAMOCAS_D_32 : RISCV::PseudoAMOCAS_Q;
+  MachineSDNode *CmpSwap = DAG.getMachineNode(
+      Opcode, DL, DAG.getVTList(MVT::Untyped, MVT::Other), Ops);
+  DAG.setNodeMemRefs(CmpSwap, {MemOp});
+
+  // Split the untyped pair result back into its two halves.
+  SDValue PairResult(CmpSwap, 0);
+  SDValue Lo =
+      DAG.getTargetExtractSubreg(RISCV::sub_32, DL, SubRegVT, PairResult);
+  SDValue Hi =
+      DAG.getTargetExtractSubreg(RISCV::sub_32_hi, DL, SubRegVT, PairResult);
+
+  Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, VT, Lo, Hi));
+  Results.push_back(SDValue(CmpSwap, 1));
+}
+
void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const {
@@ -10458,6 +10524,9 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
switch (N->getOpcode()) {
default:
llvm_unreachable("Don't know how to custom type legalize this operation!");
+ case ISD::ATOMIC_CMP_SWAP:
+ ReplaceCMP_SWAP_2XLenResults(N, Results, DAG, Subtarget);
+ break;
case ISD::STRICT_FP_TO_SINT:
case ISD::STRICT_FP_TO_UINT:
case ISD::FP_TO_SINT:
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoA.td b/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
index c43af14bb7f7005..d175ae49f1919ce 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
@@ -295,6 +295,28 @@ multiclass PseudoCmpXchgPat<string Op, Pseudo CmpXchgInst,
(CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 7)>;
}
+// Pseudo instructions for the Zacas compare-and-swap. They are replaced by
+// real AMOCAS instructions in RISCVExpandAtomicPseudoInsts.
+let Predicates = [HasStdExtZacas] in {
+// Generic compare-and-swap pseudo over register class RC (GPR by default).
+//   $res      - result
+//   $addr     - address
+//   $cmpval   - compare value
+//   $newval   - new value
+//   $ordering - atomic ordering, passed as an immediate
+class PseudoAMOCAS<RegisterClass RC = GPR>
+ : Pseudo<(outs RC:$res),
+ (ins GPR:$addr, RC:$cmpval, RC:$newval, ixlenimm:$ordering), []> {
+ // $res is tied to $cmpval and additionally marked earlyclobber.
+ let Constraints = "@earlyclobber $res, $res = $cmpval";
+ let mayLoad = 1;
+ let mayStore = 1;
+ let hasSideEffects = 0;
+}
+// 32-bit compare-and-swap in plain GPRs; selected for atomic_cmp_swap_32.
+def PseudoAMOCAS_W: PseudoAMOCAS;
+defm : PseudoCmpXchgPat<"atomic_cmp_swap_32", PseudoAMOCAS_W>;
+
+// RV32 only: 64-bit compare-and-swap whose values live in GPRPI64 register
+// pairs. No selection pattern is attached here.
+let Predicates = [HasStdExtZacas, IsRV32] in {
+ def PseudoAMOCAS_D_32: PseudoAMOCAS<GPRPI64>;
+}
+// RV64 only: 64-bit compare-and-swap in plain GPRs (selected for
+// atomic_cmp_swap_64) and 128-bit compare-and-swap in GPRPI128 register pairs
+// (no selection pattern attached here).
+let Predicates = [HasStdExtZacas, IsRV64] in {
+ def PseudoAMOCAS_D_64: PseudoAMOCAS;
+ defm : PseudoCmpXchgPat<"atomic_cmp_swap_64", PseudoAMOCAS_D_64>;
+ def PseudoAMOCAS_Q: PseudoAMOCAS<GPRPI128>;
+}
+}
+
def PseudoCmpXchg32 : PseudoCmpXchg;
defm : PseudoCmpXchgPat<"atomic_cmp_swap_32", PseudoCmpXchg32>;
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
index ab0d354967b34c7..9c3c8b85782f613 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
@@ -578,6 +578,26 @@ def GPRPF64 : RegisterClass<"RISCV", [f64], 64, (add
X0_PD, X2_PD, X4_PD
)>;
+// Register class of X<N>_PD register pairs carrying an i64 value; it is the
+// operand class of PseudoAMOCAS_D_32.
+// NOTE(review): RegInfo<32, 32, 32> gives 32-bit sizes for a class that holds
+// i64 in a register pair - confirm these sizes are intended.
+let RegInfos = RegInfoByHwMode<[RV32], [RegInfo<32, 32, 32>]> in
+def GPRPI64 : RegisterClass<"RISCV", [i64], 32, (add
+ X10_PD, X12_PD, X14_PD, X16_PD,
+ X6_PD,
+ X28_PD, X30_PD,
+ X8_PD,
+ X18_PD, X20_PD, X22_PD, X24_PD, X26_PD,
+ X0_PD, X2_PD, X4_PD
+)>;
+
+// Register class of X<N>_PD register pairs carrying an i128 value; it is the
+// operand class of PseudoAMOCAS_Q.
+// NOTE(review): RegInfo<64, 64, 64> gives 64-bit sizes for a class that holds
+// i128 in a register pair - confirm these sizes are intended.
+let RegInfos = RegInfoByHwMode<[RV64], [RegInfo<64, 64, 64>]> in
+def GPRPI128 : RegisterClass<"RISCV", [i128], 64, (add
+ X10_PD, X12_PD, X14_PD, X16_PD,
+ X6_PD,
+ X28_PD, X30_PD,
+ X8_PD,
+ X18_PD, X20_PD, X22_PD, X24_PD, X26_PD,
+ X0_PD, X2_PD, X4_PD
+)>;
+
// The register class is added for inline assembly for vector mask types.
def VM : VReg<VMaskVTs,
(add (sequence "V%u", 8, 31),
diff --git a/llvm/test/CodeGen/RISCV/atomic-cmpxchg-branch-on-result.ll b/llvm/test/CodeGen/RISCV/atomic-cmpxchg-branch-on-result.ll
index 651f58d324422f2..08c9aa685479dfa 100644
--- a/llvm/test/CodeGen/RISCV/atomic-cmpxchg-branch-on-result.ll
+++ b/llvm/test/CodeGen/RISCV/atomic-cmpxchg-branch-on-result.ll
@@ -1,30 +1,72 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+a -verify-machineinstrs < %s \
-; RUN: | FileCheck -check-prefixes=CHECK,RV32IA %s
+; RUN: | FileCheck -check-prefixes=RV32IA %s
+; RUN: llc -mtriple=riscv32 -mattr=+a,+experimental-zacas -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefixes=RV32IA-ZACAS %s
; RUN: llc -mtriple=riscv64 -mattr=+a -verify-machineinstrs < %s \
-; RUN: | FileCheck -check-prefixes=CHECK,RV64IA %s
+; RUN: | FileCheck -check-prefixes=RV64IA %s
+; RUN: llc -mtriple=riscv64 -mattr=+a,+experimental-zacas -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefixes=RV64IA-ZACAS %s
; Test cmpxchg followed by a branch on the cmpxchg success value to see if the
; branch is folded into the cmpxchg expansion.
define void @cmpxchg_and_branch1(ptr %ptr, i32 signext %cmp, i32 signext %val) nounwind {
-; CHECK-LABEL: cmpxchg_and_branch1:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: .LBB0_1: # %do_cmpxchg
-; CHECK-NEXT: # =>This Loop Header: Depth=1
-; CHECK-NEXT: # Child Loop BB0_3 Depth 2
-; CHECK-NEXT: .LBB0_3: # %do_cmpxchg
-; CHECK-NEXT: # Parent Loop BB0_1 Depth=1
-; CHECK-NEXT: # => This Inner Loop Header: Depth=2
-; CHECK-NEXT: lr.w.aqrl a3, (a0)
-; CHECK-NEXT: bne a3, a1, .LBB0_1
-; CHECK-NEXT: # %bb.4: # %do_cmpxchg
-; CHECK-NEXT: # in Loop: Header=BB0_3 Depth=2
-; CHECK-NEXT: sc.w.rl a4, a2, (a0)
-; CHECK-NEXT: bnez a4, .LBB0_3
-; CHECK-NEXT: # %bb.5: # %do_cmpxchg
-; CHECK-NEXT: # %bb.2: # %exit
-; CHECK-NEXT: ret
+; RV32IA-LABEL: cmpxchg_and_branch1:
+; RV32IA: # %bb.0: # %entry
+; RV32IA-NEXT: .LBB0_1: # %do_cmpxchg
+; RV32IA-NEXT: # =>This Loop Header: Depth=1
+; RV32IA-NEXT: # Child Loop BB0_3 Depth 2
+; RV32IA-NEXT: .LBB0_3: # %do_cmpxchg
+; RV32IA-NEXT: # Parent Loop BB0_1 Depth=1
+; RV32IA-NEXT: # => This Inner Loop Header: Depth=2
+; RV32IA-NEXT: lr.w.aqrl a3, (a0)
+; RV32IA-NEXT: bne a3, a1, .LBB0_1
+; RV32IA-NEXT: # %bb.4: # %do_cmpxchg
+; RV32IA-NEXT: # in Loop: Header=BB0_3 Depth=2
+; RV32IA-NEXT: sc.w.rl a4, a2, (a0)
+; RV32IA-NEXT: bnez a4, .LBB0_3
+; RV32IA-NEXT: # %bb.5: # %do_cmpxchg
+; RV32IA-NEXT: # %bb.2: # %exit
+; RV32IA-NEXT: ret
+;
+; RV32IA-ZACAS-LABEL: cmpxchg_and_branch1:
+; RV32IA-ZACAS: # %bb.0: # %entry
+; RV32IA-ZACAS-NEXT: .LBB0_1: # %do_cmpxchg
+; RV32IA-ZACAS-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT: mv a3, a1
+; RV32IA-ZACAS-NEXT: amocas.w.aqrl a3, a2, (a0)
+; RV32IA-ZACAS-NEXT: bne a3, a1, .LBB0_1
+; RV32IA-ZACAS-NEXT: # %bb.2: # %exit
+; RV32IA-ZACAS-NEXT: ret
+;
+; RV64IA-LABEL: cmpxchg_and_branch1:
+; RV64IA: # %bb.0: # %entry
+; RV64IA-NEXT: .LBB0_1: # %do_cmpxchg
+; RV64IA-NEXT: # =>This Loop Header: Depth=1
+; RV64IA-NEXT: # Child Loop BB0_3 Depth 2
+; RV64IA-NEXT: .LBB0_3: # %do_cmpxchg
+; RV64IA-NEXT: # Parent Loop BB0_1 Depth=1
+; RV64IA-NEXT: # => This Inner Loop Header: Depth=2
+; RV64IA-NEXT: lr.w.aqrl a3, (a0)
+; RV64IA-NEXT: bne a3, a1, .LBB0_1
+; RV64IA-NEXT: # %bb.4: # %do_cmpxchg
+; RV64IA-NEXT: # in Loop: Header=BB0_3 Depth=2
+; RV64IA-NEXT: sc.w.rl a4, a2, (a0)
+; RV64IA-NEXT: bnez a4, .LBB0_3
+; RV64IA-NEXT: # %bb.5: # %do_cmpxchg
+; RV64IA-NEXT: # %bb.2: # %exit
+; RV64IA-NEXT: ret
+;
+; RV64IA-ZACAS-LABEL: cmpxchg_and_branch1:
+; RV64IA-ZACAS: # %bb.0: # %entry
+; RV64IA-ZACAS-NEXT: .LBB0_1: # %do_cmpxchg
+; RV64IA-ZACAS-NEXT: # =>This Inner Loop Header: Depth=1
+; RV64IA-ZACAS-NEXT: mv a3, a1
+; RV64IA-ZACAS-NEXT: amocas.w.aqrl a3, a2, (a0)
+; RV64IA-ZACAS-NEXT: bne a3, a1, .LBB0_1
+; RV64IA-ZACAS-NEXT: # %bb.2: # %exit
+; RV64IA-ZACAS-NEXT: ret
entry:
br label %do_cmpxchg
do_cmpxchg:
@@ -36,25 +78,65 @@ exit:
}
define void @cmpxchg_and_branch2(ptr %ptr, i32 signext %cmp, i32 signext %val) nounwind {
-; CHECK-LABEL: cmpxchg_and_branch2:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: .LBB1_1: # %do_cmpxchg
-; CHECK-NEXT: # =>This Loop Header: Depth=1
-; CHECK-NEXT: # Child Loop BB1_3 Depth 2
-; CHECK-NEXT: .LBB1_3: # %do_cmpxchg
-; CHECK-NEXT: # Parent Loop BB1_1 Depth=1
-; CHECK-NEXT: # => This Inner Loop Header: Depth=2
-; CHECK-NEXT: lr.w.aqrl a3, (a0)
-; CHECK-NEXT: bne a3, a1, .LBB1_5
-; CHECK-NEXT: # %bb.4: # %do_cmpxchg
-; CHECK-NEXT: # in Loop: Header=BB1_3 Depth=2
-; CHECK-NEXT: sc.w.rl a4, a2, (a0)
-; CHECK-NEXT: bnez a4, .LBB1_3
-; CHECK-NEXT: .LBB1_5: # %do_cmpxchg
-; CHECK-NEXT: # in Loop: Header=BB1_1 Depth=1
-; CHECK-NEXT: beq a3, a1, .LBB1_1
-; CHECK-NEXT: # %bb.2: # %exit
-; CHECK-NEXT: ret
+; RV32IA-LABEL: cmpxchg_and_branch2:
+; RV32IA: # %bb.0: # %entry
+; RV32IA-NEXT: .LBB1_1: # %do_cmpxchg
+; RV32IA-NEXT: # =>This Loop Header: Depth=1
+; RV32IA-NEXT: # Child Loop BB1_3 Depth 2
+; RV32IA-NEXT: .LBB1_3: # %do_cmpxchg
+; RV32IA-NEXT: # Parent Loop BB1_1 Depth=1
+; RV32IA-NEXT: # => This Inner Loop Header: Depth=2
+; RV32IA-NEXT: lr.w.aqrl a3, (a0)
+; RV32IA-NEXT: bne a3, a1, .LBB1_5
+; RV32IA-NEXT: # %bb.4: # %do_cmpxchg
+; RV32IA-NEXT: # in Loop: Header=BB1_3 Depth=2
+; RV32IA-NEXT: sc.w.rl a4, a2, (a0)
+; RV32IA-NEXT: bnez a4, .LBB1_3
+; RV32IA-NEXT: .LBB1_5: # %do_cmpxchg
+; RV32IA-NEXT: # in Loop: Header=BB1_1 Depth=1
+; RV32IA-NEXT: beq a3, a1, .LBB1_1
+; RV32IA-NEXT: # %bb.2: # %exit
+; RV32IA-NEXT: ret
+;
+; RV32IA-ZACAS-LABEL: cmpxchg_and_branch2:
+; RV32IA-ZACAS: # %bb.0: # %entry
+; RV32IA-ZACAS-NEXT: .LBB1_1: # %do_cmpxchg
+; RV32IA-ZACAS-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT: mv a3, a1
+; RV32IA-ZACAS-NEXT: amocas.w.aqrl a3, a2, (a0)
+; RV32IA-ZACAS-NEXT: beq a3, a1, .LBB1_1
+; RV32IA-ZACAS-NEXT: # %bb.2: # %exit
+; RV32IA-ZACAS-NEXT: ret
+;
+; RV64IA-LABEL: cmpxchg_and_branch2:
+; RV64IA: # %bb.0: # %entry
+; RV64IA-NEXT: .LBB1_1: # %do_cmpxchg
+; RV64IA-NEXT: # =>This Loop Header: Depth=1
+; RV64IA-NEXT: # Child Loop BB1_3 Depth 2
+; RV64IA-NEXT: .LBB1_3: # %do_cmpxchg
+; RV64IA-NEXT: # Parent Loop BB1_1 Depth=1
+; RV64IA-NEXT: # => This Inner Loop Header: Depth=2
+; RV64IA-NEXT: lr.w.aqrl a3, (a0)
+; RV64IA-NEXT: bne a3, a1, .LBB1_5
+; RV64IA-NEXT: # %bb.4: # %do_cmpxchg
+; RV64IA-NEXT: # in Loop: Header=BB1_3 Depth=2
+; RV64IA-NEXT: sc.w.rl a4, a2, (a0)
+; RV64IA-NEXT: bnez a4, .LBB1_3
+; RV64IA-NEXT: .LBB1_5: # %do_cmpxchg
+; RV64IA-NEXT: # in Loop: Header=BB1_1 Depth=1
+; RV64IA-NEXT: beq a3, a1, .LBB1_1
+; RV64IA-NEXT: # %bb.2: # %exit
+; RV64IA-NEXT: ret
+;
+; RV64IA-ZACAS-LABEL: cmpxchg_and_branch2:
+; RV64IA-ZACAS: # %bb.0: # %entry
+; RV64IA-ZACAS-NEXT: .LBB1_1: # %do_cmpxchg
+; RV64IA-ZACAS-NEXT: # =>This Inner Loop Header: Depth=1
+; RV64IA-ZACAS-NEXT: mv a3, a1
+; RV64IA-ZACAS-NEXT: amocas.w.aqrl a3, a2, (a0)
+; RV64IA-ZACAS-NEXT: beq a3, a1, .LBB1_1
+; RV64IA-ZACAS-NEXT: # %bb.2: # %exit
+; RV64IA-ZACAS-NEXT: ret
entry:
br label %do_cmpxchg
do_cmpxchg:
@@ -96,6 +178,36 @@ define void @cmpxchg_masked_and_branch1(ptr %ptr, i8 signext %cmp, i8 signext %v
; RV32IA-NEXT: # %bb.2: # %exit
; RV32IA-NEXT: ret
;
+; RV32IA-ZACAS-LABEL: cmpxchg_masked_and_branch1:
+; RV32IA-ZACAS: # %bb.0: # %entry
+; RV32IA-ZACAS-NEXT: andi a3, a0, -4
+; RV32IA-ZACAS-NEXT: slli a4, a0, 3
+; RV32IA-ZACAS-NEXT: li a0, 255
+; RV32IA-ZACAS-NEXT: sll a0, a0, a4
+; RV32IA-ZACAS-NEXT: andi a1, a1, 255
+; RV32IA-ZACAS-NEXT: sll a1, a1, a4
+; RV32IA-ZACAS-NEXT: andi a2, a2, 255
+; RV32IA-ZACAS-NEXT: sll a2, a2, a4
+; RV32IA-ZACAS-NEXT: .LBB2_1: # %do_cmpxchg
+; RV32IA-ZACAS-NEXT: # =>This Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT: # Child Loop BB2_3 Depth 2
+; RV32IA-ZACAS-NEXT: .LBB2_3: # %do_cmpxchg
+; RV32IA-ZACAS-NEXT: # Parent Loop BB2_1 Depth=1
+; RV32IA-ZACAS-NEXT: # => This Inner Loop Header: Depth=2
+; RV32IA-ZACAS-NEXT: lr.w.aqrl a4, (a3)
+; RV32IA-ZACAS-NEXT: and a5, a4, a0
+; RV32IA-ZACAS-NEXT: bne a5, a1, .LBB2_1
+; RV32IA-ZACAS-NEXT: # %bb.4: # %do_cmpxchg
+; RV32IA-ZACAS-NEXT: # in Loop: Header=BB2_3 Depth=2
+; RV32IA-ZACAS-NEXT: xor a5, a4, a2
+; RV32IA-ZACAS-NEXT: and a5, a5, a0
+; RV32IA-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-ZACAS-NEXT: sc.w.rl a5, a5, (a3)
+; RV32IA-ZACAS-NEXT: bnez a5, .LBB2_3
+; RV32IA-ZACAS-NEXT: # %bb.5: # %do_cmpxchg
+; RV32IA-ZACAS-NEXT: # %bb.2: # %exit
+; RV32IA-ZACAS-NEXT: ret
+;
; RV64IA-LABEL: cmpxchg_masked_and_branch1:
; RV64IA: # %bb.0: # %entry
; RV64IA-NEXT: andi a3, a0, -4
@@ -125,6 +237,36 @@ define void @cmpxchg_masked_and_branch1(ptr %ptr, i8 signext %cmp, i8 signext %v
; RV64IA-NEXT: # %bb.5: # %do_cmpxchg
; RV64IA-NEXT: # %bb.2: # %exit
; RV64IA-NEXT: ret
+;
+; RV64IA-ZACAS-LABEL: cmpxchg_masked_and_branch1:
+; RV64IA-ZACAS: # %bb.0: # %entry
+; RV64IA-ZACAS-NEXT: andi a3, a0, -4
+; RV64IA-ZACAS-NEXT: slli a4, a0, 3
+; RV64IA-ZACAS-NEXT: li a0, 255
+; RV64IA-ZACAS-NEXT: sllw a0, a0, a4
+; RV64IA-ZACAS-NEXT: andi a1, a1, 255
+; RV64IA-ZACAS-NEXT: sllw a1, a1, a4
+; RV64IA-ZACAS-NEXT: andi a2, a2, 255
+; RV64IA-ZACAS-NEXT: sllw a2, a2, a4
+; RV64IA-ZACAS-NEXT: .LBB2_1: # %do_cmpxchg
+; RV64IA-ZACAS-NEXT: # =>This Loop Header: Depth=1
+; RV64IA-ZACAS-NEXT: # Child Loop BB2_3 Depth 2
+; RV64IA-ZACAS-NEXT: .LBB2_3: # %do_cmpxchg
+; RV64IA-ZACAS-NEXT: # Parent Loop BB2_1 Depth=1
+; RV64IA-ZACAS-NEXT: # => This Inner Loop Header: Depth=2
+; RV64IA-ZACAS-NEXT: lr.w.aqrl a4, (a3)
+; RV64IA-ZACAS-NEXT: and a5, a4, a0
+; RV64IA-ZACAS-NEXT: bne a5, a1, .LBB2_1
+; RV64IA-ZACAS-NEXT: # %bb.4: # %do_cmpxchg
+; RV64IA-ZACAS-NEXT: # in Loop: Header=BB2_3 Depth=2
+; RV64IA-ZACAS-NEXT: xor a5, a4, a2
+; RV64IA-ZACAS-NEXT: and a5, a5, a0
+; RV64IA-ZACAS-NEXT: xor a5, a4, a5
+; RV64IA-ZACAS-NEXT: sc.w.rl a5, a5, (a3)
+; RV64IA-ZACAS-NEXT: bnez a5, .LBB2_3
+; RV64IA-ZACAS-NEXT: # %bb.5: # %do_cmpxchg
+; RV64IA-ZACAS-NEXT: # %bb.2: # %exit
+; RV64IA-ZACAS-NEXT: ret
entry:
br label %do_cmpxchg
do_cmpxchg:
@@ -169,6 +311,39 @@ define void @cmpxchg_masked_and_branch2(ptr %ptr, i8 signext %cmp, i8 signext %v
; RV32IA-NEXT: # %bb.2: # %exit
; RV32IA-NEXT: ret
;
+; RV32IA-ZACAS-LABEL: cmpxchg_masked_and_branch2:
+; RV32IA-ZACAS: # %bb.0: # %entry
+; RV32IA-ZACAS-NEXT: andi a3, a0, -4
+; RV32IA-ZACAS-NEXT: slli a4, a0, 3
+; RV32IA-ZACAS-NEXT: li a0, 255
+; RV32IA-ZACAS-NEXT: sll a0, a0, a4
+; RV32IA-ZACAS-NEXT: andi a1, a1, 255
+; RV32IA-ZACAS-NEXT: sll a1, a1, a4
+; RV32IA-ZACAS-NEXT: andi a2, a2, 255
+; RV32IA-ZACAS-NEXT: sll a2, a2, a4
+; RV32IA-ZACAS-NEXT: .LBB3_1: # %do_cmpxchg
+; RV32IA-ZACAS-NEXT: # =>This Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT: # Child Loop BB3_3 Depth 2
+; RV32IA-ZACAS-NEXT: .LBB3_3: # %do_cmpxchg
+; RV32IA-ZACAS-NEXT: # Parent Loop BB3_1 Depth=1
+; RV32IA-ZACAS-NEXT: # => This Inner Loop Header: Depth=2
+; RV32IA-ZACAS-NEXT: lr.w.aqrl a4, (a3)
+; RV32IA-ZACAS-NEXT: and a5, a4, a0
+; RV32IA-ZACAS-NEXT: bne a5, a1, .LBB3_5
+; RV32IA-ZACAS-NEXT: # %bb.4: # %do_cmpxchg
+; RV32IA-ZACAS-NEXT: # in Loop: Header=BB3_3 Depth=2
+; RV32IA-ZACAS-NEXT: xor a5, a4, a2
+; RV32IA-ZACAS-NEXT: and a5, a5, a0
+; RV32IA-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-ZACAS-NEXT: sc.w.rl a5, a5, (a3)
+; RV32IA-ZACAS-NEXT: bnez a5, .LBB3_3
+; RV32IA-ZACAS-NEXT: .LBB3_5: # %do_cmpxchg
+; RV32IA-ZACAS-NEXT: # in Loop: Header=BB3_1 Depth=1
+; RV32IA-ZACAS-NEXT: and a4, a4, a0
+; RV32IA-ZACAS-NEXT: beq a1, a4, .LBB3_1
+; RV32IA-ZACAS-NEXT: # %bb.2: # %exit
+; RV32IA-ZACAS-NEXT: ret
+;
; RV64IA-LABEL: cmpxchg_masked_and_branch2:
; RV64IA: # %bb.0: # %entry
; RV64IA-NEXT: andi a3, a0, -4
@@ -201,6 +376,39 @@ define void @cmpxchg_masked_and_branch2(ptr %ptr, i8 signext %cmp, i8 signext %v
; RV64IA-NEXT: beq a1, a4, .LBB3_1
; RV64IA-NEXT: # %bb.2: # %exit
; RV64IA-NEXT: ret
+;
+; RV64IA-ZACAS-LABEL: cmpxchg_masked_and_branch2:
+; RV64IA-ZACAS: # %bb.0: # %entry
+; RV64IA-ZACAS-NEXT: andi a3, a0, -4
+; RV64IA-ZACAS-NEXT: slli a4, a0, 3
+; RV64IA-ZACAS-NEXT: li a0, 255
+; RV64IA-ZACAS-NEXT: sllw a0, a0, a4
+; RV64IA-ZACAS-NEXT: andi a1, a1, 255
+; RV64IA-ZACAS-NEXT: sllw a1, a1, a4
+; RV64IA-ZACAS-NEXT: andi a2, a2, 255
+; RV64IA-ZACAS-NEXT: sllw a2, a2, a4
+; RV64IA-ZACAS-NEXT: .LBB3_1: # %do_cmpxchg
+; RV64IA-ZACAS-NEXT: # =>This Loop Header: Depth=1
+; RV64IA-ZACAS-NEXT: # Child Loop BB3_3 Depth 2
+; RV64IA-ZACAS-NEXT: .LBB3_3: # %do_cmpxchg
+; RV64IA-ZACAS-NEXT: # Parent Loop BB3_1 Depth=1
+; RV64IA-ZACAS-NEXT: # => This Inner Loop Header: Depth=2
+; RV64IA-ZACAS-NEXT: lr.w.aqrl a4, (a3)
+; RV64IA-ZACAS-NEXT: and a5, a4, a0
+; RV64IA-ZACAS-NEXT: bne a5, a1, .LBB3_5
+; RV64IA-ZACAS-NEXT: # %bb.4: # %do_cmpxchg
+; RV64IA-ZACAS-NEXT: # in Loop: Header=BB3_3 Depth=2
+; RV64IA-ZACAS-NEXT: xor a5, a4, a2
+; RV64IA-ZACAS-NEXT: and a5, a5, a0
+; RV64IA-ZACAS-NEXT: xor a5, a4, a5
+; RV64IA-ZACAS-NEXT: sc.w.rl a5, a5, (a3)
+; RV64IA-ZACAS-NEXT: bnez a5, .LBB3_3
+; RV64IA-ZACAS-NEXT: .LBB3_5: # %do_cmpxchg
+; RV64IA-ZACAS-NEXT: # in Loop: Header=BB3_1 Depth=1
+; RV64IA-ZACAS-NEXT: and a4, a4, a0
+; RV64IA-ZACAS-NEXT: beq a1, a4, .LBB3_1
+; RV64IA-ZACAS-NEXT: # %bb.2: # %exit
+; RV64IA-ZACAS-NEXT: ret
entry:
br label %do_cmpxchg
do_cmpxchg:
@@ -212,25 +420,65 @@ exit:
}
define void @cmpxchg_and_irrelevant_branch(ptr %ptr, i32 signext %cmp, i32 signext %val, i1 zeroext %bool) nounwind {
-; CHECK-LABEL: cmpxchg_and_irrelevant_branch:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: .LBB4_1: # %do_cmpxchg
-; CHECK-NEXT: # =>This Loop Header: Depth=1
-; CHECK-NEXT: # Child Loop BB4_3 Depth 2
-; CHECK-NEXT: .LBB4_3: # %do_cmpxchg
-; CHECK-NEXT: # Parent Loop BB4_1 Depth=1
-; CHECK-NEXT: # => This Inner Loop Header: Depth=2
-; CHECK-NEXT: lr.w.aqrl a4, (a0)
-; CHECK-NEXT: bne a4, a1, .LBB4_5
-; CHECK-NEXT: # %bb.4: # %do_cmpxchg
-; CHECK-NEXT: # in Loop: Header=BB4_3 Depth=2
-; CHECK-NEXT: sc.w.rl a5, a2, (a0)
-; CHECK-NEXT: bnez a5, .LBB4_3
-; CHECK-NEXT: .LBB4_5: # %do_cmpxchg
-; CHECK-NEXT: # in Loop: Header=BB4_1 Depth=1
-; CHECK-NEXT: beqz a3, .LBB4_1
-; CHECK-NEXT: # %bb.2: # %exit
-; CHECK-NEXT: ret
+; RV32IA-LABEL: cmpxchg_and_irrelevant_branch:
+; RV32IA: # %bb.0: # %entry
+; RV32IA-NEXT: .LBB4_1: # %do_cmpxchg
+; RV32IA-NEXT: # =>This Loop Header: Depth=1
+; RV32IA-NEXT: # Child Loop BB4_3 Depth 2
+; RV32IA-NEXT: .LBB4_3: # %do_cmpxchg
+; RV32IA-NEXT: # Parent Loop BB4_1 Depth=1
+; RV32IA-NEXT: # => This Inner Loop Header: Depth=2
+; RV32IA-NEXT: lr.w.aqrl a4, (a0)
+; RV32IA-NEXT: bne a4, a1, .LBB4_5
+; RV32IA-NEXT: # %bb.4: # %do_cmpxchg
+; RV32IA-NEXT: # in Loop: Header=BB4_3 Depth=2
+; RV32IA-NEXT: sc.w.rl a5, a2, (a0)
+; RV32IA-NEXT: bnez a5, .LBB4_3
+; RV32IA-NEXT: .LBB4_5: # %do_cmpxchg
+; RV32IA-NEXT: # in Loop: Header=BB4_1 Depth=1
+; RV32IA-NEXT: beqz a3, .LBB4_1
+; RV32IA-NEXT: # %bb.2: # %exit
+; RV32IA-NEXT: ret
+;
+; RV32IA-ZACAS-LABEL: cmpxchg_and_irrelevant_branch:
+; RV32IA-ZACAS: # %bb.0: # %entry
+; RV32IA-ZACAS-NEXT: .LBB4_1: # %do_cmpxchg
+; RV32IA-ZACAS-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT: mv a4, a1
+; RV32IA-ZACAS-NEXT: amocas.w.aqrl a4, a2, (a0)
+; RV32IA-ZACAS-NEXT: beqz a3, .LBB4_1
+; RV32IA-ZACAS-NEXT: # %bb.2: # %exit
+; RV32IA-ZACAS-NEXT: ret
+;
+; RV64IA-LABEL: cmpxchg_and_irrelevant_branch:
+; RV64IA: # %bb.0: # %entry
+; RV64IA-NEXT: .LBB4_1: # %do_cmpxchg
+; RV64IA-NEXT: # =>This Loop Header: Depth=1
+; RV64IA-NEXT: # Child Loop BB4_3 Depth 2
+; RV64IA-NEXT: .LBB4_3: # %do_cmpxchg
+; RV64IA-NEXT: # Parent Loop BB4_1 Depth=1
+; RV64IA-NEXT: # => This Inner Loop Header: Depth=2
+; RV64IA-NEXT: lr.w.aqrl a4, (a0)
+; RV64IA-NEXT: bne a4, a1, .LBB4_5
+; RV64IA-NEXT: # %bb.4: # %do_cmpxchg
+; RV64IA-NEXT: # in Loop: Header=BB4_3 Depth=2
+; RV64IA-NEXT: sc.w.rl a5, a2, (a0)
+; RV64IA-NEXT: bnez a5, .LBB4_3
+; RV64IA-NEXT: .LBB4_5: # %do_cmpxchg
+; RV64IA-NEXT: # in Loop: Header=BB4_1 Depth=1
+; RV64IA-NEXT: beqz a3, .LBB4_1
+; RV64IA-NEXT: # %bb.2: # %exit
+; RV64IA-NEXT: ret
+;
+; RV64IA-ZACAS-LABEL: cmpxchg_and_irrelevant_branch:
+; RV64IA-ZACAS: # %bb.0: # %entry
+; RV64IA-ZACAS-NEXT: .LBB4_1: # %do_cmpxchg
+; RV64IA-ZACAS-NEXT: # =>This Inner Loop Header: Depth=1
+; RV64IA-ZACAS-NEXT: mv a4, a1
+; RV64IA-ZACAS-NEXT: amocas.w.aqrl a4, a2, (a0)
+; RV64IA-ZACAS-NEXT: beqz a3, .LBB4_1
+; RV64IA-ZACAS-NEXT: # %bb.2: # %exit
+; RV64IA-ZACAS-NEXT: ret
entry:
br label %do_cmpxchg
do_cmpxchg:
diff --git a/llvm/test/CodeGen/RISCV/atomic-cmpxchg-flag.ll b/llvm/test/CodeGen/RISCV/atomic-cmpxchg-flag.ll
index f25571b5cf25310..4c1ac38be0630e8 100644
--- a/llvm/test/CodeGen/RISCV/atomic-cmpxchg-flag.ll
+++ b/llvm/test/CodeGen/RISCV/atomic-cmpxchg-flag.ll
@@ -1,6 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv64 -mattr=+a -verify-machineinstrs < %s \
; RUN: | FileCheck -check-prefix=RV64IA %s
+; RUN: llc -mtriple=riscv64 -mattr=+a,+experimental-zacas -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefix=RV64IA-ZACAS %s
; This test ensures that the output of the 'lr.w' instruction is sign-extended.
; Previously, the default zero-extension was being used and 'cmp' parameter
@@ -21,6 +23,14 @@ define i1 @cmpxchg_i32_seq_cst_seq_cst(ptr %ptr, i32 signext %cmp,
; RV64IA-NEXT: xor a1, a3, a1
; RV64IA-NEXT: seqz a0, a1
; RV64IA-NEXT: ret
+;
+; RV64IA-ZACAS-LABEL: cmpxchg_i32_seq_cst_seq_cst:
+; RV64IA-ZACAS: # %bb.0: # %entry
+; RV64IA-ZACAS-NEXT: mv a3, a1
+; RV64IA-ZACAS-NEXT: amocas.w.aqrl a3, a2, (a0)
+; RV64IA-ZACAS-NEXT: xor a1, a3, a1
+; RV64IA-ZACAS-NEXT: seqz a0, a1
+; RV64IA-ZACAS-NEXT: ret
i32 signext %val) nounwind {
entry:
%0 = cmpxchg ptr %ptr, i32 %cmp, i32 %val seq_cst seq_cst
diff --git a/llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll b/llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll
index f900b5161f75128..faeb3512c317e1b 100644
--- a/llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll
+++ b/llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll
@@ -3,12 +3,16 @@
; RUN: | FileCheck -check-prefix=RV32I %s
; RUN: llc -mtriple=riscv32 -mattr=+a -verify-machineinstrs < %s \
; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-WMO %s
+; RUN: llc -mtriple=riscv32 -mattr=+a,+experimental-zacas -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-ZACAS %s
; RUN: llc -mtriple=riscv32 -mattr=+a,+experimental-ztso -verify-machineinstrs < %s \
; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-TSO %s
; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
; RUN: | FileCheck -check-prefix=RV64I %s
; RUN: llc -mtriple=riscv64 -mattr=+a -verify-machineinstrs < %s \
; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-WMO %s
+; RUN: llc -mtriple=riscv64 -mattr=+a,+experimental-zacas -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-ZACAS %s
; RUN: llc -mtriple=riscv64 -mattr=+a,+experimental-ztso -verify-machineinstrs < %s \
; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-TSO %s
@@ -125,6 +129,29 @@ define void @cmpxchg_i8_acquire_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind {
; RV32IA-WMO-NEXT: .LBB1_3:
; RV32IA-WMO-NEXT: ret
;
+; RV32IA-ZACAS-LABEL: cmpxchg_i8_acquire_monotonic:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: andi a3, a0, -4
+; RV32IA-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-ZACAS-NEXT: li a4, 255
+; RV32IA-ZACAS-NEXT: sll a4, a4, a0
+; RV32IA-ZACAS-NEXT: andi a1, a1, 255
+; RV32IA-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-ZACAS-NEXT: andi a2, a2, 255
+; RV32IA-ZACAS-NEXT: sll a0, a2, a0
+; RV32IA-ZACAS-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT: lr.w.aq a2, (a3)
+; RV32IA-ZACAS-NEXT: and a5, a2, a4
+; RV32IA-ZACAS-NEXT: bne a5, a1, .LBB1_3
+; RV32IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB1_1 Depth=1
+; RV32IA-ZACAS-NEXT: xor a5, a2, a0
+; RV32IA-ZACAS-NEXT: and a5, a5, a4
+; RV32IA-ZACAS-NEXT: xor a5, a2, a5
+; RV32IA-ZACAS-NEXT: sc.w a5, a5, (a3)
+; RV32IA-ZACAS-NEXT: bnez a5, .LBB1_1
+; RV32IA-ZACAS-NEXT: .LBB1_3:
+; RV32IA-ZACAS-NEXT: ret
+;
; RV32IA-TSO-LABEL: cmpxchg_i8_acquire_monotonic:
; RV32IA-TSO: # %bb.0:
; RV32IA-TSO-NEXT: andi a3, a0, -4
@@ -184,6 +211,29 @@ define void @cmpxchg_i8_acquire_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind {
; RV64IA-WMO-NEXT: .LBB1_3:
; RV64IA-WMO-NEXT: ret
;
+; RV64IA-ZACAS-LABEL: cmpxchg_i8_acquire_monotonic:
+; RV64IA-ZACAS: # %bb.0:
+; RV64IA-ZACAS-NEXT: andi a3, a0, -4
+; RV64IA-ZACAS-NEXT: slli a0, a0, 3
+; RV64IA-ZACAS-NEXT: li a4, 255
+; RV64IA-ZACAS-NEXT: sllw a4, a4, a0
+; RV64IA-ZACAS-NEXT: andi a1, a1, 255
+; RV64IA-ZACAS-NEXT: sllw a1, a1, a0
+; RV64IA-ZACAS-NEXT: andi a2, a2, 255
+; RV64IA-ZACAS-NEXT: sllw a0, a2, a0
+; RV64IA-ZACAS-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-ZACAS-NEXT: lr.w.aq a2, (a3)
+; RV64IA-ZACAS-NEXT: and a5, a2, a4
+; RV64IA-ZACAS-NEXT: bne a5, a1, .LBB1_3
+; RV64IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB1_1 Depth=1
+; RV64IA-ZACAS-NEXT: xor a5, a2, a0
+; RV64IA-ZACAS-NEXT: and a5, a5, a4
+; RV64IA-ZACAS-NEXT: xor a5, a2, a5
+; RV64IA-ZACAS-NEXT: sc.w a5, a5, (a3)
+; RV64IA-ZACAS-NEXT: bnez a5, .LBB1_1
+; RV64IA-ZACAS-NEXT: .LBB1_3:
+; RV64IA-ZACAS-NEXT: ret
+;
; RV64IA-TSO-LABEL: cmpxchg_i8_acquire_monotonic:
; RV64IA-TSO: # %bb.0:
; RV64IA-TSO-NEXT: andi a3, a0, -4
@@ -206,6 +256,50 @@ define void @cmpxchg_i8_acquire_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind {
; RV64IA-TSO-NEXT: bnez a5, .LBB1_1
; RV64IA-TSO-NEXT: .LBB1_3:
; RV64IA-TSO-NEXT: ret
+; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i8_acquire_monotonic:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a3, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: li a4, 255
+; RV32IA-WMO-ZACAS-NEXT: sll a4, a4, a0
+; RV32IA-WMO-ZACAS-NEXT: andi a1, a1, 255
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a2, 255
+; RV32IA-WMO-ZACAS-NEXT: sll a0, a2, a0
+; RV32IA-WMO-ZACAS-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a3)
+; RV32IA-WMO-ZACAS-NEXT: and a5, a2, a4
+; RV32IA-WMO-ZACAS-NEXT: bne a5, a1, .LBB1_3
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB1_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a2, a0
+; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a4
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a2, a5
+; RV32IA-WMO-ZACAS-NEXT: sc.w a5, a5, (a3)
+; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB1_1
+; RV32IA-WMO-ZACAS-NEXT: .LBB1_3:
+; RV32IA-WMO-ZACAS-NEXT: ret
+; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i8_acquire_monotonic:
+; RV64IA-WMO-ZACAS: # %bb.0:
+; RV64IA-WMO-ZACAS-NEXT: andi a3, a0, -4
+; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV64IA-WMO-ZACAS-NEXT: li a4, 255
+; RV64IA-WMO-ZACAS-NEXT: sllw a4, a4, a0
+; RV64IA-WMO-ZACAS-NEXT: andi a1, a1, 255
+; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0
+; RV64IA-WMO-ZACAS-NEXT: andi a2, a2, 255
+; RV64IA-WMO-ZACAS-NEXT: sllw a0, a2, a0
+; RV64IA-WMO-ZACAS-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a3)
+; RV64IA-WMO-ZACAS-NEXT: and a5, a2, a4
+; RV64IA-WMO-ZACAS-NEXT: bne a5, a1, .LBB1_3
+; RV64IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB1_1 Depth=1
+; RV64IA-WMO-ZACAS-NEXT: xor a5, a2, a0
+; RV64IA-WMO-ZACAS-NEXT: and a5, a5, a4
+; RV64IA-WMO-ZACAS-NEXT: xor a5, a2, a5
+; RV64IA-WMO-ZACAS-NEXT: sc.w a5, a5, (a3)
+; RV64IA-WMO-ZACAS-NEXT: bnez a5, .LBB1_1
+; RV64IA-WMO-ZACAS-NEXT: .LBB1_3:
+; RV64IA-WMO-ZACAS-NEXT: ret
%res = cmpxchg ptr %ptr, i8 %cmp, i8 %val acquire monotonic
ret void
}
@@ -247,6 +341,29 @@ define void @cmpxchg_i8_acquire_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind {
; RV32IA-WMO-NEXT: .LBB2_3:
; RV32IA-WMO-NEXT: ret
;
+; RV32IA-ZACAS-LABEL: cmpxchg_i8_acquire_acquire:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: andi a3, a0, -4
+; RV32IA-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-ZACAS-NEXT: li a4, 255
+; RV32IA-ZACAS-NEXT: sll a4, a4, a0
+; RV32IA-ZACAS-NEXT: andi a1, a1, 255
+; RV32IA-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-ZACAS-NEXT: andi a2, a2, 255
+; RV32IA-ZACAS-NEXT: sll a0, a2, a0
+; RV32IA-ZACAS-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT: lr.w.aq a2, (a3)
+; RV32IA-ZACAS-NEXT: and a5, a2, a4
+; RV32IA-ZACAS-NEXT: bne a5, a1, .LBB2_3
+; RV32IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB2_1 Depth=1
+; RV32IA-ZACAS-NEXT: xor a5, a2, a0
+; RV32IA-ZACAS-NEXT: and a5, a5, a4
+; RV32IA-ZACAS-NEXT: xor a5, a2, a5
+; RV32IA-ZACAS-NEXT: sc.w a5, a5, (a3)
+; RV32IA-ZACAS-NEXT: bnez a5, .LBB2_1
+; RV32IA-ZACAS-NEXT: .LBB2_3:
+; RV32IA-ZACAS-NEXT: ret
+;
; RV32IA-TSO-LABEL: cmpxchg_i8_acquire_acquire:
; RV32IA-TSO: # %bb.0:
; RV32IA-TSO-NEXT: andi a3, a0, -4
@@ -306,6 +423,29 @@ define void @cmpxchg_i8_acquire_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind {
; RV64IA-WMO-NEXT: .LBB2_3:
; RV64IA-WMO-NEXT: ret
;
+; RV64IA-ZACAS-LABEL: cmpxchg_i8_acquire_acquire:
+; RV64IA-ZACAS: # %bb.0:
+; RV64IA-ZACAS-NEXT: andi a3, a0, -4
+; RV64IA-ZACAS-NEXT: slli a0, a0, 3
+; RV64IA-ZACAS-NEXT: li a4, 255
+; RV64IA-ZACAS-NEXT: sllw a4, a4, a0
+; RV64IA-ZACAS-NEXT: andi a1, a1, 255
+; RV64IA-ZACAS-NEXT: sllw a1, a1, a0
+; RV64IA-ZACAS-NEXT: andi a2, a2, 255
+; RV64IA-ZACAS-NEXT: sllw a0, a2, a0
+; RV64IA-ZACAS-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-ZACAS-NEXT: lr.w.aq a2, (a3)
+; RV64IA-ZACAS-NEXT: and a5, a2, a4
+; RV64IA-ZACAS-NEXT: bne a5, a1, .LBB2_3
+; RV64IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB2_1 Depth=1
+; RV64IA-ZACAS-NEXT: xor a5, a2, a0
+; RV64IA-ZACAS-NEXT: and a5, a5, a4
+; RV64IA-ZACAS-NEXT: xor a5, a2, a5
+; RV64IA-ZACAS-NEXT: sc.w a5, a5, (a3)
+; RV64IA-ZACAS-NEXT: bnez a5, .LBB2_1
+; RV64IA-ZACAS-NEXT: .LBB2_3:
+; RV64IA-ZACAS-NEXT: ret
+;
; RV64IA-TSO-LABEL: cmpxchg_i8_acquire_acquire:
; RV64IA-TSO: # %bb.0:
; RV64IA-TSO-NEXT: andi a3, a0, -4
@@ -328,6 +468,50 @@ define void @cmpxchg_i8_acquire_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind {
; RV64IA-TSO-NEXT: bnez a5, .LBB2_1
; RV64IA-TSO-NEXT: .LBB2_3:
; RV64IA-TSO-NEXT: ret
+; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i8_acquire_acquire:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a3, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: li a4, 255
+; RV32IA-WMO-ZACAS-NEXT: sll a4, a4, a0
+; RV32IA-WMO-ZACAS-NEXT: andi a1, a1, 255
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a2, 255
+; RV32IA-WMO-ZACAS-NEXT: sll a0, a2, a0
+; RV32IA-WMO-ZACAS-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a3)
+; RV32IA-WMO-ZACAS-NEXT: and a5, a2, a4
+; RV32IA-WMO-ZACAS-NEXT: bne a5, a1, .LBB2_3
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB2_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a2, a0
+; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a4
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a2, a5
+; RV32IA-WMO-ZACAS-NEXT: sc.w a5, a5, (a3)
+; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB2_1
+; RV32IA-WMO-ZACAS-NEXT: .LBB2_3:
+; RV32IA-WMO-ZACAS-NEXT: ret
+; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i8_acquire_acquire:
+; RV64IA-WMO-ZACAS: # %bb.0:
+; RV64IA-WMO-ZACAS-NEXT: andi a3, a0, -4
+; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV64IA-WMO-ZACAS-NEXT: li a4, 255
+; RV64IA-WMO-ZACAS-NEXT: sllw a4, a4, a0
+; RV64IA-WMO-ZACAS-NEXT: andi a1, a1, 255
+; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0
+; RV64IA-WMO-ZACAS-NEXT: andi a2, a2, 255
+; RV64IA-WMO-ZACAS-NEXT: sllw a0, a2, a0
+; RV64IA-WMO-ZACAS-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a3)
+; RV64IA-WMO-ZACAS-NEXT: and a5, a2, a4
+; RV64IA-WMO-ZACAS-NEXT: bne a5, a1, .LBB2_3
+; RV64IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB2_1 Depth=1
+; RV64IA-WMO-ZACAS-NEXT: xor a5, a2, a0
+; RV64IA-WMO-ZACAS-NEXT: and a5, a5, a4
+; RV64IA-WMO-ZACAS-NEXT: xor a5, a2, a5
+; RV64IA-WMO-ZACAS-NEXT: sc.w a5, a5, (a3)
+; RV64IA-WMO-ZACAS-NEXT: bnez a5, .LBB2_1
+; RV64IA-WMO-ZACAS-NEXT: .LBB2_3:
+; RV64IA-WMO-ZACAS-NEXT: ret
%res = cmpxchg ptr %ptr, i8 %cmp, i8 %val acquire acquire
ret void
}
@@ -369,6 +553,29 @@ define void @cmpxchg_i8_release_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind {
; RV32IA-WMO-NEXT: .LBB3_3:
; RV32IA-WMO-NEXT: ret
;
+; RV32IA-ZACAS-LABEL: cmpxchg_i8_release_monotonic:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: andi a3, a0, -4
+; RV32IA-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-ZACAS-NEXT: li a4, 255
+; RV32IA-ZACAS-NEXT: sll a4, a4, a0
+; RV32IA-ZACAS-NEXT: andi a1, a1, 255
+; RV32IA-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-ZACAS-NEXT: andi a2, a2, 255
+; RV32IA-ZACAS-NEXT: sll a0, a2, a0
+; RV32IA-ZACAS-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT: lr.w a2, (a3)
+; RV32IA-ZACAS-NEXT: and a5, a2, a4
+; RV32IA-ZACAS-NEXT: bne a5, a1, .LBB3_3
+; RV32IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB3_1 Depth=1
+; RV32IA-ZACAS-NEXT: xor a5, a2, a0
+; RV32IA-ZACAS-NEXT: and a5, a5, a4
+; RV32IA-ZACAS-NEXT: xor a5, a2, a5
+; RV32IA-ZACAS-NEXT: sc.w.rl a5, a5, (a3)
+; RV32IA-ZACAS-NEXT: bnez a5, .LBB3_1
+; RV32IA-ZACAS-NEXT: .LBB3_3:
+; RV32IA-ZACAS-NEXT: ret
+;
; RV32IA-TSO-LABEL: cmpxchg_i8_release_monotonic:
; RV32IA-TSO: # %bb.0:
; RV32IA-TSO-NEXT: andi a3, a0, -4
@@ -428,6 +635,29 @@ define void @cmpxchg_i8_release_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind {
; RV64IA-WMO-NEXT: .LBB3_3:
; RV64IA-WMO-NEXT: ret
;
+; RV64IA-ZACAS-LABEL: cmpxchg_i8_release_monotonic:
+; RV64IA-ZACAS: # %bb.0:
+; RV64IA-ZACAS-NEXT: andi a3, a0, -4
+; RV64IA-ZACAS-NEXT: slli a0, a0, 3
+; RV64IA-ZACAS-NEXT: li a4, 255
+; RV64IA-ZACAS-NEXT: sllw a4, a4, a0
+; RV64IA-ZACAS-NEXT: andi a1, a1, 255
+; RV64IA-ZACAS-NEXT: sllw a1, a1, a0
+; RV64IA-ZACAS-NEXT: andi a2, a2, 255
+; RV64IA-ZACAS-NEXT: sllw a0, a2, a0
+; RV64IA-ZACAS-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-ZACAS-NEXT: lr.w a2, (a3)
+; RV64IA-ZACAS-NEXT: and a5, a2, a4
+; RV64IA-ZACAS-NEXT: bne a5, a1, .LBB3_3
+; RV64IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB3_1 Depth=1
+; RV64IA-ZACAS-NEXT: xor a5, a2, a0
+; RV64IA-ZACAS-NEXT: and a5, a5, a4
+; RV64IA-ZACAS-NEXT: xor a5, a2, a5
+; RV64IA-ZACAS-NEXT: sc.w.rl a5, a5, (a3)
+; RV64IA-ZACAS-NEXT: bnez a5, .LBB3_1
+; RV64IA-ZACAS-NEXT: .LBB3_3:
+; RV64IA-ZACAS-NEXT: ret
+;
; RV64IA-TSO-LABEL: cmpxchg_i8_release_monotonic:
; RV64IA-TSO: # %bb.0:
; RV64IA-TSO-NEXT: andi a3, a0, -4
@@ -450,6 +680,50 @@ define void @cmpxchg_i8_release_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind {
; RV64IA-TSO-NEXT: bnez a5, .LBB3_1
; RV64IA-TSO-NEXT: .LBB3_3:
; RV64IA-TSO-NEXT: ret
+; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i8_release_monotonic:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a3, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: li a4, 255
+; RV32IA-WMO-ZACAS-NEXT: sll a4, a4, a0
+; RV32IA-WMO-ZACAS-NEXT: andi a1, a1, 255
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a2, 255
+; RV32IA-WMO-ZACAS-NEXT: sll a0, a2, a0
+; RV32IA-WMO-ZACAS-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w a2, (a3)
+; RV32IA-WMO-ZACAS-NEXT: and a5, a2, a4
+; RV32IA-WMO-ZACAS-NEXT: bne a5, a1, .LBB3_3
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB3_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a2, a0
+; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a4
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a2, a5
+; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a3)
+; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB3_1
+; RV32IA-WMO-ZACAS-NEXT: .LBB3_3:
+; RV32IA-WMO-ZACAS-NEXT: ret
+; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i8_release_monotonic:
+; RV64IA-WMO-ZACAS: # %bb.0:
+; RV64IA-WMO-ZACAS-NEXT: andi a3, a0, -4
+; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV64IA-WMO-ZACAS-NEXT: li a4, 255
+; RV64IA-WMO-ZACAS-NEXT: sllw a4, a4, a0
+; RV64IA-WMO-ZACAS-NEXT: andi a1, a1, 255
+; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0
+; RV64IA-WMO-ZACAS-NEXT: andi a2, a2, 255
+; RV64IA-WMO-ZACAS-NEXT: sllw a0, a2, a0
+; RV64IA-WMO-ZACAS-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-ZACAS-NEXT: lr.w a2, (a3)
+; RV64IA-WMO-ZACAS-NEXT: and a5, a2, a4
+; RV64IA-WMO-ZACAS-NEXT: bne a5, a1, .LBB3_3
+; RV64IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB3_1 Depth=1
+; RV64IA-WMO-ZACAS-NEXT: xor a5, a2, a0
+; RV64IA-WMO-ZACAS-NEXT: and a5, a5, a4
+; RV64IA-WMO-ZACAS-NEXT: xor a5, a2, a5
+; RV64IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a3)
+; RV64IA-WMO-ZACAS-NEXT: bnez a5, .LBB3_1
+; RV64IA-WMO-ZACAS-NEXT: .LBB3_3:
+; RV64IA-WMO-ZACAS-NEXT: ret
%res = cmpxchg ptr %ptr, i8 %cmp, i8 %val release monotonic
ret void
}
@@ -491,6 +765,29 @@ define void @cmpxchg_i8_release_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind {
; RV32IA-WMO-NEXT: .LBB4_3:
; RV32IA-WMO-NEXT: ret
;
+; RV32IA-ZACAS-LABEL: cmpxchg_i8_release_acquire:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: andi a3, a0, -4
+; RV32IA-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-ZACAS-NEXT: li a4, 255
+; RV32IA-ZACAS-NEXT: sll a4, a4, a0
+; RV32IA-ZACAS-NEXT: andi a1, a1, 255
+; RV32IA-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-ZACAS-NEXT: andi a2, a2, 255
+; RV32IA-ZACAS-NEXT: sll a0, a2, a0
+; RV32IA-ZACAS-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT: lr.w.aq a2, (a3)
+; RV32IA-ZACAS-NEXT: and a5, a2, a4
+; RV32IA-ZACAS-NEXT: bne a5, a1, .LBB4_3
+; RV32IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB4_1 Depth=1
+; RV32IA-ZACAS-NEXT: xor a5, a2, a0
+; RV32IA-ZACAS-NEXT: and a5, a5, a4
+; RV32IA-ZACAS-NEXT: xor a5, a2, a5
+; RV32IA-ZACAS-NEXT: sc.w.rl a5, a5, (a3)
+; RV32IA-ZACAS-NEXT: bnez a5, .LBB4_1
+; RV32IA-ZACAS-NEXT: .LBB4_3:
+; RV32IA-ZACAS-NEXT: ret
+;
; RV32IA-TSO-LABEL: cmpxchg_i8_release_acquire:
; RV32IA-TSO: # %bb.0:
; RV32IA-TSO-NEXT: andi a3, a0, -4
@@ -550,6 +847,29 @@ define void @cmpxchg_i8_release_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind {
; RV64IA-WMO-NEXT: .LBB4_3:
; RV64IA-WMO-NEXT: ret
;
+; RV64IA-ZACAS-LABEL: cmpxchg_i8_release_acquire:
+; RV64IA-ZACAS: # %bb.0:
+; RV64IA-ZACAS-NEXT: andi a3, a0, -4
+; RV64IA-ZACAS-NEXT: slli a0, a0, 3
+; RV64IA-ZACAS-NEXT: li a4, 255
+; RV64IA-ZACAS-NEXT: sllw a4, a4, a0
+; RV64IA-ZACAS-NEXT: andi a1, a1, 255
+; RV64IA-ZACAS-NEXT: sllw a1, a1, a0
+; RV64IA-ZACAS-NEXT: andi a2, a2, 255
+; RV64IA-ZACAS-NEXT: sllw a0, a2, a0
+; RV64IA-ZACAS-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-ZACAS-NEXT: lr.w.aq a2, (a3)
+; RV64IA-ZACAS-NEXT: and a5, a2, a4
+; RV64IA-ZACAS-NEXT: bne a5, a1, .LBB4_3
+; RV64IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB4_1 Depth=1
+; RV64IA-ZACAS-NEXT: xor a5, a2, a0
+; RV64IA-ZACAS-NEXT: and a5, a5, a4
+; RV64IA-ZACAS-NEXT: xor a5, a2, a5
+; RV64IA-ZACAS-NEXT: sc.w.rl a5, a5, (a3)
+; RV64IA-ZACAS-NEXT: bnez a5, .LBB4_1
+; RV64IA-ZACAS-NEXT: .LBB4_3:
+; RV64IA-ZACAS-NEXT: ret
+;
; RV64IA-TSO-LABEL: cmpxchg_i8_release_acquire:
; RV64IA-TSO: # %bb.0:
; RV64IA-TSO-NEXT: andi a3, a0, -4
@@ -572,6 +892,50 @@ define void @cmpxchg_i8_release_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind {
; RV64IA-TSO-NEXT: bnez a5, .LBB4_1
; RV64IA-TSO-NEXT: .LBB4_3:
; RV64IA-TSO-NEXT: ret
+; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i8_release_acquire:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a3, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: li a4, 255
+; RV32IA-WMO-ZACAS-NEXT: sll a4, a4, a0
+; RV32IA-WMO-ZACAS-NEXT: andi a1, a1, 255
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a2, 255
+; RV32IA-WMO-ZACAS-NEXT: sll a0, a2, a0
+; RV32IA-WMO-ZACAS-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a3)
+; RV32IA-WMO-ZACAS-NEXT: and a5, a2, a4
+; RV32IA-WMO-ZACAS-NEXT: bne a5, a1, .LBB4_3
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB4_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a2, a0
+; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a4
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a2, a5
+; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a3)
+; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB4_1
+; RV32IA-WMO-ZACAS-NEXT: .LBB4_3:
+; RV32IA-WMO-ZACAS-NEXT: ret
+; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i8_release_acquire:
+; RV64IA-WMO-ZACAS: # %bb.0:
+; RV64IA-WMO-ZACAS-NEXT: andi a3, a0, -4
+; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV64IA-WMO-ZACAS-NEXT: li a4, 255
+; RV64IA-WMO-ZACAS-NEXT: sllw a4, a4, a0
+; RV64IA-WMO-ZACAS-NEXT: andi a1, a1, 255
+; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0
+; RV64IA-WMO-ZACAS-NEXT: andi a2, a2, 255
+; RV64IA-WMO-ZACAS-NEXT: sllw a0, a2, a0
+; RV64IA-WMO-ZACAS-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a3)
+; RV64IA-WMO-ZACAS-NEXT: and a5, a2, a4
+; RV64IA-WMO-ZACAS-NEXT: bne a5, a1, .LBB4_3
+; RV64IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB4_1 Depth=1
+; RV64IA-WMO-ZACAS-NEXT: xor a5, a2, a0
+; RV64IA-WMO-ZACAS-NEXT: and a5, a5, a4
+; RV64IA-WMO-ZACAS-NEXT: xor a5, a2, a5
+; RV64IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a3)
+; RV64IA-WMO-ZACAS-NEXT: bnez a5, .LBB4_1
+; RV64IA-WMO-ZACAS-NEXT: .LBB4_3:
+; RV64IA-WMO-ZACAS-NEXT: ret
%res = cmpxchg ptr %ptr, i8 %cmp, i8 %val release acquire
ret void
}
@@ -613,6 +977,29 @@ define void @cmpxchg_i8_acq_rel_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind {
; RV32IA-WMO-NEXT: .LBB5_3:
; RV32IA-WMO-NEXT: ret
;
+; RV32IA-ZACAS-LABEL: cmpxchg_i8_acq_rel_monotonic:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: andi a3, a0, -4
+; RV32IA-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-ZACAS-NEXT: li a4, 255
+; RV32IA-ZACAS-NEXT: sll a4, a4, a0
+; RV32IA-ZACAS-NEXT: andi a1, a1, 255
+; RV32IA-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-ZACAS-NEXT: andi a2, a2, 255
+; RV32IA-ZACAS-NEXT: sll a0, a2, a0
+; RV32IA-ZACAS-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT: lr.w.aq a2, (a3)
+; RV32IA-ZACAS-NEXT: and a5, a2, a4
+; RV32IA-ZACAS-NEXT: bne a5, a1, .LBB5_3
+; RV32IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB5_1 Depth=1
+; RV32IA-ZACAS-NEXT: xor a5, a2, a0
+; RV32IA-ZACAS-NEXT: and a5, a5, a4
+; RV32IA-ZACAS-NEXT: xor a5, a2, a5
+; RV32IA-ZACAS-NEXT: sc.w.rl a5, a5, (a3)
+; RV32IA-ZACAS-NEXT: bnez a5, .LBB5_1
+; RV32IA-ZACAS-NEXT: .LBB5_3:
+; RV32IA-ZACAS-NEXT: ret
+;
; RV32IA-TSO-LABEL: cmpxchg_i8_acq_rel_monotonic:
; RV32IA-TSO: # %bb.0:
; RV32IA-TSO-NEXT: andi a3, a0, -4
@@ -672,6 +1059,29 @@ define void @cmpxchg_i8_acq_rel_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind {
; RV64IA-WMO-NEXT: .LBB5_3:
; RV64IA-WMO-NEXT: ret
;
+; RV64IA-ZACAS-LABEL: cmpxchg_i8_acq_rel_monotonic:
+; RV64IA-ZACAS: # %bb.0:
+; RV64IA-ZACAS-NEXT: andi a3, a0, -4
+; RV64IA-ZACAS-NEXT: slli a0, a0, 3
+; RV64IA-ZACAS-NEXT: li a4, 255
+; RV64IA-ZACAS-NEXT: sllw a4, a4, a0
+; RV64IA-ZACAS-NEXT: andi a1, a1, 255
+; RV64IA-ZACAS-NEXT: sllw a1, a1, a0
+; RV64IA-ZACAS-NEXT: andi a2, a2, 255
+; RV64IA-ZACAS-NEXT: sllw a0, a2, a0
+; RV64IA-ZACAS-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-ZACAS-NEXT: lr.w.aq a2, (a3)
+; RV64IA-ZACAS-NEXT: and a5, a2, a4
+; RV64IA-ZACAS-NEXT: bne a5, a1, .LBB5_3
+; RV64IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB5_1 Depth=1
+; RV64IA-ZACAS-NEXT: xor a5, a2, a0
+; RV64IA-ZACAS-NEXT: and a5, a5, a4
+; RV64IA-ZACAS-NEXT: xor a5, a2, a5
+; RV64IA-ZACAS-NEXT: sc.w.rl a5, a5, (a3)
+; RV64IA-ZACAS-NEXT: bnez a5, .LBB5_1
+; RV64IA-ZACAS-NEXT: .LBB5_3:
+; RV64IA-ZACAS-NEXT: ret
+;
; RV64IA-TSO-LABEL: cmpxchg_i8_acq_rel_monotonic:
; RV64IA-TSO: # %bb.0:
; RV64IA-TSO-NEXT: andi a3, a0, -4
@@ -694,6 +1104,50 @@ define void @cmpxchg_i8_acq_rel_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind {
; RV64IA-TSO-NEXT: bnez a5, .LBB5_1
; RV64IA-TSO-NEXT: .LBB5_3:
; RV64IA-TSO-NEXT: ret
+; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i8_acq_rel_monotonic:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a3, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: li a4, 255
+; RV32IA-WMO-ZACAS-NEXT: sll a4, a4, a0
+; RV32IA-WMO-ZACAS-NEXT: andi a1, a1, 255
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a2, 255
+; RV32IA-WMO-ZACAS-NEXT: sll a0, a2, a0
+; RV32IA-WMO-ZACAS-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a3)
+; RV32IA-WMO-ZACAS-NEXT: and a5, a2, a4
+; RV32IA-WMO-ZACAS-NEXT: bne a5, a1, .LBB5_3
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB5_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a2, a0
+; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a4
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a2, a5
+; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a3)
+; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB5_1
+; RV32IA-WMO-ZACAS-NEXT: .LBB5_3:
+; RV32IA-WMO-ZACAS-NEXT: ret
+; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i8_acq_rel_monotonic:
+; RV64IA-WMO-ZACAS: # %bb.0:
+; RV64IA-WMO-ZACAS-NEXT: andi a3, a0, -4
+; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV64IA-WMO-ZACAS-NEXT: li a4, 255
+; RV64IA-WMO-ZACAS-NEXT: sllw a4, a4, a0
+; RV64IA-WMO-ZACAS-NEXT: andi a1, a1, 255
+; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0
+; RV64IA-WMO-ZACAS-NEXT: andi a2, a2, 255
+; RV64IA-WMO-ZACAS-NEXT: sllw a0, a2, a0
+; RV64IA-WMO-ZACAS-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a3)
+; RV64IA-WMO-ZACAS-NEXT: and a5, a2, a4
+; RV64IA-WMO-ZACAS-NEXT: bne a5, a1, .LBB5_3
+; RV64IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB5_1 Depth=1
+; RV64IA-WMO-ZACAS-NEXT: xor a5, a2, a0
+; RV64IA-WMO-ZACAS-NEXT: and a5, a5, a4
+; RV64IA-WMO-ZACAS-NEXT: xor a5, a2, a5
+; RV64IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a3)
+; RV64IA-WMO-ZACAS-NEXT: bnez a5, .LBB5_1
+; RV64IA-WMO-ZACAS-NEXT: .LBB5_3:
+; RV64IA-WMO-ZACAS-NEXT: ret
%res = cmpxchg ptr %ptr, i8 %cmp, i8 %val acq_rel monotonic
ret void
}
@@ -735,6 +1189,29 @@ define void @cmpxchg_i8_acq_rel_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind {
; RV32IA-WMO-NEXT: .LBB6_3:
; RV32IA-WMO-NEXT: ret
;
+; RV32IA-ZACAS-LABEL: cmpxchg_i8_acq_rel_acquire:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: andi a3, a0, -4
+; RV32IA-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-ZACAS-NEXT: li a4, 255
+; RV32IA-ZACAS-NEXT: sll a4, a4, a0
+; RV32IA-ZACAS-NEXT: andi a1, a1, 255
+; RV32IA-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-ZACAS-NEXT: andi a2, a2, 255
+; RV32IA-ZACAS-NEXT: sll a0, a2, a0
+; RV32IA-ZACAS-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT: lr.w.aq a2, (a3)
+; RV32IA-ZACAS-NEXT: and a5, a2, a4
+; RV32IA-ZACAS-NEXT: bne a5, a1, .LBB6_3
+; RV32IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB6_1 Depth=1
+; RV32IA-ZACAS-NEXT: xor a5, a2, a0
+; RV32IA-ZACAS-NEXT: and a5, a5, a4
+; RV32IA-ZACAS-NEXT: xor a5, a2, a5
+; RV32IA-ZACAS-NEXT: sc.w.rl a5, a5, (a3)
+; RV32IA-ZACAS-NEXT: bnez a5, .LBB6_1
+; RV32IA-ZACAS-NEXT: .LBB6_3:
+; RV32IA-ZACAS-NEXT: ret
+;
; RV32IA-TSO-LABEL: cmpxchg_i8_acq_rel_acquire:
; RV32IA-TSO: # %bb.0:
; RV32IA-TSO-NEXT: andi a3, a0, -4
@@ -794,6 +1271,29 @@ define void @cmpxchg_i8_acq_rel_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind {
; RV64IA-WMO-NEXT: .LBB6_3:
; RV64IA-WMO-NEXT: ret
;
+; RV64IA-ZACAS-LABEL: cmpxchg_i8_acq_rel_acquire:
+; RV64IA-ZACAS: # %bb.0:
+; RV64IA-ZACAS-NEXT: andi a3, a0, -4
+; RV64IA-ZACAS-NEXT: slli a0, a0, 3
+; RV64IA-ZACAS-NEXT: li a4, 255
+; RV64IA-ZACAS-NEXT: sllw a4, a4, a0
+; RV64IA-ZACAS-NEXT: andi a1, a1, 255
+; RV64IA-ZACAS-NEXT: sllw a1, a1, a0
+; RV64IA-ZACAS-NEXT: andi a2, a2, 255
+; RV64IA-ZACAS-NEXT: sllw a0, a2, a0
+; RV64IA-ZACAS-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-ZACAS-NEXT: lr.w.aq a2, (a3)
+; RV64IA-ZACAS-NEXT: and a5, a2, a4
+; RV64IA-ZACAS-NEXT: bne a5, a1, .LBB6_3
+; RV64IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB6_1 Depth=1
+; RV64IA-ZACAS-NEXT: xor a5, a2, a0
+; RV64IA-ZACAS-NEXT: and a5, a5, a4
+; RV64IA-ZACAS-NEXT: xor a5, a2, a5
+; RV64IA-ZACAS-NEXT: sc.w.rl a5, a5, (a3)
+; RV64IA-ZACAS-NEXT: bnez a5, .LBB6_1
+; RV64IA-ZACAS-NEXT: .LBB6_3:
+; RV64IA-ZACAS-NEXT: ret
+;
; RV64IA-TSO-LABEL: cmpxchg_i8_acq_rel_acquire:
; RV64IA-TSO: # %bb.0:
; RV64IA-TSO-NEXT: andi a3, a0, -4
@@ -816,6 +1316,50 @@ define void @cmpxchg_i8_acq_rel_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind {
; RV64IA-TSO-NEXT: bnez a5, .LBB6_1
; RV64IA-TSO-NEXT: .LBB6_3:
; RV64IA-TSO-NEXT: ret
+; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i8_acq_rel_acquire:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a3, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: li a4, 255
+; RV32IA-WMO-ZACAS-NEXT: sll a4, a4, a0
+; RV32IA-WMO-ZACAS-NEXT: andi a1, a1, 255
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a2, 255
+; RV32IA-WMO-ZACAS-NEXT: sll a0, a2, a0
+; RV32IA-WMO-ZACAS-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a3)
+; RV32IA-WMO-ZACAS-NEXT: and a5, a2, a4
+; RV32IA-WMO-ZACAS-NEXT: bne a5, a1, .LBB6_3
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB6_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a2, a0
+; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a4
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a2, a5
+; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a3)
+; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB6_1
+; RV32IA-WMO-ZACAS-NEXT: .LBB6_3:
+; RV32IA-WMO-ZACAS-NEXT: ret
+; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i8_acq_rel_acquire:
+; RV64IA-WMO-ZACAS: # %bb.0:
+; RV64IA-WMO-ZACAS-NEXT: andi a3, a0, -4
+; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV64IA-WMO-ZACAS-NEXT: li a4, 255
+; RV64IA-WMO-ZACAS-NEXT: sllw a4, a4, a0
+; RV64IA-WMO-ZACAS-NEXT: andi a1, a1, 255
+; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0
+; RV64IA-WMO-ZACAS-NEXT: andi a2, a2, 255
+; RV64IA-WMO-ZACAS-NEXT: sllw a0, a2, a0
+; RV64IA-WMO-ZACAS-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a3)
+; RV64IA-WMO-ZACAS-NEXT: and a5, a2, a4
+; RV64IA-WMO-ZACAS-NEXT: bne a5, a1, .LBB6_3
+; RV64IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB6_1 Depth=1
+; RV64IA-WMO-ZACAS-NEXT: xor a5, a2, a0
+; RV64IA-WMO-ZACAS-NEXT: and a5, a5, a4
+; RV64IA-WMO-ZACAS-NEXT: xor a5, a2, a5
+; RV64IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a3)
+; RV64IA-WMO-ZACAS-NEXT: bnez a5, .LBB6_1
+; RV64IA-WMO-ZACAS-NEXT: .LBB6_3:
+; RV64IA-WMO-ZACAS-NEXT: ret
%res = cmpxchg ptr %ptr, i8 %cmp, i8 %val acq_rel acquire
ret void
}
@@ -1164,6 +1708,30 @@ define void @cmpxchg_i16_acquire_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin
; RV32IA-WMO-NEXT: .LBB11_3:
; RV32IA-WMO-NEXT: ret
;
+; RV32IA-ZACAS-LABEL: cmpxchg_i16_acquire_monotonic:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: andi a3, a0, -4
+; RV32IA-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-ZACAS-NEXT: lui a4, 16
+; RV32IA-ZACAS-NEXT: addi a4, a4, -1
+; RV32IA-ZACAS-NEXT: sll a5, a4, a0
+; RV32IA-ZACAS-NEXT: and a1, a1, a4
+; RV32IA-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-ZACAS-NEXT: and a2, a2, a4
+; RV32IA-ZACAS-NEXT: sll a0, a2, a0
+; RV32IA-ZACAS-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT: lr.w.aq a2, (a3)
+; RV32IA-ZACAS-NEXT: and a4, a2, a5
+; RV32IA-ZACAS-NEXT: bne a4, a1, .LBB11_3
+; RV32IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB11_1 Depth=1
+; RV32IA-ZACAS-NEXT: xor a4, a2, a0
+; RV32IA-ZACAS-NEXT: and a4, a4, a5
+; RV32IA-ZACAS-NEXT: xor a4, a2, a4
+; RV32IA-ZACAS-NEXT: sc.w a4, a4, (a3)
+; RV32IA-ZACAS-NEXT: bnez a4, .LBB11_1
+; RV32IA-ZACAS-NEXT: .LBB11_3:
+; RV32IA-ZACAS-NEXT: ret
+;
; RV32IA-TSO-LABEL: cmpxchg_i16_acquire_monotonic:
; RV32IA-TSO: # %bb.0:
; RV32IA-TSO-NEXT: andi a3, a0, -4
@@ -1225,6 +1793,30 @@ define void @cmpxchg_i16_acquire_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin
; RV64IA-WMO-NEXT: .LBB11_3:
; RV64IA-WMO-NEXT: ret
;
+; RV64IA-ZACAS-LABEL: cmpxchg_i16_acquire_monotonic:
+; RV64IA-ZACAS: # %bb.0:
+; RV64IA-ZACAS-NEXT: andi a3, a0, -4
+; RV64IA-ZACAS-NEXT: slli a0, a0, 3
+; RV64IA-ZACAS-NEXT: lui a4, 16
+; RV64IA-ZACAS-NEXT: addiw a4, a4, -1
+; RV64IA-ZACAS-NEXT: sllw a5, a4, a0
+; RV64IA-ZACAS-NEXT: and a1, a1, a4
+; RV64IA-ZACAS-NEXT: sllw a1, a1, a0
+; RV64IA-ZACAS-NEXT: and a2, a2, a4
+; RV64IA-ZACAS-NEXT: sllw a0, a2, a0
+; RV64IA-ZACAS-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-ZACAS-NEXT: lr.w.aq a2, (a3)
+; RV64IA-ZACAS-NEXT: and a4, a2, a5
+; RV64IA-ZACAS-NEXT: bne a4, a1, .LBB11_3
+; RV64IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB11_1 Depth=1
+; RV64IA-ZACAS-NEXT: xor a4, a2, a0
+; RV64IA-ZACAS-NEXT: and a4, a4, a5
+; RV64IA-ZACAS-NEXT: xor a4, a2, a4
+; RV64IA-ZACAS-NEXT: sc.w a4, a4, (a3)
+; RV64IA-ZACAS-NEXT: bnez a4, .LBB11_1
+; RV64IA-ZACAS-NEXT: .LBB11_3:
+; RV64IA-ZACAS-NEXT: ret
+;
; RV64IA-TSO-LABEL: cmpxchg_i16_acquire_monotonic:
; RV64IA-TSO: # %bb.0:
; RV64IA-TSO-NEXT: andi a3, a0, -4
@@ -1248,6 +1840,52 @@ define void @cmpxchg_i16_acquire_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin
; RV64IA-TSO-NEXT: bnez a4, .LBB11_1
; RV64IA-TSO-NEXT: .LBB11_3:
; RV64IA-TSO-NEXT: ret
+; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i16_acquire_monotonic:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a3, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: lui a4, 16
+; RV32IA-WMO-ZACAS-NEXT: addi a4, a4, -1
+; RV32IA-WMO-ZACAS-NEXT: sll a5, a4, a0
+; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a4
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: and a2, a2, a4
+; RV32IA-WMO-ZACAS-NEXT: sll a0, a2, a0
+; RV32IA-WMO-ZACAS-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a3)
+; RV32IA-WMO-ZACAS-NEXT: and a4, a2, a5
+; RV32IA-WMO-ZACAS-NEXT: bne a4, a1, .LBB11_3
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB11_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: xor a4, a2, a0
+; RV32IA-WMO-ZACAS-NEXT: and a4, a4, a5
+; RV32IA-WMO-ZACAS-NEXT: xor a4, a2, a4
+; RV32IA-WMO-ZACAS-NEXT: sc.w a4, a4, (a3)
+; RV32IA-WMO-ZACAS-NEXT: bnez a4, .LBB11_1
+; RV32IA-WMO-ZACAS-NEXT: .LBB11_3:
+; RV32IA-WMO-ZACAS-NEXT: ret
+; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i16_acquire_monotonic:
+; RV64IA-WMO-ZACAS: # %bb.0:
+; RV64IA-WMO-ZACAS-NEXT: andi a3, a0, -4
+; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV64IA-WMO-ZACAS-NEXT: lui a4, 16
+; RV64IA-WMO-ZACAS-NEXT: addiw a4, a4, -1
+; RV64IA-WMO-ZACAS-NEXT: sllw a5, a4, a0
+; RV64IA-WMO-ZACAS-NEXT: and a1, a1, a4
+; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0
+; RV64IA-WMO-ZACAS-NEXT: and a2, a2, a4
+; RV64IA-WMO-ZACAS-NEXT: sllw a0, a2, a0
+; RV64IA-WMO-ZACAS-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a3)
+; RV64IA-WMO-ZACAS-NEXT: and a4, a2, a5
+; RV64IA-WMO-ZACAS-NEXT: bne a4, a1, .LBB11_3
+; RV64IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB11_1 Depth=1
+; RV64IA-WMO-ZACAS-NEXT: xor a4, a2, a0
+; RV64IA-WMO-ZACAS-NEXT: and a4, a4, a5
+; RV64IA-WMO-ZACAS-NEXT: xor a4, a2, a4
+; RV64IA-WMO-ZACAS-NEXT: sc.w a4, a4, (a3)
+; RV64IA-WMO-ZACAS-NEXT: bnez a4, .LBB11_1
+; RV64IA-WMO-ZACAS-NEXT: .LBB11_3:
+; RV64IA-WMO-ZACAS-NEXT: ret
%res = cmpxchg ptr %ptr, i16 %cmp, i16 %val acquire monotonic
ret void
}
@@ -1290,6 +1928,30 @@ define void @cmpxchg_i16_acquire_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind
; RV32IA-WMO-NEXT: .LBB12_3:
; RV32IA-WMO-NEXT: ret
;
+; RV32IA-ZACAS-LABEL: cmpxchg_i16_acquire_acquire:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: andi a3, a0, -4
+; RV32IA-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-ZACAS-NEXT: lui a4, 16
+; RV32IA-ZACAS-NEXT: addi a4, a4, -1
+; RV32IA-ZACAS-NEXT: sll a5, a4, a0
+; RV32IA-ZACAS-NEXT: and a1, a1, a4
+; RV32IA-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-ZACAS-NEXT: and a2, a2, a4
+; RV32IA-ZACAS-NEXT: sll a0, a2, a0
+; RV32IA-ZACAS-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT: lr.w.aq a2, (a3)
+; RV32IA-ZACAS-NEXT: and a4, a2, a5
+; RV32IA-ZACAS-NEXT: bne a4, a1, .LBB12_3
+; RV32IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB12_1 Depth=1
+; RV32IA-ZACAS-NEXT: xor a4, a2, a0
+; RV32IA-ZACAS-NEXT: and a4, a4, a5
+; RV32IA-ZACAS-NEXT: xor a4, a2, a4
+; RV32IA-ZACAS-NEXT: sc.w a4, a4, (a3)
+; RV32IA-ZACAS-NEXT: bnez a4, .LBB12_1
+; RV32IA-ZACAS-NEXT: .LBB12_3:
+; RV32IA-ZACAS-NEXT: ret
+;
; RV32IA-TSO-LABEL: cmpxchg_i16_acquire_acquire:
; RV32IA-TSO: # %bb.0:
; RV32IA-TSO-NEXT: andi a3, a0, -4
@@ -1351,6 +2013,30 @@ define void @cmpxchg_i16_acquire_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind
; RV64IA-WMO-NEXT: .LBB12_3:
; RV64IA-WMO-NEXT: ret
;
+; RV64IA-ZACAS-LABEL: cmpxchg_i16_acquire_acquire:
+; RV64IA-ZACAS: # %bb.0:
+; RV64IA-ZACAS-NEXT: andi a3, a0, -4
+; RV64IA-ZACAS-NEXT: slli a0, a0, 3
+; RV64IA-ZACAS-NEXT: lui a4, 16
+; RV64IA-ZACAS-NEXT: addiw a4, a4, -1
+; RV64IA-ZACAS-NEXT: sllw a5, a4, a0
+; RV64IA-ZACAS-NEXT: and a1, a1, a4
+; RV64IA-ZACAS-NEXT: sllw a1, a1, a0
+; RV64IA-ZACAS-NEXT: and a2, a2, a4
+; RV64IA-ZACAS-NEXT: sllw a0, a2, a0
+; RV64IA-ZACAS-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-ZACAS-NEXT: lr.w.aq a2, (a3)
+; RV64IA-ZACAS-NEXT: and a4, a2, a5
+; RV64IA-ZACAS-NEXT: bne a4, a1, .LBB12_3
+; RV64IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB12_1 Depth=1
+; RV64IA-ZACAS-NEXT: xor a4, a2, a0
+; RV64IA-ZACAS-NEXT: and a4, a4, a5
+; RV64IA-ZACAS-NEXT: xor a4, a2, a4
+; RV64IA-ZACAS-NEXT: sc.w a4, a4, (a3)
+; RV64IA-ZACAS-NEXT: bnez a4, .LBB12_1
+; RV64IA-ZACAS-NEXT: .LBB12_3:
+; RV64IA-ZACAS-NEXT: ret
+;
; RV64IA-TSO-LABEL: cmpxchg_i16_acquire_acquire:
; RV64IA-TSO: # %bb.0:
; RV64IA-TSO-NEXT: andi a3, a0, -4
@@ -1374,6 +2060,52 @@ define void @cmpxchg_i16_acquire_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind
; RV64IA-TSO-NEXT: bnez a4, .LBB12_1
; RV64IA-TSO-NEXT: .LBB12_3:
; RV64IA-TSO-NEXT: ret
+; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i16_acquire_acquire:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a3, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: lui a4, 16
+; RV32IA-WMO-ZACAS-NEXT: addi a4, a4, -1
+; RV32IA-WMO-ZACAS-NEXT: sll a5, a4, a0
+; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a4
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: and a2, a2, a4
+; RV32IA-WMO-ZACAS-NEXT: sll a0, a2, a0
+; RV32IA-WMO-ZACAS-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a3)
+; RV32IA-WMO-ZACAS-NEXT: and a4, a2, a5
+; RV32IA-WMO-ZACAS-NEXT: bne a4, a1, .LBB12_3
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB12_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: xor a4, a2, a0
+; RV32IA-WMO-ZACAS-NEXT: and a4, a4, a5
+; RV32IA-WMO-ZACAS-NEXT: xor a4, a2, a4
+; RV32IA-WMO-ZACAS-NEXT: sc.w a4, a4, (a3)
+; RV32IA-WMO-ZACAS-NEXT: bnez a4, .LBB12_1
+; RV32IA-WMO-ZACAS-NEXT: .LBB12_3:
+; RV32IA-WMO-ZACAS-NEXT: ret
+; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i16_acquire_acquire:
+; RV64IA-WMO-ZACAS: # %bb.0:
+; RV64IA-WMO-ZACAS-NEXT: andi a3, a0, -4
+; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV64IA-WMO-ZACAS-NEXT: lui a4, 16
+; RV64IA-WMO-ZACAS-NEXT: addiw a4, a4, -1
+; RV64IA-WMO-ZACAS-NEXT: sllw a5, a4, a0
+; RV64IA-WMO-ZACAS-NEXT: and a1, a1, a4
+; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0
+; RV64IA-WMO-ZACAS-NEXT: and a2, a2, a4
+; RV64IA-WMO-ZACAS-NEXT: sllw a0, a2, a0
+; RV64IA-WMO-ZACAS-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a3)
+; RV64IA-WMO-ZACAS-NEXT: and a4, a2, a5
+; RV64IA-WMO-ZACAS-NEXT: bne a4, a1, .LBB12_3
+; RV64IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB12_1 Depth=1
+; RV64IA-WMO-ZACAS-NEXT: xor a4, a2, a0
+; RV64IA-WMO-ZACAS-NEXT: and a4, a4, a5
+; RV64IA-WMO-ZACAS-NEXT: xor a4, a2, a4
+; RV64IA-WMO-ZACAS-NEXT: sc.w a4, a4, (a3)
+; RV64IA-WMO-ZACAS-NEXT: bnez a4, .LBB12_1
+; RV64IA-WMO-ZACAS-NEXT: .LBB12_3:
+; RV64IA-WMO-ZACAS-NEXT: ret
%res = cmpxchg ptr %ptr, i16 %cmp, i16 %val acquire acquire
ret void
}
@@ -1416,6 +2148,30 @@ define void @cmpxchg_i16_release_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin
; RV32IA-WMO-NEXT: .LBB13_3:
; RV32IA-WMO-NEXT: ret
;
+; RV32IA-ZACAS-LABEL: cmpxchg_i16_release_monotonic:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: andi a3, a0, -4
+; RV32IA-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-ZACAS-NEXT: lui a4, 16
+; RV32IA-ZACAS-NEXT: addi a4, a4, -1
+; RV32IA-ZACAS-NEXT: sll a5, a4, a0
+; RV32IA-ZACAS-NEXT: and a1, a1, a4
+; RV32IA-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-ZACAS-NEXT: and a2, a2, a4
+; RV32IA-ZACAS-NEXT: sll a0, a2, a0
+; RV32IA-ZACAS-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT: lr.w a2, (a3)
+; RV32IA-ZACAS-NEXT: and a4, a2, a5
+; RV32IA-ZACAS-NEXT: bne a4, a1, .LBB13_3
+; RV32IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB13_1 Depth=1
+; RV32IA-ZACAS-NEXT: xor a4, a2, a0
+; RV32IA-ZACAS-NEXT: and a4, a4, a5
+; RV32IA-ZACAS-NEXT: xor a4, a2, a4
+; RV32IA-ZACAS-NEXT: sc.w.rl a4, a4, (a3)
+; RV32IA-ZACAS-NEXT: bnez a4, .LBB13_1
+; RV32IA-ZACAS-NEXT: .LBB13_3:
+; RV32IA-ZACAS-NEXT: ret
+;
; RV32IA-TSO-LABEL: cmpxchg_i16_release_monotonic:
; RV32IA-TSO: # %bb.0:
; RV32IA-TSO-NEXT: andi a3, a0, -4
@@ -1477,6 +2233,30 @@ define void @cmpxchg_i16_release_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin
; RV64IA-WMO-NEXT: .LBB13_3:
; RV64IA-WMO-NEXT: ret
;
+; RV64IA-ZACAS-LABEL: cmpxchg_i16_release_monotonic:
+; RV64IA-ZACAS: # %bb.0:
+; RV64IA-ZACAS-NEXT: andi a3, a0, -4
+; RV64IA-ZACAS-NEXT: slli a0, a0, 3
+; RV64IA-ZACAS-NEXT: lui a4, 16
+; RV64IA-ZACAS-NEXT: addiw a4, a4, -1
+; RV64IA-ZACAS-NEXT: sllw a5, a4, a0
+; RV64IA-ZACAS-NEXT: and a1, a1, a4
+; RV64IA-ZACAS-NEXT: sllw a1, a1, a0
+; RV64IA-ZACAS-NEXT: and a2, a2, a4
+; RV64IA-ZACAS-NEXT: sllw a0, a2, a0
+; RV64IA-ZACAS-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-ZACAS-NEXT: lr.w a2, (a3)
+; RV64IA-ZACAS-NEXT: and a4, a2, a5
+; RV64IA-ZACAS-NEXT: bne a4, a1, .LBB13_3
+; RV64IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB13_1 Depth=1
+; RV64IA-ZACAS-NEXT: xor a4, a2, a0
+; RV64IA-ZACAS-NEXT: and a4, a4, a5
+; RV64IA-ZACAS-NEXT: xor a4, a2, a4
+; RV64IA-ZACAS-NEXT: sc.w.rl a4, a4, (a3)
+; RV64IA-ZACAS-NEXT: bnez a4, .LBB13_1
+; RV64IA-ZACAS-NEXT: .LBB13_3:
+; RV64IA-ZACAS-NEXT: ret
+;
; RV64IA-TSO-LABEL: cmpxchg_i16_release_monotonic:
; RV64IA-TSO: # %bb.0:
; RV64IA-TSO-NEXT: andi a3, a0, -4
@@ -1500,6 +2280,52 @@ define void @cmpxchg_i16_release_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin
; RV64IA-TSO-NEXT: bnez a4, .LBB13_1
; RV64IA-TSO-NEXT: .LBB13_3:
; RV64IA-TSO-NEXT: ret
+; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i16_release_monotonic:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a3, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: lui a4, 16
+; RV32IA-WMO-ZACAS-NEXT: addi a4, a4, -1
+; RV32IA-WMO-ZACAS-NEXT: sll a5, a4, a0
+; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a4
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: and a2, a2, a4
+; RV32IA-WMO-ZACAS-NEXT: sll a0, a2, a0
+; RV32IA-WMO-ZACAS-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w a2, (a3)
+; RV32IA-WMO-ZACAS-NEXT: and a4, a2, a5
+; RV32IA-WMO-ZACAS-NEXT: bne a4, a1, .LBB13_3
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB13_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: xor a4, a2, a0
+; RV32IA-WMO-ZACAS-NEXT: and a4, a4, a5
+; RV32IA-WMO-ZACAS-NEXT: xor a4, a2, a4
+; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a4, a4, (a3)
+; RV32IA-WMO-ZACAS-NEXT: bnez a4, .LBB13_1
+; RV32IA-WMO-ZACAS-NEXT: .LBB13_3:
+; RV32IA-WMO-ZACAS-NEXT: ret
+; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i16_release_monotonic:
+; RV64IA-WMO-ZACAS: # %bb.0:
+; RV64IA-WMO-ZACAS-NEXT: andi a3, a0, -4
+; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV64IA-WMO-ZACAS-NEXT: lui a4, 16
+; RV64IA-WMO-ZACAS-NEXT: addiw a4, a4, -1
+; RV64IA-WMO-ZACAS-NEXT: sllw a5, a4, a0
+; RV64IA-WMO-ZACAS-NEXT: and a1, a1, a4
+; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0
+; RV64IA-WMO-ZACAS-NEXT: and a2, a2, a4
+; RV64IA-WMO-ZACAS-NEXT: sllw a0, a2, a0
+; RV64IA-WMO-ZACAS-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-ZACAS-NEXT: lr.w a2, (a3)
+; RV64IA-WMO-ZACAS-NEXT: and a4, a2, a5
+; RV64IA-WMO-ZACAS-NEXT: bne a4, a1, .LBB13_3
+; RV64IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB13_1 Depth=1
+; RV64IA-WMO-ZACAS-NEXT: xor a4, a2, a0
+; RV64IA-WMO-ZACAS-NEXT: and a4, a4, a5
+; RV64IA-WMO-ZACAS-NEXT: xor a4, a2, a4
+; RV64IA-WMO-ZACAS-NEXT: sc.w.rl a4, a4, (a3)
+; RV64IA-WMO-ZACAS-NEXT: bnez a4, .LBB13_1
+; RV64IA-WMO-ZACAS-NEXT: .LBB13_3:
+; RV64IA-WMO-ZACAS-NEXT: ret
%res = cmpxchg ptr %ptr, i16 %cmp, i16 %val release monotonic
ret void
}
@@ -1542,6 +2368,30 @@ define void @cmpxchg_i16_release_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind
; RV32IA-WMO-NEXT: .LBB14_3:
; RV32IA-WMO-NEXT: ret
;
+; RV32IA-ZACAS-LABEL: cmpxchg_i16_release_acquire:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: andi a3, a0, -4
+; RV32IA-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-ZACAS-NEXT: lui a4, 16
+; RV32IA-ZACAS-NEXT: addi a4, a4, -1
+; RV32IA-ZACAS-NEXT: sll a5, a4, a0
+; RV32IA-ZACAS-NEXT: and a1, a1, a4
+; RV32IA-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-ZACAS-NEXT: and a2, a2, a4
+; RV32IA-ZACAS-NEXT: sll a0, a2, a0
+; RV32IA-ZACAS-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT: lr.w.aq a2, (a3)
+; RV32IA-ZACAS-NEXT: and a4, a2, a5
+; RV32IA-ZACAS-NEXT: bne a4, a1, .LBB14_3
+; RV32IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB14_1 Depth=1
+; RV32IA-ZACAS-NEXT: xor a4, a2, a0
+; RV32IA-ZACAS-NEXT: and a4, a4, a5
+; RV32IA-ZACAS-NEXT: xor a4, a2, a4
+; RV32IA-ZACAS-NEXT: sc.w.rl a4, a4, (a3)
+; RV32IA-ZACAS-NEXT: bnez a4, .LBB14_1
+; RV32IA-ZACAS-NEXT: .LBB14_3:
+; RV32IA-ZACAS-NEXT: ret
+;
; RV32IA-TSO-LABEL: cmpxchg_i16_release_acquire:
; RV32IA-TSO: # %bb.0:
; RV32IA-TSO-NEXT: andi a3, a0, -4
@@ -1603,6 +2453,30 @@ define void @cmpxchg_i16_release_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind
; RV64IA-WMO-NEXT: .LBB14_3:
; RV64IA-WMO-NEXT: ret
;
+; RV64IA-ZACAS-LABEL: cmpxchg_i16_release_acquire:
+; RV64IA-ZACAS: # %bb.0:
+; RV64IA-ZACAS-NEXT: andi a3, a0, -4
+; RV64IA-ZACAS-NEXT: slli a0, a0, 3
+; RV64IA-ZACAS-NEXT: lui a4, 16
+; RV64IA-ZACAS-NEXT: addiw a4, a4, -1
+; RV64IA-ZACAS-NEXT: sllw a5, a4, a0
+; RV64IA-ZACAS-NEXT: and a1, a1, a4
+; RV64IA-ZACAS-NEXT: sllw a1, a1, a0
+; RV64IA-ZACAS-NEXT: and a2, a2, a4
+; RV64IA-ZACAS-NEXT: sllw a0, a2, a0
+; RV64IA-ZACAS-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-ZACAS-NEXT: lr.w.aq a2, (a3)
+; RV64IA-ZACAS-NEXT: and a4, a2, a5
+; RV64IA-ZACAS-NEXT: bne a4, a1, .LBB14_3
+; RV64IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB14_1 Depth=1
+; RV64IA-ZACAS-NEXT: xor a4, a2, a0
+; RV64IA-ZACAS-NEXT: and a4, a4, a5
+; RV64IA-ZACAS-NEXT: xor a4, a2, a4
+; RV64IA-ZACAS-NEXT: sc.w.rl a4, a4, (a3)
+; RV64IA-ZACAS-NEXT: bnez a4, .LBB14_1
+; RV64IA-ZACAS-NEXT: .LBB14_3:
+; RV64IA-ZACAS-NEXT: ret
+;
; RV64IA-TSO-LABEL: cmpxchg_i16_release_acquire:
; RV64IA-TSO: # %bb.0:
; RV64IA-TSO-NEXT: andi a3, a0, -4
@@ -1626,6 +2500,52 @@ define void @cmpxchg_i16_release_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind
; RV64IA-TSO-NEXT: bnez a4, .LBB14_1
; RV64IA-TSO-NEXT: .LBB14_3:
; RV64IA-TSO-NEXT: ret
+; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i16_release_acquire:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a3, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: lui a4, 16
+; RV32IA-WMO-ZACAS-NEXT: addi a4, a4, -1
+; RV32IA-WMO-ZACAS-NEXT: sll a5, a4, a0
+; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a4
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: and a2, a2, a4
+; RV32IA-WMO-ZACAS-NEXT: sll a0, a2, a0
+; RV32IA-WMO-ZACAS-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a3)
+; RV32IA-WMO-ZACAS-NEXT: and a4, a2, a5
+; RV32IA-WMO-ZACAS-NEXT: bne a4, a1, .LBB14_3
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB14_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: xor a4, a2, a0
+; RV32IA-WMO-ZACAS-NEXT: and a4, a4, a5
+; RV32IA-WMO-ZACAS-NEXT: xor a4, a2, a4
+; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a4, a4, (a3)
+; RV32IA-WMO-ZACAS-NEXT: bnez a4, .LBB14_1
+; RV32IA-WMO-ZACAS-NEXT: .LBB14_3:
+; RV32IA-WMO-ZACAS-NEXT: ret
+; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i16_release_acquire:
+; RV64IA-WMO-ZACAS: # %bb.0:
+; RV64IA-WMO-ZACAS-NEXT: andi a3, a0, -4
+; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV64IA-WMO-ZACAS-NEXT: lui a4, 16
+; RV64IA-WMO-ZACAS-NEXT: addiw a4, a4, -1
+; RV64IA-WMO-ZACAS-NEXT: sllw a5, a4, a0
+; RV64IA-WMO-ZACAS-NEXT: and a1, a1, a4
+; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0
+; RV64IA-WMO-ZACAS-NEXT: and a2, a2, a4
+; RV64IA-WMO-ZACAS-NEXT: sllw a0, a2, a0
+; RV64IA-WMO-ZACAS-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a3)
+; RV64IA-WMO-ZACAS-NEXT: and a4, a2, a5
+; RV64IA-WMO-ZACAS-NEXT: bne a4, a1, .LBB14_3
+; RV64IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB14_1 Depth=1
+; RV64IA-WMO-ZACAS-NEXT: xor a4, a2, a0
+; RV64IA-WMO-ZACAS-NEXT: and a4, a4, a5
+; RV64IA-WMO-ZACAS-NEXT: xor a4, a2, a4
+; RV64IA-WMO-ZACAS-NEXT: sc.w.rl a4, a4, (a3)
+; RV64IA-WMO-ZACAS-NEXT: bnez a4, .LBB14_1
+; RV64IA-WMO-ZACAS-NEXT: .LBB14_3:
+; RV64IA-WMO-ZACAS-NEXT: ret
%res = cmpxchg ptr %ptr, i16 %cmp, i16 %val release acquire
ret void
}
@@ -1668,6 +2588,30 @@ define void @cmpxchg_i16_acq_rel_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin
; RV32IA-WMO-NEXT: .LBB15_3:
; RV32IA-WMO-NEXT: ret
;
+; RV32IA-ZACAS-LABEL: cmpxchg_i16_acq_rel_monotonic:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: andi a3, a0, -4
+; RV32IA-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-ZACAS-NEXT: lui a4, 16
+; RV32IA-ZACAS-NEXT: addi a4, a4, -1
+; RV32IA-ZACAS-NEXT: sll a5, a4, a0
+; RV32IA-ZACAS-NEXT: and a1, a1, a4
+; RV32IA-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-ZACAS-NEXT: and a2, a2, a4
+; RV32IA-ZACAS-NEXT: sll a0, a2, a0
+; RV32IA-ZACAS-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT: lr.w.aq a2, (a3)
+; RV32IA-ZACAS-NEXT: and a4, a2, a5
+; RV32IA-ZACAS-NEXT: bne a4, a1, .LBB15_3
+; RV32IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1
+; RV32IA-ZACAS-NEXT: xor a4, a2, a0
+; RV32IA-ZACAS-NEXT: and a4, a4, a5
+; RV32IA-ZACAS-NEXT: xor a4, a2, a4
+; RV32IA-ZACAS-NEXT: sc.w.rl a4, a4, (a3)
+; RV32IA-ZACAS-NEXT: bnez a4, .LBB15_1
+; RV32IA-ZACAS-NEXT: .LBB15_3:
+; RV32IA-ZACAS-NEXT: ret
+;
; RV32IA-TSO-LABEL: cmpxchg_i16_acq_rel_monotonic:
; RV32IA-TSO: # %bb.0:
; RV32IA-TSO-NEXT: andi a3, a0, -4
@@ -1729,6 +2673,30 @@ define void @cmpxchg_i16_acq_rel_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin
; RV64IA-WMO-NEXT: .LBB15_3:
; RV64IA-WMO-NEXT: ret
;
+; RV64IA-ZACAS-LABEL: cmpxchg_i16_acq_rel_monotonic:
+; RV64IA-ZACAS: # %bb.0:
+; RV64IA-ZACAS-NEXT: andi a3, a0, -4
+; RV64IA-ZACAS-NEXT: slli a0, a0, 3
+; RV64IA-ZACAS-NEXT: lui a4, 16
+; RV64IA-ZACAS-NEXT: addiw a4, a4, -1
+; RV64IA-ZACAS-NEXT: sllw a5, a4, a0
+; RV64IA-ZACAS-NEXT: and a1, a1, a4
+; RV64IA-ZACAS-NEXT: sllw a1, a1, a0
+; RV64IA-ZACAS-NEXT: and a2, a2, a4
+; RV64IA-ZACAS-NEXT: sllw a0, a2, a0
+; RV64IA-ZACAS-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-ZACAS-NEXT: lr.w.aq a2, (a3)
+; RV64IA-ZACAS-NEXT: and a4, a2, a5
+; RV64IA-ZACAS-NEXT: bne a4, a1, .LBB15_3
+; RV64IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1
+; RV64IA-ZACAS-NEXT: xor a4, a2, a0
+; RV64IA-ZACAS-NEXT: and a4, a4, a5
+; RV64IA-ZACAS-NEXT: xor a4, a2, a4
+; RV64IA-ZACAS-NEXT: sc.w.rl a4, a4, (a3)
+; RV64IA-ZACAS-NEXT: bnez a4, .LBB15_1
+; RV64IA-ZACAS-NEXT: .LBB15_3:
+; RV64IA-ZACAS-NEXT: ret
+;
; RV64IA-TSO-LABEL: cmpxchg_i16_acq_rel_monotonic:
; RV64IA-TSO: # %bb.0:
; RV64IA-TSO-NEXT: andi a3, a0, -4
@@ -1752,6 +2720,52 @@ define void @cmpxchg_i16_acq_rel_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin
; RV64IA-TSO-NEXT: bnez a4, .LBB15_1
; RV64IA-TSO-NEXT: .LBB15_3:
; RV64IA-TSO-NEXT: ret
+; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i16_acq_rel_monotonic:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a3, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: lui a4, 16
+; RV32IA-WMO-ZACAS-NEXT: addi a4, a4, -1
+; RV32IA-WMO-ZACAS-NEXT: sll a5, a4, a0
+; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a4
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: and a2, a2, a4
+; RV32IA-WMO-ZACAS-NEXT: sll a0, a2, a0
+; RV32IA-WMO-ZACAS-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a3)
+; RV32IA-WMO-ZACAS-NEXT: and a4, a2, a5
+; RV32IA-WMO-ZACAS-NEXT: bne a4, a1, .LBB15_3
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: xor a4, a2, a0
+; RV32IA-WMO-ZACAS-NEXT: and a4, a4, a5
+; RV32IA-WMO-ZACAS-NEXT: xor a4, a2, a4
+; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a4, a4, (a3)
+; RV32IA-WMO-ZACAS-NEXT: bnez a4, .LBB15_1
+; RV32IA-WMO-ZACAS-NEXT: .LBB15_3:
+; RV32IA-WMO-ZACAS-NEXT: ret
+; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i16_acq_rel_monotonic:
+; RV64IA-WMO-ZACAS: # %bb.0:
+; RV64IA-WMO-ZACAS-NEXT: andi a3, a0, -4
+; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV64IA-WMO-ZACAS-NEXT: lui a4, 16
+; RV64IA-WMO-ZACAS-NEXT: addiw a4, a4, -1
+; RV64IA-WMO-ZACAS-NEXT: sllw a5, a4, a0
+; RV64IA-WMO-ZACAS-NEXT: and a1, a1, a4
+; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0
+; RV64IA-WMO-ZACAS-NEXT: and a2, a2, a4
+; RV64IA-WMO-ZACAS-NEXT: sllw a0, a2, a0
+; RV64IA-WMO-ZACAS-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a3)
+; RV64IA-WMO-ZACAS-NEXT: and a4, a2, a5
+; RV64IA-WMO-ZACAS-NEXT: bne a4, a1, .LBB15_3
+; RV64IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1
+; RV64IA-WMO-ZACAS-NEXT: xor a4, a2, a0
+; RV64IA-WMO-ZACAS-NEXT: and a4, a4, a5
+; RV64IA-WMO-ZACAS-NEXT: xor a4, a2, a4
+; RV64IA-WMO-ZACAS-NEXT: sc.w.rl a4, a4, (a3)
+; RV64IA-WMO-ZACAS-NEXT: bnez a4, .LBB15_1
+; RV64IA-WMO-ZACAS-NEXT: .LBB15_3:
+; RV64IA-WMO-ZACAS-NEXT: ret
%res = cmpxchg ptr %ptr, i16 %cmp, i16 %val acq_rel monotonic
ret void
}
@@ -1794,6 +2808,30 @@ define void @cmpxchg_i16_acq_rel_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind
; RV32IA-WMO-NEXT: .LBB16_3:
; RV32IA-WMO-NEXT: ret
;
+; RV32IA-ZACAS-LABEL: cmpxchg_i16_acq_rel_acquire:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: andi a3, a0, -4
+; RV32IA-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-ZACAS-NEXT: lui a4, 16
+; RV32IA-ZACAS-NEXT: addi a4, a4, -1
+; RV32IA-ZACAS-NEXT: sll a5, a4, a0
+; RV32IA-ZACAS-NEXT: and a1, a1, a4
+; RV32IA-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-ZACAS-NEXT: and a2, a2, a4
+; RV32IA-ZACAS-NEXT: sll a0, a2, a0
+; RV32IA-ZACAS-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT: lr.w.aq a2, (a3)
+; RV32IA-ZACAS-NEXT: and a4, a2, a5
+; RV32IA-ZACAS-NEXT: bne a4, a1, .LBB16_3
+; RV32IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB16_1 Depth=1
+; RV32IA-ZACAS-NEXT: xor a4, a2, a0
+; RV32IA-ZACAS-NEXT: and a4, a4, a5
+; RV32IA-ZACAS-NEXT: xor a4, a2, a4
+; RV32IA-ZACAS-NEXT: sc.w.rl a4, a4, (a3)
+; RV32IA-ZACAS-NEXT: bnez a4, .LBB16_1
+; RV32IA-ZACAS-NEXT: .LBB16_3:
+; RV32IA-ZACAS-NEXT: ret
+;
; RV32IA-TSO-LABEL: cmpxchg_i16_acq_rel_acquire:
; RV32IA-TSO: # %bb.0:
; RV32IA-TSO-NEXT: andi a3, a0, -4
@@ -1855,6 +2893,30 @@ define void @cmpxchg_i16_acq_rel_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind
; RV64IA-WMO-NEXT: .LBB16_3:
; RV64IA-WMO-NEXT: ret
;
+; RV64IA-ZACAS-LABEL: cmpxchg_i16_acq_rel_acquire:
+; RV64IA-ZACAS: # %bb.0:
+; RV64IA-ZACAS-NEXT: andi a3, a0, -4
+; RV64IA-ZACAS-NEXT: slli a0, a0, 3
+; RV64IA-ZACAS-NEXT: lui a4, 16
+; RV64IA-ZACAS-NEXT: addiw a4, a4, -1
+; RV64IA-ZACAS-NEXT: sllw a5, a4, a0
+; RV64IA-ZACAS-NEXT: and a1, a1, a4
+; RV64IA-ZACAS-NEXT: sllw a1, a1, a0
+; RV64IA-ZACAS-NEXT: and a2, a2, a4
+; RV64IA-ZACAS-NEXT: sllw a0, a2, a0
+; RV64IA-ZACAS-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-ZACAS-NEXT: lr.w.aq a2, (a3)
+; RV64IA-ZACAS-NEXT: and a4, a2, a5
+; RV64IA-ZACAS-NEXT: bne a4, a1, .LBB16_3
+; RV64IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB16_1 Depth=1
+; RV64IA-ZACAS-NEXT: xor a4, a2, a0
+; RV64IA-ZACAS-NEXT: and a4, a4, a5
+; RV64IA-ZACAS-NEXT: xor a4, a2, a4
+; RV64IA-ZACAS-NEXT: sc.w.rl a4, a4, (a3)
+; RV64IA-ZACAS-NEXT: bnez a4, .LBB16_1
+; RV64IA-ZACAS-NEXT: .LBB16_3:
+; RV64IA-ZACAS-NEXT: ret
+;
; RV64IA-TSO-LABEL: cmpxchg_i16_acq_rel_acquire:
; RV64IA-TSO: # %bb.0:
; RV64IA-TSO-NEXT: andi a3, a0, -4
@@ -1878,6 +2940,52 @@ define void @cmpxchg_i16_acq_rel_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind
; RV64IA-TSO-NEXT: bnez a4, .LBB16_1
; RV64IA-TSO-NEXT: .LBB16_3:
; RV64IA-TSO-NEXT: ret
+; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i16_acq_rel_acquire:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a3, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: lui a4, 16
+; RV32IA-WMO-ZACAS-NEXT: addi a4, a4, -1
+; RV32IA-WMO-ZACAS-NEXT: sll a5, a4, a0
+; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a4
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: and a2, a2, a4
+; RV32IA-WMO-ZACAS-NEXT: sll a0, a2, a0
+; RV32IA-WMO-ZACAS-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a3)
+; RV32IA-WMO-ZACAS-NEXT: and a4, a2, a5
+; RV32IA-WMO-ZACAS-NEXT: bne a4, a1, .LBB16_3
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB16_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: xor a4, a2, a0
+; RV32IA-WMO-ZACAS-NEXT: and a4, a4, a5
+; RV32IA-WMO-ZACAS-NEXT: xor a4, a2, a4
+; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a4, a4, (a3)
+; RV32IA-WMO-ZACAS-NEXT: bnez a4, .LBB16_1
+; RV32IA-WMO-ZACAS-NEXT: .LBB16_3:
+; RV32IA-WMO-ZACAS-NEXT: ret
+; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i16_acq_rel_acquire:
+; RV64IA-WMO-ZACAS: # %bb.0:
+; RV64IA-WMO-ZACAS-NEXT: andi a3, a0, -4
+; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV64IA-WMO-ZACAS-NEXT: lui a4, 16
+; RV64IA-WMO-ZACAS-NEXT: addiw a4, a4, -1
+; RV64IA-WMO-ZACAS-NEXT: sllw a5, a4, a0
+; RV64IA-WMO-ZACAS-NEXT: and a1, a1, a4
+; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0
+; RV64IA-WMO-ZACAS-NEXT: and a2, a2, a4
+; RV64IA-WMO-ZACAS-NEXT: sllw a0, a2, a0
+; RV64IA-WMO-ZACAS-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a3)
+; RV64IA-WMO-ZACAS-NEXT: and a4, a2, a5
+; RV64IA-WMO-ZACAS-NEXT: bne a4, a1, .LBB16_3
+; RV64IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB16_1 Depth=1
+; RV64IA-WMO-ZACAS-NEXT: xor a4, a2, a0
+; RV64IA-WMO-ZACAS-NEXT: and a4, a4, a5
+; RV64IA-WMO-ZACAS-NEXT: xor a4, a2, a4
+; RV64IA-WMO-ZACAS-NEXT: sc.w.rl a4, a4, (a3)
+; RV64IA-WMO-ZACAS-NEXT: bnez a4, .LBB16_1
+; RV64IA-WMO-ZACAS-NEXT: .LBB16_3:
+; RV64IA-WMO-ZACAS-NEXT: ret
%res = cmpxchg ptr %ptr, i16 %cmp, i16 %val acq_rel acquire
ret void
}
@@ -2130,16 +3238,32 @@ define void @cmpxchg_i32_monotonic_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounw
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-LABEL: cmpxchg_i32_monotonic_monotonic:
-; RV32IA: # %bb.0:
-; RV32IA-NEXT: .LBB20_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: lr.w a3, (a0)
-; RV32IA-NEXT: bne a3, a1, .LBB20_3
-; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB20_1 Depth=1
-; RV32IA-NEXT: sc.w a4, a2, (a0)
-; RV32IA-NEXT: bnez a4, .LBB20_1
-; RV32IA-NEXT: .LBB20_3:
-; RV32IA-NEXT: ret
+; RV32IA-WMO-LABEL: cmpxchg_i32_monotonic_monotonic:
+; RV32IA-WMO: # %bb.0:
+; RV32IA-WMO-NEXT: .LBB20_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NEXT: lr.w a3, (a0)
+; RV32IA-WMO-NEXT: bne a3, a1, .LBB20_3
+; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB20_1 Depth=1
+; RV32IA-WMO-NEXT: sc.w a4, a2, (a0)
+; RV32IA-WMO-NEXT: bnez a4, .LBB20_1
+; RV32IA-WMO-NEXT: .LBB20_3:
+; RV32IA-WMO-NEXT: ret
+;
+; RV32IA-ZACAS-LABEL: cmpxchg_i32_monotonic_monotonic:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: amocas.w a1, a2, (a0)
+; RV32IA-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-LABEL: cmpxchg_i32_monotonic_monotonic:
+; RV32IA-TSO: # %bb.0:
+; RV32IA-TSO-NEXT: .LBB20_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NEXT: lr.w a3, (a0)
+; RV32IA-TSO-NEXT: bne a3, a1, .LBB20_3
+; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB20_1 Depth=1
+; RV32IA-TSO-NEXT: sc.w a4, a2, (a0)
+; RV32IA-TSO-NEXT: bnez a4, .LBB20_1
+; RV32IA-TSO-NEXT: .LBB20_3:
+; RV32IA-TSO-NEXT: ret
;
; RV64I-LABEL: cmpxchg_i32_monotonic_monotonic:
; RV64I: # %bb.0:
@@ -2154,17 +3278,44 @@ define void @cmpxchg_i32_monotonic_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounw
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
-; RV64IA-LABEL: cmpxchg_i32_monotonic_monotonic:
-; RV64IA: # %bb.0:
-; RV64IA-NEXT: sext.w a1, a1
-; RV64IA-NEXT: .LBB20_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT: lr.w a3, (a0)
-; RV64IA-NEXT: bne a3, a1, .LBB20_3
-; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB20_1 Depth=1
-; RV64IA-NEXT: sc.w a4, a2, (a0)
-; RV64IA-NEXT: bnez a4, .LBB20_1
-; RV64IA-NEXT: .LBB20_3:
-; RV64IA-NEXT: ret
+; RV64IA-WMO-LABEL: cmpxchg_i32_monotonic_monotonic:
+; RV64IA-WMO: # %bb.0:
+; RV64IA-WMO-NEXT: sext.w a1, a1
+; RV64IA-WMO-NEXT: .LBB20_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT: lr.w a3, (a0)
+; RV64IA-WMO-NEXT: bne a3, a1, .LBB20_3
+; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB20_1 Depth=1
+; RV64IA-WMO-NEXT: sc.w a4, a2, (a0)
+; RV64IA-WMO-NEXT: bnez a4, .LBB20_1
+; RV64IA-WMO-NEXT: .LBB20_3:
+; RV64IA-WMO-NEXT: ret
+;
+; RV64IA-ZACAS-LABEL: cmpxchg_i32_monotonic_monotonic:
+; RV64IA-ZACAS: # %bb.0:
+; RV64IA-ZACAS-NEXT: sext.w a1, a1
+; RV64IA-ZACAS-NEXT: amocas.w a1, a2, (a0)
+; RV64IA-ZACAS-NEXT: ret
+;
+; RV64IA-TSO-LABEL: cmpxchg_i32_monotonic_monotonic:
+; RV64IA-TSO: # %bb.0:
+; RV64IA-TSO-NEXT: sext.w a1, a1
+; RV64IA-TSO-NEXT: .LBB20_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT: lr.w a3, (a0)
+; RV64IA-TSO-NEXT: bne a3, a1, .LBB20_3
+; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB20_1 Depth=1
+; RV64IA-TSO-NEXT: sc.w a4, a2, (a0)
+; RV64IA-TSO-NEXT: bnez a4, .LBB20_1
+; RV64IA-TSO-NEXT: .LBB20_3:
+; RV64IA-TSO-NEXT: ret
+; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i32_monotonic_monotonic:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: amocas.w a1, a2, (a0)
+; RV32IA-WMO-ZACAS-NEXT: ret
+; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i32_monotonic_monotonic:
+; RV64IA-WMO-ZACAS: # %bb.0:
+; RV64IA-WMO-ZACAS-NEXT: sext.w a1, a1
+; RV64IA-WMO-ZACAS-NEXT: amocas.w a1, a2, (a0)
+; RV64IA-WMO-ZACAS-NEXT: ret
%res = cmpxchg ptr %ptr, i32 %cmp, i32 %val monotonic monotonic
ret void
}
@@ -2194,6 +3345,11 @@ define void @cmpxchg_i32_acquire_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwin
; RV32IA-WMO-NEXT: .LBB21_3:
; RV32IA-WMO-NEXT: ret
;
+; RV32IA-ZACAS-LABEL: cmpxchg_i32_acquire_monotonic:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: amocas.w.aq a1, a2, (a0)
+; RV32IA-ZACAS-NEXT: ret
+;
; RV32IA-TSO-LABEL: cmpxchg_i32_acquire_monotonic:
; RV32IA-TSO: # %bb.0:
; RV32IA-TSO-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1
@@ -2230,6 +3386,12 @@ define void @cmpxchg_i32_acquire_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwin
; RV64IA-WMO-NEXT: .LBB21_3:
; RV64IA-WMO-NEXT: ret
;
+; RV64IA-ZACAS-LABEL: cmpxchg_i32_acquire_monotonic:
+; RV64IA-ZACAS: # %bb.0:
+; RV64IA-ZACAS-NEXT: sext.w a1, a1
+; RV64IA-ZACAS-NEXT: amocas.w.aq a1, a2, (a0)
+; RV64IA-ZACAS-NEXT: ret
+;
; RV64IA-TSO-LABEL: cmpxchg_i32_acquire_monotonic:
; RV64IA-TSO: # %bb.0:
; RV64IA-TSO-NEXT: sext.w a1, a1
@@ -2241,6 +3403,15 @@ define void @cmpxchg_i32_acquire_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwin
; RV64IA-TSO-NEXT: bnez a4, .LBB21_1
; RV64IA-TSO-NEXT: .LBB21_3:
; RV64IA-TSO-NEXT: ret
+; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i32_acquire_monotonic:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: amocas.w.aq a1, a2, (a0)
+; RV32IA-WMO-ZACAS-NEXT: ret
+; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i32_acquire_monotonic:
+; RV64IA-WMO-ZACAS: # %bb.0:
+; RV64IA-WMO-ZACAS-NEXT: sext.w a1, a1
+; RV64IA-WMO-ZACAS-NEXT: amocas.w.aq a1, a2, (a0)
+; RV64IA-WMO-ZACAS-NEXT: ret
%res = cmpxchg ptr %ptr, i32 %cmp, i32 %val acquire monotonic
ret void
}
@@ -2270,6 +3441,11 @@ define void @cmpxchg_i32_acquire_acquire(ptr %ptr, i32 %cmp, i32 %val) nounwind
; RV32IA-WMO-NEXT: .LBB22_3:
; RV32IA-WMO-NEXT: ret
;
+; RV32IA-ZACAS-LABEL: cmpxchg_i32_acquire_acquire:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: amocas.w.aq a1, a2, (a0)
+; RV32IA-ZACAS-NEXT: ret
+;
; RV32IA-TSO-LABEL: cmpxchg_i32_acquire_acquire:
; RV32IA-TSO: # %bb.0:
; RV32IA-TSO-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1
@@ -2306,6 +3482,12 @@ define void @cmpxchg_i32_acquire_acquire(ptr %ptr, i32 %cmp, i32 %val) nounwind
; RV64IA-WMO-NEXT: .LBB22_3:
; RV64IA-WMO-NEXT: ret
;
+; RV64IA-ZACAS-LABEL: cmpxchg_i32_acquire_acquire:
+; RV64IA-ZACAS: # %bb.0:
+; RV64IA-ZACAS-NEXT: sext.w a1, a1
+; RV64IA-ZACAS-NEXT: amocas.w.aq a1, a2, (a0)
+; RV64IA-ZACAS-NEXT: ret
+;
; RV64IA-TSO-LABEL: cmpxchg_i32_acquire_acquire:
; RV64IA-TSO: # %bb.0:
; RV64IA-TSO-NEXT: sext.w a1, a1
@@ -2317,6 +3499,15 @@ define void @cmpxchg_i32_acquire_acquire(ptr %ptr, i32 %cmp, i32 %val) nounwind
; RV64IA-TSO-NEXT: bnez a4, .LBB22_1
; RV64IA-TSO-NEXT: .LBB22_3:
; RV64IA-TSO-NEXT: ret
+; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i32_acquire_acquire:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: amocas.w.aq a1, a2, (a0)
+; RV32IA-WMO-ZACAS-NEXT: ret
+; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i32_acquire_acquire:
+; RV64IA-WMO-ZACAS: # %bb.0:
+; RV64IA-WMO-ZACAS-NEXT: sext.w a1, a1
+; RV64IA-WMO-ZACAS-NEXT: amocas.w.aq a1, a2, (a0)
+; RV64IA-WMO-ZACAS-NEXT: ret
%res = cmpxchg ptr %ptr, i32 %cmp, i32 %val acquire acquire
ret void
}
@@ -2346,6 +3537,11 @@ define void @cmpxchg_i32_release_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwin
; RV32IA-WMO-NEXT: .LBB23_3:
; RV32IA-WMO-NEXT: ret
;
+; RV32IA-ZACAS-LABEL: cmpxchg_i32_release_monotonic:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: amocas.w.rl a1, a2, (a0)
+; RV32IA-ZACAS-NEXT: ret
+;
; RV32IA-TSO-LABEL: cmpxchg_i32_release_monotonic:
; RV32IA-TSO: # %bb.0:
; RV32IA-TSO-NEXT: .LBB23_1: # =>This Inner Loop Header: Depth=1
@@ -2382,6 +3578,12 @@ define void @cmpxchg_i32_release_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwin
; RV64IA-WMO-NEXT: .LBB23_3:
; RV64IA-WMO-NEXT: ret
;
+; RV64IA-ZACAS-LABEL: cmpxchg_i32_release_monotonic:
+; RV64IA-ZACAS: # %bb.0:
+; RV64IA-ZACAS-NEXT: sext.w a1, a1
+; RV64IA-ZACAS-NEXT: amocas.w.rl a1, a2, (a0)
+; RV64IA-ZACAS-NEXT: ret
+;
; RV64IA-TSO-LABEL: cmpxchg_i32_release_monotonic:
; RV64IA-TSO: # %bb.0:
; RV64IA-TSO-NEXT: sext.w a1, a1
@@ -2393,6 +3595,15 @@ define void @cmpxchg_i32_release_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwin
; RV64IA-TSO-NEXT: bnez a4, .LBB23_1
; RV64IA-TSO-NEXT: .LBB23_3:
; RV64IA-TSO-NEXT: ret
+; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i32_release_monotonic:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: amocas.w.rl a1, a2, (a0)
+; RV32IA-WMO-ZACAS-NEXT: ret
+; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i32_release_monotonic:
+; RV64IA-WMO-ZACAS: # %bb.0:
+; RV64IA-WMO-ZACAS-NEXT: sext.w a1, a1
+; RV64IA-WMO-ZACAS-NEXT: amocas.w.rl a1, a2, (a0)
+; RV64IA-WMO-ZACAS-NEXT: ret
%res = cmpxchg ptr %ptr, i32 %cmp, i32 %val release monotonic
ret void
}
@@ -2422,6 +3633,11 @@ define void @cmpxchg_i32_release_acquire(ptr %ptr, i32 %cmp, i32 %val) nounwind
; RV32IA-WMO-NEXT: .LBB24_3:
; RV32IA-WMO-NEXT: ret
;
+; RV32IA-ZACAS-LABEL: cmpxchg_i32_release_acquire:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: amocas.w.aqrl a1, a2, (a0)
+; RV32IA-ZACAS-NEXT: ret
+;
; RV32IA-TSO-LABEL: cmpxchg_i32_release_acquire:
; RV32IA-TSO: # %bb.0:
; RV32IA-TSO-NEXT: .LBB24_1: # =>This Inner Loop Header: Depth=1
@@ -2458,6 +3674,12 @@ define void @cmpxchg_i32_release_acquire(ptr %ptr, i32 %cmp, i32 %val) nounwind
; RV64IA-WMO-NEXT: .LBB24_3:
; RV64IA-WMO-NEXT: ret
;
+; RV64IA-ZACAS-LABEL: cmpxchg_i32_release_acquire:
+; RV64IA-ZACAS: # %bb.0:
+; RV64IA-ZACAS-NEXT: sext.w a1, a1
+; RV64IA-ZACAS-NEXT: amocas.w.aqrl a1, a2, (a0)
+; RV64IA-ZACAS-NEXT: ret
+;
; RV64IA-TSO-LABEL: cmpxchg_i32_release_acquire:
; RV64IA-TSO: # %bb.0:
; RV64IA-TSO-NEXT: sext.w a1, a1
@@ -2469,6 +3691,15 @@ define void @cmpxchg_i32_release_acquire(ptr %ptr, i32 %cmp, i32 %val) nounwind
; RV64IA-TSO-NEXT: bnez a4, .LBB24_1
; RV64IA-TSO-NEXT: .LBB24_3:
; RV64IA-TSO-NEXT: ret
+; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i32_release_acquire:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: amocas.w.aqrl a1, a2, (a0)
+; RV32IA-WMO-ZACAS-NEXT: ret
+; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i32_release_acquire:
+; RV64IA-WMO-ZACAS: # %bb.0:
+; RV64IA-WMO-ZACAS-NEXT: sext.w a1, a1
+; RV64IA-WMO-ZACAS-NEXT: amocas.w.aqrl a1, a2, (a0)
+; RV64IA-WMO-ZACAS-NEXT: ret
%res = cmpxchg ptr %ptr, i32 %cmp, i32 %val release acquire
ret void
}
@@ -2498,6 +3729,11 @@ define void @cmpxchg_i32_acq_rel_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwin
; RV32IA-WMO-NEXT: .LBB25_3:
; RV32IA-WMO-NEXT: ret
;
+; RV32IA-ZACAS-LABEL: cmpxchg_i32_acq_rel_monotonic:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: amocas.w.aqrl a1, a2, (a0)
+; RV32IA-ZACAS-NEXT: ret
+;
; RV32IA-TSO-LABEL: cmpxchg_i32_acq_rel_monotonic:
; RV32IA-TSO: # %bb.0:
; RV32IA-TSO-NEXT: .LBB25_1: # =>This Inner Loop Header: Depth=1
@@ -2534,6 +3770,12 @@ define void @cmpxchg_i32_acq_rel_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwin
; RV64IA-WMO-NEXT: .LBB25_3:
; RV64IA-WMO-NEXT: ret
;
+; RV64IA-ZACAS-LABEL: cmpxchg_i32_acq_rel_monotonic:
+; RV64IA-ZACAS: # %bb.0:
+; RV64IA-ZACAS-NEXT: sext.w a1, a1
+; RV64IA-ZACAS-NEXT: amocas.w.aqrl a1, a2, (a0)
+; RV64IA-ZACAS-NEXT: ret
+;
; RV64IA-TSO-LABEL: cmpxchg_i32_acq_rel_monotonic:
; RV64IA-TSO: # %bb.0:
; RV64IA-TSO-NEXT: sext.w a1, a1
@@ -2545,6 +3787,15 @@ define void @cmpxchg_i32_acq_rel_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwin
; RV64IA-TSO-NEXT: bnez a4, .LBB25_1
; RV64IA-TSO-NEXT: .LBB25_3:
; RV64IA-TSO-NEXT: ret
+; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i32_acq_rel_monotonic:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: amocas.w.aqrl a1, a2, (a0)
+; RV32IA-WMO-ZACAS-NEXT: ret
+; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i32_acq_rel_monotonic:
+; RV64IA-WMO-ZACAS: # %bb.0:
+; RV64IA-WMO-ZACAS-NEXT: sext.w a1, a1
+; RV64IA-WMO-ZACAS-NEXT: amocas.w.aqrl a1, a2, (a0)
+; RV64IA-WMO-ZACAS-NEXT: ret
%res = cmpxchg ptr %ptr, i32 %cmp, i32 %val acq_rel monotonic
ret void
}
@@ -2574,6 +3825,11 @@ define void @cmpxchg_i32_acq_rel_acquire(ptr %ptr, i32 %cmp, i32 %val) nounwind
; RV32IA-WMO-NEXT: .LBB26_3:
; RV32IA-WMO-NEXT: ret
;
+; RV32IA-ZACAS-LABEL: cmpxchg_i32_acq_rel_acquire:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: amocas.w.aqrl a1, a2, (a0)
+; RV32IA-ZACAS-NEXT: ret
+;
; RV32IA-TSO-LABEL: cmpxchg_i32_acq_rel_acquire:
; RV32IA-TSO: # %bb.0:
; RV32IA-TSO-NEXT: .LBB26_1: # =>This Inner Loop Header: Depth=1
@@ -2610,6 +3866,12 @@ define void @cmpxchg_i32_acq_rel_acquire(ptr %ptr, i32 %cmp, i32 %val) nounwind
; RV64IA-WMO-NEXT: .LBB26_3:
; RV64IA-WMO-NEXT: ret
;
+; RV64IA-ZACAS-LABEL: cmpxchg_i32_acq_rel_acquire:
+; RV64IA-ZACAS: # %bb.0:
+; RV64IA-ZACAS-NEXT: sext.w a1, a1
+; RV64IA-ZACAS-NEXT: amocas.w.aqrl a1, a2, (a0)
+; RV64IA-ZACAS-NEXT: ret
+;
; RV64IA-TSO-LABEL: cmpxchg_i32_acq_rel_acquire:
; RV64IA-TSO: # %bb.0:
; RV64IA-TSO-NEXT: sext.w a1, a1
@@ -2621,6 +3883,15 @@ define void @cmpxchg_i32_acq_rel_acquire(ptr %ptr, i32 %cmp, i32 %val) nounwind
; RV64IA-TSO-NEXT: bnez a4, .LBB26_1
; RV64IA-TSO-NEXT: .LBB26_3:
; RV64IA-TSO-NEXT: ret
+; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i32_acq_rel_acquire:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: amocas.w.aqrl a1, a2, (a0)
+; RV32IA-WMO-ZACAS-NEXT: ret
+; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i32_acq_rel_acquire:
+; RV64IA-WMO-ZACAS: # %bb.0:
+; RV64IA-WMO-ZACAS-NEXT: sext.w a1, a1
+; RV64IA-WMO-ZACAS-NEXT: amocas.w.aqrl a1, a2, (a0)
+; RV64IA-WMO-ZACAS-NEXT: ret
%res = cmpxchg ptr %ptr, i32 %cmp, i32 %val acq_rel acquire
ret void
}
@@ -2639,16 +3910,32 @@ define void @cmpxchg_i32_seq_cst_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwin
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-LABEL: cmpxchg_i32_seq_cst_monotonic:
-; RV32IA: # %bb.0:
-; RV32IA-NEXT: .LBB27_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: lr.w.aqrl a3, (a0)
-; RV32IA-NEXT: bne a3, a1, .LBB27_3
-; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB27_1 Depth=1
-; RV32IA-NEXT: sc.w.rl a4, a2, (a0)
-; RV32IA-NEXT: bnez a4, .LBB27_1
-; RV32IA-NEXT: .LBB27_3:
-; RV32IA-NEXT: ret
+; RV32IA-WMO-LABEL: cmpxchg_i32_seq_cst_monotonic:
+; RV32IA-WMO: # %bb.0:
+; RV32IA-WMO-NEXT: .LBB27_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NEXT: lr.w.aqrl a3, (a0)
+; RV32IA-WMO-NEXT: bne a3, a1, .LBB27_3
+; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB27_1 Depth=1
+; RV32IA-WMO-NEXT: sc.w.rl a4, a2, (a0)
+; RV32IA-WMO-NEXT: bnez a4, .LBB27_1
+; RV32IA-WMO-NEXT: .LBB27_3:
+; RV32IA-WMO-NEXT: ret
+;
+; RV32IA-ZACAS-LABEL: cmpxchg_i32_seq_cst_monotonic:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: amocas.w.aqrl a1, a2, (a0)
+; RV32IA-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-LABEL: cmpxchg_i32_seq_cst_monotonic:
+; RV32IA-TSO: # %bb.0:
+; RV32IA-TSO-NEXT: .LBB27_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NEXT: lr.w.aqrl a3, (a0)
+; RV32IA-TSO-NEXT: bne a3, a1, .LBB27_3
+; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB27_1 Depth=1
+; RV32IA-TSO-NEXT: sc.w.rl a4, a2, (a0)
+; RV32IA-TSO-NEXT: bnez a4, .LBB27_1
+; RV32IA-TSO-NEXT: .LBB27_3:
+; RV32IA-TSO-NEXT: ret
;
; RV64I-LABEL: cmpxchg_i32_seq_cst_monotonic:
; RV64I: # %bb.0:
@@ -2663,17 +3950,44 @@ define void @cmpxchg_i32_seq_cst_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwin
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
-; RV64IA-LABEL: cmpxchg_i32_seq_cst_monotonic:
-; RV64IA: # %bb.0:
-; RV64IA-NEXT: sext.w a1, a1
-; RV64IA-NEXT: .LBB27_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT: lr.w.aqrl a3, (a0)
-; RV64IA-NEXT: bne a3, a1, .LBB27_3
-; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB27_1 Depth=1
-; RV64IA-NEXT: sc.w.rl a4, a2, (a0)
-; RV64IA-NEXT: bnez a4, .LBB27_1
-; RV64IA-NEXT: .LBB27_3:
-; RV64IA-NEXT: ret
+; RV64IA-WMO-LABEL: cmpxchg_i32_seq_cst_monotonic:
+; RV64IA-WMO: # %bb.0:
+; RV64IA-WMO-NEXT: sext.w a1, a1
+; RV64IA-WMO-NEXT: .LBB27_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT: lr.w.aqrl a3, (a0)
+; RV64IA-WMO-NEXT: bne a3, a1, .LBB27_3
+; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB27_1 Depth=1
+; RV64IA-WMO-NEXT: sc.w.rl a4, a2, (a0)
+; RV64IA-WMO-NEXT: bnez a4, .LBB27_1
+; RV64IA-WMO-NEXT: .LBB27_3:
+; RV64IA-WMO-NEXT: ret
+;
+; RV64IA-ZACAS-LABEL: cmpxchg_i32_seq_cst_monotonic:
+; RV64IA-ZACAS: # %bb.0:
+; RV64IA-ZACAS-NEXT: sext.w a1, a1
+; RV64IA-ZACAS-NEXT: amocas.w.aqrl a1, a2, (a0)
+; RV64IA-ZACAS-NEXT: ret
+;
+; RV64IA-TSO-LABEL: cmpxchg_i32_seq_cst_monotonic:
+; RV64IA-TSO: # %bb.0:
+; RV64IA-TSO-NEXT: sext.w a1, a1
+; RV64IA-TSO-NEXT: .LBB27_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT: lr.w.aqrl a3, (a0)
+; RV64IA-TSO-NEXT: bne a3, a1, .LBB27_3
+; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB27_1 Depth=1
+; RV64IA-TSO-NEXT: sc.w.rl a4, a2, (a0)
+; RV64IA-TSO-NEXT: bnez a4, .LBB27_1
+; RV64IA-TSO-NEXT: .LBB27_3:
+; RV64IA-TSO-NEXT: ret
+; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i32_seq_cst_monotonic:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: amocas.w.aqrl a1, a2, (a0)
+; RV32IA-WMO-ZACAS-NEXT: ret
+; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i32_seq_cst_monotonic:
+; RV64IA-WMO-ZACAS: # %bb.0:
+; RV64IA-WMO-ZACAS-NEXT: sext.w a1, a1
+; RV64IA-WMO-ZACAS-NEXT: amocas.w.aqrl a1, a2, (a0)
+; RV64IA-WMO-ZACAS-NEXT: ret
%res = cmpxchg ptr %ptr, i32 %cmp, i32 %val seq_cst monotonic
ret void
}
@@ -2692,16 +4006,32 @@ define void @cmpxchg_i32_seq_cst_acquire(ptr %ptr, i32 %cmp, i32 %val) nounwind
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-LABEL: cmpxchg_i32_seq_cst_acquire:
-; RV32IA: # %bb.0:
-; RV32IA-NEXT: .LBB28_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: lr.w.aqrl a3, (a0)
-; RV32IA-NEXT: bne a3, a1, .LBB28_3
-; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB28_1 Depth=1
-; RV32IA-NEXT: sc.w.rl a4, a2, (a0)
-; RV32IA-NEXT: bnez a4, .LBB28_1
-; RV32IA-NEXT: .LBB28_3:
-; RV32IA-NEXT: ret
+; RV32IA-WMO-LABEL: cmpxchg_i32_seq_cst_acquire:
+; RV32IA-WMO: # %bb.0:
+; RV32IA-WMO-NEXT: .LBB28_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NEXT: lr.w.aqrl a3, (a0)
+; RV32IA-WMO-NEXT: bne a3, a1, .LBB28_3
+; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB28_1 Depth=1
+; RV32IA-WMO-NEXT: sc.w.rl a4, a2, (a0)
+; RV32IA-WMO-NEXT: bnez a4, .LBB28_1
+; RV32IA-WMO-NEXT: .LBB28_3:
+; RV32IA-WMO-NEXT: ret
+;
+; RV32IA-ZACAS-LABEL: cmpxchg_i32_seq_cst_acquire:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: amocas.w.aqrl a1, a2, (a0)
+; RV32IA-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-LABEL: cmpxchg_i32_seq_cst_acquire:
+; RV32IA-TSO: # %bb.0:
+; RV32IA-TSO-NEXT: .LBB28_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NEXT: lr.w.aqrl a3, (a0)
+; RV32IA-TSO-NEXT: bne a3, a1, .LBB28_3
+; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB28_1 Depth=1
+; RV32IA-TSO-NEXT: sc.w.rl a4, a2, (a0)
+; RV32IA-TSO-NEXT: bnez a4, .LBB28_1
+; RV32IA-TSO-NEXT: .LBB28_3:
+; RV32IA-TSO-NEXT: ret
;
; RV64I-LABEL: cmpxchg_i32_seq_cst_acquire:
; RV64I: # %bb.0:
@@ -2716,17 +4046,44 @@ define void @cmpxchg_i32_seq_cst_acquire(ptr %ptr, i32 %cmp, i32 %val) nounwind
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
-; RV64IA-LABEL: cmpxchg_i32_seq_cst_acquire:
-; RV64IA: # %bb.0:
-; RV64IA-NEXT: sext.w a1, a1
-; RV64IA-NEXT: .LBB28_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT: lr.w.aqrl a3, (a0)
-; RV64IA-NEXT: bne a3, a1, .LBB28_3
-; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB28_1 Depth=1
-; RV64IA-NEXT: sc.w.rl a4, a2, (a0)
-; RV64IA-NEXT: bnez a4, .LBB28_1
-; RV64IA-NEXT: .LBB28_3:
-; RV64IA-NEXT: ret
+; RV64IA-WMO-LABEL: cmpxchg_i32_seq_cst_acquire:
+; RV64IA-WMO: # %bb.0:
+; RV64IA-WMO-NEXT: sext.w a1, a1
+; RV64IA-WMO-NEXT: .LBB28_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT: lr.w.aqrl a3, (a0)
+; RV64IA-WMO-NEXT: bne a3, a1, .LBB28_3
+; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB28_1 Depth=1
+; RV64IA-WMO-NEXT: sc.w.rl a4, a2, (a0)
+; RV64IA-WMO-NEXT: bnez a4, .LBB28_1
+; RV64IA-WMO-NEXT: .LBB28_3:
+; RV64IA-WMO-NEXT: ret
+;
+; RV64IA-ZACAS-LABEL: cmpxchg_i32_seq_cst_acquire:
+; RV64IA-ZACAS: # %bb.0:
+; RV64IA-ZACAS-NEXT: sext.w a1, a1
+; RV64IA-ZACAS-NEXT: amocas.w.aqrl a1, a2, (a0)
+; RV64IA-ZACAS-NEXT: ret
+;
+; RV64IA-TSO-LABEL: cmpxchg_i32_seq_cst_acquire:
+; RV64IA-TSO: # %bb.0:
+; RV64IA-TSO-NEXT: sext.w a1, a1
+; RV64IA-TSO-NEXT: .LBB28_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT: lr.w.aqrl a3, (a0)
+; RV64IA-TSO-NEXT: bne a3, a1, .LBB28_3
+; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB28_1 Depth=1
+; RV64IA-TSO-NEXT: sc.w.rl a4, a2, (a0)
+; RV64IA-TSO-NEXT: bnez a4, .LBB28_1
+; RV64IA-TSO-NEXT: .LBB28_3:
+; RV64IA-TSO-NEXT: ret
+; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i32_seq_cst_acquire:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: amocas.w.aqrl a1, a2, (a0)
+; RV32IA-WMO-ZACAS-NEXT: ret
+; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i32_seq_cst_acquire:
+; RV64IA-WMO-ZACAS: # %bb.0:
+; RV64IA-WMO-ZACAS-NEXT: sext.w a1, a1
+; RV64IA-WMO-ZACAS-NEXT: amocas.w.aqrl a1, a2, (a0)
+; RV64IA-WMO-ZACAS-NEXT: ret
%res = cmpxchg ptr %ptr, i32 %cmp, i32 %val seq_cst acquire
ret void
}
@@ -2745,16 +4102,32 @@ define void @cmpxchg_i32_seq_cst_seq_cst(ptr %ptr, i32 %cmp, i32 %val) nounwind
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-LABEL: cmpxchg_i32_seq_cst_seq_cst:
-; RV32IA: # %bb.0:
-; RV32IA-NEXT: .LBB29_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: lr.w.aqrl a3, (a0)
-; RV32IA-NEXT: bne a3, a1, .LBB29_3
-; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB29_1 Depth=1
-; RV32IA-NEXT: sc.w.rl a4, a2, (a0)
-; RV32IA-NEXT: bnez a4, .LBB29_1
-; RV32IA-NEXT: .LBB29_3:
-; RV32IA-NEXT: ret
+; RV32IA-WMO-LABEL: cmpxchg_i32_seq_cst_seq_cst:
+; RV32IA-WMO: # %bb.0:
+; RV32IA-WMO-NEXT: .LBB29_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NEXT: lr.w.aqrl a3, (a0)
+; RV32IA-WMO-NEXT: bne a3, a1, .LBB29_3
+; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB29_1 Depth=1
+; RV32IA-WMO-NEXT: sc.w.rl a4, a2, (a0)
+; RV32IA-WMO-NEXT: bnez a4, .LBB29_1
+; RV32IA-WMO-NEXT: .LBB29_3:
+; RV32IA-WMO-NEXT: ret
+;
+; RV32IA-ZACAS-LABEL: cmpxchg_i32_seq_cst_seq_cst:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: amocas.w.aqrl a1, a2, (a0)
+; RV32IA-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-LABEL: cmpxchg_i32_seq_cst_seq_cst:
+; RV32IA-TSO: # %bb.0:
+; RV32IA-TSO-NEXT: .LBB29_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NEXT: lr.w.aqrl a3, (a0)
+; RV32IA-TSO-NEXT: bne a3, a1, .LBB29_3
+; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB29_1 Depth=1
+; RV32IA-TSO-NEXT: sc.w.rl a4, a2, (a0)
+; RV32IA-TSO-NEXT: bnez a4, .LBB29_1
+; RV32IA-TSO-NEXT: .LBB29_3:
+; RV32IA-TSO-NEXT: ret
;
; RV64I-LABEL: cmpxchg_i32_seq_cst_seq_cst:
; RV64I: # %bb.0:
@@ -2769,17 +4142,44 @@ define void @cmpxchg_i32_seq_cst_seq_cst(ptr %ptr, i32 %cmp, i32 %val) nounwind
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
-; RV64IA-LABEL: cmpxchg_i32_seq_cst_seq_cst:
-; RV64IA: # %bb.0:
-; RV64IA-NEXT: sext.w a1, a1
-; RV64IA-NEXT: .LBB29_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT: lr.w.aqrl a3, (a0)
-; RV64IA-NEXT: bne a3, a1, .LBB29_3
-; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB29_1 Depth=1
-; RV64IA-NEXT: sc.w.rl a4, a2, (a0)
-; RV64IA-NEXT: bnez a4, .LBB29_1
-; RV64IA-NEXT: .LBB29_3:
-; RV64IA-NEXT: ret
+; RV64IA-WMO-LABEL: cmpxchg_i32_seq_cst_seq_cst:
+; RV64IA-WMO: # %bb.0:
+; RV64IA-WMO-NEXT: sext.w a1, a1
+; RV64IA-WMO-NEXT: .LBB29_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT: lr.w.aqrl a3, (a0)
+; RV64IA-WMO-NEXT: bne a3, a1, .LBB29_3
+; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB29_1 Depth=1
+; RV64IA-WMO-NEXT: sc.w.rl a4, a2, (a0)
+; RV64IA-WMO-NEXT: bnez a4, .LBB29_1
+; RV64IA-WMO-NEXT: .LBB29_3:
+; RV64IA-WMO-NEXT: ret
+;
+; RV64IA-ZACAS-LABEL: cmpxchg_i32_seq_cst_seq_cst:
+; RV64IA-ZACAS: # %bb.0:
+; RV64IA-ZACAS-NEXT: sext.w a1, a1
+; RV64IA-ZACAS-NEXT: amocas.w.aqrl a1, a2, (a0)
+; RV64IA-ZACAS-NEXT: ret
+;
+; RV64IA-TSO-LABEL: cmpxchg_i32_seq_cst_seq_cst:
+; RV64IA-TSO: # %bb.0:
+; RV64IA-TSO-NEXT: sext.w a1, a1
+; RV64IA-TSO-NEXT: .LBB29_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT: lr.w.aqrl a3, (a0)
+; RV64IA-TSO-NEXT: bne a3, a1, .LBB29_3
+; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB29_1 Depth=1
+; RV64IA-TSO-NEXT: sc.w.rl a4, a2, (a0)
+; RV64IA-TSO-NEXT: bnez a4, .LBB29_1
+; RV64IA-TSO-NEXT: .LBB29_3:
+; RV64IA-TSO-NEXT: ret
+; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i32_seq_cst_seq_cst:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: amocas.w.aqrl a1, a2, (a0)
+; RV32IA-WMO-ZACAS-NEXT: ret
+; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i32_seq_cst_seq_cst:
+; RV64IA-WMO-ZACAS: # %bb.0:
+; RV64IA-WMO-ZACAS-NEXT: sext.w a1, a1
+; RV64IA-WMO-ZACAS-NEXT: amocas.w.aqrl a1, a2, (a0)
+; RV64IA-WMO-ZACAS-NEXT: ret
%res = cmpxchg ptr %ptr, i32 %cmp, i32 %val seq_cst seq_cst
ret void
}
@@ -2801,21 +4201,46 @@ define void @cmpxchg_i64_monotonic_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounw
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-LABEL: cmpxchg_i64_monotonic_monotonic:
-; RV32IA: # %bb.0:
-; RV32IA-NEXT: addi sp, sp, -16
-; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: sw a2, 4(sp)
-; RV32IA-NEXT: sw a1, 0(sp)
-; RV32IA-NEXT: mv a1, sp
-; RV32IA-NEXT: mv a2, a3
-; RV32IA-NEXT: mv a3, a4
-; RV32IA-NEXT: li a4, 0
-; RV32IA-NEXT: li a5, 0
-; RV32IA-NEXT: call __atomic_compare_exchange_8 at plt
-; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: addi sp, sp, 16
-; RV32IA-NEXT: ret
+; RV32IA-WMO-LABEL: cmpxchg_i64_monotonic_monotonic:
+; RV32IA-WMO: # %bb.0:
+; RV32IA-WMO-NEXT: addi sp, sp, -16
+; RV32IA-WMO-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IA-WMO-NEXT: sw a2, 4(sp)
+; RV32IA-WMO-NEXT: sw a1, 0(sp)
+; RV32IA-WMO-NEXT: mv a1, sp
+; RV32IA-WMO-NEXT: mv a2, a3
+; RV32IA-WMO-NEXT: mv a3, a4
+; RV32IA-WMO-NEXT: li a4, 0
+; RV32IA-WMO-NEXT: li a5, 0
+; RV32IA-WMO-NEXT: call __atomic_compare_exchange_8 at plt
+; RV32IA-WMO-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IA-WMO-NEXT: addi sp, sp, 16
+; RV32IA-WMO-NEXT: ret
+;
+; RV32IA-ZACAS-LABEL: cmpxchg_i64_monotonic_monotonic:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: mv a5, a4
+; RV32IA-ZACAS-NEXT: mv a7, a2
+; RV32IA-ZACAS-NEXT: mv a4, a3
+; RV32IA-ZACAS-NEXT: mv a6, a1
+; RV32IA-ZACAS-NEXT: amocas.d a6, a4, (a0)
+; RV32IA-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-LABEL: cmpxchg_i64_monotonic_monotonic:
+; RV32IA-TSO: # %bb.0:
+; RV32IA-TSO-NEXT: addi sp, sp, -16
+; RV32IA-TSO-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IA-TSO-NEXT: sw a2, 4(sp)
+; RV32IA-TSO-NEXT: sw a1, 0(sp)
+; RV32IA-TSO-NEXT: mv a1, sp
+; RV32IA-TSO-NEXT: mv a2, a3
+; RV32IA-TSO-NEXT: mv a3, a4
+; RV32IA-TSO-NEXT: li a4, 0
+; RV32IA-TSO-NEXT: li a5, 0
+; RV32IA-TSO-NEXT: call __atomic_compare_exchange_8 at plt
+; RV32IA-TSO-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IA-TSO-NEXT: addi sp, sp, 16
+; RV32IA-TSO-NEXT: ret
;
; RV64I-LABEL: cmpxchg_i64_monotonic_monotonic:
; RV64I: # %bb.0:
@@ -2830,16 +4255,44 @@ define void @cmpxchg_i64_monotonic_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounw
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
-; RV64IA-LABEL: cmpxchg_i64_monotonic_monotonic:
-; RV64IA: # %bb.0:
-; RV64IA-NEXT: .LBB30_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT: lr.d a3, (a0)
-; RV64IA-NEXT: bne a3, a1, .LBB30_3
-; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB30_1 Depth=1
-; RV64IA-NEXT: sc.d a4, a2, (a0)
-; RV64IA-NEXT: bnez a4, .LBB30_1
-; RV64IA-NEXT: .LBB30_3:
-; RV64IA-NEXT: ret
+; RV64IA-WMO-LABEL: cmpxchg_i64_monotonic_monotonic:
+; RV64IA-WMO: # %bb.0:
+; RV64IA-WMO-NEXT: .LBB30_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT: lr.d a3, (a0)
+; RV64IA-WMO-NEXT: bne a3, a1, .LBB30_3
+; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB30_1 Depth=1
+; RV64IA-WMO-NEXT: sc.d a4, a2, (a0)
+; RV64IA-WMO-NEXT: bnez a4, .LBB30_1
+; RV64IA-WMO-NEXT: .LBB30_3:
+; RV64IA-WMO-NEXT: ret
+;
+; RV64IA-ZACAS-LABEL: cmpxchg_i64_monotonic_monotonic:
+; RV64IA-ZACAS: # %bb.0:
+; RV64IA-ZACAS-NEXT: amocas.d a1, a2, (a0)
+; RV64IA-ZACAS-NEXT: ret
+;
+; RV64IA-TSO-LABEL: cmpxchg_i64_monotonic_monotonic:
+; RV64IA-TSO: # %bb.0:
+; RV64IA-TSO-NEXT: .LBB30_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT: lr.d a3, (a0)
+; RV64IA-TSO-NEXT: bne a3, a1, .LBB30_3
+; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB30_1 Depth=1
+; RV64IA-TSO-NEXT: sc.d a4, a2, (a0)
+; RV64IA-TSO-NEXT: bnez a4, .LBB30_1
+; RV64IA-TSO-NEXT: .LBB30_3:
+; RV64IA-TSO-NEXT: ret
+; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i64_monotonic_monotonic:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: mv a5, a4
+; RV32IA-WMO-ZACAS-NEXT: mv a7, a2
+; RV32IA-WMO-ZACAS-NEXT: mv a4, a3
+; RV32IA-WMO-ZACAS-NEXT: mv a6, a1
+; RV32IA-WMO-ZACAS-NEXT: amocas.d a6, a4, (a0)
+; RV32IA-WMO-ZACAS-NEXT: ret
+; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i64_monotonic_monotonic:
+; RV64IA-WMO-ZACAS: # %bb.0:
+; RV64IA-WMO-ZACAS-NEXT: amocas.d a1, a2, (a0)
+; RV64IA-WMO-ZACAS-NEXT: ret
%res = cmpxchg ptr %ptr, i64 %cmp, i64 %val monotonic monotonic
ret void
}
@@ -2862,22 +4315,48 @@ define void @cmpxchg_i64_acquire_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwin
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-LABEL: cmpxchg_i64_acquire_monotonic:
-; RV32IA: # %bb.0:
-; RV32IA-NEXT: addi sp, sp, -16
-; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: mv a5, a4
-; RV32IA-NEXT: sw a2, 4(sp)
-; RV32IA-NEXT: sw a1, 0(sp)
-; RV32IA-NEXT: mv a1, sp
-; RV32IA-NEXT: li a4, 2
-; RV32IA-NEXT: mv a2, a3
-; RV32IA-NEXT: mv a3, a5
-; RV32IA-NEXT: li a5, 0
-; RV32IA-NEXT: call __atomic_compare_exchange_8 at plt
-; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: addi sp, sp, 16
-; RV32IA-NEXT: ret
+; RV32IA-WMO-LABEL: cmpxchg_i64_acquire_monotonic:
+; RV32IA-WMO: # %bb.0:
+; RV32IA-WMO-NEXT: addi sp, sp, -16
+; RV32IA-WMO-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IA-WMO-NEXT: mv a5, a4
+; RV32IA-WMO-NEXT: sw a2, 4(sp)
+; RV32IA-WMO-NEXT: sw a1, 0(sp)
+; RV32IA-WMO-NEXT: mv a1, sp
+; RV32IA-WMO-NEXT: li a4, 2
+; RV32IA-WMO-NEXT: mv a2, a3
+; RV32IA-WMO-NEXT: mv a3, a5
+; RV32IA-WMO-NEXT: li a5, 0
+; RV32IA-WMO-NEXT: call __atomic_compare_exchange_8 at plt
+; RV32IA-WMO-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IA-WMO-NEXT: addi sp, sp, 16
+; RV32IA-WMO-NEXT: ret
+;
+; RV32IA-ZACAS-LABEL: cmpxchg_i64_acquire_monotonic:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: mv a5, a4
+; RV32IA-ZACAS-NEXT: mv a7, a2
+; RV32IA-ZACAS-NEXT: mv a4, a3
+; RV32IA-ZACAS-NEXT: mv a6, a1
+; RV32IA-ZACAS-NEXT: amocas.d.aq a6, a4, (a0)
+; RV32IA-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-LABEL: cmpxchg_i64_acquire_monotonic:
+; RV32IA-TSO: # %bb.0:
+; RV32IA-TSO-NEXT: addi sp, sp, -16
+; RV32IA-TSO-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IA-TSO-NEXT: mv a5, a4
+; RV32IA-TSO-NEXT: sw a2, 4(sp)
+; RV32IA-TSO-NEXT: sw a1, 0(sp)
+; RV32IA-TSO-NEXT: mv a1, sp
+; RV32IA-TSO-NEXT: li a4, 2
+; RV32IA-TSO-NEXT: mv a2, a3
+; RV32IA-TSO-NEXT: mv a3, a5
+; RV32IA-TSO-NEXT: li a5, 0
+; RV32IA-TSO-NEXT: call __atomic_compare_exchange_8 at plt
+; RV32IA-TSO-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IA-TSO-NEXT: addi sp, sp, 16
+; RV32IA-TSO-NEXT: ret
;
; RV64I-LABEL: cmpxchg_i64_acquire_monotonic:
; RV64I: # %bb.0:
@@ -2903,6 +4382,11 @@ define void @cmpxchg_i64_acquire_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwin
; RV64IA-WMO-NEXT: .LBB31_3:
; RV64IA-WMO-NEXT: ret
;
+; RV64IA-ZACAS-LABEL: cmpxchg_i64_acquire_monotonic:
+; RV64IA-ZACAS: # %bb.0:
+; RV64IA-ZACAS-NEXT: amocas.d.aq a1, a2, (a0)
+; RV64IA-ZACAS-NEXT: ret
+;
; RV64IA-TSO-LABEL: cmpxchg_i64_acquire_monotonic:
; RV64IA-TSO: # %bb.0:
; RV64IA-TSO-NEXT: .LBB31_1: # =>This Inner Loop Header: Depth=1
@@ -2913,6 +4397,18 @@ define void @cmpxchg_i64_acquire_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwin
; RV64IA-TSO-NEXT: bnez a4, .LBB31_1
; RV64IA-TSO-NEXT: .LBB31_3:
; RV64IA-TSO-NEXT: ret
+; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i64_acquire_monotonic:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: mv a5, a4
+; RV32IA-WMO-ZACAS-NEXT: mv a7, a2
+; RV32IA-WMO-ZACAS-NEXT: mv a4, a3
+; RV32IA-WMO-ZACAS-NEXT: mv a6, a1
+; RV32IA-WMO-ZACAS-NEXT: amocas.d.aq a6, a4, (a0)
+; RV32IA-WMO-ZACAS-NEXT: ret
+; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i64_acquire_monotonic:
+; RV64IA-WMO-ZACAS: # %bb.0:
+; RV64IA-WMO-ZACAS-NEXT: amocas.d.aq a1, a2, (a0)
+; RV64IA-WMO-ZACAS-NEXT: ret
%res = cmpxchg ptr %ptr, i64 %cmp, i64 %val acquire monotonic
ret void
}
@@ -2935,22 +4431,48 @@ define void @cmpxchg_i64_acquire_acquire(ptr %ptr, i64 %cmp, i64 %val) nounwind
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-LABEL: cmpxchg_i64_acquire_acquire:
-; RV32IA: # %bb.0:
-; RV32IA-NEXT: addi sp, sp, -16
-; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: mv a6, a4
-; RV32IA-NEXT: sw a2, 4(sp)
-; RV32IA-NEXT: sw a1, 0(sp)
-; RV32IA-NEXT: mv a1, sp
-; RV32IA-NEXT: li a4, 2
-; RV32IA-NEXT: li a5, 2
-; RV32IA-NEXT: mv a2, a3
-; RV32IA-NEXT: mv a3, a6
-; RV32IA-NEXT: call __atomic_compare_exchange_8 at plt
-; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: addi sp, sp, 16
-; RV32IA-NEXT: ret
+; RV32IA-WMO-LABEL: cmpxchg_i64_acquire_acquire:
+; RV32IA-WMO: # %bb.0:
+; RV32IA-WMO-NEXT: addi sp, sp, -16
+; RV32IA-WMO-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IA-WMO-NEXT: mv a6, a4
+; RV32IA-WMO-NEXT: sw a2, 4(sp)
+; RV32IA-WMO-NEXT: sw a1, 0(sp)
+; RV32IA-WMO-NEXT: mv a1, sp
+; RV32IA-WMO-NEXT: li a4, 2
+; RV32IA-WMO-NEXT: li a5, 2
+; RV32IA-WMO-NEXT: mv a2, a3
+; RV32IA-WMO-NEXT: mv a3, a6
+; RV32IA-WMO-NEXT: call __atomic_compare_exchange_8 at plt
+; RV32IA-WMO-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IA-WMO-NEXT: addi sp, sp, 16
+; RV32IA-WMO-NEXT: ret
+;
+; RV32IA-ZACAS-LABEL: cmpxchg_i64_acquire_acquire:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: mv a5, a4
+; RV32IA-ZACAS-NEXT: mv a7, a2
+; RV32IA-ZACAS-NEXT: mv a4, a3
+; RV32IA-ZACAS-NEXT: mv a6, a1
+; RV32IA-ZACAS-NEXT: amocas.d.aq a6, a4, (a0)
+; RV32IA-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-LABEL: cmpxchg_i64_acquire_acquire:
+; RV32IA-TSO: # %bb.0:
+; RV32IA-TSO-NEXT: addi sp, sp, -16
+; RV32IA-TSO-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IA-TSO-NEXT: mv a6, a4
+; RV32IA-TSO-NEXT: sw a2, 4(sp)
+; RV32IA-TSO-NEXT: sw a1, 0(sp)
+; RV32IA-TSO-NEXT: mv a1, sp
+; RV32IA-TSO-NEXT: li a4, 2
+; RV32IA-TSO-NEXT: li a5, 2
+; RV32IA-TSO-NEXT: mv a2, a3
+; RV32IA-TSO-NEXT: mv a3, a6
+; RV32IA-TSO-NEXT: call __atomic_compare_exchange_8 at plt
+; RV32IA-TSO-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IA-TSO-NEXT: addi sp, sp, 16
+; RV32IA-TSO-NEXT: ret
;
; RV64I-LABEL: cmpxchg_i64_acquire_acquire:
; RV64I: # %bb.0:
@@ -2976,6 +4498,11 @@ define void @cmpxchg_i64_acquire_acquire(ptr %ptr, i64 %cmp, i64 %val) nounwind
; RV64IA-WMO-NEXT: .LBB32_3:
; RV64IA-WMO-NEXT: ret
;
+; RV64IA-ZACAS-LABEL: cmpxchg_i64_acquire_acquire:
+; RV64IA-ZACAS: # %bb.0:
+; RV64IA-ZACAS-NEXT: amocas.d.aq a1, a2, (a0)
+; RV64IA-ZACAS-NEXT: ret
+;
; RV64IA-TSO-LABEL: cmpxchg_i64_acquire_acquire:
; RV64IA-TSO: # %bb.0:
; RV64IA-TSO-NEXT: .LBB32_1: # =>This Inner Loop Header: Depth=1
@@ -2986,6 +4513,18 @@ define void @cmpxchg_i64_acquire_acquire(ptr %ptr, i64 %cmp, i64 %val) nounwind
; RV64IA-TSO-NEXT: bnez a4, .LBB32_1
; RV64IA-TSO-NEXT: .LBB32_3:
; RV64IA-TSO-NEXT: ret
+; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i64_acquire_acquire:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: mv a5, a4
+; RV32IA-WMO-ZACAS-NEXT: mv a7, a2
+; RV32IA-WMO-ZACAS-NEXT: mv a4, a3
+; RV32IA-WMO-ZACAS-NEXT: mv a6, a1
+; RV32IA-WMO-ZACAS-NEXT: amocas.d.aq a6, a4, (a0)
+; RV32IA-WMO-ZACAS-NEXT: ret
+; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i64_acquire_acquire:
+; RV64IA-WMO-ZACAS: # %bb.0:
+; RV64IA-WMO-ZACAS-NEXT: amocas.d.aq a1, a2, (a0)
+; RV64IA-WMO-ZACAS-NEXT: ret
%res = cmpxchg ptr %ptr, i64 %cmp, i64 %val acquire acquire
ret void
}
@@ -3008,22 +4547,48 @@ define void @cmpxchg_i64_release_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwin
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-LABEL: cmpxchg_i64_release_monotonic:
-; RV32IA: # %bb.0:
-; RV32IA-NEXT: addi sp, sp, -16
-; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: mv a5, a4
-; RV32IA-NEXT: sw a2, 4(sp)
-; RV32IA-NEXT: sw a1, 0(sp)
-; RV32IA-NEXT: mv a1, sp
-; RV32IA-NEXT: li a4, 3
-; RV32IA-NEXT: mv a2, a3
-; RV32IA-NEXT: mv a3, a5
-; RV32IA-NEXT: li a5, 0
-; RV32IA-NEXT: call __atomic_compare_exchange_8 at plt
-; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: addi sp, sp, 16
-; RV32IA-NEXT: ret
+; RV32IA-WMO-LABEL: cmpxchg_i64_release_monotonic:
+; RV32IA-WMO: # %bb.0:
+; RV32IA-WMO-NEXT: addi sp, sp, -16
+; RV32IA-WMO-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IA-WMO-NEXT: mv a5, a4
+; RV32IA-WMO-NEXT: sw a2, 4(sp)
+; RV32IA-WMO-NEXT: sw a1, 0(sp)
+; RV32IA-WMO-NEXT: mv a1, sp
+; RV32IA-WMO-NEXT: li a4, 3
+; RV32IA-WMO-NEXT: mv a2, a3
+; RV32IA-WMO-NEXT: mv a3, a5
+; RV32IA-WMO-NEXT: li a5, 0
+; RV32IA-WMO-NEXT: call __atomic_compare_exchange_8 at plt
+; RV32IA-WMO-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IA-WMO-NEXT: addi sp, sp, 16
+; RV32IA-WMO-NEXT: ret
+;
+; RV32IA-ZACAS-LABEL: cmpxchg_i64_release_monotonic:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: mv a5, a4
+; RV32IA-ZACAS-NEXT: mv a7, a2
+; RV32IA-ZACAS-NEXT: mv a4, a3
+; RV32IA-ZACAS-NEXT: mv a6, a1
+; RV32IA-ZACAS-NEXT: amocas.d.rl a6, a4, (a0)
+; RV32IA-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-LABEL: cmpxchg_i64_release_monotonic:
+; RV32IA-TSO: # %bb.0:
+; RV32IA-TSO-NEXT: addi sp, sp, -16
+; RV32IA-TSO-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IA-TSO-NEXT: mv a5, a4
+; RV32IA-TSO-NEXT: sw a2, 4(sp)
+; RV32IA-TSO-NEXT: sw a1, 0(sp)
+; RV32IA-TSO-NEXT: mv a1, sp
+; RV32IA-TSO-NEXT: li a4, 3
+; RV32IA-TSO-NEXT: mv a2, a3
+; RV32IA-TSO-NEXT: mv a3, a5
+; RV32IA-TSO-NEXT: li a5, 0
+; RV32IA-TSO-NEXT: call __atomic_compare_exchange_8 at plt
+; RV32IA-TSO-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IA-TSO-NEXT: addi sp, sp, 16
+; RV32IA-TSO-NEXT: ret
;
; RV64I-LABEL: cmpxchg_i64_release_monotonic:
; RV64I: # %bb.0:
@@ -3049,6 +4614,11 @@ define void @cmpxchg_i64_release_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwin
; RV64IA-WMO-NEXT: .LBB33_3:
; RV64IA-WMO-NEXT: ret
;
+; RV64IA-ZACAS-LABEL: cmpxchg_i64_release_monotonic:
+; RV64IA-ZACAS: # %bb.0:
+; RV64IA-ZACAS-NEXT: amocas.d.rl a1, a2, (a0)
+; RV64IA-ZACAS-NEXT: ret
+;
; RV64IA-TSO-LABEL: cmpxchg_i64_release_monotonic:
; RV64IA-TSO: # %bb.0:
; RV64IA-TSO-NEXT: .LBB33_1: # =>This Inner Loop Header: Depth=1
@@ -3059,6 +4629,18 @@ define void @cmpxchg_i64_release_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwin
; RV64IA-TSO-NEXT: bnez a4, .LBB33_1
; RV64IA-TSO-NEXT: .LBB33_3:
; RV64IA-TSO-NEXT: ret
+; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i64_release_monotonic:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: mv a5, a4
+; RV32IA-WMO-ZACAS-NEXT: mv a7, a2
+; RV32IA-WMO-ZACAS-NEXT: mv a4, a3
+; RV32IA-WMO-ZACAS-NEXT: mv a6, a1
+; RV32IA-WMO-ZACAS-NEXT: amocas.d.rl a6, a4, (a0)
+; RV32IA-WMO-ZACAS-NEXT: ret
+; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i64_release_monotonic:
+; RV64IA-WMO-ZACAS: # %bb.0:
+; RV64IA-WMO-ZACAS-NEXT: amocas.d.rl a1, a2, (a0)
+; RV64IA-WMO-ZACAS-NEXT: ret
%res = cmpxchg ptr %ptr, i64 %cmp, i64 %val release monotonic
ret void
}
@@ -3081,22 +4663,48 @@ define void @cmpxchg_i64_release_acquire(ptr %ptr, i64 %cmp, i64 %val) nounwind
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-LABEL: cmpxchg_i64_release_acquire:
-; RV32IA: # %bb.0:
-; RV32IA-NEXT: addi sp, sp, -16
-; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: mv a6, a4
-; RV32IA-NEXT: sw a2, 4(sp)
-; RV32IA-NEXT: sw a1, 0(sp)
-; RV32IA-NEXT: mv a1, sp
-; RV32IA-NEXT: li a4, 3
-; RV32IA-NEXT: li a5, 2
-; RV32IA-NEXT: mv a2, a3
-; RV32IA-NEXT: mv a3, a6
-; RV32IA-NEXT: call __atomic_compare_exchange_8 at plt
-; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: addi sp, sp, 16
-; RV32IA-NEXT: ret
+; RV32IA-WMO-LABEL: cmpxchg_i64_release_acquire:
+; RV32IA-WMO: # %bb.0:
+; RV32IA-WMO-NEXT: addi sp, sp, -16
+; RV32IA-WMO-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IA-WMO-NEXT: mv a6, a4
+; RV32IA-WMO-NEXT: sw a2, 4(sp)
+; RV32IA-WMO-NEXT: sw a1, 0(sp)
+; RV32IA-WMO-NEXT: mv a1, sp
+; RV32IA-WMO-NEXT: li a4, 3
+; RV32IA-WMO-NEXT: li a5, 2
+; RV32IA-WMO-NEXT: mv a2, a3
+; RV32IA-WMO-NEXT: mv a3, a6
+; RV32IA-WMO-NEXT: call __atomic_compare_exchange_8 at plt
+; RV32IA-WMO-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IA-WMO-NEXT: addi sp, sp, 16
+; RV32IA-WMO-NEXT: ret
+;
+; RV32IA-ZACAS-LABEL: cmpxchg_i64_release_acquire:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: mv a5, a4
+; RV32IA-ZACAS-NEXT: mv a7, a2
+; RV32IA-ZACAS-NEXT: mv a4, a3
+; RV32IA-ZACAS-NEXT: mv a6, a1
+; RV32IA-ZACAS-NEXT: amocas.d.aqrl a6, a4, (a0)
+; RV32IA-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-LABEL: cmpxchg_i64_release_acquire:
+; RV32IA-TSO: # %bb.0:
+; RV32IA-TSO-NEXT: addi sp, sp, -16
+; RV32IA-TSO-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IA-TSO-NEXT: mv a6, a4
+; RV32IA-TSO-NEXT: sw a2, 4(sp)
+; RV32IA-TSO-NEXT: sw a1, 0(sp)
+; RV32IA-TSO-NEXT: mv a1, sp
+; RV32IA-TSO-NEXT: li a4, 3
+; RV32IA-TSO-NEXT: li a5, 2
+; RV32IA-TSO-NEXT: mv a2, a3
+; RV32IA-TSO-NEXT: mv a3, a6
+; RV32IA-TSO-NEXT: call __atomic_compare_exchange_8 at plt
+; RV32IA-TSO-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IA-TSO-NEXT: addi sp, sp, 16
+; RV32IA-TSO-NEXT: ret
;
; RV64I-LABEL: cmpxchg_i64_release_acquire:
; RV64I: # %bb.0:
@@ -3122,6 +4730,11 @@ define void @cmpxchg_i64_release_acquire(ptr %ptr, i64 %cmp, i64 %val) nounwind
; RV64IA-WMO-NEXT: .LBB34_3:
; RV64IA-WMO-NEXT: ret
;
+; RV64IA-ZACAS-LABEL: cmpxchg_i64_release_acquire:
+; RV64IA-ZACAS: # %bb.0:
+; RV64IA-ZACAS-NEXT: amocas.d.aqrl a1, a2, (a0)
+; RV64IA-ZACAS-NEXT: ret
+;
; RV64IA-TSO-LABEL: cmpxchg_i64_release_acquire:
; RV64IA-TSO: # %bb.0:
; RV64IA-TSO-NEXT: .LBB34_1: # =>This Inner Loop Header: Depth=1
@@ -3132,6 +4745,18 @@ define void @cmpxchg_i64_release_acquire(ptr %ptr, i64 %cmp, i64 %val) nounwind
; RV64IA-TSO-NEXT: bnez a4, .LBB34_1
; RV64IA-TSO-NEXT: .LBB34_3:
; RV64IA-TSO-NEXT: ret
+; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i64_release_acquire:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: mv a5, a4
+; RV32IA-WMO-ZACAS-NEXT: mv a7, a2
+; RV32IA-WMO-ZACAS-NEXT: mv a4, a3
+; RV32IA-WMO-ZACAS-NEXT: mv a6, a1
+; RV32IA-WMO-ZACAS-NEXT: amocas.d.aqrl a6, a4, (a0)
+; RV32IA-WMO-ZACAS-NEXT: ret
+; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i64_release_acquire:
+; RV64IA-WMO-ZACAS: # %bb.0:
+; RV64IA-WMO-ZACAS-NEXT: amocas.d.aqrl a1, a2, (a0)
+; RV64IA-WMO-ZACAS-NEXT: ret
%res = cmpxchg ptr %ptr, i64 %cmp, i64 %val release acquire
ret void
}
@@ -3154,22 +4779,48 @@ define void @cmpxchg_i64_acq_rel_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwin
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-LABEL: cmpxchg_i64_acq_rel_monotonic:
-; RV32IA: # %bb.0:
-; RV32IA-NEXT: addi sp, sp, -16
-; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: mv a5, a4
-; RV32IA-NEXT: sw a2, 4(sp)
-; RV32IA-NEXT: sw a1, 0(sp)
-; RV32IA-NEXT: mv a1, sp
-; RV32IA-NEXT: li a4, 4
-; RV32IA-NEXT: mv a2, a3
-; RV32IA-NEXT: mv a3, a5
-; RV32IA-NEXT: li a5, 0
-; RV32IA-NEXT: call __atomic_compare_exchange_8 at plt
-; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: addi sp, sp, 16
-; RV32IA-NEXT: ret
+; RV32IA-WMO-LABEL: cmpxchg_i64_acq_rel_monotonic:
+; RV32IA-WMO: # %bb.0:
+; RV32IA-WMO-NEXT: addi sp, sp, -16
+; RV32IA-WMO-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IA-WMO-NEXT: mv a5, a4
+; RV32IA-WMO-NEXT: sw a2, 4(sp)
+; RV32IA-WMO-NEXT: sw a1, 0(sp)
+; RV32IA-WMO-NEXT: mv a1, sp
+; RV32IA-WMO-NEXT: li a4, 4
+; RV32IA-WMO-NEXT: mv a2, a3
+; RV32IA-WMO-NEXT: mv a3, a5
+; RV32IA-WMO-NEXT: li a5, 0
+; RV32IA-WMO-NEXT: call __atomic_compare_exchange_8 at plt
+; RV32IA-WMO-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IA-WMO-NEXT: addi sp, sp, 16
+; RV32IA-WMO-NEXT: ret
+;
+; RV32IA-ZACAS-LABEL: cmpxchg_i64_acq_rel_monotonic:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: mv a5, a4
+; RV32IA-ZACAS-NEXT: mv a7, a2
+; RV32IA-ZACAS-NEXT: mv a4, a3
+; RV32IA-ZACAS-NEXT: mv a6, a1
+; RV32IA-ZACAS-NEXT: amocas.d.aqrl a6, a4, (a0)
+; RV32IA-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-LABEL: cmpxchg_i64_acq_rel_monotonic:
+; RV32IA-TSO: # %bb.0:
+; RV32IA-TSO-NEXT: addi sp, sp, -16
+; RV32IA-TSO-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IA-TSO-NEXT: mv a5, a4
+; RV32IA-TSO-NEXT: sw a2, 4(sp)
+; RV32IA-TSO-NEXT: sw a1, 0(sp)
+; RV32IA-TSO-NEXT: mv a1, sp
+; RV32IA-TSO-NEXT: li a4, 4
+; RV32IA-TSO-NEXT: mv a2, a3
+; RV32IA-TSO-NEXT: mv a3, a5
+; RV32IA-TSO-NEXT: li a5, 0
+; RV32IA-TSO-NEXT: call __atomic_compare_exchange_8 at plt
+; RV32IA-TSO-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IA-TSO-NEXT: addi sp, sp, 16
+; RV32IA-TSO-NEXT: ret
;
; RV64I-LABEL: cmpxchg_i64_acq_rel_monotonic:
; RV64I: # %bb.0:
@@ -3195,6 +4846,11 @@ define void @cmpxchg_i64_acq_rel_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwin
; RV64IA-WMO-NEXT: .LBB35_3:
; RV64IA-WMO-NEXT: ret
;
+; RV64IA-ZACAS-LABEL: cmpxchg_i64_acq_rel_monotonic:
+; RV64IA-ZACAS: # %bb.0:
+; RV64IA-ZACAS-NEXT: amocas.d.aqrl a1, a2, (a0)
+; RV64IA-ZACAS-NEXT: ret
+;
; RV64IA-TSO-LABEL: cmpxchg_i64_acq_rel_monotonic:
; RV64IA-TSO: # %bb.0:
; RV64IA-TSO-NEXT: .LBB35_1: # =>This Inner Loop Header: Depth=1
@@ -3205,6 +4861,18 @@ define void @cmpxchg_i64_acq_rel_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwin
; RV64IA-TSO-NEXT: bnez a4, .LBB35_1
; RV64IA-TSO-NEXT: .LBB35_3:
; RV64IA-TSO-NEXT: ret
+; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i64_acq_rel_monotonic:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: mv a5, a4
+; RV32IA-WMO-ZACAS-NEXT: mv a7, a2
+; RV32IA-WMO-ZACAS-NEXT: mv a4, a3
+; RV32IA-WMO-ZACAS-NEXT: mv a6, a1
+; RV32IA-WMO-ZACAS-NEXT: amocas.d.aqrl a6, a4, (a0)
+; RV32IA-WMO-ZACAS-NEXT: ret
+; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i64_acq_rel_monotonic:
+; RV64IA-WMO-ZACAS: # %bb.0:
+; RV64IA-WMO-ZACAS-NEXT: amocas.d.aqrl a1, a2, (a0)
+; RV64IA-WMO-ZACAS-NEXT: ret
%res = cmpxchg ptr %ptr, i64 %cmp, i64 %val acq_rel monotonic
ret void
}
@@ -3227,22 +4895,48 @@ define void @cmpxchg_i64_acq_rel_acquire(ptr %ptr, i64 %cmp, i64 %val) nounwind
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-LABEL: cmpxchg_i64_acq_rel_acquire:
-; RV32IA: # %bb.0:
-; RV32IA-NEXT: addi sp, sp, -16
-; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: mv a6, a4
-; RV32IA-NEXT: sw a2, 4(sp)
-; RV32IA-NEXT: sw a1, 0(sp)
-; RV32IA-NEXT: mv a1, sp
-; RV32IA-NEXT: li a4, 4
-; RV32IA-NEXT: li a5, 2
-; RV32IA-NEXT: mv a2, a3
-; RV32IA-NEXT: mv a3, a6
-; RV32IA-NEXT: call __atomic_compare_exchange_8 at plt
-; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: addi sp, sp, 16
-; RV32IA-NEXT: ret
+; RV32IA-WMO-LABEL: cmpxchg_i64_acq_rel_acquire:
+; RV32IA-WMO: # %bb.0:
+; RV32IA-WMO-NEXT: addi sp, sp, -16
+; RV32IA-WMO-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IA-WMO-NEXT: mv a6, a4
+; RV32IA-WMO-NEXT: sw a2, 4(sp)
+; RV32IA-WMO-NEXT: sw a1, 0(sp)
+; RV32IA-WMO-NEXT: mv a1, sp
+; RV32IA-WMO-NEXT: li a4, 4
+; RV32IA-WMO-NEXT: li a5, 2
+; RV32IA-WMO-NEXT: mv a2, a3
+; RV32IA-WMO-NEXT: mv a3, a6
+; RV32IA-WMO-NEXT: call __atomic_compare_exchange_8 at plt
+; RV32IA-WMO-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IA-WMO-NEXT: addi sp, sp, 16
+; RV32IA-WMO-NEXT: ret
+;
+; RV32IA-ZACAS-LABEL: cmpxchg_i64_acq_rel_acquire:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: mv a5, a4
+; RV32IA-ZACAS-NEXT: mv a7, a2
+; RV32IA-ZACAS-NEXT: mv a4, a3
+; RV32IA-ZACAS-NEXT: mv a6, a1
+; RV32IA-ZACAS-NEXT: amocas.d.aqrl a6, a4, (a0)
+; RV32IA-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-LABEL: cmpxchg_i64_acq_rel_acquire:
+; RV32IA-TSO: # %bb.0:
+; RV32IA-TSO-NEXT: addi sp, sp, -16
+; RV32IA-TSO-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IA-TSO-NEXT: mv a6, a4
+; RV32IA-TSO-NEXT: sw a2, 4(sp)
+; RV32IA-TSO-NEXT: sw a1, 0(sp)
+; RV32IA-TSO-NEXT: mv a1, sp
+; RV32IA-TSO-NEXT: li a4, 4
+; RV32IA-TSO-NEXT: li a5, 2
+; RV32IA-TSO-NEXT: mv a2, a3
+; RV32IA-TSO-NEXT: mv a3, a6
+; RV32IA-TSO-NEXT: call __atomic_compare_exchange_8 at plt
+; RV32IA-TSO-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IA-TSO-NEXT: addi sp, sp, 16
+; RV32IA-TSO-NEXT: ret
;
; RV64I-LABEL: cmpxchg_i64_acq_rel_acquire:
; RV64I: # %bb.0:
@@ -3268,6 +4962,11 @@ define void @cmpxchg_i64_acq_rel_acquire(ptr %ptr, i64 %cmp, i64 %val) nounwind
; RV64IA-WMO-NEXT: .LBB36_3:
; RV64IA-WMO-NEXT: ret
;
+; RV64IA-ZACAS-LABEL: cmpxchg_i64_acq_rel_acquire:
+; RV64IA-ZACAS: # %bb.0:
+; RV64IA-ZACAS-NEXT: amocas.d.aqrl a1, a2, (a0)
+; RV64IA-ZACAS-NEXT: ret
+;
; RV64IA-TSO-LABEL: cmpxchg_i64_acq_rel_acquire:
; RV64IA-TSO: # %bb.0:
; RV64IA-TSO-NEXT: .LBB36_1: # =>This Inner Loop Header: Depth=1
@@ -3278,6 +4977,18 @@ define void @cmpxchg_i64_acq_rel_acquire(ptr %ptr, i64 %cmp, i64 %val) nounwind
; RV64IA-TSO-NEXT: bnez a4, .LBB36_1
; RV64IA-TSO-NEXT: .LBB36_3:
; RV64IA-TSO-NEXT: ret
+; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i64_acq_rel_acquire:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: mv a5, a4
+; RV32IA-WMO-ZACAS-NEXT: mv a7, a2
+; RV32IA-WMO-ZACAS-NEXT: mv a4, a3
+; RV32IA-WMO-ZACAS-NEXT: mv a6, a1
+; RV32IA-WMO-ZACAS-NEXT: amocas.d.aqrl a6, a4, (a0)
+; RV32IA-WMO-ZACAS-NEXT: ret
+; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i64_acq_rel_acquire:
+; RV64IA-WMO-ZACAS: # %bb.0:
+; RV64IA-WMO-ZACAS-NEXT: amocas.d.aqrl a1, a2, (a0)
+; RV64IA-WMO-ZACAS-NEXT: ret
%res = cmpxchg ptr %ptr, i64 %cmp, i64 %val acq_rel acquire
ret void
}
@@ -3300,22 +5011,48 @@ define void @cmpxchg_i64_seq_cst_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwin
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-LABEL: cmpxchg_i64_seq_cst_monotonic:
-; RV32IA: # %bb.0:
-; RV32IA-NEXT: addi sp, sp, -16
-; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: mv a5, a4
-; RV32IA-NEXT: sw a2, 4(sp)
-; RV32IA-NEXT: sw a1, 0(sp)
-; RV32IA-NEXT: mv a1, sp
-; RV32IA-NEXT: li a4, 5
-; RV32IA-NEXT: mv a2, a3
-; RV32IA-NEXT: mv a3, a5
-; RV32IA-NEXT: li a5, 0
-; RV32IA-NEXT: call __atomic_compare_exchange_8 at plt
-; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: addi sp, sp, 16
-; RV32IA-NEXT: ret
+; RV32IA-WMO-LABEL: cmpxchg_i64_seq_cst_monotonic:
+; RV32IA-WMO: # %bb.0:
+; RV32IA-WMO-NEXT: addi sp, sp, -16
+; RV32IA-WMO-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IA-WMO-NEXT: mv a5, a4
+; RV32IA-WMO-NEXT: sw a2, 4(sp)
+; RV32IA-WMO-NEXT: sw a1, 0(sp)
+; RV32IA-WMO-NEXT: mv a1, sp
+; RV32IA-WMO-NEXT: li a4, 5
+; RV32IA-WMO-NEXT: mv a2, a3
+; RV32IA-WMO-NEXT: mv a3, a5
+; RV32IA-WMO-NEXT: li a5, 0
+; RV32IA-WMO-NEXT: call __atomic_compare_exchange_8 at plt
+; RV32IA-WMO-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IA-WMO-NEXT: addi sp, sp, 16
+; RV32IA-WMO-NEXT: ret
+;
+; RV32IA-ZACAS-LABEL: cmpxchg_i64_seq_cst_monotonic:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: mv a5, a4
+; RV32IA-ZACAS-NEXT: mv a7, a2
+; RV32IA-ZACAS-NEXT: mv a4, a3
+; RV32IA-ZACAS-NEXT: mv a6, a1
+; RV32IA-ZACAS-NEXT: amocas.d.aqrl a6, a4, (a0)
+; RV32IA-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-LABEL: cmpxchg_i64_seq_cst_monotonic:
+; RV32IA-TSO: # %bb.0:
+; RV32IA-TSO-NEXT: addi sp, sp, -16
+; RV32IA-TSO-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IA-TSO-NEXT: mv a5, a4
+; RV32IA-TSO-NEXT: sw a2, 4(sp)
+; RV32IA-TSO-NEXT: sw a1, 0(sp)
+; RV32IA-TSO-NEXT: mv a1, sp
+; RV32IA-TSO-NEXT: li a4, 5
+; RV32IA-TSO-NEXT: mv a2, a3
+; RV32IA-TSO-NEXT: mv a3, a5
+; RV32IA-TSO-NEXT: li a5, 0
+; RV32IA-TSO-NEXT: call __atomic_compare_exchange_8 at plt
+; RV32IA-TSO-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IA-TSO-NEXT: addi sp, sp, 16
+; RV32IA-TSO-NEXT: ret
;
; RV64I-LABEL: cmpxchg_i64_seq_cst_monotonic:
; RV64I: # %bb.0:
@@ -3330,16 +5067,44 @@ define void @cmpxchg_i64_seq_cst_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwin
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
-; RV64IA-LABEL: cmpxchg_i64_seq_cst_monotonic:
-; RV64IA: # %bb.0:
-; RV64IA-NEXT: .LBB37_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT: lr.d.aqrl a3, (a0)
-; RV64IA-NEXT: bne a3, a1, .LBB37_3
-; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB37_1 Depth=1
-; RV64IA-NEXT: sc.d.rl a4, a2, (a0)
-; RV64IA-NEXT: bnez a4, .LBB37_1
-; RV64IA-NEXT: .LBB37_3:
-; RV64IA-NEXT: ret
+; RV64IA-WMO-LABEL: cmpxchg_i64_seq_cst_monotonic:
+; RV64IA-WMO: # %bb.0:
+; RV64IA-WMO-NEXT: .LBB37_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT: lr.d.aqrl a3, (a0)
+; RV64IA-WMO-NEXT: bne a3, a1, .LBB37_3
+; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB37_1 Depth=1
+; RV64IA-WMO-NEXT: sc.d.rl a4, a2, (a0)
+; RV64IA-WMO-NEXT: bnez a4, .LBB37_1
+; RV64IA-WMO-NEXT: .LBB37_3:
+; RV64IA-WMO-NEXT: ret
+;
+; RV64IA-ZACAS-LABEL: cmpxchg_i64_seq_cst_monotonic:
+; RV64IA-ZACAS: # %bb.0:
+; RV64IA-ZACAS-NEXT: amocas.d.aqrl a1, a2, (a0)
+; RV64IA-ZACAS-NEXT: ret
+;
+; RV64IA-TSO-LABEL: cmpxchg_i64_seq_cst_monotonic:
+; RV64IA-TSO: # %bb.0:
+; RV64IA-TSO-NEXT: .LBB37_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT: lr.d.aqrl a3, (a0)
+; RV64IA-TSO-NEXT: bne a3, a1, .LBB37_3
+; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB37_1 Depth=1
+; RV64IA-TSO-NEXT: sc.d.rl a4, a2, (a0)
+; RV64IA-TSO-NEXT: bnez a4, .LBB37_1
+; RV64IA-TSO-NEXT: .LBB37_3:
+; RV64IA-TSO-NEXT: ret
+; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i64_seq_cst_monotonic:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: mv a5, a4
+; RV32IA-WMO-ZACAS-NEXT: mv a7, a2
+; RV32IA-WMO-ZACAS-NEXT: mv a4, a3
+; RV32IA-WMO-ZACAS-NEXT: mv a6, a1
+; RV32IA-WMO-ZACAS-NEXT: amocas.d.aqrl a6, a4, (a0)
+; RV32IA-WMO-ZACAS-NEXT: ret
+; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i64_seq_cst_monotonic:
+; RV64IA-WMO-ZACAS: # %bb.0:
+; RV64IA-WMO-ZACAS-NEXT: amocas.d.aqrl a1, a2, (a0)
+; RV64IA-WMO-ZACAS-NEXT: ret
%res = cmpxchg ptr %ptr, i64 %cmp, i64 %val seq_cst monotonic
ret void
}
@@ -3362,22 +5127,48 @@ define void @cmpxchg_i64_seq_cst_acquire(ptr %ptr, i64 %cmp, i64 %val) nounwind
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-LABEL: cmpxchg_i64_seq_cst_acquire:
-; RV32IA: # %bb.0:
-; RV32IA-NEXT: addi sp, sp, -16
-; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: mv a6, a4
-; RV32IA-NEXT: sw a2, 4(sp)
-; RV32IA-NEXT: sw a1, 0(sp)
-; RV32IA-NEXT: mv a1, sp
-; RV32IA-NEXT: li a4, 5
-; RV32IA-NEXT: li a5, 2
-; RV32IA-NEXT: mv a2, a3
-; RV32IA-NEXT: mv a3, a6
-; RV32IA-NEXT: call __atomic_compare_exchange_8 at plt
-; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: addi sp, sp, 16
-; RV32IA-NEXT: ret
+; RV32IA-WMO-LABEL: cmpxchg_i64_seq_cst_acquire:
+; RV32IA-WMO: # %bb.0:
+; RV32IA-WMO-NEXT: addi sp, sp, -16
+; RV32IA-WMO-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IA-WMO-NEXT: mv a6, a4
+; RV32IA-WMO-NEXT: sw a2, 4(sp)
+; RV32IA-WMO-NEXT: sw a1, 0(sp)
+; RV32IA-WMO-NEXT: mv a1, sp
+; RV32IA-WMO-NEXT: li a4, 5
+; RV32IA-WMO-NEXT: li a5, 2
+; RV32IA-WMO-NEXT: mv a2, a3
+; RV32IA-WMO-NEXT: mv a3, a6
+; RV32IA-WMO-NEXT: call __atomic_compare_exchange_8 at plt
+; RV32IA-WMO-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IA-WMO-NEXT: addi sp, sp, 16
+; RV32IA-WMO-NEXT: ret
+;
+; RV32IA-ZACAS-LABEL: cmpxchg_i64_seq_cst_acquire:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: mv a5, a4
+; RV32IA-ZACAS-NEXT: mv a7, a2
+; RV32IA-ZACAS-NEXT: mv a4, a3
+; RV32IA-ZACAS-NEXT: mv a6, a1
+; RV32IA-ZACAS-NEXT: amocas.d.aqrl a6, a4, (a0)
+; RV32IA-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-LABEL: cmpxchg_i64_seq_cst_acquire:
+; RV32IA-TSO: # %bb.0:
+; RV32IA-TSO-NEXT: addi sp, sp, -16
+; RV32IA-TSO-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IA-TSO-NEXT: mv a6, a4
+; RV32IA-TSO-NEXT: sw a2, 4(sp)
+; RV32IA-TSO-NEXT: sw a1, 0(sp)
+; RV32IA-TSO-NEXT: mv a1, sp
+; RV32IA-TSO-NEXT: li a4, 5
+; RV32IA-TSO-NEXT: li a5, 2
+; RV32IA-TSO-NEXT: mv a2, a3
+; RV32IA-TSO-NEXT: mv a3, a6
+; RV32IA-TSO-NEXT: call __atomic_compare_exchange_8 at plt
+; RV32IA-TSO-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IA-TSO-NEXT: addi sp, sp, 16
+; RV32IA-TSO-NEXT: ret
;
; RV64I-LABEL: cmpxchg_i64_seq_cst_acquire:
; RV64I: # %bb.0:
@@ -3392,16 +5183,44 @@ define void @cmpxchg_i64_seq_cst_acquire(ptr %ptr, i64 %cmp, i64 %val) nounwind
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
-; RV64IA-LABEL: cmpxchg_i64_seq_cst_acquire:
-; RV64IA: # %bb.0:
-; RV64IA-NEXT: .LBB38_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT: lr.d.aqrl a3, (a0)
-; RV64IA-NEXT: bne a3, a1, .LBB38_3
-; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB38_1 Depth=1
-; RV64IA-NEXT: sc.d.rl a4, a2, (a0)
-; RV64IA-NEXT: bnez a4, .LBB38_1
-; RV64IA-NEXT: .LBB38_3:
-; RV64IA-NEXT: ret
+; RV64IA-WMO-LABEL: cmpxchg_i64_seq_cst_acquire:
+; RV64IA-WMO: # %bb.0:
+; RV64IA-WMO-NEXT: .LBB38_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT: lr.d.aqrl a3, (a0)
+; RV64IA-WMO-NEXT: bne a3, a1, .LBB38_3
+; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB38_1 Depth=1
+; RV64IA-WMO-NEXT: sc.d.rl a4, a2, (a0)
+; RV64IA-WMO-NEXT: bnez a4, .LBB38_1
+; RV64IA-WMO-NEXT: .LBB38_3:
+; RV64IA-WMO-NEXT: ret
+;
+; RV64IA-ZACAS-LABEL: cmpxchg_i64_seq_cst_acquire:
+; RV64IA-ZACAS: # %bb.0:
+; RV64IA-ZACAS-NEXT: amocas.d.aqrl a1, a2, (a0)
+; RV64IA-ZACAS-NEXT: ret
+;
+; RV64IA-TSO-LABEL: cmpxchg_i64_seq_cst_acquire:
+; RV64IA-TSO: # %bb.0:
+; RV64IA-TSO-NEXT: .LBB38_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT: lr.d.aqrl a3, (a0)
+; RV64IA-TSO-NEXT: bne a3, a1, .LBB38_3
+; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB38_1 Depth=1
+; RV64IA-TSO-NEXT: sc.d.rl a4, a2, (a0)
+; RV64IA-TSO-NEXT: bnez a4, .LBB38_1
+; RV64IA-TSO-NEXT: .LBB38_3:
+; RV64IA-TSO-NEXT: ret
+; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i64_seq_cst_acquire:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: mv a5, a4
+; RV32IA-WMO-ZACAS-NEXT: mv a7, a2
+; RV32IA-WMO-ZACAS-NEXT: mv a4, a3
+; RV32IA-WMO-ZACAS-NEXT: mv a6, a1
+; RV32IA-WMO-ZACAS-NEXT: amocas.d.aqrl a6, a4, (a0)
+; RV32IA-WMO-ZACAS-NEXT: ret
+; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i64_seq_cst_acquire:
+; RV64IA-WMO-ZACAS: # %bb.0:
+; RV64IA-WMO-ZACAS-NEXT: amocas.d.aqrl a1, a2, (a0)
+; RV64IA-WMO-ZACAS-NEXT: ret
%res = cmpxchg ptr %ptr, i64 %cmp, i64 %val seq_cst acquire
ret void
}
@@ -3424,25 +5243,51 @@ define void @cmpxchg_i64_seq_cst_seq_cst(ptr %ptr, i64 %cmp, i64 %val) nounwind
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-LABEL: cmpxchg_i64_seq_cst_seq_cst:
-; RV32IA: # %bb.0:
-; RV32IA-NEXT: addi sp, sp, -16
-; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: mv a6, a4
-; RV32IA-NEXT: sw a2, 4(sp)
-; RV32IA-NEXT: sw a1, 0(sp)
-; RV32IA-NEXT: mv a1, sp
-; RV32IA-NEXT: li a4, 5
-; RV32IA-NEXT: li a5, 5
-; RV32IA-NEXT: mv a2, a3
-; RV32IA-NEXT: mv a3, a6
-; RV32IA-NEXT: call __atomic_compare_exchange_8 at plt
-; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: addi sp, sp, 16
-; RV32IA-NEXT: ret
+; RV32IA-WMO-LABEL: cmpxchg_i64_seq_cst_seq_cst:
+; RV32IA-WMO: # %bb.0:
+; RV32IA-WMO-NEXT: addi sp, sp, -16
+; RV32IA-WMO-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IA-WMO-NEXT: mv a6, a4
+; RV32IA-WMO-NEXT: sw a2, 4(sp)
+; RV32IA-WMO-NEXT: sw a1, 0(sp)
+; RV32IA-WMO-NEXT: mv a1, sp
+; RV32IA-WMO-NEXT: li a4, 5
+; RV32IA-WMO-NEXT: li a5, 5
+; RV32IA-WMO-NEXT: mv a2, a3
+; RV32IA-WMO-NEXT: mv a3, a6
+; RV32IA-WMO-NEXT: call __atomic_compare_exchange_8 at plt
+; RV32IA-WMO-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IA-WMO-NEXT: addi sp, sp, 16
+; RV32IA-WMO-NEXT: ret
;
-; RV64I-LABEL: cmpxchg_i64_seq_cst_seq_cst:
-; RV64I: # %bb.0:
+; RV32IA-ZACAS-LABEL: cmpxchg_i64_seq_cst_seq_cst:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: mv a5, a4
+; RV32IA-ZACAS-NEXT: mv a7, a2
+; RV32IA-ZACAS-NEXT: mv a4, a3
+; RV32IA-ZACAS-NEXT: mv a6, a1
+; RV32IA-ZACAS-NEXT: amocas.d.aqrl a6, a4, (a0)
+; RV32IA-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-LABEL: cmpxchg_i64_seq_cst_seq_cst:
+; RV32IA-TSO: # %bb.0:
+; RV32IA-TSO-NEXT: addi sp, sp, -16
+; RV32IA-TSO-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IA-TSO-NEXT: mv a6, a4
+; RV32IA-TSO-NEXT: sw a2, 4(sp)
+; RV32IA-TSO-NEXT: sw a1, 0(sp)
+; RV32IA-TSO-NEXT: mv a1, sp
+; RV32IA-TSO-NEXT: li a4, 5
+; RV32IA-TSO-NEXT: li a5, 5
+; RV32IA-TSO-NEXT: mv a2, a3
+; RV32IA-TSO-NEXT: mv a3, a6
+; RV32IA-TSO-NEXT: call __atomic_compare_exchange_8 at plt
+; RV32IA-TSO-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IA-TSO-NEXT: addi sp, sp, 16
+; RV32IA-TSO-NEXT: ret
+;
+; RV64I-LABEL: cmpxchg_i64_seq_cst_seq_cst:
+; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd a1, 0(sp)
@@ -3454,16 +5299,1321 @@ define void @cmpxchg_i64_seq_cst_seq_cst(ptr %ptr, i64 %cmp, i64 %val) nounwind
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
-; RV64IA-LABEL: cmpxchg_i64_seq_cst_seq_cst:
-; RV64IA: # %bb.0:
-; RV64IA-NEXT: .LBB39_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT: lr.d.aqrl a3, (a0)
-; RV64IA-NEXT: bne a3, a1, .LBB39_3
-; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB39_1 Depth=1
-; RV64IA-NEXT: sc.d.rl a4, a2, (a0)
-; RV64IA-NEXT: bnez a4, .LBB39_1
-; RV64IA-NEXT: .LBB39_3:
-; RV64IA-NEXT: ret
+; RV64IA-WMO-LABEL: cmpxchg_i64_seq_cst_seq_cst:
+; RV64IA-WMO: # %bb.0:
+; RV64IA-WMO-NEXT: .LBB39_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT: lr.d.aqrl a3, (a0)
+; RV64IA-WMO-NEXT: bne a3, a1, .LBB39_3
+; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB39_1 Depth=1
+; RV64IA-WMO-NEXT: sc.d.rl a4, a2, (a0)
+; RV64IA-WMO-NEXT: bnez a4, .LBB39_1
+; RV64IA-WMO-NEXT: .LBB39_3:
+; RV64IA-WMO-NEXT: ret
+;
+; RV64IA-ZACAS-LABEL: cmpxchg_i64_seq_cst_seq_cst:
+; RV64IA-ZACAS: # %bb.0:
+; RV64IA-ZACAS-NEXT: amocas.d.aqrl a1, a2, (a0)
+; RV64IA-ZACAS-NEXT: ret
+;
+; RV64IA-TSO-LABEL: cmpxchg_i64_seq_cst_seq_cst:
+; RV64IA-TSO: # %bb.0:
+; RV64IA-TSO-NEXT: .LBB39_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT: lr.d.aqrl a3, (a0)
+; RV64IA-TSO-NEXT: bne a3, a1, .LBB39_3
+; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB39_1 Depth=1
+; RV64IA-TSO-NEXT: sc.d.rl a4, a2, (a0)
+; RV64IA-TSO-NEXT: bnez a4, .LBB39_1
+; RV64IA-TSO-NEXT: .LBB39_3:
+; RV64IA-TSO-NEXT: ret
+; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i64_seq_cst_seq_cst:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: mv a5, a4
+; RV32IA-WMO-ZACAS-NEXT: mv a7, a2
+; RV32IA-WMO-ZACAS-NEXT: mv a4, a3
+; RV32IA-WMO-ZACAS-NEXT: mv a6, a1
+; RV32IA-WMO-ZACAS-NEXT: amocas.d.aqrl a6, a4, (a0)
+; RV32IA-WMO-ZACAS-NEXT: ret
+; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i64_seq_cst_seq_cst:
+; RV64IA-WMO-ZACAS: # %bb.0:
+; RV64IA-WMO-ZACAS-NEXT: amocas.d.aqrl a1, a2, (a0)
+; RV64IA-WMO-ZACAS-NEXT: ret
%res = cmpxchg ptr %ptr, i64 %cmp, i64 %val seq_cst seq_cst
ret void
}
+
+define void @cmpxchg_i128_monotonic_monotonic(ptr %ptr, i128 %cmp, i128 %val) nounwind {
+; RV32I-LABEL: cmpxchg_i128_monotonic_monotonic:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -48
+; RV32I-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; RV32I-NEXT: mv a4, a0
+; RV32I-NEXT: lw a0, 0(a2)
+; RV32I-NEXT: lw a3, 4(a2)
+; RV32I-NEXT: lw a5, 8(a2)
+; RV32I-NEXT: lw a2, 12(a2)
+; RV32I-NEXT: lw a6, 12(a1)
+; RV32I-NEXT: lw a7, 8(a1)
+; RV32I-NEXT: lw t0, 4(a1)
+; RV32I-NEXT: lw a1, 0(a1)
+; RV32I-NEXT: sw a6, 36(sp)
+; RV32I-NEXT: sw a7, 32(sp)
+; RV32I-NEXT: sw t0, 28(sp)
+; RV32I-NEXT: sw a1, 24(sp)
+; RV32I-NEXT: sw a2, 20(sp)
+; RV32I-NEXT: sw a5, 16(sp)
+; RV32I-NEXT: sw a3, 12(sp)
+; RV32I-NEXT: sw a0, 8(sp)
+; RV32I-NEXT: li a0, 16
+; RV32I-NEXT: addi a2, sp, 24
+; RV32I-NEXT: addi a3, sp, 8
+; RV32I-NEXT: mv a1, a4
+; RV32I-NEXT: li a4, 0
+; RV32I-NEXT: li a5, 0
+; RV32I-NEXT: call __atomic_compare_exchange at plt
+; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 48
+; RV32I-NEXT: ret
+;
+; RV32IA-LABEL: cmpxchg_i128_monotonic_monotonic:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: addi sp, sp, -48
+; RV32IA-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: mv a4, a0
+; RV32IA-NEXT: lw a0, 0(a2)
+; RV32IA-NEXT: lw a3, 4(a2)
+; RV32IA-NEXT: lw a5, 8(a2)
+; RV32IA-NEXT: lw a2, 12(a2)
+; RV32IA-NEXT: lw a6, 12(a1)
+; RV32IA-NEXT: lw a7, 8(a1)
+; RV32IA-NEXT: lw t0, 4(a1)
+; RV32IA-NEXT: lw a1, 0(a1)
+; RV32IA-NEXT: sw a6, 36(sp)
+; RV32IA-NEXT: sw a7, 32(sp)
+; RV32IA-NEXT: sw t0, 28(sp)
+; RV32IA-NEXT: sw a1, 24(sp)
+; RV32IA-NEXT: sw a2, 20(sp)
+; RV32IA-NEXT: sw a5, 16(sp)
+; RV32IA-NEXT: sw a3, 12(sp)
+; RV32IA-NEXT: sw a0, 8(sp)
+; RV32IA-NEXT: li a0, 16
+; RV32IA-NEXT: addi a2, sp, 24
+; RV32IA-NEXT: addi a3, sp, 8
+; RV32IA-NEXT: mv a1, a4
+; RV32IA-NEXT: li a4, 0
+; RV32IA-NEXT: li a5, 0
+; RV32IA-NEXT: call __atomic_compare_exchange at plt
+; RV32IA-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: addi sp, sp, 48
+; RV32IA-NEXT: ret
+;
+; RV64I-LABEL: cmpxchg_i128_monotonic_monotonic:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd a2, 8(sp)
+; RV64I-NEXT: sd a1, 0(sp)
+; RV64I-NEXT: mv a1, sp
+; RV64I-NEXT: mv a2, a3
+; RV64I-NEXT: mv a3, a4
+; RV64I-NEXT: li a4, 0
+; RV64I-NEXT: li a5, 0
+; RV64I-NEXT: call __atomic_compare_exchange_16 at plt
+; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: ret
+;
+; RV64IA-WMO-LABEL: cmpxchg_i128_monotonic_monotonic:
+; RV64IA-WMO: # %bb.0:
+; RV64IA-WMO-NEXT: addi sp, sp, -32
+; RV64IA-WMO-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64IA-WMO-NEXT: sd a2, 8(sp)
+; RV64IA-WMO-NEXT: sd a1, 0(sp)
+; RV64IA-WMO-NEXT: mv a1, sp
+; RV64IA-WMO-NEXT: mv a2, a3
+; RV64IA-WMO-NEXT: mv a3, a4
+; RV64IA-WMO-NEXT: li a4, 0
+; RV64IA-WMO-NEXT: li a5, 0
+; RV64IA-WMO-NEXT: call __atomic_compare_exchange_16 at plt
+; RV64IA-WMO-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64IA-WMO-NEXT: addi sp, sp, 32
+; RV64IA-WMO-NEXT: ret
+;
+; RV64IA-ZACAS-LABEL: cmpxchg_i128_monotonic_monotonic:
+; RV64IA-ZACAS: # %bb.0:
+; RV64IA-ZACAS-NEXT: mv a5, a4
+; RV64IA-ZACAS-NEXT: mv a7, a2
+; RV64IA-ZACAS-NEXT: mv a4, a3
+; RV64IA-ZACAS-NEXT: mv a6, a1
+; RV64IA-ZACAS-NEXT: amocas.q a6, a4, (a0)
+; RV64IA-ZACAS-NEXT: ret
+;
+; RV64IA-TSO-LABEL: cmpxchg_i128_monotonic_monotonic:
+; RV64IA-TSO: # %bb.0:
+; RV64IA-TSO-NEXT: addi sp, sp, -32
+; RV64IA-TSO-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64IA-TSO-NEXT: sd a2, 8(sp)
+; RV64IA-TSO-NEXT: sd a1, 0(sp)
+; RV64IA-TSO-NEXT: mv a1, sp
+; RV64IA-TSO-NEXT: mv a2, a3
+; RV64IA-TSO-NEXT: mv a3, a4
+; RV64IA-TSO-NEXT: li a4, 0
+; RV64IA-TSO-NEXT: li a5, 0
+; RV64IA-TSO-NEXT: call __atomic_compare_exchange_16 at plt
+; RV64IA-TSO-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64IA-TSO-NEXT: addi sp, sp, 32
+; RV64IA-TSO-NEXT: ret
+ %res = cmpxchg ptr %ptr, i128 %cmp, i128 %val monotonic monotonic
+ ret void
+}
+
+define void @cmpxchg_i128_acquire_monotonic(ptr %ptr, i128 %cmp, i128 %val) nounwind {
+; RV32I-LABEL: cmpxchg_i128_acquire_monotonic:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -48
+; RV32I-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; RV32I-NEXT: mv a5, a0
+; RV32I-NEXT: lw a0, 0(a2)
+; RV32I-NEXT: lw a3, 4(a2)
+; RV32I-NEXT: lw a4, 8(a2)
+; RV32I-NEXT: lw a2, 12(a2)
+; RV32I-NEXT: lw a6, 12(a1)
+; RV32I-NEXT: lw a7, 8(a1)
+; RV32I-NEXT: lw t0, 4(a1)
+; RV32I-NEXT: lw a1, 0(a1)
+; RV32I-NEXT: sw a6, 36(sp)
+; RV32I-NEXT: sw a7, 32(sp)
+; RV32I-NEXT: sw t0, 28(sp)
+; RV32I-NEXT: sw a1, 24(sp)
+; RV32I-NEXT: sw a2, 20(sp)
+; RV32I-NEXT: sw a4, 16(sp)
+; RV32I-NEXT: sw a3, 12(sp)
+; RV32I-NEXT: sw a0, 8(sp)
+; RV32I-NEXT: li a0, 16
+; RV32I-NEXT: addi a2, sp, 24
+; RV32I-NEXT: addi a3, sp, 8
+; RV32I-NEXT: li a4, 2
+; RV32I-NEXT: mv a1, a5
+; RV32I-NEXT: li a5, 0
+; RV32I-NEXT: call __atomic_compare_exchange at plt
+; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 48
+; RV32I-NEXT: ret
+;
+; RV32IA-LABEL: cmpxchg_i128_acquire_monotonic:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: addi sp, sp, -48
+; RV32IA-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: mv a5, a0
+; RV32IA-NEXT: lw a0, 0(a2)
+; RV32IA-NEXT: lw a3, 4(a2)
+; RV32IA-NEXT: lw a4, 8(a2)
+; RV32IA-NEXT: lw a2, 12(a2)
+; RV32IA-NEXT: lw a6, 12(a1)
+; RV32IA-NEXT: lw a7, 8(a1)
+; RV32IA-NEXT: lw t0, 4(a1)
+; RV32IA-NEXT: lw a1, 0(a1)
+; RV32IA-NEXT: sw a6, 36(sp)
+; RV32IA-NEXT: sw a7, 32(sp)
+; RV32IA-NEXT: sw t0, 28(sp)
+; RV32IA-NEXT: sw a1, 24(sp)
+; RV32IA-NEXT: sw a2, 20(sp)
+; RV32IA-NEXT: sw a4, 16(sp)
+; RV32IA-NEXT: sw a3, 12(sp)
+; RV32IA-NEXT: sw a0, 8(sp)
+; RV32IA-NEXT: li a0, 16
+; RV32IA-NEXT: addi a2, sp, 24
+; RV32IA-NEXT: addi a3, sp, 8
+; RV32IA-NEXT: li a4, 2
+; RV32IA-NEXT: mv a1, a5
+; RV32IA-NEXT: li a5, 0
+; RV32IA-NEXT: call __atomic_compare_exchange at plt
+; RV32IA-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: addi sp, sp, 48
+; RV32IA-NEXT: ret
+;
+; RV64I-LABEL: cmpxchg_i128_acquire_monotonic:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: mv a5, a4
+; RV64I-NEXT: sd a2, 8(sp)
+; RV64I-NEXT: sd a1, 0(sp)
+; RV64I-NEXT: mv a1, sp
+; RV64I-NEXT: li a4, 2
+; RV64I-NEXT: mv a2, a3
+; RV64I-NEXT: mv a3, a5
+; RV64I-NEXT: li a5, 0
+; RV64I-NEXT: call __atomic_compare_exchange_16 at plt
+; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: ret
+;
+; RV64IA-WMO-LABEL: cmpxchg_i128_acquire_monotonic:
+; RV64IA-WMO: # %bb.0:
+; RV64IA-WMO-NEXT: addi sp, sp, -32
+; RV64IA-WMO-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64IA-WMO-NEXT: mv a5, a4
+; RV64IA-WMO-NEXT: sd a2, 8(sp)
+; RV64IA-WMO-NEXT: sd a1, 0(sp)
+; RV64IA-WMO-NEXT: mv a1, sp
+; RV64IA-WMO-NEXT: li a4, 2
+; RV64IA-WMO-NEXT: mv a2, a3
+; RV64IA-WMO-NEXT: mv a3, a5
+; RV64IA-WMO-NEXT: li a5, 0
+; RV64IA-WMO-NEXT: call __atomic_compare_exchange_16 at plt
+; RV64IA-WMO-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64IA-WMO-NEXT: addi sp, sp, 32
+; RV64IA-WMO-NEXT: ret
+;
+; RV64IA-ZACAS-LABEL: cmpxchg_i128_acquire_monotonic:
+; RV64IA-ZACAS: # %bb.0:
+; RV64IA-ZACAS-NEXT: mv a5, a4
+; RV64IA-ZACAS-NEXT: mv a7, a2
+; RV64IA-ZACAS-NEXT: mv a4, a3
+; RV64IA-ZACAS-NEXT: mv a6, a1
+; RV64IA-ZACAS-NEXT: amocas.q.aq a6, a4, (a0)
+; RV64IA-ZACAS-NEXT: ret
+;
+; RV64IA-TSO-LABEL: cmpxchg_i128_acquire_monotonic:
+; RV64IA-TSO: # %bb.0:
+; RV64IA-TSO-NEXT: addi sp, sp, -32
+; RV64IA-TSO-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64IA-TSO-NEXT: mv a5, a4
+; RV64IA-TSO-NEXT: sd a2, 8(sp)
+; RV64IA-TSO-NEXT: sd a1, 0(sp)
+; RV64IA-TSO-NEXT: mv a1, sp
+; RV64IA-TSO-NEXT: li a4, 2
+; RV64IA-TSO-NEXT: mv a2, a3
+; RV64IA-TSO-NEXT: mv a3, a5
+; RV64IA-TSO-NEXT: li a5, 0
+; RV64IA-TSO-NEXT: call __atomic_compare_exchange_16 at plt
+; RV64IA-TSO-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64IA-TSO-NEXT: addi sp, sp, 32
+; RV64IA-TSO-NEXT: ret
+ %res = cmpxchg ptr %ptr, i128 %cmp, i128 %val acquire monotonic
+ ret void
+}
+
+define void @cmpxchg_i128_acquire_acquire(ptr %ptr, i128 %cmp, i128 %val) nounwind {
+; RV32I-LABEL: cmpxchg_i128_acquire_acquire:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -48
+; RV32I-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; RV32I-NEXT: mv a6, a0
+; RV32I-NEXT: lw a0, 0(a2)
+; RV32I-NEXT: lw a3, 4(a2)
+; RV32I-NEXT: lw a4, 8(a2)
+; RV32I-NEXT: lw a2, 12(a2)
+; RV32I-NEXT: lw a5, 12(a1)
+; RV32I-NEXT: lw a7, 8(a1)
+; RV32I-NEXT: lw t0, 4(a1)
+; RV32I-NEXT: lw a1, 0(a1)
+; RV32I-NEXT: sw a5, 36(sp)
+; RV32I-NEXT: sw a7, 32(sp)
+; RV32I-NEXT: sw t0, 28(sp)
+; RV32I-NEXT: sw a1, 24(sp)
+; RV32I-NEXT: sw a2, 20(sp)
+; RV32I-NEXT: sw a4, 16(sp)
+; RV32I-NEXT: sw a3, 12(sp)
+; RV32I-NEXT: sw a0, 8(sp)
+; RV32I-NEXT: li a0, 16
+; RV32I-NEXT: addi a2, sp, 24
+; RV32I-NEXT: addi a3, sp, 8
+; RV32I-NEXT: li a4, 2
+; RV32I-NEXT: li a5, 2
+; RV32I-NEXT: mv a1, a6
+; RV32I-NEXT: call __atomic_compare_exchange at plt
+; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 48
+; RV32I-NEXT: ret
+;
+; RV32IA-LABEL: cmpxchg_i128_acquire_acquire:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: addi sp, sp, -48
+; RV32IA-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: mv a6, a0
+; RV32IA-NEXT: lw a0, 0(a2)
+; RV32IA-NEXT: lw a3, 4(a2)
+; RV32IA-NEXT: lw a4, 8(a2)
+; RV32IA-NEXT: lw a2, 12(a2)
+; RV32IA-NEXT: lw a5, 12(a1)
+; RV32IA-NEXT: lw a7, 8(a1)
+; RV32IA-NEXT: lw t0, 4(a1)
+; RV32IA-NEXT: lw a1, 0(a1)
+; RV32IA-NEXT: sw a5, 36(sp)
+; RV32IA-NEXT: sw a7, 32(sp)
+; RV32IA-NEXT: sw t0, 28(sp)
+; RV32IA-NEXT: sw a1, 24(sp)
+; RV32IA-NEXT: sw a2, 20(sp)
+; RV32IA-NEXT: sw a4, 16(sp)
+; RV32IA-NEXT: sw a3, 12(sp)
+; RV32IA-NEXT: sw a0, 8(sp)
+; RV32IA-NEXT: li a0, 16
+; RV32IA-NEXT: addi a2, sp, 24
+; RV32IA-NEXT: addi a3, sp, 8
+; RV32IA-NEXT: li a4, 2
+; RV32IA-NEXT: li a5, 2
+; RV32IA-NEXT: mv a1, a6
+; RV32IA-NEXT: call __atomic_compare_exchange at plt
+; RV32IA-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: addi sp, sp, 48
+; RV32IA-NEXT: ret
+;
+; RV64I-LABEL: cmpxchg_i128_acquire_acquire:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: mv a6, a4
+; RV64I-NEXT: sd a2, 8(sp)
+; RV64I-NEXT: sd a1, 0(sp)
+; RV64I-NEXT: mv a1, sp
+; RV64I-NEXT: li a4, 2
+; RV64I-NEXT: li a5, 2
+; RV64I-NEXT: mv a2, a3
+; RV64I-NEXT: mv a3, a6
+; RV64I-NEXT: call __atomic_compare_exchange_16 at plt
+; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: ret
+;
+; RV64IA-WMO-LABEL: cmpxchg_i128_acquire_acquire:
+; RV64IA-WMO: # %bb.0:
+; RV64IA-WMO-NEXT: addi sp, sp, -32
+; RV64IA-WMO-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64IA-WMO-NEXT: mv a6, a4
+; RV64IA-WMO-NEXT: sd a2, 8(sp)
+; RV64IA-WMO-NEXT: sd a1, 0(sp)
+; RV64IA-WMO-NEXT: mv a1, sp
+; RV64IA-WMO-NEXT: li a4, 2
+; RV64IA-WMO-NEXT: li a5, 2
+; RV64IA-WMO-NEXT: mv a2, a3
+; RV64IA-WMO-NEXT: mv a3, a6
+; RV64IA-WMO-NEXT: call __atomic_compare_exchange_16 at plt
+; RV64IA-WMO-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64IA-WMO-NEXT: addi sp, sp, 32
+; RV64IA-WMO-NEXT: ret
+;
+; RV64IA-ZACAS-LABEL: cmpxchg_i128_acquire_acquire:
+; RV64IA-ZACAS: # %bb.0:
+; RV64IA-ZACAS-NEXT: mv a5, a4
+; RV64IA-ZACAS-NEXT: mv a7, a2
+; RV64IA-ZACAS-NEXT: mv a4, a3
+; RV64IA-ZACAS-NEXT: mv a6, a1
+; RV64IA-ZACAS-NEXT: amocas.q.aq a6, a4, (a0)
+; RV64IA-ZACAS-NEXT: ret
+;
+; RV64IA-TSO-LABEL: cmpxchg_i128_acquire_acquire:
+; RV64IA-TSO: # %bb.0:
+; RV64IA-TSO-NEXT: addi sp, sp, -32
+; RV64IA-TSO-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64IA-TSO-NEXT: mv a6, a4
+; RV64IA-TSO-NEXT: sd a2, 8(sp)
+; RV64IA-TSO-NEXT: sd a1, 0(sp)
+; RV64IA-TSO-NEXT: mv a1, sp
+; RV64IA-TSO-NEXT: li a4, 2
+; RV64IA-TSO-NEXT: li a5, 2
+; RV64IA-TSO-NEXT: mv a2, a3
+; RV64IA-TSO-NEXT: mv a3, a6
+; RV64IA-TSO-NEXT: call __atomic_compare_exchange_16 at plt
+; RV64IA-TSO-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64IA-TSO-NEXT: addi sp, sp, 32
+; RV64IA-TSO-NEXT: ret
+ %res = cmpxchg ptr %ptr, i128 %cmp, i128 %val acquire acquire
+ ret void
+}
+
+define void @cmpxchg_i128_release_monotonic(ptr %ptr, i128 %cmp, i128 %val) nounwind { ; i128 cmpxchg, result unused; checks expect an __atomic_compare_exchange[_16] libcall without Zacas and a single amocas.q.rl with Zacas
+; RV32I-LABEL: cmpxchg_i128_release_monotonic:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -48
+; RV32I-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; RV32I-NEXT: mv a5, a0
+; RV32I-NEXT: lw a0, 0(a2)
+; RV32I-NEXT: lw a3, 4(a2)
+; RV32I-NEXT: lw a4, 8(a2)
+; RV32I-NEXT: lw a2, 12(a2)
+; RV32I-NEXT: lw a6, 12(a1)
+; RV32I-NEXT: lw a7, 8(a1)
+; RV32I-NEXT: lw t0, 4(a1)
+; RV32I-NEXT: lw a1, 0(a1)
+; RV32I-NEXT: sw a6, 36(sp)
+; RV32I-NEXT: sw a7, 32(sp)
+; RV32I-NEXT: sw t0, 28(sp)
+; RV32I-NEXT: sw a1, 24(sp)
+; RV32I-NEXT: sw a2, 20(sp)
+; RV32I-NEXT: sw a4, 16(sp)
+; RV32I-NEXT: sw a3, 12(sp)
+; RV32I-NEXT: sw a0, 8(sp)
+; RV32I-NEXT: li a0, 16
+; RV32I-NEXT: addi a2, sp, 24
+; RV32I-NEXT: addi a3, sp, 8
+; RV32I-NEXT: li a4, 3
+; RV32I-NEXT: mv a1, a5
+; RV32I-NEXT: li a5, 0
+; RV32I-NEXT: call __atomic_compare_exchange@plt
+; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 48
+; RV32I-NEXT: ret
+;
+; RV32IA-LABEL: cmpxchg_i128_release_monotonic:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: addi sp, sp, -48
+; RV32IA-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: mv a5, a0
+; RV32IA-NEXT: lw a0, 0(a2)
+; RV32IA-NEXT: lw a3, 4(a2)
+; RV32IA-NEXT: lw a4, 8(a2)
+; RV32IA-NEXT: lw a2, 12(a2)
+; RV32IA-NEXT: lw a6, 12(a1)
+; RV32IA-NEXT: lw a7, 8(a1)
+; RV32IA-NEXT: lw t0, 4(a1)
+; RV32IA-NEXT: lw a1, 0(a1)
+; RV32IA-NEXT: sw a6, 36(sp)
+; RV32IA-NEXT: sw a7, 32(sp)
+; RV32IA-NEXT: sw t0, 28(sp)
+; RV32IA-NEXT: sw a1, 24(sp)
+; RV32IA-NEXT: sw a2, 20(sp)
+; RV32IA-NEXT: sw a4, 16(sp)
+; RV32IA-NEXT: sw a3, 12(sp)
+; RV32IA-NEXT: sw a0, 8(sp)
+; RV32IA-NEXT: li a0, 16
+; RV32IA-NEXT: addi a2, sp, 24
+; RV32IA-NEXT: addi a3, sp, 8
+; RV32IA-NEXT: li a4, 3
+; RV32IA-NEXT: mv a1, a5
+; RV32IA-NEXT: li a5, 0
+; RV32IA-NEXT: call __atomic_compare_exchange@plt
+; RV32IA-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: addi sp, sp, 48
+; RV32IA-NEXT: ret
+;
+; RV64I-LABEL: cmpxchg_i128_release_monotonic:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: mv a5, a4
+; RV64I-NEXT: sd a2, 8(sp)
+; RV64I-NEXT: sd a1, 0(sp)
+; RV64I-NEXT: mv a1, sp
+; RV64I-NEXT: li a4, 3
+; RV64I-NEXT: mv a2, a3
+; RV64I-NEXT: mv a3, a5
+; RV64I-NEXT: li a5, 0
+; RV64I-NEXT: call __atomic_compare_exchange_16@plt
+; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: ret
+;
+; RV64IA-WMO-LABEL: cmpxchg_i128_release_monotonic:
+; RV64IA-WMO: # %bb.0:
+; RV64IA-WMO-NEXT: addi sp, sp, -32
+; RV64IA-WMO-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64IA-WMO-NEXT: mv a5, a4
+; RV64IA-WMO-NEXT: sd a2, 8(sp)
+; RV64IA-WMO-NEXT: sd a1, 0(sp)
+; RV64IA-WMO-NEXT: mv a1, sp
+; RV64IA-WMO-NEXT: li a4, 3
+; RV64IA-WMO-NEXT: mv a2, a3
+; RV64IA-WMO-NEXT: mv a3, a5
+; RV64IA-WMO-NEXT: li a5, 0
+; RV64IA-WMO-NEXT: call __atomic_compare_exchange_16@plt
+; RV64IA-WMO-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64IA-WMO-NEXT: addi sp, sp, 32
+; RV64IA-WMO-NEXT: ret
+;
+; RV64IA-ZACAS-LABEL: cmpxchg_i128_release_monotonic:
+; RV64IA-ZACAS: # %bb.0:
+; RV64IA-ZACAS-NEXT: mv a5, a4
+; RV64IA-ZACAS-NEXT: mv a7, a2
+; RV64IA-ZACAS-NEXT: mv a4, a3
+; RV64IA-ZACAS-NEXT: mv a6, a1
+; RV64IA-ZACAS-NEXT: amocas.q.rl a6, a4, (a0)
+; RV64IA-ZACAS-NEXT: ret
+;
+; RV64IA-TSO-LABEL: cmpxchg_i128_release_monotonic:
+; RV64IA-TSO: # %bb.0:
+; RV64IA-TSO-NEXT: addi sp, sp, -32
+; RV64IA-TSO-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64IA-TSO-NEXT: mv a5, a4
+; RV64IA-TSO-NEXT: sd a2, 8(sp)
+; RV64IA-TSO-NEXT: sd a1, 0(sp)
+; RV64IA-TSO-NEXT: mv a1, sp
+; RV64IA-TSO-NEXT: li a4, 3
+; RV64IA-TSO-NEXT: mv a2, a3
+; RV64IA-TSO-NEXT: mv a3, a5
+; RV64IA-TSO-NEXT: li a5, 0
+; RV64IA-TSO-NEXT: call __atomic_compare_exchange_16@plt
+; RV64IA-TSO-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64IA-TSO-NEXT: addi sp, sp, 32
+; RV64IA-TSO-NEXT: ret
+ %res = cmpxchg ptr %ptr, i128 %cmp, i128 %val release monotonic ; success ordering release, failure ordering monotonic
+ ret void
+}
+
+define void @cmpxchg_i128_release_acquire(ptr %ptr, i128 %cmp, i128 %val) nounwind { ; i128 cmpxchg, result unused; checks expect an __atomic_compare_exchange[_16] libcall without Zacas and a single amocas.q.aqrl with Zacas
+; RV32I-LABEL: cmpxchg_i128_release_acquire:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -48
+; RV32I-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; RV32I-NEXT: mv a6, a0
+; RV32I-NEXT: lw a0, 0(a2)
+; RV32I-NEXT: lw a3, 4(a2)
+; RV32I-NEXT: lw a4, 8(a2)
+; RV32I-NEXT: lw a2, 12(a2)
+; RV32I-NEXT: lw a5, 12(a1)
+; RV32I-NEXT: lw a7, 8(a1)
+; RV32I-NEXT: lw t0, 4(a1)
+; RV32I-NEXT: lw a1, 0(a1)
+; RV32I-NEXT: sw a5, 36(sp)
+; RV32I-NEXT: sw a7, 32(sp)
+; RV32I-NEXT: sw t0, 28(sp)
+; RV32I-NEXT: sw a1, 24(sp)
+; RV32I-NEXT: sw a2, 20(sp)
+; RV32I-NEXT: sw a4, 16(sp)
+; RV32I-NEXT: sw a3, 12(sp)
+; RV32I-NEXT: sw a0, 8(sp)
+; RV32I-NEXT: li a0, 16
+; RV32I-NEXT: addi a2, sp, 24
+; RV32I-NEXT: addi a3, sp, 8
+; RV32I-NEXT: li a4, 3
+; RV32I-NEXT: li a5, 2
+; RV32I-NEXT: mv a1, a6
+; RV32I-NEXT: call __atomic_compare_exchange@plt
+; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 48
+; RV32I-NEXT: ret
+;
+; RV32IA-LABEL: cmpxchg_i128_release_acquire:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: addi sp, sp, -48
+; RV32IA-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: mv a6, a0
+; RV32IA-NEXT: lw a0, 0(a2)
+; RV32IA-NEXT: lw a3, 4(a2)
+; RV32IA-NEXT: lw a4, 8(a2)
+; RV32IA-NEXT: lw a2, 12(a2)
+; RV32IA-NEXT: lw a5, 12(a1)
+; RV32IA-NEXT: lw a7, 8(a1)
+; RV32IA-NEXT: lw t0, 4(a1)
+; RV32IA-NEXT: lw a1, 0(a1)
+; RV32IA-NEXT: sw a5, 36(sp)
+; RV32IA-NEXT: sw a7, 32(sp)
+; RV32IA-NEXT: sw t0, 28(sp)
+; RV32IA-NEXT: sw a1, 24(sp)
+; RV32IA-NEXT: sw a2, 20(sp)
+; RV32IA-NEXT: sw a4, 16(sp)
+; RV32IA-NEXT: sw a3, 12(sp)
+; RV32IA-NEXT: sw a0, 8(sp)
+; RV32IA-NEXT: li a0, 16
+; RV32IA-NEXT: addi a2, sp, 24
+; RV32IA-NEXT: addi a3, sp, 8
+; RV32IA-NEXT: li a4, 3
+; RV32IA-NEXT: li a5, 2
+; RV32IA-NEXT: mv a1, a6
+; RV32IA-NEXT: call __atomic_compare_exchange@plt
+; RV32IA-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: addi sp, sp, 48
+; RV32IA-NEXT: ret
+;
+; RV64I-LABEL: cmpxchg_i128_release_acquire:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: mv a6, a4
+; RV64I-NEXT: sd a2, 8(sp)
+; RV64I-NEXT: sd a1, 0(sp)
+; RV64I-NEXT: mv a1, sp
+; RV64I-NEXT: li a4, 3
+; RV64I-NEXT: li a5, 2
+; RV64I-NEXT: mv a2, a3
+; RV64I-NEXT: mv a3, a6
+; RV64I-NEXT: call __atomic_compare_exchange_16@plt
+; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: ret
+;
+; RV64IA-WMO-LABEL: cmpxchg_i128_release_acquire:
+; RV64IA-WMO: # %bb.0:
+; RV64IA-WMO-NEXT: addi sp, sp, -32
+; RV64IA-WMO-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64IA-WMO-NEXT: mv a6, a4
+; RV64IA-WMO-NEXT: sd a2, 8(sp)
+; RV64IA-WMO-NEXT: sd a1, 0(sp)
+; RV64IA-WMO-NEXT: mv a1, sp
+; RV64IA-WMO-NEXT: li a4, 3
+; RV64IA-WMO-NEXT: li a5, 2
+; RV64IA-WMO-NEXT: mv a2, a3
+; RV64IA-WMO-NEXT: mv a3, a6
+; RV64IA-WMO-NEXT: call __atomic_compare_exchange_16@plt
+; RV64IA-WMO-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64IA-WMO-NEXT: addi sp, sp, 32
+; RV64IA-WMO-NEXT: ret
+;
+; RV64IA-ZACAS-LABEL: cmpxchg_i128_release_acquire:
+; RV64IA-ZACAS: # %bb.0:
+; RV64IA-ZACAS-NEXT: mv a5, a4
+; RV64IA-ZACAS-NEXT: mv a7, a2
+; RV64IA-ZACAS-NEXT: mv a4, a3
+; RV64IA-ZACAS-NEXT: mv a6, a1
+; RV64IA-ZACAS-NEXT: amocas.q.aqrl a6, a4, (a0)
+; RV64IA-ZACAS-NEXT: ret
+;
+; RV64IA-TSO-LABEL: cmpxchg_i128_release_acquire:
+; RV64IA-TSO: # %bb.0:
+; RV64IA-TSO-NEXT: addi sp, sp, -32
+; RV64IA-TSO-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64IA-TSO-NEXT: mv a6, a4
+; RV64IA-TSO-NEXT: sd a2, 8(sp)
+; RV64IA-TSO-NEXT: sd a1, 0(sp)
+; RV64IA-TSO-NEXT: mv a1, sp
+; RV64IA-TSO-NEXT: li a4, 3
+; RV64IA-TSO-NEXT: li a5, 2
+; RV64IA-TSO-NEXT: mv a2, a3
+; RV64IA-TSO-NEXT: mv a3, a6
+; RV64IA-TSO-NEXT: call __atomic_compare_exchange_16@plt
+; RV64IA-TSO-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64IA-TSO-NEXT: addi sp, sp, 32
+; RV64IA-TSO-NEXT: ret
+ %res = cmpxchg ptr %ptr, i128 %cmp, i128 %val release acquire ; success ordering release, failure ordering acquire
+ ret void
+}
+
+define void @cmpxchg_i128_acq_rel_monotonic(ptr %ptr, i128 %cmp, i128 %val) nounwind { ; i128 cmpxchg, result unused; checks expect an __atomic_compare_exchange[_16] libcall without Zacas and a single amocas.q.aqrl with Zacas
+; RV32I-LABEL: cmpxchg_i128_acq_rel_monotonic:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -48
+; RV32I-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; RV32I-NEXT: mv a5, a0
+; RV32I-NEXT: lw a0, 0(a2)
+; RV32I-NEXT: lw a3, 4(a2)
+; RV32I-NEXT: lw a4, 8(a2)
+; RV32I-NEXT: lw a2, 12(a2)
+; RV32I-NEXT: lw a6, 12(a1)
+; RV32I-NEXT: lw a7, 8(a1)
+; RV32I-NEXT: lw t0, 4(a1)
+; RV32I-NEXT: lw a1, 0(a1)
+; RV32I-NEXT: sw a6, 36(sp)
+; RV32I-NEXT: sw a7, 32(sp)
+; RV32I-NEXT: sw t0, 28(sp)
+; RV32I-NEXT: sw a1, 24(sp)
+; RV32I-NEXT: sw a2, 20(sp)
+; RV32I-NEXT: sw a4, 16(sp)
+; RV32I-NEXT: sw a3, 12(sp)
+; RV32I-NEXT: sw a0, 8(sp)
+; RV32I-NEXT: li a0, 16
+; RV32I-NEXT: addi a2, sp, 24
+; RV32I-NEXT: addi a3, sp, 8
+; RV32I-NEXT: li a4, 4
+; RV32I-NEXT: mv a1, a5
+; RV32I-NEXT: li a5, 0
+; RV32I-NEXT: call __atomic_compare_exchange@plt
+; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 48
+; RV32I-NEXT: ret
+;
+; RV32IA-LABEL: cmpxchg_i128_acq_rel_monotonic:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: addi sp, sp, -48
+; RV32IA-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: mv a5, a0
+; RV32IA-NEXT: lw a0, 0(a2)
+; RV32IA-NEXT: lw a3, 4(a2)
+; RV32IA-NEXT: lw a4, 8(a2)
+; RV32IA-NEXT: lw a2, 12(a2)
+; RV32IA-NEXT: lw a6, 12(a1)
+; RV32IA-NEXT: lw a7, 8(a1)
+; RV32IA-NEXT: lw t0, 4(a1)
+; RV32IA-NEXT: lw a1, 0(a1)
+; RV32IA-NEXT: sw a6, 36(sp)
+; RV32IA-NEXT: sw a7, 32(sp)
+; RV32IA-NEXT: sw t0, 28(sp)
+; RV32IA-NEXT: sw a1, 24(sp)
+; RV32IA-NEXT: sw a2, 20(sp)
+; RV32IA-NEXT: sw a4, 16(sp)
+; RV32IA-NEXT: sw a3, 12(sp)
+; RV32IA-NEXT: sw a0, 8(sp)
+; RV32IA-NEXT: li a0, 16
+; RV32IA-NEXT: addi a2, sp, 24
+; RV32IA-NEXT: addi a3, sp, 8
+; RV32IA-NEXT: li a4, 4
+; RV32IA-NEXT: mv a1, a5
+; RV32IA-NEXT: li a5, 0
+; RV32IA-NEXT: call __atomic_compare_exchange@plt
+; RV32IA-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: addi sp, sp, 48
+; RV32IA-NEXT: ret
+;
+; RV64I-LABEL: cmpxchg_i128_acq_rel_monotonic:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: mv a5, a4
+; RV64I-NEXT: sd a2, 8(sp)
+; RV64I-NEXT: sd a1, 0(sp)
+; RV64I-NEXT: mv a1, sp
+; RV64I-NEXT: li a4, 4
+; RV64I-NEXT: mv a2, a3
+; RV64I-NEXT: mv a3, a5
+; RV64I-NEXT: li a5, 0
+; RV64I-NEXT: call __atomic_compare_exchange_16@plt
+; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: ret
+;
+; RV64IA-WMO-LABEL: cmpxchg_i128_acq_rel_monotonic:
+; RV64IA-WMO: # %bb.0:
+; RV64IA-WMO-NEXT: addi sp, sp, -32
+; RV64IA-WMO-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64IA-WMO-NEXT: mv a5, a4
+; RV64IA-WMO-NEXT: sd a2, 8(sp)
+; RV64IA-WMO-NEXT: sd a1, 0(sp)
+; RV64IA-WMO-NEXT: mv a1, sp
+; RV64IA-WMO-NEXT: li a4, 4
+; RV64IA-WMO-NEXT: mv a2, a3
+; RV64IA-WMO-NEXT: mv a3, a5
+; RV64IA-WMO-NEXT: li a5, 0
+; RV64IA-WMO-NEXT: call __atomic_compare_exchange_16@plt
+; RV64IA-WMO-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64IA-WMO-NEXT: addi sp, sp, 32
+; RV64IA-WMO-NEXT: ret
+;
+; RV64IA-ZACAS-LABEL: cmpxchg_i128_acq_rel_monotonic:
+; RV64IA-ZACAS: # %bb.0:
+; RV64IA-ZACAS-NEXT: mv a5, a4
+; RV64IA-ZACAS-NEXT: mv a7, a2
+; RV64IA-ZACAS-NEXT: mv a4, a3
+; RV64IA-ZACAS-NEXT: mv a6, a1
+; RV64IA-ZACAS-NEXT: amocas.q.aqrl a6, a4, (a0)
+; RV64IA-ZACAS-NEXT: ret
+;
+; RV64IA-TSO-LABEL: cmpxchg_i128_acq_rel_monotonic:
+; RV64IA-TSO: # %bb.0:
+; RV64IA-TSO-NEXT: addi sp, sp, -32
+; RV64IA-TSO-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64IA-TSO-NEXT: mv a5, a4
+; RV64IA-TSO-NEXT: sd a2, 8(sp)
+; RV64IA-TSO-NEXT: sd a1, 0(sp)
+; RV64IA-TSO-NEXT: mv a1, sp
+; RV64IA-TSO-NEXT: li a4, 4
+; RV64IA-TSO-NEXT: mv a2, a3
+; RV64IA-TSO-NEXT: mv a3, a5
+; RV64IA-TSO-NEXT: li a5, 0
+; RV64IA-TSO-NEXT: call __atomic_compare_exchange_16@plt
+; RV64IA-TSO-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64IA-TSO-NEXT: addi sp, sp, 32
+; RV64IA-TSO-NEXT: ret
+ %res = cmpxchg ptr %ptr, i128 %cmp, i128 %val acq_rel monotonic ; success ordering acq_rel, failure ordering monotonic
+ ret void
+}
+
+define void @cmpxchg_i128_acq_rel_acquire(ptr %ptr, i128 %cmp, i128 %val) nounwind { ; i128 cmpxchg, result unused; checks expect an __atomic_compare_exchange[_16] libcall without Zacas and a single amocas.q.aqrl with Zacas
+; RV32I-LABEL: cmpxchg_i128_acq_rel_acquire:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -48
+; RV32I-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; RV32I-NEXT: mv a6, a0
+; RV32I-NEXT: lw a0, 0(a2)
+; RV32I-NEXT: lw a3, 4(a2)
+; RV32I-NEXT: lw a4, 8(a2)
+; RV32I-NEXT: lw a2, 12(a2)
+; RV32I-NEXT: lw a5, 12(a1)
+; RV32I-NEXT: lw a7, 8(a1)
+; RV32I-NEXT: lw t0, 4(a1)
+; RV32I-NEXT: lw a1, 0(a1)
+; RV32I-NEXT: sw a5, 36(sp)
+; RV32I-NEXT: sw a7, 32(sp)
+; RV32I-NEXT: sw t0, 28(sp)
+; RV32I-NEXT: sw a1, 24(sp)
+; RV32I-NEXT: sw a2, 20(sp)
+; RV32I-NEXT: sw a4, 16(sp)
+; RV32I-NEXT: sw a3, 12(sp)
+; RV32I-NEXT: sw a0, 8(sp)
+; RV32I-NEXT: li a0, 16
+; RV32I-NEXT: addi a2, sp, 24
+; RV32I-NEXT: addi a3, sp, 8
+; RV32I-NEXT: li a4, 4
+; RV32I-NEXT: li a5, 2
+; RV32I-NEXT: mv a1, a6
+; RV32I-NEXT: call __atomic_compare_exchange@plt
+; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 48
+; RV32I-NEXT: ret
+;
+; RV32IA-LABEL: cmpxchg_i128_acq_rel_acquire:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: addi sp, sp, -48
+; RV32IA-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: mv a6, a0
+; RV32IA-NEXT: lw a0, 0(a2)
+; RV32IA-NEXT: lw a3, 4(a2)
+; RV32IA-NEXT: lw a4, 8(a2)
+; RV32IA-NEXT: lw a2, 12(a2)
+; RV32IA-NEXT: lw a5, 12(a1)
+; RV32IA-NEXT: lw a7, 8(a1)
+; RV32IA-NEXT: lw t0, 4(a1)
+; RV32IA-NEXT: lw a1, 0(a1)
+; RV32IA-NEXT: sw a5, 36(sp)
+; RV32IA-NEXT: sw a7, 32(sp)
+; RV32IA-NEXT: sw t0, 28(sp)
+; RV32IA-NEXT: sw a1, 24(sp)
+; RV32IA-NEXT: sw a2, 20(sp)
+; RV32IA-NEXT: sw a4, 16(sp)
+; RV32IA-NEXT: sw a3, 12(sp)
+; RV32IA-NEXT: sw a0, 8(sp)
+; RV32IA-NEXT: li a0, 16
+; RV32IA-NEXT: addi a2, sp, 24
+; RV32IA-NEXT: addi a3, sp, 8
+; RV32IA-NEXT: li a4, 4
+; RV32IA-NEXT: li a5, 2
+; RV32IA-NEXT: mv a1, a6
+; RV32IA-NEXT: call __atomic_compare_exchange@plt
+; RV32IA-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: addi sp, sp, 48
+; RV32IA-NEXT: ret
+;
+; RV64I-LABEL: cmpxchg_i128_acq_rel_acquire:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: mv a6, a4
+; RV64I-NEXT: sd a2, 8(sp)
+; RV64I-NEXT: sd a1, 0(sp)
+; RV64I-NEXT: mv a1, sp
+; RV64I-NEXT: li a4, 4
+; RV64I-NEXT: li a5, 2
+; RV64I-NEXT: mv a2, a3
+; RV64I-NEXT: mv a3, a6
+; RV64I-NEXT: call __atomic_compare_exchange_16@plt
+; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: ret
+;
+; RV64IA-WMO-LABEL: cmpxchg_i128_acq_rel_acquire:
+; RV64IA-WMO: # %bb.0:
+; RV64IA-WMO-NEXT: addi sp, sp, -32
+; RV64IA-WMO-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64IA-WMO-NEXT: mv a6, a4
+; RV64IA-WMO-NEXT: sd a2, 8(sp)
+; RV64IA-WMO-NEXT: sd a1, 0(sp)
+; RV64IA-WMO-NEXT: mv a1, sp
+; RV64IA-WMO-NEXT: li a4, 4
+; RV64IA-WMO-NEXT: li a5, 2
+; RV64IA-WMO-NEXT: mv a2, a3
+; RV64IA-WMO-NEXT: mv a3, a6
+; RV64IA-WMO-NEXT: call __atomic_compare_exchange_16@plt
+; RV64IA-WMO-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64IA-WMO-NEXT: addi sp, sp, 32
+; RV64IA-WMO-NEXT: ret
+;
+; RV64IA-ZACAS-LABEL: cmpxchg_i128_acq_rel_acquire:
+; RV64IA-ZACAS: # %bb.0:
+; RV64IA-ZACAS-NEXT: mv a5, a4
+; RV64IA-ZACAS-NEXT: mv a7, a2
+; RV64IA-ZACAS-NEXT: mv a4, a3
+; RV64IA-ZACAS-NEXT: mv a6, a1
+; RV64IA-ZACAS-NEXT: amocas.q.aqrl a6, a4, (a0)
+; RV64IA-ZACAS-NEXT: ret
+;
+; RV64IA-TSO-LABEL: cmpxchg_i128_acq_rel_acquire:
+; RV64IA-TSO: # %bb.0:
+; RV64IA-TSO-NEXT: addi sp, sp, -32
+; RV64IA-TSO-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64IA-TSO-NEXT: mv a6, a4
+; RV64IA-TSO-NEXT: sd a2, 8(sp)
+; RV64IA-TSO-NEXT: sd a1, 0(sp)
+; RV64IA-TSO-NEXT: mv a1, sp
+; RV64IA-TSO-NEXT: li a4, 4
+; RV64IA-TSO-NEXT: li a5, 2
+; RV64IA-TSO-NEXT: mv a2, a3
+; RV64IA-TSO-NEXT: mv a3, a6
+; RV64IA-TSO-NEXT: call __atomic_compare_exchange_16@plt
+; RV64IA-TSO-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64IA-TSO-NEXT: addi sp, sp, 32
+; RV64IA-TSO-NEXT: ret
+ %res = cmpxchg ptr %ptr, i128 %cmp, i128 %val acq_rel acquire ; success ordering acq_rel, failure ordering acquire
+ ret void
+}
+
+define void @cmpxchg_i128_seq_cst_monotonic(ptr %ptr, i128 %cmp, i128 %val) nounwind { ; i128 cmpxchg, result unused; checks expect an __atomic_compare_exchange[_16] libcall without Zacas and a single amocas.q.aqrl with Zacas
+; RV32I-LABEL: cmpxchg_i128_seq_cst_monotonic:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -48
+; RV32I-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; RV32I-NEXT: mv a5, a0
+; RV32I-NEXT: lw a0, 0(a2)
+; RV32I-NEXT: lw a3, 4(a2)
+; RV32I-NEXT: lw a4, 8(a2)
+; RV32I-NEXT: lw a2, 12(a2)
+; RV32I-NEXT: lw a6, 12(a1)
+; RV32I-NEXT: lw a7, 8(a1)
+; RV32I-NEXT: lw t0, 4(a1)
+; RV32I-NEXT: lw a1, 0(a1)
+; RV32I-NEXT: sw a6, 36(sp)
+; RV32I-NEXT: sw a7, 32(sp)
+; RV32I-NEXT: sw t0, 28(sp)
+; RV32I-NEXT: sw a1, 24(sp)
+; RV32I-NEXT: sw a2, 20(sp)
+; RV32I-NEXT: sw a4, 16(sp)
+; RV32I-NEXT: sw a3, 12(sp)
+; RV32I-NEXT: sw a0, 8(sp)
+; RV32I-NEXT: li a0, 16
+; RV32I-NEXT: addi a2, sp, 24
+; RV32I-NEXT: addi a3, sp, 8
+; RV32I-NEXT: li a4, 5
+; RV32I-NEXT: mv a1, a5
+; RV32I-NEXT: li a5, 0
+; RV32I-NEXT: call __atomic_compare_exchange@plt
+; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 48
+; RV32I-NEXT: ret
+;
+; RV32IA-LABEL: cmpxchg_i128_seq_cst_monotonic:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: addi sp, sp, -48
+; RV32IA-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: mv a5, a0
+; RV32IA-NEXT: lw a0, 0(a2)
+; RV32IA-NEXT: lw a3, 4(a2)
+; RV32IA-NEXT: lw a4, 8(a2)
+; RV32IA-NEXT: lw a2, 12(a2)
+; RV32IA-NEXT: lw a6, 12(a1)
+; RV32IA-NEXT: lw a7, 8(a1)
+; RV32IA-NEXT: lw t0, 4(a1)
+; RV32IA-NEXT: lw a1, 0(a1)
+; RV32IA-NEXT: sw a6, 36(sp)
+; RV32IA-NEXT: sw a7, 32(sp)
+; RV32IA-NEXT: sw t0, 28(sp)
+; RV32IA-NEXT: sw a1, 24(sp)
+; RV32IA-NEXT: sw a2, 20(sp)
+; RV32IA-NEXT: sw a4, 16(sp)
+; RV32IA-NEXT: sw a3, 12(sp)
+; RV32IA-NEXT: sw a0, 8(sp)
+; RV32IA-NEXT: li a0, 16
+; RV32IA-NEXT: addi a2, sp, 24
+; RV32IA-NEXT: addi a3, sp, 8
+; RV32IA-NEXT: li a4, 5
+; RV32IA-NEXT: mv a1, a5
+; RV32IA-NEXT: li a5, 0
+; RV32IA-NEXT: call __atomic_compare_exchange@plt
+; RV32IA-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: addi sp, sp, 48
+; RV32IA-NEXT: ret
+;
+; RV64I-LABEL: cmpxchg_i128_seq_cst_monotonic:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: mv a5, a4
+; RV64I-NEXT: sd a2, 8(sp)
+; RV64I-NEXT: sd a1, 0(sp)
+; RV64I-NEXT: mv a1, sp
+; RV64I-NEXT: li a4, 5
+; RV64I-NEXT: mv a2, a3
+; RV64I-NEXT: mv a3, a5
+; RV64I-NEXT: li a5, 0
+; RV64I-NEXT: call __atomic_compare_exchange_16@plt
+; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: ret
+;
+; RV64IA-WMO-LABEL: cmpxchg_i128_seq_cst_monotonic:
+; RV64IA-WMO: # %bb.0:
+; RV64IA-WMO-NEXT: addi sp, sp, -32
+; RV64IA-WMO-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64IA-WMO-NEXT: mv a5, a4
+; RV64IA-WMO-NEXT: sd a2, 8(sp)
+; RV64IA-WMO-NEXT: sd a1, 0(sp)
+; RV64IA-WMO-NEXT: mv a1, sp
+; RV64IA-WMO-NEXT: li a4, 5
+; RV64IA-WMO-NEXT: mv a2, a3
+; RV64IA-WMO-NEXT: mv a3, a5
+; RV64IA-WMO-NEXT: li a5, 0
+; RV64IA-WMO-NEXT: call __atomic_compare_exchange_16@plt
+; RV64IA-WMO-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64IA-WMO-NEXT: addi sp, sp, 32
+; RV64IA-WMO-NEXT: ret
+;
+; RV64IA-ZACAS-LABEL: cmpxchg_i128_seq_cst_monotonic:
+; RV64IA-ZACAS: # %bb.0:
+; RV64IA-ZACAS-NEXT: mv a5, a4
+; RV64IA-ZACAS-NEXT: mv a7, a2
+; RV64IA-ZACAS-NEXT: mv a4, a3
+; RV64IA-ZACAS-NEXT: mv a6, a1
+; RV64IA-ZACAS-NEXT: amocas.q.aqrl a6, a4, (a0)
+; RV64IA-ZACAS-NEXT: ret
+;
+; RV64IA-TSO-LABEL: cmpxchg_i128_seq_cst_monotonic:
+; RV64IA-TSO: # %bb.0:
+; RV64IA-TSO-NEXT: addi sp, sp, -32
+; RV64IA-TSO-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64IA-TSO-NEXT: mv a5, a4
+; RV64IA-TSO-NEXT: sd a2, 8(sp)
+; RV64IA-TSO-NEXT: sd a1, 0(sp)
+; RV64IA-TSO-NEXT: mv a1, sp
+; RV64IA-TSO-NEXT: li a4, 5
+; RV64IA-TSO-NEXT: mv a2, a3
+; RV64IA-TSO-NEXT: mv a3, a5
+; RV64IA-TSO-NEXT: li a5, 0
+; RV64IA-TSO-NEXT: call __atomic_compare_exchange_16@plt
+; RV64IA-TSO-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64IA-TSO-NEXT: addi sp, sp, 32
+; RV64IA-TSO-NEXT: ret
+ %res = cmpxchg ptr %ptr, i128 %cmp, i128 %val seq_cst monotonic ; success ordering seq_cst, failure ordering monotonic
+ ret void
+}
+
+define void @cmpxchg_i128_seq_cst_acquire(ptr %ptr, i128 %cmp, i128 %val) nounwind { ; i128 cmpxchg, result unused; checks expect an __atomic_compare_exchange[_16] libcall without Zacas and a single amocas.q.aqrl with Zacas
+; RV32I-LABEL: cmpxchg_i128_seq_cst_acquire:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -48
+; RV32I-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; RV32I-NEXT: mv a6, a0
+; RV32I-NEXT: lw a0, 0(a2)
+; RV32I-NEXT: lw a3, 4(a2)
+; RV32I-NEXT: lw a4, 8(a2)
+; RV32I-NEXT: lw a2, 12(a2)
+; RV32I-NEXT: lw a5, 12(a1)
+; RV32I-NEXT: lw a7, 8(a1)
+; RV32I-NEXT: lw t0, 4(a1)
+; RV32I-NEXT: lw a1, 0(a1)
+; RV32I-NEXT: sw a5, 36(sp)
+; RV32I-NEXT: sw a7, 32(sp)
+; RV32I-NEXT: sw t0, 28(sp)
+; RV32I-NEXT: sw a1, 24(sp)
+; RV32I-NEXT: sw a2, 20(sp)
+; RV32I-NEXT: sw a4, 16(sp)
+; RV32I-NEXT: sw a3, 12(sp)
+; RV32I-NEXT: sw a0, 8(sp)
+; RV32I-NEXT: li a0, 16
+; RV32I-NEXT: addi a2, sp, 24
+; RV32I-NEXT: addi a3, sp, 8
+; RV32I-NEXT: li a4, 5
+; RV32I-NEXT: li a5, 2
+; RV32I-NEXT: mv a1, a6
+; RV32I-NEXT: call __atomic_compare_exchange@plt
+; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 48
+; RV32I-NEXT: ret
+;
+; RV32IA-LABEL: cmpxchg_i128_seq_cst_acquire:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: addi sp, sp, -48
+; RV32IA-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: mv a6, a0
+; RV32IA-NEXT: lw a0, 0(a2)
+; RV32IA-NEXT: lw a3, 4(a2)
+; RV32IA-NEXT: lw a4, 8(a2)
+; RV32IA-NEXT: lw a2, 12(a2)
+; RV32IA-NEXT: lw a5, 12(a1)
+; RV32IA-NEXT: lw a7, 8(a1)
+; RV32IA-NEXT: lw t0, 4(a1)
+; RV32IA-NEXT: lw a1, 0(a1)
+; RV32IA-NEXT: sw a5, 36(sp)
+; RV32IA-NEXT: sw a7, 32(sp)
+; RV32IA-NEXT: sw t0, 28(sp)
+; RV32IA-NEXT: sw a1, 24(sp)
+; RV32IA-NEXT: sw a2, 20(sp)
+; RV32IA-NEXT: sw a4, 16(sp)
+; RV32IA-NEXT: sw a3, 12(sp)
+; RV32IA-NEXT: sw a0, 8(sp)
+; RV32IA-NEXT: li a0, 16
+; RV32IA-NEXT: addi a2, sp, 24
+; RV32IA-NEXT: addi a3, sp, 8
+; RV32IA-NEXT: li a4, 5
+; RV32IA-NEXT: li a5, 2
+; RV32IA-NEXT: mv a1, a6
+; RV32IA-NEXT: call __atomic_compare_exchange@plt
+; RV32IA-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: addi sp, sp, 48
+; RV32IA-NEXT: ret
+;
+; RV64I-LABEL: cmpxchg_i128_seq_cst_acquire:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: mv a6, a4
+; RV64I-NEXT: sd a2, 8(sp)
+; RV64I-NEXT: sd a1, 0(sp)
+; RV64I-NEXT: mv a1, sp
+; RV64I-NEXT: li a4, 5
+; RV64I-NEXT: li a5, 2
+; RV64I-NEXT: mv a2, a3
+; RV64I-NEXT: mv a3, a6
+; RV64I-NEXT: call __atomic_compare_exchange_16@plt
+; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: ret
+;
+; RV64IA-WMO-LABEL: cmpxchg_i128_seq_cst_acquire:
+; RV64IA-WMO: # %bb.0:
+; RV64IA-WMO-NEXT: addi sp, sp, -32
+; RV64IA-WMO-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64IA-WMO-NEXT: mv a6, a4
+; RV64IA-WMO-NEXT: sd a2, 8(sp)
+; RV64IA-WMO-NEXT: sd a1, 0(sp)
+; RV64IA-WMO-NEXT: mv a1, sp
+; RV64IA-WMO-NEXT: li a4, 5
+; RV64IA-WMO-NEXT: li a5, 2
+; RV64IA-WMO-NEXT: mv a2, a3
+; RV64IA-WMO-NEXT: mv a3, a6
+; RV64IA-WMO-NEXT: call __atomic_compare_exchange_16@plt
+; RV64IA-WMO-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64IA-WMO-NEXT: addi sp, sp, 32
+; RV64IA-WMO-NEXT: ret
+;
+; RV64IA-ZACAS-LABEL: cmpxchg_i128_seq_cst_acquire:
+; RV64IA-ZACAS: # %bb.0:
+; RV64IA-ZACAS-NEXT: mv a5, a4
+; RV64IA-ZACAS-NEXT: mv a7, a2
+; RV64IA-ZACAS-NEXT: mv a4, a3
+; RV64IA-ZACAS-NEXT: mv a6, a1
+; RV64IA-ZACAS-NEXT: amocas.q.aqrl a6, a4, (a0)
+; RV64IA-ZACAS-NEXT: ret
+;
+; RV64IA-TSO-LABEL: cmpxchg_i128_seq_cst_acquire:
+; RV64IA-TSO: # %bb.0:
+; RV64IA-TSO-NEXT: addi sp, sp, -32
+; RV64IA-TSO-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64IA-TSO-NEXT: mv a6, a4
+; RV64IA-TSO-NEXT: sd a2, 8(sp)
+; RV64IA-TSO-NEXT: sd a1, 0(sp)
+; RV64IA-TSO-NEXT: mv a1, sp
+; RV64IA-TSO-NEXT: li a4, 5
+; RV64IA-TSO-NEXT: li a5, 2
+; RV64IA-TSO-NEXT: mv a2, a3
+; RV64IA-TSO-NEXT: mv a3, a6
+; RV64IA-TSO-NEXT: call __atomic_compare_exchange_16@plt
+; RV64IA-TSO-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64IA-TSO-NEXT: addi sp, sp, 32
+; RV64IA-TSO-NEXT: ret
+ %res = cmpxchg ptr %ptr, i128 %cmp, i128 %val seq_cst acquire ; success ordering seq_cst, failure ordering acquire
+ ret void
+}
+
+define void @cmpxchg_i128_seq_cst_seq_cst(ptr %ptr, i128 %cmp, i128 %val) nounwind { ; i128 cmpxchg, result unused; checks expect an __atomic_compare_exchange[_16] libcall without Zacas and a single amocas.q.aqrl with Zacas
+; RV32I-LABEL: cmpxchg_i128_seq_cst_seq_cst:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -48
+; RV32I-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; RV32I-NEXT: mv a6, a0
+; RV32I-NEXT: lw a0, 0(a2)
+; RV32I-NEXT: lw a3, 4(a2)
+; RV32I-NEXT: lw a4, 8(a2)
+; RV32I-NEXT: lw a2, 12(a2)
+; RV32I-NEXT: lw a5, 12(a1)
+; RV32I-NEXT: lw a7, 8(a1)
+; RV32I-NEXT: lw t0, 4(a1)
+; RV32I-NEXT: lw a1, 0(a1)
+; RV32I-NEXT: sw a5, 36(sp)
+; RV32I-NEXT: sw a7, 32(sp)
+; RV32I-NEXT: sw t0, 28(sp)
+; RV32I-NEXT: sw a1, 24(sp)
+; RV32I-NEXT: sw a2, 20(sp)
+; RV32I-NEXT: sw a4, 16(sp)
+; RV32I-NEXT: sw a3, 12(sp)
+; RV32I-NEXT: sw a0, 8(sp)
+; RV32I-NEXT: li a0, 16
+; RV32I-NEXT: addi a2, sp, 24
+; RV32I-NEXT: addi a3, sp, 8
+; RV32I-NEXT: li a4, 5
+; RV32I-NEXT: li a5, 5
+; RV32I-NEXT: mv a1, a6
+; RV32I-NEXT: call __atomic_compare_exchange@plt
+; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 48
+; RV32I-NEXT: ret
+;
+; RV32IA-LABEL: cmpxchg_i128_seq_cst_seq_cst:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: addi sp, sp, -48
+; RV32IA-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: mv a6, a0
+; RV32IA-NEXT: lw a0, 0(a2)
+; RV32IA-NEXT: lw a3, 4(a2)
+; RV32IA-NEXT: lw a4, 8(a2)
+; RV32IA-NEXT: lw a2, 12(a2)
+; RV32IA-NEXT: lw a5, 12(a1)
+; RV32IA-NEXT: lw a7, 8(a1)
+; RV32IA-NEXT: lw t0, 4(a1)
+; RV32IA-NEXT: lw a1, 0(a1)
+; RV32IA-NEXT: sw a5, 36(sp)
+; RV32IA-NEXT: sw a7, 32(sp)
+; RV32IA-NEXT: sw t0, 28(sp)
+; RV32IA-NEXT: sw a1, 24(sp)
+; RV32IA-NEXT: sw a2, 20(sp)
+; RV32IA-NEXT: sw a4, 16(sp)
+; RV32IA-NEXT: sw a3, 12(sp)
+; RV32IA-NEXT: sw a0, 8(sp)
+; RV32IA-NEXT: li a0, 16
+; RV32IA-NEXT: addi a2, sp, 24
+; RV32IA-NEXT: addi a3, sp, 8
+; RV32IA-NEXT: li a4, 5
+; RV32IA-NEXT: li a5, 5
+; RV32IA-NEXT: mv a1, a6
+; RV32IA-NEXT: call __atomic_compare_exchange@plt
+; RV32IA-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: addi sp, sp, 48
+; RV32IA-NEXT: ret
+;
+; RV64I-LABEL: cmpxchg_i128_seq_cst_seq_cst:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: mv a6, a4
+; RV64I-NEXT: sd a2, 8(sp)
+; RV64I-NEXT: sd a1, 0(sp)
+; RV64I-NEXT: mv a1, sp
+; RV64I-NEXT: li a4, 5
+; RV64I-NEXT: li a5, 5
+; RV64I-NEXT: mv a2, a3
+; RV64I-NEXT: mv a3, a6
+; RV64I-NEXT: call __atomic_compare_exchange_16@plt
+; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: ret
+;
+; RV64IA-WMO-LABEL: cmpxchg_i128_seq_cst_seq_cst:
+; RV64IA-WMO: # %bb.0:
+; RV64IA-WMO-NEXT: addi sp, sp, -32
+; RV64IA-WMO-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64IA-WMO-NEXT: mv a6, a4
+; RV64IA-WMO-NEXT: sd a2, 8(sp)
+; RV64IA-WMO-NEXT: sd a1, 0(sp)
+; RV64IA-WMO-NEXT: mv a1, sp
+; RV64IA-WMO-NEXT: li a4, 5
+; RV64IA-WMO-NEXT: li a5, 5
+; RV64IA-WMO-NEXT: mv a2, a3
+; RV64IA-WMO-NEXT: mv a3, a6
+; RV64IA-WMO-NEXT: call __atomic_compare_exchange_16@plt
+; RV64IA-WMO-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64IA-WMO-NEXT: addi sp, sp, 32
+; RV64IA-WMO-NEXT: ret
+;
+; RV64IA-ZACAS-LABEL: cmpxchg_i128_seq_cst_seq_cst:
+; RV64IA-ZACAS: # %bb.0:
+; RV64IA-ZACAS-NEXT: mv a5, a4
+; RV64IA-ZACAS-NEXT: mv a7, a2
+; RV64IA-ZACAS-NEXT: mv a4, a3
+; RV64IA-ZACAS-NEXT: mv a6, a1
+; RV64IA-ZACAS-NEXT: amocas.q.aqrl a6, a4, (a0)
+; RV64IA-ZACAS-NEXT: ret
+;
+; RV64IA-TSO-LABEL: cmpxchg_i128_seq_cst_seq_cst:
+; RV64IA-TSO: # %bb.0:
+; RV64IA-TSO-NEXT: addi sp, sp, -32
+; RV64IA-TSO-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64IA-TSO-NEXT: mv a6, a4
+; RV64IA-TSO-NEXT: sd a2, 8(sp)
+; RV64IA-TSO-NEXT: sd a1, 0(sp)
+; RV64IA-TSO-NEXT: mv a1, sp
+; RV64IA-TSO-NEXT: li a4, 5
+; RV64IA-TSO-NEXT: li a5, 5
+; RV64IA-TSO-NEXT: mv a2, a3
+; RV64IA-TSO-NEXT: mv a3, a6
+; RV64IA-TSO-NEXT: call __atomic_compare_exchange_16@plt
+; RV64IA-TSO-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64IA-TSO-NEXT: addi sp, sp, 32
+; RV64IA-TSO-NEXT: ret
+ %res = cmpxchg ptr %ptr, i128 %cmp, i128 %val seq_cst seq_cst ; success ordering seq_cst, failure ordering seq_cst
+ ret void
+}
More information about the llvm-commits
mailing list