[llvm] fa63642 - [RISC-V][GlobalISel] Legalize G_ATOMIC_CMPXCHG and G_ATOMIC_CMPXCHG_WITH_SUCCESS (#157634)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 10 23:52:40 PDT 2025
Author: Kane Wang
Date: 2025-09-11T15:52:36+09:00
New Revision: fa63642dcebbf74c051fc058679c4eaebaa82dde
URL: https://github.com/llvm/llvm-project/commit/fa63642dcebbf74c051fc058679c4eaebaa82dde
DIFF: https://github.com/llvm/llvm-project/commit/fa63642dcebbf74c051fc058679c4eaebaa82dde.diff
LOG: [RISC-V][GlobalISel] Legalize G_ATOMIC_CMPXCHG and G_ATOMIC_CMPXCHG_WITH_SUCCESS (#157634)
This change introduces legalization for `G_ATOMIC_CMPXCHG` and
`G_ATOMIC_CMPXCHG_WITH_SUCCESS`. Additionally, support for the
`riscv_masked_cmpxchg` intrinsic is added to legalizeIntrinsic, ensuring
that masked compare-and-exchange operations are recognized during
legalization.
---------
Co-authored-by: Kane Wang <kanewang95 at foxmail.com>
Added:
llvm/test/CodeGen/RISCV/GlobalISel/atomic-cmpxchg.ll
llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomic-cmpxchg-rv32.mir
llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomic-cmpxchg-rv64.mir
llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-atomic-cmpxchg-rv32.mir
llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-atomic-cmpxchg-rv64.mir
Modified:
llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
llvm/test/CodeGen/RISCV/GlobalISel/legalizer-info-validation.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
index f6f00eacdf0a2..48e88befa0f01 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
@@ -702,7 +702,10 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST)
.customIf(all(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST),
typeIsLegalIntOrFPVec(1, IntOrFPVecTys, ST)));
- getActionDefinitionsBuilder(G_ATOMICRMW_ADD)
+ getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG_WITH_SUCCESS)
+ .lowerIf(all(typeInSet(0, {s8, s16, s32, s64}), typeIs(2, p0)));
+
+ getActionDefinitionsBuilder({G_ATOMIC_CMPXCHG, G_ATOMICRMW_ADD})
.legalFor(ST.hasStdExtA(), {{sXLen, p0}})
.libcallFor(!ST.hasStdExtA(), {{s8, p0}, {s16, p0}, {s32, p0}, {s64, p0}})
.clampScalar(0, sXLen, sXLen);
@@ -751,6 +754,7 @@ bool RISCVLegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
}
case Intrinsic::riscv_masked_atomicrmw_add:
case Intrinsic::riscv_masked_atomicrmw_sub:
+ case Intrinsic::riscv_masked_cmpxchg:
return true;
}
}
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/atomic-cmpxchg.ll b/llvm/test/CodeGen/RISCV/GlobalISel/atomic-cmpxchg.ll
new file mode 100644
index 0000000000000..2fb9dcfeef785
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/atomic-cmpxchg.ll
@@ -0,0 +1,5910 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -global-isel -mtriple=riscv32 < %s \
+; RUN: | FileCheck -check-prefix=RV32I %s
+; RUN: llc -global-isel -mtriple=riscv32 -mattr=+a < %s \
+; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-WMO %s
+; RUN: llc -global-isel -mtriple=riscv32 -mattr=+a,+zacas < %s \
+; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-ZACAS,RV32IA-WMO-ZACAS %s
+; RUN: llc -global-isel -mtriple=riscv32 -mattr=+a,+ztso < %s \
+; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-TSO %s
+; RUN: llc -global-isel -mtriple=riscv32 -mattr=+a,+ztso,+zacas < %s \
+; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-ZACAS,RV32IA-TSO-ZACAS %s
+; RUN: llc -global-isel -mtriple=riscv64 < %s \
+; RUN: | FileCheck -check-prefix=RV64I %s
+; RUN: llc -global-isel -mtriple=riscv64 -mattr=+a < %s \
+; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-WMO %s
+; RUN: llc -global-isel -mtriple=riscv64 -mattr=+a,+zacas < %s \
+; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-ZACAS,RV64IA-WMO-ZACAS %s
+; RUN: llc -global-isel -mtriple=riscv64 -mattr=+a,+zacas,+zabha < %s \
+; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-ZABHA,RV64IA-WMO-ZABHA %s
+; RUN: llc -global-isel -mtriple=riscv64 -mattr=+a,+ztso < %s \
+; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-TSO %s
+; RUN: llc -global-isel -mtriple=riscv64 -mattr=+a,+ztso,+zacas < %s \
+; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-ZACAS,RV64IA-TSO-ZACAS %s
+; RUN: llc -global-isel -mtriple=riscv64 -mattr=+a,+ztso,+zacas,+zabha < %s \
+; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-ZABHA,RV64IA-TSO-ZABHA %s
+
+define void @cmpxchg_i8_monotonic_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind {
+; RV32I-LABEL: cmpxchg_i8_monotonic_monotonic:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sb a1, 11(sp)
+; RV32I-NEXT: addi a1, sp, 11
+; RV32I-NEXT: li a3, 0
+; RV32I-NEXT: li a4, 0
+; RV32I-NEXT: call __atomic_compare_exchange_1
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-LABEL: cmpxchg_i8_monotonic_monotonic:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: li a3, 255
+; RV32IA-NEXT: andi a4, a0, -4
+; RV32IA-NEXT: andi a0, a0, 3
+; RV32IA-NEXT: zext.b a1, a1
+; RV32IA-NEXT: zext.b a2, a2
+; RV32IA-NEXT: slli a0, a0, 3
+; RV32IA-NEXT: sll a3, a3, a0
+; RV32IA-NEXT: sll a1, a1, a0
+; RV32IA-NEXT: sll a0, a2, a0
+; RV32IA-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT: lr.w a2, (a4)
+; RV32IA-NEXT: and a5, a2, a3
+; RV32IA-NEXT: bne a5, a1, .LBB0_3
+; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB0_1 Depth=1
+; RV32IA-NEXT: xor a5, a2, a0
+; RV32IA-NEXT: and a5, a5, a3
+; RV32IA-NEXT: xor a5, a2, a5
+; RV32IA-NEXT: sc.w a5, a5, (a4)
+; RV32IA-NEXT: bnez a5, .LBB0_1
+; RV32IA-NEXT: .LBB0_3:
+; RV32IA-NEXT: ret
+;
+; RV64I-LABEL: cmpxchg_i8_monotonic_monotonic:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sb a1, 7(sp)
+; RV64I-NEXT: addi a1, sp, 7
+; RV64I-NEXT: li a3, 0
+; RV64I-NEXT: li a4, 0
+; RV64I-NEXT: call __atomic_compare_exchange_1
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-WMO-LABEL: cmpxchg_i8_monotonic_monotonic:
+; RV64IA-WMO: # %bb.0:
+; RV64IA-WMO-NEXT: li a3, 255
+; RV64IA-WMO-NEXT: andi a4, a0, -4
+; RV64IA-WMO-NEXT: andi a0, a0, 3
+; RV64IA-WMO-NEXT: zext.b a1, a1
+; RV64IA-WMO-NEXT: zext.b a2, a2
+; RV64IA-WMO-NEXT: slli a0, a0, 3
+; RV64IA-WMO-NEXT: sllw a3, a3, a0
+; RV64IA-WMO-NEXT: sllw a1, a1, a0
+; RV64IA-WMO-NEXT: sllw a0, a2, a0
+; RV64IA-WMO-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT: lr.w a2, (a4)
+; RV64IA-WMO-NEXT: and a5, a2, a3
+; RV64IA-WMO-NEXT: bne a5, a1, .LBB0_3
+; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB0_1 Depth=1
+; RV64IA-WMO-NEXT: xor a5, a2, a0
+; RV64IA-WMO-NEXT: and a5, a5, a3
+; RV64IA-WMO-NEXT: xor a5, a2, a5
+; RV64IA-WMO-NEXT: sc.w a5, a5, (a4)
+; RV64IA-WMO-NEXT: bnez a5, .LBB0_1
+; RV64IA-WMO-NEXT: .LBB0_3:
+; RV64IA-WMO-NEXT: ret
+;
+; RV64IA-ZACAS-LABEL: cmpxchg_i8_monotonic_monotonic:
+; RV64IA-ZACAS: # %bb.0:
+; RV64IA-ZACAS-NEXT: li a3, 255
+; RV64IA-ZACAS-NEXT: andi a4, a0, -4
+; RV64IA-ZACAS-NEXT: andi a0, a0, 3
+; RV64IA-ZACAS-NEXT: zext.b a1, a1
+; RV64IA-ZACAS-NEXT: zext.b a2, a2
+; RV64IA-ZACAS-NEXT: slli a0, a0, 3
+; RV64IA-ZACAS-NEXT: sllw a3, a3, a0
+; RV64IA-ZACAS-NEXT: sllw a1, a1, a0
+; RV64IA-ZACAS-NEXT: sllw a0, a2, a0
+; RV64IA-ZACAS-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-ZACAS-NEXT: lr.w a2, (a4)
+; RV64IA-ZACAS-NEXT: and a5, a2, a3
+; RV64IA-ZACAS-NEXT: bne a5, a1, .LBB0_3
+; RV64IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB0_1 Depth=1
+; RV64IA-ZACAS-NEXT: xor a5, a2, a0
+; RV64IA-ZACAS-NEXT: and a5, a5, a3
+; RV64IA-ZACAS-NEXT: xor a5, a2, a5
+; RV64IA-ZACAS-NEXT: sc.w a5, a5, (a4)
+; RV64IA-ZACAS-NEXT: bnez a5, .LBB0_1
+; RV64IA-ZACAS-NEXT: .LBB0_3:
+; RV64IA-ZACAS-NEXT: ret
+;
+; RV64IA-ZABHA-LABEL: cmpxchg_i8_monotonic_monotonic:
+; RV64IA-ZABHA: # %bb.0:
+; RV64IA-ZABHA-NEXT: amocas.b a1, a2, (a0)
+; RV64IA-ZABHA-NEXT: ret
+;
+; RV64IA-TSO-LABEL: cmpxchg_i8_monotonic_monotonic:
+; RV64IA-TSO: # %bb.0:
+; RV64IA-TSO-NEXT: li a3, 255
+; RV64IA-TSO-NEXT: andi a4, a0, -4
+; RV64IA-TSO-NEXT: andi a0, a0, 3
+; RV64IA-TSO-NEXT: zext.b a1, a1
+; RV64IA-TSO-NEXT: zext.b a2, a2
+; RV64IA-TSO-NEXT: slli a0, a0, 3
+; RV64IA-TSO-NEXT: sllw a3, a3, a0
+; RV64IA-TSO-NEXT: sllw a1, a1, a0
+; RV64IA-TSO-NEXT: sllw a0, a2, a0
+; RV64IA-TSO-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT: lr.w a2, (a4)
+; RV64IA-TSO-NEXT: and a5, a2, a3
+; RV64IA-TSO-NEXT: bne a5, a1, .LBB0_3
+; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB0_1 Depth=1
+; RV64IA-TSO-NEXT: xor a5, a2, a0
+; RV64IA-TSO-NEXT: and a5, a5, a3
+; RV64IA-TSO-NEXT: xor a5, a2, a5
+; RV64IA-TSO-NEXT: sc.w a5, a5, (a4)
+; RV64IA-TSO-NEXT: bnez a5, .LBB0_1
+; RV64IA-TSO-NEXT: .LBB0_3:
+; RV64IA-TSO-NEXT: ret
+ %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val monotonic monotonic
+ ret void
+}
+
+define void @cmpxchg_i8_acquire_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind {
+; RV32I-LABEL: cmpxchg_i8_acquire_monotonic:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sb a1, 11(sp)
+; RV32I-NEXT: addi a1, sp, 11
+; RV32I-NEXT: li a3, 2
+; RV32I-NEXT: li a4, 0
+; RV32I-NEXT: call __atomic_compare_exchange_1
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-WMO-LABEL: cmpxchg_i8_acquire_monotonic:
+; RV32IA-WMO: # %bb.0:
+; RV32IA-WMO-NEXT: li a3, 255
+; RV32IA-WMO-NEXT: andi a4, a0, -4
+; RV32IA-WMO-NEXT: andi a0, a0, 3
+; RV32IA-WMO-NEXT: zext.b a1, a1
+; RV32IA-WMO-NEXT: zext.b a2, a2
+; RV32IA-WMO-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NEXT: sll a3, a3, a0
+; RV32IA-WMO-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NEXT: sll a0, a2, a0
+; RV32IA-WMO-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NEXT: lr.w.aq a2, (a4)
+; RV32IA-WMO-NEXT: and a5, a2, a3
+; RV32IA-WMO-NEXT: bne a5, a1, .LBB1_3
+; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB1_1 Depth=1
+; RV32IA-WMO-NEXT: xor a5, a2, a0
+; RV32IA-WMO-NEXT: and a5, a5, a3
+; RV32IA-WMO-NEXT: xor a5, a2, a5
+; RV32IA-WMO-NEXT: sc.w a5, a5, (a4)
+; RV32IA-WMO-NEXT: bnez a5, .LBB1_1
+; RV32IA-WMO-NEXT: .LBB1_3:
+; RV32IA-WMO-NEXT: ret
+;
+; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i8_acquire_monotonic:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: li a3, 255
+; RV32IA-WMO-ZACAS-NEXT: andi a4, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: andi a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-ZACAS-NEXT: zext.b a2, a2
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: sll a0, a2, a0
+; RV32IA-WMO-ZACAS-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a4)
+; RV32IA-WMO-ZACAS-NEXT: and a5, a2, a3
+; RV32IA-WMO-ZACAS-NEXT: bne a5, a1, .LBB1_3
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB1_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a2, a0
+; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a3
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a2, a5
+; RV32IA-WMO-ZACAS-NEXT: sc.w a5, a5, (a4)
+; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB1_1
+; RV32IA-WMO-ZACAS-NEXT: .LBB1_3:
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-LABEL: cmpxchg_i8_acquire_monotonic:
+; RV32IA-TSO: # %bb.0:
+; RV32IA-TSO-NEXT: li a3, 255
+; RV32IA-TSO-NEXT: andi a4, a0, -4
+; RV32IA-TSO-NEXT: andi a0, a0, 3
+; RV32IA-TSO-NEXT: zext.b a1, a1
+; RV32IA-TSO-NEXT: zext.b a2, a2
+; RV32IA-TSO-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NEXT: sll a3, a3, a0
+; RV32IA-TSO-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NEXT: sll a0, a2, a0
+; RV32IA-TSO-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NEXT: lr.w a2, (a4)
+; RV32IA-TSO-NEXT: and a5, a2, a3
+; RV32IA-TSO-NEXT: bne a5, a1, .LBB1_3
+; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB1_1 Depth=1
+; RV32IA-TSO-NEXT: xor a5, a2, a0
+; RV32IA-TSO-NEXT: and a5, a5, a3
+; RV32IA-TSO-NEXT: xor a5, a2, a5
+; RV32IA-TSO-NEXT: sc.w a5, a5, (a4)
+; RV32IA-TSO-NEXT: bnez a5, .LBB1_1
+; RV32IA-TSO-NEXT: .LBB1_3:
+; RV32IA-TSO-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: cmpxchg_i8_acquire_monotonic:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: li a3, 255
+; RV32IA-TSO-ZACAS-NEXT: andi a4, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: andi a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-ZACAS-NEXT: zext.b a2, a2
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: sll a0, a2, a0
+; RV32IA-TSO-ZACAS-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT: lr.w a2, (a4)
+; RV32IA-TSO-ZACAS-NEXT: and a5, a2, a3
+; RV32IA-TSO-ZACAS-NEXT: bne a5, a1, .LBB1_3
+; RV32IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB1_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a2, a0
+; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a3
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a2, a5
+; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a4)
+; RV32IA-TSO-ZACAS-NEXT: bnez a5, .LBB1_1
+; RV32IA-TSO-ZACAS-NEXT: .LBB1_3:
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
+; RV64I-LABEL: cmpxchg_i8_acquire_monotonic:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sb a1, 7(sp)
+; RV64I-NEXT: addi a1, sp, 7
+; RV64I-NEXT: li a3, 2
+; RV64I-NEXT: li a4, 0
+; RV64I-NEXT: call __atomic_compare_exchange_1
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-WMO-LABEL: cmpxchg_i8_acquire_monotonic:
+; RV64IA-WMO: # %bb.0:
+; RV64IA-WMO-NEXT: li a3, 255
+; RV64IA-WMO-NEXT: andi a4, a0, -4
+; RV64IA-WMO-NEXT: andi a0, a0, 3
+; RV64IA-WMO-NEXT: zext.b a1, a1
+; RV64IA-WMO-NEXT: zext.b a2, a2
+; RV64IA-WMO-NEXT: slli a0, a0, 3
+; RV64IA-WMO-NEXT: sllw a3, a3, a0
+; RV64IA-WMO-NEXT: sllw a1, a1, a0
+; RV64IA-WMO-NEXT: sllw a0, a2, a0
+; RV64IA-WMO-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT: lr.w.aq a2, (a4)
+; RV64IA-WMO-NEXT: and a5, a2, a3
+; RV64IA-WMO-NEXT: bne a5, a1, .LBB1_3
+; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB1_1 Depth=1
+; RV64IA-WMO-NEXT: xor a5, a2, a0
+; RV64IA-WMO-NEXT: and a5, a5, a3
+; RV64IA-WMO-NEXT: xor a5, a2, a5
+; RV64IA-WMO-NEXT: sc.w a5, a5, (a4)
+; RV64IA-WMO-NEXT: bnez a5, .LBB1_1
+; RV64IA-WMO-NEXT: .LBB1_3:
+; RV64IA-WMO-NEXT: ret
+;
+; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i8_acquire_monotonic:
+; RV64IA-WMO-ZACAS: # %bb.0:
+; RV64IA-WMO-ZACAS-NEXT: li a3, 255
+; RV64IA-WMO-ZACAS-NEXT: andi a4, a0, -4
+; RV64IA-WMO-ZACAS-NEXT: andi a0, a0, 3
+; RV64IA-WMO-ZACAS-NEXT: zext.b a1, a1
+; RV64IA-WMO-ZACAS-NEXT: zext.b a2, a2
+; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV64IA-WMO-ZACAS-NEXT: sllw a3, a3, a0
+; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0
+; RV64IA-WMO-ZACAS-NEXT: sllw a0, a2, a0
+; RV64IA-WMO-ZACAS-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a4)
+; RV64IA-WMO-ZACAS-NEXT: and a5, a2, a3
+; RV64IA-WMO-ZACAS-NEXT: bne a5, a1, .LBB1_3
+; RV64IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB1_1 Depth=1
+; RV64IA-WMO-ZACAS-NEXT: xor a5, a2, a0
+; RV64IA-WMO-ZACAS-NEXT: and a5, a5, a3
+; RV64IA-WMO-ZACAS-NEXT: xor a5, a2, a5
+; RV64IA-WMO-ZACAS-NEXT: sc.w a5, a5, (a4)
+; RV64IA-WMO-ZACAS-NEXT: bnez a5, .LBB1_1
+; RV64IA-WMO-ZACAS-NEXT: .LBB1_3:
+; RV64IA-WMO-ZACAS-NEXT: ret
+;
+; RV64IA-WMO-ZABHA-LABEL: cmpxchg_i8_acquire_monotonic:
+; RV64IA-WMO-ZABHA: # %bb.0:
+; RV64IA-WMO-ZABHA-NEXT: amocas.b.aq a1, a2, (a0)
+; RV64IA-WMO-ZABHA-NEXT: ret
+;
+; RV64IA-TSO-LABEL: cmpxchg_i8_acquire_monotonic:
+; RV64IA-TSO: # %bb.0:
+; RV64IA-TSO-NEXT: li a3, 255
+; RV64IA-TSO-NEXT: andi a4, a0, -4
+; RV64IA-TSO-NEXT: andi a0, a0, 3
+; RV64IA-TSO-NEXT: zext.b a1, a1
+; RV64IA-TSO-NEXT: zext.b a2, a2
+; RV64IA-TSO-NEXT: slli a0, a0, 3
+; RV64IA-TSO-NEXT: sllw a3, a3, a0
+; RV64IA-TSO-NEXT: sllw a1, a1, a0
+; RV64IA-TSO-NEXT: sllw a0, a2, a0
+; RV64IA-TSO-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT: lr.w a2, (a4)
+; RV64IA-TSO-NEXT: and a5, a2, a3
+; RV64IA-TSO-NEXT: bne a5, a1, .LBB1_3
+; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB1_1 Depth=1
+; RV64IA-TSO-NEXT: xor a5, a2, a0
+; RV64IA-TSO-NEXT: and a5, a5, a3
+; RV64IA-TSO-NEXT: xor a5, a2, a5
+; RV64IA-TSO-NEXT: sc.w a5, a5, (a4)
+; RV64IA-TSO-NEXT: bnez a5, .LBB1_1
+; RV64IA-TSO-NEXT: .LBB1_3:
+; RV64IA-TSO-NEXT: ret
+;
+; RV64IA-TSO-ZACAS-LABEL: cmpxchg_i8_acquire_monotonic:
+; RV64IA-TSO-ZACAS: # %bb.0:
+; RV64IA-TSO-ZACAS-NEXT: li a3, 255
+; RV64IA-TSO-ZACAS-NEXT: andi a4, a0, -4
+; RV64IA-TSO-ZACAS-NEXT: andi a0, a0, 3
+; RV64IA-TSO-ZACAS-NEXT: zext.b a1, a1
+; RV64IA-TSO-ZACAS-NEXT: zext.b a2, a2
+; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV64IA-TSO-ZACAS-NEXT: sllw a3, a3, a0
+; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a0
+; RV64IA-TSO-ZACAS-NEXT: sllw a0, a2, a0
+; RV64IA-TSO-ZACAS-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-ZACAS-NEXT: lr.w a2, (a4)
+; RV64IA-TSO-ZACAS-NEXT: and a5, a2, a3
+; RV64IA-TSO-ZACAS-NEXT: bne a5, a1, .LBB1_3
+; RV64IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB1_1 Depth=1
+; RV64IA-TSO-ZACAS-NEXT: xor a5, a2, a0
+; RV64IA-TSO-ZACAS-NEXT: and a5, a5, a3
+; RV64IA-TSO-ZACAS-NEXT: xor a5, a2, a5
+; RV64IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a4)
+; RV64IA-TSO-ZACAS-NEXT: bnez a5, .LBB1_1
+; RV64IA-TSO-ZACAS-NEXT: .LBB1_3:
+; RV64IA-TSO-ZACAS-NEXT: ret
+;
+; RV64IA-TSO-ZABHA-LABEL: cmpxchg_i8_acquire_monotonic:
+; RV64IA-TSO-ZABHA: # %bb.0:
+; RV64IA-TSO-ZABHA-NEXT: amocas.b a1, a2, (a0)
+; RV64IA-TSO-ZABHA-NEXT: ret
+ %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val acquire monotonic
+ ret void
+}
+
+define void @cmpxchg_i8_acquire_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind {
+; RV32I-LABEL: cmpxchg_i8_acquire_acquire:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sb a1, 11(sp)
+; RV32I-NEXT: addi a1, sp, 11
+; RV32I-NEXT: li a3, 2
+; RV32I-NEXT: li a4, 2
+; RV32I-NEXT: call __atomic_compare_exchange_1
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-WMO-LABEL: cmpxchg_i8_acquire_acquire:
+; RV32IA-WMO: # %bb.0:
+; RV32IA-WMO-NEXT: li a3, 255
+; RV32IA-WMO-NEXT: andi a4, a0, -4
+; RV32IA-WMO-NEXT: andi a0, a0, 3
+; RV32IA-WMO-NEXT: zext.b a1, a1
+; RV32IA-WMO-NEXT: zext.b a2, a2
+; RV32IA-WMO-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NEXT: sll a3, a3, a0
+; RV32IA-WMO-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NEXT: sll a0, a2, a0
+; RV32IA-WMO-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NEXT: lr.w.aq a2, (a4)
+; RV32IA-WMO-NEXT: and a5, a2, a3
+; RV32IA-WMO-NEXT: bne a5, a1, .LBB2_3
+; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB2_1 Depth=1
+; RV32IA-WMO-NEXT: xor a5, a2, a0
+; RV32IA-WMO-NEXT: and a5, a5, a3
+; RV32IA-WMO-NEXT: xor a5, a2, a5
+; RV32IA-WMO-NEXT: sc.w a5, a5, (a4)
+; RV32IA-WMO-NEXT: bnez a5, .LBB2_1
+; RV32IA-WMO-NEXT: .LBB2_3:
+; RV32IA-WMO-NEXT: ret
+;
+; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i8_acquire_acquire:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: li a3, 255
+; RV32IA-WMO-ZACAS-NEXT: andi a4, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: andi a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-ZACAS-NEXT: zext.b a2, a2
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: sll a0, a2, a0
+; RV32IA-WMO-ZACAS-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a4)
+; RV32IA-WMO-ZACAS-NEXT: and a5, a2, a3
+; RV32IA-WMO-ZACAS-NEXT: bne a5, a1, .LBB2_3
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB2_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a2, a0
+; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a3
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a2, a5
+; RV32IA-WMO-ZACAS-NEXT: sc.w a5, a5, (a4)
+; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB2_1
+; RV32IA-WMO-ZACAS-NEXT: .LBB2_3:
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-LABEL: cmpxchg_i8_acquire_acquire:
+; RV32IA-TSO: # %bb.0:
+; RV32IA-TSO-NEXT: li a3, 255
+; RV32IA-TSO-NEXT: andi a4, a0, -4
+; RV32IA-TSO-NEXT: andi a0, a0, 3
+; RV32IA-TSO-NEXT: zext.b a1, a1
+; RV32IA-TSO-NEXT: zext.b a2, a2
+; RV32IA-TSO-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NEXT: sll a3, a3, a0
+; RV32IA-TSO-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NEXT: sll a0, a2, a0
+; RV32IA-TSO-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NEXT: lr.w a2, (a4)
+; RV32IA-TSO-NEXT: and a5, a2, a3
+; RV32IA-TSO-NEXT: bne a5, a1, .LBB2_3
+; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB2_1 Depth=1
+; RV32IA-TSO-NEXT: xor a5, a2, a0
+; RV32IA-TSO-NEXT: and a5, a5, a3
+; RV32IA-TSO-NEXT: xor a5, a2, a5
+; RV32IA-TSO-NEXT: sc.w a5, a5, (a4)
+; RV32IA-TSO-NEXT: bnez a5, .LBB2_1
+; RV32IA-TSO-NEXT: .LBB2_3:
+; RV32IA-TSO-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: cmpxchg_i8_acquire_acquire:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: li a3, 255
+; RV32IA-TSO-ZACAS-NEXT: andi a4, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: andi a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-ZACAS-NEXT: zext.b a2, a2
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: sll a0, a2, a0
+; RV32IA-TSO-ZACAS-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT: lr.w a2, (a4)
+; RV32IA-TSO-ZACAS-NEXT: and a5, a2, a3
+; RV32IA-TSO-ZACAS-NEXT: bne a5, a1, .LBB2_3
+; RV32IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB2_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a2, a0
+; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a3
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a2, a5
+; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a4)
+; RV32IA-TSO-ZACAS-NEXT: bnez a5, .LBB2_1
+; RV32IA-TSO-ZACAS-NEXT: .LBB2_3:
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
+; RV64I-LABEL: cmpxchg_i8_acquire_acquire:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sb a1, 7(sp)
+; RV64I-NEXT: addi a1, sp, 7
+; RV64I-NEXT: li a3, 2
+; RV64I-NEXT: li a4, 2
+; RV64I-NEXT: call __atomic_compare_exchange_1
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-WMO-LABEL: cmpxchg_i8_acquire_acquire:
+; RV64IA-WMO: # %bb.0:
+; RV64IA-WMO-NEXT: li a3, 255
+; RV64IA-WMO-NEXT: andi a4, a0, -4
+; RV64IA-WMO-NEXT: andi a0, a0, 3
+; RV64IA-WMO-NEXT: zext.b a1, a1
+; RV64IA-WMO-NEXT: zext.b a2, a2
+; RV64IA-WMO-NEXT: slli a0, a0, 3
+; RV64IA-WMO-NEXT: sllw a3, a3, a0
+; RV64IA-WMO-NEXT: sllw a1, a1, a0
+; RV64IA-WMO-NEXT: sllw a0, a2, a0
+; RV64IA-WMO-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT: lr.w.aq a2, (a4)
+; RV64IA-WMO-NEXT: and a5, a2, a3
+; RV64IA-WMO-NEXT: bne a5, a1, .LBB2_3
+; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB2_1 Depth=1
+; RV64IA-WMO-NEXT: xor a5, a2, a0
+; RV64IA-WMO-NEXT: and a5, a5, a3
+; RV64IA-WMO-NEXT: xor a5, a2, a5
+; RV64IA-WMO-NEXT: sc.w a5, a5, (a4)
+; RV64IA-WMO-NEXT: bnez a5, .LBB2_1
+; RV64IA-WMO-NEXT: .LBB2_3:
+; RV64IA-WMO-NEXT: ret
+;
+; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i8_acquire_acquire:
+; RV64IA-WMO-ZACAS: # %bb.0:
+; RV64IA-WMO-ZACAS-NEXT: li a3, 255
+; RV64IA-WMO-ZACAS-NEXT: andi a4, a0, -4
+; RV64IA-WMO-ZACAS-NEXT: andi a0, a0, 3
+; RV64IA-WMO-ZACAS-NEXT: zext.b a1, a1
+; RV64IA-WMO-ZACAS-NEXT: zext.b a2, a2
+; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV64IA-WMO-ZACAS-NEXT: sllw a3, a3, a0
+; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0
+; RV64IA-WMO-ZACAS-NEXT: sllw a0, a2, a0
+; RV64IA-WMO-ZACAS-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a4)
+; RV64IA-WMO-ZACAS-NEXT: and a5, a2, a3
+; RV64IA-WMO-ZACAS-NEXT: bne a5, a1, .LBB2_3
+; RV64IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB2_1 Depth=1
+; RV64IA-WMO-ZACAS-NEXT: xor a5, a2, a0
+; RV64IA-WMO-ZACAS-NEXT: and a5, a5, a3
+; RV64IA-WMO-ZACAS-NEXT: xor a5, a2, a5
+; RV64IA-WMO-ZACAS-NEXT: sc.w a5, a5, (a4)
+; RV64IA-WMO-ZACAS-NEXT: bnez a5, .LBB2_1
+; RV64IA-WMO-ZACAS-NEXT: .LBB2_3:
+; RV64IA-WMO-ZACAS-NEXT: ret
+;
+; RV64IA-WMO-ZABHA-LABEL: cmpxchg_i8_acquire_acquire:
+; RV64IA-WMO-ZABHA: # %bb.0:
+; RV64IA-WMO-ZABHA-NEXT: amocas.b.aq a1, a2, (a0)
+; RV64IA-WMO-ZABHA-NEXT: ret
+;
+; RV64IA-TSO-LABEL: cmpxchg_i8_acquire_acquire:
+; RV64IA-TSO: # %bb.0:
+; RV64IA-TSO-NEXT: li a3, 255
+; RV64IA-TSO-NEXT: andi a4, a0, -4
+; RV64IA-TSO-NEXT: andi a0, a0, 3
+; RV64IA-TSO-NEXT: zext.b a1, a1
+; RV64IA-TSO-NEXT: zext.b a2, a2
+; RV64IA-TSO-NEXT: slli a0, a0, 3
+; RV64IA-TSO-NEXT: sllw a3, a3, a0
+; RV64IA-TSO-NEXT: sllw a1, a1, a0
+; RV64IA-TSO-NEXT: sllw a0, a2, a0
+; RV64IA-TSO-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT: lr.w a2, (a4)
+; RV64IA-TSO-NEXT: and a5, a2, a3
+; RV64IA-TSO-NEXT: bne a5, a1, .LBB2_3
+; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB2_1 Depth=1
+; RV64IA-TSO-NEXT: xor a5, a2, a0
+; RV64IA-TSO-NEXT: and a5, a5, a3
+; RV64IA-TSO-NEXT: xor a5, a2, a5
+; RV64IA-TSO-NEXT: sc.w a5, a5, (a4)
+; RV64IA-TSO-NEXT: bnez a5, .LBB2_1
+; RV64IA-TSO-NEXT: .LBB2_3:
+; RV64IA-TSO-NEXT: ret
+;
+; RV64IA-TSO-ZACAS-LABEL: cmpxchg_i8_acquire_acquire:
+; RV64IA-TSO-ZACAS: # %bb.0:
+; RV64IA-TSO-ZACAS-NEXT: li a3, 255
+; RV64IA-TSO-ZACAS-NEXT: andi a4, a0, -4
+; RV64IA-TSO-ZACAS-NEXT: andi a0, a0, 3
+; RV64IA-TSO-ZACAS-NEXT: zext.b a1, a1
+; RV64IA-TSO-ZACAS-NEXT: zext.b a2, a2
+; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV64IA-TSO-ZACAS-NEXT: sllw a3, a3, a0
+; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a0
+; RV64IA-TSO-ZACAS-NEXT: sllw a0, a2, a0
+; RV64IA-TSO-ZACAS-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-ZACAS-NEXT: lr.w a2, (a4)
+; RV64IA-TSO-ZACAS-NEXT: and a5, a2, a3
+; RV64IA-TSO-ZACAS-NEXT: bne a5, a1, .LBB2_3
+; RV64IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB2_1 Depth=1
+; RV64IA-TSO-ZACAS-NEXT: xor a5, a2, a0
+; RV64IA-TSO-ZACAS-NEXT: and a5, a5, a3
+; RV64IA-TSO-ZACAS-NEXT: xor a5, a2, a5
+; RV64IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a4)
+; RV64IA-TSO-ZACAS-NEXT: bnez a5, .LBB2_1
+; RV64IA-TSO-ZACAS-NEXT: .LBB2_3:
+; RV64IA-TSO-ZACAS-NEXT: ret
+;
+; RV64IA-TSO-ZABHA-LABEL: cmpxchg_i8_acquire_acquire:
+; RV64IA-TSO-ZABHA: # %bb.0:
+; RV64IA-TSO-ZABHA-NEXT: amocas.b a1, a2, (a0)
+; RV64IA-TSO-ZABHA-NEXT: ret
+ %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val acquire acquire
+ ret void
+}
+
+define void @cmpxchg_i8_release_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind {
+; RV32I-LABEL: cmpxchg_i8_release_monotonic:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sb a1, 11(sp)
+; RV32I-NEXT: addi a1, sp, 11
+; RV32I-NEXT: li a3, 3
+; RV32I-NEXT: li a4, 0
+; RV32I-NEXT: call __atomic_compare_exchange_1
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-WMO-LABEL: cmpxchg_i8_release_monotonic:
+; RV32IA-WMO: # %bb.0:
+; RV32IA-WMO-NEXT: li a3, 255
+; RV32IA-WMO-NEXT: andi a4, a0, -4
+; RV32IA-WMO-NEXT: andi a0, a0, 3
+; RV32IA-WMO-NEXT: zext.b a1, a1
+; RV32IA-WMO-NEXT: zext.b a2, a2
+; RV32IA-WMO-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NEXT: sll a3, a3, a0
+; RV32IA-WMO-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NEXT: sll a0, a2, a0
+; RV32IA-WMO-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NEXT: lr.w a2, (a4)
+; RV32IA-WMO-NEXT: and a5, a2, a3
+; RV32IA-WMO-NEXT: bne a5, a1, .LBB3_3
+; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB3_1 Depth=1
+; RV32IA-WMO-NEXT: xor a5, a2, a0
+; RV32IA-WMO-NEXT: and a5, a5, a3
+; RV32IA-WMO-NEXT: xor a5, a2, a5
+; RV32IA-WMO-NEXT: sc.w.rl a5, a5, (a4)
+; RV32IA-WMO-NEXT: bnez a5, .LBB3_1
+; RV32IA-WMO-NEXT: .LBB3_3:
+; RV32IA-WMO-NEXT: ret
+;
+; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i8_release_monotonic:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: li a3, 255
+; RV32IA-WMO-ZACAS-NEXT: andi a4, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: andi a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-ZACAS-NEXT: zext.b a2, a2
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: sll a0, a2, a0
+; RV32IA-WMO-ZACAS-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w a2, (a4)
+; RV32IA-WMO-ZACAS-NEXT: and a5, a2, a3
+; RV32IA-WMO-ZACAS-NEXT: bne a5, a1, .LBB3_3
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB3_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a2, a0
+; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a3
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a2, a5
+; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a4)
+; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB3_1
+; RV32IA-WMO-ZACAS-NEXT: .LBB3_3:
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-LABEL: cmpxchg_i8_release_monotonic:
+; RV32IA-TSO: # %bb.0:
+; RV32IA-TSO-NEXT: li a3, 255
+; RV32IA-TSO-NEXT: andi a4, a0, -4
+; RV32IA-TSO-NEXT: andi a0, a0, 3
+; RV32IA-TSO-NEXT: zext.b a1, a1
+; RV32IA-TSO-NEXT: zext.b a2, a2
+; RV32IA-TSO-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NEXT: sll a3, a3, a0
+; RV32IA-TSO-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NEXT: sll a0, a2, a0
+; RV32IA-TSO-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NEXT: lr.w a2, (a4)
+; RV32IA-TSO-NEXT: and a5, a2, a3
+; RV32IA-TSO-NEXT: bne a5, a1, .LBB3_3
+; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB3_1 Depth=1
+; RV32IA-TSO-NEXT: xor a5, a2, a0
+; RV32IA-TSO-NEXT: and a5, a5, a3
+; RV32IA-TSO-NEXT: xor a5, a2, a5
+; RV32IA-TSO-NEXT: sc.w a5, a5, (a4)
+; RV32IA-TSO-NEXT: bnez a5, .LBB3_1
+; RV32IA-TSO-NEXT: .LBB3_3:
+; RV32IA-TSO-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: cmpxchg_i8_release_monotonic:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: li a3, 255
+; RV32IA-TSO-ZACAS-NEXT: andi a4, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: andi a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-ZACAS-NEXT: zext.b a2, a2
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: sll a0, a2, a0
+; RV32IA-TSO-ZACAS-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT: lr.w a2, (a4)
+; RV32IA-TSO-ZACAS-NEXT: and a5, a2, a3
+; RV32IA-TSO-ZACAS-NEXT: bne a5, a1, .LBB3_3
+; RV32IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB3_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a2, a0
+; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a3
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a2, a5
+; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a4)
+; RV32IA-TSO-ZACAS-NEXT: bnez a5, .LBB3_1
+; RV32IA-TSO-ZACAS-NEXT: .LBB3_3:
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
+; RV64I-LABEL: cmpxchg_i8_release_monotonic:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sb a1, 7(sp)
+; RV64I-NEXT: addi a1, sp, 7
+; RV64I-NEXT: li a3, 3
+; RV64I-NEXT: li a4, 0
+; RV64I-NEXT: call __atomic_compare_exchange_1
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-WMO-LABEL: cmpxchg_i8_release_monotonic:
+; RV64IA-WMO: # %bb.0:
+; RV64IA-WMO-NEXT: li a3, 255
+; RV64IA-WMO-NEXT: andi a4, a0, -4
+; RV64IA-WMO-NEXT: andi a0, a0, 3
+; RV64IA-WMO-NEXT: zext.b a1, a1
+; RV64IA-WMO-NEXT: zext.b a2, a2
+; RV64IA-WMO-NEXT: slli a0, a0, 3
+; RV64IA-WMO-NEXT: sllw a3, a3, a0
+; RV64IA-WMO-NEXT: sllw a1, a1, a0
+; RV64IA-WMO-NEXT: sllw a0, a2, a0
+; RV64IA-WMO-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT: lr.w a2, (a4)
+; RV64IA-WMO-NEXT: and a5, a2, a3
+; RV64IA-WMO-NEXT: bne a5, a1, .LBB3_3
+; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB3_1 Depth=1
+; RV64IA-WMO-NEXT: xor a5, a2, a0
+; RV64IA-WMO-NEXT: and a5, a5, a3
+; RV64IA-WMO-NEXT: xor a5, a2, a5
+; RV64IA-WMO-NEXT: sc.w.rl a5, a5, (a4)
+; RV64IA-WMO-NEXT: bnez a5, .LBB3_1
+; RV64IA-WMO-NEXT: .LBB3_3:
+; RV64IA-WMO-NEXT: ret
+;
+; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i8_release_monotonic:
+; RV64IA-WMO-ZACAS: # %bb.0:
+; RV64IA-WMO-ZACAS-NEXT: li a3, 255
+; RV64IA-WMO-ZACAS-NEXT: andi a4, a0, -4
+; RV64IA-WMO-ZACAS-NEXT: andi a0, a0, 3
+; RV64IA-WMO-ZACAS-NEXT: zext.b a1, a1
+; RV64IA-WMO-ZACAS-NEXT: zext.b a2, a2
+; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV64IA-WMO-ZACAS-NEXT: sllw a3, a3, a0
+; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0
+; RV64IA-WMO-ZACAS-NEXT: sllw a0, a2, a0
+; RV64IA-WMO-ZACAS-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-ZACAS-NEXT: lr.w a2, (a4)
+; RV64IA-WMO-ZACAS-NEXT: and a5, a2, a3
+; RV64IA-WMO-ZACAS-NEXT: bne a5, a1, .LBB3_3
+; RV64IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB3_1 Depth=1
+; RV64IA-WMO-ZACAS-NEXT: xor a5, a2, a0
+; RV64IA-WMO-ZACAS-NEXT: and a5, a5, a3
+; RV64IA-WMO-ZACAS-NEXT: xor a5, a2, a5
+; RV64IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a4)
+; RV64IA-WMO-ZACAS-NEXT: bnez a5, .LBB3_1
+; RV64IA-WMO-ZACAS-NEXT: .LBB3_3:
+; RV64IA-WMO-ZACAS-NEXT: ret
+;
+; RV64IA-WMO-ZABHA-LABEL: cmpxchg_i8_release_monotonic:
+; RV64IA-WMO-ZABHA: # %bb.0:
+; RV64IA-WMO-ZABHA-NEXT: amocas.b.rl a1, a2, (a0)
+; RV64IA-WMO-ZABHA-NEXT: ret
+;
+; RV64IA-TSO-LABEL: cmpxchg_i8_release_monotonic:
+; RV64IA-TSO: # %bb.0:
+; RV64IA-TSO-NEXT: li a3, 255
+; RV64IA-TSO-NEXT: andi a4, a0, -4
+; RV64IA-TSO-NEXT: andi a0, a0, 3
+; RV64IA-TSO-NEXT: zext.b a1, a1
+; RV64IA-TSO-NEXT: zext.b a2, a2
+; RV64IA-TSO-NEXT: slli a0, a0, 3
+; RV64IA-TSO-NEXT: sllw a3, a3, a0
+; RV64IA-TSO-NEXT: sllw a1, a1, a0
+; RV64IA-TSO-NEXT: sllw a0, a2, a0
+; RV64IA-TSO-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT: lr.w a2, (a4)
+; RV64IA-TSO-NEXT: and a5, a2, a3
+; RV64IA-TSO-NEXT: bne a5, a1, .LBB3_3
+; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB3_1 Depth=1
+; RV64IA-TSO-NEXT: xor a5, a2, a0
+; RV64IA-TSO-NEXT: and a5, a5, a3
+; RV64IA-TSO-NEXT: xor a5, a2, a5
+; RV64IA-TSO-NEXT: sc.w a5, a5, (a4)
+; RV64IA-TSO-NEXT: bnez a5, .LBB3_1
+; RV64IA-TSO-NEXT: .LBB3_3:
+; RV64IA-TSO-NEXT: ret
+;
+; RV64IA-TSO-ZACAS-LABEL: cmpxchg_i8_release_monotonic:
+; RV64IA-TSO-ZACAS: # %bb.0:
+; RV64IA-TSO-ZACAS-NEXT: li a3, 255
+; RV64IA-TSO-ZACAS-NEXT: andi a4, a0, -4
+; RV64IA-TSO-ZACAS-NEXT: andi a0, a0, 3
+; RV64IA-TSO-ZACAS-NEXT: zext.b a1, a1
+; RV64IA-TSO-ZACAS-NEXT: zext.b a2, a2
+; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV64IA-TSO-ZACAS-NEXT: sllw a3, a3, a0
+; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a0
+; RV64IA-TSO-ZACAS-NEXT: sllw a0, a2, a0
+; RV64IA-TSO-ZACAS-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-ZACAS-NEXT: lr.w a2, (a4)
+; RV64IA-TSO-ZACAS-NEXT: and a5, a2, a3
+; RV64IA-TSO-ZACAS-NEXT: bne a5, a1, .LBB3_3
+; RV64IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB3_1 Depth=1
+; RV64IA-TSO-ZACAS-NEXT: xor a5, a2, a0
+; RV64IA-TSO-ZACAS-NEXT: and a5, a5, a3
+; RV64IA-TSO-ZACAS-NEXT: xor a5, a2, a5
+; RV64IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a4)
+; RV64IA-TSO-ZACAS-NEXT: bnez a5, .LBB3_1
+; RV64IA-TSO-ZACAS-NEXT: .LBB3_3:
+; RV64IA-TSO-ZACAS-NEXT: ret
+;
+; RV64IA-TSO-ZABHA-LABEL: cmpxchg_i8_release_monotonic:
+; RV64IA-TSO-ZABHA: # %bb.0:
+; RV64IA-TSO-ZABHA-NEXT: amocas.b a1, a2, (a0)
+; RV64IA-TSO-ZABHA-NEXT: ret
+ %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val release monotonic
+ ret void
+}
+
+define void @cmpxchg_i8_release_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind {
+; RV32I-LABEL: cmpxchg_i8_release_acquire:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sb a1, 11(sp)
+; RV32I-NEXT: addi a1, sp, 11
+; RV32I-NEXT: li a3, 3
+; RV32I-NEXT: li a4, 2
+; RV32I-NEXT: call __atomic_compare_exchange_1
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-WMO-LABEL: cmpxchg_i8_release_acquire:
+; RV32IA-WMO: # %bb.0:
+; RV32IA-WMO-NEXT: li a3, 255
+; RV32IA-WMO-NEXT: andi a4, a0, -4
+; RV32IA-WMO-NEXT: andi a0, a0, 3
+; RV32IA-WMO-NEXT: zext.b a1, a1
+; RV32IA-WMO-NEXT: zext.b a2, a2
+; RV32IA-WMO-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NEXT: sll a3, a3, a0
+; RV32IA-WMO-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NEXT: sll a0, a2, a0
+; RV32IA-WMO-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NEXT: lr.w.aq a2, (a4)
+; RV32IA-WMO-NEXT: and a5, a2, a3
+; RV32IA-WMO-NEXT: bne a5, a1, .LBB4_3
+; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB4_1 Depth=1
+; RV32IA-WMO-NEXT: xor a5, a2, a0
+; RV32IA-WMO-NEXT: and a5, a5, a3
+; RV32IA-WMO-NEXT: xor a5, a2, a5
+; RV32IA-WMO-NEXT: sc.w.rl a5, a5, (a4)
+; RV32IA-WMO-NEXT: bnez a5, .LBB4_1
+; RV32IA-WMO-NEXT: .LBB4_3:
+; RV32IA-WMO-NEXT: ret
+;
+; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i8_release_acquire:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: li a3, 255
+; RV32IA-WMO-ZACAS-NEXT: andi a4, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: andi a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-ZACAS-NEXT: zext.b a2, a2
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: sll a0, a2, a0
+; RV32IA-WMO-ZACAS-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a4)
+; RV32IA-WMO-ZACAS-NEXT: and a5, a2, a3
+; RV32IA-WMO-ZACAS-NEXT: bne a5, a1, .LBB4_3
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB4_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a2, a0
+; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a3
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a2, a5
+; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a4)
+; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB4_1
+; RV32IA-WMO-ZACAS-NEXT: .LBB4_3:
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-LABEL: cmpxchg_i8_release_acquire:
+; RV32IA-TSO: # %bb.0:
+; RV32IA-TSO-NEXT: li a3, 255
+; RV32IA-TSO-NEXT: andi a4, a0, -4
+; RV32IA-TSO-NEXT: andi a0, a0, 3
+; RV32IA-TSO-NEXT: zext.b a1, a1
+; RV32IA-TSO-NEXT: zext.b a2, a2
+; RV32IA-TSO-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NEXT: sll a3, a3, a0
+; RV32IA-TSO-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NEXT: sll a0, a2, a0
+; RV32IA-TSO-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NEXT: lr.w a2, (a4)
+; RV32IA-TSO-NEXT: and a5, a2, a3
+; RV32IA-TSO-NEXT: bne a5, a1, .LBB4_3
+; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB4_1 Depth=1
+; RV32IA-TSO-NEXT: xor a5, a2, a0
+; RV32IA-TSO-NEXT: and a5, a5, a3
+; RV32IA-TSO-NEXT: xor a5, a2, a5
+; RV32IA-TSO-NEXT: sc.w a5, a5, (a4)
+; RV32IA-TSO-NEXT: bnez a5, .LBB4_1
+; RV32IA-TSO-NEXT: .LBB4_3:
+; RV32IA-TSO-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: cmpxchg_i8_release_acquire:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: li a3, 255
+; RV32IA-TSO-ZACAS-NEXT: andi a4, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: andi a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-ZACAS-NEXT: zext.b a2, a2
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: sll a0, a2, a0
+; RV32IA-TSO-ZACAS-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT: lr.w a2, (a4)
+; RV32IA-TSO-ZACAS-NEXT: and a5, a2, a3
+; RV32IA-TSO-ZACAS-NEXT: bne a5, a1, .LBB4_3
+; RV32IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB4_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a2, a0
+; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a3
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a2, a5
+; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a4)
+; RV32IA-TSO-ZACAS-NEXT: bnez a5, .LBB4_1
+; RV32IA-TSO-ZACAS-NEXT: .LBB4_3:
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
+; RV64I-LABEL: cmpxchg_i8_release_acquire:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sb a1, 7(sp)
+; RV64I-NEXT: addi a1, sp, 7
+; RV64I-NEXT: li a3, 3
+; RV64I-NEXT: li a4, 2
+; RV64I-NEXT: call __atomic_compare_exchange_1
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-WMO-LABEL: cmpxchg_i8_release_acquire:
+; RV64IA-WMO: # %bb.0:
+; RV64IA-WMO-NEXT: li a3, 255
+; RV64IA-WMO-NEXT: andi a4, a0, -4
+; RV64IA-WMO-NEXT: andi a0, a0, 3
+; RV64IA-WMO-NEXT: zext.b a1, a1
+; RV64IA-WMO-NEXT: zext.b a2, a2
+; RV64IA-WMO-NEXT: slli a0, a0, 3
+; RV64IA-WMO-NEXT: sllw a3, a3, a0
+; RV64IA-WMO-NEXT: sllw a1, a1, a0
+; RV64IA-WMO-NEXT: sllw a0, a2, a0
+; RV64IA-WMO-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT: lr.w.aq a2, (a4)
+; RV64IA-WMO-NEXT: and a5, a2, a3
+; RV64IA-WMO-NEXT: bne a5, a1, .LBB4_3
+; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB4_1 Depth=1
+; RV64IA-WMO-NEXT: xor a5, a2, a0
+; RV64IA-WMO-NEXT: and a5, a5, a3
+; RV64IA-WMO-NEXT: xor a5, a2, a5
+; RV64IA-WMO-NEXT: sc.w.rl a5, a5, (a4)
+; RV64IA-WMO-NEXT: bnez a5, .LBB4_1
+; RV64IA-WMO-NEXT: .LBB4_3:
+; RV64IA-WMO-NEXT: ret
+;
+; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i8_release_acquire:
+; RV64IA-WMO-ZACAS: # %bb.0:
+; RV64IA-WMO-ZACAS-NEXT: li a3, 255
+; RV64IA-WMO-ZACAS-NEXT: andi a4, a0, -4
+; RV64IA-WMO-ZACAS-NEXT: andi a0, a0, 3
+; RV64IA-WMO-ZACAS-NEXT: zext.b a1, a1
+; RV64IA-WMO-ZACAS-NEXT: zext.b a2, a2
+; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV64IA-WMO-ZACAS-NEXT: sllw a3, a3, a0
+; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0
+; RV64IA-WMO-ZACAS-NEXT: sllw a0, a2, a0
+; RV64IA-WMO-ZACAS-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a4)
+; RV64IA-WMO-ZACAS-NEXT: and a5, a2, a3
+; RV64IA-WMO-ZACAS-NEXT: bne a5, a1, .LBB4_3
+; RV64IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB4_1 Depth=1
+; RV64IA-WMO-ZACAS-NEXT: xor a5, a2, a0
+; RV64IA-WMO-ZACAS-NEXT: and a5, a5, a3
+; RV64IA-WMO-ZACAS-NEXT: xor a5, a2, a5
+; RV64IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a4)
+; RV64IA-WMO-ZACAS-NEXT: bnez a5, .LBB4_1
+; RV64IA-WMO-ZACAS-NEXT: .LBB4_3:
+; RV64IA-WMO-ZACAS-NEXT: ret
+;
+; RV64IA-WMO-ZABHA-LABEL: cmpxchg_i8_release_acquire:
+; RV64IA-WMO-ZABHA: # %bb.0:
+; RV64IA-WMO-ZABHA-NEXT: amocas.b.aqrl a1, a2, (a0)
+; RV64IA-WMO-ZABHA-NEXT: ret
+;
+; RV64IA-TSO-LABEL: cmpxchg_i8_release_acquire:
+; RV64IA-TSO: # %bb.0:
+; RV64IA-TSO-NEXT: li a3, 255
+; RV64IA-TSO-NEXT: andi a4, a0, -4
+; RV64IA-TSO-NEXT: andi a0, a0, 3
+; RV64IA-TSO-NEXT: zext.b a1, a1
+; RV64IA-TSO-NEXT: zext.b a2, a2
+; RV64IA-TSO-NEXT: slli a0, a0, 3
+; RV64IA-TSO-NEXT: sllw a3, a3, a0
+; RV64IA-TSO-NEXT: sllw a1, a1, a0
+; RV64IA-TSO-NEXT: sllw a0, a2, a0
+; RV64IA-TSO-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT: lr.w a2, (a4)
+; RV64IA-TSO-NEXT: and a5, a2, a3
+; RV64IA-TSO-NEXT: bne a5, a1, .LBB4_3
+; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB4_1 Depth=1
+; RV64IA-TSO-NEXT: xor a5, a2, a0
+; RV64IA-TSO-NEXT: and a5, a5, a3
+; RV64IA-TSO-NEXT: xor a5, a2, a5
+; RV64IA-TSO-NEXT: sc.w a5, a5, (a4)
+; RV64IA-TSO-NEXT: bnez a5, .LBB4_1
+; RV64IA-TSO-NEXT: .LBB4_3:
+; RV64IA-TSO-NEXT: ret
+;
+; RV64IA-TSO-ZACAS-LABEL: cmpxchg_i8_release_acquire:
+; RV64IA-TSO-ZACAS: # %bb.0:
+; RV64IA-TSO-ZACAS-NEXT: li a3, 255
+; RV64IA-TSO-ZACAS-NEXT: andi a4, a0, -4
+; RV64IA-TSO-ZACAS-NEXT: andi a0, a0, 3
+; RV64IA-TSO-ZACAS-NEXT: zext.b a1, a1
+; RV64IA-TSO-ZACAS-NEXT: zext.b a2, a2
+; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV64IA-TSO-ZACAS-NEXT: sllw a3, a3, a0
+; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a0
+; RV64IA-TSO-ZACAS-NEXT: sllw a0, a2, a0
+; RV64IA-TSO-ZACAS-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-ZACAS-NEXT: lr.w a2, (a4)
+; RV64IA-TSO-ZACAS-NEXT: and a5, a2, a3
+; RV64IA-TSO-ZACAS-NEXT: bne a5, a1, .LBB4_3
+; RV64IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB4_1 Depth=1
+; RV64IA-TSO-ZACAS-NEXT: xor a5, a2, a0
+; RV64IA-TSO-ZACAS-NEXT: and a5, a5, a3
+; RV64IA-TSO-ZACAS-NEXT: xor a5, a2, a5
+; RV64IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a4)
+; RV64IA-TSO-ZACAS-NEXT: bnez a5, .LBB4_1
+; RV64IA-TSO-ZACAS-NEXT: .LBB4_3:
+; RV64IA-TSO-ZACAS-NEXT: ret
+;
+; RV64IA-TSO-ZABHA-LABEL: cmpxchg_i8_release_acquire:
+; RV64IA-TSO-ZABHA: # %bb.0:
+; RV64IA-TSO-ZABHA-NEXT: amocas.b a1, a2, (a0)
+; RV64IA-TSO-ZABHA-NEXT: ret
+ %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val release acquire
+ ret void
+}
+
+define void @cmpxchg_i8_acq_rel_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind {
+; RV32I-LABEL: cmpxchg_i8_acq_rel_monotonic:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sb a1, 11(sp)
+; RV32I-NEXT: addi a1, sp, 11
+; RV32I-NEXT: li a3, 4
+; RV32I-NEXT: li a4, 0
+; RV32I-NEXT: call __atomic_compare_exchange_1
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-WMO-LABEL: cmpxchg_i8_acq_rel_monotonic:
+; RV32IA-WMO: # %bb.0:
+; RV32IA-WMO-NEXT: li a3, 255
+; RV32IA-WMO-NEXT: andi a4, a0, -4
+; RV32IA-WMO-NEXT: andi a0, a0, 3
+; RV32IA-WMO-NEXT: zext.b a1, a1
+; RV32IA-WMO-NEXT: zext.b a2, a2
+; RV32IA-WMO-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NEXT: sll a3, a3, a0
+; RV32IA-WMO-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NEXT: sll a0, a2, a0
+; RV32IA-WMO-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NEXT: lr.w.aq a2, (a4)
+; RV32IA-WMO-NEXT: and a5, a2, a3
+; RV32IA-WMO-NEXT: bne a5, a1, .LBB5_3
+; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB5_1 Depth=1
+; RV32IA-WMO-NEXT: xor a5, a2, a0
+; RV32IA-WMO-NEXT: and a5, a5, a3
+; RV32IA-WMO-NEXT: xor a5, a2, a5
+; RV32IA-WMO-NEXT: sc.w.rl a5, a5, (a4)
+; RV32IA-WMO-NEXT: bnez a5, .LBB5_1
+; RV32IA-WMO-NEXT: .LBB5_3:
+; RV32IA-WMO-NEXT: ret
+;
+; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i8_acq_rel_monotonic:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: li a3, 255
+; RV32IA-WMO-ZACAS-NEXT: andi a4, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: andi a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-ZACAS-NEXT: zext.b a2, a2
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: sll a0, a2, a0
+; RV32IA-WMO-ZACAS-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a4)
+; RV32IA-WMO-ZACAS-NEXT: and a5, a2, a3
+; RV32IA-WMO-ZACAS-NEXT: bne a5, a1, .LBB5_3
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB5_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a2, a0
+; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a3
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a2, a5
+; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a4)
+; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB5_1
+; RV32IA-WMO-ZACAS-NEXT: .LBB5_3:
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-LABEL: cmpxchg_i8_acq_rel_monotonic:
+; RV32IA-TSO: # %bb.0:
+; RV32IA-TSO-NEXT: li a3, 255
+; RV32IA-TSO-NEXT: andi a4, a0, -4
+; RV32IA-TSO-NEXT: andi a0, a0, 3
+; RV32IA-TSO-NEXT: zext.b a1, a1
+; RV32IA-TSO-NEXT: zext.b a2, a2
+; RV32IA-TSO-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NEXT: sll a3, a3, a0
+; RV32IA-TSO-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NEXT: sll a0, a2, a0
+; RV32IA-TSO-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NEXT: lr.w a2, (a4)
+; RV32IA-TSO-NEXT: and a5, a2, a3
+; RV32IA-TSO-NEXT: bne a5, a1, .LBB5_3
+; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB5_1 Depth=1
+; RV32IA-TSO-NEXT: xor a5, a2, a0
+; RV32IA-TSO-NEXT: and a5, a5, a3
+; RV32IA-TSO-NEXT: xor a5, a2, a5
+; RV32IA-TSO-NEXT: sc.w a5, a5, (a4)
+; RV32IA-TSO-NEXT: bnez a5, .LBB5_1
+; RV32IA-TSO-NEXT: .LBB5_3:
+; RV32IA-TSO-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: cmpxchg_i8_acq_rel_monotonic:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: li a3, 255
+; RV32IA-TSO-ZACAS-NEXT: andi a4, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: andi a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-ZACAS-NEXT: zext.b a2, a2
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: sll a0, a2, a0
+; RV32IA-TSO-ZACAS-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT: lr.w a2, (a4)
+; RV32IA-TSO-ZACAS-NEXT: and a5, a2, a3
+; RV32IA-TSO-ZACAS-NEXT: bne a5, a1, .LBB5_3
+; RV32IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB5_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a2, a0
+; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a3
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a2, a5
+; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a4)
+; RV32IA-TSO-ZACAS-NEXT: bnez a5, .LBB5_1
+; RV32IA-TSO-ZACAS-NEXT: .LBB5_3:
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
+; RV64I-LABEL: cmpxchg_i8_acq_rel_monotonic:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sb a1, 7(sp)
+; RV64I-NEXT: addi a1, sp, 7
+; RV64I-NEXT: li a3, 4
+; RV64I-NEXT: li a4, 0
+; RV64I-NEXT: call __atomic_compare_exchange_1
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-WMO-LABEL: cmpxchg_i8_acq_rel_monotonic:
+; RV64IA-WMO: # %bb.0:
+; RV64IA-WMO-NEXT: li a3, 255
+; RV64IA-WMO-NEXT: andi a4, a0, -4
+; RV64IA-WMO-NEXT: andi a0, a0, 3
+; RV64IA-WMO-NEXT: zext.b a1, a1
+; RV64IA-WMO-NEXT: zext.b a2, a2
+; RV64IA-WMO-NEXT: slli a0, a0, 3
+; RV64IA-WMO-NEXT: sllw a3, a3, a0
+; RV64IA-WMO-NEXT: sllw a1, a1, a0
+; RV64IA-WMO-NEXT: sllw a0, a2, a0
+; RV64IA-WMO-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT: lr.w.aq a2, (a4)
+; RV64IA-WMO-NEXT: and a5, a2, a3
+; RV64IA-WMO-NEXT: bne a5, a1, .LBB5_3
+; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB5_1 Depth=1
+; RV64IA-WMO-NEXT: xor a5, a2, a0
+; RV64IA-WMO-NEXT: and a5, a5, a3
+; RV64IA-WMO-NEXT: xor a5, a2, a5
+; RV64IA-WMO-NEXT: sc.w.rl a5, a5, (a4)
+; RV64IA-WMO-NEXT: bnez a5, .LBB5_1
+; RV64IA-WMO-NEXT: .LBB5_3:
+; RV64IA-WMO-NEXT: ret
+;
+; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i8_acq_rel_monotonic:
+; RV64IA-WMO-ZACAS: # %bb.0:
+; RV64IA-WMO-ZACAS-NEXT: li a3, 255
+; RV64IA-WMO-ZACAS-NEXT: andi a4, a0, -4
+; RV64IA-WMO-ZACAS-NEXT: andi a0, a0, 3
+; RV64IA-WMO-ZACAS-NEXT: zext.b a1, a1
+; RV64IA-WMO-ZACAS-NEXT: zext.b a2, a2
+; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV64IA-WMO-ZACAS-NEXT: sllw a3, a3, a0
+; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0
+; RV64IA-WMO-ZACAS-NEXT: sllw a0, a2, a0
+; RV64IA-WMO-ZACAS-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a4)
+; RV64IA-WMO-ZACAS-NEXT: and a5, a2, a3
+; RV64IA-WMO-ZACAS-NEXT: bne a5, a1, .LBB5_3
+; RV64IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB5_1 Depth=1
+; RV64IA-WMO-ZACAS-NEXT: xor a5, a2, a0
+; RV64IA-WMO-ZACAS-NEXT: and a5, a5, a3
+; RV64IA-WMO-ZACAS-NEXT: xor a5, a2, a5
+; RV64IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a4)
+; RV64IA-WMO-ZACAS-NEXT: bnez a5, .LBB5_1
+; RV64IA-WMO-ZACAS-NEXT: .LBB5_3:
+; RV64IA-WMO-ZACAS-NEXT: ret
+;
+; RV64IA-WMO-ZABHA-LABEL: cmpxchg_i8_acq_rel_monotonic:
+; RV64IA-WMO-ZABHA: # %bb.0:
+; RV64IA-WMO-ZABHA-NEXT: amocas.b.aqrl a1, a2, (a0)
+; RV64IA-WMO-ZABHA-NEXT: ret
+;
+; RV64IA-TSO-LABEL: cmpxchg_i8_acq_rel_monotonic:
+; RV64IA-TSO: # %bb.0:
+; RV64IA-TSO-NEXT: li a3, 255
+; RV64IA-TSO-NEXT: andi a4, a0, -4
+; RV64IA-TSO-NEXT: andi a0, a0, 3
+; RV64IA-TSO-NEXT: zext.b a1, a1
+; RV64IA-TSO-NEXT: zext.b a2, a2
+; RV64IA-TSO-NEXT: slli a0, a0, 3
+; RV64IA-TSO-NEXT: sllw a3, a3, a0
+; RV64IA-TSO-NEXT: sllw a1, a1, a0
+; RV64IA-TSO-NEXT: sllw a0, a2, a0
+; RV64IA-TSO-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT: lr.w a2, (a4)
+; RV64IA-TSO-NEXT: and a5, a2, a3
+; RV64IA-TSO-NEXT: bne a5, a1, .LBB5_3
+; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB5_1 Depth=1
+; RV64IA-TSO-NEXT: xor a5, a2, a0
+; RV64IA-TSO-NEXT: and a5, a5, a3
+; RV64IA-TSO-NEXT: xor a5, a2, a5
+; RV64IA-TSO-NEXT: sc.w a5, a5, (a4)
+; RV64IA-TSO-NEXT: bnez a5, .LBB5_1
+; RV64IA-TSO-NEXT: .LBB5_3:
+; RV64IA-TSO-NEXT: ret
+;
+; RV64IA-TSO-ZACAS-LABEL: cmpxchg_i8_acq_rel_monotonic:
+; RV64IA-TSO-ZACAS: # %bb.0:
+; RV64IA-TSO-ZACAS-NEXT: li a3, 255
+; RV64IA-TSO-ZACAS-NEXT: andi a4, a0, -4
+; RV64IA-TSO-ZACAS-NEXT: andi a0, a0, 3
+; RV64IA-TSO-ZACAS-NEXT: zext.b a1, a1
+; RV64IA-TSO-ZACAS-NEXT: zext.b a2, a2
+; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV64IA-TSO-ZACAS-NEXT: sllw a3, a3, a0
+; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a0
+; RV64IA-TSO-ZACAS-NEXT: sllw a0, a2, a0
+; RV64IA-TSO-ZACAS-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-ZACAS-NEXT: lr.w a2, (a4)
+; RV64IA-TSO-ZACAS-NEXT: and a5, a2, a3
+; RV64IA-TSO-ZACAS-NEXT: bne a5, a1, .LBB5_3
+; RV64IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB5_1 Depth=1
+; RV64IA-TSO-ZACAS-NEXT: xor a5, a2, a0
+; RV64IA-TSO-ZACAS-NEXT: and a5, a5, a3
+; RV64IA-TSO-ZACAS-NEXT: xor a5, a2, a5
+; RV64IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a4)
+; RV64IA-TSO-ZACAS-NEXT: bnez a5, .LBB5_1
+; RV64IA-TSO-ZACAS-NEXT: .LBB5_3:
+; RV64IA-TSO-ZACAS-NEXT: ret
+;
+; RV64IA-TSO-ZABHA-LABEL: cmpxchg_i8_acq_rel_monotonic:
+; RV64IA-TSO-ZABHA: # %bb.0:
+; RV64IA-TSO-ZABHA-NEXT: amocas.b a1, a2, (a0)
+; RV64IA-TSO-ZABHA-NEXT: ret
+ %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val acq_rel monotonic
+ ret void
+}
+
+define void @cmpxchg_i8_acq_rel_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind {
+; RV32I-LABEL: cmpxchg_i8_acq_rel_acquire:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sb a1, 11(sp)
+; RV32I-NEXT: addi a1, sp, 11
+; RV32I-NEXT: li a3, 4
+; RV32I-NEXT: li a4, 2
+; RV32I-NEXT: call __atomic_compare_exchange_1
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-WMO-LABEL: cmpxchg_i8_acq_rel_acquire:
+; RV32IA-WMO: # %bb.0:
+; RV32IA-WMO-NEXT: li a3, 255
+; RV32IA-WMO-NEXT: andi a4, a0, -4
+; RV32IA-WMO-NEXT: andi a0, a0, 3
+; RV32IA-WMO-NEXT: zext.b a1, a1
+; RV32IA-WMO-NEXT: zext.b a2, a2
+; RV32IA-WMO-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NEXT: sll a3, a3, a0
+; RV32IA-WMO-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NEXT: sll a0, a2, a0
+; RV32IA-WMO-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NEXT: lr.w.aq a2, (a4)
+; RV32IA-WMO-NEXT: and a5, a2, a3
+; RV32IA-WMO-NEXT: bne a5, a1, .LBB6_3
+; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB6_1 Depth=1
+; RV32IA-WMO-NEXT: xor a5, a2, a0
+; RV32IA-WMO-NEXT: and a5, a5, a3
+; RV32IA-WMO-NEXT: xor a5, a2, a5
+; RV32IA-WMO-NEXT: sc.w.rl a5, a5, (a4)
+; RV32IA-WMO-NEXT: bnez a5, .LBB6_1
+; RV32IA-WMO-NEXT: .LBB6_3:
+; RV32IA-WMO-NEXT: ret
+;
+; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i8_acq_rel_acquire:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: li a3, 255
+; RV32IA-WMO-ZACAS-NEXT: andi a4, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: andi a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-ZACAS-NEXT: zext.b a2, a2
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: sll a0, a2, a0
+; RV32IA-WMO-ZACAS-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a4)
+; RV32IA-WMO-ZACAS-NEXT: and a5, a2, a3
+; RV32IA-WMO-ZACAS-NEXT: bne a5, a1, .LBB6_3
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB6_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a2, a0
+; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a3
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a2, a5
+; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a4)
+; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB6_1
+; RV32IA-WMO-ZACAS-NEXT: .LBB6_3:
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-LABEL: cmpxchg_i8_acq_rel_acquire:
+; RV32IA-TSO: # %bb.0:
+; RV32IA-TSO-NEXT: li a3, 255
+; RV32IA-TSO-NEXT: andi a4, a0, -4
+; RV32IA-TSO-NEXT: andi a0, a0, 3
+; RV32IA-TSO-NEXT: zext.b a1, a1
+; RV32IA-TSO-NEXT: zext.b a2, a2
+; RV32IA-TSO-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NEXT: sll a3, a3, a0
+; RV32IA-TSO-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NEXT: sll a0, a2, a0
+; RV32IA-TSO-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NEXT: lr.w a2, (a4)
+; RV32IA-TSO-NEXT: and a5, a2, a3
+; RV32IA-TSO-NEXT: bne a5, a1, .LBB6_3
+; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB6_1 Depth=1
+; RV32IA-TSO-NEXT: xor a5, a2, a0
+; RV32IA-TSO-NEXT: and a5, a5, a3
+; RV32IA-TSO-NEXT: xor a5, a2, a5
+; RV32IA-TSO-NEXT: sc.w a5, a5, (a4)
+; RV32IA-TSO-NEXT: bnez a5, .LBB6_1
+; RV32IA-TSO-NEXT: .LBB6_3:
+; RV32IA-TSO-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: cmpxchg_i8_acq_rel_acquire:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: li a3, 255
+; RV32IA-TSO-ZACAS-NEXT: andi a4, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: andi a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-ZACAS-NEXT: zext.b a2, a2
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: sll a0, a2, a0
+; RV32IA-TSO-ZACAS-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT: lr.w a2, (a4)
+; RV32IA-TSO-ZACAS-NEXT: and a5, a2, a3
+; RV32IA-TSO-ZACAS-NEXT: bne a5, a1, .LBB6_3
+; RV32IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB6_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a2, a0
+; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a3
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a2, a5
+; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a4)
+; RV32IA-TSO-ZACAS-NEXT: bnez a5, .LBB6_1
+; RV32IA-TSO-ZACAS-NEXT: .LBB6_3:
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
+; RV64I-LABEL: cmpxchg_i8_acq_rel_acquire:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sb a1, 7(sp)
+; RV64I-NEXT: addi a1, sp, 7
+; RV64I-NEXT: li a3, 4
+; RV64I-NEXT: li a4, 2
+; RV64I-NEXT: call __atomic_compare_exchange_1
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-WMO-LABEL: cmpxchg_i8_acq_rel_acquire:
+; RV64IA-WMO: # %bb.0:
+; RV64IA-WMO-NEXT: li a3, 255
+; RV64IA-WMO-NEXT: andi a4, a0, -4
+; RV64IA-WMO-NEXT: andi a0, a0, 3
+; RV64IA-WMO-NEXT: zext.b a1, a1
+; RV64IA-WMO-NEXT: zext.b a2, a2
+; RV64IA-WMO-NEXT: slli a0, a0, 3
+; RV64IA-WMO-NEXT: sllw a3, a3, a0
+; RV64IA-WMO-NEXT: sllw a1, a1, a0
+; RV64IA-WMO-NEXT: sllw a0, a2, a0
+; RV64IA-WMO-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT: lr.w.aq a2, (a4)
+; RV64IA-WMO-NEXT: and a5, a2, a3
+; RV64IA-WMO-NEXT: bne a5, a1, .LBB6_3
+; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB6_1 Depth=1
+; RV64IA-WMO-NEXT: xor a5, a2, a0
+; RV64IA-WMO-NEXT: and a5, a5, a3
+; RV64IA-WMO-NEXT: xor a5, a2, a5
+; RV64IA-WMO-NEXT: sc.w.rl a5, a5, (a4)
+; RV64IA-WMO-NEXT: bnez a5, .LBB6_1
+; RV64IA-WMO-NEXT: .LBB6_3:
+; RV64IA-WMO-NEXT: ret
+;
+; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i8_acq_rel_acquire:
+; RV64IA-WMO-ZACAS: # %bb.0:
+; RV64IA-WMO-ZACAS-NEXT: li a3, 255
+; RV64IA-WMO-ZACAS-NEXT: andi a4, a0, -4
+; RV64IA-WMO-ZACAS-NEXT: andi a0, a0, 3
+; RV64IA-WMO-ZACAS-NEXT: zext.b a1, a1
+; RV64IA-WMO-ZACAS-NEXT: zext.b a2, a2
+; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV64IA-WMO-ZACAS-NEXT: sllw a3, a3, a0
+; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0
+; RV64IA-WMO-ZACAS-NEXT: sllw a0, a2, a0
+; RV64IA-WMO-ZACAS-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a4)
+; RV64IA-WMO-ZACAS-NEXT: and a5, a2, a3
+; RV64IA-WMO-ZACAS-NEXT: bne a5, a1, .LBB6_3
+; RV64IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB6_1 Depth=1
+; RV64IA-WMO-ZACAS-NEXT: xor a5, a2, a0
+; RV64IA-WMO-ZACAS-NEXT: and a5, a5, a3
+; RV64IA-WMO-ZACAS-NEXT: xor a5, a2, a5
+; RV64IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a4)
+; RV64IA-WMO-ZACAS-NEXT: bnez a5, .LBB6_1
+; RV64IA-WMO-ZACAS-NEXT: .LBB6_3:
+; RV64IA-WMO-ZACAS-NEXT: ret
+;
+; RV64IA-WMO-ZABHA-LABEL: cmpxchg_i8_acq_rel_acquire:
+; RV64IA-WMO-ZABHA: # %bb.0:
+; RV64IA-WMO-ZABHA-NEXT: amocas.b.aqrl a1, a2, (a0)
+; RV64IA-WMO-ZABHA-NEXT: ret
+;
+; RV64IA-TSO-LABEL: cmpxchg_i8_acq_rel_acquire:
+; RV64IA-TSO: # %bb.0:
+; RV64IA-TSO-NEXT: li a3, 255
+; RV64IA-TSO-NEXT: andi a4, a0, -4
+; RV64IA-TSO-NEXT: andi a0, a0, 3
+; RV64IA-TSO-NEXT: zext.b a1, a1
+; RV64IA-TSO-NEXT: zext.b a2, a2
+; RV64IA-TSO-NEXT: slli a0, a0, 3
+; RV64IA-TSO-NEXT: sllw a3, a3, a0
+; RV64IA-TSO-NEXT: sllw a1, a1, a0
+; RV64IA-TSO-NEXT: sllw a0, a2, a0
+; RV64IA-TSO-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT: lr.w a2, (a4)
+; RV64IA-TSO-NEXT: and a5, a2, a3
+; RV64IA-TSO-NEXT: bne a5, a1, .LBB6_3
+; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB6_1 Depth=1
+; RV64IA-TSO-NEXT: xor a5, a2, a0
+; RV64IA-TSO-NEXT: and a5, a5, a3
+; RV64IA-TSO-NEXT: xor a5, a2, a5
+; RV64IA-TSO-NEXT: sc.w a5, a5, (a4)
+; RV64IA-TSO-NEXT: bnez a5, .LBB6_1
+; RV64IA-TSO-NEXT: .LBB6_3:
+; RV64IA-TSO-NEXT: ret
+;
+; RV64IA-TSO-ZACAS-LABEL: cmpxchg_i8_acq_rel_acquire:
+; RV64IA-TSO-ZACAS: # %bb.0:
+; RV64IA-TSO-ZACAS-NEXT: li a3, 255
+; RV64IA-TSO-ZACAS-NEXT: andi a4, a0, -4
+; RV64IA-TSO-ZACAS-NEXT: andi a0, a0, 3
+; RV64IA-TSO-ZACAS-NEXT: zext.b a1, a1
+; RV64IA-TSO-ZACAS-NEXT: zext.b a2, a2
+; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV64IA-TSO-ZACAS-NEXT: sllw a3, a3, a0
+; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a0
+; RV64IA-TSO-ZACAS-NEXT: sllw a0, a2, a0
+; RV64IA-TSO-ZACAS-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-ZACAS-NEXT: lr.w a2, (a4)
+; RV64IA-TSO-ZACAS-NEXT: and a5, a2, a3
+; RV64IA-TSO-ZACAS-NEXT: bne a5, a1, .LBB6_3
+; RV64IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB6_1 Depth=1
+; RV64IA-TSO-ZACAS-NEXT: xor a5, a2, a0
+; RV64IA-TSO-ZACAS-NEXT: and a5, a5, a3
+; RV64IA-TSO-ZACAS-NEXT: xor a5, a2, a5
+; RV64IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a4)
+; RV64IA-TSO-ZACAS-NEXT: bnez a5, .LBB6_1
+; RV64IA-TSO-ZACAS-NEXT: .LBB6_3:
+; RV64IA-TSO-ZACAS-NEXT: ret
+;
+; RV64IA-TSO-ZABHA-LABEL: cmpxchg_i8_acq_rel_acquire:
+; RV64IA-TSO-ZABHA: # %bb.0:
+; RV64IA-TSO-ZABHA-NEXT: amocas.b a1, a2, (a0)
+; RV64IA-TSO-ZABHA-NEXT: ret
+ %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val acq_rel acquire
+ ret void
+}
+
+define void @cmpxchg_i8_seq_cst_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind {
+; RV32I-LABEL: cmpxchg_i8_seq_cst_monotonic:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sb a1, 11(sp)
+; RV32I-NEXT: addi a1, sp, 11
+; RV32I-NEXT: li a3, 5
+; RV32I-NEXT: li a4, 0
+; RV32I-NEXT: call __atomic_compare_exchange_1
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-LABEL: cmpxchg_i8_seq_cst_monotonic:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: li a3, 255
+; RV32IA-NEXT: andi a4, a0, -4
+; RV32IA-NEXT: andi a0, a0, 3
+; RV32IA-NEXT: zext.b a1, a1
+; RV32IA-NEXT: zext.b a2, a2
+; RV32IA-NEXT: slli a0, a0, 3
+; RV32IA-NEXT: sll a3, a3, a0
+; RV32IA-NEXT: sll a1, a1, a0
+; RV32IA-NEXT: sll a0, a2, a0
+; RV32IA-NEXT: .LBB7_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT: lr.w.aqrl a2, (a4)
+; RV32IA-NEXT: and a5, a2, a3
+; RV32IA-NEXT: bne a5, a1, .LBB7_3
+; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB7_1 Depth=1
+; RV32IA-NEXT: xor a5, a2, a0
+; RV32IA-NEXT: and a5, a5, a3
+; RV32IA-NEXT: xor a5, a2, a5
+; RV32IA-NEXT: sc.w.rl a5, a5, (a4)
+; RV32IA-NEXT: bnez a5, .LBB7_1
+; RV32IA-NEXT: .LBB7_3:
+; RV32IA-NEXT: ret
+;
+; RV64I-LABEL: cmpxchg_i8_seq_cst_monotonic:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sb a1, 7(sp)
+; RV64I-NEXT: addi a1, sp, 7
+; RV64I-NEXT: li a3, 5
+; RV64I-NEXT: li a4, 0
+; RV64I-NEXT: call __atomic_compare_exchange_1
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-WMO-LABEL: cmpxchg_i8_seq_cst_monotonic:
+; RV64IA-WMO: # %bb.0:
+; RV64IA-WMO-NEXT: li a3, 255
+; RV64IA-WMO-NEXT: andi a4, a0, -4
+; RV64IA-WMO-NEXT: andi a0, a0, 3
+; RV64IA-WMO-NEXT: zext.b a1, a1
+; RV64IA-WMO-NEXT: zext.b a2, a2
+; RV64IA-WMO-NEXT: slli a0, a0, 3
+; RV64IA-WMO-NEXT: sllw a3, a3, a0
+; RV64IA-WMO-NEXT: sllw a1, a1, a0
+; RV64IA-WMO-NEXT: sllw a0, a2, a0
+; RV64IA-WMO-NEXT: .LBB7_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT: lr.w.aqrl a2, (a4)
+; RV64IA-WMO-NEXT: and a5, a2, a3
+; RV64IA-WMO-NEXT: bne a5, a1, .LBB7_3
+; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB7_1 Depth=1
+; RV64IA-WMO-NEXT: xor a5, a2, a0
+; RV64IA-WMO-NEXT: and a5, a5, a3
+; RV64IA-WMO-NEXT: xor a5, a2, a5
+; RV64IA-WMO-NEXT: sc.w.rl a5, a5, (a4)
+; RV64IA-WMO-NEXT: bnez a5, .LBB7_1
+; RV64IA-WMO-NEXT: .LBB7_3:
+; RV64IA-WMO-NEXT: ret
+;
+; RV64IA-ZACAS-LABEL: cmpxchg_i8_seq_cst_monotonic:
+; RV64IA-ZACAS: # %bb.0:
+; RV64IA-ZACAS-NEXT: li a3, 255
+; RV64IA-ZACAS-NEXT: andi a4, a0, -4
+; RV64IA-ZACAS-NEXT: andi a0, a0, 3
+; RV64IA-ZACAS-NEXT: zext.b a1, a1
+; RV64IA-ZACAS-NEXT: zext.b a2, a2
+; RV64IA-ZACAS-NEXT: slli a0, a0, 3
+; RV64IA-ZACAS-NEXT: sllw a3, a3, a0
+; RV64IA-ZACAS-NEXT: sllw a1, a1, a0
+; RV64IA-ZACAS-NEXT: sllw a0, a2, a0
+; RV64IA-ZACAS-NEXT: .LBB7_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-ZACAS-NEXT: lr.w.aqrl a2, (a4)
+; RV64IA-ZACAS-NEXT: and a5, a2, a3
+; RV64IA-ZACAS-NEXT: bne a5, a1, .LBB7_3
+; RV64IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB7_1 Depth=1
+; RV64IA-ZACAS-NEXT: xor a5, a2, a0
+; RV64IA-ZACAS-NEXT: and a5, a5, a3
+; RV64IA-ZACAS-NEXT: xor a5, a2, a5
+; RV64IA-ZACAS-NEXT: sc.w.rl a5, a5, (a4)
+; RV64IA-ZACAS-NEXT: bnez a5, .LBB7_1
+; RV64IA-ZACAS-NEXT: .LBB7_3:
+; RV64IA-ZACAS-NEXT: ret
+;
+; RV64IA-WMO-ZABHA-LABEL: cmpxchg_i8_seq_cst_monotonic:
+; RV64IA-WMO-ZABHA: # %bb.0:
+; RV64IA-WMO-ZABHA-NEXT: amocas.b.aqrl a1, a2, (a0)
+; RV64IA-WMO-ZABHA-NEXT: ret
+;
+; RV64IA-TSO-LABEL: cmpxchg_i8_seq_cst_monotonic:
+; RV64IA-TSO: # %bb.0:
+; RV64IA-TSO-NEXT: li a3, 255
+; RV64IA-TSO-NEXT: andi a4, a0, -4
+; RV64IA-TSO-NEXT: andi a0, a0, 3
+; RV64IA-TSO-NEXT: zext.b a1, a1
+; RV64IA-TSO-NEXT: zext.b a2, a2
+; RV64IA-TSO-NEXT: slli a0, a0, 3
+; RV64IA-TSO-NEXT: sllw a3, a3, a0
+; RV64IA-TSO-NEXT: sllw a1, a1, a0
+; RV64IA-TSO-NEXT: sllw a0, a2, a0
+; RV64IA-TSO-NEXT: .LBB7_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT: lr.w.aqrl a2, (a4)
+; RV64IA-TSO-NEXT: and a5, a2, a3
+; RV64IA-TSO-NEXT: bne a5, a1, .LBB7_3
+; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB7_1 Depth=1
+; RV64IA-TSO-NEXT: xor a5, a2, a0
+; RV64IA-TSO-NEXT: and a5, a5, a3
+; RV64IA-TSO-NEXT: xor a5, a2, a5
+; RV64IA-TSO-NEXT: sc.w.rl a5, a5, (a4)
+; RV64IA-TSO-NEXT: bnez a5, .LBB7_1
+; RV64IA-TSO-NEXT: .LBB7_3:
+; RV64IA-TSO-NEXT: ret
+;
+; RV64IA-TSO-ZABHA-LABEL: cmpxchg_i8_seq_cst_monotonic:
+; RV64IA-TSO-ZABHA: # %bb.0:
+; RV64IA-TSO-ZABHA-NEXT: amocas.b a1, a2, (a0)
+; RV64IA-TSO-ZABHA-NEXT: ret
+ %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val seq_cst monotonic
+ ret void
+}
+
+define void @cmpxchg_i8_seq_cst_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind {
+; cmpxchg i8 with seq_cst success / acquire failure ordering.
+; Without the A extension (RV32I/RV64I) this lowers to a libcall to
+; __atomic_compare_exchange_1 with ordering arguments 5 (seq_cst) and 2
+; (acquire); with A it becomes a byte-masked lr.w.aqrl/sc.w.rl loop; with
+; Zabha it is a single amocas.b (.aqrl suffix under WMO, bare under TSO).
+; RV32I-LABEL: cmpxchg_i8_seq_cst_acquire:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sb a1, 11(sp)
+; RV32I-NEXT:    addi a1, sp, 11
+; RV32I-NEXT:    li a3, 5
+; RV32I-NEXT:    li a4, 2
+; RV32I-NEXT:    call __atomic_compare_exchange_1
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: cmpxchg_i8_seq_cst_acquire:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    li a3, 255
+; RV32IA-NEXT:    andi a4, a0, -4
+; RV32IA-NEXT:    andi a0, a0, 3
+; RV32IA-NEXT:    zext.b a1, a1
+; RV32IA-NEXT:    zext.b a2, a2
+; RV32IA-NEXT:    slli a0, a0, 3
+; RV32IA-NEXT:    sll a3, a3, a0
+; RV32IA-NEXT:    sll a1, a1, a0
+; RV32IA-NEXT:    sll a0, a2, a0
+; RV32IA-NEXT:  .LBB8_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT:    lr.w.aqrl a2, (a4)
+; RV32IA-NEXT:    and a5, a2, a3
+; RV32IA-NEXT:    bne a5, a1, .LBB8_3
+; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB8_1 Depth=1
+; RV32IA-NEXT:    xor a5, a2, a0
+; RV32IA-NEXT:    and a5, a5, a3
+; RV32IA-NEXT:    xor a5, a2, a5
+; RV32IA-NEXT:    sc.w.rl a5, a5, (a4)
+; RV32IA-NEXT:    bnez a5, .LBB8_1
+; RV32IA-NEXT:  .LBB8_3:
+; RV32IA-NEXT:    ret
+;
+; RV64I-LABEL: cmpxchg_i8_seq_cst_acquire:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sb a1, 7(sp)
+; RV64I-NEXT:    addi a1, sp, 7
+; RV64I-NEXT:    li a3, 5
+; RV64I-NEXT:    li a4, 2
+; RV64I-NEXT:    call __atomic_compare_exchange_1
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64IA-WMO-LABEL: cmpxchg_i8_seq_cst_acquire:
+; RV64IA-WMO:       # %bb.0:
+; RV64IA-WMO-NEXT:    li a3, 255
+; RV64IA-WMO-NEXT:    andi a4, a0, -4
+; RV64IA-WMO-NEXT:    andi a0, a0, 3
+; RV64IA-WMO-NEXT:    zext.b a1, a1
+; RV64IA-WMO-NEXT:    zext.b a2, a2
+; RV64IA-WMO-NEXT:    slli a0, a0, 3
+; RV64IA-WMO-NEXT:    sllw a3, a3, a0
+; RV64IA-WMO-NEXT:    sllw a1, a1, a0
+; RV64IA-WMO-NEXT:    sllw a0, a2, a0
+; RV64IA-WMO-NEXT:  .LBB8_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT:    lr.w.aqrl a2, (a4)
+; RV64IA-WMO-NEXT:    and a5, a2, a3
+; RV64IA-WMO-NEXT:    bne a5, a1, .LBB8_3
+; RV64IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB8_1 Depth=1
+; RV64IA-WMO-NEXT:    xor a5, a2, a0
+; RV64IA-WMO-NEXT:    and a5, a5, a3
+; RV64IA-WMO-NEXT:    xor a5, a2, a5
+; RV64IA-WMO-NEXT:    sc.w.rl a5, a5, (a4)
+; RV64IA-WMO-NEXT:    bnez a5, .LBB8_1
+; RV64IA-WMO-NEXT:  .LBB8_3:
+; RV64IA-WMO-NEXT:    ret
+;
+; RV64IA-ZACAS-LABEL: cmpxchg_i8_seq_cst_acquire:
+; RV64IA-ZACAS:       # %bb.0:
+; RV64IA-ZACAS-NEXT:    li a3, 255
+; RV64IA-ZACAS-NEXT:    andi a4, a0, -4
+; RV64IA-ZACAS-NEXT:    andi a0, a0, 3
+; RV64IA-ZACAS-NEXT:    zext.b a1, a1
+; RV64IA-ZACAS-NEXT:    zext.b a2, a2
+; RV64IA-ZACAS-NEXT:    slli a0, a0, 3
+; RV64IA-ZACAS-NEXT:    sllw a3, a3, a0
+; RV64IA-ZACAS-NEXT:    sllw a1, a1, a0
+; RV64IA-ZACAS-NEXT:    sllw a0, a2, a0
+; RV64IA-ZACAS-NEXT:  .LBB8_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-ZACAS-NEXT:    lr.w.aqrl a2, (a4)
+; RV64IA-ZACAS-NEXT:    and a5, a2, a3
+; RV64IA-ZACAS-NEXT:    bne a5, a1, .LBB8_3
+; RV64IA-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB8_1 Depth=1
+; RV64IA-ZACAS-NEXT:    xor a5, a2, a0
+; RV64IA-ZACAS-NEXT:    and a5, a5, a3
+; RV64IA-ZACAS-NEXT:    xor a5, a2, a5
+; RV64IA-ZACAS-NEXT:    sc.w.rl a5, a5, (a4)
+; RV64IA-ZACAS-NEXT:    bnez a5, .LBB8_1
+; RV64IA-ZACAS-NEXT:  .LBB8_3:
+; RV64IA-ZACAS-NEXT:    ret
+;
+; RV64IA-WMO-ZABHA-LABEL: cmpxchg_i8_seq_cst_acquire:
+; RV64IA-WMO-ZABHA:       # %bb.0:
+; RV64IA-WMO-ZABHA-NEXT:    amocas.b.aqrl a1, a2, (a0)
+; RV64IA-WMO-ZABHA-NEXT:    ret
+;
+; RV64IA-TSO-LABEL: cmpxchg_i8_seq_cst_acquire:
+; RV64IA-TSO:       # %bb.0:
+; RV64IA-TSO-NEXT:    li a3, 255
+; RV64IA-TSO-NEXT:    andi a4, a0, -4
+; RV64IA-TSO-NEXT:    andi a0, a0, 3
+; RV64IA-TSO-NEXT:    zext.b a1, a1
+; RV64IA-TSO-NEXT:    zext.b a2, a2
+; RV64IA-TSO-NEXT:    slli a0, a0, 3
+; RV64IA-TSO-NEXT:    sllw a3, a3, a0
+; RV64IA-TSO-NEXT:    sllw a1, a1, a0
+; RV64IA-TSO-NEXT:    sllw a0, a2, a0
+; RV64IA-TSO-NEXT:  .LBB8_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT:    lr.w.aqrl a2, (a4)
+; RV64IA-TSO-NEXT:    and a5, a2, a3
+; RV64IA-TSO-NEXT:    bne a5, a1, .LBB8_3
+; RV64IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB8_1 Depth=1
+; RV64IA-TSO-NEXT:    xor a5, a2, a0
+; RV64IA-TSO-NEXT:    and a5, a5, a3
+; RV64IA-TSO-NEXT:    xor a5, a2, a5
+; RV64IA-TSO-NEXT:    sc.w.rl a5, a5, (a4)
+; RV64IA-TSO-NEXT:    bnez a5, .LBB8_1
+; RV64IA-TSO-NEXT:  .LBB8_3:
+; RV64IA-TSO-NEXT:    ret
+;
+; RV64IA-TSO-ZABHA-LABEL: cmpxchg_i8_seq_cst_acquire:
+; RV64IA-TSO-ZABHA:       # %bb.0:
+; RV64IA-TSO-ZABHA-NEXT:    amocas.b a1, a2, (a0)
+; RV64IA-TSO-ZABHA-NEXT:    ret
+  %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val seq_cst acquire
+  ret void
+}
+
+define void @cmpxchg_i8_seq_cst_seq_cst(ptr %ptr, i8 %cmp, i8 %val) nounwind {
+; cmpxchg i8 with seq_cst/seq_cst ordering. RV32I/RV64I call
+; __atomic_compare_exchange_1 with ordering arguments 5/5; A-extension
+; targets use the byte-masked lr.w.aqrl/sc.w.rl loop; Zabha emits
+; `fence rw, rw` followed by amocas.b — note the leading fence, which is
+; absent in the seq_cst/acquire variant above.
+; RV32I-LABEL: cmpxchg_i8_seq_cst_seq_cst:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sb a1, 11(sp)
+; RV32I-NEXT:    addi a1, sp, 11
+; RV32I-NEXT:    li a3, 5
+; RV32I-NEXT:    li a4, 5
+; RV32I-NEXT:    call __atomic_compare_exchange_1
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: cmpxchg_i8_seq_cst_seq_cst:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    li a3, 255
+; RV32IA-NEXT:    andi a4, a0, -4
+; RV32IA-NEXT:    andi a0, a0, 3
+; RV32IA-NEXT:    zext.b a1, a1
+; RV32IA-NEXT:    zext.b a2, a2
+; RV32IA-NEXT:    slli a0, a0, 3
+; RV32IA-NEXT:    sll a3, a3, a0
+; RV32IA-NEXT:    sll a1, a1, a0
+; RV32IA-NEXT:    sll a0, a2, a0
+; RV32IA-NEXT:  .LBB9_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT:    lr.w.aqrl a2, (a4)
+; RV32IA-NEXT:    and a5, a2, a3
+; RV32IA-NEXT:    bne a5, a1, .LBB9_3
+; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB9_1 Depth=1
+; RV32IA-NEXT:    xor a5, a2, a0
+; RV32IA-NEXT:    and a5, a5, a3
+; RV32IA-NEXT:    xor a5, a2, a5
+; RV32IA-NEXT:    sc.w.rl a5, a5, (a4)
+; RV32IA-NEXT:    bnez a5, .LBB9_1
+; RV32IA-NEXT:  .LBB9_3:
+; RV32IA-NEXT:    ret
+;
+; RV64I-LABEL: cmpxchg_i8_seq_cst_seq_cst:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sb a1, 7(sp)
+; RV64I-NEXT:    addi a1, sp, 7
+; RV64I-NEXT:    li a3, 5
+; RV64I-NEXT:    li a4, 5
+; RV64I-NEXT:    call __atomic_compare_exchange_1
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64IA-WMO-LABEL: cmpxchg_i8_seq_cst_seq_cst:
+; RV64IA-WMO:       # %bb.0:
+; RV64IA-WMO-NEXT:    li a3, 255
+; RV64IA-WMO-NEXT:    andi a4, a0, -4
+; RV64IA-WMO-NEXT:    andi a0, a0, 3
+; RV64IA-WMO-NEXT:    zext.b a1, a1
+; RV64IA-WMO-NEXT:    zext.b a2, a2
+; RV64IA-WMO-NEXT:    slli a0, a0, 3
+; RV64IA-WMO-NEXT:    sllw a3, a3, a0
+; RV64IA-WMO-NEXT:    sllw a1, a1, a0
+; RV64IA-WMO-NEXT:    sllw a0, a2, a0
+; RV64IA-WMO-NEXT:  .LBB9_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT:    lr.w.aqrl a2, (a4)
+; RV64IA-WMO-NEXT:    and a5, a2, a3
+; RV64IA-WMO-NEXT:    bne a5, a1, .LBB9_3
+; RV64IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB9_1 Depth=1
+; RV64IA-WMO-NEXT:    xor a5, a2, a0
+; RV64IA-WMO-NEXT:    and a5, a5, a3
+; RV64IA-WMO-NEXT:    xor a5, a2, a5
+; RV64IA-WMO-NEXT:    sc.w.rl a5, a5, (a4)
+; RV64IA-WMO-NEXT:    bnez a5, .LBB9_1
+; RV64IA-WMO-NEXT:  .LBB9_3:
+; RV64IA-WMO-NEXT:    ret
+;
+; RV64IA-ZACAS-LABEL: cmpxchg_i8_seq_cst_seq_cst:
+; RV64IA-ZACAS:       # %bb.0:
+; RV64IA-ZACAS-NEXT:    li a3, 255
+; RV64IA-ZACAS-NEXT:    andi a4, a0, -4
+; RV64IA-ZACAS-NEXT:    andi a0, a0, 3
+; RV64IA-ZACAS-NEXT:    zext.b a1, a1
+; RV64IA-ZACAS-NEXT:    zext.b a2, a2
+; RV64IA-ZACAS-NEXT:    slli a0, a0, 3
+; RV64IA-ZACAS-NEXT:    sllw a3, a3, a0
+; RV64IA-ZACAS-NEXT:    sllw a1, a1, a0
+; RV64IA-ZACAS-NEXT:    sllw a0, a2, a0
+; RV64IA-ZACAS-NEXT:  .LBB9_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-ZACAS-NEXT:    lr.w.aqrl a2, (a4)
+; RV64IA-ZACAS-NEXT:    and a5, a2, a3
+; RV64IA-ZACAS-NEXT:    bne a5, a1, .LBB9_3
+; RV64IA-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB9_1 Depth=1
+; RV64IA-ZACAS-NEXT:    xor a5, a2, a0
+; RV64IA-ZACAS-NEXT:    and a5, a5, a3
+; RV64IA-ZACAS-NEXT:    xor a5, a2, a5
+; RV64IA-ZACAS-NEXT:    sc.w.rl a5, a5, (a4)
+; RV64IA-ZACAS-NEXT:    bnez a5, .LBB9_1
+; RV64IA-ZACAS-NEXT:  .LBB9_3:
+; RV64IA-ZACAS-NEXT:    ret
+;
+; RV64IA-WMO-ZABHA-LABEL: cmpxchg_i8_seq_cst_seq_cst:
+; RV64IA-WMO-ZABHA:       # %bb.0:
+; RV64IA-WMO-ZABHA-NEXT:    fence rw, rw
+; RV64IA-WMO-ZABHA-NEXT:    amocas.b.aqrl a1, a2, (a0)
+; RV64IA-WMO-ZABHA-NEXT:    ret
+;
+; RV64IA-TSO-LABEL: cmpxchg_i8_seq_cst_seq_cst:
+; RV64IA-TSO:       # %bb.0:
+; RV64IA-TSO-NEXT:    li a3, 255
+; RV64IA-TSO-NEXT:    andi a4, a0, -4
+; RV64IA-TSO-NEXT:    andi a0, a0, 3
+; RV64IA-TSO-NEXT:    zext.b a1, a1
+; RV64IA-TSO-NEXT:    zext.b a2, a2
+; RV64IA-TSO-NEXT:    slli a0, a0, 3
+; RV64IA-TSO-NEXT:    sllw a3, a3, a0
+; RV64IA-TSO-NEXT:    sllw a1, a1, a0
+; RV64IA-TSO-NEXT:    sllw a0, a2, a0
+; RV64IA-TSO-NEXT:  .LBB9_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT:    lr.w.aqrl a2, (a4)
+; RV64IA-TSO-NEXT:    and a5, a2, a3
+; RV64IA-TSO-NEXT:    bne a5, a1, .LBB9_3
+; RV64IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB9_1 Depth=1
+; RV64IA-TSO-NEXT:    xor a5, a2, a0
+; RV64IA-TSO-NEXT:    and a5, a5, a3
+; RV64IA-TSO-NEXT:    xor a5, a2, a5
+; RV64IA-TSO-NEXT:    sc.w.rl a5, a5, (a4)
+; RV64IA-TSO-NEXT:    bnez a5, .LBB9_1
+; RV64IA-TSO-NEXT:  .LBB9_3:
+; RV64IA-TSO-NEXT:    ret
+;
+; RV64IA-TSO-ZABHA-LABEL: cmpxchg_i8_seq_cst_seq_cst:
+; RV64IA-TSO-ZABHA:       # %bb.0:
+; RV64IA-TSO-ZABHA-NEXT:    fence rw, rw
+; RV64IA-TSO-ZABHA-NEXT:    amocas.b a1, a2, (a0)
+; RV64IA-TSO-ZABHA-NEXT:    ret
+  %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val seq_cst seq_cst
+  ret void
+}
+
+define void @cmpxchg_i16_monotonic_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwind {
+; cmpxchg i16 with monotonic/monotonic ordering. RV32I/RV64I call
+; __atomic_compare_exchange_2 with ordering arguments 0/0; A-extension
+; targets use an unordered lr.w/sc.w loop with a halfword mask (0xffff
+; built via lui 16 / addi -1); Zabha uses a plain amocas.h with no
+; ordering suffix.
+; RV32I-LABEL: cmpxchg_i16_monotonic_monotonic:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sh a1, 10(sp)
+; RV32I-NEXT:    addi a1, sp, 10
+; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    li a4, 0
+; RV32I-NEXT:    call __atomic_compare_exchange_2
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: cmpxchg_i16_monotonic_monotonic:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    lui a3, 16
+; RV32IA-NEXT:    andi a4, a0, -4
+; RV32IA-NEXT:    andi a0, a0, 3
+; RV32IA-NEXT:    addi a3, a3, -1
+; RV32IA-NEXT:    slli a0, a0, 3
+; RV32IA-NEXT:    sll a5, a3, a0
+; RV32IA-NEXT:    and a1, a1, a3
+; RV32IA-NEXT:    and a2, a2, a3
+; RV32IA-NEXT:    sll a1, a1, a0
+; RV32IA-NEXT:    sll a0, a2, a0
+; RV32IA-NEXT:  .LBB10_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT:    lr.w a2, (a4)
+; RV32IA-NEXT:    and a3, a2, a5
+; RV32IA-NEXT:    bne a3, a1, .LBB10_3
+; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB10_1 Depth=1
+; RV32IA-NEXT:    xor a3, a2, a0
+; RV32IA-NEXT:    and a3, a3, a5
+; RV32IA-NEXT:    xor a3, a2, a3
+; RV32IA-NEXT:    sc.w a3, a3, (a4)
+; RV32IA-NEXT:    bnez a3, .LBB10_1
+; RV32IA-NEXT:  .LBB10_3:
+; RV32IA-NEXT:    ret
+;
+; RV64I-LABEL: cmpxchg_i16_monotonic_monotonic:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sh a1, 6(sp)
+; RV64I-NEXT:    addi a1, sp, 6
+; RV64I-NEXT:    li a3, 0
+; RV64I-NEXT:    li a4, 0
+; RV64I-NEXT:    call __atomic_compare_exchange_2
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64IA-WMO-LABEL: cmpxchg_i16_monotonic_monotonic:
+; RV64IA-WMO:       # %bb.0:
+; RV64IA-WMO-NEXT:    lui a3, 16
+; RV64IA-WMO-NEXT:    andi a4, a0, -4
+; RV64IA-WMO-NEXT:    andi a0, a0, 3
+; RV64IA-WMO-NEXT:    addi a3, a3, -1
+; RV64IA-WMO-NEXT:    slli a0, a0, 3
+; RV64IA-WMO-NEXT:    sllw a5, a3, a0
+; RV64IA-WMO-NEXT:    and a1, a1, a3
+; RV64IA-WMO-NEXT:    and a2, a2, a3
+; RV64IA-WMO-NEXT:    sllw a1, a1, a0
+; RV64IA-WMO-NEXT:    sllw a0, a2, a0
+; RV64IA-WMO-NEXT:  .LBB10_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT:    lr.w a2, (a4)
+; RV64IA-WMO-NEXT:    and a3, a2, a5
+; RV64IA-WMO-NEXT:    bne a3, a1, .LBB10_3
+; RV64IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB10_1 Depth=1
+; RV64IA-WMO-NEXT:    xor a3, a2, a0
+; RV64IA-WMO-NEXT:    and a3, a3, a5
+; RV64IA-WMO-NEXT:    xor a3, a2, a3
+; RV64IA-WMO-NEXT:    sc.w a3, a3, (a4)
+; RV64IA-WMO-NEXT:    bnez a3, .LBB10_1
+; RV64IA-WMO-NEXT:  .LBB10_3:
+; RV64IA-WMO-NEXT:    ret
+;
+; RV64IA-ZACAS-LABEL: cmpxchg_i16_monotonic_monotonic:
+; RV64IA-ZACAS:       # %bb.0:
+; RV64IA-ZACAS-NEXT:    lui a3, 16
+; RV64IA-ZACAS-NEXT:    andi a4, a0, -4
+; RV64IA-ZACAS-NEXT:    andi a0, a0, 3
+; RV64IA-ZACAS-NEXT:    addi a3, a3, -1
+; RV64IA-ZACAS-NEXT:    slli a0, a0, 3
+; RV64IA-ZACAS-NEXT:    sllw a5, a3, a0
+; RV64IA-ZACAS-NEXT:    and a1, a1, a3
+; RV64IA-ZACAS-NEXT:    and a2, a2, a3
+; RV64IA-ZACAS-NEXT:    sllw a1, a1, a0
+; RV64IA-ZACAS-NEXT:    sllw a0, a2, a0
+; RV64IA-ZACAS-NEXT:  .LBB10_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-ZACAS-NEXT:    lr.w a2, (a4)
+; RV64IA-ZACAS-NEXT:    and a3, a2, a5
+; RV64IA-ZACAS-NEXT:    bne a3, a1, .LBB10_3
+; RV64IA-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB10_1 Depth=1
+; RV64IA-ZACAS-NEXT:    xor a3, a2, a0
+; RV64IA-ZACAS-NEXT:    and a3, a3, a5
+; RV64IA-ZACAS-NEXT:    xor a3, a2, a3
+; RV64IA-ZACAS-NEXT:    sc.w a3, a3, (a4)
+; RV64IA-ZACAS-NEXT:    bnez a3, .LBB10_1
+; RV64IA-ZACAS-NEXT:  .LBB10_3:
+; RV64IA-ZACAS-NEXT:    ret
+;
+; RV64IA-ZABHA-LABEL: cmpxchg_i16_monotonic_monotonic:
+; RV64IA-ZABHA:       # %bb.0:
+; RV64IA-ZABHA-NEXT:    amocas.h a1, a2, (a0)
+; RV64IA-ZABHA-NEXT:    ret
+;
+; RV64IA-TSO-LABEL: cmpxchg_i16_monotonic_monotonic:
+; RV64IA-TSO:       # %bb.0:
+; RV64IA-TSO-NEXT:    lui a3, 16
+; RV64IA-TSO-NEXT:    andi a4, a0, -4
+; RV64IA-TSO-NEXT:    andi a0, a0, 3
+; RV64IA-TSO-NEXT:    addi a3, a3, -1
+; RV64IA-TSO-NEXT:    slli a0, a0, 3
+; RV64IA-TSO-NEXT:    sllw a5, a3, a0
+; RV64IA-TSO-NEXT:    and a1, a1, a3
+; RV64IA-TSO-NEXT:    and a2, a2, a3
+; RV64IA-TSO-NEXT:    sllw a1, a1, a0
+; RV64IA-TSO-NEXT:    sllw a0, a2, a0
+; RV64IA-TSO-NEXT:  .LBB10_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT:    lr.w a2, (a4)
+; RV64IA-TSO-NEXT:    and a3, a2, a5
+; RV64IA-TSO-NEXT:    bne a3, a1, .LBB10_3
+; RV64IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB10_1 Depth=1
+; RV64IA-TSO-NEXT:    xor a3, a2, a0
+; RV64IA-TSO-NEXT:    and a3, a3, a5
+; RV64IA-TSO-NEXT:    xor a3, a2, a3
+; RV64IA-TSO-NEXT:    sc.w a3, a3, (a4)
+; RV64IA-TSO-NEXT:    bnez a3, .LBB10_1
+; RV64IA-TSO-NEXT:  .LBB10_3:
+; RV64IA-TSO-NEXT:    ret
+  %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val monotonic monotonic
+  ret void
+}
+
+define void @cmpxchg_i16_acquire_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwind {
+; cmpxchg i16 with acquire success / monotonic failure ordering.
+; RV32I/RV64I call __atomic_compare_exchange_2 with ordering arguments 2/0.
+; Under WMO the halfword-masked loop loads with lr.w.aq and stores with
+; plain sc.w; under TSO both sides are unannotated (lr.w/sc.w). Zabha
+; lowers to amocas.h.aq (WMO) or amocas.h (TSO).
+; RV32I-LABEL: cmpxchg_i16_acquire_monotonic:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sh a1, 10(sp)
+; RV32I-NEXT:    addi a1, sp, 10
+; RV32I-NEXT:    li a3, 2
+; RV32I-NEXT:    li a4, 0
+; RV32I-NEXT:    call __atomic_compare_exchange_2
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV32IA-WMO-LABEL: cmpxchg_i16_acquire_monotonic:
+; RV32IA-WMO:       # %bb.0:
+; RV32IA-WMO-NEXT:    lui a3, 16
+; RV32IA-WMO-NEXT:    andi a4, a0, -4
+; RV32IA-WMO-NEXT:    andi a0, a0, 3
+; RV32IA-WMO-NEXT:    addi a3, a3, -1
+; RV32IA-WMO-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NEXT:    sll a5, a3, a0
+; RV32IA-WMO-NEXT:    and a1, a1, a3
+; RV32IA-WMO-NEXT:    and a2, a2, a3
+; RV32IA-WMO-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NEXT:    sll a0, a2, a0
+; RV32IA-WMO-NEXT:  .LBB11_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NEXT:    lr.w.aq a2, (a4)
+; RV32IA-WMO-NEXT:    and a3, a2, a5
+; RV32IA-WMO-NEXT:    bne a3, a1, .LBB11_3
+; RV32IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB11_1 Depth=1
+; RV32IA-WMO-NEXT:    xor a3, a2, a0
+; RV32IA-WMO-NEXT:    and a3, a3, a5
+; RV32IA-WMO-NEXT:    xor a3, a2, a3
+; RV32IA-WMO-NEXT:    sc.w a3, a3, (a4)
+; RV32IA-WMO-NEXT:    bnez a3, .LBB11_1
+; RV32IA-WMO-NEXT:  .LBB11_3:
+; RV32IA-WMO-NEXT:    ret
+;
+; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i16_acquire_monotonic:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    lui a3, 16
+; RV32IA-WMO-ZACAS-NEXT:    andi a4, a0, -4
+; RV32IA-WMO-ZACAS-NEXT:    andi a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT:    sll a5, a3, a0
+; RV32IA-WMO-ZACAS-NEXT:    and a1, a1, a3
+; RV32IA-WMO-ZACAS-NEXT:    and a2, a2, a3
+; RV32IA-WMO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:    sll a0, a2, a0
+; RV32IA-WMO-ZACAS-NEXT:  .LBB11_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    lr.w.aq a2, (a4)
+; RV32IA-WMO-ZACAS-NEXT:    and a3, a2, a5
+; RV32IA-WMO-ZACAS-NEXT:    bne a3, a1, .LBB11_3
+; RV32IA-WMO-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB11_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    xor a3, a2, a0
+; RV32IA-WMO-ZACAS-NEXT:    and a3, a3, a5
+; RV32IA-WMO-ZACAS-NEXT:    xor a3, a2, a3
+; RV32IA-WMO-ZACAS-NEXT:    sc.w a3, a3, (a4)
+; RV32IA-WMO-ZACAS-NEXT:    bnez a3, .LBB11_1
+; RV32IA-WMO-ZACAS-NEXT:  .LBB11_3:
+; RV32IA-WMO-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-LABEL: cmpxchg_i16_acquire_monotonic:
+; RV32IA-TSO:       # %bb.0:
+; RV32IA-TSO-NEXT:    lui a3, 16
+; RV32IA-TSO-NEXT:    andi a4, a0, -4
+; RV32IA-TSO-NEXT:    andi a0, a0, 3
+; RV32IA-TSO-NEXT:    addi a3, a3, -1
+; RV32IA-TSO-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NEXT:    sll a5, a3, a0
+; RV32IA-TSO-NEXT:    and a1, a1, a3
+; RV32IA-TSO-NEXT:    and a2, a2, a3
+; RV32IA-TSO-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NEXT:    sll a0, a2, a0
+; RV32IA-TSO-NEXT:  .LBB11_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NEXT:    lr.w a2, (a4)
+; RV32IA-TSO-NEXT:    and a3, a2, a5
+; RV32IA-TSO-NEXT:    bne a3, a1, .LBB11_3
+; RV32IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB11_1 Depth=1
+; RV32IA-TSO-NEXT:    xor a3, a2, a0
+; RV32IA-TSO-NEXT:    and a3, a3, a5
+; RV32IA-TSO-NEXT:    xor a3, a2, a3
+; RV32IA-TSO-NEXT:    sc.w a3, a3, (a4)
+; RV32IA-TSO-NEXT:    bnez a3, .LBB11_1
+; RV32IA-TSO-NEXT:  .LBB11_3:
+; RV32IA-TSO-NEXT:    ret
+;
+; RV32IA-TSO-ZACAS-LABEL: cmpxchg_i16_acquire_monotonic:
+; RV32IA-TSO-ZACAS:       # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT:    lui a3, 16
+; RV32IA-TSO-ZACAS-NEXT:    andi a4, a0, -4
+; RV32IA-TSO-ZACAS-NEXT:    andi a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-TSO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT:    sll a5, a3, a0
+; RV32IA-TSO-ZACAS-NEXT:    and a1, a1, a3
+; RV32IA-TSO-ZACAS-NEXT:    and a2, a2, a3
+; RV32IA-TSO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:    sll a0, a2, a0
+; RV32IA-TSO-ZACAS-NEXT:  .LBB11_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT:    lr.w a2, (a4)
+; RV32IA-TSO-ZACAS-NEXT:    and a3, a2, a5
+; RV32IA-TSO-ZACAS-NEXT:    bne a3, a1, .LBB11_3
+; RV32IA-TSO-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB11_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT:    xor a3, a2, a0
+; RV32IA-TSO-ZACAS-NEXT:    and a3, a3, a5
+; RV32IA-TSO-ZACAS-NEXT:    xor a3, a2, a3
+; RV32IA-TSO-ZACAS-NEXT:    sc.w a3, a3, (a4)
+; RV32IA-TSO-ZACAS-NEXT:    bnez a3, .LBB11_1
+; RV32IA-TSO-ZACAS-NEXT:  .LBB11_3:
+; RV32IA-TSO-ZACAS-NEXT:    ret
+;
+; RV64I-LABEL: cmpxchg_i16_acquire_monotonic:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sh a1, 6(sp)
+; RV64I-NEXT:    addi a1, sp, 6
+; RV64I-NEXT:    li a3, 2
+; RV64I-NEXT:    li a4, 0
+; RV64I-NEXT:    call __atomic_compare_exchange_2
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64IA-WMO-LABEL: cmpxchg_i16_acquire_monotonic:
+; RV64IA-WMO:       # %bb.0:
+; RV64IA-WMO-NEXT:    lui a3, 16
+; RV64IA-WMO-NEXT:    andi a4, a0, -4
+; RV64IA-WMO-NEXT:    andi a0, a0, 3
+; RV64IA-WMO-NEXT:    addi a3, a3, -1
+; RV64IA-WMO-NEXT:    slli a0, a0, 3
+; RV64IA-WMO-NEXT:    sllw a5, a3, a0
+; RV64IA-WMO-NEXT:    and a1, a1, a3
+; RV64IA-WMO-NEXT:    and a2, a2, a3
+; RV64IA-WMO-NEXT:    sllw a1, a1, a0
+; RV64IA-WMO-NEXT:    sllw a0, a2, a0
+; RV64IA-WMO-NEXT:  .LBB11_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT:    lr.w.aq a2, (a4)
+; RV64IA-WMO-NEXT:    and a3, a2, a5
+; RV64IA-WMO-NEXT:    bne a3, a1, .LBB11_3
+; RV64IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB11_1 Depth=1
+; RV64IA-WMO-NEXT:    xor a3, a2, a0
+; RV64IA-WMO-NEXT:    and a3, a3, a5
+; RV64IA-WMO-NEXT:    xor a3, a2, a3
+; RV64IA-WMO-NEXT:    sc.w a3, a3, (a4)
+; RV64IA-WMO-NEXT:    bnez a3, .LBB11_1
+; RV64IA-WMO-NEXT:  .LBB11_3:
+; RV64IA-WMO-NEXT:    ret
+;
+; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i16_acquire_monotonic:
+; RV64IA-WMO-ZACAS:       # %bb.0:
+; RV64IA-WMO-ZACAS-NEXT:    lui a3, 16
+; RV64IA-WMO-ZACAS-NEXT:    andi a4, a0, -4
+; RV64IA-WMO-ZACAS-NEXT:    andi a0, a0, 3
+; RV64IA-WMO-ZACAS-NEXT:    addi a3, a3, -1
+; RV64IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV64IA-WMO-ZACAS-NEXT:    sllw a5, a3, a0
+; RV64IA-WMO-ZACAS-NEXT:    and a1, a1, a3
+; RV64IA-WMO-ZACAS-NEXT:    and a2, a2, a3
+; RV64IA-WMO-ZACAS-NEXT:    sllw a1, a1, a0
+; RV64IA-WMO-ZACAS-NEXT:    sllw a0, a2, a0
+; RV64IA-WMO-ZACAS-NEXT:  .LBB11_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-ZACAS-NEXT:    lr.w.aq a2, (a4)
+; RV64IA-WMO-ZACAS-NEXT:    and a3, a2, a5
+; RV64IA-WMO-ZACAS-NEXT:    bne a3, a1, .LBB11_3
+; RV64IA-WMO-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB11_1 Depth=1
+; RV64IA-WMO-ZACAS-NEXT:    xor a3, a2, a0
+; RV64IA-WMO-ZACAS-NEXT:    and a3, a3, a5
+; RV64IA-WMO-ZACAS-NEXT:    xor a3, a2, a3
+; RV64IA-WMO-ZACAS-NEXT:    sc.w a3, a3, (a4)
+; RV64IA-WMO-ZACAS-NEXT:    bnez a3, .LBB11_1
+; RV64IA-WMO-ZACAS-NEXT:  .LBB11_3:
+; RV64IA-WMO-ZACAS-NEXT:    ret
+;
+; RV64IA-WMO-ZABHA-LABEL: cmpxchg_i16_acquire_monotonic:
+; RV64IA-WMO-ZABHA:       # %bb.0:
+; RV64IA-WMO-ZABHA-NEXT:    amocas.h.aq a1, a2, (a0)
+; RV64IA-WMO-ZABHA-NEXT:    ret
+;
+; RV64IA-TSO-LABEL: cmpxchg_i16_acquire_monotonic:
+; RV64IA-TSO:       # %bb.0:
+; RV64IA-TSO-NEXT:    lui a3, 16
+; RV64IA-TSO-NEXT:    andi a4, a0, -4
+; RV64IA-TSO-NEXT:    andi a0, a0, 3
+; RV64IA-TSO-NEXT:    addi a3, a3, -1
+; RV64IA-TSO-NEXT:    slli a0, a0, 3
+; RV64IA-TSO-NEXT:    sllw a5, a3, a0
+; RV64IA-TSO-NEXT:    and a1, a1, a3
+; RV64IA-TSO-NEXT:    and a2, a2, a3
+; RV64IA-TSO-NEXT:    sllw a1, a1, a0
+; RV64IA-TSO-NEXT:    sllw a0, a2, a0
+; RV64IA-TSO-NEXT:  .LBB11_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT:    lr.w a2, (a4)
+; RV64IA-TSO-NEXT:    and a3, a2, a5
+; RV64IA-TSO-NEXT:    bne a3, a1, .LBB11_3
+; RV64IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB11_1 Depth=1
+; RV64IA-TSO-NEXT:    xor a3, a2, a0
+; RV64IA-TSO-NEXT:    and a3, a3, a5
+; RV64IA-TSO-NEXT:    xor a3, a2, a3
+; RV64IA-TSO-NEXT:    sc.w a3, a3, (a4)
+; RV64IA-TSO-NEXT:    bnez a3, .LBB11_1
+; RV64IA-TSO-NEXT:  .LBB11_3:
+; RV64IA-TSO-NEXT:    ret
+;
+; RV64IA-TSO-ZACAS-LABEL: cmpxchg_i16_acquire_monotonic:
+; RV64IA-TSO-ZACAS:       # %bb.0:
+; RV64IA-TSO-ZACAS-NEXT:    lui a3, 16
+; RV64IA-TSO-ZACAS-NEXT:    andi a4, a0, -4
+; RV64IA-TSO-ZACAS-NEXT:    andi a0, a0, 3
+; RV64IA-TSO-ZACAS-NEXT:    addi a3, a3, -1
+; RV64IA-TSO-ZACAS-NEXT:    slli a0, a0, 3
+; RV64IA-TSO-ZACAS-NEXT:    sllw a5, a3, a0
+; RV64IA-TSO-ZACAS-NEXT:    and a1, a1, a3
+; RV64IA-TSO-ZACAS-NEXT:    and a2, a2, a3
+; RV64IA-TSO-ZACAS-NEXT:    sllw a1, a1, a0
+; RV64IA-TSO-ZACAS-NEXT:    sllw a0, a2, a0
+; RV64IA-TSO-ZACAS-NEXT:  .LBB11_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-ZACAS-NEXT:    lr.w a2, (a4)
+; RV64IA-TSO-ZACAS-NEXT:    and a3, a2, a5
+; RV64IA-TSO-ZACAS-NEXT:    bne a3, a1, .LBB11_3
+; RV64IA-TSO-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB11_1 Depth=1
+; RV64IA-TSO-ZACAS-NEXT:    xor a3, a2, a0
+; RV64IA-TSO-ZACAS-NEXT:    and a3, a3, a5
+; RV64IA-TSO-ZACAS-NEXT:    xor a3, a2, a3
+; RV64IA-TSO-ZACAS-NEXT:    sc.w a3, a3, (a4)
+; RV64IA-TSO-ZACAS-NEXT:    bnez a3, .LBB11_1
+; RV64IA-TSO-ZACAS-NEXT:  .LBB11_3:
+; RV64IA-TSO-ZACAS-NEXT:    ret
+;
+; RV64IA-TSO-ZABHA-LABEL: cmpxchg_i16_acquire_monotonic:
+; RV64IA-TSO-ZABHA:       # %bb.0:
+; RV64IA-TSO-ZABHA-NEXT:    amocas.h a1, a2, (a0)
+; RV64IA-TSO-ZABHA-NEXT:    ret
+  %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val acquire monotonic
+  ret void
+}
+
+define void @cmpxchg_i16_acquire_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind {
+; RV32I-LABEL: cmpxchg_i16_acquire_acquire:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sh a1, 10(sp)
+; RV32I-NEXT: addi a1, sp, 10
+; RV32I-NEXT: li a3, 2
+; RV32I-NEXT: li a4, 2
+; RV32I-NEXT: call __atomic_compare_exchange_2
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-WMO-LABEL: cmpxchg_i16_acquire_acquire:
+; RV32IA-WMO: # %bb.0:
+; RV32IA-WMO-NEXT: lui a3, 16
+; RV32IA-WMO-NEXT: andi a4, a0, -4
+; RV32IA-WMO-NEXT: andi a0, a0, 3
+; RV32IA-WMO-NEXT: addi a3, a3, -1
+; RV32IA-WMO-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NEXT: sll a5, a3, a0
+; RV32IA-WMO-NEXT: and a1, a1, a3
+; RV32IA-WMO-NEXT: and a2, a2, a3
+; RV32IA-WMO-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NEXT: sll a0, a2, a0
+; RV32IA-WMO-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NEXT: lr.w.aq a2, (a4)
+; RV32IA-WMO-NEXT: and a3, a2, a5
+; RV32IA-WMO-NEXT: bne a3, a1, .LBB12_3
+; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB12_1 Depth=1
+; RV32IA-WMO-NEXT: xor a3, a2, a0
+; RV32IA-WMO-NEXT: and a3, a3, a5
+; RV32IA-WMO-NEXT: xor a3, a2, a3
+; RV32IA-WMO-NEXT: sc.w a3, a3, (a4)
+; RV32IA-WMO-NEXT: bnez a3, .LBB12_1
+; RV32IA-WMO-NEXT: .LBB12_3:
+; RV32IA-WMO-NEXT: ret
+;
+; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i16_acquire_acquire:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-ZACAS-NEXT: andi a4, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: andi a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: sll a5, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-WMO-ZACAS-NEXT: and a2, a2, a3
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: sll a0, a2, a0
+; RV32IA-WMO-ZACAS-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a4)
+; RV32IA-WMO-ZACAS-NEXT: and a3, a2, a5
+; RV32IA-WMO-ZACAS-NEXT: bne a3, a1, .LBB12_3
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB12_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: xor a3, a2, a0
+; RV32IA-WMO-ZACAS-NEXT: and a3, a3, a5
+; RV32IA-WMO-ZACAS-NEXT: xor a3, a2, a3
+; RV32IA-WMO-ZACAS-NEXT: sc.w a3, a3, (a4)
+; RV32IA-WMO-ZACAS-NEXT: bnez a3, .LBB12_1
+; RV32IA-WMO-ZACAS-NEXT: .LBB12_3:
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-LABEL: cmpxchg_i16_acquire_acquire:
+; RV32IA-TSO: # %bb.0:
+; RV32IA-TSO-NEXT: lui a3, 16
+; RV32IA-TSO-NEXT: andi a4, a0, -4
+; RV32IA-TSO-NEXT: andi a0, a0, 3
+; RV32IA-TSO-NEXT: addi a3, a3, -1
+; RV32IA-TSO-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NEXT: sll a5, a3, a0
+; RV32IA-TSO-NEXT: and a1, a1, a3
+; RV32IA-TSO-NEXT: and a2, a2, a3
+; RV32IA-TSO-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NEXT: sll a0, a2, a0
+; RV32IA-TSO-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NEXT: lr.w a2, (a4)
+; RV32IA-TSO-NEXT: and a3, a2, a5
+; RV32IA-TSO-NEXT: bne a3, a1, .LBB12_3
+; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB12_1 Depth=1
+; RV32IA-TSO-NEXT: xor a3, a2, a0
+; RV32IA-TSO-NEXT: and a3, a3, a5
+; RV32IA-TSO-NEXT: xor a3, a2, a3
+; RV32IA-TSO-NEXT: sc.w a3, a3, (a4)
+; RV32IA-TSO-NEXT: bnez a3, .LBB12_1
+; RV32IA-TSO-NEXT: .LBB12_3:
+; RV32IA-TSO-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: cmpxchg_i16_acquire_acquire:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-ZACAS-NEXT: andi a4, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: andi a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: sll a5, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-TSO-ZACAS-NEXT: and a2, a2, a3
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: sll a0, a2, a0
+; RV32IA-TSO-ZACAS-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT: lr.w a2, (a4)
+; RV32IA-TSO-ZACAS-NEXT: and a3, a2, a5
+; RV32IA-TSO-ZACAS-NEXT: bne a3, a1, .LBB12_3
+; RV32IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB12_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT: xor a3, a2, a0
+; RV32IA-TSO-ZACAS-NEXT: and a3, a3, a5
+; RV32IA-TSO-ZACAS-NEXT: xor a3, a2, a3
+; RV32IA-TSO-ZACAS-NEXT: sc.w a3, a3, (a4)
+; RV32IA-TSO-ZACAS-NEXT: bnez a3, .LBB12_1
+; RV32IA-TSO-ZACAS-NEXT: .LBB12_3:
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
+; RV64I-LABEL: cmpxchg_i16_acquire_acquire:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sh a1, 6(sp)
+; RV64I-NEXT: addi a1, sp, 6
+; RV64I-NEXT: li a3, 2
+; RV64I-NEXT: li a4, 2
+; RV64I-NEXT: call __atomic_compare_exchange_2
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-WMO-LABEL: cmpxchg_i16_acquire_acquire:
+; RV64IA-WMO: # %bb.0:
+; RV64IA-WMO-NEXT: lui a3, 16
+; RV64IA-WMO-NEXT: andi a4, a0, -4
+; RV64IA-WMO-NEXT: andi a0, a0, 3
+; RV64IA-WMO-NEXT: addi a3, a3, -1
+; RV64IA-WMO-NEXT: slli a0, a0, 3
+; RV64IA-WMO-NEXT: sllw a5, a3, a0
+; RV64IA-WMO-NEXT: and a1, a1, a3
+; RV64IA-WMO-NEXT: and a2, a2, a3
+; RV64IA-WMO-NEXT: sllw a1, a1, a0
+; RV64IA-WMO-NEXT: sllw a0, a2, a0
+; RV64IA-WMO-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT: lr.w.aq a2, (a4)
+; RV64IA-WMO-NEXT: and a3, a2, a5
+; RV64IA-WMO-NEXT: bne a3, a1, .LBB12_3
+; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB12_1 Depth=1
+; RV64IA-WMO-NEXT: xor a3, a2, a0
+; RV64IA-WMO-NEXT: and a3, a3, a5
+; RV64IA-WMO-NEXT: xor a3, a2, a3
+; RV64IA-WMO-NEXT: sc.w a3, a3, (a4)
+; RV64IA-WMO-NEXT: bnez a3, .LBB12_1
+; RV64IA-WMO-NEXT: .LBB12_3:
+; RV64IA-WMO-NEXT: ret
+;
+; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i16_acquire_acquire:
+; RV64IA-WMO-ZACAS: # %bb.0:
+; RV64IA-WMO-ZACAS-NEXT: lui a3, 16
+; RV64IA-WMO-ZACAS-NEXT: andi a4, a0, -4
+; RV64IA-WMO-ZACAS-NEXT: andi a0, a0, 3
+; RV64IA-WMO-ZACAS-NEXT: addi a3, a3, -1
+; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV64IA-WMO-ZACAS-NEXT: sllw a5, a3, a0
+; RV64IA-WMO-ZACAS-NEXT: and a1, a1, a3
+; RV64IA-WMO-ZACAS-NEXT: and a2, a2, a3
+; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0
+; RV64IA-WMO-ZACAS-NEXT: sllw a0, a2, a0
+; RV64IA-WMO-ZACAS-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a4)
+; RV64IA-WMO-ZACAS-NEXT: and a3, a2, a5
+; RV64IA-WMO-ZACAS-NEXT: bne a3, a1, .LBB12_3
+; RV64IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB12_1 Depth=1
+; RV64IA-WMO-ZACAS-NEXT: xor a3, a2, a0
+; RV64IA-WMO-ZACAS-NEXT: and a3, a3, a5
+; RV64IA-WMO-ZACAS-NEXT: xor a3, a2, a3
+; RV64IA-WMO-ZACAS-NEXT: sc.w a3, a3, (a4)
+; RV64IA-WMO-ZACAS-NEXT: bnez a3, .LBB12_1
+; RV64IA-WMO-ZACAS-NEXT: .LBB12_3:
+; RV64IA-WMO-ZACAS-NEXT: ret
+;
+; RV64IA-WMO-ZABHA-LABEL: cmpxchg_i16_acquire_acquire:
+; RV64IA-WMO-ZABHA: # %bb.0:
+; RV64IA-WMO-ZABHA-NEXT: amocas.h.aq a1, a2, (a0)
+; RV64IA-WMO-ZABHA-NEXT: ret
+;
+; RV64IA-TSO-LABEL: cmpxchg_i16_acquire_acquire:
+; RV64IA-TSO: # %bb.0:
+; RV64IA-TSO-NEXT: lui a3, 16
+; RV64IA-TSO-NEXT: andi a4, a0, -4
+; RV64IA-TSO-NEXT: andi a0, a0, 3
+; RV64IA-TSO-NEXT: addi a3, a3, -1
+; RV64IA-TSO-NEXT: slli a0, a0, 3
+; RV64IA-TSO-NEXT: sllw a5, a3, a0
+; RV64IA-TSO-NEXT: and a1, a1, a3
+; RV64IA-TSO-NEXT: and a2, a2, a3
+; RV64IA-TSO-NEXT: sllw a1, a1, a0
+; RV64IA-TSO-NEXT: sllw a0, a2, a0
+; RV64IA-TSO-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT: lr.w a2, (a4)
+; RV64IA-TSO-NEXT: and a3, a2, a5
+; RV64IA-TSO-NEXT: bne a3, a1, .LBB12_3
+; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB12_1 Depth=1
+; RV64IA-TSO-NEXT: xor a3, a2, a0
+; RV64IA-TSO-NEXT: and a3, a3, a5
+; RV64IA-TSO-NEXT: xor a3, a2, a3
+; RV64IA-TSO-NEXT: sc.w a3, a3, (a4)
+; RV64IA-TSO-NEXT: bnez a3, .LBB12_1
+; RV64IA-TSO-NEXT: .LBB12_3:
+; RV64IA-TSO-NEXT: ret
+;
+; RV64IA-TSO-ZACAS-LABEL: cmpxchg_i16_acquire_acquire:
+; RV64IA-TSO-ZACAS: # %bb.0:
+; RV64IA-TSO-ZACAS-NEXT: lui a3, 16
+; RV64IA-TSO-ZACAS-NEXT: andi a4, a0, -4
+; RV64IA-TSO-ZACAS-NEXT: andi a0, a0, 3
+; RV64IA-TSO-ZACAS-NEXT: addi a3, a3, -1
+; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV64IA-TSO-ZACAS-NEXT: sllw a5, a3, a0
+; RV64IA-TSO-ZACAS-NEXT: and a1, a1, a3
+; RV64IA-TSO-ZACAS-NEXT: and a2, a2, a3
+; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a0
+; RV64IA-TSO-ZACAS-NEXT: sllw a0, a2, a0
+; RV64IA-TSO-ZACAS-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-ZACAS-NEXT: lr.w a2, (a4)
+; RV64IA-TSO-ZACAS-NEXT: and a3, a2, a5
+; RV64IA-TSO-ZACAS-NEXT: bne a3, a1, .LBB12_3
+; RV64IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB12_1 Depth=1
+; RV64IA-TSO-ZACAS-NEXT: xor a3, a2, a0
+; RV64IA-TSO-ZACAS-NEXT: and a3, a3, a5
+; RV64IA-TSO-ZACAS-NEXT: xor a3, a2, a3
+; RV64IA-TSO-ZACAS-NEXT: sc.w a3, a3, (a4)
+; RV64IA-TSO-ZACAS-NEXT: bnez a3, .LBB12_1
+; RV64IA-TSO-ZACAS-NEXT: .LBB12_3:
+; RV64IA-TSO-ZACAS-NEXT: ret
+;
+; RV64IA-TSO-ZABHA-LABEL: cmpxchg_i16_acquire_acquire:
+; RV64IA-TSO-ZABHA: # %bb.0:
+; RV64IA-TSO-ZABHA-NEXT: amocas.h a1, a2, (a0)
+; RV64IA-TSO-ZABHA-NEXT: ret
+ %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val acquire acquire
+ ret void
+}
+
+define void @cmpxchg_i16_release_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwind {
+; RV32I-LABEL: cmpxchg_i16_release_monotonic:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sh a1, 10(sp)
+; RV32I-NEXT: addi a1, sp, 10
+; RV32I-NEXT: li a3, 3
+; RV32I-NEXT: li a4, 0
+; RV32I-NEXT: call __atomic_compare_exchange_2
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-WMO-LABEL: cmpxchg_i16_release_monotonic:
+; RV32IA-WMO: # %bb.0:
+; RV32IA-WMO-NEXT: lui a3, 16
+; RV32IA-WMO-NEXT: andi a4, a0, -4
+; RV32IA-WMO-NEXT: andi a0, a0, 3
+; RV32IA-WMO-NEXT: addi a3, a3, -1
+; RV32IA-WMO-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NEXT: sll a5, a3, a0
+; RV32IA-WMO-NEXT: and a1, a1, a3
+; RV32IA-WMO-NEXT: and a2, a2, a3
+; RV32IA-WMO-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NEXT: sll a0, a2, a0
+; RV32IA-WMO-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NEXT: lr.w a2, (a4)
+; RV32IA-WMO-NEXT: and a3, a2, a5
+; RV32IA-WMO-NEXT: bne a3, a1, .LBB13_3
+; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB13_1 Depth=1
+; RV32IA-WMO-NEXT: xor a3, a2, a0
+; RV32IA-WMO-NEXT: and a3, a3, a5
+; RV32IA-WMO-NEXT: xor a3, a2, a3
+; RV32IA-WMO-NEXT: sc.w.rl a3, a3, (a4)
+; RV32IA-WMO-NEXT: bnez a3, .LBB13_1
+; RV32IA-WMO-NEXT: .LBB13_3:
+; RV32IA-WMO-NEXT: ret
+;
+; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i16_release_monotonic:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-ZACAS-NEXT: andi a4, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: andi a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: sll a5, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-WMO-ZACAS-NEXT: and a2, a2, a3
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: sll a0, a2, a0
+; RV32IA-WMO-ZACAS-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w a2, (a4)
+; RV32IA-WMO-ZACAS-NEXT: and a3, a2, a5
+; RV32IA-WMO-ZACAS-NEXT: bne a3, a1, .LBB13_3
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB13_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: xor a3, a2, a0
+; RV32IA-WMO-ZACAS-NEXT: and a3, a3, a5
+; RV32IA-WMO-ZACAS-NEXT: xor a3, a2, a3
+; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a3, a3, (a4)
+; RV32IA-WMO-ZACAS-NEXT: bnez a3, .LBB13_1
+; RV32IA-WMO-ZACAS-NEXT: .LBB13_3:
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-LABEL: cmpxchg_i16_release_monotonic:
+; RV32IA-TSO: # %bb.0:
+; RV32IA-TSO-NEXT: lui a3, 16
+; RV32IA-TSO-NEXT: andi a4, a0, -4
+; RV32IA-TSO-NEXT: andi a0, a0, 3
+; RV32IA-TSO-NEXT: addi a3, a3, -1
+; RV32IA-TSO-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NEXT: sll a5, a3, a0
+; RV32IA-TSO-NEXT: and a1, a1, a3
+; RV32IA-TSO-NEXT: and a2, a2, a3
+; RV32IA-TSO-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NEXT: sll a0, a2, a0
+; RV32IA-TSO-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NEXT: lr.w a2, (a4)
+; RV32IA-TSO-NEXT: and a3, a2, a5
+; RV32IA-TSO-NEXT: bne a3, a1, .LBB13_3
+; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB13_1 Depth=1
+; RV32IA-TSO-NEXT: xor a3, a2, a0
+; RV32IA-TSO-NEXT: and a3, a3, a5
+; RV32IA-TSO-NEXT: xor a3, a2, a3
+; RV32IA-TSO-NEXT: sc.w a3, a3, (a4)
+; RV32IA-TSO-NEXT: bnez a3, .LBB13_1
+; RV32IA-TSO-NEXT: .LBB13_3:
+; RV32IA-TSO-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: cmpxchg_i16_release_monotonic:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-ZACAS-NEXT: andi a4, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: andi a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: sll a5, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-TSO-ZACAS-NEXT: and a2, a2, a3
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: sll a0, a2, a0
+; RV32IA-TSO-ZACAS-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT: lr.w a2, (a4)
+; RV32IA-TSO-ZACAS-NEXT: and a3, a2, a5
+; RV32IA-TSO-ZACAS-NEXT: bne a3, a1, .LBB13_3
+; RV32IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB13_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT: xor a3, a2, a0
+; RV32IA-TSO-ZACAS-NEXT: and a3, a3, a5
+; RV32IA-TSO-ZACAS-NEXT: xor a3, a2, a3
+; RV32IA-TSO-ZACAS-NEXT: sc.w a3, a3, (a4)
+; RV32IA-TSO-ZACAS-NEXT: bnez a3, .LBB13_1
+; RV32IA-TSO-ZACAS-NEXT: .LBB13_3:
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
+; RV64I-LABEL: cmpxchg_i16_release_monotonic:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sh a1, 6(sp)
+; RV64I-NEXT: addi a1, sp, 6
+; RV64I-NEXT: li a3, 3
+; RV64I-NEXT: li a4, 0
+; RV64I-NEXT: call __atomic_compare_exchange_2
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-WMO-LABEL: cmpxchg_i16_release_monotonic:
+; RV64IA-WMO: # %bb.0:
+; RV64IA-WMO-NEXT: lui a3, 16
+; RV64IA-WMO-NEXT: andi a4, a0, -4
+; RV64IA-WMO-NEXT: andi a0, a0, 3
+; RV64IA-WMO-NEXT: addi a3, a3, -1
+; RV64IA-WMO-NEXT: slli a0, a0, 3
+; RV64IA-WMO-NEXT: sllw a5, a3, a0
+; RV64IA-WMO-NEXT: and a1, a1, a3
+; RV64IA-WMO-NEXT: and a2, a2, a3
+; RV64IA-WMO-NEXT: sllw a1, a1, a0
+; RV64IA-WMO-NEXT: sllw a0, a2, a0
+; RV64IA-WMO-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT: lr.w a2, (a4)
+; RV64IA-WMO-NEXT: and a3, a2, a5
+; RV64IA-WMO-NEXT: bne a3, a1, .LBB13_3
+; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB13_1 Depth=1
+; RV64IA-WMO-NEXT: xor a3, a2, a0
+; RV64IA-WMO-NEXT: and a3, a3, a5
+; RV64IA-WMO-NEXT: xor a3, a2, a3
+; RV64IA-WMO-NEXT: sc.w.rl a3, a3, (a4)
+; RV64IA-WMO-NEXT: bnez a3, .LBB13_1
+; RV64IA-WMO-NEXT: .LBB13_3:
+; RV64IA-WMO-NEXT: ret
+;
+; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i16_release_monotonic:
+; RV64IA-WMO-ZACAS: # %bb.0:
+; RV64IA-WMO-ZACAS-NEXT: lui a3, 16
+; RV64IA-WMO-ZACAS-NEXT: andi a4, a0, -4
+; RV64IA-WMO-ZACAS-NEXT: andi a0, a0, 3
+; RV64IA-WMO-ZACAS-NEXT: addi a3, a3, -1
+; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV64IA-WMO-ZACAS-NEXT: sllw a5, a3, a0
+; RV64IA-WMO-ZACAS-NEXT: and a1, a1, a3
+; RV64IA-WMO-ZACAS-NEXT: and a2, a2, a3
+; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0
+; RV64IA-WMO-ZACAS-NEXT: sllw a0, a2, a0
+; RV64IA-WMO-ZACAS-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-ZACAS-NEXT: lr.w a2, (a4)
+; RV64IA-WMO-ZACAS-NEXT: and a3, a2, a5
+; RV64IA-WMO-ZACAS-NEXT: bne a3, a1, .LBB13_3
+; RV64IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB13_1 Depth=1
+; RV64IA-WMO-ZACAS-NEXT: xor a3, a2, a0
+; RV64IA-WMO-ZACAS-NEXT: and a3, a3, a5
+; RV64IA-WMO-ZACAS-NEXT: xor a3, a2, a3
+; RV64IA-WMO-ZACAS-NEXT: sc.w.rl a3, a3, (a4)
+; RV64IA-WMO-ZACAS-NEXT: bnez a3, .LBB13_1
+; RV64IA-WMO-ZACAS-NEXT: .LBB13_3:
+; RV64IA-WMO-ZACAS-NEXT: ret
+;
+; RV64IA-WMO-ZABHA-LABEL: cmpxchg_i16_release_monotonic:
+; RV64IA-WMO-ZABHA: # %bb.0:
+; RV64IA-WMO-ZABHA-NEXT: amocas.h.rl a1, a2, (a0)
+; RV64IA-WMO-ZABHA-NEXT: ret
+;
+; RV64IA-TSO-LABEL: cmpxchg_i16_release_monotonic:
+; RV64IA-TSO: # %bb.0:
+; RV64IA-TSO-NEXT: lui a3, 16
+; RV64IA-TSO-NEXT: andi a4, a0, -4
+; RV64IA-TSO-NEXT: andi a0, a0, 3
+; RV64IA-TSO-NEXT: addi a3, a3, -1
+; RV64IA-TSO-NEXT: slli a0, a0, 3
+; RV64IA-TSO-NEXT: sllw a5, a3, a0
+; RV64IA-TSO-NEXT: and a1, a1, a3
+; RV64IA-TSO-NEXT: and a2, a2, a3
+; RV64IA-TSO-NEXT: sllw a1, a1, a0
+; RV64IA-TSO-NEXT: sllw a0, a2, a0
+; RV64IA-TSO-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT: lr.w a2, (a4)
+; RV64IA-TSO-NEXT: and a3, a2, a5
+; RV64IA-TSO-NEXT: bne a3, a1, .LBB13_3
+; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB13_1 Depth=1
+; RV64IA-TSO-NEXT: xor a3, a2, a0
+; RV64IA-TSO-NEXT: and a3, a3, a5
+; RV64IA-TSO-NEXT: xor a3, a2, a3
+; RV64IA-TSO-NEXT: sc.w a3, a3, (a4)
+; RV64IA-TSO-NEXT: bnez a3, .LBB13_1
+; RV64IA-TSO-NEXT: .LBB13_3:
+; RV64IA-TSO-NEXT: ret
+;
+; RV64IA-TSO-ZACAS-LABEL: cmpxchg_i16_release_monotonic:
+; RV64IA-TSO-ZACAS: # %bb.0:
+; RV64IA-TSO-ZACAS-NEXT: lui a3, 16
+; RV64IA-TSO-ZACAS-NEXT: andi a4, a0, -4
+; RV64IA-TSO-ZACAS-NEXT: andi a0, a0, 3
+; RV64IA-TSO-ZACAS-NEXT: addi a3, a3, -1
+; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV64IA-TSO-ZACAS-NEXT: sllw a5, a3, a0
+; RV64IA-TSO-ZACAS-NEXT: and a1, a1, a3
+; RV64IA-TSO-ZACAS-NEXT: and a2, a2, a3
+; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a0
+; RV64IA-TSO-ZACAS-NEXT: sllw a0, a2, a0
+; RV64IA-TSO-ZACAS-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-ZACAS-NEXT: lr.w a2, (a4)
+; RV64IA-TSO-ZACAS-NEXT: and a3, a2, a5
+; RV64IA-TSO-ZACAS-NEXT: bne a3, a1, .LBB13_3
+; RV64IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB13_1 Depth=1
+; RV64IA-TSO-ZACAS-NEXT: xor a3, a2, a0
+; RV64IA-TSO-ZACAS-NEXT: and a3, a3, a5
+; RV64IA-TSO-ZACAS-NEXT: xor a3, a2, a3
+; RV64IA-TSO-ZACAS-NEXT: sc.w a3, a3, (a4)
+; RV64IA-TSO-ZACAS-NEXT: bnez a3, .LBB13_1
+; RV64IA-TSO-ZACAS-NEXT: .LBB13_3:
+; RV64IA-TSO-ZACAS-NEXT: ret
+;
+; RV64IA-TSO-ZABHA-LABEL: cmpxchg_i16_release_monotonic:
+; RV64IA-TSO-ZABHA: # %bb.0:
+; RV64IA-TSO-ZABHA-NEXT: amocas.h a1, a2, (a0)
+; RV64IA-TSO-ZABHA-NEXT: ret
+  ; i16 cmpxchg with release/monotonic ordering; result is discarded.
+  ; Configs without A expand to a __atomic_compare_exchange_2 libcall; A configs
+  ; use a masked word-wide LR/SC loop; Zabha configs emit amocas.h directly.
+  ; NOTE(review): CHECK lines above appear autogenerated (one prefix per RUN
+  ; configuration) — regenerate them rather than hand-editing.
+  %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val release monotonic
+  ret void
+}
+
+define void @cmpxchg_i16_release_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind {
+; RV32I-LABEL: cmpxchg_i16_release_acquire:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sh a1, 10(sp)
+; RV32I-NEXT: addi a1, sp, 10
+; RV32I-NEXT: li a3, 3
+; RV32I-NEXT: li a4, 2
+; RV32I-NEXT: call __atomic_compare_exchange_2
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-WMO-LABEL: cmpxchg_i16_release_acquire:
+; RV32IA-WMO: # %bb.0:
+; RV32IA-WMO-NEXT: lui a3, 16
+; RV32IA-WMO-NEXT: andi a4, a0, -4
+; RV32IA-WMO-NEXT: andi a0, a0, 3
+; RV32IA-WMO-NEXT: addi a3, a3, -1
+; RV32IA-WMO-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NEXT: sll a5, a3, a0
+; RV32IA-WMO-NEXT: and a1, a1, a3
+; RV32IA-WMO-NEXT: and a2, a2, a3
+; RV32IA-WMO-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NEXT: sll a0, a2, a0
+; RV32IA-WMO-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NEXT: lr.w.aq a2, (a4)
+; RV32IA-WMO-NEXT: and a3, a2, a5
+; RV32IA-WMO-NEXT: bne a3, a1, .LBB14_3
+; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB14_1 Depth=1
+; RV32IA-WMO-NEXT: xor a3, a2, a0
+; RV32IA-WMO-NEXT: and a3, a3, a5
+; RV32IA-WMO-NEXT: xor a3, a2, a3
+; RV32IA-WMO-NEXT: sc.w.rl a3, a3, (a4)
+; RV32IA-WMO-NEXT: bnez a3, .LBB14_1
+; RV32IA-WMO-NEXT: .LBB14_3:
+; RV32IA-WMO-NEXT: ret
+;
+; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i16_release_acquire:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-ZACAS-NEXT: andi a4, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: andi a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: sll a5, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-WMO-ZACAS-NEXT: and a2, a2, a3
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: sll a0, a2, a0
+; RV32IA-WMO-ZACAS-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a4)
+; RV32IA-WMO-ZACAS-NEXT: and a3, a2, a5
+; RV32IA-WMO-ZACAS-NEXT: bne a3, a1, .LBB14_3
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB14_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: xor a3, a2, a0
+; RV32IA-WMO-ZACAS-NEXT: and a3, a3, a5
+; RV32IA-WMO-ZACAS-NEXT: xor a3, a2, a3
+; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a3, a3, (a4)
+; RV32IA-WMO-ZACAS-NEXT: bnez a3, .LBB14_1
+; RV32IA-WMO-ZACAS-NEXT: .LBB14_3:
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-LABEL: cmpxchg_i16_release_acquire:
+; RV32IA-TSO: # %bb.0:
+; RV32IA-TSO-NEXT: lui a3, 16
+; RV32IA-TSO-NEXT: andi a4, a0, -4
+; RV32IA-TSO-NEXT: andi a0, a0, 3
+; RV32IA-TSO-NEXT: addi a3, a3, -1
+; RV32IA-TSO-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NEXT: sll a5, a3, a0
+; RV32IA-TSO-NEXT: and a1, a1, a3
+; RV32IA-TSO-NEXT: and a2, a2, a3
+; RV32IA-TSO-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NEXT: sll a0, a2, a0
+; RV32IA-TSO-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NEXT: lr.w a2, (a4)
+; RV32IA-TSO-NEXT: and a3, a2, a5
+; RV32IA-TSO-NEXT: bne a3, a1, .LBB14_3
+; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB14_1 Depth=1
+; RV32IA-TSO-NEXT: xor a3, a2, a0
+; RV32IA-TSO-NEXT: and a3, a3, a5
+; RV32IA-TSO-NEXT: xor a3, a2, a3
+; RV32IA-TSO-NEXT: sc.w a3, a3, (a4)
+; RV32IA-TSO-NEXT: bnez a3, .LBB14_1
+; RV32IA-TSO-NEXT: .LBB14_3:
+; RV32IA-TSO-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: cmpxchg_i16_release_acquire:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-ZACAS-NEXT: andi a4, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: andi a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: sll a5, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-TSO-ZACAS-NEXT: and a2, a2, a3
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: sll a0, a2, a0
+; RV32IA-TSO-ZACAS-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT: lr.w a2, (a4)
+; RV32IA-TSO-ZACAS-NEXT: and a3, a2, a5
+; RV32IA-TSO-ZACAS-NEXT: bne a3, a1, .LBB14_3
+; RV32IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB14_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT: xor a3, a2, a0
+; RV32IA-TSO-ZACAS-NEXT: and a3, a3, a5
+; RV32IA-TSO-ZACAS-NEXT: xor a3, a2, a3
+; RV32IA-TSO-ZACAS-NEXT: sc.w a3, a3, (a4)
+; RV32IA-TSO-ZACAS-NEXT: bnez a3, .LBB14_1
+; RV32IA-TSO-ZACAS-NEXT: .LBB14_3:
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
+; RV64I-LABEL: cmpxchg_i16_release_acquire:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sh a1, 6(sp)
+; RV64I-NEXT: addi a1, sp, 6
+; RV64I-NEXT: li a3, 3
+; RV64I-NEXT: li a4, 2
+; RV64I-NEXT: call __atomic_compare_exchange_2
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-WMO-LABEL: cmpxchg_i16_release_acquire:
+; RV64IA-WMO: # %bb.0:
+; RV64IA-WMO-NEXT: lui a3, 16
+; RV64IA-WMO-NEXT: andi a4, a0, -4
+; RV64IA-WMO-NEXT: andi a0, a0, 3
+; RV64IA-WMO-NEXT: addi a3, a3, -1
+; RV64IA-WMO-NEXT: slli a0, a0, 3
+; RV64IA-WMO-NEXT: sllw a5, a3, a0
+; RV64IA-WMO-NEXT: and a1, a1, a3
+; RV64IA-WMO-NEXT: and a2, a2, a3
+; RV64IA-WMO-NEXT: sllw a1, a1, a0
+; RV64IA-WMO-NEXT: sllw a0, a2, a0
+; RV64IA-WMO-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT: lr.w.aq a2, (a4)
+; RV64IA-WMO-NEXT: and a3, a2, a5
+; RV64IA-WMO-NEXT: bne a3, a1, .LBB14_3
+; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB14_1 Depth=1
+; RV64IA-WMO-NEXT: xor a3, a2, a0
+; RV64IA-WMO-NEXT: and a3, a3, a5
+; RV64IA-WMO-NEXT: xor a3, a2, a3
+; RV64IA-WMO-NEXT: sc.w.rl a3, a3, (a4)
+; RV64IA-WMO-NEXT: bnez a3, .LBB14_1
+; RV64IA-WMO-NEXT: .LBB14_3:
+; RV64IA-WMO-NEXT: ret
+;
+; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i16_release_acquire:
+; RV64IA-WMO-ZACAS: # %bb.0:
+; RV64IA-WMO-ZACAS-NEXT: lui a3, 16
+; RV64IA-WMO-ZACAS-NEXT: andi a4, a0, -4
+; RV64IA-WMO-ZACAS-NEXT: andi a0, a0, 3
+; RV64IA-WMO-ZACAS-NEXT: addi a3, a3, -1
+; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV64IA-WMO-ZACAS-NEXT: sllw a5, a3, a0
+; RV64IA-WMO-ZACAS-NEXT: and a1, a1, a3
+; RV64IA-WMO-ZACAS-NEXT: and a2, a2, a3
+; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0
+; RV64IA-WMO-ZACAS-NEXT: sllw a0, a2, a0
+; RV64IA-WMO-ZACAS-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a4)
+; RV64IA-WMO-ZACAS-NEXT: and a3, a2, a5
+; RV64IA-WMO-ZACAS-NEXT: bne a3, a1, .LBB14_3
+; RV64IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB14_1 Depth=1
+; RV64IA-WMO-ZACAS-NEXT: xor a3, a2, a0
+; RV64IA-WMO-ZACAS-NEXT: and a3, a3, a5
+; RV64IA-WMO-ZACAS-NEXT: xor a3, a2, a3
+; RV64IA-WMO-ZACAS-NEXT: sc.w.rl a3, a3, (a4)
+; RV64IA-WMO-ZACAS-NEXT: bnez a3, .LBB14_1
+; RV64IA-WMO-ZACAS-NEXT: .LBB14_3:
+; RV64IA-WMO-ZACAS-NEXT: ret
+;
+; RV64IA-WMO-ZABHA-LABEL: cmpxchg_i16_release_acquire:
+; RV64IA-WMO-ZABHA: # %bb.0:
+; RV64IA-WMO-ZABHA-NEXT: amocas.h.aqrl a1, a2, (a0)
+; RV64IA-WMO-ZABHA-NEXT: ret
+;
+; RV64IA-TSO-LABEL: cmpxchg_i16_release_acquire:
+; RV64IA-TSO: # %bb.0:
+; RV64IA-TSO-NEXT: lui a3, 16
+; RV64IA-TSO-NEXT: andi a4, a0, -4
+; RV64IA-TSO-NEXT: andi a0, a0, 3
+; RV64IA-TSO-NEXT: addi a3, a3, -1
+; RV64IA-TSO-NEXT: slli a0, a0, 3
+; RV64IA-TSO-NEXT: sllw a5, a3, a0
+; RV64IA-TSO-NEXT: and a1, a1, a3
+; RV64IA-TSO-NEXT: and a2, a2, a3
+; RV64IA-TSO-NEXT: sllw a1, a1, a0
+; RV64IA-TSO-NEXT: sllw a0, a2, a0
+; RV64IA-TSO-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT: lr.w a2, (a4)
+; RV64IA-TSO-NEXT: and a3, a2, a5
+; RV64IA-TSO-NEXT: bne a3, a1, .LBB14_3
+; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB14_1 Depth=1
+; RV64IA-TSO-NEXT: xor a3, a2, a0
+; RV64IA-TSO-NEXT: and a3, a3, a5
+; RV64IA-TSO-NEXT: xor a3, a2, a3
+; RV64IA-TSO-NEXT: sc.w a3, a3, (a4)
+; RV64IA-TSO-NEXT: bnez a3, .LBB14_1
+; RV64IA-TSO-NEXT: .LBB14_3:
+; RV64IA-TSO-NEXT: ret
+;
+; RV64IA-TSO-ZACAS-LABEL: cmpxchg_i16_release_acquire:
+; RV64IA-TSO-ZACAS: # %bb.0:
+; RV64IA-TSO-ZACAS-NEXT: lui a3, 16
+; RV64IA-TSO-ZACAS-NEXT: andi a4, a0, -4
+; RV64IA-TSO-ZACAS-NEXT: andi a0, a0, 3
+; RV64IA-TSO-ZACAS-NEXT: addi a3, a3, -1
+; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV64IA-TSO-ZACAS-NEXT: sllw a5, a3, a0
+; RV64IA-TSO-ZACAS-NEXT: and a1, a1, a3
+; RV64IA-TSO-ZACAS-NEXT: and a2, a2, a3
+; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a0
+; RV64IA-TSO-ZACAS-NEXT: sllw a0, a2, a0
+; RV64IA-TSO-ZACAS-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-ZACAS-NEXT: lr.w a2, (a4)
+; RV64IA-TSO-ZACAS-NEXT: and a3, a2, a5
+; RV64IA-TSO-ZACAS-NEXT: bne a3, a1, .LBB14_3
+; RV64IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB14_1 Depth=1
+; RV64IA-TSO-ZACAS-NEXT: xor a3, a2, a0
+; RV64IA-TSO-ZACAS-NEXT: and a3, a3, a5
+; RV64IA-TSO-ZACAS-NEXT: xor a3, a2, a3
+; RV64IA-TSO-ZACAS-NEXT: sc.w a3, a3, (a4)
+; RV64IA-TSO-ZACAS-NEXT: bnez a3, .LBB14_1
+; RV64IA-TSO-ZACAS-NEXT: .LBB14_3:
+; RV64IA-TSO-ZACAS-NEXT: ret
+;
+; RV64IA-TSO-ZABHA-LABEL: cmpxchg_i16_release_acquire:
+; RV64IA-TSO-ZABHA: # %bb.0:
+; RV64IA-TSO-ZABHA-NEXT: amocas.h a1, a2, (a0)
+; RV64IA-TSO-ZABHA-NEXT: ret
+  ; i16 cmpxchg with release/acquire (success/failure) ordering; result is
+  ; discarded. WMO configs use lr.w.aq/sc.w.rl in the masked loop; TSO configs
+  ; drop the ordering suffixes; Zabha emits amocas.h(.aqrl).
+  ; NOTE(review): CHECK lines above appear autogenerated — regenerate rather
+  ; than hand-editing.
+  %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val release acquire
+  ret void
+}
+
+define void @cmpxchg_i16_acq_rel_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwind {
+; RV32I-LABEL: cmpxchg_i16_acq_rel_monotonic:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sh a1, 10(sp)
+; RV32I-NEXT: addi a1, sp, 10
+; RV32I-NEXT: li a3, 4
+; RV32I-NEXT: li a4, 0
+; RV32I-NEXT: call __atomic_compare_exchange_2
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-WMO-LABEL: cmpxchg_i16_acq_rel_monotonic:
+; RV32IA-WMO: # %bb.0:
+; RV32IA-WMO-NEXT: lui a3, 16
+; RV32IA-WMO-NEXT: andi a4, a0, -4
+; RV32IA-WMO-NEXT: andi a0, a0, 3
+; RV32IA-WMO-NEXT: addi a3, a3, -1
+; RV32IA-WMO-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NEXT: sll a5, a3, a0
+; RV32IA-WMO-NEXT: and a1, a1, a3
+; RV32IA-WMO-NEXT: and a2, a2, a3
+; RV32IA-WMO-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NEXT: sll a0, a2, a0
+; RV32IA-WMO-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NEXT: lr.w.aq a2, (a4)
+; RV32IA-WMO-NEXT: and a3, a2, a5
+; RV32IA-WMO-NEXT: bne a3, a1, .LBB15_3
+; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1
+; RV32IA-WMO-NEXT: xor a3, a2, a0
+; RV32IA-WMO-NEXT: and a3, a3, a5
+; RV32IA-WMO-NEXT: xor a3, a2, a3
+; RV32IA-WMO-NEXT: sc.w.rl a3, a3, (a4)
+; RV32IA-WMO-NEXT: bnez a3, .LBB15_1
+; RV32IA-WMO-NEXT: .LBB15_3:
+; RV32IA-WMO-NEXT: ret
+;
+; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i16_acq_rel_monotonic:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-ZACAS-NEXT: andi a4, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: andi a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: sll a5, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-WMO-ZACAS-NEXT: and a2, a2, a3
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: sll a0, a2, a0
+; RV32IA-WMO-ZACAS-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a4)
+; RV32IA-WMO-ZACAS-NEXT: and a3, a2, a5
+; RV32IA-WMO-ZACAS-NEXT: bne a3, a1, .LBB15_3
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: xor a3, a2, a0
+; RV32IA-WMO-ZACAS-NEXT: and a3, a3, a5
+; RV32IA-WMO-ZACAS-NEXT: xor a3, a2, a3
+; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a3, a3, (a4)
+; RV32IA-WMO-ZACAS-NEXT: bnez a3, .LBB15_1
+; RV32IA-WMO-ZACAS-NEXT: .LBB15_3:
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-LABEL: cmpxchg_i16_acq_rel_monotonic:
+; RV32IA-TSO: # %bb.0:
+; RV32IA-TSO-NEXT: lui a3, 16
+; RV32IA-TSO-NEXT: andi a4, a0, -4
+; RV32IA-TSO-NEXT: andi a0, a0, 3
+; RV32IA-TSO-NEXT: addi a3, a3, -1
+; RV32IA-TSO-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NEXT: sll a5, a3, a0
+; RV32IA-TSO-NEXT: and a1, a1, a3
+; RV32IA-TSO-NEXT: and a2, a2, a3
+; RV32IA-TSO-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NEXT: sll a0, a2, a0
+; RV32IA-TSO-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NEXT: lr.w a2, (a4)
+; RV32IA-TSO-NEXT: and a3, a2, a5
+; RV32IA-TSO-NEXT: bne a3, a1, .LBB15_3
+; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1
+; RV32IA-TSO-NEXT: xor a3, a2, a0
+; RV32IA-TSO-NEXT: and a3, a3, a5
+; RV32IA-TSO-NEXT: xor a3, a2, a3
+; RV32IA-TSO-NEXT: sc.w a3, a3, (a4)
+; RV32IA-TSO-NEXT: bnez a3, .LBB15_1
+; RV32IA-TSO-NEXT: .LBB15_3:
+; RV32IA-TSO-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: cmpxchg_i16_acq_rel_monotonic:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-ZACAS-NEXT: andi a4, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: andi a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: sll a5, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-TSO-ZACAS-NEXT: and a2, a2, a3
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: sll a0, a2, a0
+; RV32IA-TSO-ZACAS-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT: lr.w a2, (a4)
+; RV32IA-TSO-ZACAS-NEXT: and a3, a2, a5
+; RV32IA-TSO-ZACAS-NEXT: bne a3, a1, .LBB15_3
+; RV32IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT: xor a3, a2, a0
+; RV32IA-TSO-ZACAS-NEXT: and a3, a3, a5
+; RV32IA-TSO-ZACAS-NEXT: xor a3, a2, a3
+; RV32IA-TSO-ZACAS-NEXT: sc.w a3, a3, (a4)
+; RV32IA-TSO-ZACAS-NEXT: bnez a3, .LBB15_1
+; RV32IA-TSO-ZACAS-NEXT: .LBB15_3:
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
+; RV64I-LABEL: cmpxchg_i16_acq_rel_monotonic:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sh a1, 6(sp)
+; RV64I-NEXT: addi a1, sp, 6
+; RV64I-NEXT: li a3, 4
+; RV64I-NEXT: li a4, 0
+; RV64I-NEXT: call __atomic_compare_exchange_2
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-WMO-LABEL: cmpxchg_i16_acq_rel_monotonic:
+; RV64IA-WMO: # %bb.0:
+; RV64IA-WMO-NEXT: lui a3, 16
+; RV64IA-WMO-NEXT: andi a4, a0, -4
+; RV64IA-WMO-NEXT: andi a0, a0, 3
+; RV64IA-WMO-NEXT: addi a3, a3, -1
+; RV64IA-WMO-NEXT: slli a0, a0, 3
+; RV64IA-WMO-NEXT: sllw a5, a3, a0
+; RV64IA-WMO-NEXT: and a1, a1, a3
+; RV64IA-WMO-NEXT: and a2, a2, a3
+; RV64IA-WMO-NEXT: sllw a1, a1, a0
+; RV64IA-WMO-NEXT: sllw a0, a2, a0
+; RV64IA-WMO-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT: lr.w.aq a2, (a4)
+; RV64IA-WMO-NEXT: and a3, a2, a5
+; RV64IA-WMO-NEXT: bne a3, a1, .LBB15_3
+; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1
+; RV64IA-WMO-NEXT: xor a3, a2, a0
+; RV64IA-WMO-NEXT: and a3, a3, a5
+; RV64IA-WMO-NEXT: xor a3, a2, a3
+; RV64IA-WMO-NEXT: sc.w.rl a3, a3, (a4)
+; RV64IA-WMO-NEXT: bnez a3, .LBB15_1
+; RV64IA-WMO-NEXT: .LBB15_3:
+; RV64IA-WMO-NEXT: ret
+;
+; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i16_acq_rel_monotonic:
+; RV64IA-WMO-ZACAS: # %bb.0:
+; RV64IA-WMO-ZACAS-NEXT: lui a3, 16
+; RV64IA-WMO-ZACAS-NEXT: andi a4, a0, -4
+; RV64IA-WMO-ZACAS-NEXT: andi a0, a0, 3
+; RV64IA-WMO-ZACAS-NEXT: addi a3, a3, -1
+; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV64IA-WMO-ZACAS-NEXT: sllw a5, a3, a0
+; RV64IA-WMO-ZACAS-NEXT: and a1, a1, a3
+; RV64IA-WMO-ZACAS-NEXT: and a2, a2, a3
+; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0
+; RV64IA-WMO-ZACAS-NEXT: sllw a0, a2, a0
+; RV64IA-WMO-ZACAS-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a4)
+; RV64IA-WMO-ZACAS-NEXT: and a3, a2, a5
+; RV64IA-WMO-ZACAS-NEXT: bne a3, a1, .LBB15_3
+; RV64IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1
+; RV64IA-WMO-ZACAS-NEXT: xor a3, a2, a0
+; RV64IA-WMO-ZACAS-NEXT: and a3, a3, a5
+; RV64IA-WMO-ZACAS-NEXT: xor a3, a2, a3
+; RV64IA-WMO-ZACAS-NEXT: sc.w.rl a3, a3, (a4)
+; RV64IA-WMO-ZACAS-NEXT: bnez a3, .LBB15_1
+; RV64IA-WMO-ZACAS-NEXT: .LBB15_3:
+; RV64IA-WMO-ZACAS-NEXT: ret
+;
+; RV64IA-WMO-ZABHA-LABEL: cmpxchg_i16_acq_rel_monotonic:
+; RV64IA-WMO-ZABHA: # %bb.0:
+; RV64IA-WMO-ZABHA-NEXT: amocas.h.aqrl a1, a2, (a0)
+; RV64IA-WMO-ZABHA-NEXT: ret
+;
+; RV64IA-TSO-LABEL: cmpxchg_i16_acq_rel_monotonic:
+; RV64IA-TSO: # %bb.0:
+; RV64IA-TSO-NEXT: lui a3, 16
+; RV64IA-TSO-NEXT: andi a4, a0, -4
+; RV64IA-TSO-NEXT: andi a0, a0, 3
+; RV64IA-TSO-NEXT: addi a3, a3, -1
+; RV64IA-TSO-NEXT: slli a0, a0, 3
+; RV64IA-TSO-NEXT: sllw a5, a3, a0
+; RV64IA-TSO-NEXT: and a1, a1, a3
+; RV64IA-TSO-NEXT: and a2, a2, a3
+; RV64IA-TSO-NEXT: sllw a1, a1, a0
+; RV64IA-TSO-NEXT: sllw a0, a2, a0
+; RV64IA-TSO-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT: lr.w a2, (a4)
+; RV64IA-TSO-NEXT: and a3, a2, a5
+; RV64IA-TSO-NEXT: bne a3, a1, .LBB15_3
+; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1
+; RV64IA-TSO-NEXT: xor a3, a2, a0
+; RV64IA-TSO-NEXT: and a3, a3, a5
+; RV64IA-TSO-NEXT: xor a3, a2, a3
+; RV64IA-TSO-NEXT: sc.w a3, a3, (a4)
+; RV64IA-TSO-NEXT: bnez a3, .LBB15_1
+; RV64IA-TSO-NEXT: .LBB15_3:
+; RV64IA-TSO-NEXT: ret
+;
+; RV64IA-TSO-ZACAS-LABEL: cmpxchg_i16_acq_rel_monotonic:
+; RV64IA-TSO-ZACAS: # %bb.0:
+; RV64IA-TSO-ZACAS-NEXT: lui a3, 16
+; RV64IA-TSO-ZACAS-NEXT: andi a4, a0, -4
+; RV64IA-TSO-ZACAS-NEXT: andi a0, a0, 3
+; RV64IA-TSO-ZACAS-NEXT: addi a3, a3, -1
+; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV64IA-TSO-ZACAS-NEXT: sllw a5, a3, a0
+; RV64IA-TSO-ZACAS-NEXT: and a1, a1, a3
+; RV64IA-TSO-ZACAS-NEXT: and a2, a2, a3
+; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a0
+; RV64IA-TSO-ZACAS-NEXT: sllw a0, a2, a0
+; RV64IA-TSO-ZACAS-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-ZACAS-NEXT: lr.w a2, (a4)
+; RV64IA-TSO-ZACAS-NEXT: and a3, a2, a5
+; RV64IA-TSO-ZACAS-NEXT: bne a3, a1, .LBB15_3
+; RV64IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1
+; RV64IA-TSO-ZACAS-NEXT: xor a3, a2, a0
+; RV64IA-TSO-ZACAS-NEXT: and a3, a3, a5
+; RV64IA-TSO-ZACAS-NEXT: xor a3, a2, a3
+; RV64IA-TSO-ZACAS-NEXT: sc.w a3, a3, (a4)
+; RV64IA-TSO-ZACAS-NEXT: bnez a3, .LBB15_1
+; RV64IA-TSO-ZACAS-NEXT: .LBB15_3:
+; RV64IA-TSO-ZACAS-NEXT: ret
+;
+; RV64IA-TSO-ZABHA-LABEL: cmpxchg_i16_acq_rel_monotonic:
+; RV64IA-TSO-ZABHA: # %bb.0:
+; RV64IA-TSO-ZABHA-NEXT: amocas.h a1, a2, (a0)
+; RV64IA-TSO-ZABHA-NEXT: ret
+  ; i16 cmpxchg with acq_rel/monotonic ordering; result is discarded. Same
+  ; lowering shape as the release/acquire case above (lr.w.aq/sc.w.rl loop on
+  ; WMO, unordered lr.w/sc.w on TSO, amocas.h(.aqrl) with Zabha); only the
+  ; libcall ordering immediate differs (li a3, 4).
+  ; NOTE(review): CHECK lines above appear autogenerated — regenerate rather
+  ; than hand-editing.
+  %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val acq_rel monotonic
+  ret void
+}
+
+define void @cmpxchg_i16_acq_rel_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind {
+; RV32I-LABEL: cmpxchg_i16_acq_rel_acquire:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sh a1, 10(sp)
+; RV32I-NEXT: addi a1, sp, 10
+; RV32I-NEXT: li a3, 4
+; RV32I-NEXT: li a4, 2
+; RV32I-NEXT: call __atomic_compare_exchange_2
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-WMO-LABEL: cmpxchg_i16_acq_rel_acquire:
+; RV32IA-WMO: # %bb.0:
+; RV32IA-WMO-NEXT: lui a3, 16
+; RV32IA-WMO-NEXT: andi a4, a0, -4
+; RV32IA-WMO-NEXT: andi a0, a0, 3
+; RV32IA-WMO-NEXT: addi a3, a3, -1
+; RV32IA-WMO-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NEXT: sll a5, a3, a0
+; RV32IA-WMO-NEXT: and a1, a1, a3
+; RV32IA-WMO-NEXT: and a2, a2, a3
+; RV32IA-WMO-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NEXT: sll a0, a2, a0
+; RV32IA-WMO-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NEXT: lr.w.aq a2, (a4)
+; RV32IA-WMO-NEXT: and a3, a2, a5
+; RV32IA-WMO-NEXT: bne a3, a1, .LBB16_3
+; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB16_1 Depth=1
+; RV32IA-WMO-NEXT: xor a3, a2, a0
+; RV32IA-WMO-NEXT: and a3, a3, a5
+; RV32IA-WMO-NEXT: xor a3, a2, a3
+; RV32IA-WMO-NEXT: sc.w.rl a3, a3, (a4)
+; RV32IA-WMO-NEXT: bnez a3, .LBB16_1
+; RV32IA-WMO-NEXT: .LBB16_3:
+; RV32IA-WMO-NEXT: ret
+;
+; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i16_acq_rel_acquire:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-ZACAS-NEXT: andi a4, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: andi a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: sll a5, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-WMO-ZACAS-NEXT: and a2, a2, a3
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: sll a0, a2, a0
+; RV32IA-WMO-ZACAS-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a4)
+; RV32IA-WMO-ZACAS-NEXT: and a3, a2, a5
+; RV32IA-WMO-ZACAS-NEXT: bne a3, a1, .LBB16_3
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB16_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: xor a3, a2, a0
+; RV32IA-WMO-ZACAS-NEXT: and a3, a3, a5
+; RV32IA-WMO-ZACAS-NEXT: xor a3, a2, a3
+; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a3, a3, (a4)
+; RV32IA-WMO-ZACAS-NEXT: bnez a3, .LBB16_1
+; RV32IA-WMO-ZACAS-NEXT: .LBB16_3:
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-LABEL: cmpxchg_i16_acq_rel_acquire:
+; RV32IA-TSO: # %bb.0:
+; RV32IA-TSO-NEXT: lui a3, 16
+; RV32IA-TSO-NEXT: andi a4, a0, -4
+; RV32IA-TSO-NEXT: andi a0, a0, 3
+; RV32IA-TSO-NEXT: addi a3, a3, -1
+; RV32IA-TSO-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NEXT: sll a5, a3, a0
+; RV32IA-TSO-NEXT: and a1, a1, a3
+; RV32IA-TSO-NEXT: and a2, a2, a3
+; RV32IA-TSO-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NEXT: sll a0, a2, a0
+; RV32IA-TSO-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NEXT: lr.w a2, (a4)
+; RV32IA-TSO-NEXT: and a3, a2, a5
+; RV32IA-TSO-NEXT: bne a3, a1, .LBB16_3
+; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB16_1 Depth=1
+; RV32IA-TSO-NEXT: xor a3, a2, a0
+; RV32IA-TSO-NEXT: and a3, a3, a5
+; RV32IA-TSO-NEXT: xor a3, a2, a3
+; RV32IA-TSO-NEXT: sc.w a3, a3, (a4)
+; RV32IA-TSO-NEXT: bnez a3, .LBB16_1
+; RV32IA-TSO-NEXT: .LBB16_3:
+; RV32IA-TSO-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: cmpxchg_i16_acq_rel_acquire:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-ZACAS-NEXT: andi a4, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: andi a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: sll a5, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-TSO-ZACAS-NEXT: and a2, a2, a3
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: sll a0, a2, a0
+; RV32IA-TSO-ZACAS-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT: lr.w a2, (a4)
+; RV32IA-TSO-ZACAS-NEXT: and a3, a2, a5
+; RV32IA-TSO-ZACAS-NEXT: bne a3, a1, .LBB16_3
+; RV32IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB16_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT: xor a3, a2, a0
+; RV32IA-TSO-ZACAS-NEXT: and a3, a3, a5
+; RV32IA-TSO-ZACAS-NEXT: xor a3, a2, a3
+; RV32IA-TSO-ZACAS-NEXT: sc.w a3, a3, (a4)
+; RV32IA-TSO-ZACAS-NEXT: bnez a3, .LBB16_1
+; RV32IA-TSO-ZACAS-NEXT: .LBB16_3:
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
+; RV64I-LABEL: cmpxchg_i16_acq_rel_acquire:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sh a1, 6(sp)
+; RV64I-NEXT: addi a1, sp, 6
+; RV64I-NEXT: li a3, 4
+; RV64I-NEXT: li a4, 2
+; RV64I-NEXT: call __atomic_compare_exchange_2
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-WMO-LABEL: cmpxchg_i16_acq_rel_acquire:
+; RV64IA-WMO: # %bb.0:
+; RV64IA-WMO-NEXT: lui a3, 16
+; RV64IA-WMO-NEXT: andi a4, a0, -4
+; RV64IA-WMO-NEXT: andi a0, a0, 3
+; RV64IA-WMO-NEXT: addi a3, a3, -1
+; RV64IA-WMO-NEXT: slli a0, a0, 3
+; RV64IA-WMO-NEXT: sllw a5, a3, a0
+; RV64IA-WMO-NEXT: and a1, a1, a3
+; RV64IA-WMO-NEXT: and a2, a2, a3
+; RV64IA-WMO-NEXT: sllw a1, a1, a0
+; RV64IA-WMO-NEXT: sllw a0, a2, a0
+; RV64IA-WMO-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT: lr.w.aq a2, (a4)
+; RV64IA-WMO-NEXT: and a3, a2, a5
+; RV64IA-WMO-NEXT: bne a3, a1, .LBB16_3
+; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB16_1 Depth=1
+; RV64IA-WMO-NEXT: xor a3, a2, a0
+; RV64IA-WMO-NEXT: and a3, a3, a5
+; RV64IA-WMO-NEXT: xor a3, a2, a3
+; RV64IA-WMO-NEXT: sc.w.rl a3, a3, (a4)
+; RV64IA-WMO-NEXT: bnez a3, .LBB16_1
+; RV64IA-WMO-NEXT: .LBB16_3:
+; RV64IA-WMO-NEXT: ret
+;
+; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i16_acq_rel_acquire:
+; RV64IA-WMO-ZACAS: # %bb.0:
+; RV64IA-WMO-ZACAS-NEXT: lui a3, 16
+; RV64IA-WMO-ZACAS-NEXT: andi a4, a0, -4
+; RV64IA-WMO-ZACAS-NEXT: andi a0, a0, 3
+; RV64IA-WMO-ZACAS-NEXT: addi a3, a3, -1
+; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV64IA-WMO-ZACAS-NEXT: sllw a5, a3, a0
+; RV64IA-WMO-ZACAS-NEXT: and a1, a1, a3
+; RV64IA-WMO-ZACAS-NEXT: and a2, a2, a3
+; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0
+; RV64IA-WMO-ZACAS-NEXT: sllw a0, a2, a0
+; RV64IA-WMO-ZACAS-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a4)
+; RV64IA-WMO-ZACAS-NEXT: and a3, a2, a5
+; RV64IA-WMO-ZACAS-NEXT: bne a3, a1, .LBB16_3
+; RV64IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB16_1 Depth=1
+; RV64IA-WMO-ZACAS-NEXT: xor a3, a2, a0
+; RV64IA-WMO-ZACAS-NEXT: and a3, a3, a5
+; RV64IA-WMO-ZACAS-NEXT: xor a3, a2, a3
+; RV64IA-WMO-ZACAS-NEXT: sc.w.rl a3, a3, (a4)
+; RV64IA-WMO-ZACAS-NEXT: bnez a3, .LBB16_1
+; RV64IA-WMO-ZACAS-NEXT: .LBB16_3:
+; RV64IA-WMO-ZACAS-NEXT: ret
+;
+; RV64IA-WMO-ZABHA-LABEL: cmpxchg_i16_acq_rel_acquire:
+; RV64IA-WMO-ZABHA: # %bb.0:
+; RV64IA-WMO-ZABHA-NEXT: amocas.h.aqrl a1, a2, (a0)
+; RV64IA-WMO-ZABHA-NEXT: ret
+;
+; RV64IA-TSO-LABEL: cmpxchg_i16_acq_rel_acquire:
+; RV64IA-TSO: # %bb.0:
+; RV64IA-TSO-NEXT: lui a3, 16
+; RV64IA-TSO-NEXT: andi a4, a0, -4
+; RV64IA-TSO-NEXT: andi a0, a0, 3
+; RV64IA-TSO-NEXT: addi a3, a3, -1
+; RV64IA-TSO-NEXT: slli a0, a0, 3
+; RV64IA-TSO-NEXT: sllw a5, a3, a0
+; RV64IA-TSO-NEXT: and a1, a1, a3
+; RV64IA-TSO-NEXT: and a2, a2, a3
+; RV64IA-TSO-NEXT: sllw a1, a1, a0
+; RV64IA-TSO-NEXT: sllw a0, a2, a0
+; RV64IA-TSO-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT: lr.w a2, (a4)
+; RV64IA-TSO-NEXT: and a3, a2, a5
+; RV64IA-TSO-NEXT: bne a3, a1, .LBB16_3
+; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB16_1 Depth=1
+; RV64IA-TSO-NEXT: xor a3, a2, a0
+; RV64IA-TSO-NEXT: and a3, a3, a5
+; RV64IA-TSO-NEXT: xor a3, a2, a3
+; RV64IA-TSO-NEXT: sc.w a3, a3, (a4)
+; RV64IA-TSO-NEXT: bnez a3, .LBB16_1
+; RV64IA-TSO-NEXT: .LBB16_3:
+; RV64IA-TSO-NEXT: ret
+;
+; RV64IA-TSO-ZACAS-LABEL: cmpxchg_i16_acq_rel_acquire:
+; RV64IA-TSO-ZACAS: # %bb.0:
+; RV64IA-TSO-ZACAS-NEXT: lui a3, 16
+; RV64IA-TSO-ZACAS-NEXT: andi a4, a0, -4
+; RV64IA-TSO-ZACAS-NEXT: andi a0, a0, 3
+; RV64IA-TSO-ZACAS-NEXT: addi a3, a3, -1
+; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV64IA-TSO-ZACAS-NEXT: sllw a5, a3, a0
+; RV64IA-TSO-ZACAS-NEXT: and a1, a1, a3
+; RV64IA-TSO-ZACAS-NEXT: and a2, a2, a3
+; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a0
+; RV64IA-TSO-ZACAS-NEXT: sllw a0, a2, a0
+; RV64IA-TSO-ZACAS-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-ZACAS-NEXT: lr.w a2, (a4)
+; RV64IA-TSO-ZACAS-NEXT: and a3, a2, a5
+; RV64IA-TSO-ZACAS-NEXT: bne a3, a1, .LBB16_3
+; RV64IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB16_1 Depth=1
+; RV64IA-TSO-ZACAS-NEXT: xor a3, a2, a0
+; RV64IA-TSO-ZACAS-NEXT: and a3, a3, a5
+; RV64IA-TSO-ZACAS-NEXT: xor a3, a2, a3
+; RV64IA-TSO-ZACAS-NEXT: sc.w a3, a3, (a4)
+; RV64IA-TSO-ZACAS-NEXT: bnez a3, .LBB16_1
+; RV64IA-TSO-ZACAS-NEXT: .LBB16_3:
+; RV64IA-TSO-ZACAS-NEXT: ret
+;
+; RV64IA-TSO-ZABHA-LABEL: cmpxchg_i16_acq_rel_acquire:
+; RV64IA-TSO-ZABHA: # %bb.0:
+; RV64IA-TSO-ZABHA-NEXT: amocas.h a1, a2, (a0)
+; RV64IA-TSO-ZABHA-NEXT: ret
+ %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val acq_rel acquire
+ ret void
+}
+
+define void @cmpxchg_i16_seq_cst_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwind {
+; RV32I-LABEL: cmpxchg_i16_seq_cst_monotonic:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sh a1, 10(sp)
+; RV32I-NEXT: addi a1, sp, 10
+; RV32I-NEXT: li a3, 5
+; RV32I-NEXT: li a4, 0
+; RV32I-NEXT: call __atomic_compare_exchange_2
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-LABEL: cmpxchg_i16_seq_cst_monotonic:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: lui a3, 16
+; RV32IA-NEXT: andi a4, a0, -4
+; RV32IA-NEXT: andi a0, a0, 3
+; RV32IA-NEXT: addi a3, a3, -1
+; RV32IA-NEXT: slli a0, a0, 3
+; RV32IA-NEXT: sll a5, a3, a0
+; RV32IA-NEXT: and a1, a1, a3
+; RV32IA-NEXT: and a2, a2, a3
+; RV32IA-NEXT: sll a1, a1, a0
+; RV32IA-NEXT: sll a0, a2, a0
+; RV32IA-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT: lr.w.aqrl a2, (a4)
+; RV32IA-NEXT: and a3, a2, a5
+; RV32IA-NEXT: bne a3, a1, .LBB17_3
+; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB17_1 Depth=1
+; RV32IA-NEXT: xor a3, a2, a0
+; RV32IA-NEXT: and a3, a3, a5
+; RV32IA-NEXT: xor a3, a2, a3
+; RV32IA-NEXT: sc.w.rl a3, a3, (a4)
+; RV32IA-NEXT: bnez a3, .LBB17_1
+; RV32IA-NEXT: .LBB17_3:
+; RV32IA-NEXT: ret
+;
+; RV64I-LABEL: cmpxchg_i16_seq_cst_monotonic:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sh a1, 6(sp)
+; RV64I-NEXT: addi a1, sp, 6
+; RV64I-NEXT: li a3, 5
+; RV64I-NEXT: li a4, 0
+; RV64I-NEXT: call __atomic_compare_exchange_2
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-WMO-LABEL: cmpxchg_i16_seq_cst_monotonic:
+; RV64IA-WMO: # %bb.0:
+; RV64IA-WMO-NEXT: lui a3, 16
+; RV64IA-WMO-NEXT: andi a4, a0, -4
+; RV64IA-WMO-NEXT: andi a0, a0, 3
+; RV64IA-WMO-NEXT: addi a3, a3, -1
+; RV64IA-WMO-NEXT: slli a0, a0, 3
+; RV64IA-WMO-NEXT: sllw a5, a3, a0
+; RV64IA-WMO-NEXT: and a1, a1, a3
+; RV64IA-WMO-NEXT: and a2, a2, a3
+; RV64IA-WMO-NEXT: sllw a1, a1, a0
+; RV64IA-WMO-NEXT: sllw a0, a2, a0
+; RV64IA-WMO-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT: lr.w.aqrl a2, (a4)
+; RV64IA-WMO-NEXT: and a3, a2, a5
+; RV64IA-WMO-NEXT: bne a3, a1, .LBB17_3
+; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB17_1 Depth=1
+; RV64IA-WMO-NEXT: xor a3, a2, a0
+; RV64IA-WMO-NEXT: and a3, a3, a5
+; RV64IA-WMO-NEXT: xor a3, a2, a3
+; RV64IA-WMO-NEXT: sc.w.rl a3, a3, (a4)
+; RV64IA-WMO-NEXT: bnez a3, .LBB17_1
+; RV64IA-WMO-NEXT: .LBB17_3:
+; RV64IA-WMO-NEXT: ret
+;
+; RV64IA-ZACAS-LABEL: cmpxchg_i16_seq_cst_monotonic:
+; RV64IA-ZACAS: # %bb.0:
+; RV64IA-ZACAS-NEXT: lui a3, 16
+; RV64IA-ZACAS-NEXT: andi a4, a0, -4
+; RV64IA-ZACAS-NEXT: andi a0, a0, 3
+; RV64IA-ZACAS-NEXT: addi a3, a3, -1
+; RV64IA-ZACAS-NEXT: slli a0, a0, 3
+; RV64IA-ZACAS-NEXT: sllw a5, a3, a0
+; RV64IA-ZACAS-NEXT: and a1, a1, a3
+; RV64IA-ZACAS-NEXT: and a2, a2, a3
+; RV64IA-ZACAS-NEXT: sllw a1, a1, a0
+; RV64IA-ZACAS-NEXT: sllw a0, a2, a0
+; RV64IA-ZACAS-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-ZACAS-NEXT: lr.w.aqrl a2, (a4)
+; RV64IA-ZACAS-NEXT: and a3, a2, a5
+; RV64IA-ZACAS-NEXT: bne a3, a1, .LBB17_3
+; RV64IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB17_1 Depth=1
+; RV64IA-ZACAS-NEXT: xor a3, a2, a0
+; RV64IA-ZACAS-NEXT: and a3, a3, a5
+; RV64IA-ZACAS-NEXT: xor a3, a2, a3
+; RV64IA-ZACAS-NEXT: sc.w.rl a3, a3, (a4)
+; RV64IA-ZACAS-NEXT: bnez a3, .LBB17_1
+; RV64IA-ZACAS-NEXT: .LBB17_3:
+; RV64IA-ZACAS-NEXT: ret
+;
+; RV64IA-WMO-ZABHA-LABEL: cmpxchg_i16_seq_cst_monotonic:
+; RV64IA-WMO-ZABHA: # %bb.0:
+; RV64IA-WMO-ZABHA-NEXT: amocas.h.aqrl a1, a2, (a0)
+; RV64IA-WMO-ZABHA-NEXT: ret
+;
+; RV64IA-TSO-LABEL: cmpxchg_i16_seq_cst_monotonic:
+; RV64IA-TSO: # %bb.0:
+; RV64IA-TSO-NEXT: lui a3, 16
+; RV64IA-TSO-NEXT: andi a4, a0, -4
+; RV64IA-TSO-NEXT: andi a0, a0, 3
+; RV64IA-TSO-NEXT: addi a3, a3, -1
+; RV64IA-TSO-NEXT: slli a0, a0, 3
+; RV64IA-TSO-NEXT: sllw a5, a3, a0
+; RV64IA-TSO-NEXT: and a1, a1, a3
+; RV64IA-TSO-NEXT: and a2, a2, a3
+; RV64IA-TSO-NEXT: sllw a1, a1, a0
+; RV64IA-TSO-NEXT: sllw a0, a2, a0
+; RV64IA-TSO-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT: lr.w.aqrl a2, (a4)
+; RV64IA-TSO-NEXT: and a3, a2, a5
+; RV64IA-TSO-NEXT: bne a3, a1, .LBB17_3
+; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB17_1 Depth=1
+; RV64IA-TSO-NEXT: xor a3, a2, a0
+; RV64IA-TSO-NEXT: and a3, a3, a5
+; RV64IA-TSO-NEXT: xor a3, a2, a3
+; RV64IA-TSO-NEXT: sc.w.rl a3, a3, (a4)
+; RV64IA-TSO-NEXT: bnez a3, .LBB17_1
+; RV64IA-TSO-NEXT: .LBB17_3:
+; RV64IA-TSO-NEXT: ret
+;
+; RV64IA-TSO-ZABHA-LABEL: cmpxchg_i16_seq_cst_monotonic:
+; RV64IA-TSO-ZABHA: # %bb.0:
+; RV64IA-TSO-ZABHA-NEXT: amocas.h a1, a2, (a0)
+; RV64IA-TSO-ZABHA-NEXT: ret
+ %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val seq_cst monotonic
+ ret void
+}
+
+define void @cmpxchg_i16_seq_cst_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind {
+; RV32I-LABEL: cmpxchg_i16_seq_cst_acquire:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sh a1, 10(sp)
+; RV32I-NEXT: addi a1, sp, 10
+; RV32I-NEXT: li a3, 5
+; RV32I-NEXT: li a4, 2
+; RV32I-NEXT: call __atomic_compare_exchange_2
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-LABEL: cmpxchg_i16_seq_cst_acquire:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: lui a3, 16
+; RV32IA-NEXT: andi a4, a0, -4
+; RV32IA-NEXT: andi a0, a0, 3
+; RV32IA-NEXT: addi a3, a3, -1
+; RV32IA-NEXT: slli a0, a0, 3
+; RV32IA-NEXT: sll a5, a3, a0
+; RV32IA-NEXT: and a1, a1, a3
+; RV32IA-NEXT: and a2, a2, a3
+; RV32IA-NEXT: sll a1, a1, a0
+; RV32IA-NEXT: sll a0, a2, a0
+; RV32IA-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT: lr.w.aqrl a2, (a4)
+; RV32IA-NEXT: and a3, a2, a5
+; RV32IA-NEXT: bne a3, a1, .LBB18_3
+; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB18_1 Depth=1
+; RV32IA-NEXT: xor a3, a2, a0
+; RV32IA-NEXT: and a3, a3, a5
+; RV32IA-NEXT: xor a3, a2, a3
+; RV32IA-NEXT: sc.w.rl a3, a3, (a4)
+; RV32IA-NEXT: bnez a3, .LBB18_1
+; RV32IA-NEXT: .LBB18_3:
+; RV32IA-NEXT: ret
+;
+; RV64I-LABEL: cmpxchg_i16_seq_cst_acquire:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sh a1, 6(sp)
+; RV64I-NEXT: addi a1, sp, 6
+; RV64I-NEXT: li a3, 5
+; RV64I-NEXT: li a4, 2
+; RV64I-NEXT: call __atomic_compare_exchange_2
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-WMO-LABEL: cmpxchg_i16_seq_cst_acquire:
+; RV64IA-WMO: # %bb.0:
+; RV64IA-WMO-NEXT: lui a3, 16
+; RV64IA-WMO-NEXT: andi a4, a0, -4
+; RV64IA-WMO-NEXT: andi a0, a0, 3
+; RV64IA-WMO-NEXT: addi a3, a3, -1
+; RV64IA-WMO-NEXT: slli a0, a0, 3
+; RV64IA-WMO-NEXT: sllw a5, a3, a0
+; RV64IA-WMO-NEXT: and a1, a1, a3
+; RV64IA-WMO-NEXT: and a2, a2, a3
+; RV64IA-WMO-NEXT: sllw a1, a1, a0
+; RV64IA-WMO-NEXT: sllw a0, a2, a0
+; RV64IA-WMO-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT: lr.w.aqrl a2, (a4)
+; RV64IA-WMO-NEXT: and a3, a2, a5
+; RV64IA-WMO-NEXT: bne a3, a1, .LBB18_3
+; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB18_1 Depth=1
+; RV64IA-WMO-NEXT: xor a3, a2, a0
+; RV64IA-WMO-NEXT: and a3, a3, a5
+; RV64IA-WMO-NEXT: xor a3, a2, a3
+; RV64IA-WMO-NEXT: sc.w.rl a3, a3, (a4)
+; RV64IA-WMO-NEXT: bnez a3, .LBB18_1
+; RV64IA-WMO-NEXT: .LBB18_3:
+; RV64IA-WMO-NEXT: ret
+;
+; RV64IA-ZACAS-LABEL: cmpxchg_i16_seq_cst_acquire:
+; RV64IA-ZACAS: # %bb.0:
+; RV64IA-ZACAS-NEXT: lui a3, 16
+; RV64IA-ZACAS-NEXT: andi a4, a0, -4
+; RV64IA-ZACAS-NEXT: andi a0, a0, 3
+; RV64IA-ZACAS-NEXT: addi a3, a3, -1
+; RV64IA-ZACAS-NEXT: slli a0, a0, 3
+; RV64IA-ZACAS-NEXT: sllw a5, a3, a0
+; RV64IA-ZACAS-NEXT: and a1, a1, a3
+; RV64IA-ZACAS-NEXT: and a2, a2, a3
+; RV64IA-ZACAS-NEXT: sllw a1, a1, a0
+; RV64IA-ZACAS-NEXT: sllw a0, a2, a0
+; RV64IA-ZACAS-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-ZACAS-NEXT: lr.w.aqrl a2, (a4)
+; RV64IA-ZACAS-NEXT: and a3, a2, a5
+; RV64IA-ZACAS-NEXT: bne a3, a1, .LBB18_3
+; RV64IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB18_1 Depth=1
+; RV64IA-ZACAS-NEXT: xor a3, a2, a0
+; RV64IA-ZACAS-NEXT: and a3, a3, a5
+; RV64IA-ZACAS-NEXT: xor a3, a2, a3
+; RV64IA-ZACAS-NEXT: sc.w.rl a3, a3, (a4)
+; RV64IA-ZACAS-NEXT: bnez a3, .LBB18_1
+; RV64IA-ZACAS-NEXT: .LBB18_3:
+; RV64IA-ZACAS-NEXT: ret
+;
+; RV64IA-WMO-ZABHA-LABEL: cmpxchg_i16_seq_cst_acquire:
+; RV64IA-WMO-ZABHA: # %bb.0:
+; RV64IA-WMO-ZABHA-NEXT: amocas.h.aqrl a1, a2, (a0)
+; RV64IA-WMO-ZABHA-NEXT: ret
+;
+; RV64IA-TSO-LABEL: cmpxchg_i16_seq_cst_acquire:
+; RV64IA-TSO: # %bb.0:
+; RV64IA-TSO-NEXT: lui a3, 16
+; RV64IA-TSO-NEXT: andi a4, a0, -4
+; RV64IA-TSO-NEXT: andi a0, a0, 3
+; RV64IA-TSO-NEXT: addi a3, a3, -1
+; RV64IA-TSO-NEXT: slli a0, a0, 3
+; RV64IA-TSO-NEXT: sllw a5, a3, a0
+; RV64IA-TSO-NEXT: and a1, a1, a3
+; RV64IA-TSO-NEXT: and a2, a2, a3
+; RV64IA-TSO-NEXT: sllw a1, a1, a0
+; RV64IA-TSO-NEXT: sllw a0, a2, a0
+; RV64IA-TSO-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT: lr.w.aqrl a2, (a4)
+; RV64IA-TSO-NEXT: and a3, a2, a5
+; RV64IA-TSO-NEXT: bne a3, a1, .LBB18_3
+; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB18_1 Depth=1
+; RV64IA-TSO-NEXT: xor a3, a2, a0
+; RV64IA-TSO-NEXT: and a3, a3, a5
+; RV64IA-TSO-NEXT: xor a3, a2, a3
+; RV64IA-TSO-NEXT: sc.w.rl a3, a3, (a4)
+; RV64IA-TSO-NEXT: bnez a3, .LBB18_1
+; RV64IA-TSO-NEXT: .LBB18_3:
+; RV64IA-TSO-NEXT: ret
+;
+; RV64IA-TSO-ZABHA-LABEL: cmpxchg_i16_seq_cst_acquire:
+; RV64IA-TSO-ZABHA: # %bb.0:
+; RV64IA-TSO-ZABHA-NEXT: amocas.h a1, a2, (a0)
+; RV64IA-TSO-ZABHA-NEXT: ret
+ %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val seq_cst acquire
+ ret void
+}
+
+define void @cmpxchg_i16_seq_cst_seq_cst(ptr %ptr, i16 %cmp, i16 %val) nounwind {
+; RV32I-LABEL: cmpxchg_i16_seq_cst_seq_cst:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sh a1, 10(sp)
+; RV32I-NEXT: addi a1, sp, 10
+; RV32I-NEXT: li a3, 5
+; RV32I-NEXT: li a4, 5
+; RV32I-NEXT: call __atomic_compare_exchange_2
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-LABEL: cmpxchg_i16_seq_cst_seq_cst:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: lui a3, 16
+; RV32IA-NEXT: andi a4, a0, -4
+; RV32IA-NEXT: andi a0, a0, 3
+; RV32IA-NEXT: addi a3, a3, -1
+; RV32IA-NEXT: slli a0, a0, 3
+; RV32IA-NEXT: sll a5, a3, a0
+; RV32IA-NEXT: and a1, a1, a3
+; RV32IA-NEXT: and a2, a2, a3
+; RV32IA-NEXT: sll a1, a1, a0
+; RV32IA-NEXT: sll a0, a2, a0
+; RV32IA-NEXT: .LBB19_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT: lr.w.aqrl a2, (a4)
+; RV32IA-NEXT: and a3, a2, a5
+; RV32IA-NEXT: bne a3, a1, .LBB19_3
+; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB19_1 Depth=1
+; RV32IA-NEXT: xor a3, a2, a0
+; RV32IA-NEXT: and a3, a3, a5
+; RV32IA-NEXT: xor a3, a2, a3
+; RV32IA-NEXT: sc.w.rl a3, a3, (a4)
+; RV32IA-NEXT: bnez a3, .LBB19_1
+; RV32IA-NEXT: .LBB19_3:
+; RV32IA-NEXT: ret
+;
+; RV64I-LABEL: cmpxchg_i16_seq_cst_seq_cst:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sh a1, 6(sp)
+; RV64I-NEXT: addi a1, sp, 6
+; RV64I-NEXT: li a3, 5
+; RV64I-NEXT: li a4, 5
+; RV64I-NEXT: call __atomic_compare_exchange_2
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-WMO-LABEL: cmpxchg_i16_seq_cst_seq_cst:
+; RV64IA-WMO: # %bb.0:
+; RV64IA-WMO-NEXT: lui a3, 16
+; RV64IA-WMO-NEXT: andi a4, a0, -4
+; RV64IA-WMO-NEXT: andi a0, a0, 3
+; RV64IA-WMO-NEXT: addi a3, a3, -1
+; RV64IA-WMO-NEXT: slli a0, a0, 3
+; RV64IA-WMO-NEXT: sllw a5, a3, a0
+; RV64IA-WMO-NEXT: and a1, a1, a3
+; RV64IA-WMO-NEXT: and a2, a2, a3
+; RV64IA-WMO-NEXT: sllw a1, a1, a0
+; RV64IA-WMO-NEXT: sllw a0, a2, a0
+; RV64IA-WMO-NEXT: .LBB19_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT: lr.w.aqrl a2, (a4)
+; RV64IA-WMO-NEXT: and a3, a2, a5
+; RV64IA-WMO-NEXT: bne a3, a1, .LBB19_3
+; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB19_1 Depth=1
+; RV64IA-WMO-NEXT: xor a3, a2, a0
+; RV64IA-WMO-NEXT: and a3, a3, a5
+; RV64IA-WMO-NEXT: xor a3, a2, a3
+; RV64IA-WMO-NEXT: sc.w.rl a3, a3, (a4)
+; RV64IA-WMO-NEXT: bnez a3, .LBB19_1
+; RV64IA-WMO-NEXT: .LBB19_3:
+; RV64IA-WMO-NEXT: ret
+;
+; RV64IA-ZACAS-LABEL: cmpxchg_i16_seq_cst_seq_cst:
+; RV64IA-ZACAS: # %bb.0:
+; RV64IA-ZACAS-NEXT: lui a3, 16
+; RV64IA-ZACAS-NEXT: andi a4, a0, -4
+; RV64IA-ZACAS-NEXT: andi a0, a0, 3
+; RV64IA-ZACAS-NEXT: addi a3, a3, -1
+; RV64IA-ZACAS-NEXT: slli a0, a0, 3
+; RV64IA-ZACAS-NEXT: sllw a5, a3, a0
+; RV64IA-ZACAS-NEXT: and a1, a1, a3
+; RV64IA-ZACAS-NEXT: and a2, a2, a3
+; RV64IA-ZACAS-NEXT: sllw a1, a1, a0
+; RV64IA-ZACAS-NEXT: sllw a0, a2, a0
+; RV64IA-ZACAS-NEXT: .LBB19_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-ZACAS-NEXT: lr.w.aqrl a2, (a4)
+; RV64IA-ZACAS-NEXT: and a3, a2, a5
+; RV64IA-ZACAS-NEXT: bne a3, a1, .LBB19_3
+; RV64IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB19_1 Depth=1
+; RV64IA-ZACAS-NEXT: xor a3, a2, a0
+; RV64IA-ZACAS-NEXT: and a3, a3, a5
+; RV64IA-ZACAS-NEXT: xor a3, a2, a3
+; RV64IA-ZACAS-NEXT: sc.w.rl a3, a3, (a4)
+; RV64IA-ZACAS-NEXT: bnez a3, .LBB19_1
+; RV64IA-ZACAS-NEXT: .LBB19_3:
+; RV64IA-ZACAS-NEXT: ret
+;
+; RV64IA-WMO-ZABHA-LABEL: cmpxchg_i16_seq_cst_seq_cst:
+; RV64IA-WMO-ZABHA: # %bb.0:
+; RV64IA-WMO-ZABHA-NEXT: fence rw, rw
+; RV64IA-WMO-ZABHA-NEXT: amocas.h.aqrl a1, a2, (a0)
+; RV64IA-WMO-ZABHA-NEXT: ret
+;
+; RV64IA-TSO-LABEL: cmpxchg_i16_seq_cst_seq_cst:
+; RV64IA-TSO: # %bb.0:
+; RV64IA-TSO-NEXT: lui a3, 16
+; RV64IA-TSO-NEXT: andi a4, a0, -4
+; RV64IA-TSO-NEXT: andi a0, a0, 3
+; RV64IA-TSO-NEXT: addi a3, a3, -1
+; RV64IA-TSO-NEXT: slli a0, a0, 3
+; RV64IA-TSO-NEXT: sllw a5, a3, a0
+; RV64IA-TSO-NEXT: and a1, a1, a3
+; RV64IA-TSO-NEXT: and a2, a2, a3
+; RV64IA-TSO-NEXT: sllw a1, a1, a0
+; RV64IA-TSO-NEXT: sllw a0, a2, a0
+; RV64IA-TSO-NEXT: .LBB19_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT: lr.w.aqrl a2, (a4)
+; RV64IA-TSO-NEXT: and a3, a2, a5
+; RV64IA-TSO-NEXT: bne a3, a1, .LBB19_3
+; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB19_1 Depth=1
+; RV64IA-TSO-NEXT: xor a3, a2, a0
+; RV64IA-TSO-NEXT: and a3, a3, a5
+; RV64IA-TSO-NEXT: xor a3, a2, a3
+; RV64IA-TSO-NEXT: sc.w.rl a3, a3, (a4)
+; RV64IA-TSO-NEXT: bnez a3, .LBB19_1
+; RV64IA-TSO-NEXT: .LBB19_3:
+; RV64IA-TSO-NEXT: ret
+;
+; RV64IA-TSO-ZABHA-LABEL: cmpxchg_i16_seq_cst_seq_cst:
+; RV64IA-TSO-ZABHA: # %bb.0:
+; RV64IA-TSO-ZABHA-NEXT: fence rw, rw
+; RV64IA-TSO-ZABHA-NEXT: amocas.h a1, a2, (a0)
+; RV64IA-TSO-ZABHA-NEXT: ret
+ %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val seq_cst seq_cst
+ ret void
+}
+
+define void @cmpxchg_i32_monotonic_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwind {
+; RV32I-LABEL: cmpxchg_i32_monotonic_monotonic:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw a1, 8(sp)
+; RV32I-NEXT: addi a1, sp, 8
+; RV32I-NEXT: li a3, 0
+; RV32I-NEXT: li a4, 0
+; RV32I-NEXT: call __atomic_compare_exchange_4
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-WMO-LABEL: cmpxchg_i32_monotonic_monotonic:
+; RV32IA-WMO: # %bb.0:
+; RV32IA-WMO-NEXT: .LBB20_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NEXT: lr.w a3, (a0)
+; RV32IA-WMO-NEXT: bne a3, a1, .LBB20_3
+; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB20_1 Depth=1
+; RV32IA-WMO-NEXT: sc.w a4, a2, (a0)
+; RV32IA-WMO-NEXT: bnez a4, .LBB20_1
+; RV32IA-WMO-NEXT: .LBB20_3:
+; RV32IA-WMO-NEXT: ret
+;
+; RV32IA-ZACAS-LABEL: cmpxchg_i32_monotonic_monotonic:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: amocas.w a1, a2, (a0)
+; RV32IA-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-LABEL: cmpxchg_i32_monotonic_monotonic:
+; RV32IA-TSO: # %bb.0:
+; RV32IA-TSO-NEXT: .LBB20_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NEXT: lr.w a3, (a0)
+; RV32IA-TSO-NEXT: bne a3, a1, .LBB20_3
+; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB20_1 Depth=1
+; RV32IA-TSO-NEXT: sc.w a4, a2, (a0)
+; RV32IA-TSO-NEXT: bnez a4, .LBB20_1
+; RV32IA-TSO-NEXT: .LBB20_3:
+; RV32IA-TSO-NEXT: ret
+;
+; RV64I-LABEL: cmpxchg_i32_monotonic_monotonic:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sw a1, 4(sp)
+; RV64I-NEXT: addi a1, sp, 4
+; RV64I-NEXT: li a3, 0
+; RV64I-NEXT: li a4, 0
+; RV64I-NEXT: call __atomic_compare_exchange_4
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-WMO-LABEL: cmpxchg_i32_monotonic_monotonic:
+; RV64IA-WMO: # %bb.0:
+; RV64IA-WMO-NEXT: .LBB20_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT: lr.w a3, (a0)
+; RV64IA-WMO-NEXT: bne a3, a1, .LBB20_3
+; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB20_1 Depth=1
+; RV64IA-WMO-NEXT: sc.w a4, a2, (a0)
+; RV64IA-WMO-NEXT: bnez a4, .LBB20_1
+; RV64IA-WMO-NEXT: .LBB20_3:
+; RV64IA-WMO-NEXT: ret
+;
+; RV64IA-ZACAS-LABEL: cmpxchg_i32_monotonic_monotonic:
+; RV64IA-ZACAS: # %bb.0:
+; RV64IA-ZACAS-NEXT: amocas.w a1, a2, (a0)
+; RV64IA-ZACAS-NEXT: ret
+;
+; RV64IA-ZABHA-LABEL: cmpxchg_i32_monotonic_monotonic:
+; RV64IA-ZABHA: # %bb.0:
+; RV64IA-ZABHA-NEXT: amocas.w a1, a2, (a0)
+; RV64IA-ZABHA-NEXT: ret
+;
+; RV64IA-TSO-LABEL: cmpxchg_i32_monotonic_monotonic:
+; RV64IA-TSO: # %bb.0:
+; RV64IA-TSO-NEXT: .LBB20_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT: lr.w a3, (a0)
+; RV64IA-TSO-NEXT: bne a3, a1, .LBB20_3
+; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB20_1 Depth=1
+; RV64IA-TSO-NEXT: sc.w a4, a2, (a0)
+; RV64IA-TSO-NEXT: bnez a4, .LBB20_1
+; RV64IA-TSO-NEXT: .LBB20_3:
+; RV64IA-TSO-NEXT: ret
+ %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val monotonic monotonic
+ ret void
+}
+
+define void @cmpxchg_i32_acquire_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwind {
+; RV32I-LABEL: cmpxchg_i32_acquire_monotonic:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw a1, 8(sp)
+; RV32I-NEXT: addi a1, sp, 8
+; RV32I-NEXT: li a3, 2
+; RV32I-NEXT: li a4, 0
+; RV32I-NEXT: call __atomic_compare_exchange_4
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-WMO-LABEL: cmpxchg_i32_acquire_monotonic:
+; RV32IA-WMO: # %bb.0:
+; RV32IA-WMO-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NEXT: lr.w.aq a3, (a0)
+; RV32IA-WMO-NEXT: bne a3, a1, .LBB21_3
+; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB21_1 Depth=1
+; RV32IA-WMO-NEXT: sc.w a4, a2, (a0)
+; RV32IA-WMO-NEXT: bnez a4, .LBB21_1
+; RV32IA-WMO-NEXT: .LBB21_3:
+; RV32IA-WMO-NEXT: ret
+;
+; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i32_acquire_monotonic:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: amocas.w.aq a1, a2, (a0)
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-LABEL: cmpxchg_i32_acquire_monotonic:
+; RV32IA-TSO: # %bb.0:
+; RV32IA-TSO-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NEXT: lr.w a3, (a0)
+; RV32IA-TSO-NEXT: bne a3, a1, .LBB21_3
+; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB21_1 Depth=1
+; RV32IA-TSO-NEXT: sc.w a4, a2, (a0)
+; RV32IA-TSO-NEXT: bnez a4, .LBB21_1
+; RV32IA-TSO-NEXT: .LBB21_3:
+; RV32IA-TSO-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: cmpxchg_i32_acquire_monotonic:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: amocas.w a1, a2, (a0)
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
+; RV64I-LABEL: cmpxchg_i32_acquire_monotonic:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sw a1, 4(sp)
+; RV64I-NEXT: addi a1, sp, 4
+; RV64I-NEXT: li a3, 2
+; RV64I-NEXT: li a4, 0
+; RV64I-NEXT: call __atomic_compare_exchange_4
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-WMO-LABEL: cmpxchg_i32_acquire_monotonic:
+; RV64IA-WMO: # %bb.0:
+; RV64IA-WMO-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT: lr.w.aq a3, (a0)
+; RV64IA-WMO-NEXT: bne a3, a1, .LBB21_3
+; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB21_1 Depth=1
+; RV64IA-WMO-NEXT: sc.w a4, a2, (a0)
+; RV64IA-WMO-NEXT: bnez a4, .LBB21_1
+; RV64IA-WMO-NEXT: .LBB21_3:
+; RV64IA-WMO-NEXT: ret
+;
+; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i32_acquire_monotonic:
+; RV64IA-WMO-ZACAS: # %bb.0:
+; RV64IA-WMO-ZACAS-NEXT: amocas.w.aq a1, a2, (a0)
+; RV64IA-WMO-ZACAS-NEXT: ret
+;
+; RV64IA-WMO-ZABHA-LABEL: cmpxchg_i32_acquire_monotonic:
+; RV64IA-WMO-ZABHA: # %bb.0:
+; RV64IA-WMO-ZABHA-NEXT: amocas.w.aq a1, a2, (a0)
+; RV64IA-WMO-ZABHA-NEXT: ret
+;
+; RV64IA-TSO-LABEL: cmpxchg_i32_acquire_monotonic:
+; RV64IA-TSO: # %bb.0:
+; RV64IA-TSO-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT: lr.w a3, (a0)
+; RV64IA-TSO-NEXT: bne a3, a1, .LBB21_3
+; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB21_1 Depth=1
+; RV64IA-TSO-NEXT: sc.w a4, a2, (a0)
+; RV64IA-TSO-NEXT: bnez a4, .LBB21_1
+; RV64IA-TSO-NEXT: .LBB21_3:
+; RV64IA-TSO-NEXT: ret
+;
+; RV64IA-TSO-ZACAS-LABEL: cmpxchg_i32_acquire_monotonic:
+; RV64IA-TSO-ZACAS: # %bb.0:
+; RV64IA-TSO-ZACAS-NEXT: amocas.w a1, a2, (a0)
+; RV64IA-TSO-ZACAS-NEXT: ret
+;
+; RV64IA-TSO-ZABHA-LABEL: cmpxchg_i32_acquire_monotonic:
+; RV64IA-TSO-ZABHA: # %bb.0:
+; RV64IA-TSO-ZABHA-NEXT: amocas.w a1, a2, (a0)
+; RV64IA-TSO-ZABHA-NEXT: ret
+ %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val acquire monotonic
+ ret void
+}
+
+define void @cmpxchg_i32_acquire_acquire(ptr %ptr, i32 %cmp, i32 %val) nounwind {
+; RV32I-LABEL: cmpxchg_i32_acquire_acquire:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw a1, 8(sp)
+; RV32I-NEXT: addi a1, sp, 8
+; RV32I-NEXT: li a3, 2
+; RV32I-NEXT: li a4, 2
+; RV32I-NEXT: call __atomic_compare_exchange_4
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-WMO-LABEL: cmpxchg_i32_acquire_acquire:
+; RV32IA-WMO: # %bb.0:
+; RV32IA-WMO-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NEXT: lr.w.aq a3, (a0)
+; RV32IA-WMO-NEXT: bne a3, a1, .LBB22_3
+; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB22_1 Depth=1
+; RV32IA-WMO-NEXT: sc.w a4, a2, (a0)
+; RV32IA-WMO-NEXT: bnez a4, .LBB22_1
+; RV32IA-WMO-NEXT: .LBB22_3:
+; RV32IA-WMO-NEXT: ret
+;
+; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i32_acquire_acquire:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: amocas.w.aq a1, a2, (a0)
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-LABEL: cmpxchg_i32_acquire_acquire:
+; RV32IA-TSO: # %bb.0:
+; RV32IA-TSO-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NEXT: lr.w a3, (a0)
+; RV32IA-TSO-NEXT: bne a3, a1, .LBB22_3
+; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB22_1 Depth=1
+; RV32IA-TSO-NEXT: sc.w a4, a2, (a0)
+; RV32IA-TSO-NEXT: bnez a4, .LBB22_1
+; RV32IA-TSO-NEXT: .LBB22_3:
+; RV32IA-TSO-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: cmpxchg_i32_acquire_acquire:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: amocas.w a1, a2, (a0)
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
+; RV64I-LABEL: cmpxchg_i32_acquire_acquire:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sw a1, 4(sp)
+; RV64I-NEXT: addi a1, sp, 4
+; RV64I-NEXT: li a3, 2
+; RV64I-NEXT: li a4, 2
+; RV64I-NEXT: call __atomic_compare_exchange_4
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-WMO-LABEL: cmpxchg_i32_acquire_acquire:
+; RV64IA-WMO: # %bb.0:
+; RV64IA-WMO-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT: lr.w.aq a3, (a0)
+; RV64IA-WMO-NEXT: bne a3, a1, .LBB22_3
+; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB22_1 Depth=1
+; RV64IA-WMO-NEXT: sc.w a4, a2, (a0)
+; RV64IA-WMO-NEXT: bnez a4, .LBB22_1
+; RV64IA-WMO-NEXT: .LBB22_3:
+; RV64IA-WMO-NEXT: ret
+;
+; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i32_acquire_acquire:
+; RV64IA-WMO-ZACAS: # %bb.0:
+; RV64IA-WMO-ZACAS-NEXT: amocas.w.aq a1, a2, (a0)
+; RV64IA-WMO-ZACAS-NEXT: ret
+;
+; RV64IA-WMO-ZABHA-LABEL: cmpxchg_i32_acquire_acquire:
+; RV64IA-WMO-ZABHA: # %bb.0:
+; RV64IA-WMO-ZABHA-NEXT: amocas.w.aq a1, a2, (a0)
+; RV64IA-WMO-ZABHA-NEXT: ret
+;
+; RV64IA-TSO-LABEL: cmpxchg_i32_acquire_acquire:
+; RV64IA-TSO: # %bb.0:
+; RV64IA-TSO-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT: lr.w a3, (a0)
+; RV64IA-TSO-NEXT: bne a3, a1, .LBB22_3
+; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB22_1 Depth=1
+; RV64IA-TSO-NEXT: sc.w a4, a2, (a0)
+; RV64IA-TSO-NEXT: bnez a4, .LBB22_1
+; RV64IA-TSO-NEXT: .LBB22_3:
+; RV64IA-TSO-NEXT: ret
+;
+; RV64IA-TSO-ZACAS-LABEL: cmpxchg_i32_acquire_acquire:
+; RV64IA-TSO-ZACAS: # %bb.0:
+; RV64IA-TSO-ZACAS-NEXT: amocas.w a1, a2, (a0)
+; RV64IA-TSO-ZACAS-NEXT: ret
+;
+; RV64IA-TSO-ZABHA-LABEL: cmpxchg_i32_acquire_acquire:
+; RV64IA-TSO-ZABHA: # %bb.0:
+; RV64IA-TSO-ZABHA-NEXT: amocas.w a1, a2, (a0)
+; RV64IA-TSO-ZABHA-NEXT: ret
+ %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val acquire acquire
+ ret void
+}
+
+; Verifies i32 cmpxchg with release success / monotonic failure ordering: RV32I/RV64I
+; lower to the __atomic_compare_exchange_4 libcall (ordering args 3/0), IA uses an
+; LR.W / SC.W.rl loop, and Zacas emits amocas.w.rl (WMO) or plain amocas.w (TSO).
+define void @cmpxchg_i32_release_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwind {
+; RV32I-LABEL: cmpxchg_i32_release_monotonic:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw a1, 8(sp)
+; RV32I-NEXT: addi a1, sp, 8
+; RV32I-NEXT: li a3, 3
+; RV32I-NEXT: li a4, 0
+; RV32I-NEXT: call __atomic_compare_exchange_4
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-WMO-LABEL: cmpxchg_i32_release_monotonic:
+; RV32IA-WMO: # %bb.0:
+; RV32IA-WMO-NEXT: .LBB23_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NEXT: lr.w a3, (a0)
+; RV32IA-WMO-NEXT: bne a3, a1, .LBB23_3
+; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB23_1 Depth=1
+; RV32IA-WMO-NEXT: sc.w.rl a4, a2, (a0)
+; RV32IA-WMO-NEXT: bnez a4, .LBB23_1
+; RV32IA-WMO-NEXT: .LBB23_3:
+; RV32IA-WMO-NEXT: ret
+;
+; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i32_release_monotonic:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: amocas.w.rl a1, a2, (a0)
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-LABEL: cmpxchg_i32_release_monotonic:
+; RV32IA-TSO: # %bb.0:
+; RV32IA-TSO-NEXT: .LBB23_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NEXT: lr.w a3, (a0)
+; RV32IA-TSO-NEXT: bne a3, a1, .LBB23_3
+; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB23_1 Depth=1
+; RV32IA-TSO-NEXT: sc.w a4, a2, (a0)
+; RV32IA-TSO-NEXT: bnez a4, .LBB23_1
+; RV32IA-TSO-NEXT: .LBB23_3:
+; RV32IA-TSO-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: cmpxchg_i32_release_monotonic:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: amocas.w a1, a2, (a0)
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
+; RV64I-LABEL: cmpxchg_i32_release_monotonic:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sw a1, 4(sp)
+; RV64I-NEXT: addi a1, sp, 4
+; RV64I-NEXT: li a3, 3
+; RV64I-NEXT: li a4, 0
+; RV64I-NEXT: call __atomic_compare_exchange_4
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-WMO-LABEL: cmpxchg_i32_release_monotonic:
+; RV64IA-WMO: # %bb.0:
+; RV64IA-WMO-NEXT: .LBB23_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT: lr.w a3, (a0)
+; RV64IA-WMO-NEXT: bne a3, a1, .LBB23_3
+; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB23_1 Depth=1
+; RV64IA-WMO-NEXT: sc.w.rl a4, a2, (a0)
+; RV64IA-WMO-NEXT: bnez a4, .LBB23_1
+; RV64IA-WMO-NEXT: .LBB23_3:
+; RV64IA-WMO-NEXT: ret
+;
+; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i32_release_monotonic:
+; RV64IA-WMO-ZACAS: # %bb.0:
+; RV64IA-WMO-ZACAS-NEXT: amocas.w.rl a1, a2, (a0)
+; RV64IA-WMO-ZACAS-NEXT: ret
+;
+; RV64IA-WMO-ZABHA-LABEL: cmpxchg_i32_release_monotonic:
+; RV64IA-WMO-ZABHA: # %bb.0:
+; RV64IA-WMO-ZABHA-NEXT: amocas.w.rl a1, a2, (a0)
+; RV64IA-WMO-ZABHA-NEXT: ret
+;
+; RV64IA-TSO-LABEL: cmpxchg_i32_release_monotonic:
+; RV64IA-TSO: # %bb.0:
+; RV64IA-TSO-NEXT: .LBB23_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT: lr.w a3, (a0)
+; RV64IA-TSO-NEXT: bne a3, a1, .LBB23_3
+; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB23_1 Depth=1
+; RV64IA-TSO-NEXT: sc.w a4, a2, (a0)
+; RV64IA-TSO-NEXT: bnez a4, .LBB23_1
+; RV64IA-TSO-NEXT: .LBB23_3:
+; RV64IA-TSO-NEXT: ret
+;
+; RV64IA-TSO-ZACAS-LABEL: cmpxchg_i32_release_monotonic:
+; RV64IA-TSO-ZACAS: # %bb.0:
+; RV64IA-TSO-ZACAS-NEXT: amocas.w a1, a2, (a0)
+; RV64IA-TSO-ZACAS-NEXT: ret
+;
+; RV64IA-TSO-ZABHA-LABEL: cmpxchg_i32_release_monotonic:
+; RV64IA-TSO-ZABHA: # %bb.0:
+; RV64IA-TSO-ZABHA-NEXT: amocas.w a1, a2, (a0)
+; RV64IA-TSO-ZABHA-NEXT: ret
+ %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val release monotonic
+ ret void
+}
+
+; Verifies i32 cmpxchg with release success / acquire failure ordering: libcall args
+; are 3/2; the WMO LR/SC loop needs lr.w.aq + sc.w.rl, and WMO Zacas/Zabha strengthen
+; to amocas.w.aqrl (TSO variants need no annotations).
+define void @cmpxchg_i32_release_acquire(ptr %ptr, i32 %cmp, i32 %val) nounwind {
+; RV32I-LABEL: cmpxchg_i32_release_acquire:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw a1, 8(sp)
+; RV32I-NEXT: addi a1, sp, 8
+; RV32I-NEXT: li a3, 3
+; RV32I-NEXT: li a4, 2
+; RV32I-NEXT: call __atomic_compare_exchange_4
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-WMO-LABEL: cmpxchg_i32_release_acquire:
+; RV32IA-WMO: # %bb.0:
+; RV32IA-WMO-NEXT: .LBB24_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NEXT: lr.w.aq a3, (a0)
+; RV32IA-WMO-NEXT: bne a3, a1, .LBB24_3
+; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB24_1 Depth=1
+; RV32IA-WMO-NEXT: sc.w.rl a4, a2, (a0)
+; RV32IA-WMO-NEXT: bnez a4, .LBB24_1
+; RV32IA-WMO-NEXT: .LBB24_3:
+; RV32IA-WMO-NEXT: ret
+;
+; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i32_release_acquire:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: amocas.w.aqrl a1, a2, (a0)
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-LABEL: cmpxchg_i32_release_acquire:
+; RV32IA-TSO: # %bb.0:
+; RV32IA-TSO-NEXT: .LBB24_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NEXT: lr.w a3, (a0)
+; RV32IA-TSO-NEXT: bne a3, a1, .LBB24_3
+; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB24_1 Depth=1
+; RV32IA-TSO-NEXT: sc.w a4, a2, (a0)
+; RV32IA-TSO-NEXT: bnez a4, .LBB24_1
+; RV32IA-TSO-NEXT: .LBB24_3:
+; RV32IA-TSO-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: cmpxchg_i32_release_acquire:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: amocas.w a1, a2, (a0)
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
+; RV64I-LABEL: cmpxchg_i32_release_acquire:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sw a1, 4(sp)
+; RV64I-NEXT: addi a1, sp, 4
+; RV64I-NEXT: li a3, 3
+; RV64I-NEXT: li a4, 2
+; RV64I-NEXT: call __atomic_compare_exchange_4
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-WMO-LABEL: cmpxchg_i32_release_acquire:
+; RV64IA-WMO: # %bb.0:
+; RV64IA-WMO-NEXT: .LBB24_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT: lr.w.aq a3, (a0)
+; RV64IA-WMO-NEXT: bne a3, a1, .LBB24_3
+; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB24_1 Depth=1
+; RV64IA-WMO-NEXT: sc.w.rl a4, a2, (a0)
+; RV64IA-WMO-NEXT: bnez a4, .LBB24_1
+; RV64IA-WMO-NEXT: .LBB24_3:
+; RV64IA-WMO-NEXT: ret
+;
+; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i32_release_acquire:
+; RV64IA-WMO-ZACAS: # %bb.0:
+; RV64IA-WMO-ZACAS-NEXT: amocas.w.aqrl a1, a2, (a0)
+; RV64IA-WMO-ZACAS-NEXT: ret
+;
+; RV64IA-WMO-ZABHA-LABEL: cmpxchg_i32_release_acquire:
+; RV64IA-WMO-ZABHA: # %bb.0:
+; RV64IA-WMO-ZABHA-NEXT: amocas.w.aqrl a1, a2, (a0)
+; RV64IA-WMO-ZABHA-NEXT: ret
+;
+; RV64IA-TSO-LABEL: cmpxchg_i32_release_acquire:
+; RV64IA-TSO: # %bb.0:
+; RV64IA-TSO-NEXT: .LBB24_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT: lr.w a3, (a0)
+; RV64IA-TSO-NEXT: bne a3, a1, .LBB24_3
+; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB24_1 Depth=1
+; RV64IA-TSO-NEXT: sc.w a4, a2, (a0)
+; RV64IA-TSO-NEXT: bnez a4, .LBB24_1
+; RV64IA-TSO-NEXT: .LBB24_3:
+; RV64IA-TSO-NEXT: ret
+;
+; RV64IA-TSO-ZACAS-LABEL: cmpxchg_i32_release_acquire:
+; RV64IA-TSO-ZACAS: # %bb.0:
+; RV64IA-TSO-ZACAS-NEXT: amocas.w a1, a2, (a0)
+; RV64IA-TSO-ZACAS-NEXT: ret
+;
+; RV64IA-TSO-ZABHA-LABEL: cmpxchg_i32_release_acquire:
+; RV64IA-TSO-ZABHA: # %bb.0:
+; RV64IA-TSO-ZABHA-NEXT: amocas.w a1, a2, (a0)
+; RV64IA-TSO-ZABHA-NEXT: ret
+ %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val release acquire
+ ret void
+}
+
+; Verifies i32 cmpxchg with acq_rel success / monotonic failure ordering: libcall
+; args are 4/0; WMO LR/SC uses lr.w.aq + sc.w.rl and WMO Zacas/Zabha use
+; amocas.w.aqrl, while TSO variants carry no ordering annotations.
+define void @cmpxchg_i32_acq_rel_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwind {
+; RV32I-LABEL: cmpxchg_i32_acq_rel_monotonic:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw a1, 8(sp)
+; RV32I-NEXT: addi a1, sp, 8
+; RV32I-NEXT: li a3, 4
+; RV32I-NEXT: li a4, 0
+; RV32I-NEXT: call __atomic_compare_exchange_4
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-WMO-LABEL: cmpxchg_i32_acq_rel_monotonic:
+; RV32IA-WMO: # %bb.0:
+; RV32IA-WMO-NEXT: .LBB25_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NEXT: lr.w.aq a3, (a0)
+; RV32IA-WMO-NEXT: bne a3, a1, .LBB25_3
+; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB25_1 Depth=1
+; RV32IA-WMO-NEXT: sc.w.rl a4, a2, (a0)
+; RV32IA-WMO-NEXT: bnez a4, .LBB25_1
+; RV32IA-WMO-NEXT: .LBB25_3:
+; RV32IA-WMO-NEXT: ret
+;
+; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i32_acq_rel_monotonic:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: amocas.w.aqrl a1, a2, (a0)
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-LABEL: cmpxchg_i32_acq_rel_monotonic:
+; RV32IA-TSO: # %bb.0:
+; RV32IA-TSO-NEXT: .LBB25_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NEXT: lr.w a3, (a0)
+; RV32IA-TSO-NEXT: bne a3, a1, .LBB25_3
+; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB25_1 Depth=1
+; RV32IA-TSO-NEXT: sc.w a4, a2, (a0)
+; RV32IA-TSO-NEXT: bnez a4, .LBB25_1
+; RV32IA-TSO-NEXT: .LBB25_3:
+; RV32IA-TSO-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: cmpxchg_i32_acq_rel_monotonic:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: amocas.w a1, a2, (a0)
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
+; RV64I-LABEL: cmpxchg_i32_acq_rel_monotonic:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sw a1, 4(sp)
+; RV64I-NEXT: addi a1, sp, 4
+; RV64I-NEXT: li a3, 4
+; RV64I-NEXT: li a4, 0
+; RV64I-NEXT: call __atomic_compare_exchange_4
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-WMO-LABEL: cmpxchg_i32_acq_rel_monotonic:
+; RV64IA-WMO: # %bb.0:
+; RV64IA-WMO-NEXT: .LBB25_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT: lr.w.aq a3, (a0)
+; RV64IA-WMO-NEXT: bne a3, a1, .LBB25_3
+; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB25_1 Depth=1
+; RV64IA-WMO-NEXT: sc.w.rl a4, a2, (a0)
+; RV64IA-WMO-NEXT: bnez a4, .LBB25_1
+; RV64IA-WMO-NEXT: .LBB25_3:
+; RV64IA-WMO-NEXT: ret
+;
+; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i32_acq_rel_monotonic:
+; RV64IA-WMO-ZACAS: # %bb.0:
+; RV64IA-WMO-ZACAS-NEXT: amocas.w.aqrl a1, a2, (a0)
+; RV64IA-WMO-ZACAS-NEXT: ret
+;
+; RV64IA-WMO-ZABHA-LABEL: cmpxchg_i32_acq_rel_monotonic:
+; RV64IA-WMO-ZABHA: # %bb.0:
+; RV64IA-WMO-ZABHA-NEXT: amocas.w.aqrl a1, a2, (a0)
+; RV64IA-WMO-ZABHA-NEXT: ret
+;
+; RV64IA-TSO-LABEL: cmpxchg_i32_acq_rel_monotonic:
+; RV64IA-TSO: # %bb.0:
+; RV64IA-TSO-NEXT: .LBB25_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT: lr.w a3, (a0)
+; RV64IA-TSO-NEXT: bne a3, a1, .LBB25_3
+; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB25_1 Depth=1
+; RV64IA-TSO-NEXT: sc.w a4, a2, (a0)
+; RV64IA-TSO-NEXT: bnez a4, .LBB25_1
+; RV64IA-TSO-NEXT: .LBB25_3:
+; RV64IA-TSO-NEXT: ret
+;
+; RV64IA-TSO-ZACAS-LABEL: cmpxchg_i32_acq_rel_monotonic:
+; RV64IA-TSO-ZACAS: # %bb.0:
+; RV64IA-TSO-ZACAS-NEXT: amocas.w a1, a2, (a0)
+; RV64IA-TSO-ZACAS-NEXT: ret
+;
+; RV64IA-TSO-ZABHA-LABEL: cmpxchg_i32_acq_rel_monotonic:
+; RV64IA-TSO-ZABHA: # %bb.0:
+; RV64IA-TSO-ZABHA-NEXT: amocas.w a1, a2, (a0)
+; RV64IA-TSO-ZABHA-NEXT: ret
+ %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val acq_rel monotonic
+ ret void
+}
+
+; Verifies i32 cmpxchg with acq_rel success / acquire failure ordering: libcall args
+; are 4/2; WMO lowering matches acq_rel/monotonic (lr.w.aq + sc.w.rl loop, or
+; amocas.w.aqrl under Zacas/Zabha), TSO variants stay unannotated.
+define void @cmpxchg_i32_acq_rel_acquire(ptr %ptr, i32 %cmp, i32 %val) nounwind {
+; RV32I-LABEL: cmpxchg_i32_acq_rel_acquire:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw a1, 8(sp)
+; RV32I-NEXT: addi a1, sp, 8
+; RV32I-NEXT: li a3, 4
+; RV32I-NEXT: li a4, 2
+; RV32I-NEXT: call __atomic_compare_exchange_4
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-WMO-LABEL: cmpxchg_i32_acq_rel_acquire:
+; RV32IA-WMO: # %bb.0:
+; RV32IA-WMO-NEXT: .LBB26_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NEXT: lr.w.aq a3, (a0)
+; RV32IA-WMO-NEXT: bne a3, a1, .LBB26_3
+; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB26_1 Depth=1
+; RV32IA-WMO-NEXT: sc.w.rl a4, a2, (a0)
+; RV32IA-WMO-NEXT: bnez a4, .LBB26_1
+; RV32IA-WMO-NEXT: .LBB26_3:
+; RV32IA-WMO-NEXT: ret
+;
+; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i32_acq_rel_acquire:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: amocas.w.aqrl a1, a2, (a0)
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-LABEL: cmpxchg_i32_acq_rel_acquire:
+; RV32IA-TSO: # %bb.0:
+; RV32IA-TSO-NEXT: .LBB26_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NEXT: lr.w a3, (a0)
+; RV32IA-TSO-NEXT: bne a3, a1, .LBB26_3
+; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB26_1 Depth=1
+; RV32IA-TSO-NEXT: sc.w a4, a2, (a0)
+; RV32IA-TSO-NEXT: bnez a4, .LBB26_1
+; RV32IA-TSO-NEXT: .LBB26_3:
+; RV32IA-TSO-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: cmpxchg_i32_acq_rel_acquire:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: amocas.w a1, a2, (a0)
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
+; RV64I-LABEL: cmpxchg_i32_acq_rel_acquire:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sw a1, 4(sp)
+; RV64I-NEXT: addi a1, sp, 4
+; RV64I-NEXT: li a3, 4
+; RV64I-NEXT: li a4, 2
+; RV64I-NEXT: call __atomic_compare_exchange_4
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-WMO-LABEL: cmpxchg_i32_acq_rel_acquire:
+; RV64IA-WMO: # %bb.0:
+; RV64IA-WMO-NEXT: .LBB26_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT: lr.w.aq a3, (a0)
+; RV64IA-WMO-NEXT: bne a3, a1, .LBB26_3
+; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB26_1 Depth=1
+; RV64IA-WMO-NEXT: sc.w.rl a4, a2, (a0)
+; RV64IA-WMO-NEXT: bnez a4, .LBB26_1
+; RV64IA-WMO-NEXT: .LBB26_3:
+; RV64IA-WMO-NEXT: ret
+;
+; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i32_acq_rel_acquire:
+; RV64IA-WMO-ZACAS: # %bb.0:
+; RV64IA-WMO-ZACAS-NEXT: amocas.w.aqrl a1, a2, (a0)
+; RV64IA-WMO-ZACAS-NEXT: ret
+;
+; RV64IA-WMO-ZABHA-LABEL: cmpxchg_i32_acq_rel_acquire:
+; RV64IA-WMO-ZABHA: # %bb.0:
+; RV64IA-WMO-ZABHA-NEXT: amocas.w.aqrl a1, a2, (a0)
+; RV64IA-WMO-ZABHA-NEXT: ret
+;
+; RV64IA-TSO-LABEL: cmpxchg_i32_acq_rel_acquire:
+; RV64IA-TSO: # %bb.0:
+; RV64IA-TSO-NEXT: .LBB26_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT: lr.w a3, (a0)
+; RV64IA-TSO-NEXT: bne a3, a1, .LBB26_3
+; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB26_1 Depth=1
+; RV64IA-TSO-NEXT: sc.w a4, a2, (a0)
+; RV64IA-TSO-NEXT: bnez a4, .LBB26_1
+; RV64IA-TSO-NEXT: .LBB26_3:
+; RV64IA-TSO-NEXT: ret
+;
+; RV64IA-TSO-ZACAS-LABEL: cmpxchg_i32_acq_rel_acquire:
+; RV64IA-TSO-ZACAS: # %bb.0:
+; RV64IA-TSO-ZACAS-NEXT: amocas.w a1, a2, (a0)
+; RV64IA-TSO-ZACAS-NEXT: ret
+;
+; RV64IA-TSO-ZABHA-LABEL: cmpxchg_i32_acq_rel_acquire:
+; RV64IA-TSO-ZABHA: # %bb.0:
+; RV64IA-TSO-ZABHA-NEXT: amocas.w a1, a2, (a0)
+; RV64IA-TSO-ZABHA-NEXT: ret
+ %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val acq_rel acquire
+ ret void
+}
+
+; Verifies i32 cmpxchg with seq_cst success / monotonic failure ordering: libcall
+; args are 5/0; both WMO and TSO LR/SC loops use lr.w.aqrl + sc.w.rl here, and
+; Zacas emits amocas.w.aqrl (WMO) or plain amocas.w (TSO) with no leading fence.
+define void @cmpxchg_i32_seq_cst_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwind {
+; RV32I-LABEL: cmpxchg_i32_seq_cst_monotonic:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw a1, 8(sp)
+; RV32I-NEXT: addi a1, sp, 8
+; RV32I-NEXT: li a3, 5
+; RV32I-NEXT: li a4, 0
+; RV32I-NEXT: call __atomic_compare_exchange_4
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-WMO-LABEL: cmpxchg_i32_seq_cst_monotonic:
+; RV32IA-WMO: # %bb.0:
+; RV32IA-WMO-NEXT: .LBB27_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NEXT: lr.w.aqrl a3, (a0)
+; RV32IA-WMO-NEXT: bne a3, a1, .LBB27_3
+; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB27_1 Depth=1
+; RV32IA-WMO-NEXT: sc.w.rl a4, a2, (a0)
+; RV32IA-WMO-NEXT: bnez a4, .LBB27_1
+; RV32IA-WMO-NEXT: .LBB27_3:
+; RV32IA-WMO-NEXT: ret
+;
+; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i32_seq_cst_monotonic:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: amocas.w.aqrl a1, a2, (a0)
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-LABEL: cmpxchg_i32_seq_cst_monotonic:
+; RV32IA-TSO: # %bb.0:
+; RV32IA-TSO-NEXT: .LBB27_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NEXT: lr.w.aqrl a3, (a0)
+; RV32IA-TSO-NEXT: bne a3, a1, .LBB27_3
+; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB27_1 Depth=1
+; RV32IA-TSO-NEXT: sc.w.rl a4, a2, (a0)
+; RV32IA-TSO-NEXT: bnez a4, .LBB27_1
+; RV32IA-TSO-NEXT: .LBB27_3:
+; RV32IA-TSO-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: cmpxchg_i32_seq_cst_monotonic:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: amocas.w a1, a2, (a0)
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
+; RV64I-LABEL: cmpxchg_i32_seq_cst_monotonic:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sw a1, 4(sp)
+; RV64I-NEXT: addi a1, sp, 4
+; RV64I-NEXT: li a3, 5
+; RV64I-NEXT: li a4, 0
+; RV64I-NEXT: call __atomic_compare_exchange_4
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-WMO-LABEL: cmpxchg_i32_seq_cst_monotonic:
+; RV64IA-WMO: # %bb.0:
+; RV64IA-WMO-NEXT: .LBB27_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT: lr.w.aqrl a3, (a0)
+; RV64IA-WMO-NEXT: bne a3, a1, .LBB27_3
+; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB27_1 Depth=1
+; RV64IA-WMO-NEXT: sc.w.rl a4, a2, (a0)
+; RV64IA-WMO-NEXT: bnez a4, .LBB27_1
+; RV64IA-WMO-NEXT: .LBB27_3:
+; RV64IA-WMO-NEXT: ret
+;
+; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i32_seq_cst_monotonic:
+; RV64IA-WMO-ZACAS: # %bb.0:
+; RV64IA-WMO-ZACAS-NEXT: amocas.w.aqrl a1, a2, (a0)
+; RV64IA-WMO-ZACAS-NEXT: ret
+;
+; RV64IA-WMO-ZABHA-LABEL: cmpxchg_i32_seq_cst_monotonic:
+; RV64IA-WMO-ZABHA: # %bb.0:
+; RV64IA-WMO-ZABHA-NEXT: amocas.w.aqrl a1, a2, (a0)
+; RV64IA-WMO-ZABHA-NEXT: ret
+;
+; RV64IA-TSO-LABEL: cmpxchg_i32_seq_cst_monotonic:
+; RV64IA-TSO: # %bb.0:
+; RV64IA-TSO-NEXT: .LBB27_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT: lr.w.aqrl a3, (a0)
+; RV64IA-TSO-NEXT: bne a3, a1, .LBB27_3
+; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB27_1 Depth=1
+; RV64IA-TSO-NEXT: sc.w.rl a4, a2, (a0)
+; RV64IA-TSO-NEXT: bnez a4, .LBB27_1
+; RV64IA-TSO-NEXT: .LBB27_3:
+; RV64IA-TSO-NEXT: ret
+;
+; RV64IA-TSO-ZACAS-LABEL: cmpxchg_i32_seq_cst_monotonic:
+; RV64IA-TSO-ZACAS: # %bb.0:
+; RV64IA-TSO-ZACAS-NEXT: amocas.w a1, a2, (a0)
+; RV64IA-TSO-ZACAS-NEXT: ret
+;
+; RV64IA-TSO-ZABHA-LABEL: cmpxchg_i32_seq_cst_monotonic:
+; RV64IA-TSO-ZABHA: # %bb.0:
+; RV64IA-TSO-ZABHA-NEXT: amocas.w a1, a2, (a0)
+; RV64IA-TSO-ZABHA-NEXT: ret
+ %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val seq_cst monotonic
+ ret void
+}
+
+; Verifies i32 cmpxchg with seq_cst success / acquire failure ordering: libcall args
+; are 5/2; the expected lowering matches seq_cst/monotonic (lr.w.aqrl + sc.w.rl loop,
+; or amocas.w[.aqrl] under Zacas/Zabha, still without a leading fence).
+define void @cmpxchg_i32_seq_cst_acquire(ptr %ptr, i32 %cmp, i32 %val) nounwind {
+; RV32I-LABEL: cmpxchg_i32_seq_cst_acquire:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw a1, 8(sp)
+; RV32I-NEXT: addi a1, sp, 8
+; RV32I-NEXT: li a3, 5
+; RV32I-NEXT: li a4, 2
+; RV32I-NEXT: call __atomic_compare_exchange_4
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-WMO-LABEL: cmpxchg_i32_seq_cst_acquire:
+; RV32IA-WMO: # %bb.0:
+; RV32IA-WMO-NEXT: .LBB28_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NEXT: lr.w.aqrl a3, (a0)
+; RV32IA-WMO-NEXT: bne a3, a1, .LBB28_3
+; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB28_1 Depth=1
+; RV32IA-WMO-NEXT: sc.w.rl a4, a2, (a0)
+; RV32IA-WMO-NEXT: bnez a4, .LBB28_1
+; RV32IA-WMO-NEXT: .LBB28_3:
+; RV32IA-WMO-NEXT: ret
+;
+; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i32_seq_cst_acquire:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: amocas.w.aqrl a1, a2, (a0)
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-LABEL: cmpxchg_i32_seq_cst_acquire:
+; RV32IA-TSO: # %bb.0:
+; RV32IA-TSO-NEXT: .LBB28_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NEXT: lr.w.aqrl a3, (a0)
+; RV32IA-TSO-NEXT: bne a3, a1, .LBB28_3
+; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB28_1 Depth=1
+; RV32IA-TSO-NEXT: sc.w.rl a4, a2, (a0)
+; RV32IA-TSO-NEXT: bnez a4, .LBB28_1
+; RV32IA-TSO-NEXT: .LBB28_3:
+; RV32IA-TSO-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: cmpxchg_i32_seq_cst_acquire:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: amocas.w a1, a2, (a0)
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
+; RV64I-LABEL: cmpxchg_i32_seq_cst_acquire:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sw a1, 4(sp)
+; RV64I-NEXT: addi a1, sp, 4
+; RV64I-NEXT: li a3, 5
+; RV64I-NEXT: li a4, 2
+; RV64I-NEXT: call __atomic_compare_exchange_4
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-WMO-LABEL: cmpxchg_i32_seq_cst_acquire:
+; RV64IA-WMO: # %bb.0:
+; RV64IA-WMO-NEXT: .LBB28_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT: lr.w.aqrl a3, (a0)
+; RV64IA-WMO-NEXT: bne a3, a1, .LBB28_3
+; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB28_1 Depth=1
+; RV64IA-WMO-NEXT: sc.w.rl a4, a2, (a0)
+; RV64IA-WMO-NEXT: bnez a4, .LBB28_1
+; RV64IA-WMO-NEXT: .LBB28_3:
+; RV64IA-WMO-NEXT: ret
+;
+; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i32_seq_cst_acquire:
+; RV64IA-WMO-ZACAS: # %bb.0:
+; RV64IA-WMO-ZACAS-NEXT: amocas.w.aqrl a1, a2, (a0)
+; RV64IA-WMO-ZACAS-NEXT: ret
+;
+; RV64IA-WMO-ZABHA-LABEL: cmpxchg_i32_seq_cst_acquire:
+; RV64IA-WMO-ZABHA: # %bb.0:
+; RV64IA-WMO-ZABHA-NEXT: amocas.w.aqrl a1, a2, (a0)
+; RV64IA-WMO-ZABHA-NEXT: ret
+;
+; RV64IA-TSO-LABEL: cmpxchg_i32_seq_cst_acquire:
+; RV64IA-TSO: # %bb.0:
+; RV64IA-TSO-NEXT: .LBB28_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT: lr.w.aqrl a3, (a0)
+; RV64IA-TSO-NEXT: bne a3, a1, .LBB28_3
+; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB28_1 Depth=1
+; RV64IA-TSO-NEXT: sc.w.rl a4, a2, (a0)
+; RV64IA-TSO-NEXT: bnez a4, .LBB28_1
+; RV64IA-TSO-NEXT: .LBB28_3:
+; RV64IA-TSO-NEXT: ret
+;
+; RV64IA-TSO-ZACAS-LABEL: cmpxchg_i32_seq_cst_acquire:
+; RV64IA-TSO-ZACAS: # %bb.0:
+; RV64IA-TSO-ZACAS-NEXT: amocas.w a1, a2, (a0)
+; RV64IA-TSO-ZACAS-NEXT: ret
+;
+; RV64IA-TSO-ZABHA-LABEL: cmpxchg_i32_seq_cst_acquire:
+; RV64IA-TSO-ZABHA: # %bb.0:
+; RV64IA-TSO-ZABHA-NEXT: amocas.w a1, a2, (a0)
+; RV64IA-TSO-ZABHA-NEXT: ret
+ %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val seq_cst acquire
+ ret void
+}
+
+; Verifies i32 cmpxchg with fully seq_cst ordering (libcall args 5/5). Unlike the
+; weaker-failure cases above, every Zacas/Zabha configuration here emits a leading
+; "fence rw, rw" before the amocas; the LR/SC loops are unchanged from seq_cst/monotonic.
+define void @cmpxchg_i32_seq_cst_seq_cst(ptr %ptr, i32 %cmp, i32 %val) nounwind {
+; RV32I-LABEL: cmpxchg_i32_seq_cst_seq_cst:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw a1, 8(sp)
+; RV32I-NEXT: addi a1, sp, 8
+; RV32I-NEXT: li a3, 5
+; RV32I-NEXT: li a4, 5
+; RV32I-NEXT: call __atomic_compare_exchange_4
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-WMO-LABEL: cmpxchg_i32_seq_cst_seq_cst:
+; RV32IA-WMO: # %bb.0:
+; RV32IA-WMO-NEXT: .LBB29_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NEXT: lr.w.aqrl a3, (a0)
+; RV32IA-WMO-NEXT: bne a3, a1, .LBB29_3
+; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB29_1 Depth=1
+; RV32IA-WMO-NEXT: sc.w.rl a4, a2, (a0)
+; RV32IA-WMO-NEXT: bnez a4, .LBB29_1
+; RV32IA-WMO-NEXT: .LBB29_3:
+; RV32IA-WMO-NEXT: ret
+;
+; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i32_seq_cst_seq_cst:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: fence rw, rw
+; RV32IA-WMO-ZACAS-NEXT: amocas.w.aqrl a1, a2, (a0)
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-LABEL: cmpxchg_i32_seq_cst_seq_cst:
+; RV32IA-TSO: # %bb.0:
+; RV32IA-TSO-NEXT: .LBB29_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NEXT: lr.w.aqrl a3, (a0)
+; RV32IA-TSO-NEXT: bne a3, a1, .LBB29_3
+; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB29_1 Depth=1
+; RV32IA-TSO-NEXT: sc.w.rl a4, a2, (a0)
+; RV32IA-TSO-NEXT: bnez a4, .LBB29_1
+; RV32IA-TSO-NEXT: .LBB29_3:
+; RV32IA-TSO-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: cmpxchg_i32_seq_cst_seq_cst:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: fence rw, rw
+; RV32IA-TSO-ZACAS-NEXT: amocas.w a1, a2, (a0)
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
+; RV64I-LABEL: cmpxchg_i32_seq_cst_seq_cst:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sw a1, 4(sp)
+; RV64I-NEXT: addi a1, sp, 4
+; RV64I-NEXT: li a3, 5
+; RV64I-NEXT: li a4, 5
+; RV64I-NEXT: call __atomic_compare_exchange_4
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-WMO-LABEL: cmpxchg_i32_seq_cst_seq_cst:
+; RV64IA-WMO: # %bb.0:
+; RV64IA-WMO-NEXT: .LBB29_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT: lr.w.aqrl a3, (a0)
+; RV64IA-WMO-NEXT: bne a3, a1, .LBB29_3
+; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB29_1 Depth=1
+; RV64IA-WMO-NEXT: sc.w.rl a4, a2, (a0)
+; RV64IA-WMO-NEXT: bnez a4, .LBB29_1
+; RV64IA-WMO-NEXT: .LBB29_3:
+; RV64IA-WMO-NEXT: ret
+;
+; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i32_seq_cst_seq_cst:
+; RV64IA-WMO-ZACAS: # %bb.0:
+; RV64IA-WMO-ZACAS-NEXT: fence rw, rw
+; RV64IA-WMO-ZACAS-NEXT: amocas.w.aqrl a1, a2, (a0)
+; RV64IA-WMO-ZACAS-NEXT: ret
+;
+; RV64IA-WMO-ZABHA-LABEL: cmpxchg_i32_seq_cst_seq_cst:
+; RV64IA-WMO-ZABHA: # %bb.0:
+; RV64IA-WMO-ZABHA-NEXT: fence rw, rw
+; RV64IA-WMO-ZABHA-NEXT: amocas.w.aqrl a1, a2, (a0)
+; RV64IA-WMO-ZABHA-NEXT: ret
+;
+; RV64IA-TSO-LABEL: cmpxchg_i32_seq_cst_seq_cst:
+; RV64IA-TSO: # %bb.0:
+; RV64IA-TSO-NEXT: .LBB29_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT: lr.w.aqrl a3, (a0)
+; RV64IA-TSO-NEXT: bne a3, a1, .LBB29_3
+; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB29_1 Depth=1
+; RV64IA-TSO-NEXT: sc.w.rl a4, a2, (a0)
+; RV64IA-TSO-NEXT: bnez a4, .LBB29_1
+; RV64IA-TSO-NEXT: .LBB29_3:
+; RV64IA-TSO-NEXT: ret
+;
+; RV64IA-TSO-ZACAS-LABEL: cmpxchg_i32_seq_cst_seq_cst:
+; RV64IA-TSO-ZACAS: # %bb.0:
+; RV64IA-TSO-ZACAS-NEXT: fence rw, rw
+; RV64IA-TSO-ZACAS-NEXT: amocas.w a1, a2, (a0)
+; RV64IA-TSO-ZACAS-NEXT: ret
+;
+; RV64IA-TSO-ZABHA-LABEL: cmpxchg_i32_seq_cst_seq_cst:
+; RV64IA-TSO-ZABHA: # %bb.0:
+; RV64IA-TSO-ZABHA-NEXT: fence rw, rw
+; RV64IA-TSO-ZABHA-NEXT: amocas.w a1, a2, (a0)
+; RV64IA-TSO-ZABHA-NEXT: ret
+ %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val seq_cst seq_cst
+ ret void
+}
+
+; Verifies i64 cmpxchg with monotonic/monotonic ordering. On RV32 (with or without A)
+; i64 is not natively atomic, so both RV32I and RV32IA call __atomic_compare_exchange_8
+; with the expected-value pair spilled to the stack; RV64 uses lr.d/sc.d or amocas.d.
+define void @cmpxchg_i64_monotonic_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwind {
+; RV32I-LABEL: cmpxchg_i64_monotonic_monotonic:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw a1, 0(sp)
+; RV32I-NEXT: sw a2, 4(sp)
+; RV32I-NEXT: mv a1, sp
+; RV32I-NEXT: mv a2, a3
+; RV32I-NEXT: mv a3, a4
+; RV32I-NEXT: li a4, 0
+; RV32I-NEXT: li a5, 0
+; RV32I-NEXT: call __atomic_compare_exchange_8
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-LABEL: cmpxchg_i64_monotonic_monotonic:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: addi sp, sp, -16
+; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: sw a1, 0(sp)
+; RV32IA-NEXT: sw a2, 4(sp)
+; RV32IA-NEXT: mv a1, sp
+; RV32IA-NEXT: mv a2, a3
+; RV32IA-NEXT: mv a3, a4
+; RV32IA-NEXT: li a4, 0
+; RV32IA-NEXT: li a5, 0
+; RV32IA-NEXT: call __atomic_compare_exchange_8
+; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: addi sp, sp, 16
+; RV32IA-NEXT: ret
+;
+; RV64I-LABEL: cmpxchg_i64_monotonic_monotonic:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd a1, 0(sp)
+; RV64I-NEXT: mv a1, sp
+; RV64I-NEXT: li a3, 0
+; RV64I-NEXT: li a4, 0
+; RV64I-NEXT: call __atomic_compare_exchange_8
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-WMO-LABEL: cmpxchg_i64_monotonic_monotonic:
+; RV64IA-WMO: # %bb.0:
+; RV64IA-WMO-NEXT: .LBB30_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT: lr.d a3, (a0)
+; RV64IA-WMO-NEXT: bne a3, a1, .LBB30_3
+; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB30_1 Depth=1
+; RV64IA-WMO-NEXT: sc.d a4, a2, (a0)
+; RV64IA-WMO-NEXT: bnez a4, .LBB30_1
+; RV64IA-WMO-NEXT: .LBB30_3:
+; RV64IA-WMO-NEXT: ret
+;
+; RV64IA-ZACAS-LABEL: cmpxchg_i64_monotonic_monotonic:
+; RV64IA-ZACAS: # %bb.0:
+; RV64IA-ZACAS-NEXT: amocas.d a1, a2, (a0)
+; RV64IA-ZACAS-NEXT: ret
+;
+; RV64IA-ZABHA-LABEL: cmpxchg_i64_monotonic_monotonic:
+; RV64IA-ZABHA: # %bb.0:
+; RV64IA-ZABHA-NEXT: amocas.d a1, a2, (a0)
+; RV64IA-ZABHA-NEXT: ret
+;
+; RV64IA-TSO-LABEL: cmpxchg_i64_monotonic_monotonic:
+; RV64IA-TSO: # %bb.0:
+; RV64IA-TSO-NEXT: .LBB30_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT: lr.d a3, (a0)
+; RV64IA-TSO-NEXT: bne a3, a1, .LBB30_3
+; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB30_1 Depth=1
+; RV64IA-TSO-NEXT: sc.d a4, a2, (a0)
+; RV64IA-TSO-NEXT: bnez a4, .LBB30_1
+; RV64IA-TSO-NEXT: .LBB30_3:
+; RV64IA-TSO-NEXT: ret
+ %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val monotonic monotonic
+ ret void
+}
+
+; Verifies i64 cmpxchg with acquire success / monotonic failure ordering: RV32 (I and
+; IA) shuffles the split i64 value into libcall argument registers and calls
+; __atomic_compare_exchange_8 (args 2/0); RV64 uses lr.d.aq/sc.d or amocas.d[.aq].
+define void @cmpxchg_i64_acquire_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwind {
+; RV32I-LABEL: cmpxchg_i64_acquire_monotonic:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: mv a5, a3
+; RV32I-NEXT: mv a3, a4
+; RV32I-NEXT: sw a1, 0(sp)
+; RV32I-NEXT: sw a2, 4(sp)
+; RV32I-NEXT: mv a1, sp
+; RV32I-NEXT: li a4, 2
+; RV32I-NEXT: mv a2, a5
+; RV32I-NEXT: li a5, 0
+; RV32I-NEXT: call __atomic_compare_exchange_8
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-LABEL: cmpxchg_i64_acquire_monotonic:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: addi sp, sp, -16
+; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: mv a5, a3
+; RV32IA-NEXT: mv a3, a4
+; RV32IA-NEXT: sw a1, 0(sp)
+; RV32IA-NEXT: sw a2, 4(sp)
+; RV32IA-NEXT: mv a1, sp
+; RV32IA-NEXT: li a4, 2
+; RV32IA-NEXT: mv a2, a5
+; RV32IA-NEXT: li a5, 0
+; RV32IA-NEXT: call __atomic_compare_exchange_8
+; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: addi sp, sp, 16
+; RV32IA-NEXT: ret
+;
+; RV64I-LABEL: cmpxchg_i64_acquire_monotonic:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd a1, 0(sp)
+; RV64I-NEXT: mv a1, sp
+; RV64I-NEXT: li a3, 2
+; RV64I-NEXT: li a4, 0
+; RV64I-NEXT: call __atomic_compare_exchange_8
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-WMO-LABEL: cmpxchg_i64_acquire_monotonic:
+; RV64IA-WMO: # %bb.0:
+; RV64IA-WMO-NEXT: .LBB31_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT: lr.d.aq a3, (a0)
+; RV64IA-WMO-NEXT: bne a3, a1, .LBB31_3
+; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB31_1 Depth=1
+; RV64IA-WMO-NEXT: sc.d a4, a2, (a0)
+; RV64IA-WMO-NEXT: bnez a4, .LBB31_1
+; RV64IA-WMO-NEXT: .LBB31_3:
+; RV64IA-WMO-NEXT: ret
+;
+; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i64_acquire_monotonic:
+; RV64IA-WMO-ZACAS: # %bb.0:
+; RV64IA-WMO-ZACAS-NEXT: amocas.d.aq a1, a2, (a0)
+; RV64IA-WMO-ZACAS-NEXT: ret
+;
+; RV64IA-WMO-ZABHA-LABEL: cmpxchg_i64_acquire_monotonic:
+; RV64IA-WMO-ZABHA: # %bb.0:
+; RV64IA-WMO-ZABHA-NEXT: amocas.d.aq a1, a2, (a0)
+; RV64IA-WMO-ZABHA-NEXT: ret
+;
+; RV64IA-TSO-LABEL: cmpxchg_i64_acquire_monotonic:
+; RV64IA-TSO: # %bb.0:
+; RV64IA-TSO-NEXT: .LBB31_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT: lr.d a3, (a0)
+; RV64IA-TSO-NEXT: bne a3, a1, .LBB31_3
+; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB31_1 Depth=1
+; RV64IA-TSO-NEXT: sc.d a4, a2, (a0)
+; RV64IA-TSO-NEXT: bnez a4, .LBB31_1
+; RV64IA-TSO-NEXT: .LBB31_3:
+; RV64IA-TSO-NEXT: ret
+;
+; RV64IA-TSO-ZACAS-LABEL: cmpxchg_i64_acquire_monotonic:
+; RV64IA-TSO-ZACAS: # %bb.0:
+; RV64IA-TSO-ZACAS-NEXT: amocas.d a1, a2, (a0)
+; RV64IA-TSO-ZACAS-NEXT: ret
+;
+; RV64IA-TSO-ZABHA-LABEL: cmpxchg_i64_acquire_monotonic:
+; RV64IA-TSO-ZABHA: # %bb.0:
+; RV64IA-TSO-ZABHA-NEXT: amocas.d a1, a2, (a0)
+; RV64IA-TSO-ZABHA-NEXT: ret
+ %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val acquire monotonic
+ ret void
+}
+
+define void @cmpxchg_i64_acquire_acquire(ptr %ptr, i64 %cmp, i64 %val) nounwind {
+; RV32I-LABEL: cmpxchg_i64_acquire_acquire:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: mv a6, a3
+; RV32I-NEXT: mv a3, a4
+; RV32I-NEXT: sw a1, 0(sp)
+; RV32I-NEXT: sw a2, 4(sp)
+; RV32I-NEXT: mv a1, sp
+; RV32I-NEXT: li a4, 2
+; RV32I-NEXT: li a5, 2
+; RV32I-NEXT: mv a2, a6
+; RV32I-NEXT: call __atomic_compare_exchange_8
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-LABEL: cmpxchg_i64_acquire_acquire:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: addi sp, sp, -16
+; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: mv a6, a3
+; RV32IA-NEXT: mv a3, a4
+; RV32IA-NEXT: sw a1, 0(sp)
+; RV32IA-NEXT: sw a2, 4(sp)
+; RV32IA-NEXT: mv a1, sp
+; RV32IA-NEXT: li a4, 2
+; RV32IA-NEXT: li a5, 2
+; RV32IA-NEXT: mv a2, a6
+; RV32IA-NEXT: call __atomic_compare_exchange_8
+; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: addi sp, sp, 16
+; RV32IA-NEXT: ret
+;
+; RV64I-LABEL: cmpxchg_i64_acquire_acquire:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd a1, 0(sp)
+; RV64I-NEXT: mv a1, sp
+; RV64I-NEXT: li a3, 2
+; RV64I-NEXT: li a4, 2
+; RV64I-NEXT: call __atomic_compare_exchange_8
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-WMO-LABEL: cmpxchg_i64_acquire_acquire:
+; RV64IA-WMO: # %bb.0:
+; RV64IA-WMO-NEXT: .LBB32_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT: lr.d.aq a3, (a0)
+; RV64IA-WMO-NEXT: bne a3, a1, .LBB32_3
+; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB32_1 Depth=1
+; RV64IA-WMO-NEXT: sc.d a4, a2, (a0)
+; RV64IA-WMO-NEXT: bnez a4, .LBB32_1
+; RV64IA-WMO-NEXT: .LBB32_3:
+; RV64IA-WMO-NEXT: ret
+;
+; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i64_acquire_acquire:
+; RV64IA-WMO-ZACAS: # %bb.0:
+; RV64IA-WMO-ZACAS-NEXT: amocas.d.aq a1, a2, (a0)
+; RV64IA-WMO-ZACAS-NEXT: ret
+;
+; RV64IA-WMO-ZABHA-LABEL: cmpxchg_i64_acquire_acquire:
+; RV64IA-WMO-ZABHA: # %bb.0:
+; RV64IA-WMO-ZABHA-NEXT: amocas.d.aq a1, a2, (a0)
+; RV64IA-WMO-ZABHA-NEXT: ret
+;
+; RV64IA-TSO-LABEL: cmpxchg_i64_acquire_acquire:
+; RV64IA-TSO: # %bb.0:
+; RV64IA-TSO-NEXT: .LBB32_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT: lr.d a3, (a0)
+; RV64IA-TSO-NEXT: bne a3, a1, .LBB32_3
+; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB32_1 Depth=1
+; RV64IA-TSO-NEXT: sc.d a4, a2, (a0)
+; RV64IA-TSO-NEXT: bnez a4, .LBB32_1
+; RV64IA-TSO-NEXT: .LBB32_3:
+; RV64IA-TSO-NEXT: ret
+;
+; RV64IA-TSO-ZACAS-LABEL: cmpxchg_i64_acquire_acquire:
+; RV64IA-TSO-ZACAS: # %bb.0:
+; RV64IA-TSO-ZACAS-NEXT: amocas.d a1, a2, (a0)
+; RV64IA-TSO-ZACAS-NEXT: ret
+;
+; RV64IA-TSO-ZABHA-LABEL: cmpxchg_i64_acquire_acquire:
+; RV64IA-TSO-ZABHA: # %bb.0:
+; RV64IA-TSO-ZABHA-NEXT: amocas.d a1, a2, (a0)
+; RV64IA-TSO-ZABHA-NEXT: ret
+ %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val acquire acquire
+ ret void
+}
+
+define void @cmpxchg_i64_release_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwind {
+; RV32I-LABEL: cmpxchg_i64_release_monotonic:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: mv a5, a3
+; RV32I-NEXT: mv a3, a4
+; RV32I-NEXT: sw a1, 0(sp)
+; RV32I-NEXT: sw a2, 4(sp)
+; RV32I-NEXT: mv a1, sp
+; RV32I-NEXT: li a4, 3
+; RV32I-NEXT: mv a2, a5
+; RV32I-NEXT: li a5, 0
+; RV32I-NEXT: call __atomic_compare_exchange_8
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-LABEL: cmpxchg_i64_release_monotonic:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: addi sp, sp, -16
+; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: mv a5, a3
+; RV32IA-NEXT: mv a3, a4
+; RV32IA-NEXT: sw a1, 0(sp)
+; RV32IA-NEXT: sw a2, 4(sp)
+; RV32IA-NEXT: mv a1, sp
+; RV32IA-NEXT: li a4, 3
+; RV32IA-NEXT: mv a2, a5
+; RV32IA-NEXT: li a5, 0
+; RV32IA-NEXT: call __atomic_compare_exchange_8
+; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: addi sp, sp, 16
+; RV32IA-NEXT: ret
+;
+; RV64I-LABEL: cmpxchg_i64_release_monotonic:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd a1, 0(sp)
+; RV64I-NEXT: mv a1, sp
+; RV64I-NEXT: li a3, 3
+; RV64I-NEXT: li a4, 0
+; RV64I-NEXT: call __atomic_compare_exchange_8
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-WMO-LABEL: cmpxchg_i64_release_monotonic:
+; RV64IA-WMO: # %bb.0:
+; RV64IA-WMO-NEXT: .LBB33_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT: lr.d a3, (a0)
+; RV64IA-WMO-NEXT: bne a3, a1, .LBB33_3
+; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB33_1 Depth=1
+; RV64IA-WMO-NEXT: sc.d.rl a4, a2, (a0)
+; RV64IA-WMO-NEXT: bnez a4, .LBB33_1
+; RV64IA-WMO-NEXT: .LBB33_3:
+; RV64IA-WMO-NEXT: ret
+;
+; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i64_release_monotonic:
+; RV64IA-WMO-ZACAS: # %bb.0:
+; RV64IA-WMO-ZACAS-NEXT: amocas.d.rl a1, a2, (a0)
+; RV64IA-WMO-ZACAS-NEXT: ret
+;
+; RV64IA-WMO-ZABHA-LABEL: cmpxchg_i64_release_monotonic:
+; RV64IA-WMO-ZABHA: # %bb.0:
+; RV64IA-WMO-ZABHA-NEXT: amocas.d.rl a1, a2, (a0)
+; RV64IA-WMO-ZABHA-NEXT: ret
+;
+; RV64IA-TSO-LABEL: cmpxchg_i64_release_monotonic:
+; RV64IA-TSO: # %bb.0:
+; RV64IA-TSO-NEXT: .LBB33_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT: lr.d a3, (a0)
+; RV64IA-TSO-NEXT: bne a3, a1, .LBB33_3
+; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB33_1 Depth=1
+; RV64IA-TSO-NEXT: sc.d a4, a2, (a0)
+; RV64IA-TSO-NEXT: bnez a4, .LBB33_1
+; RV64IA-TSO-NEXT: .LBB33_3:
+; RV64IA-TSO-NEXT: ret
+;
+; RV64IA-TSO-ZACAS-LABEL: cmpxchg_i64_release_monotonic:
+; RV64IA-TSO-ZACAS: # %bb.0:
+; RV64IA-TSO-ZACAS-NEXT: amocas.d a1, a2, (a0)
+; RV64IA-TSO-ZACAS-NEXT: ret
+;
+; RV64IA-TSO-ZABHA-LABEL: cmpxchg_i64_release_monotonic:
+; RV64IA-TSO-ZABHA: # %bb.0:
+; RV64IA-TSO-ZABHA-NEXT: amocas.d a1, a2, (a0)
+; RV64IA-TSO-ZABHA-NEXT: ret
+ %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val release monotonic
+ ret void
+}
+
+define void @cmpxchg_i64_release_acquire(ptr %ptr, i64 %cmp, i64 %val) nounwind {
+; RV32I-LABEL: cmpxchg_i64_release_acquire:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: mv a6, a3
+; RV32I-NEXT: mv a3, a4
+; RV32I-NEXT: sw a1, 0(sp)
+; RV32I-NEXT: sw a2, 4(sp)
+; RV32I-NEXT: mv a1, sp
+; RV32I-NEXT: li a4, 3
+; RV32I-NEXT: li a5, 2
+; RV32I-NEXT: mv a2, a6
+; RV32I-NEXT: call __atomic_compare_exchange_8
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-LABEL: cmpxchg_i64_release_acquire:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: addi sp, sp, -16
+; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: mv a6, a3
+; RV32IA-NEXT: mv a3, a4
+; RV32IA-NEXT: sw a1, 0(sp)
+; RV32IA-NEXT: sw a2, 4(sp)
+; RV32IA-NEXT: mv a1, sp
+; RV32IA-NEXT: li a4, 3
+; RV32IA-NEXT: li a5, 2
+; RV32IA-NEXT: mv a2, a6
+; RV32IA-NEXT: call __atomic_compare_exchange_8
+; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: addi sp, sp, 16
+; RV32IA-NEXT: ret
+;
+; RV64I-LABEL: cmpxchg_i64_release_acquire:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd a1, 0(sp)
+; RV64I-NEXT: mv a1, sp
+; RV64I-NEXT: li a3, 3
+; RV64I-NEXT: li a4, 2
+; RV64I-NEXT: call __atomic_compare_exchange_8
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-WMO-LABEL: cmpxchg_i64_release_acquire:
+; RV64IA-WMO: # %bb.0:
+; RV64IA-WMO-NEXT: .LBB34_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT: lr.d.aq a3, (a0)
+; RV64IA-WMO-NEXT: bne a3, a1, .LBB34_3
+; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB34_1 Depth=1
+; RV64IA-WMO-NEXT: sc.d.rl a4, a2, (a0)
+; RV64IA-WMO-NEXT: bnez a4, .LBB34_1
+; RV64IA-WMO-NEXT: .LBB34_3:
+; RV64IA-WMO-NEXT: ret
+;
+; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i64_release_acquire:
+; RV64IA-WMO-ZACAS: # %bb.0:
+; RV64IA-WMO-ZACAS-NEXT: amocas.d.aqrl a1, a2, (a0)
+; RV64IA-WMO-ZACAS-NEXT: ret
+;
+; RV64IA-WMO-ZABHA-LABEL: cmpxchg_i64_release_acquire:
+; RV64IA-WMO-ZABHA: # %bb.0:
+; RV64IA-WMO-ZABHA-NEXT: amocas.d.aqrl a1, a2, (a0)
+; RV64IA-WMO-ZABHA-NEXT: ret
+;
+; RV64IA-TSO-LABEL: cmpxchg_i64_release_acquire:
+; RV64IA-TSO: # %bb.0:
+; RV64IA-TSO-NEXT: .LBB34_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT: lr.d a3, (a0)
+; RV64IA-TSO-NEXT: bne a3, a1, .LBB34_3
+; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB34_1 Depth=1
+; RV64IA-TSO-NEXT: sc.d a4, a2, (a0)
+; RV64IA-TSO-NEXT: bnez a4, .LBB34_1
+; RV64IA-TSO-NEXT: .LBB34_3:
+; RV64IA-TSO-NEXT: ret
+;
+; RV64IA-TSO-ZACAS-LABEL: cmpxchg_i64_release_acquire:
+; RV64IA-TSO-ZACAS: # %bb.0:
+; RV64IA-TSO-ZACAS-NEXT: amocas.d a1, a2, (a0)
+; RV64IA-TSO-ZACAS-NEXT: ret
+;
+; RV64IA-TSO-ZABHA-LABEL: cmpxchg_i64_release_acquire:
+; RV64IA-TSO-ZABHA: # %bb.0:
+; RV64IA-TSO-ZABHA-NEXT: amocas.d a1, a2, (a0)
+; RV64IA-TSO-ZABHA-NEXT: ret
+ %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val release acquire
+ ret void
+}
+
+define void @cmpxchg_i64_acq_rel_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwind {
+; RV32I-LABEL: cmpxchg_i64_acq_rel_monotonic:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: mv a5, a3
+; RV32I-NEXT: mv a3, a4
+; RV32I-NEXT: sw a1, 0(sp)
+; RV32I-NEXT: sw a2, 4(sp)
+; RV32I-NEXT: mv a1, sp
+; RV32I-NEXT: li a4, 4
+; RV32I-NEXT: mv a2, a5
+; RV32I-NEXT: li a5, 0
+; RV32I-NEXT: call __atomic_compare_exchange_8
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-LABEL: cmpxchg_i64_acq_rel_monotonic:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: addi sp, sp, -16
+; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: mv a5, a3
+; RV32IA-NEXT: mv a3, a4
+; RV32IA-NEXT: sw a1, 0(sp)
+; RV32IA-NEXT: sw a2, 4(sp)
+; RV32IA-NEXT: mv a1, sp
+; RV32IA-NEXT: li a4, 4
+; RV32IA-NEXT: mv a2, a5
+; RV32IA-NEXT: li a5, 0
+; RV32IA-NEXT: call __atomic_compare_exchange_8
+; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: addi sp, sp, 16
+; RV32IA-NEXT: ret
+;
+; RV64I-LABEL: cmpxchg_i64_acq_rel_monotonic:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd a1, 0(sp)
+; RV64I-NEXT: mv a1, sp
+; RV64I-NEXT: li a3, 4
+; RV64I-NEXT: li a4, 0
+; RV64I-NEXT: call __atomic_compare_exchange_8
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-WMO-LABEL: cmpxchg_i64_acq_rel_monotonic:
+; RV64IA-WMO: # %bb.0:
+; RV64IA-WMO-NEXT: .LBB35_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT: lr.d.aq a3, (a0)
+; RV64IA-WMO-NEXT: bne a3, a1, .LBB35_3
+; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB35_1 Depth=1
+; RV64IA-WMO-NEXT: sc.d.rl a4, a2, (a0)
+; RV64IA-WMO-NEXT: bnez a4, .LBB35_1
+; RV64IA-WMO-NEXT: .LBB35_3:
+; RV64IA-WMO-NEXT: ret
+;
+; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i64_acq_rel_monotonic:
+; RV64IA-WMO-ZACAS: # %bb.0:
+; RV64IA-WMO-ZACAS-NEXT: amocas.d.aqrl a1, a2, (a0)
+; RV64IA-WMO-ZACAS-NEXT: ret
+;
+; RV64IA-WMO-ZABHA-LABEL: cmpxchg_i64_acq_rel_monotonic:
+; RV64IA-WMO-ZABHA: # %bb.0:
+; RV64IA-WMO-ZABHA-NEXT: amocas.d.aqrl a1, a2, (a0)
+; RV64IA-WMO-ZABHA-NEXT: ret
+;
+; RV64IA-TSO-LABEL: cmpxchg_i64_acq_rel_monotonic:
+; RV64IA-TSO: # %bb.0:
+; RV64IA-TSO-NEXT: .LBB35_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT: lr.d a3, (a0)
+; RV64IA-TSO-NEXT: bne a3, a1, .LBB35_3
+; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB35_1 Depth=1
+; RV64IA-TSO-NEXT: sc.d a4, a2, (a0)
+; RV64IA-TSO-NEXT: bnez a4, .LBB35_1
+; RV64IA-TSO-NEXT: .LBB35_3:
+; RV64IA-TSO-NEXT: ret
+;
+; RV64IA-TSO-ZACAS-LABEL: cmpxchg_i64_acq_rel_monotonic:
+; RV64IA-TSO-ZACAS: # %bb.0:
+; RV64IA-TSO-ZACAS-NEXT: amocas.d a1, a2, (a0)
+; RV64IA-TSO-ZACAS-NEXT: ret
+;
+; RV64IA-TSO-ZABHA-LABEL: cmpxchg_i64_acq_rel_monotonic:
+; RV64IA-TSO-ZABHA: # %bb.0:
+; RV64IA-TSO-ZABHA-NEXT: amocas.d a1, a2, (a0)
+; RV64IA-TSO-ZABHA-NEXT: ret
+ %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val acq_rel monotonic
+ ret void
+}
+
+define void @cmpxchg_i64_acq_rel_acquire(ptr %ptr, i64 %cmp, i64 %val) nounwind {
+; RV32I-LABEL: cmpxchg_i64_acq_rel_acquire:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: mv a6, a3
+; RV32I-NEXT: mv a3, a4
+; RV32I-NEXT: sw a1, 0(sp)
+; RV32I-NEXT: sw a2, 4(sp)
+; RV32I-NEXT: mv a1, sp
+; RV32I-NEXT: li a4, 4
+; RV32I-NEXT: li a5, 2
+; RV32I-NEXT: mv a2, a6
+; RV32I-NEXT: call __atomic_compare_exchange_8
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-LABEL: cmpxchg_i64_acq_rel_acquire:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: addi sp, sp, -16
+; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: mv a6, a3
+; RV32IA-NEXT: mv a3, a4
+; RV32IA-NEXT: sw a1, 0(sp)
+; RV32IA-NEXT: sw a2, 4(sp)
+; RV32IA-NEXT: mv a1, sp
+; RV32IA-NEXT: li a4, 4
+; RV32IA-NEXT: li a5, 2
+; RV32IA-NEXT: mv a2, a6
+; RV32IA-NEXT: call __atomic_compare_exchange_8
+; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: addi sp, sp, 16
+; RV32IA-NEXT: ret
+;
+; RV64I-LABEL: cmpxchg_i64_acq_rel_acquire:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd a1, 0(sp)
+; RV64I-NEXT: mv a1, sp
+; RV64I-NEXT: li a3, 4
+; RV64I-NEXT: li a4, 2
+; RV64I-NEXT: call __atomic_compare_exchange_8
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-WMO-LABEL: cmpxchg_i64_acq_rel_acquire:
+; RV64IA-WMO: # %bb.0:
+; RV64IA-WMO-NEXT: .LBB36_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT: lr.d.aq a3, (a0)
+; RV64IA-WMO-NEXT: bne a3, a1, .LBB36_3
+; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB36_1 Depth=1
+; RV64IA-WMO-NEXT: sc.d.rl a4, a2, (a0)
+; RV64IA-WMO-NEXT: bnez a4, .LBB36_1
+; RV64IA-WMO-NEXT: .LBB36_3:
+; RV64IA-WMO-NEXT: ret
+;
+; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i64_acq_rel_acquire:
+; RV64IA-WMO-ZACAS: # %bb.0:
+; RV64IA-WMO-ZACAS-NEXT: amocas.d.aqrl a1, a2, (a0)
+; RV64IA-WMO-ZACAS-NEXT: ret
+;
+; RV64IA-WMO-ZABHA-LABEL: cmpxchg_i64_acq_rel_acquire:
+; RV64IA-WMO-ZABHA: # %bb.0:
+; RV64IA-WMO-ZABHA-NEXT: amocas.d.aqrl a1, a2, (a0)
+; RV64IA-WMO-ZABHA-NEXT: ret
+;
+; RV64IA-TSO-LABEL: cmpxchg_i64_acq_rel_acquire:
+; RV64IA-TSO: # %bb.0:
+; RV64IA-TSO-NEXT: .LBB36_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT: lr.d a3, (a0)
+; RV64IA-TSO-NEXT: bne a3, a1, .LBB36_3
+; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB36_1 Depth=1
+; RV64IA-TSO-NEXT: sc.d a4, a2, (a0)
+; RV64IA-TSO-NEXT: bnez a4, .LBB36_1
+; RV64IA-TSO-NEXT: .LBB36_3:
+; RV64IA-TSO-NEXT: ret
+;
+; RV64IA-TSO-ZACAS-LABEL: cmpxchg_i64_acq_rel_acquire:
+; RV64IA-TSO-ZACAS: # %bb.0:
+; RV64IA-TSO-ZACAS-NEXT: amocas.d a1, a2, (a0)
+; RV64IA-TSO-ZACAS-NEXT: ret
+;
+; RV64IA-TSO-ZABHA-LABEL: cmpxchg_i64_acq_rel_acquire:
+; RV64IA-TSO-ZABHA: # %bb.0:
+; RV64IA-TSO-ZABHA-NEXT: amocas.d a1, a2, (a0)
+; RV64IA-TSO-ZABHA-NEXT: ret
+ %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val acq_rel acquire
+ ret void
+}
+
+define void @cmpxchg_i64_seq_cst_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwind {
+; RV32I-LABEL: cmpxchg_i64_seq_cst_monotonic:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: mv a5, a3
+; RV32I-NEXT: mv a3, a4
+; RV32I-NEXT: sw a1, 0(sp)
+; RV32I-NEXT: sw a2, 4(sp)
+; RV32I-NEXT: mv a1, sp
+; RV32I-NEXT: li a4, 5
+; RV32I-NEXT: mv a2, a5
+; RV32I-NEXT: li a5, 0
+; RV32I-NEXT: call __atomic_compare_exchange_8
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-LABEL: cmpxchg_i64_seq_cst_monotonic:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: addi sp, sp, -16
+; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: mv a5, a3
+; RV32IA-NEXT: mv a3, a4
+; RV32IA-NEXT: sw a1, 0(sp)
+; RV32IA-NEXT: sw a2, 4(sp)
+; RV32IA-NEXT: mv a1, sp
+; RV32IA-NEXT: li a4, 5
+; RV32IA-NEXT: mv a2, a5
+; RV32IA-NEXT: li a5, 0
+; RV32IA-NEXT: call __atomic_compare_exchange_8
+; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: addi sp, sp, 16
+; RV32IA-NEXT: ret
+;
+; RV64I-LABEL: cmpxchg_i64_seq_cst_monotonic:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd a1, 0(sp)
+; RV64I-NEXT: mv a1, sp
+; RV64I-NEXT: li a3, 5
+; RV64I-NEXT: li a4, 0
+; RV64I-NEXT: call __atomic_compare_exchange_8
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-WMO-LABEL: cmpxchg_i64_seq_cst_monotonic:
+; RV64IA-WMO: # %bb.0:
+; RV64IA-WMO-NEXT: .LBB37_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT: lr.d.aqrl a3, (a0)
+; RV64IA-WMO-NEXT: bne a3, a1, .LBB37_3
+; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB37_1 Depth=1
+; RV64IA-WMO-NEXT: sc.d.rl a4, a2, (a0)
+; RV64IA-WMO-NEXT: bnez a4, .LBB37_1
+; RV64IA-WMO-NEXT: .LBB37_3:
+; RV64IA-WMO-NEXT: ret
+;
+; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i64_seq_cst_monotonic:
+; RV64IA-WMO-ZACAS: # %bb.0:
+; RV64IA-WMO-ZACAS-NEXT: amocas.d.aqrl a1, a2, (a0)
+; RV64IA-WMO-ZACAS-NEXT: ret
+;
+; RV64IA-WMO-ZABHA-LABEL: cmpxchg_i64_seq_cst_monotonic:
+; RV64IA-WMO-ZABHA: # %bb.0:
+; RV64IA-WMO-ZABHA-NEXT: amocas.d.aqrl a1, a2, (a0)
+; RV64IA-WMO-ZABHA-NEXT: ret
+;
+; RV64IA-TSO-LABEL: cmpxchg_i64_seq_cst_monotonic:
+; RV64IA-TSO: # %bb.0:
+; RV64IA-TSO-NEXT: .LBB37_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT: lr.d.aqrl a3, (a0)
+; RV64IA-TSO-NEXT: bne a3, a1, .LBB37_3
+; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB37_1 Depth=1
+; RV64IA-TSO-NEXT: sc.d.rl a4, a2, (a0)
+; RV64IA-TSO-NEXT: bnez a4, .LBB37_1
+; RV64IA-TSO-NEXT: .LBB37_3:
+; RV64IA-TSO-NEXT: ret
+;
+; RV64IA-TSO-ZACAS-LABEL: cmpxchg_i64_seq_cst_monotonic:
+; RV64IA-TSO-ZACAS: # %bb.0:
+; RV64IA-TSO-ZACAS-NEXT: amocas.d a1, a2, (a0)
+; RV64IA-TSO-ZACAS-NEXT: ret
+;
+; RV64IA-TSO-ZABHA-LABEL: cmpxchg_i64_seq_cst_monotonic:
+; RV64IA-TSO-ZABHA: # %bb.0:
+; RV64IA-TSO-ZABHA-NEXT: amocas.d a1, a2, (a0)
+; RV64IA-TSO-ZABHA-NEXT: ret
+ %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val seq_cst monotonic
+ ret void
+}
+
+define void @cmpxchg_i64_seq_cst_acquire(ptr %ptr, i64 %cmp, i64 %val) nounwind {
+; RV32I-LABEL: cmpxchg_i64_seq_cst_acquire:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: mv a6, a3
+; RV32I-NEXT: mv a3, a4
+; RV32I-NEXT: sw a1, 0(sp)
+; RV32I-NEXT: sw a2, 4(sp)
+; RV32I-NEXT: mv a1, sp
+; RV32I-NEXT: li a4, 5
+; RV32I-NEXT: li a5, 2
+; RV32I-NEXT: mv a2, a6
+; RV32I-NEXT: call __atomic_compare_exchange_8
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-LABEL: cmpxchg_i64_seq_cst_acquire:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: addi sp, sp, -16
+; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: mv a6, a3
+; RV32IA-NEXT: mv a3, a4
+; RV32IA-NEXT: sw a1, 0(sp)
+; RV32IA-NEXT: sw a2, 4(sp)
+; RV32IA-NEXT: mv a1, sp
+; RV32IA-NEXT: li a4, 5
+; RV32IA-NEXT: li a5, 2
+; RV32IA-NEXT: mv a2, a6
+; RV32IA-NEXT: call __atomic_compare_exchange_8
+; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: addi sp, sp, 16
+; RV32IA-NEXT: ret
+;
+; RV64I-LABEL: cmpxchg_i64_seq_cst_acquire:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd a1, 0(sp)
+; RV64I-NEXT: mv a1, sp
+; RV64I-NEXT: li a3, 5
+; RV64I-NEXT: li a4, 2
+; RV64I-NEXT: call __atomic_compare_exchange_8
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-WMO-LABEL: cmpxchg_i64_seq_cst_acquire:
+; RV64IA-WMO: # %bb.0:
+; RV64IA-WMO-NEXT: .LBB38_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT: lr.d.aqrl a3, (a0)
+; RV64IA-WMO-NEXT: bne a3, a1, .LBB38_3
+; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB38_1 Depth=1
+; RV64IA-WMO-NEXT: sc.d.rl a4, a2, (a0)
+; RV64IA-WMO-NEXT: bnez a4, .LBB38_1
+; RV64IA-WMO-NEXT: .LBB38_3:
+; RV64IA-WMO-NEXT: ret
+;
+; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i64_seq_cst_acquire:
+; RV64IA-WMO-ZACAS: # %bb.0:
+; RV64IA-WMO-ZACAS-NEXT: amocas.d.aqrl a1, a2, (a0)
+; RV64IA-WMO-ZACAS-NEXT: ret
+;
+; RV64IA-WMO-ZABHA-LABEL: cmpxchg_i64_seq_cst_acquire:
+; RV64IA-WMO-ZABHA: # %bb.0:
+; RV64IA-WMO-ZABHA-NEXT: amocas.d.aqrl a1, a2, (a0)
+; RV64IA-WMO-ZABHA-NEXT: ret
+;
+; RV64IA-TSO-LABEL: cmpxchg_i64_seq_cst_acquire:
+; RV64IA-TSO: # %bb.0:
+; RV64IA-TSO-NEXT: .LBB38_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT: lr.d.aqrl a3, (a0)
+; RV64IA-TSO-NEXT: bne a3, a1, .LBB38_3
+; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB38_1 Depth=1
+; RV64IA-TSO-NEXT: sc.d.rl a4, a2, (a0)
+; RV64IA-TSO-NEXT: bnez a4, .LBB38_1
+; RV64IA-TSO-NEXT: .LBB38_3:
+; RV64IA-TSO-NEXT: ret
+;
+; RV64IA-TSO-ZACAS-LABEL: cmpxchg_i64_seq_cst_acquire:
+; RV64IA-TSO-ZACAS: # %bb.0:
+; RV64IA-TSO-ZACAS-NEXT: amocas.d a1, a2, (a0)
+; RV64IA-TSO-ZACAS-NEXT: ret
+;
+; RV64IA-TSO-ZABHA-LABEL: cmpxchg_i64_seq_cst_acquire:
+; RV64IA-TSO-ZABHA: # %bb.0:
+; RV64IA-TSO-ZABHA-NEXT: amocas.d a1, a2, (a0)
+; RV64IA-TSO-ZABHA-NEXT: ret
+ %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val seq_cst acquire
+ ret void
+}
+
+define void @cmpxchg_i64_seq_cst_seq_cst(ptr %ptr, i64 %cmp, i64 %val) nounwind {
+; RV32I-LABEL: cmpxchg_i64_seq_cst_seq_cst:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: mv a6, a3
+; RV32I-NEXT: mv a3, a4
+; RV32I-NEXT: sw a1, 0(sp)
+; RV32I-NEXT: sw a2, 4(sp)
+; RV32I-NEXT: mv a1, sp
+; RV32I-NEXT: li a4, 5
+; RV32I-NEXT: li a5, 5
+; RV32I-NEXT: mv a2, a6
+; RV32I-NEXT: call __atomic_compare_exchange_8
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-LABEL: cmpxchg_i64_seq_cst_seq_cst:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: addi sp, sp, -16
+; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: mv a6, a3
+; RV32IA-NEXT: mv a3, a4
+; RV32IA-NEXT: sw a1, 0(sp)
+; RV32IA-NEXT: sw a2, 4(sp)
+; RV32IA-NEXT: mv a1, sp
+; RV32IA-NEXT: li a4, 5
+; RV32IA-NEXT: li a5, 5
+; RV32IA-NEXT: mv a2, a6
+; RV32IA-NEXT: call __atomic_compare_exchange_8
+; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: addi sp, sp, 16
+; RV32IA-NEXT: ret
+;
+; RV64I-LABEL: cmpxchg_i64_seq_cst_seq_cst:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd a1, 0(sp)
+; RV64I-NEXT: mv a1, sp
+; RV64I-NEXT: li a3, 5
+; RV64I-NEXT: li a4, 5
+; RV64I-NEXT: call __atomic_compare_exchange_8
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-WMO-LABEL: cmpxchg_i64_seq_cst_seq_cst:
+; RV64IA-WMO: # %bb.0:
+; RV64IA-WMO-NEXT: .LBB39_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-WMO-NEXT: lr.d.aqrl a3, (a0)
+; RV64IA-WMO-NEXT: bne a3, a1, .LBB39_3
+; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB39_1 Depth=1
+; RV64IA-WMO-NEXT: sc.d.rl a4, a2, (a0)
+; RV64IA-WMO-NEXT: bnez a4, .LBB39_1
+; RV64IA-WMO-NEXT: .LBB39_3:
+; RV64IA-WMO-NEXT: ret
+;
+; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i64_seq_cst_seq_cst:
+; RV64IA-WMO-ZACAS: # %bb.0:
+; RV64IA-WMO-ZACAS-NEXT: fence rw, rw
+; RV64IA-WMO-ZACAS-NEXT: amocas.d.aqrl a1, a2, (a0)
+; RV64IA-WMO-ZACAS-NEXT: ret
+;
+; RV64IA-WMO-ZABHA-LABEL: cmpxchg_i64_seq_cst_seq_cst:
+; RV64IA-WMO-ZABHA: # %bb.0:
+; RV64IA-WMO-ZABHA-NEXT: fence rw, rw
+; RV64IA-WMO-ZABHA-NEXT: amocas.d.aqrl a1, a2, (a0)
+; RV64IA-WMO-ZABHA-NEXT: ret
+;
+; RV64IA-TSO-LABEL: cmpxchg_i64_seq_cst_seq_cst:
+; RV64IA-TSO: # %bb.0:
+; RV64IA-TSO-NEXT: .LBB39_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-TSO-NEXT: lr.d.aqrl a3, (a0)
+; RV64IA-TSO-NEXT: bne a3, a1, .LBB39_3
+; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB39_1 Depth=1
+; RV64IA-TSO-NEXT: sc.d.rl a4, a2, (a0)
+; RV64IA-TSO-NEXT: bnez a4, .LBB39_1
+; RV64IA-TSO-NEXT: .LBB39_3:
+; RV64IA-TSO-NEXT: ret
+;
+; RV64IA-TSO-ZACAS-LABEL: cmpxchg_i64_seq_cst_seq_cst:
+; RV64IA-TSO-ZACAS: # %bb.0:
+; RV64IA-TSO-ZACAS-NEXT: fence rw, rw
+; RV64IA-TSO-ZACAS-NEXT: amocas.d a1, a2, (a0)
+; RV64IA-TSO-ZACAS-NEXT: ret
+;
+; RV64IA-TSO-ZABHA-LABEL: cmpxchg_i64_seq_cst_seq_cst:
+; RV64IA-TSO-ZABHA: # %bb.0:
+; RV64IA-TSO-ZABHA-NEXT: fence rw, rw
+; RV64IA-TSO-ZABHA-NEXT: amocas.d a1, a2, (a0)
+; RV64IA-TSO-ZABHA-NEXT: ret
+ %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val seq_cst seq_cst
+ ret void
+}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; RV64IA: {{.*}}
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomic-cmpxchg-rv32.mir b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomic-cmpxchg-rv32.mir
new file mode 100644
index 0000000000000..74249c1247e3e
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomic-cmpxchg-rv32.mir
@@ -0,0 +1,119 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=riscv32 -mattr=+a,+zacas,+zabha -run-pass=instruction-select %s -o - \
+# RUN: | FileCheck %s --check-prefixes=RV32IA-ZABHA
+
+---
+name: cmpxchg_i8
+legalized: true
+regBankSelected: true
+
+body: |
+ bb.0:
+ liveins: $x10
+
+ ; RV32IA-ZABHA-LABEL: name: cmpxchg_i8
+ ; RV32IA-ZABHA: liveins: $x10
+ ; RV32IA-ZABHA-NEXT: {{ $}}
+ ; RV32IA-ZABHA-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+ ; RV32IA-ZABHA-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x0
+ ; RV32IA-ZABHA-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 1
+ ; RV32IA-ZABHA-NEXT: [[AMOCAS_B:%[0-9]+]]:gpr = AMOCAS_B [[COPY1]], [[COPY]], [[ADDI]] :: (load store monotonic (s8))
+ ; RV32IA-ZABHA-NEXT: $x10 = COPY [[AMOCAS_B]]
+ ; RV32IA-ZABHA-NEXT: PseudoRET implicit $x10
+ %0:gpr(p0) = COPY $x10
+ %1:gpr(s32) = G_CONSTANT i32 0
+ %2:gpr(s32) = G_CONSTANT i32 1
+ %3:gpr(s32) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store monotonic (s8))
+ $x10 = COPY %3(s32)
+ PseudoRET implicit $x10
+...
+---
+name: cmpxchg_i16
+legalized: true
+regBankSelected: true
+
+body: |
+ bb.0:
+ liveins: $x10
+
+ ; RV32IA-ZABHA-LABEL: name: cmpxchg_i16
+ ; RV32IA-ZABHA: liveins: $x10
+ ; RV32IA-ZABHA-NEXT: {{ $}}
+ ; RV32IA-ZABHA-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+ ; RV32IA-ZABHA-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x0
+ ; RV32IA-ZABHA-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 1
+ ; RV32IA-ZABHA-NEXT: [[AMOCAS_H:%[0-9]+]]:gpr = AMOCAS_H [[COPY1]], [[COPY]], [[ADDI]] :: (load store monotonic (s16))
+ ; RV32IA-ZABHA-NEXT: $x10 = COPY [[AMOCAS_H]]
+ ; RV32IA-ZABHA-NEXT: PseudoRET implicit $x10
+ %0:gpr(p0) = COPY $x10
+ %1:gpr(s32) = G_CONSTANT i32 0
+ %2:gpr(s32) = G_CONSTANT i32 1
+ %3:gpr(s32) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store monotonic (s16))
+ $x10 = COPY %3(s32)
+ PseudoRET implicit $x10
+...
+---
+name: cmpxchg_i32
+legalized: true
+regBankSelected: true
+
+body: |
+ bb.0:
+ liveins: $x10
+
+ ; RV32IA-ZABHA-LABEL: name: cmpxchg_i32
+ ; RV32IA-ZABHA: liveins: $x10
+ ; RV32IA-ZABHA-NEXT: {{ $}}
+ ; RV32IA-ZABHA-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+ ; RV32IA-ZABHA-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x0
+ ; RV32IA-ZABHA-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 1
+ ; RV32IA-ZABHA-NEXT: [[AMOCAS_W:%[0-9]+]]:gpr = AMOCAS_W [[COPY1]], [[COPY]], [[ADDI]] :: (load store monotonic (s32))
+ ; RV32IA-ZABHA-NEXT: $x10 = COPY [[AMOCAS_W]]
+ ; RV32IA-ZABHA-NEXT: PseudoRET implicit $x10
+ %0:gpr(p0) = COPY $x10
+ %1:gpr(s32) = G_CONSTANT i32 0
+ %2:gpr(s32) = G_CONSTANT i32 1
+ %3:gpr(s32) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store monotonic (s32))
+ $x10 = COPY %3(s32)
+ PseudoRET implicit $x10
+...
+---
+name: cmpxchg_with_success_i32
+legalized: true
+regBankSelected: true
+
+body: |
+ bb.0:
+ liveins: $x10
+
+ ; RV32IA-ZABHA-LABEL: name: cmpxchg_with_success_i32
+ ; RV32IA-ZABHA: liveins: $x10
+ ; RV32IA-ZABHA-NEXT: {{ $}}
+ ; RV32IA-ZABHA-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+ ; RV32IA-ZABHA-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x0
+ ; RV32IA-ZABHA-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 1
+ ; RV32IA-ZABHA-NEXT: [[AMOCAS_W:%[0-9]+]]:gpr = AMOCAS_W [[COPY1]], [[COPY]], [[ADDI]] :: (load store monotonic (s32))
+ ; RV32IA-ZABHA-NEXT: [[SLTIU:%[0-9]+]]:gpr = SLTIU [[AMOCAS_W]], 1
+ ; RV32IA-ZABHA-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
+ ; RV32IA-ZABHA-NEXT: $x10 = COPY [[AMOCAS_W]]
+ ; RV32IA-ZABHA-NEXT: $x11 = COPY [[SLTIU]]
+ ; RV32IA-ZABHA-NEXT: PseudoCALL target-flags(riscv-call) &__mulsi3, csr_ilp32_lp64, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10
+ ; RV32IA-ZABHA-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
+ ; RV32IA-ZABHA-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $x10
+ ; RV32IA-ZABHA-NEXT: $x10 = COPY [[COPY2]]
+ ; RV32IA-ZABHA-NEXT: PseudoRET implicit $x10
+ %0:gprb(p0) = COPY $x10
+ %1:gprb(s32) = G_CONSTANT i32 0
+ %2:gprb(s32) = G_CONSTANT i32 1
+ %3:gprb(s32) = G_ATOMIC_CMPXCHG %0(p0), %1, %2 :: (load store monotonic (s32))
+ %4:gprb(s32) = G_ICMP intpred(eq), %3(s32), %1
+ %5:gprb(s32) = COPY %3(s32)
+ ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
+ $x10 = COPY %5(s32)
+ $x11 = COPY %4(s32)
+ PseudoCALL target-flags(riscv-call) &__mulsi3, csr_ilp32_lp64, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10
+ ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
+ %6:gprb(s32) = COPY $x10
+ $x10 = COPY %6(s32)
+ PseudoRET implicit $x10
+...
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomic-cmpxchg-rv64.mir b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomic-cmpxchg-rv64.mir
new file mode 100644
index 0000000000000..a2f7e303a871f
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomic-cmpxchg-rv64.mir
@@ -0,0 +1,144 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=riscv64 -mattr=+a,+zacas,+zabha -run-pass=instruction-select %s -o - \
+# RUN: | FileCheck %s --check-prefixes=RV64IA-ZABHA
+
+---
+name: cmpxchg_i8
+legalized: true
+regBankSelected: true
+
+body: |
+ bb.0:
+ liveins: $x10
+
+ ; RV64IA-ZABHA-LABEL: name: cmpxchg_i8
+ ; RV64IA-ZABHA: liveins: $x10
+ ; RV64IA-ZABHA-NEXT: {{ $}}
+ ; RV64IA-ZABHA-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+ ; RV64IA-ZABHA-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x0
+ ; RV64IA-ZABHA-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 1
+ ; RV64IA-ZABHA-NEXT: [[AMOCAS_B:%[0-9]+]]:gpr = AMOCAS_B [[COPY1]], [[COPY]], [[ADDI]] :: (load store monotonic (s8))
+ ; RV64IA-ZABHA-NEXT: $x10 = COPY [[AMOCAS_B]]
+ ; RV64IA-ZABHA-NEXT: PseudoRET implicit $x10
+ %0:gpr(p0) = COPY $x10
+ %1:gpr(s64) = G_CONSTANT i64 0
+ %2:gpr(s64) = G_CONSTANT i64 1
+ %3:gpr(s64) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store monotonic (s8))
+ $x10 = COPY %3(s64)
+ PseudoRET implicit $x10
+...
+---
+name: cmpxchg_i16
+legalized: true
+regBankSelected: true
+
+body: |
+ bb.0:
+ liveins: $x10
+
+ ; RV64IA-ZABHA-LABEL: name: cmpxchg_i16
+ ; RV64IA-ZABHA: liveins: $x10
+ ; RV64IA-ZABHA-NEXT: {{ $}}
+ ; RV64IA-ZABHA-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+ ; RV64IA-ZABHA-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x0
+ ; RV64IA-ZABHA-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 1
+ ; RV64IA-ZABHA-NEXT: [[AMOCAS_H:%[0-9]+]]:gpr = AMOCAS_H [[COPY1]], [[COPY]], [[ADDI]] :: (load store monotonic (s16))
+ ; RV64IA-ZABHA-NEXT: $x10 = COPY [[AMOCAS_H]]
+ ; RV64IA-ZABHA-NEXT: PseudoRET implicit $x10
+ %0:gpr(p0) = COPY $x10
+ %1:gpr(s64) = G_CONSTANT i64 0
+ %2:gpr(s64) = G_CONSTANT i64 1
+ %3:gpr(s64) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store monotonic (s16))
+ $x10 = COPY %3(s64)
+ PseudoRET implicit $x10
+...
+---
+name: cmpxchg_i32
+legalized: true
+regBankSelected: true
+
+body: |
+ bb.0:
+ liveins: $x10
+
+ ; RV64IA-ZABHA-LABEL: name: cmpxchg_i32
+ ; RV64IA-ZABHA: liveins: $x10
+ ; RV64IA-ZABHA-NEXT: {{ $}}
+ ; RV64IA-ZABHA-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+ ; RV64IA-ZABHA-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x0
+ ; RV64IA-ZABHA-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 1
+ ; RV64IA-ZABHA-NEXT: [[AMOCAS_W:%[0-9]+]]:gpr = AMOCAS_W [[COPY1]], [[COPY]], [[ADDI]] :: (load store monotonic (s32))
+ ; RV64IA-ZABHA-NEXT: $x10 = COPY [[AMOCAS_W]]
+ ; RV64IA-ZABHA-NEXT: PseudoRET implicit $x10
+ %0:gpr(p0) = COPY $x10
+ %1:gpr(s64) = G_CONSTANT i64 0
+ %2:gpr(s64) = G_CONSTANT i64 1
+ %3:gpr(s64) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store monotonic (s32))
+ $x10 = COPY %3(s64)
+ PseudoRET implicit $x10
+...
+---
+name: cmpxchg_i64
+legalized: true
+regBankSelected: true
+
+body: |
+ bb.0:
+ liveins: $x10
+
+ ; RV64IA-ZABHA-LABEL: name: cmpxchg_i64
+ ; RV64IA-ZABHA: liveins: $x10
+ ; RV64IA-ZABHA-NEXT: {{ $}}
+ ; RV64IA-ZABHA-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+ ; RV64IA-ZABHA-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x0
+ ; RV64IA-ZABHA-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 1
+ ; RV64IA-ZABHA-NEXT: [[AMOCAS_D_RV64_:%[0-9]+]]:gpr = AMOCAS_D_RV64 [[COPY1]], [[COPY]], [[ADDI]] :: (load store monotonic (s64))
+ ; RV64IA-ZABHA-NEXT: $x10 = COPY [[AMOCAS_D_RV64_]]
+ ; RV64IA-ZABHA-NEXT: PseudoRET implicit $x10
+ %0:gpr(p0) = COPY $x10
+ %1:gpr(s64) = G_CONSTANT i64 0
+ %2:gpr(s64) = G_CONSTANT i64 1
+ %3:gpr(s64) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store monotonic (s64))
+ $x10 = COPY %3(s64)
+ PseudoRET implicit $x10
+...
+---
+name: cmpxchg_with_success_i64
+legalized: true
+regBankSelected: true
+body: |
+ bb.0:
+ liveins: $x10
+
+ ; RV64IA-ZABHA-LABEL: name: cmpxchg_with_success_i64
+ ; RV64IA-ZABHA: liveins: $x10
+ ; RV64IA-ZABHA-NEXT: {{ $}}
+ ; RV64IA-ZABHA-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+ ; RV64IA-ZABHA-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x0
+ ; RV64IA-ZABHA-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 1
+ ; RV64IA-ZABHA-NEXT: [[AMOCAS_D_RV64_:%[0-9]+]]:gpr = AMOCAS_D_RV64 [[COPY1]], [[COPY]], [[ADDI]] :: (load store monotonic (s64))
+ ; RV64IA-ZABHA-NEXT: [[SLTIU:%[0-9]+]]:gpr = SLTIU [[AMOCAS_D_RV64_]], 1
+ ; RV64IA-ZABHA-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
+ ; RV64IA-ZABHA-NEXT: $x10 = COPY [[AMOCAS_D_RV64_]]
+ ; RV64IA-ZABHA-NEXT: $x11 = COPY [[SLTIU]]
+ ; RV64IA-ZABHA-NEXT: PseudoCALL target-flags(riscv-call) &__muldi3, csr_ilp32_lp64, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10
+ ; RV64IA-ZABHA-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
+ ; RV64IA-ZABHA-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $x10
+ ; RV64IA-ZABHA-NEXT: $x10 = COPY [[COPY2]]
+ ; RV64IA-ZABHA-NEXT: PseudoRET implicit $x10
+ %0:gprb(p0) = COPY $x10
+ %1:gprb(s64) = G_CONSTANT i64 0
+ %2:gprb(s64) = G_CONSTANT i64 1
+ %3:gprb(s64) = G_ATOMIC_CMPXCHG %0(p0), %1, %2 :: (load store monotonic (s64))
+ %4:gprb(s64) = G_ICMP intpred(eq), %3(s64), %1
+ %5:gprb(s64) = COPY %3(s64)
+ %6:gprb(s64) = COPY %4(s64)
+ ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
+ $x10 = COPY %5(s64)
+ $x11 = COPY %6(s64)
+ PseudoCALL target-flags(riscv-call) &__muldi3, csr_ilp32_lp64, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10
+ ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
+ %7:gprb(s64) = COPY $x10
+ $x10 = COPY %7(s64)
+ PseudoRET implicit $x10
+...
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer-info-validation.mir
index 1945c010fe94f..7052767771c88 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer-info-validation.mir
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer-info-validation.mir
@@ -214,15 +214,16 @@
# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
# DEBUG-NEXT: G_ATOMIC_CMPXCHG_WITH_SUCCESS (opcode {{[0-9]+}}): 3 type indices, 0 imm indices
-# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
-# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
+# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
+# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: G_ATOMIC_CMPXCHG (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
-# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
-# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
+# DEBUG-NEXT: .. the first uncovered type index: 2, OK
+# DEBUG-NEXT: .. the first uncovered imm index: 0, OK
# DEBUG-NEXT: G_ATOMICRMW_XCHG (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
# DEBUG-NEXT: G_ATOMICRMW_ADD (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
+# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
# DEBUG-NEXT: .. the first uncovered type index: 2, OK
# DEBUG-NEXT: .. the first uncovered imm index: 0, OK
# DEBUG-NEXT: G_ATOMICRMW_SUB (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-atomic-cmpxchg-rv32.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-atomic-cmpxchg-rv32.mir
new file mode 100644
index 0000000000000..3f50bc729f52f
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-atomic-cmpxchg-rv32.mir
@@ -0,0 +1,155 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=riscv32 -mattr=+a,+zacas,+zabha -run-pass=legalizer %s -o - \
+# RUN: | FileCheck %s --check-prefixes=RV32IA-ZABHA
+# RUN: llc -mtriple=riscv32 -mattr=+a -run-pass=legalizer %s -o - \
+# RUN: | FileCheck %s --check-prefixes=RV32IA
+
+---
+name: cmpxchg_i8
+body: |
+ bb.0:
+ liveins: $x10
+
+ ; RV32IA-ZABHA-LABEL: name: cmpxchg_i8
+ ; RV32IA-ZABHA: liveins: $x10
+ ; RV32IA-ZABHA-NEXT: {{ $}}
+ ; RV32IA-ZABHA-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10
+ ; RV32IA-ZABHA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32IA-ZABHA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; RV32IA-ZABHA-NEXT: [[ATOMIC_CMPXCHG:%[0-9]+]]:_(s32) = G_ATOMIC_CMPXCHG [[COPY]](p0), [[C]], [[C1]] :: (load store monotonic (s8))
+ ; RV32IA-ZABHA-NEXT: $x10 = COPY [[ATOMIC_CMPXCHG]](s32)
+ ; RV32IA-ZABHA-NEXT: PseudoRET implicit $x10
+ ;
+ ; RV32IA-LABEL: name: cmpxchg_i8
+ ; RV32IA: liveins: $x10
+ ; RV32IA-NEXT: {{ $}}
+ ; RV32IA-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10
+ ; RV32IA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32IA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; RV32IA-NEXT: [[ATOMIC_CMPXCHG:%[0-9]+]]:_(s32) = G_ATOMIC_CMPXCHG [[COPY]](p0), [[C]], [[C1]] :: (load store monotonic (s8))
+ ; RV32IA-NEXT: $x10 = COPY [[ATOMIC_CMPXCHG]](s32)
+ ; RV32IA-NEXT: PseudoRET implicit $x10
+ %0:_(p0) = COPY $x10
+ %1:_(s8) = G_CONSTANT i8 0
+ %2:_(s8) = G_CONSTANT i8 1
+ %3:_(s8) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store monotonic (s8))
+ %4:_(s32) = G_ANYEXT %3
+ $x10 = COPY %4(s32)
+ PseudoRET implicit $x10
+...
+---
+name: cmpxchg_i16
+body: |
+ bb.0:
+ liveins: $x10
+
+ ; RV32IA-ZABHA-LABEL: name: cmpxchg_i16
+ ; RV32IA-ZABHA: liveins: $x10
+ ; RV32IA-ZABHA-NEXT: {{ $}}
+ ; RV32IA-ZABHA-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10
+ ; RV32IA-ZABHA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32IA-ZABHA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; RV32IA-ZABHA-NEXT: [[ATOMIC_CMPXCHG:%[0-9]+]]:_(s32) = G_ATOMIC_CMPXCHG [[COPY]](p0), [[C]], [[C1]] :: (load store monotonic (s16))
+ ; RV32IA-ZABHA-NEXT: $x10 = COPY [[ATOMIC_CMPXCHG]](s32)
+ ; RV32IA-ZABHA-NEXT: PseudoRET implicit $x10
+ ;
+ ; RV32IA-LABEL: name: cmpxchg_i16
+ ; RV32IA: liveins: $x10
+ ; RV32IA-NEXT: {{ $}}
+ ; RV32IA-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10
+ ; RV32IA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32IA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; RV32IA-NEXT: [[ATOMIC_CMPXCHG:%[0-9]+]]:_(s32) = G_ATOMIC_CMPXCHG [[COPY]](p0), [[C]], [[C1]] :: (load store monotonic (s16))
+ ; RV32IA-NEXT: $x10 = COPY [[ATOMIC_CMPXCHG]](s32)
+ ; RV32IA-NEXT: PseudoRET implicit $x10
+ %0:_(p0) = COPY $x10
+ %1:_(s16) = G_CONSTANT i16 0
+ %2:_(s16) = G_CONSTANT i16 1
+ %3:_(s16) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store monotonic (s16))
+ %4:_(s32) = G_ANYEXT %3
+ $x10 = COPY %4(s32)
+ PseudoRET implicit $x10
+...
+---
+name: cmpxchg_i32
+body: |
+ bb.0:
+ liveins: $x10
+
+ ; RV32IA-ZABHA-LABEL: name: cmpxchg_i32
+ ; RV32IA-ZABHA: liveins: $x10
+ ; RV32IA-ZABHA-NEXT: {{ $}}
+ ; RV32IA-ZABHA-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10
+ ; RV32IA-ZABHA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32IA-ZABHA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; RV32IA-ZABHA-NEXT: [[ATOMIC_CMPXCHG:%[0-9]+]]:_(s32) = G_ATOMIC_CMPXCHG [[COPY]](p0), [[C]], [[C1]] :: (load store monotonic (s32))
+ ; RV32IA-ZABHA-NEXT: $x10 = COPY [[ATOMIC_CMPXCHG]](s32)
+ ; RV32IA-ZABHA-NEXT: PseudoRET implicit $x10
+ ;
+ ; RV32IA-LABEL: name: cmpxchg_i32
+ ; RV32IA: liveins: $x10
+ ; RV32IA-NEXT: {{ $}}
+ ; RV32IA-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10
+ ; RV32IA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32IA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; RV32IA-NEXT: [[ATOMIC_CMPXCHG:%[0-9]+]]:_(s32) = G_ATOMIC_CMPXCHG [[COPY]](p0), [[C]], [[C1]] :: (load store monotonic (s32))
+ ; RV32IA-NEXT: $x10 = COPY [[ATOMIC_CMPXCHG]](s32)
+ ; RV32IA-NEXT: PseudoRET implicit $x10
+ %0:_(p0) = COPY $x10
+ %1:_(s32) = G_CONSTANT i32 0
+ %2:_(s32) = G_CONSTANT i32 1
+ %3:_(s32) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store monotonic (s32))
+ $x10 = COPY %3(s32)
+ PseudoRET implicit $x10
+...
+---
+name: cmpxchg_with_success_i32
+
+body: |
+ bb.0:
+ liveins: $x10
+
+ ; RV32IA-ZABHA-LABEL: name: cmpxchg_with_success_i32
+ ; RV32IA-ZABHA: liveins: $x10
+ ; RV32IA-ZABHA-NEXT: {{ $}}
+ ; RV32IA-ZABHA-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10
+ ; RV32IA-ZABHA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32IA-ZABHA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; RV32IA-ZABHA-NEXT: [[ATOMIC_CMPXCHG:%[0-9]+]]:_(s32) = G_ATOMIC_CMPXCHG [[COPY]](p0), [[C]], [[C1]] :: (load store monotonic (s32))
+ ; RV32IA-ZABHA-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[ATOMIC_CMPXCHG]](s32), [[C]]
+ ; RV32IA-ZABHA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[ATOMIC_CMPXCHG]](s32)
+ ; RV32IA-ZABHA-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
+ ; RV32IA-ZABHA-NEXT: $x10 = COPY [[COPY1]](s32)
+ ; RV32IA-ZABHA-NEXT: $x11 = COPY [[ICMP]](s32)
+ ; RV32IA-ZABHA-NEXT: PseudoCALL target-flags(riscv-call) &__mulsi3, csr_ilp32_lp64, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10
+ ; RV32IA-ZABHA-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
+ ; RV32IA-ZABHA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $x10
+ ; RV32IA-ZABHA-NEXT: $x10 = COPY [[COPY2]](s32)
+ ; RV32IA-ZABHA-NEXT: PseudoRET implicit $x10
+ ;
+ ; RV32IA-LABEL: name: cmpxchg_with_success_i32
+ ; RV32IA: liveins: $x10
+ ; RV32IA-NEXT: {{ $}}
+ ; RV32IA-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10
+ ; RV32IA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32IA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; RV32IA-NEXT: [[ATOMIC_CMPXCHG:%[0-9]+]]:_(s32) = G_ATOMIC_CMPXCHG [[COPY]](p0), [[C]], [[C1]] :: (load store monotonic (s32))
+ ; RV32IA-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[ATOMIC_CMPXCHG]](s32), [[C]]
+ ; RV32IA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[ATOMIC_CMPXCHG]](s32)
+ ; RV32IA-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
+ ; RV32IA-NEXT: $x10 = COPY [[COPY1]](s32)
+ ; RV32IA-NEXT: $x11 = COPY [[ICMP]](s32)
+ ; RV32IA-NEXT: PseudoCALL target-flags(riscv-call) &__mulsi3, csr_ilp32_lp64, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10
+ ; RV32IA-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
+ ; RV32IA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $x10
+ ; RV32IA-NEXT: $x10 = COPY [[COPY2]](s32)
+ ; RV32IA-NEXT: PseudoRET implicit $x10
+ %0:_(p0) = COPY $x10
+ %1:_(s32) = G_CONSTANT i32 0
+ %2:_(s32) = G_CONSTANT i32 1
+ %3:_(s32), %4:_(s1) = G_ATOMIC_CMPXCHG_WITH_SUCCESS %0, %1, %2 :: (load store monotonic (s32))
+ %5:_(s32) = G_ANYEXT %4
+ %6:_(s32) = G_MUL %3, %5
+ $x10 = COPY %6(s32)
+ PseudoRET implicit $x10
+...
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-atomic-cmpxchg-rv64.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-atomic-cmpxchg-rv64.mir
new file mode 100644
index 0000000000000..689998299a8b2
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-atomic-cmpxchg-rv64.mir
@@ -0,0 +1,240 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=riscv64 -mattr=+a,+zacas,+zabha -run-pass=legalizer %s -o - \
+# RUN: | FileCheck %s --check-prefixes=RV64IA-ZABHA
+# RUN: llc -mtriple=riscv64 -mattr=+a -run-pass=legalizer %s -o - \
+# RUN: | FileCheck %s --check-prefixes=RV64IA
+
+---
+name: cmpxchg_i8
+body: |
+ bb.0:
+ liveins: $x10
+
+ ; RV64IA-ZABHA-LABEL: name: cmpxchg_i8
+ ; RV64IA-ZABHA: liveins: $x10
+ ; RV64IA-ZABHA-NEXT: {{ $}}
+ ; RV64IA-ZABHA-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10
+ ; RV64IA-ZABHA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; RV64IA-ZABHA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; RV64IA-ZABHA-NEXT: [[ATOMIC_CMPXCHG:%[0-9]+]]:_(s64) = G_ATOMIC_CMPXCHG [[COPY]](p0), [[C]], [[C1]] :: (load store monotonic (s8))
+ ; RV64IA-ZABHA-NEXT: $x10 = COPY [[ATOMIC_CMPXCHG]](s64)
+ ; RV64IA-ZABHA-NEXT: PseudoRET implicit $x10
+ ;
+ ; RV64IA-LABEL: name: cmpxchg_i8
+ ; RV64IA: liveins: $x10
+ ; RV64IA-NEXT: {{ $}}
+ ; RV64IA-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10
+ ; RV64IA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; RV64IA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; RV64IA-NEXT: [[ATOMIC_CMPXCHG:%[0-9]+]]:_(s64) = G_ATOMIC_CMPXCHG [[COPY]](p0), [[C]], [[C1]] :: (load store monotonic (s8))
+ ; RV64IA-NEXT: $x10 = COPY [[ATOMIC_CMPXCHG]](s64)
+ ; RV64IA-NEXT: PseudoRET implicit $x10
+ %0:_(p0) = COPY $x10
+ %1:_(s8) = G_CONSTANT i8 0
+ %2:_(s8) = G_CONSTANT i8 1
+ %3:_(s8) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store monotonic (s8))
+ %4:_(s64) = G_ANYEXT %3
+ $x10 = COPY %4(s64)
+ PseudoRET implicit $x10
+...
+---
+name: cmpxchg_i16
+body: |
+ bb.0:
+ liveins: $x10
+
+ ; RV64IA-ZABHA-LABEL: name: cmpxchg_i16
+ ; RV64IA-ZABHA: liveins: $x10
+ ; RV64IA-ZABHA-NEXT: {{ $}}
+ ; RV64IA-ZABHA-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10
+ ; RV64IA-ZABHA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; RV64IA-ZABHA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; RV64IA-ZABHA-NEXT: [[ATOMIC_CMPXCHG:%[0-9]+]]:_(s64) = G_ATOMIC_CMPXCHG [[COPY]](p0), [[C]], [[C1]] :: (load store monotonic (s16))
+ ; RV64IA-ZABHA-NEXT: $x10 = COPY [[ATOMIC_CMPXCHG]](s64)
+ ; RV64IA-ZABHA-NEXT: PseudoRET implicit $x10
+ ;
+ ; RV64IA-LABEL: name: cmpxchg_i16
+ ; RV64IA: liveins: $x10
+ ; RV64IA-NEXT: {{ $}}
+ ; RV64IA-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10
+ ; RV64IA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; RV64IA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; RV64IA-NEXT: [[ATOMIC_CMPXCHG:%[0-9]+]]:_(s64) = G_ATOMIC_CMPXCHG [[COPY]](p0), [[C]], [[C1]] :: (load store monotonic (s16))
+ ; RV64IA-NEXT: $x10 = COPY [[ATOMIC_CMPXCHG]](s64)
+ ; RV64IA-NEXT: PseudoRET implicit $x10
+ %0:_(p0) = COPY $x10
+ %1:_(s16) = G_CONSTANT i16 0
+ %2:_(s16) = G_CONSTANT i16 1
+ %3:_(s16) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store monotonic (s16))
+ %4:_(s64) = G_ANYEXT %3
+ $x10 = COPY %4(s64)
+ PseudoRET implicit $x10
+...
+---
+name: cmpxchg_i32
+body: |
+ bb.0:
+ liveins: $x10
+
+ ; RV64IA-ZABHA-LABEL: name: cmpxchg_i32
+ ; RV64IA-ZABHA: liveins: $x10
+ ; RV64IA-ZABHA-NEXT: {{ $}}
+ ; RV64IA-ZABHA-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10
+ ; RV64IA-ZABHA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; RV64IA-ZABHA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; RV64IA-ZABHA-NEXT: [[ATOMIC_CMPXCHG:%[0-9]+]]:_(s64) = G_ATOMIC_CMPXCHG [[COPY]](p0), [[C]], [[C1]] :: (load store monotonic (s32))
+ ; RV64IA-ZABHA-NEXT: $x10 = COPY [[ATOMIC_CMPXCHG]](s64)
+ ; RV64IA-ZABHA-NEXT: PseudoRET implicit $x10
+ ;
+ ; RV64IA-LABEL: name: cmpxchg_i32
+ ; RV64IA: liveins: $x10
+ ; RV64IA-NEXT: {{ $}}
+ ; RV64IA-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10
+ ; RV64IA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; RV64IA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; RV64IA-NEXT: [[ATOMIC_CMPXCHG:%[0-9]+]]:_(s64) = G_ATOMIC_CMPXCHG [[COPY]](p0), [[C]], [[C1]] :: (load store monotonic (s32))
+ ; RV64IA-NEXT: $x10 = COPY [[ATOMIC_CMPXCHG]](s64)
+ ; RV64IA-NEXT: PseudoRET implicit $x10
+ %0:_(p0) = COPY $x10
+ %1:_(s32) = G_CONSTANT i32 0
+ %2:_(s32) = G_CONSTANT i32 1
+ %3:_(s32) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store monotonic (s32))
+ %4:_(s64) = G_ANYEXT %3
+ $x10 = COPY %4(s64)
+ PseudoRET implicit $x10
+...
+---
+name: cmpxchg_i64
+body: |
+ bb.0:
+ liveins: $x10
+
+ ; RV64IA-ZABHA-LABEL: name: cmpxchg_i64
+ ; RV64IA-ZABHA: liveins: $x10
+ ; RV64IA-ZABHA-NEXT: {{ $}}
+ ; RV64IA-ZABHA-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10
+ ; RV64IA-ZABHA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; RV64IA-ZABHA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; RV64IA-ZABHA-NEXT: [[ATOMIC_CMPXCHG:%[0-9]+]]:_(s64) = G_ATOMIC_CMPXCHG [[COPY]](p0), [[C]], [[C1]] :: (load store monotonic (s64))
+ ; RV64IA-ZABHA-NEXT: $x10 = COPY [[ATOMIC_CMPXCHG]](s64)
+ ; RV64IA-ZABHA-NEXT: PseudoRET implicit $x10
+ ;
+ ; RV64IA-LABEL: name: cmpxchg_i64
+ ; RV64IA: liveins: $x10
+ ; RV64IA-NEXT: {{ $}}
+ ; RV64IA-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10
+ ; RV64IA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; RV64IA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; RV64IA-NEXT: [[ATOMIC_CMPXCHG:%[0-9]+]]:_(s64) = G_ATOMIC_CMPXCHG [[COPY]](p0), [[C]], [[C1]] :: (load store monotonic (s64))
+ ; RV64IA-NEXT: $x10 = COPY [[ATOMIC_CMPXCHG]](s64)
+ ; RV64IA-NEXT: PseudoRET implicit $x10
+ %0:_(p0) = COPY $x10
+ %1:_(s64) = G_CONSTANT i64 0
+ %2:_(s64) = G_CONSTANT i64 1
+ %3:_(s64) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store monotonic (s64))
+ $x10 = COPY %3(s64)
+ PseudoRET implicit $x10
+...
+---
+name: cmpxchg_with_success_i32
+
+body: |
+ bb.0:
+ liveins: $x10
+
+ ; RV64IA-ZABHA-LABEL: name: cmpxchg_with_success_i32
+ ; RV64IA-ZABHA: liveins: $x10
+ ; RV64IA-ZABHA-NEXT: {{ $}}
+ ; RV64IA-ZABHA-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10
+ ; RV64IA-ZABHA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; RV64IA-ZABHA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; RV64IA-ZABHA-NEXT: [[ATOMIC_CMPXCHG:%[0-9]+]]:_(s64) = G_ATOMIC_CMPXCHG [[COPY]](p0), [[C]], [[C1]] :: (load store monotonic (s32))
+ ; RV64IA-ZABHA-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[ATOMIC_CMPXCHG]], 32
+ ; RV64IA-ZABHA-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(eq), [[SEXT_INREG]](s64), [[C]]
+ ; RV64IA-ZABHA-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
+ ; RV64IA-ZABHA-NEXT: $x10 = COPY [[ATOMIC_CMPXCHG]](s64)
+ ; RV64IA-ZABHA-NEXT: $x11 = COPY [[ICMP]](s64)
+ ; RV64IA-ZABHA-NEXT: PseudoCALL target-flags(riscv-call) &__muldi3, csr_ilp32_lp64, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10
+ ; RV64IA-ZABHA-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
+ ; RV64IA-ZABHA-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x10
+ ; RV64IA-ZABHA-NEXT: $x10 = COPY [[COPY1]](s64)
+ ; RV64IA-ZABHA-NEXT: PseudoRET implicit $x10
+ ;
+ ; RV64IA-LABEL: name: cmpxchg_with_success_i32
+ ; RV64IA: liveins: $x10
+ ; RV64IA-NEXT: {{ $}}
+ ; RV64IA-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10
+ ; RV64IA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; RV64IA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; RV64IA-NEXT: [[ATOMIC_CMPXCHG:%[0-9]+]]:_(s64) = G_ATOMIC_CMPXCHG [[COPY]](p0), [[C]], [[C1]] :: (load store monotonic (s32))
+ ; RV64IA-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[ATOMIC_CMPXCHG]], 32
+ ; RV64IA-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(eq), [[SEXT_INREG]](s64), [[C]]
+ ; RV64IA-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
+ ; RV64IA-NEXT: $x10 = COPY [[ATOMIC_CMPXCHG]](s64)
+ ; RV64IA-NEXT: $x11 = COPY [[ICMP]](s64)
+ ; RV64IA-NEXT: PseudoCALL target-flags(riscv-call) &__muldi3, csr_ilp32_lp64, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10
+ ; RV64IA-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
+ ; RV64IA-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x10
+ ; RV64IA-NEXT: $x10 = COPY [[COPY1]](s64)
+ ; RV64IA-NEXT: PseudoRET implicit $x10
+ %0:_(p0) = COPY $x10
+ %1:_(s32) = G_CONSTANT i32 0
+ %2:_(s32) = G_CONSTANT i32 1
+ %3:_(s32), %4:_(s1) = G_ATOMIC_CMPXCHG_WITH_SUCCESS %0, %1, %2 :: (load store monotonic (s32))
+ %5:_(s32) = G_ANYEXT %4
+ %6:_(s32) = G_MUL %3, %5
+ %7:_(s64) = G_ANYEXT %6
+ $x10 = COPY %7(s64)
+ PseudoRET implicit $x10
+...
+---
+name: cmpxchg_with_success_i64
+
+body: |
+ bb.0:
+ liveins: $x10
+
+ ; RV64IA-ZABHA-LABEL: name: cmpxchg_with_success_i64
+ ; RV64IA-ZABHA: liveins: $x10
+ ; RV64IA-ZABHA-NEXT: {{ $}}
+ ; RV64IA-ZABHA-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10
+ ; RV64IA-ZABHA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; RV64IA-ZABHA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; RV64IA-ZABHA-NEXT: [[ATOMIC_CMPXCHG:%[0-9]+]]:_(s64) = G_ATOMIC_CMPXCHG [[COPY]](p0), [[C]], [[C1]] :: (load store monotonic (s64))
+ ; RV64IA-ZABHA-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(eq), [[ATOMIC_CMPXCHG]](s64), [[C]]
+ ; RV64IA-ZABHA-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY [[ATOMIC_CMPXCHG]](s64)
+ ; RV64IA-ZABHA-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
+ ; RV64IA-ZABHA-NEXT: $x10 = COPY [[COPY1]](s64)
+ ; RV64IA-ZABHA-NEXT: $x11 = COPY [[ICMP]](s64)
+ ; RV64IA-ZABHA-NEXT: PseudoCALL target-flags(riscv-call) &__muldi3, csr_ilp32_lp64, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10
+ ; RV64IA-ZABHA-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
+ ; RV64IA-ZABHA-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x10
+ ; RV64IA-ZABHA-NEXT: $x10 = COPY [[COPY2]](s64)
+ ; RV64IA-ZABHA-NEXT: PseudoRET implicit $x10
+ ;
+ ; RV64IA-LABEL: name: cmpxchg_with_success_i64
+ ; RV64IA: liveins: $x10
+ ; RV64IA-NEXT: {{ $}}
+ ; RV64IA-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10
+ ; RV64IA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; RV64IA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; RV64IA-NEXT: [[ATOMIC_CMPXCHG:%[0-9]+]]:_(s64) = G_ATOMIC_CMPXCHG [[COPY]](p0), [[C]], [[C1]] :: (load store monotonic (s64))
+ ; RV64IA-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(eq), [[ATOMIC_CMPXCHG]](s64), [[C]]
+ ; RV64IA-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY [[ATOMIC_CMPXCHG]](s64)
+ ; RV64IA-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
+ ; RV64IA-NEXT: $x10 = COPY [[COPY1]](s64)
+ ; RV64IA-NEXT: $x11 = COPY [[ICMP]](s64)
+ ; RV64IA-NEXT: PseudoCALL target-flags(riscv-call) &__muldi3, csr_ilp32_lp64, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10
+ ; RV64IA-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
+ ; RV64IA-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x10
+ ; RV64IA-NEXT: $x10 = COPY [[COPY2]](s64)
+ ; RV64IA-NEXT: PseudoRET implicit $x10
+ %0:_(p0) = COPY $x10
+ %1:_(s64) = G_CONSTANT i64 0
+ %2:_(s64) = G_CONSTANT i64 1
+ %3:_(s64), %4:_(s1) = G_ATOMIC_CMPXCHG_WITH_SUCCESS %0, %1, %2 :: (load store monotonic (s64))
+ %5:_(s64) = G_ANYEXT %4
+ %6:_(s64) = G_MUL %3, %5
+ $x10 = COPY %6(s64)
+ PseudoRET implicit $x10
+...
More information about the llvm-commits
mailing list