[llvm] f5ed0cb - [RISCV] Add target feature to force-enable atomics
Nikita Popov via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 9 07:04:54 PDT 2022
Author: Nikita Popov
Date: 2022-08-09T16:04:46+02:00
New Revision: f5ed0cb217a9988f97b55f2ccb053bca7b41cc0c
URL: https://github.com/llvm/llvm-project/commit/f5ed0cb217a9988f97b55f2ccb053bca7b41cc0c
DIFF: https://github.com/llvm/llvm-project/commit/f5ed0cb217a9988f97b55f2ccb053bca7b41cc0c.diff
LOG: [RISCV] Add target feature to force-enable atomics
This adds a +forced-atomics target feature with the same semantics
as +atomics-32 on ARM (D130480). For RISCV targets without the +a
extension, this forces LLVM to assume that lock-free atomics
(up to 32/64 bits for riscv32/64 respectively) are available.
This means that atomic load/store are lowered to a simple load/store
(and fence as necessary), as these are guaranteed to be atomic
(as long as they're aligned). Atomic RMW/CAS are lowered to __sync
(rather than __atomic) libcalls. Responsibility for providing the
__sync libcalls lies with the user (for privileged single-core code
they can be implemented by disabling interrupts). Code using
+forced-atomics and -forced-atomics are not ABI compatible if atomic
variables cross the ABI boundary.
For context, the difference between __sync and __atomic is that the
former are required to be lock-free, while the latter requires a
shared global lock provided by a shared object library. See
https://llvm.org/docs/Atomics.html#libcalls-atomic for a detailed
discussion on the topic.
This target feature will be used by Rust's riscv32i target family
to support the use of atomic load/store without atomic RMW/CAS.
Differential Revision: https://reviews.llvm.org/D130621
Added:
llvm/test/CodeGen/RISCV/forced-atomics.ll
Modified:
llvm/docs/Atomics.rst
llvm/lib/Target/RISCV/RISCV.td
llvm/lib/Target/RISCV/RISCVISelLowering.cpp
llvm/lib/Target/RISCV/RISCVInstrInfoA.td
llvm/lib/Target/RISCV/RISCVSubtarget.h
Removed:
################################################################################
diff --git a/llvm/docs/Atomics.rst b/llvm/docs/Atomics.rst
index d091bf5e9cf7..3e2cfe4b3e28 100644
--- a/llvm/docs/Atomics.rst
+++ b/llvm/docs/Atomics.rst
@@ -581,6 +581,13 @@ case. The only common architecture without that property is SPARC -- SPARCV8 SMP
systems were common, yet it doesn't support any sort of compare-and-swap
operation.
+Some targets (like RISCV) support a ``+forced-atomics`` target feature, which
+enables the use of lock-free atomics even if LLVM is not aware of any specific
+OS support for them. In this case, the user is responsible for ensuring that
+necessary ``__sync_*`` implementations are available. Code using
+``+forced-atomics`` is ABI-incompatible with code not using the feature, if
+atomic variables cross the ABI boundary.
+
In either of these cases, the Target in LLVM can claim support for atomics of an
appropriate size, and then implement some subset of the operations via libcalls
to a ``__sync_*`` function. Such functions *must* not use locks in their
diff --git a/llvm/lib/Target/RISCV/RISCV.td b/llvm/lib/Target/RISCV/RISCV.td
index 8a6f69c7f7ca..e8bff273847a 100644
--- a/llvm/lib/Target/RISCV/RISCV.td
+++ b/llvm/lib/Target/RISCV/RISCV.td
@@ -481,6 +481,17 @@ def TuneSiFive7 : SubtargetFeature<"sifive7", "RISCVProcFamily", "SiFive7",
"SiFive 7-Series processors",
[TuneNoDefaultUnroll]>;
+// Assume that lock-free native-width atomics are available, even if the target
+// and operating system combination would not usually provide them. The user
+// is responsible for providing any necessary __sync implementations. Code
+// built with this feature is not ABI-compatible with code built without this
+// feature, if atomic variables are exposed across the ABI boundary.
+def FeatureForcedAtomics : SubtargetFeature<
+ "forced-atomics", "HasForcedAtomics", "true",
+ "Assume that lock-free native-width atomics are available">;
+def HasAtomicLdSt
+ : Predicate<"Subtarget->hasStdExtA() || Subtarget->hasForcedAtomics()">;
+
//===----------------------------------------------------------------------===//
// Named operands for CSR instructions.
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 1caf089c2fa0..2593365ae55c 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -411,6 +411,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
if (Subtarget.hasStdExtA()) {
setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
setMinCmpXchgSizeInBits(32);
+ } else if (Subtarget.hasForcedAtomics()) {
+ setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
} else {
setMaxAtomicSizeInBitsSupported(0);
}
@@ -929,6 +931,16 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
}
}
+ if (Subtarget.hasForcedAtomics()) {
+ // Set atomic rmw/cas operations to expand to force __sync libcalls.
+ setOperationAction(
+ {ISD::ATOMIC_CMP_SWAP, ISD::ATOMIC_SWAP, ISD::ATOMIC_LOAD_ADD,
+ ISD::ATOMIC_LOAD_SUB, ISD::ATOMIC_LOAD_AND, ISD::ATOMIC_LOAD_OR,
+ ISD::ATOMIC_LOAD_XOR, ISD::ATOMIC_LOAD_NAND, ISD::ATOMIC_LOAD_MIN,
+ ISD::ATOMIC_LOAD_MAX, ISD::ATOMIC_LOAD_UMIN, ISD::ATOMIC_LOAD_UMAX},
+ XLenVT, Expand);
+ }
+
// Function alignments.
const Align FunctionAlignment(Subtarget.hasStdExtC() ? 2 : 4);
setMinFunctionAlignment(FunctionAlignment);
@@ -12286,6 +12298,10 @@ RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
if (AI->isFloatingPointOperation())
return AtomicExpansionKind::CmpXChg;
+ // Don't expand forced atomics, we want to have __sync libcalls instead.
+ if (Subtarget.hasForcedAtomics())
+ return AtomicExpansionKind::None;
+
unsigned Size = AI->getType()->getPrimitiveSizeInBits();
if (Size == 8 || Size == 16)
return AtomicExpansionKind::MaskedIntrinsic;
@@ -12389,6 +12405,10 @@ Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic(
TargetLowering::AtomicExpansionKind
RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR(
AtomicCmpXchgInst *CI) const {
+ // Don't expand forced atomics, we want to have __sync libcalls instead.
+ if (Subtarget.hasForcedAtomics())
+ return AtomicExpansionKind::None;
+
unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
if (Size == 8 || Size == 16)
return AtomicExpansionKind::MaskedIntrinsic;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoA.td b/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
index dd4b174d7e62..5227acc1e504 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
@@ -105,20 +105,25 @@ defm AMOMAXU_D : AMO_rr_aq_rl<0b11100, 0b011, "amomaxu.d">,
// Pseudo-instructions and codegen patterns
//===----------------------------------------------------------------------===//
-let Predicates = [HasStdExtA] in {
-
-/// Atomic loads and stores
-
+// Atomic load/store are available under both +a and +forced-atomics.
// Fences will be inserted for atomic load/stores according to the logic in
// RISCVTargetLowering::{emitLeadingFence,emitTrailingFence}.
+let Predicates = [HasAtomicLdSt] in {
+ defm : LdPat<atomic_load_8, LB>;
+ defm : LdPat<atomic_load_16, LH>;
+ defm : LdPat<atomic_load_32, LW>;
+
+ defm : AtomicStPat<atomic_store_8, SB, GPR>;
+ defm : AtomicStPat<atomic_store_16, SH, GPR>;
+ defm : AtomicStPat<atomic_store_32, SW, GPR>;
+}
-defm : LdPat<atomic_load_8, LB>;
-defm : LdPat<atomic_load_16, LH>;
-defm : LdPat<atomic_load_32, LW>;
+let Predicates = [HasAtomicLdSt, IsRV64] in {
+ defm : LdPat<atomic_load_64, LD, i64>;
+ defm : AtomicStPat<atomic_store_64, SD, GPR, i64>;
+}
-defm : AtomicStPat<atomic_store_8, SB, GPR>;
-defm : AtomicStPat<atomic_store_16, SH, GPR>;
-defm : AtomicStPat<atomic_store_32, SW, GPR>;
+let Predicates = [HasStdExtA] in {
/// AMOs
@@ -304,13 +309,6 @@ def : Pat<(int_riscv_masked_cmpxchg_i32
let Predicates = [HasStdExtA, IsRV64] in {
-/// 64-bit atomic loads and stores
-
-// Fences will be inserted for atomic load/stores according to the logic in
-// RISCVTargetLowering::{emitLeadingFence,emitTrailingFence}.
-defm : LdPat<atomic_load_64, LD, i64>;
-defm : AtomicStPat<atomic_store_64, SD, GPR, i64>;
-
defm : AMOPat<"atomic_swap_64", "AMOSWAP_D">;
defm : AMOPat<"atomic_load_add_64", "AMOADD_D">;
defm : AMOPat<"atomic_load_and_64", "AMOAND_D">;
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h
index 6eb949fa551c..c13593496253 100644
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.h
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h
@@ -98,6 +98,7 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
bool EnableSaveRestore = false;
bool EnableUnalignedScalarMem = false;
bool HasLUIADDIFusion = false;
+ bool HasForcedAtomics = false;
unsigned XLen = 32;
unsigned ZvlLen = 0;
MVT XLenVT = MVT::i32;
@@ -194,6 +195,7 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
bool enableSaveRestore() const { return EnableSaveRestore; }
bool enableUnalignedScalarMem() const { return EnableUnalignedScalarMem; }
bool hasLUIADDIFusion() const { return HasLUIADDIFusion; }
+ bool hasForcedAtomics() const { return HasForcedAtomics; }
MVT getXLenVT() const { return XLenVT; }
unsigned getXLen() const { return XLen; }
unsigned getFLen() const {
diff --git a/llvm/test/CodeGen/RISCV/forced-atomics.ll b/llvm/test/CodeGen/RISCV/forced-atomics.ll
new file mode 100644
index 000000000000..a6c735bdfa95
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/forced-atomics.ll
@@ -0,0 +1,3642 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 < %s | FileCheck %s --check-prefixes=RV32,RV32-NO-ATOMIC
+; RUN: llc -mtriple=riscv32 -mattr=+forced-atomics < %s | FileCheck %s --check-prefixes=RV32,RV32-ATOMIC
+; RUN: llc -mtriple=riscv64 < %s | FileCheck %s --check-prefixes=RV64,RV64-NO-ATOMIC
+; RUN: llc -mtriple=riscv64 -mattr=+forced-atomics < %s | FileCheck %s --check-prefixes=RV64,RV64-ATOMIC
+
+define i8 @load8(ptr %p) nounwind {
+; RV32-NO-ATOMIC-LABEL: load8:
+; RV32-NO-ATOMIC: # %bb.0:
+; RV32-NO-ATOMIC-NEXT: addi sp, sp, -16
+; RV32-NO-ATOMIC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NO-ATOMIC-NEXT: li a1, 5
+; RV32-NO-ATOMIC-NEXT: call __atomic_load_1@plt
+; RV32-NO-ATOMIC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NO-ATOMIC-NEXT: addi sp, sp, 16
+; RV32-NO-ATOMIC-NEXT: ret
+;
+; RV32-ATOMIC-LABEL: load8:
+; RV32-ATOMIC: # %bb.0:
+; RV32-ATOMIC-NEXT: fence rw, rw
+; RV32-ATOMIC-NEXT: lb a0, 0(a0)
+; RV32-ATOMIC-NEXT: fence r, rw
+; RV32-ATOMIC-NEXT: ret
+;
+; RV64-NO-ATOMIC-LABEL: load8:
+; RV64-NO-ATOMIC: # %bb.0:
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, -16
+; RV64-NO-ATOMIC-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: li a1, 5
+; RV64-NO-ATOMIC-NEXT: call __atomic_load_1@plt
+; RV64-NO-ATOMIC-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, 16
+; RV64-NO-ATOMIC-NEXT: ret
+;
+; RV64-ATOMIC-LABEL: load8:
+; RV64-ATOMIC: # %bb.0:
+; RV64-ATOMIC-NEXT: fence rw, rw
+; RV64-ATOMIC-NEXT: lb a0, 0(a0)
+; RV64-ATOMIC-NEXT: fence r, rw
+; RV64-ATOMIC-NEXT: ret
+ %v = load atomic i8, ptr %p seq_cst, align 1
+ ret i8 %v
+}
+
+define void @store8(ptr %p) nounwind {
+; RV32-NO-ATOMIC-LABEL: store8:
+; RV32-NO-ATOMIC: # %bb.0:
+; RV32-NO-ATOMIC-NEXT: addi sp, sp, -16
+; RV32-NO-ATOMIC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NO-ATOMIC-NEXT: li a2, 5
+; RV32-NO-ATOMIC-NEXT: li a1, 0
+; RV32-NO-ATOMIC-NEXT: call __atomic_store_1@plt
+; RV32-NO-ATOMIC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NO-ATOMIC-NEXT: addi sp, sp, 16
+; RV32-NO-ATOMIC-NEXT: ret
+;
+; RV32-ATOMIC-LABEL: store8:
+; RV32-ATOMIC: # %bb.0:
+; RV32-ATOMIC-NEXT: fence rw, w
+; RV32-ATOMIC-NEXT: sb zero, 0(a0)
+; RV32-ATOMIC-NEXT: ret
+;
+; RV64-NO-ATOMIC-LABEL: store8:
+; RV64-NO-ATOMIC: # %bb.0:
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, -16
+; RV64-NO-ATOMIC-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: li a2, 5
+; RV64-NO-ATOMIC-NEXT: li a1, 0
+; RV64-NO-ATOMIC-NEXT: call __atomic_store_1@plt
+; RV64-NO-ATOMIC-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, 16
+; RV64-NO-ATOMIC-NEXT: ret
+;
+; RV64-ATOMIC-LABEL: store8:
+; RV64-ATOMIC: # %bb.0:
+; RV64-ATOMIC-NEXT: fence rw, w
+; RV64-ATOMIC-NEXT: sb zero, 0(a0)
+; RV64-ATOMIC-NEXT: ret
+ store atomic i8 0, ptr %p seq_cst, align 1
+ ret void
+}
+
+define i8 @rmw8(ptr %p) nounwind {
+; RV32-NO-ATOMIC-LABEL: rmw8:
+; RV32-NO-ATOMIC: # %bb.0:
+; RV32-NO-ATOMIC-NEXT: addi sp, sp, -16
+; RV32-NO-ATOMIC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NO-ATOMIC-NEXT: li a1, 1
+; RV32-NO-ATOMIC-NEXT: li a2, 5
+; RV32-NO-ATOMIC-NEXT: call __atomic_fetch_add_1@plt
+; RV32-NO-ATOMIC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NO-ATOMIC-NEXT: addi sp, sp, 16
+; RV32-NO-ATOMIC-NEXT: ret
+;
+; RV32-ATOMIC-LABEL: rmw8:
+; RV32-ATOMIC: # %bb.0:
+; RV32-ATOMIC-NEXT: addi sp, sp, -16
+; RV32-ATOMIC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-ATOMIC-NEXT: li a1, 1
+; RV32-ATOMIC-NEXT: call __sync_fetch_and_add_1@plt
+; RV32-ATOMIC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-ATOMIC-NEXT: addi sp, sp, 16
+; RV32-ATOMIC-NEXT: ret
+;
+; RV64-NO-ATOMIC-LABEL: rmw8:
+; RV64-NO-ATOMIC: # %bb.0:
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, -16
+; RV64-NO-ATOMIC-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: li a1, 1
+; RV64-NO-ATOMIC-NEXT: li a2, 5
+; RV64-NO-ATOMIC-NEXT: call __atomic_fetch_add_1@plt
+; RV64-NO-ATOMIC-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, 16
+; RV64-NO-ATOMIC-NEXT: ret
+;
+; RV64-ATOMIC-LABEL: rmw8:
+; RV64-ATOMIC: # %bb.0:
+; RV64-ATOMIC-NEXT: addi sp, sp, -16
+; RV64-ATOMIC-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-NEXT: li a1, 1
+; RV64-ATOMIC-NEXT: call __sync_fetch_and_add_1@plt
+; RV64-ATOMIC-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-NEXT: addi sp, sp, 16
+; RV64-ATOMIC-NEXT: ret
+ %v = atomicrmw add ptr %p, i8 1 seq_cst, align 1
+ ret i8 %v
+}
+
+define i8 @cmpxchg8(ptr %p) nounwind {
+; RV32-NO-ATOMIC-LABEL: cmpxchg8:
+; RV32-NO-ATOMIC: # %bb.0:
+; RV32-NO-ATOMIC-NEXT: addi sp, sp, -16
+; RV32-NO-ATOMIC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NO-ATOMIC-NEXT: sb zero, 11(sp)
+; RV32-NO-ATOMIC-NEXT: addi a1, sp, 11
+; RV32-NO-ATOMIC-NEXT: li a2, 1
+; RV32-NO-ATOMIC-NEXT: li a3, 5
+; RV32-NO-ATOMIC-NEXT: li a4, 5
+; RV32-NO-ATOMIC-NEXT: call __atomic_compare_exchange_1@plt
+; RV32-NO-ATOMIC-NEXT: lb a0, 11(sp)
+; RV32-NO-ATOMIC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NO-ATOMIC-NEXT: addi sp, sp, 16
+; RV32-NO-ATOMIC-NEXT: ret
+;
+; RV32-ATOMIC-LABEL: cmpxchg8:
+; RV32-ATOMIC: # %bb.0:
+; RV32-ATOMIC-NEXT: addi sp, sp, -16
+; RV32-ATOMIC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-ATOMIC-NEXT: li a2, 1
+; RV32-ATOMIC-NEXT: li a1, 0
+; RV32-ATOMIC-NEXT: call __sync_val_compare_and_swap_1@plt
+; RV32-ATOMIC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-ATOMIC-NEXT: addi sp, sp, 16
+; RV32-ATOMIC-NEXT: ret
+;
+; RV64-NO-ATOMIC-LABEL: cmpxchg8:
+; RV64-NO-ATOMIC: # %bb.0:
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, -16
+; RV64-NO-ATOMIC-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: sb zero, 7(sp)
+; RV64-NO-ATOMIC-NEXT: addi a1, sp, 7
+; RV64-NO-ATOMIC-NEXT: li a2, 1
+; RV64-NO-ATOMIC-NEXT: li a3, 5
+; RV64-NO-ATOMIC-NEXT: li a4, 5
+; RV64-NO-ATOMIC-NEXT: call __atomic_compare_exchange_1@plt
+; RV64-NO-ATOMIC-NEXT: lb a0, 7(sp)
+; RV64-NO-ATOMIC-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, 16
+; RV64-NO-ATOMIC-NEXT: ret
+;
+; RV64-ATOMIC-LABEL: cmpxchg8:
+; RV64-ATOMIC: # %bb.0:
+; RV64-ATOMIC-NEXT: addi sp, sp, -16
+; RV64-ATOMIC-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-NEXT: li a2, 1
+; RV64-ATOMIC-NEXT: li a1, 0
+; RV64-ATOMIC-NEXT: call __sync_val_compare_and_swap_1@plt
+; RV64-ATOMIC-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-NEXT: addi sp, sp, 16
+; RV64-ATOMIC-NEXT: ret
+ %res = cmpxchg ptr %p, i8 0, i8 1 seq_cst seq_cst
+ %res.0 = extractvalue { i8, i1 } %res, 0
+ ret i8 %res.0
+}
+
+define i16 @load16(ptr %p) nounwind {
+; RV32-NO-ATOMIC-LABEL: load16:
+; RV32-NO-ATOMIC: # %bb.0:
+; RV32-NO-ATOMIC-NEXT: addi sp, sp, -16
+; RV32-NO-ATOMIC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NO-ATOMIC-NEXT: li a1, 5
+; RV32-NO-ATOMIC-NEXT: call __atomic_load_2@plt
+; RV32-NO-ATOMIC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NO-ATOMIC-NEXT: addi sp, sp, 16
+; RV32-NO-ATOMIC-NEXT: ret
+;
+; RV32-ATOMIC-LABEL: load16:
+; RV32-ATOMIC: # %bb.0:
+; RV32-ATOMIC-NEXT: fence rw, rw
+; RV32-ATOMIC-NEXT: lh a0, 0(a0)
+; RV32-ATOMIC-NEXT: fence r, rw
+; RV32-ATOMIC-NEXT: ret
+;
+; RV64-NO-ATOMIC-LABEL: load16:
+; RV64-NO-ATOMIC: # %bb.0:
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, -16
+; RV64-NO-ATOMIC-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: li a1, 5
+; RV64-NO-ATOMIC-NEXT: call __atomic_load_2@plt
+; RV64-NO-ATOMIC-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, 16
+; RV64-NO-ATOMIC-NEXT: ret
+;
+; RV64-ATOMIC-LABEL: load16:
+; RV64-ATOMIC: # %bb.0:
+; RV64-ATOMIC-NEXT: fence rw, rw
+; RV64-ATOMIC-NEXT: lh a0, 0(a0)
+; RV64-ATOMIC-NEXT: fence r, rw
+; RV64-ATOMIC-NEXT: ret
+ %v = load atomic i16, ptr %p seq_cst, align 2
+ ret i16 %v
+}
+
+define void @store16(ptr %p) nounwind {
+; RV32-NO-ATOMIC-LABEL: store16:
+; RV32-NO-ATOMIC: # %bb.0:
+; RV32-NO-ATOMIC-NEXT: addi sp, sp, -16
+; RV32-NO-ATOMIC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NO-ATOMIC-NEXT: li a2, 5
+; RV32-NO-ATOMIC-NEXT: li a1, 0
+; RV32-NO-ATOMIC-NEXT: call __atomic_store_2@plt
+; RV32-NO-ATOMIC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NO-ATOMIC-NEXT: addi sp, sp, 16
+; RV32-NO-ATOMIC-NEXT: ret
+;
+; RV32-ATOMIC-LABEL: store16:
+; RV32-ATOMIC: # %bb.0:
+; RV32-ATOMIC-NEXT: fence rw, w
+; RV32-ATOMIC-NEXT: sh zero, 0(a0)
+; RV32-ATOMIC-NEXT: ret
+;
+; RV64-NO-ATOMIC-LABEL: store16:
+; RV64-NO-ATOMIC: # %bb.0:
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, -16
+; RV64-NO-ATOMIC-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: li a2, 5
+; RV64-NO-ATOMIC-NEXT: li a1, 0
+; RV64-NO-ATOMIC-NEXT: call __atomic_store_2@plt
+; RV64-NO-ATOMIC-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, 16
+; RV64-NO-ATOMIC-NEXT: ret
+;
+; RV64-ATOMIC-LABEL: store16:
+; RV64-ATOMIC: # %bb.0:
+; RV64-ATOMIC-NEXT: fence rw, w
+; RV64-ATOMIC-NEXT: sh zero, 0(a0)
+; RV64-ATOMIC-NEXT: ret
+ store atomic i16 0, ptr %p seq_cst, align 2
+ ret void
+}
+
+define i16 @rmw16(ptr %p) nounwind {
+; RV32-NO-ATOMIC-LABEL: rmw16:
+; RV32-NO-ATOMIC: # %bb.0:
+; RV32-NO-ATOMIC-NEXT: addi sp, sp, -16
+; RV32-NO-ATOMIC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NO-ATOMIC-NEXT: li a1, 1
+; RV32-NO-ATOMIC-NEXT: li a2, 5
+; RV32-NO-ATOMIC-NEXT: call __atomic_fetch_add_2@plt
+; RV32-NO-ATOMIC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NO-ATOMIC-NEXT: addi sp, sp, 16
+; RV32-NO-ATOMIC-NEXT: ret
+;
+; RV32-ATOMIC-LABEL: rmw16:
+; RV32-ATOMIC: # %bb.0:
+; RV32-ATOMIC-NEXT: addi sp, sp, -16
+; RV32-ATOMIC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-ATOMIC-NEXT: li a1, 1
+; RV32-ATOMIC-NEXT: call __sync_fetch_and_add_2@plt
+; RV32-ATOMIC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-ATOMIC-NEXT: addi sp, sp, 16
+; RV32-ATOMIC-NEXT: ret
+;
+; RV64-NO-ATOMIC-LABEL: rmw16:
+; RV64-NO-ATOMIC: # %bb.0:
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, -16
+; RV64-NO-ATOMIC-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: li a1, 1
+; RV64-NO-ATOMIC-NEXT: li a2, 5
+; RV64-NO-ATOMIC-NEXT: call __atomic_fetch_add_2@plt
+; RV64-NO-ATOMIC-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, 16
+; RV64-NO-ATOMIC-NEXT: ret
+;
+; RV64-ATOMIC-LABEL: rmw16:
+; RV64-ATOMIC: # %bb.0:
+; RV64-ATOMIC-NEXT: addi sp, sp, -16
+; RV64-ATOMIC-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-NEXT: li a1, 1
+; RV64-ATOMIC-NEXT: call __sync_fetch_and_add_2@plt
+; RV64-ATOMIC-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-NEXT: addi sp, sp, 16
+; RV64-ATOMIC-NEXT: ret
+ %v = atomicrmw add ptr %p, i16 1 seq_cst, align 2
+ ret i16 %v
+}
+
+define i16 @cmpxchg16(ptr %p) nounwind {
+; RV32-NO-ATOMIC-LABEL: cmpxchg16:
+; RV32-NO-ATOMIC: # %bb.0:
+; RV32-NO-ATOMIC-NEXT: addi sp, sp, -16
+; RV32-NO-ATOMIC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NO-ATOMIC-NEXT: sh zero, 10(sp)
+; RV32-NO-ATOMIC-NEXT: addi a1, sp, 10
+; RV32-NO-ATOMIC-NEXT: li a2, 1
+; RV32-NO-ATOMIC-NEXT: li a3, 5
+; RV32-NO-ATOMIC-NEXT: li a4, 5
+; RV32-NO-ATOMIC-NEXT: call __atomic_compare_exchange_2@plt
+; RV32-NO-ATOMIC-NEXT: lh a0, 10(sp)
+; RV32-NO-ATOMIC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NO-ATOMIC-NEXT: addi sp, sp, 16
+; RV32-NO-ATOMIC-NEXT: ret
+;
+; RV32-ATOMIC-LABEL: cmpxchg16:
+; RV32-ATOMIC: # %bb.0:
+; RV32-ATOMIC-NEXT: addi sp, sp, -16
+; RV32-ATOMIC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-ATOMIC-NEXT: li a2, 1
+; RV32-ATOMIC-NEXT: li a1, 0
+; RV32-ATOMIC-NEXT: call __sync_val_compare_and_swap_2@plt
+; RV32-ATOMIC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-ATOMIC-NEXT: addi sp, sp, 16
+; RV32-ATOMIC-NEXT: ret
+;
+; RV64-NO-ATOMIC-LABEL: cmpxchg16:
+; RV64-NO-ATOMIC: # %bb.0:
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, -16
+; RV64-NO-ATOMIC-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: sh zero, 6(sp)
+; RV64-NO-ATOMIC-NEXT: addi a1, sp, 6
+; RV64-NO-ATOMIC-NEXT: li a2, 1
+; RV64-NO-ATOMIC-NEXT: li a3, 5
+; RV64-NO-ATOMIC-NEXT: li a4, 5
+; RV64-NO-ATOMIC-NEXT: call __atomic_compare_exchange_2@plt
+; RV64-NO-ATOMIC-NEXT: lh a0, 6(sp)
+; RV64-NO-ATOMIC-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, 16
+; RV64-NO-ATOMIC-NEXT: ret
+;
+; RV64-ATOMIC-LABEL: cmpxchg16:
+; RV64-ATOMIC: # %bb.0:
+; RV64-ATOMIC-NEXT: addi sp, sp, -16
+; RV64-ATOMIC-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-NEXT: li a2, 1
+; RV64-ATOMIC-NEXT: li a1, 0
+; RV64-ATOMIC-NEXT: call __sync_val_compare_and_swap_2@plt
+; RV64-ATOMIC-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-NEXT: addi sp, sp, 16
+; RV64-ATOMIC-NEXT: ret
+ %res = cmpxchg ptr %p, i16 0, i16 1 seq_cst seq_cst
+ %res.0 = extractvalue { i16, i1 } %res, 0
+ ret i16 %res.0
+}
+
+define i32 @load32_unordered(ptr %p) nounwind {
+; RV32-NO-ATOMIC-LABEL: load32_unordered:
+; RV32-NO-ATOMIC: # %bb.0:
+; RV32-NO-ATOMIC-NEXT: addi sp, sp, -16
+; RV32-NO-ATOMIC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NO-ATOMIC-NEXT: li a1, 0
+; RV32-NO-ATOMIC-NEXT: call __atomic_load_4@plt
+; RV32-NO-ATOMIC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NO-ATOMIC-NEXT: addi sp, sp, 16
+; RV32-NO-ATOMIC-NEXT: ret
+;
+; RV32-ATOMIC-LABEL: load32_unordered:
+; RV32-ATOMIC: # %bb.0:
+; RV32-ATOMIC-NEXT: lw a0, 0(a0)
+; RV32-ATOMIC-NEXT: ret
+;
+; RV64-NO-ATOMIC-LABEL: load32_unordered:
+; RV64-NO-ATOMIC: # %bb.0:
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, -16
+; RV64-NO-ATOMIC-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: li a1, 0
+; RV64-NO-ATOMIC-NEXT: call __atomic_load_4@plt
+; RV64-NO-ATOMIC-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, 16
+; RV64-NO-ATOMIC-NEXT: ret
+;
+; RV64-ATOMIC-LABEL: load32_unordered:
+; RV64-ATOMIC: # %bb.0:
+; RV64-ATOMIC-NEXT: lw a0, 0(a0)
+; RV64-ATOMIC-NEXT: ret
+ %v = load atomic i32, ptr %p unordered, align 4
+ ret i32 %v
+}
+
+define i32 @load32_monotonic(ptr %p) nounwind {
+; RV32-NO-ATOMIC-LABEL: load32_monotonic:
+; RV32-NO-ATOMIC: # %bb.0:
+; RV32-NO-ATOMIC-NEXT: addi sp, sp, -16
+; RV32-NO-ATOMIC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NO-ATOMIC-NEXT: li a1, 0
+; RV32-NO-ATOMIC-NEXT: call __atomic_load_4@plt
+; RV32-NO-ATOMIC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NO-ATOMIC-NEXT: addi sp, sp, 16
+; RV32-NO-ATOMIC-NEXT: ret
+;
+; RV32-ATOMIC-LABEL: load32_monotonic:
+; RV32-ATOMIC: # %bb.0:
+; RV32-ATOMIC-NEXT: lw a0, 0(a0)
+; RV32-ATOMIC-NEXT: ret
+;
+; RV64-NO-ATOMIC-LABEL: load32_monotonic:
+; RV64-NO-ATOMIC: # %bb.0:
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, -16
+; RV64-NO-ATOMIC-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: li a1, 0
+; RV64-NO-ATOMIC-NEXT: call __atomic_load_4@plt
+; RV64-NO-ATOMIC-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, 16
+; RV64-NO-ATOMIC-NEXT: ret
+;
+; RV64-ATOMIC-LABEL: load32_monotonic:
+; RV64-ATOMIC: # %bb.0:
+; RV64-ATOMIC-NEXT: lw a0, 0(a0)
+; RV64-ATOMIC-NEXT: ret
+ %v = load atomic i32, ptr %p monotonic, align 4
+ ret i32 %v
+}
+
+define i32 @load32_acquire(ptr %p) nounwind {
+; RV32-NO-ATOMIC-LABEL: load32_acquire:
+; RV32-NO-ATOMIC: # %bb.0:
+; RV32-NO-ATOMIC-NEXT: addi sp, sp, -16
+; RV32-NO-ATOMIC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NO-ATOMIC-NEXT: li a1, 2
+; RV32-NO-ATOMIC-NEXT: call __atomic_load_4@plt
+; RV32-NO-ATOMIC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NO-ATOMIC-NEXT: addi sp, sp, 16
+; RV32-NO-ATOMIC-NEXT: ret
+;
+; RV32-ATOMIC-LABEL: load32_acquire:
+; RV32-ATOMIC: # %bb.0:
+; RV32-ATOMIC-NEXT: lw a0, 0(a0)
+; RV32-ATOMIC-NEXT: fence r, rw
+; RV32-ATOMIC-NEXT: ret
+;
+; RV64-NO-ATOMIC-LABEL: load32_acquire:
+; RV64-NO-ATOMIC: # %bb.0:
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, -16
+; RV64-NO-ATOMIC-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: li a1, 2
+; RV64-NO-ATOMIC-NEXT: call __atomic_load_4@plt
+; RV64-NO-ATOMIC-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, 16
+; RV64-NO-ATOMIC-NEXT: ret
+;
+; RV64-ATOMIC-LABEL: load32_acquire:
+; RV64-ATOMIC: # %bb.0:
+; RV64-ATOMIC-NEXT: lw a0, 0(a0)
+; RV64-ATOMIC-NEXT: fence r, rw
+; RV64-ATOMIC-NEXT: ret
+ %v = load atomic i32, ptr %p acquire, align 4
+ ret i32 %v
+}
+
+define i32 @load32_seq_cst(ptr %p) nounwind {
+; RV32-NO-ATOMIC-LABEL: load32_seq_cst:
+; RV32-NO-ATOMIC: # %bb.0:
+; RV32-NO-ATOMIC-NEXT: addi sp, sp, -16
+; RV32-NO-ATOMIC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NO-ATOMIC-NEXT: li a1, 5
+; RV32-NO-ATOMIC-NEXT: call __atomic_load_4@plt
+; RV32-NO-ATOMIC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NO-ATOMIC-NEXT: addi sp, sp, 16
+; RV32-NO-ATOMIC-NEXT: ret
+;
+; RV32-ATOMIC-LABEL: load32_seq_cst:
+; RV32-ATOMIC: # %bb.0:
+; RV32-ATOMIC-NEXT: fence rw, rw
+; RV32-ATOMIC-NEXT: lw a0, 0(a0)
+; RV32-ATOMIC-NEXT: fence r, rw
+; RV32-ATOMIC-NEXT: ret
+;
+; RV64-NO-ATOMIC-LABEL: load32_seq_cst:
+; RV64-NO-ATOMIC: # %bb.0:
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, -16
+; RV64-NO-ATOMIC-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: li a1, 5
+; RV64-NO-ATOMIC-NEXT: call __atomic_load_4@plt
+; RV64-NO-ATOMIC-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, 16
+; RV64-NO-ATOMIC-NEXT: ret
+;
+; RV64-ATOMIC-LABEL: load32_seq_cst:
+; RV64-ATOMIC: # %bb.0:
+; RV64-ATOMIC-NEXT: fence rw, rw
+; RV64-ATOMIC-NEXT: lw a0, 0(a0)
+; RV64-ATOMIC-NEXT: fence r, rw
+; RV64-ATOMIC-NEXT: ret
+ %v = load atomic i32, ptr %p seq_cst, align 4
+ ret i32 %v
+}
+
+define void @store32_unordered(ptr %p) nounwind {
+; RV32-NO-ATOMIC-LABEL: store32_unordered:
+; RV32-NO-ATOMIC: # %bb.0:
+; RV32-NO-ATOMIC-NEXT: addi sp, sp, -16
+; RV32-NO-ATOMIC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NO-ATOMIC-NEXT: li a1, 0
+; RV32-NO-ATOMIC-NEXT: li a2, 0
+; RV32-NO-ATOMIC-NEXT: call __atomic_store_4@plt
+; RV32-NO-ATOMIC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NO-ATOMIC-NEXT: addi sp, sp, 16
+; RV32-NO-ATOMIC-NEXT: ret
+;
+; RV32-ATOMIC-LABEL: store32_unordered:
+; RV32-ATOMIC: # %bb.0:
+; RV32-ATOMIC-NEXT: sw zero, 0(a0)
+; RV32-ATOMIC-NEXT: ret
+;
+; RV64-NO-ATOMIC-LABEL: store32_unordered:
+; RV64-NO-ATOMIC: # %bb.0:
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, -16
+; RV64-NO-ATOMIC-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: li a1, 0
+; RV64-NO-ATOMIC-NEXT: li a2, 0
+; RV64-NO-ATOMIC-NEXT: call __atomic_store_4@plt
+; RV64-NO-ATOMIC-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, 16
+; RV64-NO-ATOMIC-NEXT: ret
+;
+; RV64-ATOMIC-LABEL: store32_unordered:
+; RV64-ATOMIC: # %bb.0:
+; RV64-ATOMIC-NEXT: sw zero, 0(a0)
+; RV64-ATOMIC-NEXT: ret
+ store atomic i32 0, ptr %p unordered, align 4
+ ret void
+}
+
+define void @store32_monotonic(ptr %p) nounwind {
+; RV32-NO-ATOMIC-LABEL: store32_monotonic:
+; RV32-NO-ATOMIC: # %bb.0:
+; RV32-NO-ATOMIC-NEXT: addi sp, sp, -16
+; RV32-NO-ATOMIC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NO-ATOMIC-NEXT: li a1, 0
+; RV32-NO-ATOMIC-NEXT: li a2, 0
+; RV32-NO-ATOMIC-NEXT: call __atomic_store_4@plt
+; RV32-NO-ATOMIC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NO-ATOMIC-NEXT: addi sp, sp, 16
+; RV32-NO-ATOMIC-NEXT: ret
+;
+; RV32-ATOMIC-LABEL: store32_monotonic:
+; RV32-ATOMIC: # %bb.0:
+; RV32-ATOMIC-NEXT: sw zero, 0(a0)
+; RV32-ATOMIC-NEXT: ret
+;
+; RV64-NO-ATOMIC-LABEL: store32_monotonic:
+; RV64-NO-ATOMIC: # %bb.0:
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, -16
+; RV64-NO-ATOMIC-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: li a1, 0
+; RV64-NO-ATOMIC-NEXT: li a2, 0
+; RV64-NO-ATOMIC-NEXT: call __atomic_store_4@plt
+; RV64-NO-ATOMIC-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, 16
+; RV64-NO-ATOMIC-NEXT: ret
+;
+; RV64-ATOMIC-LABEL: store32_monotonic:
+; RV64-ATOMIC: # %bb.0:
+; RV64-ATOMIC-NEXT: sw zero, 0(a0)
+; RV64-ATOMIC-NEXT: ret
+ store atomic i32 0, ptr %p monotonic, align 4
+ ret void
+}
+
+define void @store32_release(ptr %p) nounwind {
+; RV32-NO-ATOMIC-LABEL: store32_release:
+; RV32-NO-ATOMIC: # %bb.0:
+; RV32-NO-ATOMIC-NEXT: addi sp, sp, -16
+; RV32-NO-ATOMIC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NO-ATOMIC-NEXT: li a2, 3
+; RV32-NO-ATOMIC-NEXT: li a1, 0
+; RV32-NO-ATOMIC-NEXT: call __atomic_store_4@plt
+; RV32-NO-ATOMIC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NO-ATOMIC-NEXT: addi sp, sp, 16
+; RV32-NO-ATOMIC-NEXT: ret
+;
+; RV32-ATOMIC-LABEL: store32_release:
+; RV32-ATOMIC: # %bb.0:
+; RV32-ATOMIC-NEXT: fence rw, w
+; RV32-ATOMIC-NEXT: sw zero, 0(a0)
+; RV32-ATOMIC-NEXT: ret
+;
+; RV64-NO-ATOMIC-LABEL: store32_release:
+; RV64-NO-ATOMIC: # %bb.0:
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, -16
+; RV64-NO-ATOMIC-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: li a2, 3
+; RV64-NO-ATOMIC-NEXT: li a1, 0
+; RV64-NO-ATOMIC-NEXT: call __atomic_store_4@plt
+; RV64-NO-ATOMIC-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, 16
+; RV64-NO-ATOMIC-NEXT: ret
+;
+; RV64-ATOMIC-LABEL: store32_release:
+; RV64-ATOMIC: # %bb.0:
+; RV64-ATOMIC-NEXT: fence rw, w
+; RV64-ATOMIC-NEXT: sw zero, 0(a0)
+; RV64-ATOMIC-NEXT: ret
+ store atomic i32 0, ptr %p release, align 4
+ ret void
+}
+
+define void @store32_seq_cst(ptr %p) nounwind {
+; RV32-NO-ATOMIC-LABEL: store32_seq_cst:
+; RV32-NO-ATOMIC: # %bb.0:
+; RV32-NO-ATOMIC-NEXT: addi sp, sp, -16
+; RV32-NO-ATOMIC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NO-ATOMIC-NEXT: li a2, 5
+; RV32-NO-ATOMIC-NEXT: li a1, 0
+; RV32-NO-ATOMIC-NEXT: call __atomic_store_4@plt
+; RV32-NO-ATOMIC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NO-ATOMIC-NEXT: addi sp, sp, 16
+; RV32-NO-ATOMIC-NEXT: ret
+;
+; RV32-ATOMIC-LABEL: store32_seq_cst:
+; RV32-ATOMIC: # %bb.0:
+; RV32-ATOMIC-NEXT: fence rw, w
+; RV32-ATOMIC-NEXT: sw zero, 0(a0)
+; RV32-ATOMIC-NEXT: ret
+;
+; RV64-NO-ATOMIC-LABEL: store32_seq_cst:
+; RV64-NO-ATOMIC: # %bb.0:
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, -16
+; RV64-NO-ATOMIC-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: li a2, 5
+; RV64-NO-ATOMIC-NEXT: li a1, 0
+; RV64-NO-ATOMIC-NEXT: call __atomic_store_4@plt
+; RV64-NO-ATOMIC-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, 16
+; RV64-NO-ATOMIC-NEXT: ret
+;
+; RV64-ATOMIC-LABEL: store32_seq_cst:
+; RV64-ATOMIC: # %bb.0:
+; RV64-ATOMIC-NEXT: fence rw, w
+; RV64-ATOMIC-NEXT: sw zero, 0(a0)
+; RV64-ATOMIC-NEXT: ret
+ store atomic i32 0, ptr %p seq_cst, align 4
+ ret void
+}
+
+; monotonic atomicrmw add of i32 1. The *-NO-ATOMIC checks expect a call to
+; __atomic_fetch_add_4 with a2 = 0; the *-ATOMIC checks expect a call to
+; __sync_fetch_and_add_4, which is given no a2 operand.
+define i32 @rmw32_add_monotonic(ptr %p) nounwind {
+; RV32-NO-ATOMIC-LABEL: rmw32_add_monotonic:
+; RV32-NO-ATOMIC: # %bb.0:
+; RV32-NO-ATOMIC-NEXT: addi sp, sp, -16
+; RV32-NO-ATOMIC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NO-ATOMIC-NEXT: li a1, 1
+; RV32-NO-ATOMIC-NEXT: li a2, 0
+; RV32-NO-ATOMIC-NEXT: call __atomic_fetch_add_4@plt
+; RV32-NO-ATOMIC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NO-ATOMIC-NEXT: addi sp, sp, 16
+; RV32-NO-ATOMIC-NEXT: ret
+;
+; RV32-ATOMIC-LABEL: rmw32_add_monotonic:
+; RV32-ATOMIC: # %bb.0:
+; RV32-ATOMIC-NEXT: addi sp, sp, -16
+; RV32-ATOMIC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-ATOMIC-NEXT: li a1, 1
+; RV32-ATOMIC-NEXT: call __sync_fetch_and_add_4@plt
+; RV32-ATOMIC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-ATOMIC-NEXT: addi sp, sp, 16
+; RV32-ATOMIC-NEXT: ret
+;
+; RV64-NO-ATOMIC-LABEL: rmw32_add_monotonic:
+; RV64-NO-ATOMIC: # %bb.0:
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, -16
+; RV64-NO-ATOMIC-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: li a1, 1
+; RV64-NO-ATOMIC-NEXT: li a2, 0
+; RV64-NO-ATOMIC-NEXT: call __atomic_fetch_add_4@plt
+; RV64-NO-ATOMIC-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, 16
+; RV64-NO-ATOMIC-NEXT: ret
+;
+; RV64-ATOMIC-LABEL: rmw32_add_monotonic:
+; RV64-ATOMIC: # %bb.0:
+; RV64-ATOMIC-NEXT: addi sp, sp, -16
+; RV64-ATOMIC-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-NEXT: li a1, 1
+; RV64-ATOMIC-NEXT: call __sync_fetch_and_add_4@plt
+; RV64-ATOMIC-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-NEXT: addi sp, sp, 16
+; RV64-ATOMIC-NEXT: ret
+ %v = atomicrmw add ptr %p, i32 1 monotonic, align 4
+ ret i32 %v
+}
+
+; seq_cst atomicrmw add of i32 1. The *-NO-ATOMIC checks expect a call to
+; __atomic_fetch_add_4 with a2 = 5; the *-ATOMIC checks expect a call to
+; __sync_fetch_and_add_4.
+define i32 @rmw32_add_seq_cst(ptr %p) nounwind {
+; RV32-NO-ATOMIC-LABEL: rmw32_add_seq_cst:
+; RV32-NO-ATOMIC: # %bb.0:
+; RV32-NO-ATOMIC-NEXT: addi sp, sp, -16
+; RV32-NO-ATOMIC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NO-ATOMIC-NEXT: li a1, 1
+; RV32-NO-ATOMIC-NEXT: li a2, 5
+; RV32-NO-ATOMIC-NEXT: call __atomic_fetch_add_4@plt
+; RV32-NO-ATOMIC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NO-ATOMIC-NEXT: addi sp, sp, 16
+; RV32-NO-ATOMIC-NEXT: ret
+;
+; RV32-ATOMIC-LABEL: rmw32_add_seq_cst:
+; RV32-ATOMIC: # %bb.0:
+; RV32-ATOMIC-NEXT: addi sp, sp, -16
+; RV32-ATOMIC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-ATOMIC-NEXT: li a1, 1
+; RV32-ATOMIC-NEXT: call __sync_fetch_and_add_4@plt
+; RV32-ATOMIC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-ATOMIC-NEXT: addi sp, sp, 16
+; RV32-ATOMIC-NEXT: ret
+;
+; RV64-NO-ATOMIC-LABEL: rmw32_add_seq_cst:
+; RV64-NO-ATOMIC: # %bb.0:
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, -16
+; RV64-NO-ATOMIC-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: li a1, 1
+; RV64-NO-ATOMIC-NEXT: li a2, 5
+; RV64-NO-ATOMIC-NEXT: call __atomic_fetch_add_4@plt
+; RV64-NO-ATOMIC-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, 16
+; RV64-NO-ATOMIC-NEXT: ret
+;
+; RV64-ATOMIC-LABEL: rmw32_add_seq_cst:
+; RV64-ATOMIC: # %bb.0:
+; RV64-ATOMIC-NEXT: addi sp, sp, -16
+; RV64-ATOMIC-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-NEXT: li a1, 1
+; RV64-ATOMIC-NEXT: call __sync_fetch_and_add_4@plt
+; RV64-ATOMIC-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-NEXT: addi sp, sp, 16
+; RV64-ATOMIC-NEXT: ret
+ %v = atomicrmw add ptr %p, i32 1 seq_cst, align 4
+ ret i32 %v
+}
+
+; seq_cst atomicrmw sub of i32 1. The *-NO-ATOMIC checks expect a call to
+; __atomic_fetch_sub_4 with a2 = 5; the *-ATOMIC checks expect a call to
+; __sync_fetch_and_sub_4.
+define i32 @rmw32_sub_seq_cst(ptr %p) nounwind {
+; RV32-NO-ATOMIC-LABEL: rmw32_sub_seq_cst:
+; RV32-NO-ATOMIC: # %bb.0:
+; RV32-NO-ATOMIC-NEXT: addi sp, sp, -16
+; RV32-NO-ATOMIC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NO-ATOMIC-NEXT: li a1, 1
+; RV32-NO-ATOMIC-NEXT: li a2, 5
+; RV32-NO-ATOMIC-NEXT: call __atomic_fetch_sub_4@plt
+; RV32-NO-ATOMIC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NO-ATOMIC-NEXT: addi sp, sp, 16
+; RV32-NO-ATOMIC-NEXT: ret
+;
+; RV32-ATOMIC-LABEL: rmw32_sub_seq_cst:
+; RV32-ATOMIC: # %bb.0:
+; RV32-ATOMIC-NEXT: addi sp, sp, -16
+; RV32-ATOMIC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-ATOMIC-NEXT: li a1, 1
+; RV32-ATOMIC-NEXT: call __sync_fetch_and_sub_4@plt
+; RV32-ATOMIC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-ATOMIC-NEXT: addi sp, sp, 16
+; RV32-ATOMIC-NEXT: ret
+;
+; RV64-NO-ATOMIC-LABEL: rmw32_sub_seq_cst:
+; RV64-NO-ATOMIC: # %bb.0:
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, -16
+; RV64-NO-ATOMIC-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: li a1, 1
+; RV64-NO-ATOMIC-NEXT: li a2, 5
+; RV64-NO-ATOMIC-NEXT: call __atomic_fetch_sub_4@plt
+; RV64-NO-ATOMIC-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, 16
+; RV64-NO-ATOMIC-NEXT: ret
+;
+; RV64-ATOMIC-LABEL: rmw32_sub_seq_cst:
+; RV64-ATOMIC: # %bb.0:
+; RV64-ATOMIC-NEXT: addi sp, sp, -16
+; RV64-ATOMIC-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-NEXT: li a1, 1
+; RV64-ATOMIC-NEXT: call __sync_fetch_and_sub_4@plt
+; RV64-ATOMIC-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-NEXT: addi sp, sp, 16
+; RV64-ATOMIC-NEXT: ret
+ %v = atomicrmw sub ptr %p, i32 1 seq_cst, align 4
+ ret i32 %v
+}
+
+; seq_cst atomicrmw and of i32 1. The *-NO-ATOMIC checks expect a call to
+; __atomic_fetch_and_4 with a2 = 5; the *-ATOMIC checks expect a call to
+; __sync_fetch_and_and_4.
+define i32 @rmw32_and_seq_cst(ptr %p) nounwind {
+; RV32-NO-ATOMIC-LABEL: rmw32_and_seq_cst:
+; RV32-NO-ATOMIC: # %bb.0:
+; RV32-NO-ATOMIC-NEXT: addi sp, sp, -16
+; RV32-NO-ATOMIC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NO-ATOMIC-NEXT: li a1, 1
+; RV32-NO-ATOMIC-NEXT: li a2, 5
+; RV32-NO-ATOMIC-NEXT: call __atomic_fetch_and_4@plt
+; RV32-NO-ATOMIC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NO-ATOMIC-NEXT: addi sp, sp, 16
+; RV32-NO-ATOMIC-NEXT: ret
+;
+; RV32-ATOMIC-LABEL: rmw32_and_seq_cst:
+; RV32-ATOMIC: # %bb.0:
+; RV32-ATOMIC-NEXT: addi sp, sp, -16
+; RV32-ATOMIC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-ATOMIC-NEXT: li a1, 1
+; RV32-ATOMIC-NEXT: call __sync_fetch_and_and_4@plt
+; RV32-ATOMIC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-ATOMIC-NEXT: addi sp, sp, 16
+; RV32-ATOMIC-NEXT: ret
+;
+; RV64-NO-ATOMIC-LABEL: rmw32_and_seq_cst:
+; RV64-NO-ATOMIC: # %bb.0:
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, -16
+; RV64-NO-ATOMIC-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: li a1, 1
+; RV64-NO-ATOMIC-NEXT: li a2, 5
+; RV64-NO-ATOMIC-NEXT: call __atomic_fetch_and_4@plt
+; RV64-NO-ATOMIC-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, 16
+; RV64-NO-ATOMIC-NEXT: ret
+;
+; RV64-ATOMIC-LABEL: rmw32_and_seq_cst:
+; RV64-ATOMIC: # %bb.0:
+; RV64-ATOMIC-NEXT: addi sp, sp, -16
+; RV64-ATOMIC-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-NEXT: li a1, 1
+; RV64-ATOMIC-NEXT: call __sync_fetch_and_and_4@plt
+; RV64-ATOMIC-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-NEXT: addi sp, sp, 16
+; RV64-ATOMIC-NEXT: ret
+ %v = atomicrmw and ptr %p, i32 1 seq_cst, align 4
+ ret i32 %v
+}
+
+; seq_cst atomicrmw nand of i32 1. The *-NO-ATOMIC checks expect a call to
+; __atomic_fetch_nand_4 with a2 = 5; the *-ATOMIC checks expect a call to
+; __sync_fetch_and_nand_4.
+define i32 @rmw32_nand_seq_cst(ptr %p) nounwind {
+; RV32-NO-ATOMIC-LABEL: rmw32_nand_seq_cst:
+; RV32-NO-ATOMIC: # %bb.0:
+; RV32-NO-ATOMIC-NEXT: addi sp, sp, -16
+; RV32-NO-ATOMIC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NO-ATOMIC-NEXT: li a1, 1
+; RV32-NO-ATOMIC-NEXT: li a2, 5
+; RV32-NO-ATOMIC-NEXT: call __atomic_fetch_nand_4@plt
+; RV32-NO-ATOMIC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NO-ATOMIC-NEXT: addi sp, sp, 16
+; RV32-NO-ATOMIC-NEXT: ret
+;
+; RV32-ATOMIC-LABEL: rmw32_nand_seq_cst:
+; RV32-ATOMIC: # %bb.0:
+; RV32-ATOMIC-NEXT: addi sp, sp, -16
+; RV32-ATOMIC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-ATOMIC-NEXT: li a1, 1
+; RV32-ATOMIC-NEXT: call __sync_fetch_and_nand_4@plt
+; RV32-ATOMIC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-ATOMIC-NEXT: addi sp, sp, 16
+; RV32-ATOMIC-NEXT: ret
+;
+; RV64-NO-ATOMIC-LABEL: rmw32_nand_seq_cst:
+; RV64-NO-ATOMIC: # %bb.0:
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, -16
+; RV64-NO-ATOMIC-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: li a1, 1
+; RV64-NO-ATOMIC-NEXT: li a2, 5
+; RV64-NO-ATOMIC-NEXT: call __atomic_fetch_nand_4@plt
+; RV64-NO-ATOMIC-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, 16
+; RV64-NO-ATOMIC-NEXT: ret
+;
+; RV64-ATOMIC-LABEL: rmw32_nand_seq_cst:
+; RV64-ATOMIC: # %bb.0:
+; RV64-ATOMIC-NEXT: addi sp, sp, -16
+; RV64-ATOMIC-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-NEXT: li a1, 1
+; RV64-ATOMIC-NEXT: call __sync_fetch_and_nand_4@plt
+; RV64-ATOMIC-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-NEXT: addi sp, sp, 16
+; RV64-ATOMIC-NEXT: ret
+ %v = atomicrmw nand ptr %p, i32 1 seq_cst, align 4
+ ret i32 %v
+}
+
+; seq_cst atomicrmw or of i32 1. The *-NO-ATOMIC checks expect a call to
+; __atomic_fetch_or_4 with a2 = 5; the *-ATOMIC checks expect a call to
+; __sync_fetch_and_or_4.
+define i32 @rmw32_or_seq_cst(ptr %p) nounwind {
+; RV32-NO-ATOMIC-LABEL: rmw32_or_seq_cst:
+; RV32-NO-ATOMIC: # %bb.0:
+; RV32-NO-ATOMIC-NEXT: addi sp, sp, -16
+; RV32-NO-ATOMIC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NO-ATOMIC-NEXT: li a1, 1
+; RV32-NO-ATOMIC-NEXT: li a2, 5
+; RV32-NO-ATOMIC-NEXT: call __atomic_fetch_or_4@plt
+; RV32-NO-ATOMIC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NO-ATOMIC-NEXT: addi sp, sp, 16
+; RV32-NO-ATOMIC-NEXT: ret
+;
+; RV32-ATOMIC-LABEL: rmw32_or_seq_cst:
+; RV32-ATOMIC: # %bb.0:
+; RV32-ATOMIC-NEXT: addi sp, sp, -16
+; RV32-ATOMIC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-ATOMIC-NEXT: li a1, 1
+; RV32-ATOMIC-NEXT: call __sync_fetch_and_or_4@plt
+; RV32-ATOMIC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-ATOMIC-NEXT: addi sp, sp, 16
+; RV32-ATOMIC-NEXT: ret
+;
+; RV64-NO-ATOMIC-LABEL: rmw32_or_seq_cst:
+; RV64-NO-ATOMIC: # %bb.0:
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, -16
+; RV64-NO-ATOMIC-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: li a1, 1
+; RV64-NO-ATOMIC-NEXT: li a2, 5
+; RV64-NO-ATOMIC-NEXT: call __atomic_fetch_or_4@plt
+; RV64-NO-ATOMIC-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, 16
+; RV64-NO-ATOMIC-NEXT: ret
+;
+; RV64-ATOMIC-LABEL: rmw32_or_seq_cst:
+; RV64-ATOMIC: # %bb.0:
+; RV64-ATOMIC-NEXT: addi sp, sp, -16
+; RV64-ATOMIC-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-NEXT: li a1, 1
+; RV64-ATOMIC-NEXT: call __sync_fetch_and_or_4@plt
+; RV64-ATOMIC-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-NEXT: addi sp, sp, 16
+; RV64-ATOMIC-NEXT: ret
+ %v = atomicrmw or ptr %p, i32 1 seq_cst, align 4
+ ret i32 %v
+}
+
+; seq_cst atomicrmw xor of i32 1. The *-NO-ATOMIC checks expect a call to
+; __atomic_fetch_xor_4 with a2 = 5; the *-ATOMIC checks expect a call to
+; __sync_fetch_and_xor_4.
+define i32 @rmw32_xor_seq_cst(ptr %p) nounwind {
+; RV32-NO-ATOMIC-LABEL: rmw32_xor_seq_cst:
+; RV32-NO-ATOMIC: # %bb.0:
+; RV32-NO-ATOMIC-NEXT: addi sp, sp, -16
+; RV32-NO-ATOMIC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NO-ATOMIC-NEXT: li a1, 1
+; RV32-NO-ATOMIC-NEXT: li a2, 5
+; RV32-NO-ATOMIC-NEXT: call __atomic_fetch_xor_4@plt
+; RV32-NO-ATOMIC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NO-ATOMIC-NEXT: addi sp, sp, 16
+; RV32-NO-ATOMIC-NEXT: ret
+;
+; RV32-ATOMIC-LABEL: rmw32_xor_seq_cst:
+; RV32-ATOMIC: # %bb.0:
+; RV32-ATOMIC-NEXT: addi sp, sp, -16
+; RV32-ATOMIC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-ATOMIC-NEXT: li a1, 1
+; RV32-ATOMIC-NEXT: call __sync_fetch_and_xor_4@plt
+; RV32-ATOMIC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-ATOMIC-NEXT: addi sp, sp, 16
+; RV32-ATOMIC-NEXT: ret
+;
+; RV64-NO-ATOMIC-LABEL: rmw32_xor_seq_cst:
+; RV64-NO-ATOMIC: # %bb.0:
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, -16
+; RV64-NO-ATOMIC-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: li a1, 1
+; RV64-NO-ATOMIC-NEXT: li a2, 5
+; RV64-NO-ATOMIC-NEXT: call __atomic_fetch_xor_4@plt
+; RV64-NO-ATOMIC-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, 16
+; RV64-NO-ATOMIC-NEXT: ret
+;
+; RV64-ATOMIC-LABEL: rmw32_xor_seq_cst:
+; RV64-ATOMIC: # %bb.0:
+; RV64-ATOMIC-NEXT: addi sp, sp, -16
+; RV64-ATOMIC-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-NEXT: li a1, 1
+; RV64-ATOMIC-NEXT: call __sync_fetch_and_xor_4@plt
+; RV64-ATOMIC-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-NEXT: addi sp, sp, 16
+; RV64-ATOMIC-NEXT: ret
+ %v = atomicrmw xor ptr %p, i32 1 seq_cst, align 4
+ ret i32 %v
+}
+
+; seq_cst atomicrmw max of i32 1. The *-NO-ATOMIC checks expect a retry loop
+; that picks the new value with a signed compare (blt) and calls
+; __atomic_compare_exchange_4; the *-ATOMIC checks expect a single call to
+; __sync_fetch_and_max_4.
+define i32 @rmw32_max_seq_cst(ptr %p) nounwind {
+; RV32-NO-ATOMIC-LABEL: rmw32_max_seq_cst:
+; RV32-NO-ATOMIC: # %bb.0:
+; RV32-NO-ATOMIC-NEXT: addi sp, sp, -16
+; RV32-NO-ATOMIC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NO-ATOMIC-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32-NO-ATOMIC-NEXT: mv s0, a0
+; RV32-NO-ATOMIC-NEXT: lw a1, 0(a0)
+; RV32-NO-ATOMIC-NEXT: j .LBB23_2
+; RV32-NO-ATOMIC-NEXT: .LBB23_1: # %atomicrmw.start
+; RV32-NO-ATOMIC-NEXT: # in Loop: Header=BB23_2 Depth=1
+; RV32-NO-ATOMIC-NEXT: sw a1, 4(sp)
+; RV32-NO-ATOMIC-NEXT: addi a1, sp, 4
+; RV32-NO-ATOMIC-NEXT: li a3, 5
+; RV32-NO-ATOMIC-NEXT: li a4, 5
+; RV32-NO-ATOMIC-NEXT: mv a0, s0
+; RV32-NO-ATOMIC-NEXT: call __atomic_compare_exchange_4@plt
+; RV32-NO-ATOMIC-NEXT: lw a1, 4(sp)
+; RV32-NO-ATOMIC-NEXT: bnez a0, .LBB23_4
+; RV32-NO-ATOMIC-NEXT: .LBB23_2: # %atomicrmw.start
+; RV32-NO-ATOMIC-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32-NO-ATOMIC-NEXT: li a0, 1
+; RV32-NO-ATOMIC-NEXT: mv a2, a1
+; RV32-NO-ATOMIC-NEXT: blt a0, a1, .LBB23_1
+; RV32-NO-ATOMIC-NEXT: # %bb.3: # %atomicrmw.start
+; RV32-NO-ATOMIC-NEXT: # in Loop: Header=BB23_2 Depth=1
+; RV32-NO-ATOMIC-NEXT: li a2, 1
+; RV32-NO-ATOMIC-NEXT: j .LBB23_1
+; RV32-NO-ATOMIC-NEXT: .LBB23_4: # %atomicrmw.end
+; RV32-NO-ATOMIC-NEXT: mv a0, a1
+; RV32-NO-ATOMIC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NO-ATOMIC-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32-NO-ATOMIC-NEXT: addi sp, sp, 16
+; RV32-NO-ATOMIC-NEXT: ret
+;
+; RV32-ATOMIC-LABEL: rmw32_max_seq_cst:
+; RV32-ATOMIC: # %bb.0:
+; RV32-ATOMIC-NEXT: addi sp, sp, -16
+; RV32-ATOMIC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-ATOMIC-NEXT: li a1, 1
+; RV32-ATOMIC-NEXT: call __sync_fetch_and_max_4@plt
+; RV32-ATOMIC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-ATOMIC-NEXT: addi sp, sp, 16
+; RV32-ATOMIC-NEXT: ret
+;
+; RV64-NO-ATOMIC-LABEL: rmw32_max_seq_cst:
+; RV64-NO-ATOMIC: # %bb.0:
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, -32
+; RV64-NO-ATOMIC-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: mv s0, a0
+; RV64-NO-ATOMIC-NEXT: lw a1, 0(a0)
+; RV64-NO-ATOMIC-NEXT: j .LBB23_2
+; RV64-NO-ATOMIC-NEXT: .LBB23_1: # %atomicrmw.start
+; RV64-NO-ATOMIC-NEXT: # in Loop: Header=BB23_2 Depth=1
+; RV64-NO-ATOMIC-NEXT: sw a1, 12(sp)
+; RV64-NO-ATOMIC-NEXT: addi a1, sp, 12
+; RV64-NO-ATOMIC-NEXT: li a3, 5
+; RV64-NO-ATOMIC-NEXT: li a4, 5
+; RV64-NO-ATOMIC-NEXT: mv a0, s0
+; RV64-NO-ATOMIC-NEXT: call __atomic_compare_exchange_4@plt
+; RV64-NO-ATOMIC-NEXT: lw a1, 12(sp)
+; RV64-NO-ATOMIC-NEXT: bnez a0, .LBB23_4
+; RV64-NO-ATOMIC-NEXT: .LBB23_2: # %atomicrmw.start
+; RV64-NO-ATOMIC-NEXT: # =>This Inner Loop Header: Depth=1
+; RV64-NO-ATOMIC-NEXT: li a0, 1
+; RV64-NO-ATOMIC-NEXT: mv a2, a1
+; RV64-NO-ATOMIC-NEXT: blt a0, a1, .LBB23_1
+; RV64-NO-ATOMIC-NEXT: # %bb.3: # %atomicrmw.start
+; RV64-NO-ATOMIC-NEXT: # in Loop: Header=BB23_2 Depth=1
+; RV64-NO-ATOMIC-NEXT: li a2, 1
+; RV64-NO-ATOMIC-NEXT: j .LBB23_1
+; RV64-NO-ATOMIC-NEXT: .LBB23_4: # %atomicrmw.end
+; RV64-NO-ATOMIC-NEXT: mv a0, a1
+; RV64-NO-ATOMIC-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, 32
+; RV64-NO-ATOMIC-NEXT: ret
+;
+; RV64-ATOMIC-LABEL: rmw32_max_seq_cst:
+; RV64-ATOMIC: # %bb.0:
+; RV64-ATOMIC-NEXT: addi sp, sp, -16
+; RV64-ATOMIC-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-NEXT: li a1, 1
+; RV64-ATOMIC-NEXT: call __sync_fetch_and_max_4@plt
+; RV64-ATOMIC-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-NEXT: addi sp, sp, 16
+; RV64-ATOMIC-NEXT: ret
+ %v = atomicrmw max ptr %p, i32 1 seq_cst, align 4
+ ret i32 %v
+}
+
+; seq_cst atomicrmw min of i32 1. The *-NO-ATOMIC checks expect a retry loop
+; that picks the new value with a signed compare against 2 (blt) and calls
+; __atomic_compare_exchange_4; the *-ATOMIC checks expect a single call to
+; __sync_fetch_and_min_4.
+define i32 @rmw32_min_seq_cst(ptr %p) nounwind {
+; RV32-NO-ATOMIC-LABEL: rmw32_min_seq_cst:
+; RV32-NO-ATOMIC: # %bb.0:
+; RV32-NO-ATOMIC-NEXT: addi sp, sp, -16
+; RV32-NO-ATOMIC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NO-ATOMIC-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32-NO-ATOMIC-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32-NO-ATOMIC-NEXT: mv s0, a0
+; RV32-NO-ATOMIC-NEXT: lw a1, 0(a0)
+; RV32-NO-ATOMIC-NEXT: li s1, 2
+; RV32-NO-ATOMIC-NEXT: j .LBB24_2
+; RV32-NO-ATOMIC-NEXT: .LBB24_1: # %atomicrmw.start
+; RV32-NO-ATOMIC-NEXT: # in Loop: Header=BB24_2 Depth=1
+; RV32-NO-ATOMIC-NEXT: sw a1, 0(sp)
+; RV32-NO-ATOMIC-NEXT: mv a1, sp
+; RV32-NO-ATOMIC-NEXT: li a3, 5
+; RV32-NO-ATOMIC-NEXT: li a4, 5
+; RV32-NO-ATOMIC-NEXT: mv a0, s0
+; RV32-NO-ATOMIC-NEXT: call __atomic_compare_exchange_4@plt
+; RV32-NO-ATOMIC-NEXT: lw a1, 0(sp)
+; RV32-NO-ATOMIC-NEXT: bnez a0, .LBB24_4
+; RV32-NO-ATOMIC-NEXT: .LBB24_2: # %atomicrmw.start
+; RV32-NO-ATOMIC-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32-NO-ATOMIC-NEXT: mv a2, a1
+; RV32-NO-ATOMIC-NEXT: blt a1, s1, .LBB24_1
+; RV32-NO-ATOMIC-NEXT: # %bb.3: # %atomicrmw.start
+; RV32-NO-ATOMIC-NEXT: # in Loop: Header=BB24_2 Depth=1
+; RV32-NO-ATOMIC-NEXT: li a2, 1
+; RV32-NO-ATOMIC-NEXT: j .LBB24_1
+; RV32-NO-ATOMIC-NEXT: .LBB24_4: # %atomicrmw.end
+; RV32-NO-ATOMIC-NEXT: mv a0, a1
+; RV32-NO-ATOMIC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NO-ATOMIC-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32-NO-ATOMIC-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32-NO-ATOMIC-NEXT: addi sp, sp, 16
+; RV32-NO-ATOMIC-NEXT: ret
+;
+; RV32-ATOMIC-LABEL: rmw32_min_seq_cst:
+; RV32-ATOMIC: # %bb.0:
+; RV32-ATOMIC-NEXT: addi sp, sp, -16
+; RV32-ATOMIC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-ATOMIC-NEXT: li a1, 1
+; RV32-ATOMIC-NEXT: call __sync_fetch_and_min_4@plt
+; RV32-ATOMIC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-ATOMIC-NEXT: addi sp, sp, 16
+; RV32-ATOMIC-NEXT: ret
+;
+; RV64-NO-ATOMIC-LABEL: rmw32_min_seq_cst:
+; RV64-NO-ATOMIC: # %bb.0:
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, -32
+; RV64-NO-ATOMIC-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: mv s0, a0
+; RV64-NO-ATOMIC-NEXT: lw a1, 0(a0)
+; RV64-NO-ATOMIC-NEXT: li s1, 2
+; RV64-NO-ATOMIC-NEXT: j .LBB24_2
+; RV64-NO-ATOMIC-NEXT: .LBB24_1: # %atomicrmw.start
+; RV64-NO-ATOMIC-NEXT: # in Loop: Header=BB24_2 Depth=1
+; RV64-NO-ATOMIC-NEXT: sw a1, 4(sp)
+; RV64-NO-ATOMIC-NEXT: addi a1, sp, 4
+; RV64-NO-ATOMIC-NEXT: li a3, 5
+; RV64-NO-ATOMIC-NEXT: li a4, 5
+; RV64-NO-ATOMIC-NEXT: mv a0, s0
+; RV64-NO-ATOMIC-NEXT: call __atomic_compare_exchange_4@plt
+; RV64-NO-ATOMIC-NEXT: lw a1, 4(sp)
+; RV64-NO-ATOMIC-NEXT: bnez a0, .LBB24_4
+; RV64-NO-ATOMIC-NEXT: .LBB24_2: # %atomicrmw.start
+; RV64-NO-ATOMIC-NEXT: # =>This Inner Loop Header: Depth=1
+; RV64-NO-ATOMIC-NEXT: mv a2, a1
+; RV64-NO-ATOMIC-NEXT: blt a1, s1, .LBB24_1
+; RV64-NO-ATOMIC-NEXT: # %bb.3: # %atomicrmw.start
+; RV64-NO-ATOMIC-NEXT: # in Loop: Header=BB24_2 Depth=1
+; RV64-NO-ATOMIC-NEXT: li a2, 1
+; RV64-NO-ATOMIC-NEXT: j .LBB24_1
+; RV64-NO-ATOMIC-NEXT: .LBB24_4: # %atomicrmw.end
+; RV64-NO-ATOMIC-NEXT: mv a0, a1
+; RV64-NO-ATOMIC-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, 32
+; RV64-NO-ATOMIC-NEXT: ret
+;
+; RV64-ATOMIC-LABEL: rmw32_min_seq_cst:
+; RV64-ATOMIC: # %bb.0:
+; RV64-ATOMIC-NEXT: addi sp, sp, -16
+; RV64-ATOMIC-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-NEXT: li a1, 1
+; RV64-ATOMIC-NEXT: call __sync_fetch_and_min_4@plt
+; RV64-ATOMIC-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-NEXT: addi sp, sp, 16
+; RV64-ATOMIC-NEXT: ret
+ %v = atomicrmw min ptr %p, i32 1 seq_cst, align 4
+ ret i32 %v
+}
+
+; seq_cst atomicrmw umax of i32 1. The *-NO-ATOMIC checks expect a retry loop
+; that picks the new value with an unsigned compare (bltu) and calls
+; __atomic_compare_exchange_4; the *-ATOMIC checks expect a single call to
+; __sync_fetch_and_umax_4.
+define i32 @rmw32_umax_seq_cst(ptr %p) nounwind {
+; RV32-NO-ATOMIC-LABEL: rmw32_umax_seq_cst:
+; RV32-NO-ATOMIC: # %bb.0:
+; RV32-NO-ATOMIC-NEXT: addi sp, sp, -16
+; RV32-NO-ATOMIC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NO-ATOMIC-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32-NO-ATOMIC-NEXT: mv s0, a0
+; RV32-NO-ATOMIC-NEXT: lw a1, 0(a0)
+; RV32-NO-ATOMIC-NEXT: j .LBB25_2
+; RV32-NO-ATOMIC-NEXT: .LBB25_1: # %atomicrmw.start
+; RV32-NO-ATOMIC-NEXT: # in Loop: Header=BB25_2 Depth=1
+; RV32-NO-ATOMIC-NEXT: sw a1, 4(sp)
+; RV32-NO-ATOMIC-NEXT: addi a1, sp, 4
+; RV32-NO-ATOMIC-NEXT: li a3, 5
+; RV32-NO-ATOMIC-NEXT: li a4, 5
+; RV32-NO-ATOMIC-NEXT: mv a0, s0
+; RV32-NO-ATOMIC-NEXT: call __atomic_compare_exchange_4@plt
+; RV32-NO-ATOMIC-NEXT: lw a1, 4(sp)
+; RV32-NO-ATOMIC-NEXT: bnez a0, .LBB25_4
+; RV32-NO-ATOMIC-NEXT: .LBB25_2: # %atomicrmw.start
+; RV32-NO-ATOMIC-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32-NO-ATOMIC-NEXT: li a0, 1
+; RV32-NO-ATOMIC-NEXT: mv a2, a1
+; RV32-NO-ATOMIC-NEXT: bltu a0, a1, .LBB25_1
+; RV32-NO-ATOMIC-NEXT: # %bb.3: # %atomicrmw.start
+; RV32-NO-ATOMIC-NEXT: # in Loop: Header=BB25_2 Depth=1
+; RV32-NO-ATOMIC-NEXT: li a2, 1
+; RV32-NO-ATOMIC-NEXT: j .LBB25_1
+; RV32-NO-ATOMIC-NEXT: .LBB25_4: # %atomicrmw.end
+; RV32-NO-ATOMIC-NEXT: mv a0, a1
+; RV32-NO-ATOMIC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NO-ATOMIC-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32-NO-ATOMIC-NEXT: addi sp, sp, 16
+; RV32-NO-ATOMIC-NEXT: ret
+;
+; RV32-ATOMIC-LABEL: rmw32_umax_seq_cst:
+; RV32-ATOMIC: # %bb.0:
+; RV32-ATOMIC-NEXT: addi sp, sp, -16
+; RV32-ATOMIC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-ATOMIC-NEXT: li a1, 1
+; RV32-ATOMIC-NEXT: call __sync_fetch_and_umax_4@plt
+; RV32-ATOMIC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-ATOMIC-NEXT: addi sp, sp, 16
+; RV32-ATOMIC-NEXT: ret
+;
+; RV64-NO-ATOMIC-LABEL: rmw32_umax_seq_cst:
+; RV64-NO-ATOMIC: # %bb.0:
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, -32
+; RV64-NO-ATOMIC-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: mv s0, a0
+; RV64-NO-ATOMIC-NEXT: lw a1, 0(a0)
+; RV64-NO-ATOMIC-NEXT: j .LBB25_2
+; RV64-NO-ATOMIC-NEXT: .LBB25_1: # %atomicrmw.start
+; RV64-NO-ATOMIC-NEXT: # in Loop: Header=BB25_2 Depth=1
+; RV64-NO-ATOMIC-NEXT: sw a1, 12(sp)
+; RV64-NO-ATOMIC-NEXT: addi a1, sp, 12
+; RV64-NO-ATOMIC-NEXT: li a3, 5
+; RV64-NO-ATOMIC-NEXT: li a4, 5
+; RV64-NO-ATOMIC-NEXT: mv a0, s0
+; RV64-NO-ATOMIC-NEXT: call __atomic_compare_exchange_4@plt
+; RV64-NO-ATOMIC-NEXT: lw a1, 12(sp)
+; RV64-NO-ATOMIC-NEXT: bnez a0, .LBB25_4
+; RV64-NO-ATOMIC-NEXT: .LBB25_2: # %atomicrmw.start
+; RV64-NO-ATOMIC-NEXT: # =>This Inner Loop Header: Depth=1
+; RV64-NO-ATOMIC-NEXT: li a0, 1
+; RV64-NO-ATOMIC-NEXT: mv a2, a1
+; RV64-NO-ATOMIC-NEXT: bltu a0, a1, .LBB25_1
+; RV64-NO-ATOMIC-NEXT: # %bb.3: # %atomicrmw.start
+; RV64-NO-ATOMIC-NEXT: # in Loop: Header=BB25_2 Depth=1
+; RV64-NO-ATOMIC-NEXT: li a2, 1
+; RV64-NO-ATOMIC-NEXT: j .LBB25_1
+; RV64-NO-ATOMIC-NEXT: .LBB25_4: # %atomicrmw.end
+; RV64-NO-ATOMIC-NEXT: mv a0, a1
+; RV64-NO-ATOMIC-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, 32
+; RV64-NO-ATOMIC-NEXT: ret
+;
+; RV64-ATOMIC-LABEL: rmw32_umax_seq_cst:
+; RV64-ATOMIC: # %bb.0:
+; RV64-ATOMIC-NEXT: addi sp, sp, -16
+; RV64-ATOMIC-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-NEXT: li a1, 1
+; RV64-ATOMIC-NEXT: call __sync_fetch_and_umax_4@plt
+; RV64-ATOMIC-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-NEXT: addi sp, sp, 16
+; RV64-ATOMIC-NEXT: ret
+ %v = atomicrmw umax ptr %p, i32 1 seq_cst, align 4
+ ret i32 %v
+}
+
+; seq_cst atomicrmw umin of i32 1. The *-NO-ATOMIC checks expect a retry loop
+; that picks the new value with an unsigned compare against 2 (bltu) and calls
+; __atomic_compare_exchange_4; the *-ATOMIC checks expect a single call to
+; __sync_fetch_and_umin_4.
+define i32 @rmw32_umin_seq_cst(ptr %p) nounwind {
+; RV32-NO-ATOMIC-LABEL: rmw32_umin_seq_cst:
+; RV32-NO-ATOMIC: # %bb.0:
+; RV32-NO-ATOMIC-NEXT: addi sp, sp, -16
+; RV32-NO-ATOMIC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NO-ATOMIC-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32-NO-ATOMIC-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32-NO-ATOMIC-NEXT: mv s0, a0
+; RV32-NO-ATOMIC-NEXT: lw a1, 0(a0)
+; RV32-NO-ATOMIC-NEXT: li s1, 2
+; RV32-NO-ATOMIC-NEXT: j .LBB26_2
+; RV32-NO-ATOMIC-NEXT: .LBB26_1: # %atomicrmw.start
+; RV32-NO-ATOMIC-NEXT: # in Loop: Header=BB26_2 Depth=1
+; RV32-NO-ATOMIC-NEXT: sw a1, 0(sp)
+; RV32-NO-ATOMIC-NEXT: mv a1, sp
+; RV32-NO-ATOMIC-NEXT: li a3, 5
+; RV32-NO-ATOMIC-NEXT: li a4, 5
+; RV32-NO-ATOMIC-NEXT: mv a0, s0
+; RV32-NO-ATOMIC-NEXT: call __atomic_compare_exchange_4@plt
+; RV32-NO-ATOMIC-NEXT: lw a1, 0(sp)
+; RV32-NO-ATOMIC-NEXT: bnez a0, .LBB26_4
+; RV32-NO-ATOMIC-NEXT: .LBB26_2: # %atomicrmw.start
+; RV32-NO-ATOMIC-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32-NO-ATOMIC-NEXT: mv a2, a1
+; RV32-NO-ATOMIC-NEXT: bltu a1, s1, .LBB26_1
+; RV32-NO-ATOMIC-NEXT: # %bb.3: # %atomicrmw.start
+; RV32-NO-ATOMIC-NEXT: # in Loop: Header=BB26_2 Depth=1
+; RV32-NO-ATOMIC-NEXT: li a2, 1
+; RV32-NO-ATOMIC-NEXT: j .LBB26_1
+; RV32-NO-ATOMIC-NEXT: .LBB26_4: # %atomicrmw.end
+; RV32-NO-ATOMIC-NEXT: mv a0, a1
+; RV32-NO-ATOMIC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NO-ATOMIC-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32-NO-ATOMIC-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32-NO-ATOMIC-NEXT: addi sp, sp, 16
+; RV32-NO-ATOMIC-NEXT: ret
+;
+; RV32-ATOMIC-LABEL: rmw32_umin_seq_cst:
+; RV32-ATOMIC: # %bb.0:
+; RV32-ATOMIC-NEXT: addi sp, sp, -16
+; RV32-ATOMIC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-ATOMIC-NEXT: li a1, 1
+; RV32-ATOMIC-NEXT: call __sync_fetch_and_umin_4@plt
+; RV32-ATOMIC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-ATOMIC-NEXT: addi sp, sp, 16
+; RV32-ATOMIC-NEXT: ret
+;
+; RV64-NO-ATOMIC-LABEL: rmw32_umin_seq_cst:
+; RV64-NO-ATOMIC: # %bb.0:
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, -32
+; RV64-NO-ATOMIC-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: mv s0, a0
+; RV64-NO-ATOMIC-NEXT: lw a1, 0(a0)
+; RV64-NO-ATOMIC-NEXT: li s1, 2
+; RV64-NO-ATOMIC-NEXT: j .LBB26_2
+; RV64-NO-ATOMIC-NEXT: .LBB26_1: # %atomicrmw.start
+; RV64-NO-ATOMIC-NEXT: # in Loop: Header=BB26_2 Depth=1
+; RV64-NO-ATOMIC-NEXT: sw a1, 4(sp)
+; RV64-NO-ATOMIC-NEXT: addi a1, sp, 4
+; RV64-NO-ATOMIC-NEXT: li a3, 5
+; RV64-NO-ATOMIC-NEXT: li a4, 5
+; RV64-NO-ATOMIC-NEXT: mv a0, s0
+; RV64-NO-ATOMIC-NEXT: call __atomic_compare_exchange_4@plt
+; RV64-NO-ATOMIC-NEXT: lw a1, 4(sp)
+; RV64-NO-ATOMIC-NEXT: bnez a0, .LBB26_4
+; RV64-NO-ATOMIC-NEXT: .LBB26_2: # %atomicrmw.start
+; RV64-NO-ATOMIC-NEXT: # =>This Inner Loop Header: Depth=1
+; RV64-NO-ATOMIC-NEXT: mv a2, a1
+; RV64-NO-ATOMIC-NEXT: bltu a1, s1, .LBB26_1
+; RV64-NO-ATOMIC-NEXT: # %bb.3: # %atomicrmw.start
+; RV64-NO-ATOMIC-NEXT: # in Loop: Header=BB26_2 Depth=1
+; RV64-NO-ATOMIC-NEXT: li a2, 1
+; RV64-NO-ATOMIC-NEXT: j .LBB26_1
+; RV64-NO-ATOMIC-NEXT: .LBB26_4: # %atomicrmw.end
+; RV64-NO-ATOMIC-NEXT: mv a0, a1
+; RV64-NO-ATOMIC-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, 32
+; RV64-NO-ATOMIC-NEXT: ret
+;
+; RV64-ATOMIC-LABEL: rmw32_umin_seq_cst:
+; RV64-ATOMIC: # %bb.0:
+; RV64-ATOMIC-NEXT: addi sp, sp, -16
+; RV64-ATOMIC-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-NEXT: li a1, 1
+; RV64-ATOMIC-NEXT: call __sync_fetch_and_umin_4@plt
+; RV64-ATOMIC-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-NEXT: addi sp, sp, 16
+; RV64-ATOMIC-NEXT: ret
+ %v = atomicrmw umin ptr %p, i32 1 seq_cst, align 4
+ ret i32 %v
+}
+
+; seq_cst atomicrmw xchg of i32 1. The *-NO-ATOMIC checks expect a call to
+; __atomic_exchange_4 with a2 = 5; the *-ATOMIC checks expect a call to
+; __sync_lock_test_and_set_4.
+define i32 @rmw32_xchg_seq_cst(ptr %p) nounwind {
+; RV32-NO-ATOMIC-LABEL: rmw32_xchg_seq_cst:
+; RV32-NO-ATOMIC: # %bb.0:
+; RV32-NO-ATOMIC-NEXT: addi sp, sp, -16
+; RV32-NO-ATOMIC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NO-ATOMIC-NEXT: li a1, 1
+; RV32-NO-ATOMIC-NEXT: li a2, 5
+; RV32-NO-ATOMIC-NEXT: call __atomic_exchange_4@plt
+; RV32-NO-ATOMIC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NO-ATOMIC-NEXT: addi sp, sp, 16
+; RV32-NO-ATOMIC-NEXT: ret
+;
+; RV32-ATOMIC-LABEL: rmw32_xchg_seq_cst:
+; RV32-ATOMIC: # %bb.0:
+; RV32-ATOMIC-NEXT: addi sp, sp, -16
+; RV32-ATOMIC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-ATOMIC-NEXT: li a1, 1
+; RV32-ATOMIC-NEXT: call __sync_lock_test_and_set_4@plt
+; RV32-ATOMIC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-ATOMIC-NEXT: addi sp, sp, 16
+; RV32-ATOMIC-NEXT: ret
+;
+; RV64-NO-ATOMIC-LABEL: rmw32_xchg_seq_cst:
+; RV64-NO-ATOMIC: # %bb.0:
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, -16
+; RV64-NO-ATOMIC-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: li a1, 1
+; RV64-NO-ATOMIC-NEXT: li a2, 5
+; RV64-NO-ATOMIC-NEXT: call __atomic_exchange_4@plt
+; RV64-NO-ATOMIC-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, 16
+; RV64-NO-ATOMIC-NEXT: ret
+;
+; RV64-ATOMIC-LABEL: rmw32_xchg_seq_cst:
+; RV64-ATOMIC: # %bb.0:
+; RV64-ATOMIC-NEXT: addi sp, sp, -16
+; RV64-ATOMIC-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-NEXT: li a1, 1
+; RV64-ATOMIC-NEXT: call __sync_lock_test_and_set_4@plt
+; RV64-ATOMIC-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-NEXT: addi sp, sp, 16
+; RV64-ATOMIC-NEXT: ret
+ %v = atomicrmw xchg ptr %p, i32 1 seq_cst, align 4
+ ret i32 %v
+}
+
+define float @rmw32_fadd_seq_cst(ptr %p) nounwind {
+; RV32-NO-ATOMIC-LABEL: rmw32_fadd_seq_cst:
+; RV32-NO-ATOMIC: # %bb.0:
+; RV32-NO-ATOMIC-NEXT: addi sp, sp, -16
+; RV32-NO-ATOMIC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NO-ATOMIC-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32-NO-ATOMIC-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32-NO-ATOMIC-NEXT: mv s0, a0
+; RV32-NO-ATOMIC-NEXT: lw s1, 0(a0)
+; RV32-NO-ATOMIC-NEXT: .LBB28_1: # %atomicrmw.start
+; RV32-NO-ATOMIC-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32-NO-ATOMIC-NEXT: lui a1, 260096
+; RV32-NO-ATOMIC-NEXT: mv a0, s1
+; RV32-NO-ATOMIC-NEXT: call __addsf3@plt
+; RV32-NO-ATOMIC-NEXT: mv a2, a0
+; RV32-NO-ATOMIC-NEXT: sw s1, 0(sp)
+; RV32-NO-ATOMIC-NEXT: mv a1, sp
+; RV32-NO-ATOMIC-NEXT: li a3, 5
+; RV32-NO-ATOMIC-NEXT: li a4, 5
+; RV32-NO-ATOMIC-NEXT: mv a0, s0
+; RV32-NO-ATOMIC-NEXT: call __atomic_compare_exchange_4@plt
+; RV32-NO-ATOMIC-NEXT: lw s1, 0(sp)
+; RV32-NO-ATOMIC-NEXT: beqz a0, .LBB28_1
+; RV32-NO-ATOMIC-NEXT: # %bb.2: # %atomicrmw.end
+; RV32-NO-ATOMIC-NEXT: mv a0, s1
+; RV32-NO-ATOMIC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NO-ATOMIC-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32-NO-ATOMIC-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32-NO-ATOMIC-NEXT: addi sp, sp, 16
+; RV32-NO-ATOMIC-NEXT: ret
+;
+; RV32-ATOMIC-LABEL: rmw32_fadd_seq_cst:
+; RV32-ATOMIC: # %bb.0:
+; RV32-ATOMIC-NEXT: addi sp, sp, -16
+; RV32-ATOMIC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-ATOMIC-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32-ATOMIC-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32-ATOMIC-NEXT: mv s0, a0
+; RV32-ATOMIC-NEXT: lw a0, 0(a0)
+; RV32-ATOMIC-NEXT: .LBB28_1: # %atomicrmw.start
+; RV32-ATOMIC-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32-ATOMIC-NEXT: mv s1, a0
+; RV32-ATOMIC-NEXT: lui a1, 260096
+; RV32-ATOMIC-NEXT: call __addsf3@plt
+; RV32-ATOMIC-NEXT: mv a2, a0
+; RV32-ATOMIC-NEXT: mv a0, s0
+; RV32-ATOMIC-NEXT: mv a1, s1
+; RV32-ATOMIC-NEXT: call __sync_val_compare_and_swap_4@plt
+; RV32-ATOMIC-NEXT: bne a0, s1, .LBB28_1
+; RV32-ATOMIC-NEXT: # %bb.2: # %atomicrmw.end
+; RV32-ATOMIC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-ATOMIC-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32-ATOMIC-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32-ATOMIC-NEXT: addi sp, sp, 16
+; RV32-ATOMIC-NEXT: ret
+;
+; RV64-NO-ATOMIC-LABEL: rmw32_fadd_seq_cst:
+; RV64-NO-ATOMIC: # %bb.0:
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, -32
+; RV64-NO-ATOMIC-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: mv s0, a0
+; RV64-NO-ATOMIC-NEXT: lw s1, 0(a0)
+; RV64-NO-ATOMIC-NEXT: .LBB28_1: # %atomicrmw.start
+; RV64-NO-ATOMIC-NEXT: # =>This Inner Loop Header: Depth=1
+; RV64-NO-ATOMIC-NEXT: lui a1, 260096
+; RV64-NO-ATOMIC-NEXT: mv a0, s1
+; RV64-NO-ATOMIC-NEXT: call __addsf3 at plt
+; RV64-NO-ATOMIC-NEXT: mv a2, a0
+; RV64-NO-ATOMIC-NEXT: sw s1, 4(sp)
+; RV64-NO-ATOMIC-NEXT: addi a1, sp, 4
+; RV64-NO-ATOMIC-NEXT: li a3, 5
+; RV64-NO-ATOMIC-NEXT: li a4, 5
+; RV64-NO-ATOMIC-NEXT: mv a0, s0
+; RV64-NO-ATOMIC-NEXT: call __atomic_compare_exchange_4 at plt
+; RV64-NO-ATOMIC-NEXT: lw s1, 4(sp)
+; RV64-NO-ATOMIC-NEXT: beqz a0, .LBB28_1
+; RV64-NO-ATOMIC-NEXT: # %bb.2: # %atomicrmw.end
+; RV64-NO-ATOMIC-NEXT: mv a0, s1
+; RV64-NO-ATOMIC-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, 32
+; RV64-NO-ATOMIC-NEXT: ret
+;
+; RV64-ATOMIC-LABEL: rmw32_fadd_seq_cst:
+; RV64-ATOMIC: # %bb.0:
+; RV64-ATOMIC-NEXT: addi sp, sp, -32
+; RV64-ATOMIC-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-NEXT: sd s2, 0(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-NEXT: mv s0, a0
+; RV64-ATOMIC-NEXT: lw s1, 0(a0)
+; RV64-ATOMIC-NEXT: .LBB28_1: # %atomicrmw.start
+; RV64-ATOMIC-NEXT: # =>This Inner Loop Header: Depth=1
+; RV64-ATOMIC-NEXT: lui a1, 260096
+; RV64-ATOMIC-NEXT: mv a0, s1
+; RV64-ATOMIC-NEXT: call __addsf3 at plt
+; RV64-ATOMIC-NEXT: mv a2, a0
+; RV64-ATOMIC-NEXT: sext.w s2, s1
+; RV64-ATOMIC-NEXT: mv a0, s0
+; RV64-ATOMIC-NEXT: mv a1, s2
+; RV64-ATOMIC-NEXT: call __sync_val_compare_and_swap_4 at plt
+; RV64-ATOMIC-NEXT: mv s1, a0
+; RV64-ATOMIC-NEXT: bne a0, s2, .LBB28_1
+; RV64-ATOMIC-NEXT: # %bb.2: # %atomicrmw.end
+; RV64-ATOMIC-NEXT: mv a0, s1
+; RV64-ATOMIC-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-NEXT: ld s2, 0(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-NEXT: addi sp, sp, 32
+; RV64-ATOMIC-NEXT: ret
+ %v = atomicrmw fadd ptr %p, float 1.0 seq_cst, align 4
+ ret float %v
+}
+
+; seq_cst `atomicrmw fsub` of float 1.0 through %p. Every checked
+; configuration expands it to a compare-and-swap retry loop. The new value is
+; produced by __addsf3 with `lui a1, 784384` as the second operand
+; (0xBF800 << 12 = 0xBF800000, the bit pattern of -1.0f), i.e. the subtraction
+; is performed as an addition of the negated constant. The NO-ATOMIC checks
+; commit through __atomic_compare_exchange_4 (expected value spilled to the
+; stack, ordering constants 5/5); the ATOMIC checks commit through
+; __sync_val_compare_and_swap_4 and compare its result with the old value.
+define float @rmw32_fsub_seq_cst(ptr %p) nounwind {
+; RV32-NO-ATOMIC-LABEL: rmw32_fsub_seq_cst:
+; RV32-NO-ATOMIC: # %bb.0:
+; RV32-NO-ATOMIC-NEXT: addi sp, sp, -16
+; RV32-NO-ATOMIC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NO-ATOMIC-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32-NO-ATOMIC-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32-NO-ATOMIC-NEXT: mv s0, a0
+; RV32-NO-ATOMIC-NEXT: lw s1, 0(a0)
+; RV32-NO-ATOMIC-NEXT: .LBB29_1: # %atomicrmw.start
+; RV32-NO-ATOMIC-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32-NO-ATOMIC-NEXT: lui a1, 784384
+; RV32-NO-ATOMIC-NEXT: mv a0, s1
+; RV32-NO-ATOMIC-NEXT: call __addsf3@plt
+; RV32-NO-ATOMIC-NEXT: mv a2, a0
+; RV32-NO-ATOMIC-NEXT: sw s1, 0(sp)
+; RV32-NO-ATOMIC-NEXT: mv a1, sp
+; RV32-NO-ATOMIC-NEXT: li a3, 5
+; RV32-NO-ATOMIC-NEXT: li a4, 5
+; RV32-NO-ATOMIC-NEXT: mv a0, s0
+; RV32-NO-ATOMIC-NEXT: call __atomic_compare_exchange_4@plt
+; RV32-NO-ATOMIC-NEXT: lw s1, 0(sp)
+; RV32-NO-ATOMIC-NEXT: beqz a0, .LBB29_1
+; RV32-NO-ATOMIC-NEXT: # %bb.2: # %atomicrmw.end
+; RV32-NO-ATOMIC-NEXT: mv a0, s1
+; RV32-NO-ATOMIC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NO-ATOMIC-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32-NO-ATOMIC-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32-NO-ATOMIC-NEXT: addi sp, sp, 16
+; RV32-NO-ATOMIC-NEXT: ret
+;
+; RV32-ATOMIC-LABEL: rmw32_fsub_seq_cst:
+; RV32-ATOMIC: # %bb.0:
+; RV32-ATOMIC-NEXT: addi sp, sp, -16
+; RV32-ATOMIC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-ATOMIC-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32-ATOMIC-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32-ATOMIC-NEXT: mv s0, a0
+; RV32-ATOMIC-NEXT: lw a0, 0(a0)
+; RV32-ATOMIC-NEXT: .LBB29_1: # %atomicrmw.start
+; RV32-ATOMIC-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32-ATOMIC-NEXT: mv s1, a0
+; RV32-ATOMIC-NEXT: lui a1, 784384
+; RV32-ATOMIC-NEXT: call __addsf3@plt
+; RV32-ATOMIC-NEXT: mv a2, a0
+; RV32-ATOMIC-NEXT: mv a0, s0
+; RV32-ATOMIC-NEXT: mv a1, s1
+; RV32-ATOMIC-NEXT: call __sync_val_compare_and_swap_4@plt
+; RV32-ATOMIC-NEXT: bne a0, s1, .LBB29_1
+; RV32-ATOMIC-NEXT: # %bb.2: # %atomicrmw.end
+; RV32-ATOMIC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-ATOMIC-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32-ATOMIC-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32-ATOMIC-NEXT: addi sp, sp, 16
+; RV32-ATOMIC-NEXT: ret
+;
+; RV64-NO-ATOMIC-LABEL: rmw32_fsub_seq_cst:
+; RV64-NO-ATOMIC: # %bb.0:
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, -32
+; RV64-NO-ATOMIC-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: mv s0, a0
+; RV64-NO-ATOMIC-NEXT: lw s1, 0(a0)
+; RV64-NO-ATOMIC-NEXT: .LBB29_1: # %atomicrmw.start
+; RV64-NO-ATOMIC-NEXT: # =>This Inner Loop Header: Depth=1
+; RV64-NO-ATOMIC-NEXT: lui a1, 784384
+; RV64-NO-ATOMIC-NEXT: mv a0, s1
+; RV64-NO-ATOMIC-NEXT: call __addsf3@plt
+; RV64-NO-ATOMIC-NEXT: mv a2, a0
+; RV64-NO-ATOMIC-NEXT: sw s1, 4(sp)
+; RV64-NO-ATOMIC-NEXT: addi a1, sp, 4
+; RV64-NO-ATOMIC-NEXT: li a3, 5
+; RV64-NO-ATOMIC-NEXT: li a4, 5
+; RV64-NO-ATOMIC-NEXT: mv a0, s0
+; RV64-NO-ATOMIC-NEXT: call __atomic_compare_exchange_4@plt
+; RV64-NO-ATOMIC-NEXT: lw s1, 4(sp)
+; RV64-NO-ATOMIC-NEXT: beqz a0, .LBB29_1
+; RV64-NO-ATOMIC-NEXT: # %bb.2: # %atomicrmw.end
+; RV64-NO-ATOMIC-NEXT: mv a0, s1
+; RV64-NO-ATOMIC-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, 32
+; RV64-NO-ATOMIC-NEXT: ret
+;
+; RV64-ATOMIC-LABEL: rmw32_fsub_seq_cst:
+; RV64-ATOMIC: # %bb.0:
+; RV64-ATOMIC-NEXT: addi sp, sp, -32
+; RV64-ATOMIC-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-NEXT: sd s2, 0(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-NEXT: mv s0, a0
+; RV64-ATOMIC-NEXT: lw s1, 0(a0)
+; RV64-ATOMIC-NEXT: .LBB29_1: # %atomicrmw.start
+; RV64-ATOMIC-NEXT: # =>This Inner Loop Header: Depth=1
+; RV64-ATOMIC-NEXT: lui a1, 784384
+; RV64-ATOMIC-NEXT: mv a0, s1
+; RV64-ATOMIC-NEXT: call __addsf3@plt
+; RV64-ATOMIC-NEXT: mv a2, a0
+; RV64-ATOMIC-NEXT: sext.w s2, s1
+; RV64-ATOMIC-NEXT: mv a0, s0
+; RV64-ATOMIC-NEXT: mv a1, s2
+; RV64-ATOMIC-NEXT: call __sync_val_compare_and_swap_4@plt
+; RV64-ATOMIC-NEXT: mv s1, a0
+; RV64-ATOMIC-NEXT: bne a0, s2, .LBB29_1
+; RV64-ATOMIC-NEXT: # %bb.2: # %atomicrmw.end
+; RV64-ATOMIC-NEXT: mv a0, s1
+; RV64-ATOMIC-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-NEXT: ld s2, 0(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-NEXT: addi sp, sp, 32
+; RV64-ATOMIC-NEXT: ret
+ %v = atomicrmw fsub ptr %p, float 1.0 seq_cst, align 4
+ ret float %v
+}
+
+; seq_cst `atomicrmw fmin` of float 1.0 through %p. Every checked
+; configuration expands it to a compare-and-swap retry loop whose candidate
+; value comes from a call to fminf with `lui a1, 260096` as the second operand
+; (0x3F800 << 12 = 0x3F800000, the bit pattern of 1.0f). The NO-ATOMIC checks
+; commit through __atomic_compare_exchange_4 with ordering constants 5/5; the
+; ATOMIC checks commit through __sync_val_compare_and_swap_4.
+define float @rmw32_fmin_seq_cst(ptr %p) nounwind {
+; RV32-NO-ATOMIC-LABEL: rmw32_fmin_seq_cst:
+; RV32-NO-ATOMIC: # %bb.0:
+; RV32-NO-ATOMIC-NEXT: addi sp, sp, -16
+; RV32-NO-ATOMIC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NO-ATOMIC-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32-NO-ATOMIC-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32-NO-ATOMIC-NEXT: mv s0, a0
+; RV32-NO-ATOMIC-NEXT: lw s1, 0(a0)
+; RV32-NO-ATOMIC-NEXT: .LBB30_1: # %atomicrmw.start
+; RV32-NO-ATOMIC-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32-NO-ATOMIC-NEXT: lui a1, 260096
+; RV32-NO-ATOMIC-NEXT: mv a0, s1
+; RV32-NO-ATOMIC-NEXT: call fminf@plt
+; RV32-NO-ATOMIC-NEXT: mv a2, a0
+; RV32-NO-ATOMIC-NEXT: sw s1, 0(sp)
+; RV32-NO-ATOMIC-NEXT: mv a1, sp
+; RV32-NO-ATOMIC-NEXT: li a3, 5
+; RV32-NO-ATOMIC-NEXT: li a4, 5
+; RV32-NO-ATOMIC-NEXT: mv a0, s0
+; RV32-NO-ATOMIC-NEXT: call __atomic_compare_exchange_4@plt
+; RV32-NO-ATOMIC-NEXT: lw s1, 0(sp)
+; RV32-NO-ATOMIC-NEXT: beqz a0, .LBB30_1
+; RV32-NO-ATOMIC-NEXT: # %bb.2: # %atomicrmw.end
+; RV32-NO-ATOMIC-NEXT: mv a0, s1
+; RV32-NO-ATOMIC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NO-ATOMIC-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32-NO-ATOMIC-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32-NO-ATOMIC-NEXT: addi sp, sp, 16
+; RV32-NO-ATOMIC-NEXT: ret
+;
+; RV32-ATOMIC-LABEL: rmw32_fmin_seq_cst:
+; RV32-ATOMIC: # %bb.0:
+; RV32-ATOMIC-NEXT: addi sp, sp, -16
+; RV32-ATOMIC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-ATOMIC-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32-ATOMIC-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32-ATOMIC-NEXT: mv s0, a0
+; RV32-ATOMIC-NEXT: lw a0, 0(a0)
+; RV32-ATOMIC-NEXT: .LBB30_1: # %atomicrmw.start
+; RV32-ATOMIC-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32-ATOMIC-NEXT: mv s1, a0
+; RV32-ATOMIC-NEXT: lui a1, 260096
+; RV32-ATOMIC-NEXT: call fminf@plt
+; RV32-ATOMIC-NEXT: mv a2, a0
+; RV32-ATOMIC-NEXT: mv a0, s0
+; RV32-ATOMIC-NEXT: mv a1, s1
+; RV32-ATOMIC-NEXT: call __sync_val_compare_and_swap_4@plt
+; RV32-ATOMIC-NEXT: bne a0, s1, .LBB30_1
+; RV32-ATOMIC-NEXT: # %bb.2: # %atomicrmw.end
+; RV32-ATOMIC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-ATOMIC-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32-ATOMIC-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32-ATOMIC-NEXT: addi sp, sp, 16
+; RV32-ATOMIC-NEXT: ret
+;
+; RV64-NO-ATOMIC-LABEL: rmw32_fmin_seq_cst:
+; RV64-NO-ATOMIC: # %bb.0:
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, -32
+; RV64-NO-ATOMIC-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: mv s0, a0
+; RV64-NO-ATOMIC-NEXT: lw s1, 0(a0)
+; RV64-NO-ATOMIC-NEXT: .LBB30_1: # %atomicrmw.start
+; RV64-NO-ATOMIC-NEXT: # =>This Inner Loop Header: Depth=1
+; RV64-NO-ATOMIC-NEXT: lui a1, 260096
+; RV64-NO-ATOMIC-NEXT: mv a0, s1
+; RV64-NO-ATOMIC-NEXT: call fminf@plt
+; RV64-NO-ATOMIC-NEXT: mv a2, a0
+; RV64-NO-ATOMIC-NEXT: sw s1, 4(sp)
+; RV64-NO-ATOMIC-NEXT: addi a1, sp, 4
+; RV64-NO-ATOMIC-NEXT: li a3, 5
+; RV64-NO-ATOMIC-NEXT: li a4, 5
+; RV64-NO-ATOMIC-NEXT: mv a0, s0
+; RV64-NO-ATOMIC-NEXT: call __atomic_compare_exchange_4@plt
+; RV64-NO-ATOMIC-NEXT: lw s1, 4(sp)
+; RV64-NO-ATOMIC-NEXT: beqz a0, .LBB30_1
+; RV64-NO-ATOMIC-NEXT: # %bb.2: # %atomicrmw.end
+; RV64-NO-ATOMIC-NEXT: mv a0, s1
+; RV64-NO-ATOMIC-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, 32
+; RV64-NO-ATOMIC-NEXT: ret
+;
+; RV64-ATOMIC-LABEL: rmw32_fmin_seq_cst:
+; RV64-ATOMIC: # %bb.0:
+; RV64-ATOMIC-NEXT: addi sp, sp, -32
+; RV64-ATOMIC-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-NEXT: sd s2, 0(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-NEXT: mv s0, a0
+; RV64-ATOMIC-NEXT: lw s1, 0(a0)
+; RV64-ATOMIC-NEXT: .LBB30_1: # %atomicrmw.start
+; RV64-ATOMIC-NEXT: # =>This Inner Loop Header: Depth=1
+; RV64-ATOMIC-NEXT: lui a1, 260096
+; RV64-ATOMIC-NEXT: mv a0, s1
+; RV64-ATOMIC-NEXT: call fminf@plt
+; RV64-ATOMIC-NEXT: mv a2, a0
+; RV64-ATOMIC-NEXT: sext.w s2, s1
+; RV64-ATOMIC-NEXT: mv a0, s0
+; RV64-ATOMIC-NEXT: mv a1, s2
+; RV64-ATOMIC-NEXT: call __sync_val_compare_and_swap_4@plt
+; RV64-ATOMIC-NEXT: mv s1, a0
+; RV64-ATOMIC-NEXT: bne a0, s2, .LBB30_1
+; RV64-ATOMIC-NEXT: # %bb.2: # %atomicrmw.end
+; RV64-ATOMIC-NEXT: mv a0, s1
+; RV64-ATOMIC-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-NEXT: ld s2, 0(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-NEXT: addi sp, sp, 32
+; RV64-ATOMIC-NEXT: ret
+ %v = atomicrmw fmin ptr %p, float 1.0 seq_cst, align 4
+ ret float %v
+}
+
+; seq_cst `atomicrmw fmax` of float 1.0 through %p. Every checked
+; configuration expands it to a compare-and-swap retry loop whose candidate
+; value comes from a call to fmaxf with `lui a1, 260096` as the second operand
+; (0x3F800 << 12 = 0x3F800000, the bit pattern of 1.0f). The NO-ATOMIC checks
+; commit through __atomic_compare_exchange_4 with ordering constants 5/5; the
+; ATOMIC checks commit through __sync_val_compare_and_swap_4.
+define float @rmw32_fmax_seq_cst(ptr %p) nounwind {
+; RV32-NO-ATOMIC-LABEL: rmw32_fmax_seq_cst:
+; RV32-NO-ATOMIC: # %bb.0:
+; RV32-NO-ATOMIC-NEXT: addi sp, sp, -16
+; RV32-NO-ATOMIC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NO-ATOMIC-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32-NO-ATOMIC-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32-NO-ATOMIC-NEXT: mv s0, a0
+; RV32-NO-ATOMIC-NEXT: lw s1, 0(a0)
+; RV32-NO-ATOMIC-NEXT: .LBB31_1: # %atomicrmw.start
+; RV32-NO-ATOMIC-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32-NO-ATOMIC-NEXT: lui a1, 260096
+; RV32-NO-ATOMIC-NEXT: mv a0, s1
+; RV32-NO-ATOMIC-NEXT: call fmaxf@plt
+; RV32-NO-ATOMIC-NEXT: mv a2, a0
+; RV32-NO-ATOMIC-NEXT: sw s1, 0(sp)
+; RV32-NO-ATOMIC-NEXT: mv a1, sp
+; RV32-NO-ATOMIC-NEXT: li a3, 5
+; RV32-NO-ATOMIC-NEXT: li a4, 5
+; RV32-NO-ATOMIC-NEXT: mv a0, s0
+; RV32-NO-ATOMIC-NEXT: call __atomic_compare_exchange_4@plt
+; RV32-NO-ATOMIC-NEXT: lw s1, 0(sp)
+; RV32-NO-ATOMIC-NEXT: beqz a0, .LBB31_1
+; RV32-NO-ATOMIC-NEXT: # %bb.2: # %atomicrmw.end
+; RV32-NO-ATOMIC-NEXT: mv a0, s1
+; RV32-NO-ATOMIC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NO-ATOMIC-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32-NO-ATOMIC-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32-NO-ATOMIC-NEXT: addi sp, sp, 16
+; RV32-NO-ATOMIC-NEXT: ret
+;
+; RV32-ATOMIC-LABEL: rmw32_fmax_seq_cst:
+; RV32-ATOMIC: # %bb.0:
+; RV32-ATOMIC-NEXT: addi sp, sp, -16
+; RV32-ATOMIC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-ATOMIC-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32-ATOMIC-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32-ATOMIC-NEXT: mv s0, a0
+; RV32-ATOMIC-NEXT: lw a0, 0(a0)
+; RV32-ATOMIC-NEXT: .LBB31_1: # %atomicrmw.start
+; RV32-ATOMIC-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32-ATOMIC-NEXT: mv s1, a0
+; RV32-ATOMIC-NEXT: lui a1, 260096
+; RV32-ATOMIC-NEXT: call fmaxf@plt
+; RV32-ATOMIC-NEXT: mv a2, a0
+; RV32-ATOMIC-NEXT: mv a0, s0
+; RV32-ATOMIC-NEXT: mv a1, s1
+; RV32-ATOMIC-NEXT: call __sync_val_compare_and_swap_4@plt
+; RV32-ATOMIC-NEXT: bne a0, s1, .LBB31_1
+; RV32-ATOMIC-NEXT: # %bb.2: # %atomicrmw.end
+; RV32-ATOMIC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-ATOMIC-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32-ATOMIC-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32-ATOMIC-NEXT: addi sp, sp, 16
+; RV32-ATOMIC-NEXT: ret
+;
+; RV64-NO-ATOMIC-LABEL: rmw32_fmax_seq_cst:
+; RV64-NO-ATOMIC: # %bb.0:
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, -32
+; RV64-NO-ATOMIC-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: mv s0, a0
+; RV64-NO-ATOMIC-NEXT: lw s1, 0(a0)
+; RV64-NO-ATOMIC-NEXT: .LBB31_1: # %atomicrmw.start
+; RV64-NO-ATOMIC-NEXT: # =>This Inner Loop Header: Depth=1
+; RV64-NO-ATOMIC-NEXT: lui a1, 260096
+; RV64-NO-ATOMIC-NEXT: mv a0, s1
+; RV64-NO-ATOMIC-NEXT: call fmaxf@plt
+; RV64-NO-ATOMIC-NEXT: mv a2, a0
+; RV64-NO-ATOMIC-NEXT: sw s1, 4(sp)
+; RV64-NO-ATOMIC-NEXT: addi a1, sp, 4
+; RV64-NO-ATOMIC-NEXT: li a3, 5
+; RV64-NO-ATOMIC-NEXT: li a4, 5
+; RV64-NO-ATOMIC-NEXT: mv a0, s0
+; RV64-NO-ATOMIC-NEXT: call __atomic_compare_exchange_4@plt
+; RV64-NO-ATOMIC-NEXT: lw s1, 4(sp)
+; RV64-NO-ATOMIC-NEXT: beqz a0, .LBB31_1
+; RV64-NO-ATOMIC-NEXT: # %bb.2: # %atomicrmw.end
+; RV64-NO-ATOMIC-NEXT: mv a0, s1
+; RV64-NO-ATOMIC-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, 32
+; RV64-NO-ATOMIC-NEXT: ret
+;
+; RV64-ATOMIC-LABEL: rmw32_fmax_seq_cst:
+; RV64-ATOMIC: # %bb.0:
+; RV64-ATOMIC-NEXT: addi sp, sp, -32
+; RV64-ATOMIC-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-NEXT: sd s2, 0(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-NEXT: mv s0, a0
+; RV64-ATOMIC-NEXT: lw s1, 0(a0)
+; RV64-ATOMIC-NEXT: .LBB31_1: # %atomicrmw.start
+; RV64-ATOMIC-NEXT: # =>This Inner Loop Header: Depth=1
+; RV64-ATOMIC-NEXT: lui a1, 260096
+; RV64-ATOMIC-NEXT: mv a0, s1
+; RV64-ATOMIC-NEXT: call fmaxf@plt
+; RV64-ATOMIC-NEXT: mv a2, a0
+; RV64-ATOMIC-NEXT: sext.w s2, s1
+; RV64-ATOMIC-NEXT: mv a0, s0
+; RV64-ATOMIC-NEXT: mv a1, s2
+; RV64-ATOMIC-NEXT: call __sync_val_compare_and_swap_4@plt
+; RV64-ATOMIC-NEXT: mv s1, a0
+; RV64-ATOMIC-NEXT: bne a0, s2, .LBB31_1
+; RV64-ATOMIC-NEXT: # %bb.2: # %atomicrmw.end
+; RV64-ATOMIC-NEXT: mv a0, s1
+; RV64-ATOMIC-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-NEXT: ld s2, 0(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-NEXT: addi sp, sp, 32
+; RV64-ATOMIC-NEXT: ret
+ %v = atomicrmw fmax ptr %p, float 1.0 seq_cst, align 4
+ ret float %v
+}
+
+; Monotonic/monotonic `cmpxchg` of an i32 (expected 0, new value 1) that
+; returns the loaded value. The NO-ATOMIC checks store the expected value to a
+; stack slot, pass its address plus ordering constants 0/0 to
+; __atomic_compare_exchange_4, and reload the slot as the result. The ATOMIC
+; checks call __sync_val_compare_and_swap_4 with a1 = 0 and a2 = 1 and leave
+; its return value in a0.
+define i32 @cmpxchg32_monotonic(ptr %p) nounwind {
+; RV32-NO-ATOMIC-LABEL: cmpxchg32_monotonic:
+; RV32-NO-ATOMIC: # %bb.0:
+; RV32-NO-ATOMIC-NEXT: addi sp, sp, -16
+; RV32-NO-ATOMIC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NO-ATOMIC-NEXT: sw zero, 8(sp)
+; RV32-NO-ATOMIC-NEXT: addi a1, sp, 8
+; RV32-NO-ATOMIC-NEXT: li a2, 1
+; RV32-NO-ATOMIC-NEXT: li a3, 0
+; RV32-NO-ATOMIC-NEXT: li a4, 0
+; RV32-NO-ATOMIC-NEXT: call __atomic_compare_exchange_4@plt
+; RV32-NO-ATOMIC-NEXT: lw a0, 8(sp)
+; RV32-NO-ATOMIC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NO-ATOMIC-NEXT: addi sp, sp, 16
+; RV32-NO-ATOMIC-NEXT: ret
+;
+; RV32-ATOMIC-LABEL: cmpxchg32_monotonic:
+; RV32-ATOMIC: # %bb.0:
+; RV32-ATOMIC-NEXT: addi sp, sp, -16
+; RV32-ATOMIC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-ATOMIC-NEXT: li a2, 1
+; RV32-ATOMIC-NEXT: li a1, 0
+; RV32-ATOMIC-NEXT: call __sync_val_compare_and_swap_4@plt
+; RV32-ATOMIC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-ATOMIC-NEXT: addi sp, sp, 16
+; RV32-ATOMIC-NEXT: ret
+;
+; RV64-NO-ATOMIC-LABEL: cmpxchg32_monotonic:
+; RV64-NO-ATOMIC: # %bb.0:
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, -16
+; RV64-NO-ATOMIC-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: sw zero, 4(sp)
+; RV64-NO-ATOMIC-NEXT: addi a1, sp, 4
+; RV64-NO-ATOMIC-NEXT: li a2, 1
+; RV64-NO-ATOMIC-NEXT: li a3, 0
+; RV64-NO-ATOMIC-NEXT: li a4, 0
+; RV64-NO-ATOMIC-NEXT: call __atomic_compare_exchange_4@plt
+; RV64-NO-ATOMIC-NEXT: lw a0, 4(sp)
+; RV64-NO-ATOMIC-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, 16
+; RV64-NO-ATOMIC-NEXT: ret
+;
+; RV64-ATOMIC-LABEL: cmpxchg32_monotonic:
+; RV64-ATOMIC: # %bb.0:
+; RV64-ATOMIC-NEXT: addi sp, sp, -16
+; RV64-ATOMIC-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-NEXT: li a2, 1
+; RV64-ATOMIC-NEXT: li a1, 0
+; RV64-ATOMIC-NEXT: call __sync_val_compare_and_swap_4@plt
+; RV64-ATOMIC-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-NEXT: addi sp, sp, 16
+; RV64-ATOMIC-NEXT: ret
+ %res = cmpxchg ptr %p, i32 0, i32 1 monotonic monotonic
+ %res.0 = extractvalue { i32, i1 } %res, 0
+ ret i32 %res.0
+}
+
+; seq_cst/seq_cst `cmpxchg` of an i32 (expected 0, new value 1) that returns
+; the loaded value. The NO-ATOMIC checks match the monotonic variant except
+; that the ordering constants passed to __atomic_compare_exchange_4 are 5/5.
+; The ATOMIC checks are the same instruction sequence as in the monotonic
+; variant: the __sync_val_compare_and_swap_4 call receives no ordering
+; argument.
+define i32 @cmpxchg32_seq_cst(ptr %p) nounwind {
+; RV32-NO-ATOMIC-LABEL: cmpxchg32_seq_cst:
+; RV32-NO-ATOMIC: # %bb.0:
+; RV32-NO-ATOMIC-NEXT: addi sp, sp, -16
+; RV32-NO-ATOMIC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NO-ATOMIC-NEXT: sw zero, 8(sp)
+; RV32-NO-ATOMIC-NEXT: addi a1, sp, 8
+; RV32-NO-ATOMIC-NEXT: li a2, 1
+; RV32-NO-ATOMIC-NEXT: li a3, 5
+; RV32-NO-ATOMIC-NEXT: li a4, 5
+; RV32-NO-ATOMIC-NEXT: call __atomic_compare_exchange_4@plt
+; RV32-NO-ATOMIC-NEXT: lw a0, 8(sp)
+; RV32-NO-ATOMIC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NO-ATOMIC-NEXT: addi sp, sp, 16
+; RV32-NO-ATOMIC-NEXT: ret
+;
+; RV32-ATOMIC-LABEL: cmpxchg32_seq_cst:
+; RV32-ATOMIC: # %bb.0:
+; RV32-ATOMIC-NEXT: addi sp, sp, -16
+; RV32-ATOMIC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-ATOMIC-NEXT: li a2, 1
+; RV32-ATOMIC-NEXT: li a1, 0
+; RV32-ATOMIC-NEXT: call __sync_val_compare_and_swap_4@plt
+; RV32-ATOMIC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-ATOMIC-NEXT: addi sp, sp, 16
+; RV32-ATOMIC-NEXT: ret
+;
+; RV64-NO-ATOMIC-LABEL: cmpxchg32_seq_cst:
+; RV64-NO-ATOMIC: # %bb.0:
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, -16
+; RV64-NO-ATOMIC-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: sw zero, 4(sp)
+; RV64-NO-ATOMIC-NEXT: addi a1, sp, 4
+; RV64-NO-ATOMIC-NEXT: li a2, 1
+; RV64-NO-ATOMIC-NEXT: li a3, 5
+; RV64-NO-ATOMIC-NEXT: li a4, 5
+; RV64-NO-ATOMIC-NEXT: call __atomic_compare_exchange_4@plt
+; RV64-NO-ATOMIC-NEXT: lw a0, 4(sp)
+; RV64-NO-ATOMIC-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, 16
+; RV64-NO-ATOMIC-NEXT: ret
+;
+; RV64-ATOMIC-LABEL: cmpxchg32_seq_cst:
+; RV64-ATOMIC: # %bb.0:
+; RV64-ATOMIC-NEXT: addi sp, sp, -16
+; RV64-ATOMIC-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-NEXT: li a2, 1
+; RV64-ATOMIC-NEXT: li a1, 0
+; RV64-ATOMIC-NEXT: call __sync_val_compare_and_swap_4@plt
+; RV64-ATOMIC-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-NEXT: addi sp, sp, 16
+; RV64-ATOMIC-NEXT: ret
+ %res = cmpxchg ptr %p, i32 0, i32 1 seq_cst seq_cst
+ %res.0 = extractvalue { i32, i1 } %res, 0
+ ret i32 %res.0
+}
+
+; Unordered atomic i64 load. The checks under the shared RV32 prefix and the
+; RV64-NO-ATOMIC checks call __atomic_load_8 with ordering constant 0 in a1;
+; the RV64-ATOMIC checks expect a single plain `ld` with no fence.
+define i64 @load64_unordered(ptr %p) nounwind {
+; RV32-LABEL: load64_unordered:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NEXT: li a1, 0
+; RV32-NEXT: call __atomic_load_8@plt
+; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-NO-ATOMIC-LABEL: load64_unordered:
+; RV64-NO-ATOMIC: # %bb.0:
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, -16
+; RV64-NO-ATOMIC-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: li a1, 0
+; RV64-NO-ATOMIC-NEXT: call __atomic_load_8@plt
+; RV64-NO-ATOMIC-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, 16
+; RV64-NO-ATOMIC-NEXT: ret
+;
+; RV64-ATOMIC-LABEL: load64_unordered:
+; RV64-ATOMIC: # %bb.0:
+; RV64-ATOMIC-NEXT: ld a0, 0(a0)
+; RV64-ATOMIC-NEXT: ret
+ %v = load atomic i64, ptr %p unordered, align 8
+ ret i64 %v
+}
+
+; Monotonic atomic i64 load. The checks under the shared RV32 prefix and the
+; RV64-NO-ATOMIC checks call __atomic_load_8 with ordering constant 0 in a1;
+; the RV64-ATOMIC checks expect a single plain `ld` with no fence.
+define i64 @load64_monotonic(ptr %p) nounwind {
+; RV32-LABEL: load64_monotonic:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NEXT: li a1, 0
+; RV32-NEXT: call __atomic_load_8@plt
+; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-NO-ATOMIC-LABEL: load64_monotonic:
+; RV64-NO-ATOMIC: # %bb.0:
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, -16
+; RV64-NO-ATOMIC-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: li a1, 0
+; RV64-NO-ATOMIC-NEXT: call __atomic_load_8@plt
+; RV64-NO-ATOMIC-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, 16
+; RV64-NO-ATOMIC-NEXT: ret
+;
+; RV64-ATOMIC-LABEL: load64_monotonic:
+; RV64-ATOMIC: # %bb.0:
+; RV64-ATOMIC-NEXT: ld a0, 0(a0)
+; RV64-ATOMIC-NEXT: ret
+ %v = load atomic i64, ptr %p monotonic, align 8
+ ret i64 %v
+}
+
+; Acquire atomic i64 load. The checks under the shared RV32 prefix and the
+; RV64-NO-ATOMIC checks call __atomic_load_8 with ordering constant 2 in a1;
+; the RV64-ATOMIC checks expect a plain `ld` followed by `fence r, rw`.
+define i64 @load64_acquire(ptr %p) nounwind {
+; RV32-LABEL: load64_acquire:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NEXT: li a1, 2
+; RV32-NEXT: call __atomic_load_8@plt
+; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-NO-ATOMIC-LABEL: load64_acquire:
+; RV64-NO-ATOMIC: # %bb.0:
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, -16
+; RV64-NO-ATOMIC-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: li a1, 2
+; RV64-NO-ATOMIC-NEXT: call __atomic_load_8@plt
+; RV64-NO-ATOMIC-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, 16
+; RV64-NO-ATOMIC-NEXT: ret
+;
+; RV64-ATOMIC-LABEL: load64_acquire:
+; RV64-ATOMIC: # %bb.0:
+; RV64-ATOMIC-NEXT: ld a0, 0(a0)
+; RV64-ATOMIC-NEXT: fence r, rw
+; RV64-ATOMIC-NEXT: ret
+ %v = load atomic i64, ptr %p acquire, align 8
+ ret i64 %v
+}
+
+; seq_cst atomic i64 load. The checks under the shared RV32 prefix and the
+; RV64-NO-ATOMIC checks call __atomic_load_8 with ordering constant 5 in a1;
+; the RV64-ATOMIC checks expect `fence rw, rw`, a plain `ld`, then
+; `fence r, rw`.
+define i64 @load64_seq_cst(ptr %p) nounwind {
+; RV32-LABEL: load64_seq_cst:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NEXT: li a1, 5
+; RV32-NEXT: call __atomic_load_8@plt
+; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-NO-ATOMIC-LABEL: load64_seq_cst:
+; RV64-NO-ATOMIC: # %bb.0:
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, -16
+; RV64-NO-ATOMIC-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: li a1, 5
+; RV64-NO-ATOMIC-NEXT: call __atomic_load_8@plt
+; RV64-NO-ATOMIC-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, 16
+; RV64-NO-ATOMIC-NEXT: ret
+;
+; RV64-ATOMIC-LABEL: load64_seq_cst:
+; RV64-ATOMIC: # %bb.0:
+; RV64-ATOMIC-NEXT: fence rw, rw
+; RV64-ATOMIC-NEXT: ld a0, 0(a0)
+; RV64-ATOMIC-NEXT: fence r, rw
+; RV64-ATOMIC-NEXT: ret
+ %v = load atomic i64, ptr %p seq_cst, align 8
+ ret i64 %v
+}
+
+; Unordered atomic store of i64 0. The checks under the shared RV32 prefix
+; call __atomic_store_8 with zero in a1 and a2 and ordering constant 0 in a3;
+; the RV64-NO-ATOMIC checks pass zero in a1 and ordering constant 0 in a2.
+; The RV64-ATOMIC checks expect a single `sd zero` with no fence.
+define void @store64_unordered(ptr %p) nounwind {
+; RV32-LABEL: store64_unordered:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NEXT: li a1, 0
+; RV32-NEXT: li a2, 0
+; RV32-NEXT: li a3, 0
+; RV32-NEXT: call __atomic_store_8@plt
+; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-NO-ATOMIC-LABEL: store64_unordered:
+; RV64-NO-ATOMIC: # %bb.0:
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, -16
+; RV64-NO-ATOMIC-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: li a1, 0
+; RV64-NO-ATOMIC-NEXT: li a2, 0
+; RV64-NO-ATOMIC-NEXT: call __atomic_store_8@plt
+; RV64-NO-ATOMIC-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, 16
+; RV64-NO-ATOMIC-NEXT: ret
+;
+; RV64-ATOMIC-LABEL: store64_unordered:
+; RV64-ATOMIC: # %bb.0:
+; RV64-ATOMIC-NEXT: sd zero, 0(a0)
+; RV64-ATOMIC-NEXT: ret
+ store atomic i64 0, ptr %p unordered, align 8
+ ret void
+}
+
+; Monotonic atomic store of i64 0. The checks under the shared RV32 prefix
+; call __atomic_store_8 with zero in a1 and a2 and ordering constant 0 in a3;
+; the RV64-NO-ATOMIC checks pass zero in a1 and ordering constant 0 in a2.
+; The RV64-ATOMIC checks expect a single `sd zero` with no fence.
+define void @store64_monotonic(ptr %p) nounwind {
+; RV32-LABEL: store64_monotonic:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NEXT: li a1, 0
+; RV32-NEXT: li a2, 0
+; RV32-NEXT: li a3, 0
+; RV32-NEXT: call __atomic_store_8@plt
+; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-NO-ATOMIC-LABEL: store64_monotonic:
+; RV64-NO-ATOMIC: # %bb.0:
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, -16
+; RV64-NO-ATOMIC-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: li a1, 0
+; RV64-NO-ATOMIC-NEXT: li a2, 0
+; RV64-NO-ATOMIC-NEXT: call __atomic_store_8@plt
+; RV64-NO-ATOMIC-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, 16
+; RV64-NO-ATOMIC-NEXT: ret
+;
+; RV64-ATOMIC-LABEL: store64_monotonic:
+; RV64-ATOMIC: # %bb.0:
+; RV64-ATOMIC-NEXT: sd zero, 0(a0)
+; RV64-ATOMIC-NEXT: ret
+ store atomic i64 0, ptr %p monotonic, align 8
+ ret void
+}
+
+; Release atomic store of i64 0. The checks under the shared RV32 prefix call
+; __atomic_store_8 with zero in a1 and a2 and ordering constant 3 in a3; the
+; RV64-NO-ATOMIC checks pass zero in a1 and ordering constant 3 in a2. The
+; RV64-ATOMIC checks expect `fence rw, w` followed by `sd zero`.
+define void @store64_release(ptr %p) nounwind {
+; RV32-LABEL: store64_release:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NEXT: li a3, 3
+; RV32-NEXT: li a1, 0
+; RV32-NEXT: li a2, 0
+; RV32-NEXT: call __atomic_store_8@plt
+; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-NO-ATOMIC-LABEL: store64_release:
+; RV64-NO-ATOMIC: # %bb.0:
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, -16
+; RV64-NO-ATOMIC-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: li a2, 3
+; RV64-NO-ATOMIC-NEXT: li a1, 0
+; RV64-NO-ATOMIC-NEXT: call __atomic_store_8@plt
+; RV64-NO-ATOMIC-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, 16
+; RV64-NO-ATOMIC-NEXT: ret
+;
+; RV64-ATOMIC-LABEL: store64_release:
+; RV64-ATOMIC: # %bb.0:
+; RV64-ATOMIC-NEXT: fence rw, w
+; RV64-ATOMIC-NEXT: sd zero, 0(a0)
+; RV64-ATOMIC-NEXT: ret
+ store atomic i64 0, ptr %p release, align 8
+ ret void
+}
+
+; seq_cst atomic store of i64 0 (the function name carries no ordering
+; suffix). The checks under the shared RV32 prefix call __atomic_store_8 with
+; zero in a1 and a2 and ordering constant 5 in a3; the RV64-NO-ATOMIC checks
+; pass zero in a1 and ordering constant 5 in a2. The RV64-ATOMIC checks expect
+; `fence rw, w` followed by `sd zero`, with no fence after the store.
+define void @store64(ptr %p) nounwind {
+; RV32-LABEL: store64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NEXT: li a3, 5
+; RV32-NEXT: li a1, 0
+; RV32-NEXT: li a2, 0
+; RV32-NEXT: call __atomic_store_8@plt
+; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-NO-ATOMIC-LABEL: store64:
+; RV64-NO-ATOMIC: # %bb.0:
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, -16
+; RV64-NO-ATOMIC-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: li a2, 5
+; RV64-NO-ATOMIC-NEXT: li a1, 0
+; RV64-NO-ATOMIC-NEXT: call __atomic_store_8@plt
+; RV64-NO-ATOMIC-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, 16
+; RV64-NO-ATOMIC-NEXT: ret
+;
+; RV64-ATOMIC-LABEL: store64:
+; RV64-ATOMIC: # %bb.0:
+; RV64-ATOMIC-NEXT: fence rw, w
+; RV64-ATOMIC-NEXT: sd zero, 0(a0)
+; RV64-ATOMIC-NEXT: ret
+ store atomic i64 0, ptr %p seq_cst, align 8
+ ret void
+}
+
+; Monotonic `atomicrmw add` of i64 1. The checks under the shared RV32 prefix
+; call __atomic_fetch_add_8 with a1 = 1, a2 = 0 and ordering constant 0 in a3;
+; the RV64-NO-ATOMIC checks pass a1 = 1 and ordering constant 0 in a2. The
+; RV64-ATOMIC checks call __sync_fetch_and_add_8 with only a1 = 1 set up.
+define i64 @rmw64_monotonic(ptr %p) nounwind {
+; RV32-LABEL: rmw64_monotonic:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NEXT: li a1, 1
+; RV32-NEXT: li a2, 0
+; RV32-NEXT: li a3, 0
+; RV32-NEXT: call __atomic_fetch_add_8@plt
+; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-NO-ATOMIC-LABEL: rmw64_monotonic:
+; RV64-NO-ATOMIC: # %bb.0:
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, -16
+; RV64-NO-ATOMIC-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: li a1, 1
+; RV64-NO-ATOMIC-NEXT: li a2, 0
+; RV64-NO-ATOMIC-NEXT: call __atomic_fetch_add_8@plt
+; RV64-NO-ATOMIC-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, 16
+; RV64-NO-ATOMIC-NEXT: ret
+;
+; RV64-ATOMIC-LABEL: rmw64_monotonic:
+; RV64-ATOMIC: # %bb.0:
+; RV64-ATOMIC-NEXT: addi sp, sp, -16
+; RV64-ATOMIC-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-NEXT: li a1, 1
+; RV64-ATOMIC-NEXT: call __sync_fetch_and_add_8@plt
+; RV64-ATOMIC-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-NEXT: addi sp, sp, 16
+; RV64-ATOMIC-NEXT: ret
+ %v = atomicrmw add ptr %p, i64 1 monotonic, align 8
+ ret i64 %v
+}
+
+; seq_cst `atomicrmw add` of i64 1. The checks under the shared RV32 prefix
+; call __atomic_fetch_add_8 with a1 = 1, a2 = 0 and ordering constant 5 in a3;
+; the RV64-NO-ATOMIC checks pass a1 = 1 and ordering constant 5 in a2. The
+; RV64-ATOMIC checks call __sync_fetch_and_add_8 with only a1 = 1 set up.
+define i64 @rmw64_add_seq_cst(ptr %p) nounwind {
+; RV32-LABEL: rmw64_add_seq_cst:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NEXT: li a1, 1
+; RV32-NEXT: li a3, 5
+; RV32-NEXT: li a2, 0
+; RV32-NEXT: call __atomic_fetch_add_8@plt
+; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-NO-ATOMIC-LABEL: rmw64_add_seq_cst:
+; RV64-NO-ATOMIC: # %bb.0:
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, -16
+; RV64-NO-ATOMIC-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: li a1, 1
+; RV64-NO-ATOMIC-NEXT: li a2, 5
+; RV64-NO-ATOMIC-NEXT: call __atomic_fetch_add_8@plt
+; RV64-NO-ATOMIC-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, 16
+; RV64-NO-ATOMIC-NEXT: ret
+;
+; RV64-ATOMIC-LABEL: rmw64_add_seq_cst:
+; RV64-ATOMIC: # %bb.0:
+; RV64-ATOMIC-NEXT: addi sp, sp, -16
+; RV64-ATOMIC-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-NEXT: li a1, 1
+; RV64-ATOMIC-NEXT: call __sync_fetch_and_add_8@plt
+; RV64-ATOMIC-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-NEXT: addi sp, sp, 16
+; RV64-ATOMIC-NEXT: ret
+ %v = atomicrmw add ptr %p, i64 1 seq_cst, align 8
+ ret i64 %v
+}
+
+; seq_cst `atomicrmw sub` of i64 1. The checks under the shared RV32 prefix
+; call __atomic_fetch_sub_8 with a1 = 1, a2 = 0 and ordering constant 5 in a3;
+; the RV64-NO-ATOMIC checks pass a1 = 1 and ordering constant 5 in a2. The
+; RV64-ATOMIC checks call __sync_fetch_and_sub_8 with only a1 = 1 set up.
+define i64 @rmw64_sub_seq_cst(ptr %p) nounwind {
+; RV32-LABEL: rmw64_sub_seq_cst:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NEXT: li a1, 1
+; RV32-NEXT: li a3, 5
+; RV32-NEXT: li a2, 0
+; RV32-NEXT: call __atomic_fetch_sub_8@plt
+; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-NO-ATOMIC-LABEL: rmw64_sub_seq_cst:
+; RV64-NO-ATOMIC: # %bb.0:
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, -16
+; RV64-NO-ATOMIC-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: li a1, 1
+; RV64-NO-ATOMIC-NEXT: li a2, 5
+; RV64-NO-ATOMIC-NEXT: call __atomic_fetch_sub_8@plt
+; RV64-NO-ATOMIC-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, 16
+; RV64-NO-ATOMIC-NEXT: ret
+;
+; RV64-ATOMIC-LABEL: rmw64_sub_seq_cst:
+; RV64-ATOMIC: # %bb.0:
+; RV64-ATOMIC-NEXT: addi sp, sp, -16
+; RV64-ATOMIC-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-NEXT: li a1, 1
+; RV64-ATOMIC-NEXT: call __sync_fetch_and_sub_8@plt
+; RV64-ATOMIC-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-NEXT: addi sp, sp, 16
+; RV64-ATOMIC-NEXT: ret
+ %v = atomicrmw sub ptr %p, i64 1 seq_cst, align 8
+ ret i64 %v
+}
+
+; Returns the result of a 64-bit `atomicrmw and` with constant 1 (seq_cst).
+; The RV32 and RV64-NO-ATOMIC checks expect a call to __atomic_fetch_and_8;
+; the RV64-ATOMIC checks expect a call to __sync_fetch_and_and_8.
+define i64 @rmw64_and_seq_cst(ptr %p) nounwind {
+; RV32-LABEL: rmw64_and_seq_cst:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NEXT: li a1, 1
+; RV32-NEXT: li a3, 5
+; RV32-NEXT: li a2, 0
+; RV32-NEXT: call __atomic_fetch_and_8@plt
+; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-NO-ATOMIC-LABEL: rmw64_and_seq_cst:
+; RV64-NO-ATOMIC: # %bb.0:
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, -16
+; RV64-NO-ATOMIC-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: li a1, 1
+; RV64-NO-ATOMIC-NEXT: li a2, 5
+; RV64-NO-ATOMIC-NEXT: call __atomic_fetch_and_8@plt
+; RV64-NO-ATOMIC-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, 16
+; RV64-NO-ATOMIC-NEXT: ret
+;
+; RV64-ATOMIC-LABEL: rmw64_and_seq_cst:
+; RV64-ATOMIC: # %bb.0:
+; RV64-ATOMIC-NEXT: addi sp, sp, -16
+; RV64-ATOMIC-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-NEXT: li a1, 1
+; RV64-ATOMIC-NEXT: call __sync_fetch_and_and_8@plt
+; RV64-ATOMIC-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-NEXT: addi sp, sp, 16
+; RV64-ATOMIC-NEXT: ret
+ %v = atomicrmw and ptr %p, i64 1 seq_cst, align 8
+ ret i64 %v
+}
+
+; Returns the result of a 64-bit `atomicrmw nand` with constant 1 (seq_cst).
+; The RV32 and RV64-NO-ATOMIC checks expect a call to __atomic_fetch_nand_8;
+; the RV64-ATOMIC checks expect a call to __sync_fetch_and_nand_8.
+define i64 @rmw64_nand_seq_cst(ptr %p) nounwind {
+; RV32-LABEL: rmw64_nand_seq_cst:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NEXT: li a1, 1
+; RV32-NEXT: li a3, 5
+; RV32-NEXT: li a2, 0
+; RV32-NEXT: call __atomic_fetch_nand_8@plt
+; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-NO-ATOMIC-LABEL: rmw64_nand_seq_cst:
+; RV64-NO-ATOMIC: # %bb.0:
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, -16
+; RV64-NO-ATOMIC-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: li a1, 1
+; RV64-NO-ATOMIC-NEXT: li a2, 5
+; RV64-NO-ATOMIC-NEXT: call __atomic_fetch_nand_8@plt
+; RV64-NO-ATOMIC-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, 16
+; RV64-NO-ATOMIC-NEXT: ret
+;
+; RV64-ATOMIC-LABEL: rmw64_nand_seq_cst:
+; RV64-ATOMIC: # %bb.0:
+; RV64-ATOMIC-NEXT: addi sp, sp, -16
+; RV64-ATOMIC-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-NEXT: li a1, 1
+; RV64-ATOMIC-NEXT: call __sync_fetch_and_nand_8@plt
+; RV64-ATOMIC-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-NEXT: addi sp, sp, 16
+; RV64-ATOMIC-NEXT: ret
+ %v = atomicrmw nand ptr %p, i64 1 seq_cst, align 8
+ ret i64 %v
+}
+
+; Returns the result of a 64-bit `atomicrmw or` with constant 1 (seq_cst).
+; The RV32 and RV64-NO-ATOMIC checks expect a call to __atomic_fetch_or_8;
+; the RV64-ATOMIC checks expect a call to __sync_fetch_and_or_8.
+define i64 @rmw64_or_seq_cst(ptr %p) nounwind {
+; RV32-LABEL: rmw64_or_seq_cst:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NEXT: li a1, 1
+; RV32-NEXT: li a3, 5
+; RV32-NEXT: li a2, 0
+; RV32-NEXT: call __atomic_fetch_or_8@plt
+; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-NO-ATOMIC-LABEL: rmw64_or_seq_cst:
+; RV64-NO-ATOMIC: # %bb.0:
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, -16
+; RV64-NO-ATOMIC-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: li a1, 1
+; RV64-NO-ATOMIC-NEXT: li a2, 5
+; RV64-NO-ATOMIC-NEXT: call __atomic_fetch_or_8@plt
+; RV64-NO-ATOMIC-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, 16
+; RV64-NO-ATOMIC-NEXT: ret
+;
+; RV64-ATOMIC-LABEL: rmw64_or_seq_cst:
+; RV64-ATOMIC: # %bb.0:
+; RV64-ATOMIC-NEXT: addi sp, sp, -16
+; RV64-ATOMIC-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-NEXT: li a1, 1
+; RV64-ATOMIC-NEXT: call __sync_fetch_and_or_8@plt
+; RV64-ATOMIC-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-NEXT: addi sp, sp, 16
+; RV64-ATOMIC-NEXT: ret
+ %v = atomicrmw or ptr %p, i64 1 seq_cst, align 8
+ ret i64 %v
+}
+
+; Returns the result of a 64-bit `atomicrmw xor` with constant 1 (seq_cst).
+; The RV32 and RV64-NO-ATOMIC checks expect a call to __atomic_fetch_xor_8;
+; the RV64-ATOMIC checks expect a call to __sync_fetch_and_xor_8.
+define i64 @rmw64_xor_seq_cst(ptr %p) nounwind {
+; RV32-LABEL: rmw64_xor_seq_cst:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NEXT: li a1, 1
+; RV32-NEXT: li a3, 5
+; RV32-NEXT: li a2, 0
+; RV32-NEXT: call __atomic_fetch_xor_8@plt
+; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-NO-ATOMIC-LABEL: rmw64_xor_seq_cst:
+; RV64-NO-ATOMIC: # %bb.0:
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, -16
+; RV64-NO-ATOMIC-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: li a1, 1
+; RV64-NO-ATOMIC-NEXT: li a2, 5
+; RV64-NO-ATOMIC-NEXT: call __atomic_fetch_xor_8@plt
+; RV64-NO-ATOMIC-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, 16
+; RV64-NO-ATOMIC-NEXT: ret
+;
+; RV64-ATOMIC-LABEL: rmw64_xor_seq_cst:
+; RV64-ATOMIC: # %bb.0:
+; RV64-ATOMIC-NEXT: addi sp, sp, -16
+; RV64-ATOMIC-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-NEXT: li a1, 1
+; RV64-ATOMIC-NEXT: call __sync_fetch_and_xor_8@plt
+; RV64-ATOMIC-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-NEXT: addi sp, sp, 16
+; RV64-ATOMIC-NEXT: ret
+ %v = atomicrmw xor ptr %p, i64 1 seq_cst, align 8
+ ret i64 %v
+}
+
+; Returns the result of a 64-bit `atomicrmw max` with constant 1 (seq_cst).
+; The RV32 and RV64-NO-ATOMIC checks expect a loop that selects the new value
+; inline (signed compare against 1) and calls __atomic_compare_exchange_8
+; until it reports success; the RV64-ATOMIC checks expect a single call to
+; __sync_fetch_and_max_8.
+define i64 @rmw64_max_seq_cst(ptr %p) nounwind {
+; RV32-LABEL: rmw64_max_seq_cst:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32-NEXT: mv s0, a0
+; RV32-NEXT: lw a1, 4(a0)
+; RV32-NEXT: lw a4, 0(a0)
+; RV32-NEXT: j .LBB49_2
+; RV32-NEXT: .LBB49_1: # %atomicrmw.start
+; RV32-NEXT: # in Loop: Header=BB49_2 Depth=1
+; RV32-NEXT: sw a4, 0(sp)
+; RV32-NEXT: sw a1, 4(sp)
+; RV32-NEXT: mv a1, sp
+; RV32-NEXT: li a4, 5
+; RV32-NEXT: li a5, 5
+; RV32-NEXT: mv a0, s0
+; RV32-NEXT: call __atomic_compare_exchange_8@plt
+; RV32-NEXT: lw a1, 4(sp)
+; RV32-NEXT: lw a4, 0(sp)
+; RV32-NEXT: bnez a0, .LBB49_7
+; RV32-NEXT: .LBB49_2: # %atomicrmw.start
+; RV32-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32-NEXT: li a0, 1
+; RV32-NEXT: beqz a1, .LBB49_4
+; RV32-NEXT: # %bb.3: # %atomicrmw.start
+; RV32-NEXT: # in Loop: Header=BB49_2 Depth=1
+; RV32-NEXT: sgtz a0, a1
+; RV32-NEXT: j .LBB49_5
+; RV32-NEXT: .LBB49_4: # in Loop: Header=BB49_2 Depth=1
+; RV32-NEXT: sltu a0, a0, a4
+; RV32-NEXT: .LBB49_5: # %atomicrmw.start
+; RV32-NEXT: # in Loop: Header=BB49_2 Depth=1
+; RV32-NEXT: mv a2, a4
+; RV32-NEXT: mv a3, a1
+; RV32-NEXT: bnez a0, .LBB49_1
+; RV32-NEXT: # %bb.6: # %atomicrmw.start
+; RV32-NEXT: # in Loop: Header=BB49_2 Depth=1
+; RV32-NEXT: li a3, 0
+; RV32-NEXT: li a2, 1
+; RV32-NEXT: j .LBB49_1
+; RV32-NEXT: .LBB49_7: # %atomicrmw.end
+; RV32-NEXT: mv a0, a4
+; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-NO-ATOMIC-LABEL: rmw64_max_seq_cst:
+; RV64-NO-ATOMIC: # %bb.0:
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, -32
+; RV64-NO-ATOMIC-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: mv s0, a0
+; RV64-NO-ATOMIC-NEXT: ld a1, 0(a0)
+; RV64-NO-ATOMIC-NEXT: j .LBB49_2
+; RV64-NO-ATOMIC-NEXT: .LBB49_1: # %atomicrmw.start
+; RV64-NO-ATOMIC-NEXT: # in Loop: Header=BB49_2 Depth=1
+; RV64-NO-ATOMIC-NEXT: sd a1, 8(sp)
+; RV64-NO-ATOMIC-NEXT: addi a1, sp, 8
+; RV64-NO-ATOMIC-NEXT: li a3, 5
+; RV64-NO-ATOMIC-NEXT: li a4, 5
+; RV64-NO-ATOMIC-NEXT: mv a0, s0
+; RV64-NO-ATOMIC-NEXT: call __atomic_compare_exchange_8@plt
+; RV64-NO-ATOMIC-NEXT: ld a1, 8(sp)
+; RV64-NO-ATOMIC-NEXT: bnez a0, .LBB49_4
+; RV64-NO-ATOMIC-NEXT: .LBB49_2: # %atomicrmw.start
+; RV64-NO-ATOMIC-NEXT: # =>This Inner Loop Header: Depth=1
+; RV64-NO-ATOMIC-NEXT: li a0, 1
+; RV64-NO-ATOMIC-NEXT: mv a2, a1
+; RV64-NO-ATOMIC-NEXT: blt a0, a1, .LBB49_1
+; RV64-NO-ATOMIC-NEXT: # %bb.3: # %atomicrmw.start
+; RV64-NO-ATOMIC-NEXT: # in Loop: Header=BB49_2 Depth=1
+; RV64-NO-ATOMIC-NEXT: li a2, 1
+; RV64-NO-ATOMIC-NEXT: j .LBB49_1
+; RV64-NO-ATOMIC-NEXT: .LBB49_4: # %atomicrmw.end
+; RV64-NO-ATOMIC-NEXT: mv a0, a1
+; RV64-NO-ATOMIC-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, 32
+; RV64-NO-ATOMIC-NEXT: ret
+;
+; RV64-ATOMIC-LABEL: rmw64_max_seq_cst:
+; RV64-ATOMIC: # %bb.0:
+; RV64-ATOMIC-NEXT: addi sp, sp, -16
+; RV64-ATOMIC-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-NEXT: li a1, 1
+; RV64-ATOMIC-NEXT: call __sync_fetch_and_max_8@plt
+; RV64-ATOMIC-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-NEXT: addi sp, sp, 16
+; RV64-ATOMIC-NEXT: ret
+ %v = atomicrmw max ptr %p, i64 1 seq_cst, align 8
+ ret i64 %v
+}
+
+; Returns the result of a 64-bit `atomicrmw min` with constant 1 (seq_cst).
+; The RV32 and RV64-NO-ATOMIC checks expect a loop that selects the new value
+; inline (signed compare, written as "less than 2") and calls
+; __atomic_compare_exchange_8 until it reports success; the RV64-ATOMIC checks
+; expect a single call to __sync_fetch_and_min_8.
+define i64 @rmw64_min_seq_cst(ptr %p) nounwind {
+; RV32-LABEL: rmw64_min_seq_cst:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32-NEXT: mv s0, a0
+; RV32-NEXT: lw a1, 4(a0)
+; RV32-NEXT: lw a4, 0(a0)
+; RV32-NEXT: j .LBB50_2
+; RV32-NEXT: .LBB50_1: # %atomicrmw.start
+; RV32-NEXT: # in Loop: Header=BB50_2 Depth=1
+; RV32-NEXT: sw a4, 0(sp)
+; RV32-NEXT: sw a1, 4(sp)
+; RV32-NEXT: mv a1, sp
+; RV32-NEXT: li a4, 5
+; RV32-NEXT: li a5, 5
+; RV32-NEXT: mv a0, s0
+; RV32-NEXT: call __atomic_compare_exchange_8@plt
+; RV32-NEXT: lw a1, 4(sp)
+; RV32-NEXT: lw a4, 0(sp)
+; RV32-NEXT: bnez a0, .LBB50_7
+; RV32-NEXT: .LBB50_2: # %atomicrmw.start
+; RV32-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32-NEXT: beqz a1, .LBB50_4
+; RV32-NEXT: # %bb.3: # %atomicrmw.start
+; RV32-NEXT: # in Loop: Header=BB50_2 Depth=1
+; RV32-NEXT: slti a0, a1, 0
+; RV32-NEXT: j .LBB50_5
+; RV32-NEXT: .LBB50_4: # in Loop: Header=BB50_2 Depth=1
+; RV32-NEXT: sltiu a0, a4, 2
+; RV32-NEXT: .LBB50_5: # %atomicrmw.start
+; RV32-NEXT: # in Loop: Header=BB50_2 Depth=1
+; RV32-NEXT: mv a2, a4
+; RV32-NEXT: mv a3, a1
+; RV32-NEXT: bnez a0, .LBB50_1
+; RV32-NEXT: # %bb.6: # %atomicrmw.start
+; RV32-NEXT: # in Loop: Header=BB50_2 Depth=1
+; RV32-NEXT: li a3, 0
+; RV32-NEXT: li a2, 1
+; RV32-NEXT: j .LBB50_1
+; RV32-NEXT: .LBB50_7: # %atomicrmw.end
+; RV32-NEXT: mv a0, a4
+; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-NO-ATOMIC-LABEL: rmw64_min_seq_cst:
+; RV64-NO-ATOMIC: # %bb.0:
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, -32
+; RV64-NO-ATOMIC-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: mv s0, a0
+; RV64-NO-ATOMIC-NEXT: ld a1, 0(a0)
+; RV64-NO-ATOMIC-NEXT: li s1, 2
+; RV64-NO-ATOMIC-NEXT: j .LBB50_2
+; RV64-NO-ATOMIC-NEXT: .LBB50_1: # %atomicrmw.start
+; RV64-NO-ATOMIC-NEXT: # in Loop: Header=BB50_2 Depth=1
+; RV64-NO-ATOMIC-NEXT: sd a1, 0(sp)
+; RV64-NO-ATOMIC-NEXT: mv a1, sp
+; RV64-NO-ATOMIC-NEXT: li a3, 5
+; RV64-NO-ATOMIC-NEXT: li a4, 5
+; RV64-NO-ATOMIC-NEXT: mv a0, s0
+; RV64-NO-ATOMIC-NEXT: call __atomic_compare_exchange_8@plt
+; RV64-NO-ATOMIC-NEXT: ld a1, 0(sp)
+; RV64-NO-ATOMIC-NEXT: bnez a0, .LBB50_4
+; RV64-NO-ATOMIC-NEXT: .LBB50_2: # %atomicrmw.start
+; RV64-NO-ATOMIC-NEXT: # =>This Inner Loop Header: Depth=1
+; RV64-NO-ATOMIC-NEXT: mv a2, a1
+; RV64-NO-ATOMIC-NEXT: blt a1, s1, .LBB50_1
+; RV64-NO-ATOMIC-NEXT: # %bb.3: # %atomicrmw.start
+; RV64-NO-ATOMIC-NEXT: # in Loop: Header=BB50_2 Depth=1
+; RV64-NO-ATOMIC-NEXT: li a2, 1
+; RV64-NO-ATOMIC-NEXT: j .LBB50_1
+; RV64-NO-ATOMIC-NEXT: .LBB50_4: # %atomicrmw.end
+; RV64-NO-ATOMIC-NEXT: mv a0, a1
+; RV64-NO-ATOMIC-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, 32
+; RV64-NO-ATOMIC-NEXT: ret
+;
+; RV64-ATOMIC-LABEL: rmw64_min_seq_cst:
+; RV64-ATOMIC: # %bb.0:
+; RV64-ATOMIC-NEXT: addi sp, sp, -16
+; RV64-ATOMIC-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-NEXT: li a1, 1
+; RV64-ATOMIC-NEXT: call __sync_fetch_and_min_8@plt
+; RV64-ATOMIC-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-NEXT: addi sp, sp, 16
+; RV64-ATOMIC-NEXT: ret
+ %v = atomicrmw min ptr %p, i64 1 seq_cst, align 8
+ ret i64 %v
+}
+
+; Returns the result of a 64-bit `atomicrmw umax` with constant 1 (seq_cst).
+; The RV32 and RV64-NO-ATOMIC checks expect a loop that selects the new value
+; inline (unsigned compare against 1) and calls __atomic_compare_exchange_8
+; until it reports success; the RV64-ATOMIC checks expect a single call to
+; __sync_fetch_and_umax_8.
+define i64 @rmw64_umax_seq_cst(ptr %p) nounwind {
+; RV32-LABEL: rmw64_umax_seq_cst:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32-NEXT: mv s0, a0
+; RV32-NEXT: lw a1, 4(a0)
+; RV32-NEXT: lw a4, 0(a0)
+; RV32-NEXT: j .LBB51_2
+; RV32-NEXT: .LBB51_1: # %atomicrmw.start
+; RV32-NEXT: # in Loop: Header=BB51_2 Depth=1
+; RV32-NEXT: sw a4, 0(sp)
+; RV32-NEXT: sw a1, 4(sp)
+; RV32-NEXT: mv a1, sp
+; RV32-NEXT: li a4, 5
+; RV32-NEXT: li a5, 5
+; RV32-NEXT: mv a0, s0
+; RV32-NEXT: call __atomic_compare_exchange_8@plt
+; RV32-NEXT: lw a1, 4(sp)
+; RV32-NEXT: lw a4, 0(sp)
+; RV32-NEXT: bnez a0, .LBB51_7
+; RV32-NEXT: .LBB51_2: # %atomicrmw.start
+; RV32-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32-NEXT: li a0, 1
+; RV32-NEXT: beqz a1, .LBB51_4
+; RV32-NEXT: # %bb.3: # %atomicrmw.start
+; RV32-NEXT: # in Loop: Header=BB51_2 Depth=1
+; RV32-NEXT: snez a0, a1
+; RV32-NEXT: j .LBB51_5
+; RV32-NEXT: .LBB51_4: # in Loop: Header=BB51_2 Depth=1
+; RV32-NEXT: sltu a0, a0, a4
+; RV32-NEXT: .LBB51_5: # %atomicrmw.start
+; RV32-NEXT: # in Loop: Header=BB51_2 Depth=1
+; RV32-NEXT: mv a2, a4
+; RV32-NEXT: mv a3, a1
+; RV32-NEXT: bnez a0, .LBB51_1
+; RV32-NEXT: # %bb.6: # %atomicrmw.start
+; RV32-NEXT: # in Loop: Header=BB51_2 Depth=1
+; RV32-NEXT: li a3, 0
+; RV32-NEXT: li a2, 1
+; RV32-NEXT: j .LBB51_1
+; RV32-NEXT: .LBB51_7: # %atomicrmw.end
+; RV32-NEXT: mv a0, a4
+; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-NO-ATOMIC-LABEL: rmw64_umax_seq_cst:
+; RV64-NO-ATOMIC: # %bb.0:
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, -32
+; RV64-NO-ATOMIC-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: mv s0, a0
+; RV64-NO-ATOMIC-NEXT: ld a1, 0(a0)
+; RV64-NO-ATOMIC-NEXT: j .LBB51_2
+; RV64-NO-ATOMIC-NEXT: .LBB51_1: # %atomicrmw.start
+; RV64-NO-ATOMIC-NEXT: # in Loop: Header=BB51_2 Depth=1
+; RV64-NO-ATOMIC-NEXT: sd a1, 8(sp)
+; RV64-NO-ATOMIC-NEXT: addi a1, sp, 8
+; RV64-NO-ATOMIC-NEXT: li a3, 5
+; RV64-NO-ATOMIC-NEXT: li a4, 5
+; RV64-NO-ATOMIC-NEXT: mv a0, s0
+; RV64-NO-ATOMIC-NEXT: call __atomic_compare_exchange_8@plt
+; RV64-NO-ATOMIC-NEXT: ld a1, 8(sp)
+; RV64-NO-ATOMIC-NEXT: bnez a0, .LBB51_4
+; RV64-NO-ATOMIC-NEXT: .LBB51_2: # %atomicrmw.start
+; RV64-NO-ATOMIC-NEXT: # =>This Inner Loop Header: Depth=1
+; RV64-NO-ATOMIC-NEXT: li a0, 1
+; RV64-NO-ATOMIC-NEXT: mv a2, a1
+; RV64-NO-ATOMIC-NEXT: bltu a0, a1, .LBB51_1
+; RV64-NO-ATOMIC-NEXT: # %bb.3: # %atomicrmw.start
+; RV64-NO-ATOMIC-NEXT: # in Loop: Header=BB51_2 Depth=1
+; RV64-NO-ATOMIC-NEXT: li a2, 1
+; RV64-NO-ATOMIC-NEXT: j .LBB51_1
+; RV64-NO-ATOMIC-NEXT: .LBB51_4: # %atomicrmw.end
+; RV64-NO-ATOMIC-NEXT: mv a0, a1
+; RV64-NO-ATOMIC-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, 32
+; RV64-NO-ATOMIC-NEXT: ret
+;
+; RV64-ATOMIC-LABEL: rmw64_umax_seq_cst:
+; RV64-ATOMIC: # %bb.0:
+; RV64-ATOMIC-NEXT: addi sp, sp, -16
+; RV64-ATOMIC-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-NEXT: li a1, 1
+; RV64-ATOMIC-NEXT: call __sync_fetch_and_umax_8@plt
+; RV64-ATOMIC-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-NEXT: addi sp, sp, 16
+; RV64-ATOMIC-NEXT: ret
+ %v = atomicrmw umax ptr %p, i64 1 seq_cst, align 8
+ ret i64 %v
+}
+
+; Returns the result of a 64-bit `atomicrmw umin` with constant 1 (seq_cst).
+; The RV32 and RV64-NO-ATOMIC checks expect a loop that selects the new value
+; inline (unsigned compare, written as "less than 2") and calls
+; __atomic_compare_exchange_8 until it reports success; the RV64-ATOMIC checks
+; expect a single call to __sync_fetch_and_umin_8.
+define i64 @rmw64_umin_seq_cst(ptr %p) nounwind {
+; RV32-LABEL: rmw64_umin_seq_cst:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32-NEXT: mv s0, a0
+; RV32-NEXT: lw a1, 4(a0)
+; RV32-NEXT: lw a4, 0(a0)
+; RV32-NEXT: j .LBB52_2
+; RV32-NEXT: .LBB52_1: # %atomicrmw.start
+; RV32-NEXT: # in Loop: Header=BB52_2 Depth=1
+; RV32-NEXT: sw a4, 0(sp)
+; RV32-NEXT: sw a1, 4(sp)
+; RV32-NEXT: mv a1, sp
+; RV32-NEXT: li a4, 5
+; RV32-NEXT: li a5, 5
+; RV32-NEXT: mv a0, s0
+; RV32-NEXT: call __atomic_compare_exchange_8@plt
+; RV32-NEXT: lw a1, 4(sp)
+; RV32-NEXT: lw a4, 0(sp)
+; RV32-NEXT: bnez a0, .LBB52_7
+; RV32-NEXT: .LBB52_2: # %atomicrmw.start
+; RV32-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32-NEXT: beqz a1, .LBB52_4
+; RV32-NEXT: # %bb.3: # %atomicrmw.start
+; RV32-NEXT: # in Loop: Header=BB52_2 Depth=1
+; RV32-NEXT: li a0, 0
+; RV32-NEXT: j .LBB52_5
+; RV32-NEXT: .LBB52_4: # in Loop: Header=BB52_2 Depth=1
+; RV32-NEXT: sltiu a0, a4, 2
+; RV32-NEXT: .LBB52_5: # %atomicrmw.start
+; RV32-NEXT: # in Loop: Header=BB52_2 Depth=1
+; RV32-NEXT: mv a2, a4
+; RV32-NEXT: mv a3, a1
+; RV32-NEXT: bnez a0, .LBB52_1
+; RV32-NEXT: # %bb.6: # %atomicrmw.start
+; RV32-NEXT: # in Loop: Header=BB52_2 Depth=1
+; RV32-NEXT: li a3, 0
+; RV32-NEXT: li a2, 1
+; RV32-NEXT: j .LBB52_1
+; RV32-NEXT: .LBB52_7: # %atomicrmw.end
+; RV32-NEXT: mv a0, a4
+; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-NO-ATOMIC-LABEL: rmw64_umin_seq_cst:
+; RV64-NO-ATOMIC: # %bb.0:
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, -32
+; RV64-NO-ATOMIC-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: mv s0, a0
+; RV64-NO-ATOMIC-NEXT: ld a1, 0(a0)
+; RV64-NO-ATOMIC-NEXT: li s1, 2
+; RV64-NO-ATOMIC-NEXT: j .LBB52_2
+; RV64-NO-ATOMIC-NEXT: .LBB52_1: # %atomicrmw.start
+; RV64-NO-ATOMIC-NEXT: # in Loop: Header=BB52_2 Depth=1
+; RV64-NO-ATOMIC-NEXT: sd a1, 0(sp)
+; RV64-NO-ATOMIC-NEXT: mv a1, sp
+; RV64-NO-ATOMIC-NEXT: li a3, 5
+; RV64-NO-ATOMIC-NEXT: li a4, 5
+; RV64-NO-ATOMIC-NEXT: mv a0, s0
+; RV64-NO-ATOMIC-NEXT: call __atomic_compare_exchange_8@plt
+; RV64-NO-ATOMIC-NEXT: ld a1, 0(sp)
+; RV64-NO-ATOMIC-NEXT: bnez a0, .LBB52_4
+; RV64-NO-ATOMIC-NEXT: .LBB52_2: # %atomicrmw.start
+; RV64-NO-ATOMIC-NEXT: # =>This Inner Loop Header: Depth=1
+; RV64-NO-ATOMIC-NEXT: mv a2, a1
+; RV64-NO-ATOMIC-NEXT: bltu a1, s1, .LBB52_1
+; RV64-NO-ATOMIC-NEXT: # %bb.3: # %atomicrmw.start
+; RV64-NO-ATOMIC-NEXT: # in Loop: Header=BB52_2 Depth=1
+; RV64-NO-ATOMIC-NEXT: li a2, 1
+; RV64-NO-ATOMIC-NEXT: j .LBB52_1
+; RV64-NO-ATOMIC-NEXT: .LBB52_4: # %atomicrmw.end
+; RV64-NO-ATOMIC-NEXT: mv a0, a1
+; RV64-NO-ATOMIC-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, 32
+; RV64-NO-ATOMIC-NEXT: ret
+;
+; RV64-ATOMIC-LABEL: rmw64_umin_seq_cst:
+; RV64-ATOMIC: # %bb.0:
+; RV64-ATOMIC-NEXT: addi sp, sp, -16
+; RV64-ATOMIC-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-NEXT: li a1, 1
+; RV64-ATOMIC-NEXT: call __sync_fetch_and_umin_8@plt
+; RV64-ATOMIC-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-NEXT: addi sp, sp, 16
+; RV64-ATOMIC-NEXT: ret
+ %v = atomicrmw umin ptr %p, i64 1 seq_cst, align 8
+ ret i64 %v
+}
+
+; Returns the result of a 64-bit `atomicrmw xchg` with constant 1 (seq_cst).
+; The RV32 and RV64-NO-ATOMIC checks expect a call to __atomic_exchange_8;
+; the RV64-ATOMIC checks expect a call to __sync_lock_test_and_set_8.
+define i64 @rmw64_xchg_seq_cst(ptr %p) nounwind {
+; RV32-LABEL: rmw64_xchg_seq_cst:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NEXT: li a1, 1
+; RV32-NEXT: li a3, 5
+; RV32-NEXT: li a2, 0
+; RV32-NEXT: call __atomic_exchange_8@plt
+; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-NO-ATOMIC-LABEL: rmw64_xchg_seq_cst:
+; RV64-NO-ATOMIC: # %bb.0:
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, -16
+; RV64-NO-ATOMIC-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: li a1, 1
+; RV64-NO-ATOMIC-NEXT: li a2, 5
+; RV64-NO-ATOMIC-NEXT: call __atomic_exchange_8@plt
+; RV64-NO-ATOMIC-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, 16
+; RV64-NO-ATOMIC-NEXT: ret
+;
+; RV64-ATOMIC-LABEL: rmw64_xchg_seq_cst:
+; RV64-ATOMIC: # %bb.0:
+; RV64-ATOMIC-NEXT: addi sp, sp, -16
+; RV64-ATOMIC-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-NEXT: li a1, 1
+; RV64-ATOMIC-NEXT: call __sync_lock_test_and_set_8@plt
+; RV64-ATOMIC-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-NEXT: addi sp, sp, 16
+; RV64-ATOMIC-NEXT: ret
+ %v = atomicrmw xchg ptr %p, i64 1 seq_cst, align 8
+ ret i64 %v
+}
+
+; Returns the result of a 64-bit `atomicrmw fadd` of double 1.0 (seq_cst).
+; All three check groups expect a loop that computes the sum with __adddf3
+; (the constant 0x3FF0000000000000 is 1.0) and then tries to store it. The
+; RV32 and RV64-NO-ATOMIC checks store via __atomic_compare_exchange_8 and
+; retry while it returns zero; the RV64-ATOMIC checks store via
+; __sync_val_compare_and_swap_8 and retry while the returned value differs
+; from the expected one.
+define double @rmw64_fadd_seq_cst(ptr %p) nounwind {
+; RV32-LABEL: rmw64_fadd_seq_cst:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -32
+; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32-NEXT: mv s0, a0
+; RV32-NEXT: lw s1, 4(a0)
+; RV32-NEXT: lw s2, 0(a0)
+; RV32-NEXT: .LBB54_1: # %atomicrmw.start
+; RV32-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32-NEXT: lui a3, 261888
+; RV32-NEXT: mv a0, s2
+; RV32-NEXT: mv a1, s1
+; RV32-NEXT: li a2, 0
+; RV32-NEXT: call __adddf3@plt
+; RV32-NEXT: mv a2, a0
+; RV32-NEXT: mv a3, a1
+; RV32-NEXT: sw s2, 8(sp)
+; RV32-NEXT: sw s1, 12(sp)
+; RV32-NEXT: addi a1, sp, 8
+; RV32-NEXT: li a4, 5
+; RV32-NEXT: li a5, 5
+; RV32-NEXT: mv a0, s0
+; RV32-NEXT: call __atomic_compare_exchange_8@plt
+; RV32-NEXT: lw s1, 12(sp)
+; RV32-NEXT: lw s2, 8(sp)
+; RV32-NEXT: beqz a0, .LBB54_1
+; RV32-NEXT: # %bb.2: # %atomicrmw.end
+; RV32-NEXT: mv a0, s2
+; RV32-NEXT: mv a1, s1
+; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 32
+; RV32-NEXT: ret
+;
+; RV64-NO-ATOMIC-LABEL: rmw64_fadd_seq_cst:
+; RV64-NO-ATOMIC: # %bb.0:
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, -48
+; RV64-NO-ATOMIC-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: mv s0, a0
+; RV64-NO-ATOMIC-NEXT: ld s2, 0(a0)
+; RV64-NO-ATOMIC-NEXT: li a0, 1023
+; RV64-NO-ATOMIC-NEXT: slli s1, a0, 52
+; RV64-NO-ATOMIC-NEXT: .LBB54_1: # %atomicrmw.start
+; RV64-NO-ATOMIC-NEXT: # =>This Inner Loop Header: Depth=1
+; RV64-NO-ATOMIC-NEXT: mv a0, s2
+; RV64-NO-ATOMIC-NEXT: mv a1, s1
+; RV64-NO-ATOMIC-NEXT: call __adddf3@plt
+; RV64-NO-ATOMIC-NEXT: mv a2, a0
+; RV64-NO-ATOMIC-NEXT: sd s2, 8(sp)
+; RV64-NO-ATOMIC-NEXT: addi a1, sp, 8
+; RV64-NO-ATOMIC-NEXT: li a3, 5
+; RV64-NO-ATOMIC-NEXT: li a4, 5
+; RV64-NO-ATOMIC-NEXT: mv a0, s0
+; RV64-NO-ATOMIC-NEXT: call __atomic_compare_exchange_8@plt
+; RV64-NO-ATOMIC-NEXT: ld s2, 8(sp)
+; RV64-NO-ATOMIC-NEXT: beqz a0, .LBB54_1
+; RV64-NO-ATOMIC-NEXT: # %bb.2: # %atomicrmw.end
+; RV64-NO-ATOMIC-NEXT: mv a0, s2
+; RV64-NO-ATOMIC-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, 48
+; RV64-NO-ATOMIC-NEXT: ret
+;
+; RV64-ATOMIC-LABEL: rmw64_fadd_seq_cst:
+; RV64-ATOMIC: # %bb.0:
+; RV64-ATOMIC-NEXT: addi sp, sp, -32
+; RV64-ATOMIC-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-NEXT: sd s2, 0(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-NEXT: mv s0, a0
+; RV64-ATOMIC-NEXT: ld a0, 0(a0)
+; RV64-ATOMIC-NEXT: li a1, 1023
+; RV64-ATOMIC-NEXT: slli s1, a1, 52
+; RV64-ATOMIC-NEXT: .LBB54_1: # %atomicrmw.start
+; RV64-ATOMIC-NEXT: # =>This Inner Loop Header: Depth=1
+; RV64-ATOMIC-NEXT: mv s2, a0
+; RV64-ATOMIC-NEXT: mv a1, s1
+; RV64-ATOMIC-NEXT: call __adddf3@plt
+; RV64-ATOMIC-NEXT: mv a2, a0
+; RV64-ATOMIC-NEXT: mv a0, s0
+; RV64-ATOMIC-NEXT: mv a1, s2
+; RV64-ATOMIC-NEXT: call __sync_val_compare_and_swap_8@plt
+; RV64-ATOMIC-NEXT: bne a0, s2, .LBB54_1
+; RV64-ATOMIC-NEXT: # %bb.2: # %atomicrmw.end
+; RV64-ATOMIC-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-NEXT: ld s2, 0(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-NEXT: addi sp, sp, 32
+; RV64-ATOMIC-NEXT: ret
+ %v = atomicrmw fadd ptr %p, double 1.0 seq_cst, align 8
+ ret double %v
+}
+
+; Returns the result of a 64-bit `atomicrmw fsub` of double 1.0 (seq_cst).
+; All three check groups expect a loop that computes the difference by
+; calling __adddf3 with the constant 0xBFF0000000000000 (-1.0), i.e. the
+; subtraction shows up as an addition of the negated operand, and then tries
+; to store the result. The RV32 and RV64-NO-ATOMIC checks store via
+; __atomic_compare_exchange_8 and retry while it returns zero; the RV64-ATOMIC
+; checks store via __sync_val_compare_and_swap_8 and retry while the returned
+; value differs from the expected one.
+define double @rmw64_fsub_seq_cst(ptr %p) nounwind {
+; RV32-LABEL: rmw64_fsub_seq_cst:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -32
+; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32-NEXT: mv s0, a0
+; RV32-NEXT: lw s1, 4(a0)
+; RV32-NEXT: lw s2, 0(a0)
+; RV32-NEXT: .LBB55_1: # %atomicrmw.start
+; RV32-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32-NEXT: lui a3, 786176
+; RV32-NEXT: mv a0, s2
+; RV32-NEXT: mv a1, s1
+; RV32-NEXT: li a2, 0
+; RV32-NEXT: call __adddf3@plt
+; RV32-NEXT: mv a2, a0
+; RV32-NEXT: mv a3, a1
+; RV32-NEXT: sw s2, 8(sp)
+; RV32-NEXT: sw s1, 12(sp)
+; RV32-NEXT: addi a1, sp, 8
+; RV32-NEXT: li a4, 5
+; RV32-NEXT: li a5, 5
+; RV32-NEXT: mv a0, s0
+; RV32-NEXT: call __atomic_compare_exchange_8@plt
+; RV32-NEXT: lw s1, 12(sp)
+; RV32-NEXT: lw s2, 8(sp)
+; RV32-NEXT: beqz a0, .LBB55_1
+; RV32-NEXT: # %bb.2: # %atomicrmw.end
+; RV32-NEXT: mv a0, s2
+; RV32-NEXT: mv a1, s1
+; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 32
+; RV32-NEXT: ret
+;
+; RV64-NO-ATOMIC-LABEL: rmw64_fsub_seq_cst:
+; RV64-NO-ATOMIC: # %bb.0:
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, -48
+; RV64-NO-ATOMIC-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: mv s0, a0
+; RV64-NO-ATOMIC-NEXT: ld s2, 0(a0)
+; RV64-NO-ATOMIC-NEXT: li a0, -1025
+; RV64-NO-ATOMIC-NEXT: slli s1, a0, 52
+; RV64-NO-ATOMIC-NEXT: .LBB55_1: # %atomicrmw.start
+; RV64-NO-ATOMIC-NEXT: # =>This Inner Loop Header: Depth=1
+; RV64-NO-ATOMIC-NEXT: mv a0, s2
+; RV64-NO-ATOMIC-NEXT: mv a1, s1
+; RV64-NO-ATOMIC-NEXT: call __adddf3@plt
+; RV64-NO-ATOMIC-NEXT: mv a2, a0
+; RV64-NO-ATOMIC-NEXT: sd s2, 8(sp)
+; RV64-NO-ATOMIC-NEXT: addi a1, sp, 8
+; RV64-NO-ATOMIC-NEXT: li a3, 5
+; RV64-NO-ATOMIC-NEXT: li a4, 5
+; RV64-NO-ATOMIC-NEXT: mv a0, s0
+; RV64-NO-ATOMIC-NEXT: call __atomic_compare_exchange_8@plt
+; RV64-NO-ATOMIC-NEXT: ld s2, 8(sp)
+; RV64-NO-ATOMIC-NEXT: beqz a0, .LBB55_1
+; RV64-NO-ATOMIC-NEXT: # %bb.2: # %atomicrmw.end
+; RV64-NO-ATOMIC-NEXT: mv a0, s2
+; RV64-NO-ATOMIC-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, 48
+; RV64-NO-ATOMIC-NEXT: ret
+;
+; RV64-ATOMIC-LABEL: rmw64_fsub_seq_cst:
+; RV64-ATOMIC: # %bb.0:
+; RV64-ATOMIC-NEXT: addi sp, sp, -32
+; RV64-ATOMIC-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-NEXT: sd s2, 0(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-NEXT: mv s0, a0
+; RV64-ATOMIC-NEXT: ld a0, 0(a0)
+; RV64-ATOMIC-NEXT: li a1, -1025
+; RV64-ATOMIC-NEXT: slli s1, a1, 52
+; RV64-ATOMIC-NEXT: .LBB55_1: # %atomicrmw.start
+; RV64-ATOMIC-NEXT: # =>This Inner Loop Header: Depth=1
+; RV64-ATOMIC-NEXT: mv s2, a0
+; RV64-ATOMIC-NEXT: mv a1, s1
+; RV64-ATOMIC-NEXT: call __adddf3@plt
+; RV64-ATOMIC-NEXT: mv a2, a0
+; RV64-ATOMIC-NEXT: mv a0, s0
+; RV64-ATOMIC-NEXT: mv a1, s2
+; RV64-ATOMIC-NEXT: call __sync_val_compare_and_swap_8@plt
+; RV64-ATOMIC-NEXT: bne a0, s2, .LBB55_1
+; RV64-ATOMIC-NEXT: # %bb.2: # %atomicrmw.end
+; RV64-ATOMIC-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-NEXT: ld s2, 0(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-NEXT: addi sp, sp, 32
+; RV64-ATOMIC-NEXT: ret
+ %v = atomicrmw fsub ptr %p, double 1.0 seq_cst, align 8
+ ret double %v
+}
+
+define double @rmw64_fmin_seq_cst(ptr %p) nounwind {
+; RV32-LABEL: rmw64_fmin_seq_cst:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -32
+; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32-NEXT: mv s0, a0
+; RV32-NEXT: lw s1, 4(a0)
+; RV32-NEXT: lw s2, 0(a0)
+; RV32-NEXT: .LBB56_1: # %atomicrmw.start
+; RV32-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32-NEXT: lui a3, 261888
+; RV32-NEXT: mv a0, s2
+; RV32-NEXT: mv a1, s1
+; RV32-NEXT: li a2, 0
+; RV32-NEXT: call fmin at plt
+; RV32-NEXT: mv a2, a0
+; RV32-NEXT: mv a3, a1
+; RV32-NEXT: sw s2, 8(sp)
+; RV32-NEXT: sw s1, 12(sp)
+; RV32-NEXT: addi a1, sp, 8
+; RV32-NEXT: li a4, 5
+; RV32-NEXT: li a5, 5
+; RV32-NEXT: mv a0, s0
+; RV32-NEXT: call __atomic_compare_exchange_8 at plt
+; RV32-NEXT: lw s1, 12(sp)
+; RV32-NEXT: lw s2, 8(sp)
+; RV32-NEXT: beqz a0, .LBB56_1
+; RV32-NEXT: # %bb.2: # %atomicrmw.end
+; RV32-NEXT: mv a0, s2
+; RV32-NEXT: mv a1, s1
+; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 32
+; RV32-NEXT: ret
+;
+; RV64-NO-ATOMIC-LABEL: rmw64_fmin_seq_cst:
+; RV64-NO-ATOMIC: # %bb.0:
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, -48
+; RV64-NO-ATOMIC-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: mv s0, a0
+; RV64-NO-ATOMIC-NEXT: ld s2, 0(a0)
+; RV64-NO-ATOMIC-NEXT: li a0, 1023
+; RV64-NO-ATOMIC-NEXT: slli s1, a0, 52
+; RV64-NO-ATOMIC-NEXT: .LBB56_1: # %atomicrmw.start
+; RV64-NO-ATOMIC-NEXT: # =>This Inner Loop Header: Depth=1
+; RV64-NO-ATOMIC-NEXT: mv a0, s2
+; RV64-NO-ATOMIC-NEXT: mv a1, s1
+; RV64-NO-ATOMIC-NEXT: call fmin at plt
+; RV64-NO-ATOMIC-NEXT: mv a2, a0
+; RV64-NO-ATOMIC-NEXT: sd s2, 8(sp)
+; RV64-NO-ATOMIC-NEXT: addi a1, sp, 8
+; RV64-NO-ATOMIC-NEXT: li a3, 5
+; RV64-NO-ATOMIC-NEXT: li a4, 5
+; RV64-NO-ATOMIC-NEXT: mv a0, s0
+; RV64-NO-ATOMIC-NEXT: call __atomic_compare_exchange_8@plt
+; RV64-NO-ATOMIC-NEXT: ld s2, 8(sp)
+; RV64-NO-ATOMIC-NEXT: beqz a0, .LBB56_1
+; RV64-NO-ATOMIC-NEXT: # %bb.2: # %atomicrmw.end
+; RV64-NO-ATOMIC-NEXT: mv a0, s2
+; RV64-NO-ATOMIC-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, 48
+; RV64-NO-ATOMIC-NEXT: ret
+;
+; RV64-ATOMIC-LABEL: rmw64_fmin_seq_cst:
+; RV64-ATOMIC: # %bb.0:
+; RV64-ATOMIC-NEXT: addi sp, sp, -32
+; RV64-ATOMIC-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-NEXT: sd s2, 0(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-NEXT: mv s0, a0
+; RV64-ATOMIC-NEXT: ld a0, 0(a0)
+; RV64-ATOMIC-NEXT: li a1, 1023
+; RV64-ATOMIC-NEXT: slli s1, a1, 52
+; RV64-ATOMIC-NEXT: .LBB56_1: # %atomicrmw.start
+; RV64-ATOMIC-NEXT: # =>This Inner Loop Header: Depth=1
+; RV64-ATOMIC-NEXT: mv s2, a0
+; RV64-ATOMIC-NEXT: mv a1, s1
+; RV64-ATOMIC-NEXT: call fmin@plt
+; RV64-ATOMIC-NEXT: mv a2, a0
+; RV64-ATOMIC-NEXT: mv a0, s0
+; RV64-ATOMIC-NEXT: mv a1, s2
+; RV64-ATOMIC-NEXT: call __sync_val_compare_and_swap_8@plt
+; RV64-ATOMIC-NEXT: bne a0, s2, .LBB56_1
+; RV64-ATOMIC-NEXT: # %bb.2: # %atomicrmw.end
+; RV64-ATOMIC-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-NEXT: ld s2, 0(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-NEXT: addi sp, sp, 32
+; RV64-ATOMIC-NEXT: ret
+ %v = atomicrmw fmin ptr %p, double 1.0 seq_cst, align 8
+ ret double %v
+}
+
+; seq_cst atomicrmw fmax on a double. Every checked configuration expands to a
+; compare-exchange loop around a call to fmax; only the CAS libcall differs:
+; __atomic_compare_exchange_8 for RV32 and RV64-NO-ATOMIC, and
+; __sync_val_compare_and_swap_8 for RV64-ATOMIC (presumably the +forced-atomics
+; run; the RUN lines are outside this part of the file).
+define double @rmw64_fmax_seq_cst(ptr %p) nounwind {
+; RV32-LABEL: rmw64_fmax_seq_cst:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -32
+; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32-NEXT: mv s0, a0
+; RV32-NEXT: lw s1, 4(a0)
+; RV32-NEXT: lw s2, 0(a0)
+; RV32-NEXT: .LBB57_1: # %atomicrmw.start
+; RV32-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32-NEXT: lui a3, 261888
+; RV32-NEXT: mv a0, s2
+; RV32-NEXT: mv a1, s1
+; RV32-NEXT: li a2, 0
+; RV32-NEXT: call fmax@plt
+; RV32-NEXT: mv a2, a0
+; RV32-NEXT: mv a3, a1
+; RV32-NEXT: sw s2, 8(sp)
+; RV32-NEXT: sw s1, 12(sp)
+; RV32-NEXT: addi a1, sp, 8
+; RV32-NEXT: li a4, 5
+; RV32-NEXT: li a5, 5
+; RV32-NEXT: mv a0, s0
+; RV32-NEXT: call __atomic_compare_exchange_8@plt
+; RV32-NEXT: lw s1, 12(sp)
+; RV32-NEXT: lw s2, 8(sp)
+; RV32-NEXT: beqz a0, .LBB57_1
+; RV32-NEXT: # %bb.2: # %atomicrmw.end
+; RV32-NEXT: mv a0, s2
+; RV32-NEXT: mv a1, s1
+; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 32
+; RV32-NEXT: ret
+;
+; RV64-NO-ATOMIC-LABEL: rmw64_fmax_seq_cst:
+; RV64-NO-ATOMIC: # %bb.0:
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, -48
+; RV64-NO-ATOMIC-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: mv s0, a0
+; RV64-NO-ATOMIC-NEXT: ld s2, 0(a0)
+; RV64-NO-ATOMIC-NEXT: li a0, 1023
+; RV64-NO-ATOMIC-NEXT: slli s1, a0, 52
+; RV64-NO-ATOMIC-NEXT: .LBB57_1: # %atomicrmw.start
+; RV64-NO-ATOMIC-NEXT: # =>This Inner Loop Header: Depth=1
+; RV64-NO-ATOMIC-NEXT: mv a0, s2
+; RV64-NO-ATOMIC-NEXT: mv a1, s1
+; RV64-NO-ATOMIC-NEXT: call fmax@plt
+; RV64-NO-ATOMIC-NEXT: mv a2, a0
+; RV64-NO-ATOMIC-NEXT: sd s2, 8(sp)
+; RV64-NO-ATOMIC-NEXT: addi a1, sp, 8
+; RV64-NO-ATOMIC-NEXT: li a3, 5
+; RV64-NO-ATOMIC-NEXT: li a4, 5
+; RV64-NO-ATOMIC-NEXT: mv a0, s0
+; RV64-NO-ATOMIC-NEXT: call __atomic_compare_exchange_8@plt
+; RV64-NO-ATOMIC-NEXT: ld s2, 8(sp)
+; RV64-NO-ATOMIC-NEXT: beqz a0, .LBB57_1
+; RV64-NO-ATOMIC-NEXT: # %bb.2: # %atomicrmw.end
+; RV64-NO-ATOMIC-NEXT: mv a0, s2
+; RV64-NO-ATOMIC-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, 48
+; RV64-NO-ATOMIC-NEXT: ret
+;
+; RV64-ATOMIC-LABEL: rmw64_fmax_seq_cst:
+; RV64-ATOMIC: # %bb.0:
+; RV64-ATOMIC-NEXT: addi sp, sp, -32
+; RV64-ATOMIC-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-NEXT: sd s2, 0(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-NEXT: mv s0, a0
+; RV64-ATOMIC-NEXT: ld a0, 0(a0)
+; RV64-ATOMIC-NEXT: li a1, 1023
+; RV64-ATOMIC-NEXT: slli s1, a1, 52
+; RV64-ATOMIC-NEXT: .LBB57_1: # %atomicrmw.start
+; RV64-ATOMIC-NEXT: # =>This Inner Loop Header: Depth=1
+; RV64-ATOMIC-NEXT: mv s2, a0
+; RV64-ATOMIC-NEXT: mv a1, s1
+; RV64-ATOMIC-NEXT: call fmax@plt
+; RV64-ATOMIC-NEXT: mv a2, a0
+; RV64-ATOMIC-NEXT: mv a0, s0
+; RV64-ATOMIC-NEXT: mv a1, s2
+; RV64-ATOMIC-NEXT: call __sync_val_compare_and_swap_8@plt
+; RV64-ATOMIC-NEXT: bne a0, s2, .LBB57_1
+; RV64-ATOMIC-NEXT: # %bb.2: # %atomicrmw.end
+; RV64-ATOMIC-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-NEXT: ld s2, 0(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-NEXT: addi sp, sp, 32
+; RV64-ATOMIC-NEXT: ret
+ %v = atomicrmw fmax ptr %p, double 1.0 seq_cst, align 8
+ ret double %v
+}
+
+; Monotonic 64-bit cmpxchg (expected 0, new 1) returning the loaded value.
+; RV32 and RV64-NO-ATOMIC checks expect __atomic_compare_exchange_8 with the
+; expected value spilled to the stack; RV64-ATOMIC expects a direct
+; __sync_val_compare_and_swap_8 call.
+define i64 @cmpxchg64_monotonic(ptr %p) nounwind {
+; RV32-LABEL: cmpxchg64_monotonic:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw zero, 4(sp)
+; RV32-NEXT: sw zero, 0(sp)
+; RV32-NEXT: mv a1, sp
+; RV32-NEXT: li a2, 1
+; RV32-NEXT: li a3, 0
+; RV32-NEXT: li a4, 0
+; RV32-NEXT: li a5, 0
+; RV32-NEXT: call __atomic_compare_exchange_8@plt
+; RV32-NEXT: lw a1, 4(sp)
+; RV32-NEXT: lw a0, 0(sp)
+; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-NO-ATOMIC-LABEL: cmpxchg64_monotonic:
+; RV64-NO-ATOMIC: # %bb.0:
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, -16
+; RV64-NO-ATOMIC-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: sd zero, 0(sp)
+; RV64-NO-ATOMIC-NEXT: mv a1, sp
+; RV64-NO-ATOMIC-NEXT: li a2, 1
+; RV64-NO-ATOMIC-NEXT: li a3, 0
+; RV64-NO-ATOMIC-NEXT: li a4, 0
+; RV64-NO-ATOMIC-NEXT: call __atomic_compare_exchange_8@plt
+; RV64-NO-ATOMIC-NEXT: ld a0, 0(sp)
+; RV64-NO-ATOMIC-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, 16
+; RV64-NO-ATOMIC-NEXT: ret
+;
+; RV64-ATOMIC-LABEL: cmpxchg64_monotonic:
+; RV64-ATOMIC: # %bb.0:
+; RV64-ATOMIC-NEXT: addi sp, sp, -16
+; RV64-ATOMIC-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-NEXT: li a2, 1
+; RV64-ATOMIC-NEXT: li a1, 0
+; RV64-ATOMIC-NEXT: call __sync_val_compare_and_swap_8@plt
+; RV64-ATOMIC-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-NEXT: addi sp, sp, 16
+; RV64-ATOMIC-NEXT: ret
+ %res = cmpxchg ptr %p, i64 0, i64 1 monotonic monotonic
+ %res.0 = extractvalue { i64, i1 } %res, 0
+ ret i64 %res.0
+}
+
+; seq_cst 64-bit cmpxchg (expected 0, new 1) returning the loaded value.
+; RV32 and RV64-NO-ATOMIC checks expect __atomic_compare_exchange_8 (with the
+; constant 5 loaded into two argument registers); RV64-ATOMIC expects a direct
+; __sync_val_compare_and_swap_8 call.
+define i64 @cmpxchg64_seq_cst(ptr %p) nounwind {
+; RV32-LABEL: cmpxchg64_seq_cst:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw zero, 4(sp)
+; RV32-NEXT: sw zero, 0(sp)
+; RV32-NEXT: mv a1, sp
+; RV32-NEXT: li a2, 1
+; RV32-NEXT: li a4, 5
+; RV32-NEXT: li a5, 5
+; RV32-NEXT: li a3, 0
+; RV32-NEXT: call __atomic_compare_exchange_8@plt
+; RV32-NEXT: lw a1, 4(sp)
+; RV32-NEXT: lw a0, 0(sp)
+; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-NO-ATOMIC-LABEL: cmpxchg64_seq_cst:
+; RV64-NO-ATOMIC: # %bb.0:
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, -16
+; RV64-NO-ATOMIC-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: sd zero, 0(sp)
+; RV64-NO-ATOMIC-NEXT: mv a1, sp
+; RV64-NO-ATOMIC-NEXT: li a2, 1
+; RV64-NO-ATOMIC-NEXT: li a3, 5
+; RV64-NO-ATOMIC-NEXT: li a4, 5
+; RV64-NO-ATOMIC-NEXT: call __atomic_compare_exchange_8@plt
+; RV64-NO-ATOMIC-NEXT: ld a0, 0(sp)
+; RV64-NO-ATOMIC-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, 16
+; RV64-NO-ATOMIC-NEXT: ret
+;
+; RV64-ATOMIC-LABEL: cmpxchg64_seq_cst:
+; RV64-ATOMIC: # %bb.0:
+; RV64-ATOMIC-NEXT: addi sp, sp, -16
+; RV64-ATOMIC-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-NEXT: li a2, 1
+; RV64-ATOMIC-NEXT: li a1, 0
+; RV64-ATOMIC-NEXT: call __sync_val_compare_and_swap_8@plt
+; RV64-ATOMIC-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-NEXT: addi sp, sp, 16
+; RV64-ATOMIC-NEXT: ret
+ %res = cmpxchg ptr %p, i64 0, i64 1 seq_cst seq_cst
+ %res.0 = extractvalue { i64, i1 } %res, 0
+ ret i64 %res.0
+}
+
+; seq_cst atomic load of an i128. RV32 checks expect the generic, size-taking
+; __atomic_load (size 16 in a0, result copied out through the stack); the
+; shared RV64 prefix expects the sized __atomic_load_16 libcall.
+define i128 @load128(ptr %p) nounwind {
+; RV32-LABEL: load128:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -32
+; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32-NEXT: mv s0, a0
+; RV32-NEXT: li a0, 16
+; RV32-NEXT: addi a2, sp, 8
+; RV32-NEXT: li a3, 5
+; RV32-NEXT: call __atomic_load@plt
+; RV32-NEXT: lw a0, 20(sp)
+; RV32-NEXT: lw a1, 16(sp)
+; RV32-NEXT: lw a2, 12(sp)
+; RV32-NEXT: lw a3, 8(sp)
+; RV32-NEXT: sw a0, 12(s0)
+; RV32-NEXT: sw a1, 8(s0)
+; RV32-NEXT: sw a2, 4(s0)
+; RV32-NEXT: sw a3, 0(s0)
+; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 32
+; RV32-NEXT: ret
+;
+; RV64-LABEL: load128:
+; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-NEXT: li a1, 5
+; RV64-NEXT: call __atomic_load_16@plt
+; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: ret
+ %v = load atomic i128, ptr %p seq_cst, align 16
+ ret i128 %v
+}
+
+; seq_cst atomic store of i128 zero. RV32 checks expect the generic,
+; size-taking __atomic_store (size 16 in a0, value passed via a stack slot);
+; the shared RV64 prefix expects the sized __atomic_store_16 libcall.
+define void @store128(ptr %p) nounwind {
+; RV32-LABEL: store128:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -32
+; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32-NEXT: mv a1, a0
+; RV32-NEXT: sw zero, 20(sp)
+; RV32-NEXT: sw zero, 16(sp)
+; RV32-NEXT: sw zero, 12(sp)
+; RV32-NEXT: sw zero, 8(sp)
+; RV32-NEXT: li a0, 16
+; RV32-NEXT: addi a2, sp, 8
+; RV32-NEXT: li a3, 5
+; RV32-NEXT: call __atomic_store@plt
+; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 32
+; RV32-NEXT: ret
+;
+; RV64-LABEL: store128:
+; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-NEXT: li a3, 5
+; RV64-NEXT: li a1, 0
+; RV64-NEXT: li a2, 0
+; RV64-NEXT: call __atomic_store_16@plt
+; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: ret
+ store atomic i128 0, ptr %p seq_cst, align 16
+ ret void
+}
+
+; seq_cst atomicrmw add of 1 to an i128. RV32 checks expect an inline 128-bit
+; add inside a loop around the generic __atomic_compare_exchange (size 16 in
+; a0); the shared RV64 prefix expects a single __atomic_fetch_add_16 libcall.
+define i128 @rmw128(ptr %p) nounwind {
+; RV32-LABEL: rmw128:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -48
+; RV32-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
+; RV32-NEXT: mv s0, a1
+; RV32-NEXT: lw a1, 12(a1)
+; RV32-NEXT: lw a2, 8(s0)
+; RV32-NEXT: lw a3, 4(s0)
+; RV32-NEXT: lw a4, 0(s0)
+; RV32-NEXT: mv s1, a0
+; RV32-NEXT: j .LBB62_2
+; RV32-NEXT: .LBB62_1: # %atomicrmw.start
+; RV32-NEXT: # in Loop: Header=BB62_2 Depth=1
+; RV32-NEXT: add a6, a2, a6
+; RV32-NEXT: sltu a7, a6, a2
+; RV32-NEXT: add a7, a1, a7
+; RV32-NEXT: sw a4, 16(sp)
+; RV32-NEXT: sw a3, 20(sp)
+; RV32-NEXT: sw a2, 24(sp)
+; RV32-NEXT: sw a1, 28(sp)
+; RV32-NEXT: sw a5, 4(sp)
+; RV32-NEXT: sw a0, 0(sp)
+; RV32-NEXT: sw a6, 8(sp)
+; RV32-NEXT: sw a7, 12(sp)
+; RV32-NEXT: li a0, 16
+; RV32-NEXT: addi a2, sp, 16
+; RV32-NEXT: mv a3, sp
+; RV32-NEXT: li a4, 5
+; RV32-NEXT: li a5, 5
+; RV32-NEXT: mv a1, s0
+; RV32-NEXT: call __atomic_compare_exchange@plt
+; RV32-NEXT: lw a1, 28(sp)
+; RV32-NEXT: lw a2, 24(sp)
+; RV32-NEXT: lw a3, 20(sp)
+; RV32-NEXT: lw a4, 16(sp)
+; RV32-NEXT: bnez a0, .LBB62_4
+; RV32-NEXT: .LBB62_2: # %atomicrmw.start
+; RV32-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32-NEXT: addi a0, a4, 1
+; RV32-NEXT: sltu a6, a0, a4
+; RV32-NEXT: add a5, a3, a6
+; RV32-NEXT: bgeu a0, a4, .LBB62_1
+; RV32-NEXT: # %bb.3: # %atomicrmw.start
+; RV32-NEXT: # in Loop: Header=BB62_2 Depth=1
+; RV32-NEXT: sltu a6, a5, a3
+; RV32-NEXT: j .LBB62_1
+; RV32-NEXT: .LBB62_4: # %atomicrmw.end
+; RV32-NEXT: sw a4, 0(s1)
+; RV32-NEXT: sw a3, 4(s1)
+; RV32-NEXT: sw a2, 8(s1)
+; RV32-NEXT: sw a1, 12(s1)
+; RV32-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 48
+; RV32-NEXT: ret
+;
+; RV64-LABEL: rmw128:
+; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-NEXT: li a1, 1
+; RV64-NEXT: li a3, 5
+; RV64-NEXT: li a2, 0
+; RV64-NEXT: call __atomic_fetch_add_16@plt
+; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: ret
+ %v = atomicrmw add ptr %p, i128 1 seq_cst, align 16
+ ret i128 %v
+}
+
+; seq_cst 128-bit cmpxchg (expected 0, new 1) returning the loaded value.
+; RV32 checks expect the generic __atomic_compare_exchange (size 16 in a0,
+; expected and desired both passed via stack slots); the shared RV64 prefix
+; expects the sized __atomic_compare_exchange_16 libcall.
+define i128 @cmpxchg128(ptr %p) nounwind {
+; RV32-LABEL: cmpxchg128:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -48
+; RV32-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
+; RV32-NEXT: mv s0, a0
+; RV32-NEXT: sw zero, 36(sp)
+; RV32-NEXT: sw zero, 32(sp)
+; RV32-NEXT: sw zero, 28(sp)
+; RV32-NEXT: sw zero, 24(sp)
+; RV32-NEXT: sw zero, 20(sp)
+; RV32-NEXT: sw zero, 16(sp)
+; RV32-NEXT: sw zero, 12(sp)
+; RV32-NEXT: li a0, 1
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: li a0, 16
+; RV32-NEXT: addi a2, sp, 24
+; RV32-NEXT: addi a3, sp, 8
+; RV32-NEXT: li a4, 5
+; RV32-NEXT: li a5, 5
+; RV32-NEXT: call __atomic_compare_exchange@plt
+; RV32-NEXT: lw a0, 36(sp)
+; RV32-NEXT: lw a1, 32(sp)
+; RV32-NEXT: lw a2, 28(sp)
+; RV32-NEXT: lw a3, 24(sp)
+; RV32-NEXT: sw a0, 12(s0)
+; RV32-NEXT: sw a1, 8(s0)
+; RV32-NEXT: sw a2, 4(s0)
+; RV32-NEXT: sw a3, 0(s0)
+; RV32-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 48
+; RV32-NEXT: ret
+;
+; RV64-LABEL: cmpxchg128:
+; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -32
+; RV64-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64-NEXT: sd zero, 8(sp)
+; RV64-NEXT: sd zero, 0(sp)
+; RV64-NEXT: mv a1, sp
+; RV64-NEXT: li a2, 1
+; RV64-NEXT: li a4, 5
+; RV64-NEXT: li a5, 5
+; RV64-NEXT: li a3, 0
+; RV64-NEXT: call __atomic_compare_exchange_16@plt
+; RV64-NEXT: ld a1, 8(sp)
+; RV64-NEXT: ld a0, 0(sp)
+; RV64-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64-NEXT: addi sp, sp, 32
+; RV64-NEXT: ret
+ %res = cmpxchg ptr %p, i128 0, i128 1 seq_cst seq_cst
+ %res.0 = extractvalue { i128, i1 } %res, 0
+ ret i128 %res.0
+}
More information about the llvm-commits
mailing list