[llvm] 3ea8f25 - [RISCV] Strengthen atomic ordering for sequentially consistent stores

Paul Kirth via llvm-commits llvm-commits@lists.llvm.org
Thu Jun 22 08:42:25 PDT 2023


Author: Paul Kirth
Date: 2023-06-22T15:42:17Z
New Revision: 3ea8f2526541884e03d5bd4f4e46f4eb190990b6

URL: https://github.com/llvm/llvm-project/commit/3ea8f2526541884e03d5bd4f4e46f4eb190990b6
DIFF: https://github.com/llvm/llvm-project/commit/3ea8f2526541884e03d5bd4f4e46f4eb190990b6.diff

LOG: [RISCV] Strengthen atomic ordering for sequentially consistent stores

This is a similar change to one proposed for GCC:
https://inbox.sourceware.org/gcc-patches/20230414170942.1695672-1-patrick@rivosinc.com/

The changes in this patch are based on a proposal by Hans Boehm to more
closely match the intended semantics of sequentially consistent stores
and to allow some platforms to avoid an ABI break when switching to more
performant atomic instructions. Platforms that have already compiled
code using the existing mappings will also have more time to gradually
replace that code in preparation for the switch.

Further details can be found in the psABI proposal:
https://github.com/riscv-non-isa/riscv-elf-psabi-doc/pull/378.

This patch implements a mapping that is stronger than the one outlined in table
A.6 of the RISC-V unprivileged spec, so that it remains forward compatible with
table A.7 of the same document. The related discussion can be found at
https://lists.riscv.org/g/tech-unprivileged/topic/risc_v_memory_model_topics/92916241

The major change to RISC-V code generation is that we will now emit a trailing
fence for sequentially consistent stores.

The new code sequence should have the following form:
```
fence rw,w; s{b|h|w|d}; fence rw,rw;
```

Other changes and optimizations like using amoswap will be handled separately.
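
As a concrete illustration (a minimal sketch mirroring the updated
atomic-load-store.ll tests below), a sequentially consistent i32 store such as
the following is expected to lower, under
`-mtriple=riscv64 -mattr=+a,+seq-cst-trailing-fence`, to
`fence rw, w; sw a1, 0(a0); fence rw, rw`:
```
; Sketch based on llvm/test/CodeGen/RISCV/atomic-load-store.ll;
; the function name is illustrative only.
define void @store_seq_cst(ptr %a, i32 %b) nounwind {
  store atomic i32 %b, ptr %a seq_cst, align 4
  ret void
}
```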

Reviewed By: asb

Differential Revision: https://reviews.llvm.org/D149486

Added: 
    

Modified: 
    llvm/lib/Target/RISCV/RISCVFeatures.td
    llvm/lib/Target/RISCV/RISCVISelLowering.cpp
    llvm/test/CodeGen/RISCV/atomic-load-store.ll
    llvm/test/CodeGen/RISCV/forced-atomics.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td
index 4b67f2b57cee5..85415874c6160 100644
--- a/llvm/lib/Target/RISCV/RISCVFeatures.td
+++ b/llvm/lib/Target/RISCV/RISCVFeatures.td
@@ -805,6 +805,11 @@ foreach i = {1-31} in
 def FeatureSaveRestore : SubtargetFeature<"save-restore", "EnableSaveRestore",
                                           "true", "Enable save/restore.">;
 
+def FeatureTrailingSeqCstFence : SubtargetFeature<"seq-cst-trailing-fence",
+                                          "EnableSeqCstTrailingFence",
+                                          "true",
+                                          "Enable trailing fence for seq-cst store.">;
+
 def FeatureUnalignedScalarMem
    : SubtargetFeature<"unaligned-scalar-mem", "EnableUnalignedScalarMem",
                       "true", "Has reasonably performant unaligned scalar "

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index fa0db231d381d..a7c5290d02614 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -16108,6 +16108,9 @@ Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
 
   if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord))
     return Builder.CreateFence(AtomicOrdering::Acquire);
+  if (Subtarget.enableSeqCstTrailingFence() && isa<StoreInst>(Inst) &&
+      Ord == AtomicOrdering::SequentiallyConsistent)
+    return Builder.CreateFence(AtomicOrdering::SequentiallyConsistent);
   return nullptr;
 }
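
For context (a sketch, not part of this change): AtomicExpandPass invokes the
target's emitLeadingFence/emitTrailingFence hooks around atomics when the
target requests fence insertion, so with the new subtarget feature enabled a
seq_cst store should expand at the IR level to roughly:
```
; Rough shape after AtomicExpandPass; the function name is illustrative only.
define void @expanded_store_seq_cst(ptr %a, i32 %b) {
  fence release                                  ; emitLeadingFence  -> fence rw, w
  store atomic i32 %b, ptr %a monotonic, align 4
  fence seq_cst                                  ; emitTrailingFence -> fence rw, rw (new)
  ret void
}
```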
 

diff --git a/llvm/test/CodeGen/RISCV/atomic-load-store.ll b/llvm/test/CodeGen/RISCV/atomic-load-store.ll
index 8eaa98bf33ccf..1ca234db4c78e 100644
--- a/llvm/test/CodeGen/RISCV/atomic-load-store.ll
+++ b/llvm/test/CodeGen/RISCV/atomic-load-store.ll
@@ -12,6 +12,18 @@
 ; RUN: llc -mtriple=riscv64 -mattr=+a,+experimental-ztso -verify-machineinstrs < %s \
 ; RUN:   | FileCheck -check-prefixes=RV64IA,RV64IA-TSO %s
 
+
+; RUN: llc -mtriple=riscv32 -mattr=+a,+seq-cst-trailing-fence -verify-machineinstrs < %s \
+; RUN:   | FileCheck -check-prefixes=RV32IA,RV32IA-WMO-TRAILING-FENCE %s
+; RUN: llc -mtriple=riscv32 -mattr=+a,+experimental-ztso,+seq-cst-trailing-fence -verify-machineinstrs < %s \
+; RUN:   | FileCheck -check-prefixes=RV32IA,RV32IA-TSO-TRAILING-FENCE %s
+
+; RUN: llc -mtriple=riscv64 -mattr=+a,+seq-cst-trailing-fence -verify-machineinstrs < %s \
+; RUN:   | FileCheck -check-prefixes=RV64IA,RV64IA-WMO-TRAILING-FENCE %s
+; RUN: llc -mtriple=riscv64 -mattr=+a,+experimental-ztso,+seq-cst-trailing-fence -verify-machineinstrs < %s \
+; RUN:   | FileCheck -check-prefixes=RV64IA,RV64IA-TSO-TRAILING-FENCE %s
+
+
 define i8 @atomic_load_i8_unordered(ptr %a) nounwind {
 ; RV32I-LABEL: atomic_load_i8_unordered:
 ; RV32I:       # %bb.0:
@@ -122,6 +134,28 @@ define i8 @atomic_load_i8_acquire(ptr %a) nounwind {
 ; RV64IA-TSO:       # %bb.0:
 ; RV64IA-TSO-NEXT:    lb a0, 0(a0)
 ; RV64IA-TSO-NEXT:    ret
+;
+; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i8_acquire:
+; RV32IA-WMO-TRAILING-FENCE:       # %bb.0:
+; RV32IA-WMO-TRAILING-FENCE-NEXT:    lb a0, 0(a0)
+; RV32IA-WMO-TRAILING-FENCE-NEXT:    fence r, rw
+; RV32IA-WMO-TRAILING-FENCE-NEXT:    ret
+;
+; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i8_acquire:
+; RV32IA-TSO-TRAILING-FENCE:       # %bb.0:
+; RV32IA-TSO-TRAILING-FENCE-NEXT:    lb a0, 0(a0)
+; RV32IA-TSO-TRAILING-FENCE-NEXT:    ret
+;
+; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i8_acquire:
+; RV64IA-WMO-TRAILING-FENCE:       # %bb.0:
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    lb a0, 0(a0)
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    fence r, rw
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    ret
+;
+; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i8_acquire:
+; RV64IA-TSO-TRAILING-FENCE:       # %bb.0:
+; RV64IA-TSO-TRAILING-FENCE-NEXT:    lb a0, 0(a0)
+; RV64IA-TSO-TRAILING-FENCE-NEXT:    ret
   %1 = load atomic i8, ptr %a acquire, align 1
   ret i8 %1
 }
@@ -172,6 +206,32 @@ define i8 @atomic_load_i8_seq_cst(ptr %a) nounwind {
 ; RV64IA-TSO-NEXT:    fence rw, rw
 ; RV64IA-TSO-NEXT:    lb a0, 0(a0)
 ; RV64IA-TSO-NEXT:    ret
+;
+; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i8_seq_cst:
+; RV32IA-WMO-TRAILING-FENCE:       # %bb.0:
+; RV32IA-WMO-TRAILING-FENCE-NEXT:    fence rw, rw
+; RV32IA-WMO-TRAILING-FENCE-NEXT:    lb a0, 0(a0)
+; RV32IA-WMO-TRAILING-FENCE-NEXT:    fence r, rw
+; RV32IA-WMO-TRAILING-FENCE-NEXT:    ret
+;
+; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i8_seq_cst:
+; RV32IA-TSO-TRAILING-FENCE:       # %bb.0:
+; RV32IA-TSO-TRAILING-FENCE-NEXT:    fence rw, rw
+; RV32IA-TSO-TRAILING-FENCE-NEXT:    lb a0, 0(a0)
+; RV32IA-TSO-TRAILING-FENCE-NEXT:    ret
+;
+; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i8_seq_cst:
+; RV64IA-WMO-TRAILING-FENCE:       # %bb.0:
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    fence rw, rw
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    lb a0, 0(a0)
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    fence r, rw
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    ret
+;
+; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i8_seq_cst:
+; RV64IA-TSO-TRAILING-FENCE:       # %bb.0:
+; RV64IA-TSO-TRAILING-FENCE-NEXT:    fence rw, rw
+; RV64IA-TSO-TRAILING-FENCE-NEXT:    lb a0, 0(a0)
+; RV64IA-TSO-TRAILING-FENCE-NEXT:    ret
   %1 = load atomic i8, ptr %a seq_cst, align 1
   ret i8 %1
 }
@@ -286,6 +346,28 @@ define i16 @atomic_load_i16_acquire(ptr %a) nounwind {
 ; RV64IA-TSO:       # %bb.0:
 ; RV64IA-TSO-NEXT:    lh a0, 0(a0)
 ; RV64IA-TSO-NEXT:    ret
+;
+; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i16_acquire:
+; RV32IA-WMO-TRAILING-FENCE:       # %bb.0:
+; RV32IA-WMO-TRAILING-FENCE-NEXT:    lh a0, 0(a0)
+; RV32IA-WMO-TRAILING-FENCE-NEXT:    fence r, rw
+; RV32IA-WMO-TRAILING-FENCE-NEXT:    ret
+;
+; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i16_acquire:
+; RV32IA-TSO-TRAILING-FENCE:       # %bb.0:
+; RV32IA-TSO-TRAILING-FENCE-NEXT:    lh a0, 0(a0)
+; RV32IA-TSO-TRAILING-FENCE-NEXT:    ret
+;
+; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i16_acquire:
+; RV64IA-WMO-TRAILING-FENCE:       # %bb.0:
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    lh a0, 0(a0)
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    fence r, rw
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    ret
+;
+; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i16_acquire:
+; RV64IA-TSO-TRAILING-FENCE:       # %bb.0:
+; RV64IA-TSO-TRAILING-FENCE-NEXT:    lh a0, 0(a0)
+; RV64IA-TSO-TRAILING-FENCE-NEXT:    ret
   %1 = load atomic i16, ptr %a acquire, align 2
   ret i16 %1
 }
@@ -336,6 +418,32 @@ define i16 @atomic_load_i16_seq_cst(ptr %a) nounwind {
 ; RV64IA-TSO-NEXT:    fence rw, rw
 ; RV64IA-TSO-NEXT:    lh a0, 0(a0)
 ; RV64IA-TSO-NEXT:    ret
+;
+; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i16_seq_cst:
+; RV32IA-WMO-TRAILING-FENCE:       # %bb.0:
+; RV32IA-WMO-TRAILING-FENCE-NEXT:    fence rw, rw
+; RV32IA-WMO-TRAILING-FENCE-NEXT:    lh a0, 0(a0)
+; RV32IA-WMO-TRAILING-FENCE-NEXT:    fence r, rw
+; RV32IA-WMO-TRAILING-FENCE-NEXT:    ret
+;
+; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i16_seq_cst:
+; RV32IA-TSO-TRAILING-FENCE:       # %bb.0:
+; RV32IA-TSO-TRAILING-FENCE-NEXT:    fence rw, rw
+; RV32IA-TSO-TRAILING-FENCE-NEXT:    lh a0, 0(a0)
+; RV32IA-TSO-TRAILING-FENCE-NEXT:    ret
+;
+; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i16_seq_cst:
+; RV64IA-WMO-TRAILING-FENCE:       # %bb.0:
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    fence rw, rw
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    lh a0, 0(a0)
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    fence r, rw
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    ret
+;
+; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i16_seq_cst:
+; RV64IA-TSO-TRAILING-FENCE:       # %bb.0:
+; RV64IA-TSO-TRAILING-FENCE-NEXT:    fence rw, rw
+; RV64IA-TSO-TRAILING-FENCE-NEXT:    lh a0, 0(a0)
+; RV64IA-TSO-TRAILING-FENCE-NEXT:    ret
   %1 = load atomic i16, ptr %a seq_cst, align 2
   ret i16 %1
 }
@@ -450,6 +558,28 @@ define i32 @atomic_load_i32_acquire(ptr %a) nounwind {
 ; RV64IA-TSO:       # %bb.0:
 ; RV64IA-TSO-NEXT:    lw a0, 0(a0)
 ; RV64IA-TSO-NEXT:    ret
+;
+; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i32_acquire:
+; RV32IA-WMO-TRAILING-FENCE:       # %bb.0:
+; RV32IA-WMO-TRAILING-FENCE-NEXT:    lw a0, 0(a0)
+; RV32IA-WMO-TRAILING-FENCE-NEXT:    fence r, rw
+; RV32IA-WMO-TRAILING-FENCE-NEXT:    ret
+;
+; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i32_acquire:
+; RV32IA-TSO-TRAILING-FENCE:       # %bb.0:
+; RV32IA-TSO-TRAILING-FENCE-NEXT:    lw a0, 0(a0)
+; RV32IA-TSO-TRAILING-FENCE-NEXT:    ret
+;
+; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i32_acquire:
+; RV64IA-WMO-TRAILING-FENCE:       # %bb.0:
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    lw a0, 0(a0)
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    fence r, rw
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    ret
+;
+; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i32_acquire:
+; RV64IA-TSO-TRAILING-FENCE:       # %bb.0:
+; RV64IA-TSO-TRAILING-FENCE-NEXT:    lw a0, 0(a0)
+; RV64IA-TSO-TRAILING-FENCE-NEXT:    ret
   %1 = load atomic i32, ptr %a acquire, align 4
   ret i32 %1
 }
@@ -500,6 +630,32 @@ define i32 @atomic_load_i32_seq_cst(ptr %a) nounwind {
 ; RV64IA-TSO-NEXT:    fence rw, rw
 ; RV64IA-TSO-NEXT:    lw a0, 0(a0)
 ; RV64IA-TSO-NEXT:    ret
+;
+; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i32_seq_cst:
+; RV32IA-WMO-TRAILING-FENCE:       # %bb.0:
+; RV32IA-WMO-TRAILING-FENCE-NEXT:    fence rw, rw
+; RV32IA-WMO-TRAILING-FENCE-NEXT:    lw a0, 0(a0)
+; RV32IA-WMO-TRAILING-FENCE-NEXT:    fence r, rw
+; RV32IA-WMO-TRAILING-FENCE-NEXT:    ret
+;
+; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i32_seq_cst:
+; RV32IA-TSO-TRAILING-FENCE:       # %bb.0:
+; RV32IA-TSO-TRAILING-FENCE-NEXT:    fence rw, rw
+; RV32IA-TSO-TRAILING-FENCE-NEXT:    lw a0, 0(a0)
+; RV32IA-TSO-TRAILING-FENCE-NEXT:    ret
+;
+; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i32_seq_cst:
+; RV64IA-WMO-TRAILING-FENCE:       # %bb.0:
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    fence rw, rw
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    lw a0, 0(a0)
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    fence r, rw
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    ret
+;
+; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i32_seq_cst:
+; RV64IA-TSO-TRAILING-FENCE:       # %bb.0:
+; RV64IA-TSO-TRAILING-FENCE-NEXT:    fence rw, rw
+; RV64IA-TSO-TRAILING-FENCE-NEXT:    lw a0, 0(a0)
+; RV64IA-TSO-TRAILING-FENCE-NEXT:    ret
   %1 = load atomic i32, ptr %a seq_cst, align 4
   ret i32 %1
 }
@@ -623,6 +779,17 @@ define i64 @atomic_load_i64_acquire(ptr %a) nounwind {
 ; RV64IA-TSO:       # %bb.0:
 ; RV64IA-TSO-NEXT:    ld a0, 0(a0)
 ; RV64IA-TSO-NEXT:    ret
+;
+; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i64_acquire:
+; RV64IA-WMO-TRAILING-FENCE:       # %bb.0:
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    ld a0, 0(a0)
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    fence r, rw
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    ret
+;
+; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i64_acquire:
+; RV64IA-TSO-TRAILING-FENCE:       # %bb.0:
+; RV64IA-TSO-TRAILING-FENCE-NEXT:    ld a0, 0(a0)
+; RV64IA-TSO-TRAILING-FENCE-NEXT:    ret
   %1 = load atomic i64, ptr %a acquire, align 8
   ret i64 %1
 }
@@ -670,6 +837,19 @@ define i64 @atomic_load_i64_seq_cst(ptr %a) nounwind {
 ; RV64IA-TSO-NEXT:    fence rw, rw
 ; RV64IA-TSO-NEXT:    ld a0, 0(a0)
 ; RV64IA-TSO-NEXT:    ret
+;
+; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i64_seq_cst:
+; RV64IA-WMO-TRAILING-FENCE:       # %bb.0:
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    fence rw, rw
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    ld a0, 0(a0)
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    fence r, rw
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    ret
+;
+; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i64_seq_cst:
+; RV64IA-TSO-TRAILING-FENCE:       # %bb.0:
+; RV64IA-TSO-TRAILING-FENCE-NEXT:    fence rw, rw
+; RV64IA-TSO-TRAILING-FENCE-NEXT:    ld a0, 0(a0)
+; RV64IA-TSO-TRAILING-FENCE-NEXT:    ret
   %1 = load atomic i64, ptr %a seq_cst, align 8
   ret i64 %1
 }
@@ -784,6 +964,28 @@ define void @atomic_store_i8_release(ptr %a, i8 %b) nounwind {
 ; RV64IA-TSO:       # %bb.0:
 ; RV64IA-TSO-NEXT:    sb a1, 0(a0)
 ; RV64IA-TSO-NEXT:    ret
+;
+; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_store_i8_release:
+; RV32IA-WMO-TRAILING-FENCE:       # %bb.0:
+; RV32IA-WMO-TRAILING-FENCE-NEXT:    fence rw, w
+; RV32IA-WMO-TRAILING-FENCE-NEXT:    sb a1, 0(a0)
+; RV32IA-WMO-TRAILING-FENCE-NEXT:    ret
+;
+; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_store_i8_release:
+; RV32IA-TSO-TRAILING-FENCE:       # %bb.0:
+; RV32IA-TSO-TRAILING-FENCE-NEXT:    sb a1, 0(a0)
+; RV32IA-TSO-TRAILING-FENCE-NEXT:    ret
+;
+; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_store_i8_release:
+; RV64IA-WMO-TRAILING-FENCE:       # %bb.0:
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    fence rw, w
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    sb a1, 0(a0)
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    ret
+;
+; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_store_i8_release:
+; RV64IA-TSO-TRAILING-FENCE:       # %bb.0:
+; RV64IA-TSO-TRAILING-FENCE-NEXT:    sb a1, 0(a0)
+; RV64IA-TSO-TRAILING-FENCE-NEXT:    ret
   store atomic i8 %b, ptr %a release, align 1
   ret void
 }
@@ -830,6 +1032,30 @@ define void @atomic_store_i8_seq_cst(ptr %a, i8 %b) nounwind {
 ; RV64IA-TSO:       # %bb.0:
 ; RV64IA-TSO-NEXT:    sb a1, 0(a0)
 ; RV64IA-TSO-NEXT:    ret
+;
+; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_store_i8_seq_cst:
+; RV32IA-WMO-TRAILING-FENCE:       # %bb.0:
+; RV32IA-WMO-TRAILING-FENCE-NEXT:    fence rw, w
+; RV32IA-WMO-TRAILING-FENCE-NEXT:    sb a1, 0(a0)
+; RV32IA-WMO-TRAILING-FENCE-NEXT:    fence rw, rw
+; RV32IA-WMO-TRAILING-FENCE-NEXT:    ret
+;
+; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_store_i8_seq_cst:
+; RV32IA-TSO-TRAILING-FENCE:       # %bb.0:
+; RV32IA-TSO-TRAILING-FENCE-NEXT:    sb a1, 0(a0)
+; RV32IA-TSO-TRAILING-FENCE-NEXT:    ret
+;
+; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_store_i8_seq_cst:
+; RV64IA-WMO-TRAILING-FENCE:       # %bb.0:
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    fence rw, w
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    sb a1, 0(a0)
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    fence rw, rw
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    ret
+;
+; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_store_i8_seq_cst:
+; RV64IA-TSO-TRAILING-FENCE:       # %bb.0:
+; RV64IA-TSO-TRAILING-FENCE-NEXT:    sb a1, 0(a0)
+; RV64IA-TSO-TRAILING-FENCE-NEXT:    ret
   store atomic i8 %b, ptr %a seq_cst, align 1
   ret void
 }
@@ -944,6 +1170,28 @@ define void @atomic_store_i16_release(ptr %a, i16 %b) nounwind {
 ; RV64IA-TSO:       # %bb.0:
 ; RV64IA-TSO-NEXT:    sh a1, 0(a0)
 ; RV64IA-TSO-NEXT:    ret
+;
+; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_store_i16_release:
+; RV32IA-WMO-TRAILING-FENCE:       # %bb.0:
+; RV32IA-WMO-TRAILING-FENCE-NEXT:    fence rw, w
+; RV32IA-WMO-TRAILING-FENCE-NEXT:    sh a1, 0(a0)
+; RV32IA-WMO-TRAILING-FENCE-NEXT:    ret
+;
+; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_store_i16_release:
+; RV32IA-TSO-TRAILING-FENCE:       # %bb.0:
+; RV32IA-TSO-TRAILING-FENCE-NEXT:    sh a1, 0(a0)
+; RV32IA-TSO-TRAILING-FENCE-NEXT:    ret
+;
+; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_store_i16_release:
+; RV64IA-WMO-TRAILING-FENCE:       # %bb.0:
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    fence rw, w
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    sh a1, 0(a0)
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    ret
+;
+; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_store_i16_release:
+; RV64IA-TSO-TRAILING-FENCE:       # %bb.0:
+; RV64IA-TSO-TRAILING-FENCE-NEXT:    sh a1, 0(a0)
+; RV64IA-TSO-TRAILING-FENCE-NEXT:    ret
   store atomic i16 %b, ptr %a release, align 2
   ret void
 }
@@ -990,6 +1238,30 @@ define void @atomic_store_i16_seq_cst(ptr %a, i16 %b) nounwind {
 ; RV64IA-TSO:       # %bb.0:
 ; RV64IA-TSO-NEXT:    sh a1, 0(a0)
 ; RV64IA-TSO-NEXT:    ret
+;
+; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_store_i16_seq_cst:
+; RV32IA-WMO-TRAILING-FENCE:       # %bb.0:
+; RV32IA-WMO-TRAILING-FENCE-NEXT:    fence rw, w
+; RV32IA-WMO-TRAILING-FENCE-NEXT:    sh a1, 0(a0)
+; RV32IA-WMO-TRAILING-FENCE-NEXT:    fence rw, rw
+; RV32IA-WMO-TRAILING-FENCE-NEXT:    ret
+;
+; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_store_i16_seq_cst:
+; RV32IA-TSO-TRAILING-FENCE:       # %bb.0:
+; RV32IA-TSO-TRAILING-FENCE-NEXT:    sh a1, 0(a0)
+; RV32IA-TSO-TRAILING-FENCE-NEXT:    ret
+;
+; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_store_i16_seq_cst:
+; RV64IA-WMO-TRAILING-FENCE:       # %bb.0:
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    fence rw, w
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    sh a1, 0(a0)
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    fence rw, rw
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    ret
+;
+; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_store_i16_seq_cst:
+; RV64IA-TSO-TRAILING-FENCE:       # %bb.0:
+; RV64IA-TSO-TRAILING-FENCE-NEXT:    sh a1, 0(a0)
+; RV64IA-TSO-TRAILING-FENCE-NEXT:    ret
   store atomic i16 %b, ptr %a seq_cst, align 2
   ret void
 }
@@ -1104,6 +1376,28 @@ define void @atomic_store_i32_release(ptr %a, i32 %b) nounwind {
 ; RV64IA-TSO:       # %bb.0:
 ; RV64IA-TSO-NEXT:    sw a1, 0(a0)
 ; RV64IA-TSO-NEXT:    ret
+;
+; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_store_i32_release:
+; RV32IA-WMO-TRAILING-FENCE:       # %bb.0:
+; RV32IA-WMO-TRAILING-FENCE-NEXT:    fence rw, w
+; RV32IA-WMO-TRAILING-FENCE-NEXT:    sw a1, 0(a0)
+; RV32IA-WMO-TRAILING-FENCE-NEXT:    ret
+;
+; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_store_i32_release:
+; RV32IA-TSO-TRAILING-FENCE:       # %bb.0:
+; RV32IA-TSO-TRAILING-FENCE-NEXT:    sw a1, 0(a0)
+; RV32IA-TSO-TRAILING-FENCE-NEXT:    ret
+;
+; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_store_i32_release:
+; RV64IA-WMO-TRAILING-FENCE:       # %bb.0:
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    fence rw, w
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    sw a1, 0(a0)
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    ret
+;
+; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_store_i32_release:
+; RV64IA-TSO-TRAILING-FENCE:       # %bb.0:
+; RV64IA-TSO-TRAILING-FENCE-NEXT:    sw a1, 0(a0)
+; RV64IA-TSO-TRAILING-FENCE-NEXT:    ret
   store atomic i32 %b, ptr %a release, align 4
   ret void
 }
@@ -1150,6 +1444,30 @@ define void @atomic_store_i32_seq_cst(ptr %a, i32 %b) nounwind {
 ; RV64IA-TSO:       # %bb.0:
 ; RV64IA-TSO-NEXT:    sw a1, 0(a0)
 ; RV64IA-TSO-NEXT:    ret
+;
+; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_store_i32_seq_cst:
+; RV32IA-WMO-TRAILING-FENCE:       # %bb.0:
+; RV32IA-WMO-TRAILING-FENCE-NEXT:    fence rw, w
+; RV32IA-WMO-TRAILING-FENCE-NEXT:    sw a1, 0(a0)
+; RV32IA-WMO-TRAILING-FENCE-NEXT:    fence rw, rw
+; RV32IA-WMO-TRAILING-FENCE-NEXT:    ret
+;
+; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_store_i32_seq_cst:
+; RV32IA-TSO-TRAILING-FENCE:       # %bb.0:
+; RV32IA-TSO-TRAILING-FENCE-NEXT:    sw a1, 0(a0)
+; RV32IA-TSO-TRAILING-FENCE-NEXT:    ret
+;
+; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_store_i32_seq_cst:
+; RV64IA-WMO-TRAILING-FENCE:       # %bb.0:
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    fence rw, w
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    sw a1, 0(a0)
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    fence rw, rw
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    ret
+;
+; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_store_i32_seq_cst:
+; RV64IA-TSO-TRAILING-FENCE:       # %bb.0:
+; RV64IA-TSO-TRAILING-FENCE-NEXT:    sw a1, 0(a0)
+; RV64IA-TSO-TRAILING-FENCE-NEXT:    ret
   store atomic i32 %b, ptr %a seq_cst, align 4
   ret void
 }
@@ -1273,6 +1591,17 @@ define void @atomic_store_i64_release(ptr %a, i64 %b) nounwind {
 ; RV64IA-TSO:       # %bb.0:
 ; RV64IA-TSO-NEXT:    sd a1, 0(a0)
 ; RV64IA-TSO-NEXT:    ret
+;
+; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_store_i64_release:
+; RV64IA-WMO-TRAILING-FENCE:       # %bb.0:
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    fence rw, w
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    sd a1, 0(a0)
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    ret
+;
+; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_store_i64_release:
+; RV64IA-TSO-TRAILING-FENCE:       # %bb.0:
+; RV64IA-TSO-TRAILING-FENCE-NEXT:    sd a1, 0(a0)
+; RV64IA-TSO-TRAILING-FENCE-NEXT:    ret
   store atomic i64 %b, ptr %a release, align 8
   ret void
 }
@@ -1318,6 +1647,18 @@ define void @atomic_store_i64_seq_cst(ptr %a, i64 %b) nounwind {
 ; RV64IA-TSO:       # %bb.0:
 ; RV64IA-TSO-NEXT:    sd a1, 0(a0)
 ; RV64IA-TSO-NEXT:    ret
+;
+; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_store_i64_seq_cst:
+; RV64IA-WMO-TRAILING-FENCE:       # %bb.0:
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    fence rw, w
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    sd a1, 0(a0)
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    fence rw, rw
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    ret
+;
+; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_store_i64_seq_cst:
+; RV64IA-TSO-TRAILING-FENCE:       # %bb.0:
+; RV64IA-TSO-TRAILING-FENCE-NEXT:    sd a1, 0(a0)
+; RV64IA-TSO-TRAILING-FENCE-NEXT:    ret
   store atomic i64 %b, ptr %a seq_cst, align 8
   ret void
 }

diff --git a/llvm/test/CodeGen/RISCV/forced-atomics.ll b/llvm/test/CodeGen/RISCV/forced-atomics.ll
index a54360c417592..f2079e314d51c 100644
--- a/llvm/test/CodeGen/RISCV/forced-atomics.ll
+++ b/llvm/test/CodeGen/RISCV/forced-atomics.ll
@@ -1,8 +1,12 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+seq-cst-trailing-fence < %s | FileCheck %s --check-prefixes=RV32,RV32-NO-ATOMIC
 ; RUN: llc -mtriple=riscv32 < %s | FileCheck %s --check-prefixes=RV32,RV32-NO-ATOMIC
 ; RUN: llc -mtriple=riscv32 -mattr=+forced-atomics < %s | FileCheck %s --check-prefixes=RV32,RV32-ATOMIC
+; RUN: llc -mtriple=riscv32 -mattr=+forced-atomics,+seq-cst-trailing-fence < %s | FileCheck %s --check-prefixes=RV32,RV32-ATOMIC-TRAILING
 ; RUN: llc -mtriple=riscv64 < %s | FileCheck %s --check-prefixes=RV64,RV64-NO-ATOMIC
+; RUN: llc -mtriple=riscv64 -mattr=+seq-cst-trailing-fence < %s | FileCheck %s --check-prefixes=RV64,RV64-NO-ATOMIC
 ; RUN: llc -mtriple=riscv64 -mattr=+forced-atomics < %s | FileCheck %s --check-prefixes=RV64,RV64-ATOMIC
+; RUN: llc -mtriple=riscv64 -mattr=+forced-atomics,+seq-cst-trailing-fence < %s | FileCheck %s --check-prefixes=RV64,RV64-ATOMIC-TRAILING
 
 define i8 @load8(ptr %p) nounwind {
 ; RV32-NO-ATOMIC-LABEL: load8:
@@ -22,6 +26,13 @@ define i8 @load8(ptr %p) nounwind {
 ; RV32-ATOMIC-NEXT:    fence r, rw
 ; RV32-ATOMIC-NEXT:    ret
 ;
+; RV32-ATOMIC-TRAILING-LABEL: load8:
+; RV32-ATOMIC-TRAILING:       # %bb.0:
+; RV32-ATOMIC-TRAILING-NEXT:    fence rw, rw
+; RV32-ATOMIC-TRAILING-NEXT:    lb a0, 0(a0)
+; RV32-ATOMIC-TRAILING-NEXT:    fence r, rw
+; RV32-ATOMIC-TRAILING-NEXT:    ret
+;
 ; RV64-NO-ATOMIC-LABEL: load8:
 ; RV64-NO-ATOMIC:       # %bb.0:
 ; RV64-NO-ATOMIC-NEXT:    addi sp, sp, -16
@@ -38,6 +49,13 @@ define i8 @load8(ptr %p) nounwind {
 ; RV64-ATOMIC-NEXT:    lb a0, 0(a0)
 ; RV64-ATOMIC-NEXT:    fence r, rw
 ; RV64-ATOMIC-NEXT:    ret
+;
+; RV64-ATOMIC-TRAILING-LABEL: load8:
+; RV64-ATOMIC-TRAILING:       # %bb.0:
+; RV64-ATOMIC-TRAILING-NEXT:    fence rw, rw
+; RV64-ATOMIC-TRAILING-NEXT:    lb a0, 0(a0)
+; RV64-ATOMIC-TRAILING-NEXT:    fence r, rw
+; RV64-ATOMIC-TRAILING-NEXT:    ret
   %v = load atomic i8, ptr %p seq_cst, align 1
   ret i8 %v
 }
@@ -60,6 +78,13 @@ define void @store8(ptr %p) nounwind {
 ; RV32-ATOMIC-NEXT:    sb zero, 0(a0)
 ; RV32-ATOMIC-NEXT:    ret
 ;
+; RV32-ATOMIC-TRAILING-LABEL: store8:
+; RV32-ATOMIC-TRAILING:       # %bb.0:
+; RV32-ATOMIC-TRAILING-NEXT:    fence rw, w
+; RV32-ATOMIC-TRAILING-NEXT:    sb zero, 0(a0)
+; RV32-ATOMIC-TRAILING-NEXT:    fence rw, rw
+; RV32-ATOMIC-TRAILING-NEXT:    ret
+;
 ; RV64-NO-ATOMIC-LABEL: store8:
 ; RV64-NO-ATOMIC:       # %bb.0:
 ; RV64-NO-ATOMIC-NEXT:    addi sp, sp, -16
@@ -76,6 +101,13 @@ define void @store8(ptr %p) nounwind {
 ; RV64-ATOMIC-NEXT:    fence rw, w
 ; RV64-ATOMIC-NEXT:    sb zero, 0(a0)
 ; RV64-ATOMIC-NEXT:    ret
+;
+; RV64-ATOMIC-TRAILING-LABEL: store8:
+; RV64-ATOMIC-TRAILING:       # %bb.0:
+; RV64-ATOMIC-TRAILING-NEXT:    fence rw, w
+; RV64-ATOMIC-TRAILING-NEXT:    sb zero, 0(a0)
+; RV64-ATOMIC-TRAILING-NEXT:    fence rw, rw
+; RV64-ATOMIC-TRAILING-NEXT:    ret
   store atomic i8 0, ptr %p seq_cst, align 1
   ret void
 }
@@ -102,6 +134,16 @@ define i8 @rmw8(ptr %p) nounwind {
 ; RV32-ATOMIC-NEXT:    addi sp, sp, 16
 ; RV32-ATOMIC-NEXT:    ret
 ;
+; RV32-ATOMIC-TRAILING-LABEL: rmw8:
+; RV32-ATOMIC-TRAILING:       # %bb.0:
+; RV32-ATOMIC-TRAILING-NEXT:    addi sp, sp, -16
+; RV32-ATOMIC-TRAILING-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-ATOMIC-TRAILING-NEXT:    li a1, 1
+; RV32-ATOMIC-TRAILING-NEXT:    call __sync_fetch_and_add_1@plt
+; RV32-ATOMIC-TRAILING-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-ATOMIC-TRAILING-NEXT:    addi sp, sp, 16
+; RV32-ATOMIC-TRAILING-NEXT:    ret
+;
 ; RV64-NO-ATOMIC-LABEL: rmw8:
 ; RV64-NO-ATOMIC:       # %bb.0:
 ; RV64-NO-ATOMIC-NEXT:    addi sp, sp, -16
@@ -122,6 +164,16 @@ define i8 @rmw8(ptr %p) nounwind {
 ; RV64-ATOMIC-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
 ; RV64-ATOMIC-NEXT:    addi sp, sp, 16
 ; RV64-ATOMIC-NEXT:    ret
+;
+; RV64-ATOMIC-TRAILING-LABEL: rmw8:
+; RV64-ATOMIC-TRAILING:       # %bb.0:
+; RV64-ATOMIC-TRAILING-NEXT:    addi sp, sp, -16
+; RV64-ATOMIC-TRAILING-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-TRAILING-NEXT:    li a1, 1
+; RV64-ATOMIC-TRAILING-NEXT:    call __sync_fetch_and_add_1@plt
+; RV64-ATOMIC-TRAILING-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-TRAILING-NEXT:    addi sp, sp, 16
+; RV64-ATOMIC-TRAILING-NEXT:    ret
   %v = atomicrmw add ptr %p, i8 1 seq_cst, align 1
   ret i8 %v
 }
@@ -153,6 +205,17 @@ define i8 @cmpxchg8(ptr %p) nounwind {
 ; RV32-ATOMIC-NEXT:    addi sp, sp, 16
 ; RV32-ATOMIC-NEXT:    ret
 ;
+; RV32-ATOMIC-TRAILING-LABEL: cmpxchg8:
+; RV32-ATOMIC-TRAILING:       # %bb.0:
+; RV32-ATOMIC-TRAILING-NEXT:    addi sp, sp, -16
+; RV32-ATOMIC-TRAILING-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-ATOMIC-TRAILING-NEXT:    li a2, 1
+; RV32-ATOMIC-TRAILING-NEXT:    li a1, 0
+; RV32-ATOMIC-TRAILING-NEXT:    call __sync_val_compare_and_swap_1@plt
+; RV32-ATOMIC-TRAILING-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-ATOMIC-TRAILING-NEXT:    addi sp, sp, 16
+; RV32-ATOMIC-TRAILING-NEXT:    ret
+;
 ; RV64-NO-ATOMIC-LABEL: cmpxchg8:
 ; RV64-NO-ATOMIC:       # %bb.0:
 ; RV64-NO-ATOMIC-NEXT:    addi sp, sp, -16
@@ -178,6 +241,17 @@ define i8 @cmpxchg8(ptr %p) nounwind {
 ; RV64-ATOMIC-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
 ; RV64-ATOMIC-NEXT:    addi sp, sp, 16
 ; RV64-ATOMIC-NEXT:    ret
+;
+; RV64-ATOMIC-TRAILING-LABEL: cmpxchg8:
+; RV64-ATOMIC-TRAILING:       # %bb.0:
+; RV64-ATOMIC-TRAILING-NEXT:    addi sp, sp, -16
+; RV64-ATOMIC-TRAILING-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-TRAILING-NEXT:    li a2, 1
+; RV64-ATOMIC-TRAILING-NEXT:    li a1, 0
+; RV64-ATOMIC-TRAILING-NEXT:    call __sync_val_compare_and_swap_1@plt
+; RV64-ATOMIC-TRAILING-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-TRAILING-NEXT:    addi sp, sp, 16
+; RV64-ATOMIC-TRAILING-NEXT:    ret
   %res = cmpxchg ptr %p, i8 0, i8 1 seq_cst seq_cst
   %res.0 = extractvalue { i8, i1 } %res, 0
   ret i8 %res.0
@@ -201,6 +275,13 @@ define i16 @load16(ptr %p) nounwind {
 ; RV32-ATOMIC-NEXT:    fence r, rw
 ; RV32-ATOMIC-NEXT:    ret
 ;
+; RV32-ATOMIC-TRAILING-LABEL: load16:
+; RV32-ATOMIC-TRAILING:       # %bb.0:
+; RV32-ATOMIC-TRAILING-NEXT:    fence rw, rw
+; RV32-ATOMIC-TRAILING-NEXT:    lh a0, 0(a0)
+; RV32-ATOMIC-TRAILING-NEXT:    fence r, rw
+; RV32-ATOMIC-TRAILING-NEXT:    ret
+;
 ; RV64-NO-ATOMIC-LABEL: load16:
 ; RV64-NO-ATOMIC:       # %bb.0:
 ; RV64-NO-ATOMIC-NEXT:    addi sp, sp, -16
@@ -217,6 +298,13 @@ define i16 @load16(ptr %p) nounwind {
 ; RV64-ATOMIC-NEXT:    lh a0, 0(a0)
 ; RV64-ATOMIC-NEXT:    fence r, rw
 ; RV64-ATOMIC-NEXT:    ret
+;
+; RV64-ATOMIC-TRAILING-LABEL: load16:
+; RV64-ATOMIC-TRAILING:       # %bb.0:
+; RV64-ATOMIC-TRAILING-NEXT:    fence rw, rw
+; RV64-ATOMIC-TRAILING-NEXT:    lh a0, 0(a0)
+; RV64-ATOMIC-TRAILING-NEXT:    fence r, rw
+; RV64-ATOMIC-TRAILING-NEXT:    ret
   %v = load atomic i16, ptr %p seq_cst, align 2
   ret i16 %v
 }
@@ -239,6 +327,13 @@ define void @store16(ptr %p) nounwind {
 ; RV32-ATOMIC-NEXT:    sh zero, 0(a0)
 ; RV32-ATOMIC-NEXT:    ret
 ;
+; RV32-ATOMIC-TRAILING-LABEL: store16:
+; RV32-ATOMIC-TRAILING:       # %bb.0:
+; RV32-ATOMIC-TRAILING-NEXT:    fence rw, w
+; RV32-ATOMIC-TRAILING-NEXT:    sh zero, 0(a0)
+; RV32-ATOMIC-TRAILING-NEXT:    fence rw, rw
+; RV32-ATOMIC-TRAILING-NEXT:    ret
+;
 ; RV64-NO-ATOMIC-LABEL: store16:
 ; RV64-NO-ATOMIC:       # %bb.0:
 ; RV64-NO-ATOMIC-NEXT:    addi sp, sp, -16
@@ -255,6 +350,13 @@ define void @store16(ptr %p) nounwind {
 ; RV64-ATOMIC-NEXT:    fence rw, w
 ; RV64-ATOMIC-NEXT:    sh zero, 0(a0)
 ; RV64-ATOMIC-NEXT:    ret
+;
+; RV64-ATOMIC-TRAILING-LABEL: store16:
+; RV64-ATOMIC-TRAILING:       # %bb.0:
+; RV64-ATOMIC-TRAILING-NEXT:    fence rw, w
+; RV64-ATOMIC-TRAILING-NEXT:    sh zero, 0(a0)
+; RV64-ATOMIC-TRAILING-NEXT:    fence rw, rw
+; RV64-ATOMIC-TRAILING-NEXT:    ret
   store atomic i16 0, ptr %p seq_cst, align 2
   ret void
 }
@@ -281,6 +383,16 @@ define i16 @rmw16(ptr %p) nounwind {
 ; RV32-ATOMIC-NEXT:    addi sp, sp, 16
 ; RV32-ATOMIC-NEXT:    ret
 ;
+; RV32-ATOMIC-TRAILING-LABEL: rmw16:
+; RV32-ATOMIC-TRAILING:       # %bb.0:
+; RV32-ATOMIC-TRAILING-NEXT:    addi sp, sp, -16
+; RV32-ATOMIC-TRAILING-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-ATOMIC-TRAILING-NEXT:    li a1, 1
+; RV32-ATOMIC-TRAILING-NEXT:    call __sync_fetch_and_add_2@plt
+; RV32-ATOMIC-TRAILING-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-ATOMIC-TRAILING-NEXT:    addi sp, sp, 16
+; RV32-ATOMIC-TRAILING-NEXT:    ret
+;
 ; RV64-NO-ATOMIC-LABEL: rmw16:
 ; RV64-NO-ATOMIC:       # %bb.0:
 ; RV64-NO-ATOMIC-NEXT:    addi sp, sp, -16
@@ -301,6 +413,16 @@ define i16 @rmw16(ptr %p) nounwind {
 ; RV64-ATOMIC-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
 ; RV64-ATOMIC-NEXT:    addi sp, sp, 16
 ; RV64-ATOMIC-NEXT:    ret
+;
+; RV64-ATOMIC-TRAILING-LABEL: rmw16:
+; RV64-ATOMIC-TRAILING:       # %bb.0:
+; RV64-ATOMIC-TRAILING-NEXT:    addi sp, sp, -16
+; RV64-ATOMIC-TRAILING-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-TRAILING-NEXT:    li a1, 1
+; RV64-ATOMIC-TRAILING-NEXT:    call __sync_fetch_and_add_2@plt
+; RV64-ATOMIC-TRAILING-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-TRAILING-NEXT:    addi sp, sp, 16
+; RV64-ATOMIC-TRAILING-NEXT:    ret
   %v = atomicrmw add ptr %p, i16 1 seq_cst, align 2
   ret i16 %v
 }
@@ -332,6 +454,17 @@ define i16 @cmpxchg16(ptr %p) nounwind {
 ; RV32-ATOMIC-NEXT:    addi sp, sp, 16
 ; RV32-ATOMIC-NEXT:    ret
 ;
+; RV32-ATOMIC-TRAILING-LABEL: cmpxchg16:
+; RV32-ATOMIC-TRAILING:       # %bb.0:
+; RV32-ATOMIC-TRAILING-NEXT:    addi sp, sp, -16
+; RV32-ATOMIC-TRAILING-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-ATOMIC-TRAILING-NEXT:    li a2, 1
+; RV32-ATOMIC-TRAILING-NEXT:    li a1, 0
+; RV32-ATOMIC-TRAILING-NEXT:    call __sync_val_compare_and_swap_2@plt
+; RV32-ATOMIC-TRAILING-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-ATOMIC-TRAILING-NEXT:    addi sp, sp, 16
+; RV32-ATOMIC-TRAILING-NEXT:    ret
+;
 ; RV64-NO-ATOMIC-LABEL: cmpxchg16:
 ; RV64-NO-ATOMIC:       # %bb.0:
 ; RV64-NO-ATOMIC-NEXT:    addi sp, sp, -16
@@ -357,6 +490,17 @@ define i16 @cmpxchg16(ptr %p) nounwind {
 ; RV64-ATOMIC-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
 ; RV64-ATOMIC-NEXT:    addi sp, sp, 16
 ; RV64-ATOMIC-NEXT:    ret
+;
+; RV64-ATOMIC-TRAILING-LABEL: cmpxchg16:
+; RV64-ATOMIC-TRAILING:       # %bb.0:
+; RV64-ATOMIC-TRAILING-NEXT:    addi sp, sp, -16
+; RV64-ATOMIC-TRAILING-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-TRAILING-NEXT:    li a2, 1
+; RV64-ATOMIC-TRAILING-NEXT:    li a1, 0
+; RV64-ATOMIC-TRAILING-NEXT:    call __sync_val_compare_and_swap_2@plt
+; RV64-ATOMIC-TRAILING-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-TRAILING-NEXT:    addi sp, sp, 16
+; RV64-ATOMIC-TRAILING-NEXT:    ret
   %res = cmpxchg ptr %p, i16 0, i16 1 seq_cst seq_cst
   %res.0 = extractvalue { i16, i1 } %res, 0
   ret i16 %res.0
@@ -378,6 +522,11 @@ define i32 @load32_unordered(ptr %p) nounwind {
 ; RV32-ATOMIC-NEXT:    lw a0, 0(a0)
 ; RV32-ATOMIC-NEXT:    ret
 ;
+; RV32-ATOMIC-TRAILING-LABEL: load32_unordered:
+; RV32-ATOMIC-TRAILING:       # %bb.0:
+; RV32-ATOMIC-TRAILING-NEXT:    lw a0, 0(a0)
+; RV32-ATOMIC-TRAILING-NEXT:    ret
+;
 ; RV64-NO-ATOMIC-LABEL: load32_unordered:
 ; RV64-NO-ATOMIC:       # %bb.0:
 ; RV64-NO-ATOMIC-NEXT:    addi sp, sp, -16
@@ -392,6 +541,11 @@ define i32 @load32_unordered(ptr %p) nounwind {
 ; RV64-ATOMIC:       # %bb.0:
 ; RV64-ATOMIC-NEXT:    lw a0, 0(a0)
 ; RV64-ATOMIC-NEXT:    ret
+;
+; RV64-ATOMIC-TRAILING-LABEL: load32_unordered:
+; RV64-ATOMIC-TRAILING:       # %bb.0:
+; RV64-ATOMIC-TRAILING-NEXT:    lw a0, 0(a0)
+; RV64-ATOMIC-TRAILING-NEXT:    ret
   %v = load atomic i32, ptr %p unordered, align 4
   ret i32 %v
 }
@@ -412,6 +566,11 @@ define i32 @load32_monotonic(ptr %p) nounwind {
 ; RV32-ATOMIC-NEXT:    lw a0, 0(a0)
 ; RV32-ATOMIC-NEXT:    ret
 ;
+; RV32-ATOMIC-TRAILING-LABEL: load32_monotonic:
+; RV32-ATOMIC-TRAILING:       # %bb.0:
+; RV32-ATOMIC-TRAILING-NEXT:    lw a0, 0(a0)
+; RV32-ATOMIC-TRAILING-NEXT:    ret
+;
 ; RV64-NO-ATOMIC-LABEL: load32_monotonic:
 ; RV64-NO-ATOMIC:       # %bb.0:
 ; RV64-NO-ATOMIC-NEXT:    addi sp, sp, -16
@@ -426,6 +585,11 @@ define i32 @load32_monotonic(ptr %p) nounwind {
 ; RV64-ATOMIC:       # %bb.0:
 ; RV64-ATOMIC-NEXT:    lw a0, 0(a0)
 ; RV64-ATOMIC-NEXT:    ret
+;
+; RV64-ATOMIC-TRAILING-LABEL: load32_monotonic:
+; RV64-ATOMIC-TRAILING:       # %bb.0:
+; RV64-ATOMIC-TRAILING-NEXT:    lw a0, 0(a0)
+; RV64-ATOMIC-TRAILING-NEXT:    ret
   %v = load atomic i32, ptr %p monotonic, align 4
   ret i32 %v
 }
@@ -447,6 +611,12 @@ define i32 @load32_acquire(ptr %p) nounwind {
 ; RV32-ATOMIC-NEXT:    fence r, rw
 ; RV32-ATOMIC-NEXT:    ret
 ;
+; RV32-ATOMIC-TRAILING-LABEL: load32_acquire:
+; RV32-ATOMIC-TRAILING:       # %bb.0:
+; RV32-ATOMIC-TRAILING-NEXT:    lw a0, 0(a0)
+; RV32-ATOMIC-TRAILING-NEXT:    fence r, rw
+; RV32-ATOMIC-TRAILING-NEXT:    ret
+;
 ; RV64-NO-ATOMIC-LABEL: load32_acquire:
 ; RV64-NO-ATOMIC:       # %bb.0:
 ; RV64-NO-ATOMIC-NEXT:    addi sp, sp, -16
@@ -462,6 +632,12 @@ define i32 @load32_acquire(ptr %p) nounwind {
 ; RV64-ATOMIC-NEXT:    lw a0, 0(a0)
 ; RV64-ATOMIC-NEXT:    fence r, rw
 ; RV64-ATOMIC-NEXT:    ret
+;
+; RV64-ATOMIC-TRAILING-LABEL: load32_acquire:
+; RV64-ATOMIC-TRAILING:       # %bb.0:
+; RV64-ATOMIC-TRAILING-NEXT:    lw a0, 0(a0)
+; RV64-ATOMIC-TRAILING-NEXT:    fence r, rw
+; RV64-ATOMIC-TRAILING-NEXT:    ret
   %v = load atomic i32, ptr %p acquire, align 4
   ret i32 %v
 }
@@ -484,6 +660,13 @@ define i32 @load32_seq_cst(ptr %p) nounwind {
 ; RV32-ATOMIC-NEXT:    fence r, rw
 ; RV32-ATOMIC-NEXT:    ret
 ;
+; RV32-ATOMIC-TRAILING-LABEL: load32_seq_cst:
+; RV32-ATOMIC-TRAILING:       # %bb.0:
+; RV32-ATOMIC-TRAILING-NEXT:    fence rw, rw
+; RV32-ATOMIC-TRAILING-NEXT:    lw a0, 0(a0)
+; RV32-ATOMIC-TRAILING-NEXT:    fence r, rw
+; RV32-ATOMIC-TRAILING-NEXT:    ret
+;
 ; RV64-NO-ATOMIC-LABEL: load32_seq_cst:
 ; RV64-NO-ATOMIC:       # %bb.0:
 ; RV64-NO-ATOMIC-NEXT:    addi sp, sp, -16
@@ -500,6 +683,13 @@ define i32 @load32_seq_cst(ptr %p) nounwind {
 ; RV64-ATOMIC-NEXT:    lw a0, 0(a0)
 ; RV64-ATOMIC-NEXT:    fence r, rw
 ; RV64-ATOMIC-NEXT:    ret
+;
+; RV64-ATOMIC-TRAILING-LABEL: load32_seq_cst:
+; RV64-ATOMIC-TRAILING:       # %bb.0:
+; RV64-ATOMIC-TRAILING-NEXT:    fence rw, rw
+; RV64-ATOMIC-TRAILING-NEXT:    lw a0, 0(a0)
+; RV64-ATOMIC-TRAILING-NEXT:    fence r, rw
+; RV64-ATOMIC-TRAILING-NEXT:    ret
   %v = load atomic i32, ptr %p seq_cst, align 4
   ret i32 %v
 }
@@ -521,6 +711,11 @@ define void @store32_unordered(ptr %p) nounwind {
 ; RV32-ATOMIC-NEXT:    sw zero, 0(a0)
 ; RV32-ATOMIC-NEXT:    ret
 ;
+; RV32-ATOMIC-TRAILING-LABEL: store32_unordered:
+; RV32-ATOMIC-TRAILING:       # %bb.0:
+; RV32-ATOMIC-TRAILING-NEXT:    sw zero, 0(a0)
+; RV32-ATOMIC-TRAILING-NEXT:    ret
+;
 ; RV64-NO-ATOMIC-LABEL: store32_unordered:
 ; RV64-NO-ATOMIC:       # %bb.0:
 ; RV64-NO-ATOMIC-NEXT:    addi sp, sp, -16
@@ -536,6 +731,11 @@ define void @store32_unordered(ptr %p) nounwind {
 ; RV64-ATOMIC:       # %bb.0:
 ; RV64-ATOMIC-NEXT:    sw zero, 0(a0)
 ; RV64-ATOMIC-NEXT:    ret
+;
+; RV64-ATOMIC-TRAILING-LABEL: store32_unordered:
+; RV64-ATOMIC-TRAILING:       # %bb.0:
+; RV64-ATOMIC-TRAILING-NEXT:    sw zero, 0(a0)
+; RV64-ATOMIC-TRAILING-NEXT:    ret
   store atomic i32 0, ptr %p unordered, align 4
   ret void
 }
@@ -557,6 +757,11 @@ define void @store32_monotonic(ptr %p) nounwind {
 ; RV32-ATOMIC-NEXT:    sw zero, 0(a0)
 ; RV32-ATOMIC-NEXT:    ret
 ;
+; RV32-ATOMIC-TRAILING-LABEL: store32_monotonic:
+; RV32-ATOMIC-TRAILING:       # %bb.0:
+; RV32-ATOMIC-TRAILING-NEXT:    sw zero, 0(a0)
+; RV32-ATOMIC-TRAILING-NEXT:    ret
+;
 ; RV64-NO-ATOMIC-LABEL: store32_monotonic:
 ; RV64-NO-ATOMIC:       # %bb.0:
 ; RV64-NO-ATOMIC-NEXT:    addi sp, sp, -16
@@ -572,6 +777,11 @@ define void @store32_monotonic(ptr %p) nounwind {
 ; RV64-ATOMIC:       # %bb.0:
 ; RV64-ATOMIC-NEXT:    sw zero, 0(a0)
 ; RV64-ATOMIC-NEXT:    ret
+;
+; RV64-ATOMIC-TRAILING-LABEL: store32_monotonic:
+; RV64-ATOMIC-TRAILING:       # %bb.0:
+; RV64-ATOMIC-TRAILING-NEXT:    sw zero, 0(a0)
+; RV64-ATOMIC-TRAILING-NEXT:    ret
   store atomic i32 0, ptr %p monotonic, align 4
   ret void
 }
@@ -594,6 +804,12 @@ define void @store32_release(ptr %p) nounwind {
 ; RV32-ATOMIC-NEXT:    sw zero, 0(a0)
 ; RV32-ATOMIC-NEXT:    ret
 ;
+; RV32-ATOMIC-TRAILING-LABEL: store32_release:
+; RV32-ATOMIC-TRAILING:       # %bb.0:
+; RV32-ATOMIC-TRAILING-NEXT:    fence rw, w
+; RV32-ATOMIC-TRAILING-NEXT:    sw zero, 0(a0)
+; RV32-ATOMIC-TRAILING-NEXT:    ret
+;
 ; RV64-NO-ATOMIC-LABEL: store32_release:
 ; RV64-NO-ATOMIC:       # %bb.0:
 ; RV64-NO-ATOMIC-NEXT:    addi sp, sp, -16
@@ -610,6 +826,12 @@ define void @store32_release(ptr %p) nounwind {
 ; RV64-ATOMIC-NEXT:    fence rw, w
 ; RV64-ATOMIC-NEXT:    sw zero, 0(a0)
 ; RV64-ATOMIC-NEXT:    ret
+;
+; RV64-ATOMIC-TRAILING-LABEL: store32_release:
+; RV64-ATOMIC-TRAILING:       # %bb.0:
+; RV64-ATOMIC-TRAILING-NEXT:    fence rw, w
+; RV64-ATOMIC-TRAILING-NEXT:    sw zero, 0(a0)
+; RV64-ATOMIC-TRAILING-NEXT:    ret
   store atomic i32 0, ptr %p release, align 4
   ret void
 }
@@ -632,6 +854,13 @@ define void @store32_seq_cst(ptr %p) nounwind {
 ; RV32-ATOMIC-NEXT:    sw zero, 0(a0)
 ; RV32-ATOMIC-NEXT:    ret
 ;
+; RV32-ATOMIC-TRAILING-LABEL: store32_seq_cst:
+; RV32-ATOMIC-TRAILING:       # %bb.0:
+; RV32-ATOMIC-TRAILING-NEXT:    fence rw, w
+; RV32-ATOMIC-TRAILING-NEXT:    sw zero, 0(a0)
+; RV32-ATOMIC-TRAILING-NEXT:    fence rw, rw
+; RV32-ATOMIC-TRAILING-NEXT:    ret
+;
 ; RV64-NO-ATOMIC-LABEL: store32_seq_cst:
 ; RV64-NO-ATOMIC:       # %bb.0:
 ; RV64-NO-ATOMIC-NEXT:    addi sp, sp, -16
@@ -648,6 +877,13 @@ define void @store32_seq_cst(ptr %p) nounwind {
 ; RV64-ATOMIC-NEXT:    fence rw, w
 ; RV64-ATOMIC-NEXT:    sw zero, 0(a0)
 ; RV64-ATOMIC-NEXT:    ret
+;
+; RV64-ATOMIC-TRAILING-LABEL: store32_seq_cst:
+; RV64-ATOMIC-TRAILING:       # %bb.0:
+; RV64-ATOMIC-TRAILING-NEXT:    fence rw, w
+; RV64-ATOMIC-TRAILING-NEXT:    sw zero, 0(a0)
+; RV64-ATOMIC-TRAILING-NEXT:    fence rw, rw
+; RV64-ATOMIC-TRAILING-NEXT:    ret
   store atomic i32 0, ptr %p seq_cst, align 4
   ret void
 }
@@ -674,6 +910,16 @@ define i32 @rmw32_add_monotonic(ptr %p) nounwind {
 ; RV32-ATOMIC-NEXT:    addi sp, sp, 16
 ; RV32-ATOMIC-NEXT:    ret
 ;
+; RV32-ATOMIC-TRAILING-LABEL: rmw32_add_monotonic:
+; RV32-ATOMIC-TRAILING:       # %bb.0:
+; RV32-ATOMIC-TRAILING-NEXT:    addi sp, sp, -16
+; RV32-ATOMIC-TRAILING-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-ATOMIC-TRAILING-NEXT:    li a1, 1
+; RV32-ATOMIC-TRAILING-NEXT:    call __sync_fetch_and_add_4@plt
+; RV32-ATOMIC-TRAILING-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-ATOMIC-TRAILING-NEXT:    addi sp, sp, 16
+; RV32-ATOMIC-TRAILING-NEXT:    ret
+;
 ; RV64-NO-ATOMIC-LABEL: rmw32_add_monotonic:
 ; RV64-NO-ATOMIC:       # %bb.0:
 ; RV64-NO-ATOMIC-NEXT:    addi sp, sp, -16
@@ -694,6 +940,16 @@ define i32 @rmw32_add_monotonic(ptr %p) nounwind {
 ; RV64-ATOMIC-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
 ; RV64-ATOMIC-NEXT:    addi sp, sp, 16
 ; RV64-ATOMIC-NEXT:    ret
+;
+; RV64-ATOMIC-TRAILING-LABEL: rmw32_add_monotonic:
+; RV64-ATOMIC-TRAILING:       # %bb.0:
+; RV64-ATOMIC-TRAILING-NEXT:    addi sp, sp, -16
+; RV64-ATOMIC-TRAILING-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-TRAILING-NEXT:    li a1, 1
+; RV64-ATOMIC-TRAILING-NEXT:    call __sync_fetch_and_add_4@plt
+; RV64-ATOMIC-TRAILING-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-TRAILING-NEXT:    addi sp, sp, 16
+; RV64-ATOMIC-TRAILING-NEXT:    ret
   %v = atomicrmw add ptr %p, i32 1 monotonic, align 4
   ret i32 %v
 }
@@ -720,6 +976,16 @@ define i32 @rmw32_add_seq_cst(ptr %p) nounwind {
 ; RV32-ATOMIC-NEXT:    addi sp, sp, 16
 ; RV32-ATOMIC-NEXT:    ret
 ;
+; RV32-ATOMIC-TRAILING-LABEL: rmw32_add_seq_cst:
+; RV32-ATOMIC-TRAILING:       # %bb.0:
+; RV32-ATOMIC-TRAILING-NEXT:    addi sp, sp, -16
+; RV32-ATOMIC-TRAILING-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-ATOMIC-TRAILING-NEXT:    li a1, 1
+; RV32-ATOMIC-TRAILING-NEXT:    call __sync_fetch_and_add_4@plt
+; RV32-ATOMIC-TRAILING-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-ATOMIC-TRAILING-NEXT:    addi sp, sp, 16
+; RV32-ATOMIC-TRAILING-NEXT:    ret
+;
 ; RV64-NO-ATOMIC-LABEL: rmw32_add_seq_cst:
 ; RV64-NO-ATOMIC:       # %bb.0:
 ; RV64-NO-ATOMIC-NEXT:    addi sp, sp, -16
@@ -740,6 +1006,16 @@ define i32 @rmw32_add_seq_cst(ptr %p) nounwind {
 ; RV64-ATOMIC-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
 ; RV64-ATOMIC-NEXT:    addi sp, sp, 16
 ; RV64-ATOMIC-NEXT:    ret
+;
+; RV64-ATOMIC-TRAILING-LABEL: rmw32_add_seq_cst:
+; RV64-ATOMIC-TRAILING:       # %bb.0:
+; RV64-ATOMIC-TRAILING-NEXT:    addi sp, sp, -16
+; RV64-ATOMIC-TRAILING-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-TRAILING-NEXT:    li a1, 1
+; RV64-ATOMIC-TRAILING-NEXT:    call __sync_fetch_and_add_4@plt
+; RV64-ATOMIC-TRAILING-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-TRAILING-NEXT:    addi sp, sp, 16
+; RV64-ATOMIC-TRAILING-NEXT:    ret
   %v = atomicrmw add ptr %p, i32 1 seq_cst, align 4
   ret i32 %v
 }
@@ -766,6 +1042,16 @@ define i32 @rmw32_sub_seq_cst(ptr %p) nounwind {
 ; RV32-ATOMIC-NEXT:    addi sp, sp, 16
 ; RV32-ATOMIC-NEXT:    ret
 ;
+; RV32-ATOMIC-TRAILING-LABEL: rmw32_sub_seq_cst:
+; RV32-ATOMIC-TRAILING:       # %bb.0:
+; RV32-ATOMIC-TRAILING-NEXT:    addi sp, sp, -16
+; RV32-ATOMIC-TRAILING-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-ATOMIC-TRAILING-NEXT:    li a1, 1
+; RV32-ATOMIC-TRAILING-NEXT:    call __sync_fetch_and_sub_4@plt
+; RV32-ATOMIC-TRAILING-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-ATOMIC-TRAILING-NEXT:    addi sp, sp, 16
+; RV32-ATOMIC-TRAILING-NEXT:    ret
+;
 ; RV64-NO-ATOMIC-LABEL: rmw32_sub_seq_cst:
 ; RV64-NO-ATOMIC:       # %bb.0:
 ; RV64-NO-ATOMIC-NEXT:    addi sp, sp, -16
@@ -786,6 +1072,16 @@ define i32 @rmw32_sub_seq_cst(ptr %p) nounwind {
 ; RV64-ATOMIC-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
 ; RV64-ATOMIC-NEXT:    addi sp, sp, 16
 ; RV64-ATOMIC-NEXT:    ret
+;
+; RV64-ATOMIC-TRAILING-LABEL: rmw32_sub_seq_cst:
+; RV64-ATOMIC-TRAILING:       # %bb.0:
+; RV64-ATOMIC-TRAILING-NEXT:    addi sp, sp, -16
+; RV64-ATOMIC-TRAILING-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-TRAILING-NEXT:    li a1, 1
+; RV64-ATOMIC-TRAILING-NEXT:    call __sync_fetch_and_sub_4@plt
+; RV64-ATOMIC-TRAILING-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-TRAILING-NEXT:    addi sp, sp, 16
+; RV64-ATOMIC-TRAILING-NEXT:    ret
   %v = atomicrmw sub ptr %p, i32 1 seq_cst, align 4
   ret i32 %v
 }
@@ -812,6 +1108,16 @@ define i32 @rmw32_and_seq_cst(ptr %p) nounwind {
 ; RV32-ATOMIC-NEXT:    addi sp, sp, 16
 ; RV32-ATOMIC-NEXT:    ret
 ;
+; RV32-ATOMIC-TRAILING-LABEL: rmw32_and_seq_cst:
+; RV32-ATOMIC-TRAILING:       # %bb.0:
+; RV32-ATOMIC-TRAILING-NEXT:    addi sp, sp, -16
+; RV32-ATOMIC-TRAILING-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-ATOMIC-TRAILING-NEXT:    li a1, 1
+; RV32-ATOMIC-TRAILING-NEXT:    call __sync_fetch_and_and_4@plt
+; RV32-ATOMIC-TRAILING-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-ATOMIC-TRAILING-NEXT:    addi sp, sp, 16
+; RV32-ATOMIC-TRAILING-NEXT:    ret
+;
 ; RV64-NO-ATOMIC-LABEL: rmw32_and_seq_cst:
 ; RV64-NO-ATOMIC:       # %bb.0:
 ; RV64-NO-ATOMIC-NEXT:    addi sp, sp, -16
@@ -832,6 +1138,16 @@ define i32 @rmw32_and_seq_cst(ptr %p) nounwind {
 ; RV64-ATOMIC-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
 ; RV64-ATOMIC-NEXT:    addi sp, sp, 16
 ; RV64-ATOMIC-NEXT:    ret
+;
+; RV64-ATOMIC-TRAILING-LABEL: rmw32_and_seq_cst:
+; RV64-ATOMIC-TRAILING:       # %bb.0:
+; RV64-ATOMIC-TRAILING-NEXT:    addi sp, sp, -16
+; RV64-ATOMIC-TRAILING-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-TRAILING-NEXT:    li a1, 1
+; RV64-ATOMIC-TRAILING-NEXT:    call __sync_fetch_and_and_4@plt
+; RV64-ATOMIC-TRAILING-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-TRAILING-NEXT:    addi sp, sp, 16
+; RV64-ATOMIC-TRAILING-NEXT:    ret
   %v = atomicrmw and ptr %p, i32 1 seq_cst, align 4
   ret i32 %v
 }
@@ -858,6 +1174,16 @@ define i32 @rmw32_nand_seq_cst(ptr %p) nounwind {
 ; RV32-ATOMIC-NEXT:    addi sp, sp, 16
 ; RV32-ATOMIC-NEXT:    ret
 ;
+; RV32-ATOMIC-TRAILING-LABEL: rmw32_nand_seq_cst:
+; RV32-ATOMIC-TRAILING:       # %bb.0:
+; RV32-ATOMIC-TRAILING-NEXT:    addi sp, sp, -16
+; RV32-ATOMIC-TRAILING-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-ATOMIC-TRAILING-NEXT:    li a1, 1
+; RV32-ATOMIC-TRAILING-NEXT:    call __sync_fetch_and_nand_4@plt
+; RV32-ATOMIC-TRAILING-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-ATOMIC-TRAILING-NEXT:    addi sp, sp, 16
+; RV32-ATOMIC-TRAILING-NEXT:    ret
+;
 ; RV64-NO-ATOMIC-LABEL: rmw32_nand_seq_cst:
 ; RV64-NO-ATOMIC:       # %bb.0:
 ; RV64-NO-ATOMIC-NEXT:    addi sp, sp, -16
@@ -878,6 +1204,16 @@ define i32 @rmw32_nand_seq_cst(ptr %p) nounwind {
 ; RV64-ATOMIC-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
 ; RV64-ATOMIC-NEXT:    addi sp, sp, 16
 ; RV64-ATOMIC-NEXT:    ret
+;
+; RV64-ATOMIC-TRAILING-LABEL: rmw32_nand_seq_cst:
+; RV64-ATOMIC-TRAILING:       # %bb.0:
+; RV64-ATOMIC-TRAILING-NEXT:    addi sp, sp, -16
+; RV64-ATOMIC-TRAILING-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-TRAILING-NEXT:    li a1, 1
+; RV64-ATOMIC-TRAILING-NEXT:    call __sync_fetch_and_nand_4@plt
+; RV64-ATOMIC-TRAILING-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-TRAILING-NEXT:    addi sp, sp, 16
+; RV64-ATOMIC-TRAILING-NEXT:    ret
   %v = atomicrmw nand ptr %p, i32 1 seq_cst, align 4
   ret i32 %v
 }
@@ -904,6 +1240,16 @@ define i32 @rmw32_or_seq_cst(ptr %p) nounwind {
 ; RV32-ATOMIC-NEXT:    addi sp, sp, 16
 ; RV32-ATOMIC-NEXT:    ret
 ;
+; RV32-ATOMIC-TRAILING-LABEL: rmw32_or_seq_cst:
+; RV32-ATOMIC-TRAILING:       # %bb.0:
+; RV32-ATOMIC-TRAILING-NEXT:    addi sp, sp, -16
+; RV32-ATOMIC-TRAILING-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-ATOMIC-TRAILING-NEXT:    li a1, 1
+; RV32-ATOMIC-TRAILING-NEXT:    call __sync_fetch_and_or_4@plt
+; RV32-ATOMIC-TRAILING-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-ATOMIC-TRAILING-NEXT:    addi sp, sp, 16
+; RV32-ATOMIC-TRAILING-NEXT:    ret
+;
 ; RV64-NO-ATOMIC-LABEL: rmw32_or_seq_cst:
 ; RV64-NO-ATOMIC:       # %bb.0:
 ; RV64-NO-ATOMIC-NEXT:    addi sp, sp, -16
@@ -924,6 +1270,16 @@ define i32 @rmw32_or_seq_cst(ptr %p) nounwind {
 ; RV64-ATOMIC-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
 ; RV64-ATOMIC-NEXT:    addi sp, sp, 16
 ; RV64-ATOMIC-NEXT:    ret
+;
+; RV64-ATOMIC-TRAILING-LABEL: rmw32_or_seq_cst:
+; RV64-ATOMIC-TRAILING:       # %bb.0:
+; RV64-ATOMIC-TRAILING-NEXT:    addi sp, sp, -16
+; RV64-ATOMIC-TRAILING-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-TRAILING-NEXT:    li a1, 1
+; RV64-ATOMIC-TRAILING-NEXT:    call __sync_fetch_and_or_4@plt
+; RV64-ATOMIC-TRAILING-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-TRAILING-NEXT:    addi sp, sp, 16
+; RV64-ATOMIC-TRAILING-NEXT:    ret
   %v = atomicrmw or ptr %p, i32 1 seq_cst, align 4
   ret i32 %v
 }
@@ -950,6 +1306,16 @@ define i32 @rmw32_xor_seq_cst(ptr %p) nounwind {
 ; RV32-ATOMIC-NEXT:    addi sp, sp, 16
 ; RV32-ATOMIC-NEXT:    ret
 ;
+; RV32-ATOMIC-TRAILING-LABEL: rmw32_xor_seq_cst:
+; RV32-ATOMIC-TRAILING:       # %bb.0:
+; RV32-ATOMIC-TRAILING-NEXT:    addi sp, sp, -16
+; RV32-ATOMIC-TRAILING-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-ATOMIC-TRAILING-NEXT:    li a1, 1
+; RV32-ATOMIC-TRAILING-NEXT:    call __sync_fetch_and_xor_4@plt
+; RV32-ATOMIC-TRAILING-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-ATOMIC-TRAILING-NEXT:    addi sp, sp, 16
+; RV32-ATOMIC-TRAILING-NEXT:    ret
+;
 ; RV64-NO-ATOMIC-LABEL: rmw32_xor_seq_cst:
 ; RV64-NO-ATOMIC:       # %bb.0:
 ; RV64-NO-ATOMIC-NEXT:    addi sp, sp, -16
@@ -970,6 +1336,16 @@ define i32 @rmw32_xor_seq_cst(ptr %p) nounwind {
 ; RV64-ATOMIC-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
 ; RV64-ATOMIC-NEXT:    addi sp, sp, 16
 ; RV64-ATOMIC-NEXT:    ret
+;
+; RV64-ATOMIC-TRAILING-LABEL: rmw32_xor_seq_cst:
+; RV64-ATOMIC-TRAILING:       # %bb.0:
+; RV64-ATOMIC-TRAILING-NEXT:    addi sp, sp, -16
+; RV64-ATOMIC-TRAILING-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-TRAILING-NEXT:    li a1, 1
+; RV64-ATOMIC-TRAILING-NEXT:    call __sync_fetch_and_xor_4@plt
+; RV64-ATOMIC-TRAILING-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-TRAILING-NEXT:    addi sp, sp, 16
+; RV64-ATOMIC-TRAILING-NEXT:    ret
   %v = atomicrmw xor ptr %p, i32 1 seq_cst, align 4
   ret i32 %v
 }
@@ -1018,6 +1394,16 @@ define i32 @rmw32_max_seq_cst(ptr %p) nounwind {
 ; RV32-ATOMIC-NEXT:    addi sp, sp, 16
 ; RV32-ATOMIC-NEXT:    ret
 ;
+; RV32-ATOMIC-TRAILING-LABEL: rmw32_max_seq_cst:
+; RV32-ATOMIC-TRAILING:       # %bb.0:
+; RV32-ATOMIC-TRAILING-NEXT:    addi sp, sp, -16
+; RV32-ATOMIC-TRAILING-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-ATOMIC-TRAILING-NEXT:    li a1, 1
+; RV32-ATOMIC-TRAILING-NEXT:    call __sync_fetch_and_max_4@plt
+; RV32-ATOMIC-TRAILING-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-ATOMIC-TRAILING-NEXT:    addi sp, sp, 16
+; RV32-ATOMIC-TRAILING-NEXT:    ret
+;
 ; RV64-NO-ATOMIC-LABEL: rmw32_max_seq_cst:
 ; RV64-NO-ATOMIC:       # %bb.0:
 ; RV64-NO-ATOMIC-NEXT:    addi sp, sp, -32
@@ -1061,6 +1447,16 @@ define i32 @rmw32_max_seq_cst(ptr %p) nounwind {
 ; RV64-ATOMIC-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
 ; RV64-ATOMIC-NEXT:    addi sp, sp, 16
 ; RV64-ATOMIC-NEXT:    ret
+;
+; RV64-ATOMIC-TRAILING-LABEL: rmw32_max_seq_cst:
+; RV64-ATOMIC-TRAILING:       # %bb.0:
+; RV64-ATOMIC-TRAILING-NEXT:    addi sp, sp, -16
+; RV64-ATOMIC-TRAILING-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-TRAILING-NEXT:    li a1, 1
+; RV64-ATOMIC-TRAILING-NEXT:    call __sync_fetch_and_max_4@plt
+; RV64-ATOMIC-TRAILING-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-TRAILING-NEXT:    addi sp, sp, 16
+; RV64-ATOMIC-TRAILING-NEXT:    ret
   %v = atomicrmw max ptr %p, i32 1 seq_cst, align 4
   ret i32 %v
 }
@@ -1112,6 +1508,16 @@ define i32 @rmw32_min_seq_cst(ptr %p) nounwind {
 ; RV32-ATOMIC-NEXT:    addi sp, sp, 16
 ; RV32-ATOMIC-NEXT:    ret
 ;
+; RV32-ATOMIC-TRAILING-LABEL: rmw32_min_seq_cst:
+; RV32-ATOMIC-TRAILING:       # %bb.0:
+; RV32-ATOMIC-TRAILING-NEXT:    addi sp, sp, -16
+; RV32-ATOMIC-TRAILING-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-ATOMIC-TRAILING-NEXT:    li a1, 1
+; RV32-ATOMIC-TRAILING-NEXT:    call __sync_fetch_and_min_4@plt
+; RV32-ATOMIC-TRAILING-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-ATOMIC-TRAILING-NEXT:    addi sp, sp, 16
+; RV32-ATOMIC-TRAILING-NEXT:    ret
+;
 ; RV64-NO-ATOMIC-LABEL: rmw32_min_seq_cst:
 ; RV64-NO-ATOMIC:       # %bb.0:
 ; RV64-NO-ATOMIC-NEXT:    addi sp, sp, -32
@@ -1157,6 +1563,16 @@ define i32 @rmw32_min_seq_cst(ptr %p) nounwind {
 ; RV64-ATOMIC-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
 ; RV64-ATOMIC-NEXT:    addi sp, sp, 16
 ; RV64-ATOMIC-NEXT:    ret
+;
+; RV64-ATOMIC-TRAILING-LABEL: rmw32_min_seq_cst:
+; RV64-ATOMIC-TRAILING:       # %bb.0:
+; RV64-ATOMIC-TRAILING-NEXT:    addi sp, sp, -16
+; RV64-ATOMIC-TRAILING-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-TRAILING-NEXT:    li a1, 1
+; RV64-ATOMIC-TRAILING-NEXT:    call __sync_fetch_and_min_4@plt
+; RV64-ATOMIC-TRAILING-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-TRAILING-NEXT:    addi sp, sp, 16
+; RV64-ATOMIC-TRAILING-NEXT:    ret
   %v = atomicrmw min ptr %p, i32 1 seq_cst, align 4
   ret i32 %v
 }
@@ -1198,6 +1614,16 @@ define i32 @rmw32_umax_seq_cst(ptr %p) nounwind {
 ; RV32-ATOMIC-NEXT:    addi sp, sp, 16
 ; RV32-ATOMIC-NEXT:    ret
 ;
+; RV32-ATOMIC-TRAILING-LABEL: rmw32_umax_seq_cst:
+; RV32-ATOMIC-TRAILING:       # %bb.0:
+; RV32-ATOMIC-TRAILING-NEXT:    addi sp, sp, -16
+; RV32-ATOMIC-TRAILING-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-ATOMIC-TRAILING-NEXT:    li a1, 1
+; RV32-ATOMIC-TRAILING-NEXT:    call __sync_fetch_and_umax_4@plt
+; RV32-ATOMIC-TRAILING-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-ATOMIC-TRAILING-NEXT:    addi sp, sp, 16
+; RV32-ATOMIC-TRAILING-NEXT:    ret
+;
 ; RV64-NO-ATOMIC-LABEL: rmw32_umax_seq_cst:
 ; RV64-NO-ATOMIC:       # %bb.0:
 ; RV64-NO-ATOMIC-NEXT:    addi sp, sp, -32
@@ -1241,6 +1667,16 @@ define i32 @rmw32_umax_seq_cst(ptr %p) nounwind {
 ; RV64-ATOMIC-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
 ; RV64-ATOMIC-NEXT:    addi sp, sp, 16
 ; RV64-ATOMIC-NEXT:    ret
+;
+; RV64-ATOMIC-TRAILING-LABEL: rmw32_umax_seq_cst:
+; RV64-ATOMIC-TRAILING:       # %bb.0:
+; RV64-ATOMIC-TRAILING-NEXT:    addi sp, sp, -16
+; RV64-ATOMIC-TRAILING-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-TRAILING-NEXT:    li a1, 1
+; RV64-ATOMIC-TRAILING-NEXT:    call __sync_fetch_and_umax_4@plt
+; RV64-ATOMIC-TRAILING-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-TRAILING-NEXT:    addi sp, sp, 16
+; RV64-ATOMIC-TRAILING-NEXT:    ret
   %v = atomicrmw umax ptr %p, i32 1 seq_cst, align 4
   ret i32 %v
 }
@@ -1292,6 +1728,16 @@ define i32 @rmw32_umin_seq_cst(ptr %p) nounwind {
 ; RV32-ATOMIC-NEXT:    addi sp, sp, 16
 ; RV32-ATOMIC-NEXT:    ret
 ;
+; RV32-ATOMIC-TRAILING-LABEL: rmw32_umin_seq_cst:
+; RV32-ATOMIC-TRAILING:       # %bb.0:
+; RV32-ATOMIC-TRAILING-NEXT:    addi sp, sp, -16
+; RV32-ATOMIC-TRAILING-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-ATOMIC-TRAILING-NEXT:    li a1, 1
+; RV32-ATOMIC-TRAILING-NEXT:    call __sync_fetch_and_umin_4@plt
+; RV32-ATOMIC-TRAILING-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-ATOMIC-TRAILING-NEXT:    addi sp, sp, 16
+; RV32-ATOMIC-TRAILING-NEXT:    ret
+;
 ; RV64-NO-ATOMIC-LABEL: rmw32_umin_seq_cst:
 ; RV64-NO-ATOMIC:       # %bb.0:
 ; RV64-NO-ATOMIC-NEXT:    addi sp, sp, -32
@@ -1337,6 +1783,16 @@ define i32 @rmw32_umin_seq_cst(ptr %p) nounwind {
 ; RV64-ATOMIC-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
 ; RV64-ATOMIC-NEXT:    addi sp, sp, 16
 ; RV64-ATOMIC-NEXT:    ret
+;
+; RV64-ATOMIC-TRAILING-LABEL: rmw32_umin_seq_cst:
+; RV64-ATOMIC-TRAILING:       # %bb.0:
+; RV64-ATOMIC-TRAILING-NEXT:    addi sp, sp, -16
+; RV64-ATOMIC-TRAILING-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-TRAILING-NEXT:    li a1, 1
+; RV64-ATOMIC-TRAILING-NEXT:    call __sync_fetch_and_umin_4@plt
+; RV64-ATOMIC-TRAILING-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-TRAILING-NEXT:    addi sp, sp, 16
+; RV64-ATOMIC-TRAILING-NEXT:    ret
   %v = atomicrmw umin ptr %p, i32 1 seq_cst, align 4
   ret i32 %v
 }
@@ -1363,6 +1819,16 @@ define i32 @rmw32_xchg_seq_cst(ptr %p) nounwind {
 ; RV32-ATOMIC-NEXT:    addi sp, sp, 16
 ; RV32-ATOMIC-NEXT:    ret
 ;
+; RV32-ATOMIC-TRAILING-LABEL: rmw32_xchg_seq_cst:
+; RV32-ATOMIC-TRAILING:       # %bb.0:
+; RV32-ATOMIC-TRAILING-NEXT:    addi sp, sp, -16
+; RV32-ATOMIC-TRAILING-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-ATOMIC-TRAILING-NEXT:    li a1, 1
+; RV32-ATOMIC-TRAILING-NEXT:    call __sync_lock_test_and_set_4@plt
+; RV32-ATOMIC-TRAILING-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-ATOMIC-TRAILING-NEXT:    addi sp, sp, 16
+; RV32-ATOMIC-TRAILING-NEXT:    ret
+;
 ; RV64-NO-ATOMIC-LABEL: rmw32_xchg_seq_cst:
 ; RV64-NO-ATOMIC:       # %bb.0:
 ; RV64-NO-ATOMIC-NEXT:    addi sp, sp, -16
@@ -1383,6 +1849,16 @@ define i32 @rmw32_xchg_seq_cst(ptr %p) nounwind {
 ; RV64-ATOMIC-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
 ; RV64-ATOMIC-NEXT:    addi sp, sp, 16
 ; RV64-ATOMIC-NEXT:    ret
+;
+; RV64-ATOMIC-TRAILING-LABEL: rmw32_xchg_seq_cst:
+; RV64-ATOMIC-TRAILING:       # %bb.0:
+; RV64-ATOMIC-TRAILING-NEXT:    addi sp, sp, -16
+; RV64-ATOMIC-TRAILING-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-TRAILING-NEXT:    li a1, 1
+; RV64-ATOMIC-TRAILING-NEXT:    call __sync_lock_test_and_set_4@plt
+; RV64-ATOMIC-TRAILING-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-TRAILING-NEXT:    addi sp, sp, 16
+; RV64-ATOMIC-TRAILING-NEXT:    ret
   %v = atomicrmw xchg ptr %p, i32 1 seq_cst, align 4
   ret i32 %v
 }
@@ -1443,6 +1919,31 @@ define float @rmw32_fadd_seq_cst(ptr %p) nounwind {
 ; RV32-ATOMIC-NEXT:    addi sp, sp, 16
 ; RV32-ATOMIC-NEXT:    ret
 ;
+; RV32-ATOMIC-TRAILING-LABEL: rmw32_fadd_seq_cst:
+; RV32-ATOMIC-TRAILING:       # %bb.0:
+; RV32-ATOMIC-TRAILING-NEXT:    addi sp, sp, -16
+; RV32-ATOMIC-TRAILING-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-ATOMIC-TRAILING-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32-ATOMIC-TRAILING-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
+; RV32-ATOMIC-TRAILING-NEXT:    mv s0, a0
+; RV32-ATOMIC-TRAILING-NEXT:    lw a0, 0(a0)
+; RV32-ATOMIC-TRAILING-NEXT:  .LBB28_1: # %atomicrmw.start
+; RV32-ATOMIC-TRAILING-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32-ATOMIC-TRAILING-NEXT:    mv s1, a0
+; RV32-ATOMIC-TRAILING-NEXT:    lui a1, 260096
+; RV32-ATOMIC-TRAILING-NEXT:    call __addsf3@plt
+; RV32-ATOMIC-TRAILING-NEXT:    mv a2, a0
+; RV32-ATOMIC-TRAILING-NEXT:    mv a0, s0
+; RV32-ATOMIC-TRAILING-NEXT:    mv a1, s1
+; RV32-ATOMIC-TRAILING-NEXT:    call __sync_val_compare_and_swap_4@plt
+; RV32-ATOMIC-TRAILING-NEXT:    bne a0, s1, .LBB28_1
+; RV32-ATOMIC-TRAILING-NEXT:  # %bb.2: # %atomicrmw.end
+; RV32-ATOMIC-TRAILING-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-ATOMIC-TRAILING-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32-ATOMIC-TRAILING-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
+; RV32-ATOMIC-TRAILING-NEXT:    addi sp, sp, 16
+; RV32-ATOMIC-TRAILING-NEXT:    ret
+;
 ; RV64-NO-ATOMIC-LABEL: rmw32_fadd_seq_cst:
 ; RV64-NO-ATOMIC:       # %bb.0:
 ; RV64-NO-ATOMIC-NEXT:    addi sp, sp, -32
@@ -1502,6 +2003,36 @@ define float @rmw32_fadd_seq_cst(ptr %p) nounwind {
 ; RV64-ATOMIC-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
 ; RV64-ATOMIC-NEXT:    addi sp, sp, 32
 ; RV64-ATOMIC-NEXT:    ret
+;
+; RV64-ATOMIC-TRAILING-LABEL: rmw32_fadd_seq_cst:
+; RV64-ATOMIC-TRAILING:       # %bb.0:
+; RV64-ATOMIC-TRAILING-NEXT:    addi sp, sp, -32
+; RV64-ATOMIC-TRAILING-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-TRAILING-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-TRAILING-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-TRAILING-NEXT:    sd s2, 0(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-TRAILING-NEXT:    mv s0, a0
+; RV64-ATOMIC-TRAILING-NEXT:    lw s1, 0(a0)
+; RV64-ATOMIC-TRAILING-NEXT:  .LBB28_1: # %atomicrmw.start
+; RV64-ATOMIC-TRAILING-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV64-ATOMIC-TRAILING-NEXT:    lui a1, 260096
+; RV64-ATOMIC-TRAILING-NEXT:    mv a0, s1
+; RV64-ATOMIC-TRAILING-NEXT:    call __addsf3@plt
+; RV64-ATOMIC-TRAILING-NEXT:    mv a2, a0
+; RV64-ATOMIC-TRAILING-NEXT:    sext.w s2, s1
+; RV64-ATOMIC-TRAILING-NEXT:    mv a0, s0
+; RV64-ATOMIC-TRAILING-NEXT:    mv a1, s2
+; RV64-ATOMIC-TRAILING-NEXT:    call __sync_val_compare_and_swap_4@plt
+; RV64-ATOMIC-TRAILING-NEXT:    mv s1, a0
+; RV64-ATOMIC-TRAILING-NEXT:    bne a0, s2, .LBB28_1
+; RV64-ATOMIC-TRAILING-NEXT:  # %bb.2: # %atomicrmw.end
+; RV64-ATOMIC-TRAILING-NEXT:    mv a0, s1
+; RV64-ATOMIC-TRAILING-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-TRAILING-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-TRAILING-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-TRAILING-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-TRAILING-NEXT:    addi sp, sp, 32
+; RV64-ATOMIC-TRAILING-NEXT:    ret
   %v = atomicrmw fadd ptr %p, float 1.0 seq_cst, align 4
   ret float %v
 }
@@ -1562,6 +2093,31 @@ define float @rmw32_fsub_seq_cst(ptr %p) nounwind {
 ; RV32-ATOMIC-NEXT:    addi sp, sp, 16
 ; RV32-ATOMIC-NEXT:    ret
 ;
+; RV32-ATOMIC-TRAILING-LABEL: rmw32_fsub_seq_cst:
+; RV32-ATOMIC-TRAILING:       # %bb.0:
+; RV32-ATOMIC-TRAILING-NEXT:    addi sp, sp, -16
+; RV32-ATOMIC-TRAILING-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-ATOMIC-TRAILING-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32-ATOMIC-TRAILING-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
+; RV32-ATOMIC-TRAILING-NEXT:    mv s0, a0
+; RV32-ATOMIC-TRAILING-NEXT:    lw a0, 0(a0)
+; RV32-ATOMIC-TRAILING-NEXT:  .LBB29_1: # %atomicrmw.start
+; RV32-ATOMIC-TRAILING-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32-ATOMIC-TRAILING-NEXT:    mv s1, a0
+; RV32-ATOMIC-TRAILING-NEXT:    lui a1, 784384
+; RV32-ATOMIC-TRAILING-NEXT:    call __addsf3@plt
+; RV32-ATOMIC-TRAILING-NEXT:    mv a2, a0
+; RV32-ATOMIC-TRAILING-NEXT:    mv a0, s0
+; RV32-ATOMIC-TRAILING-NEXT:    mv a1, s1
+; RV32-ATOMIC-TRAILING-NEXT:    call __sync_val_compare_and_swap_4@plt
+; RV32-ATOMIC-TRAILING-NEXT:    bne a0, s1, .LBB29_1
+; RV32-ATOMIC-TRAILING-NEXT:  # %bb.2: # %atomicrmw.end
+; RV32-ATOMIC-TRAILING-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-ATOMIC-TRAILING-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32-ATOMIC-TRAILING-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
+; RV32-ATOMIC-TRAILING-NEXT:    addi sp, sp, 16
+; RV32-ATOMIC-TRAILING-NEXT:    ret
+;
 ; RV64-NO-ATOMIC-LABEL: rmw32_fsub_seq_cst:
 ; RV64-NO-ATOMIC:       # %bb.0:
 ; RV64-NO-ATOMIC-NEXT:    addi sp, sp, -32
@@ -1621,6 +2177,36 @@ define float @rmw32_fsub_seq_cst(ptr %p) nounwind {
 ; RV64-ATOMIC-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
 ; RV64-ATOMIC-NEXT:    addi sp, sp, 32
 ; RV64-ATOMIC-NEXT:    ret
+;
+; RV64-ATOMIC-TRAILING-LABEL: rmw32_fsub_seq_cst:
+; RV64-ATOMIC-TRAILING:       # %bb.0:
+; RV64-ATOMIC-TRAILING-NEXT:    addi sp, sp, -32
+; RV64-ATOMIC-TRAILING-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-TRAILING-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-TRAILING-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-TRAILING-NEXT:    sd s2, 0(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-TRAILING-NEXT:    mv s0, a0
+; RV64-ATOMIC-TRAILING-NEXT:    lw s1, 0(a0)
+; RV64-ATOMIC-TRAILING-NEXT:  .LBB29_1: # %atomicrmw.start
+; RV64-ATOMIC-TRAILING-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV64-ATOMIC-TRAILING-NEXT:    lui a1, 784384
+; RV64-ATOMIC-TRAILING-NEXT:    mv a0, s1
+; RV64-ATOMIC-TRAILING-NEXT:    call __addsf3@plt
+; RV64-ATOMIC-TRAILING-NEXT:    mv a2, a0
+; RV64-ATOMIC-TRAILING-NEXT:    sext.w s2, s1
+; RV64-ATOMIC-TRAILING-NEXT:    mv a0, s0
+; RV64-ATOMIC-TRAILING-NEXT:    mv a1, s2
+; RV64-ATOMIC-TRAILING-NEXT:    call __sync_val_compare_and_swap_4@plt
+; RV64-ATOMIC-TRAILING-NEXT:    mv s1, a0
+; RV64-ATOMIC-TRAILING-NEXT:    bne a0, s2, .LBB29_1
+; RV64-ATOMIC-TRAILING-NEXT:  # %bb.2: # %atomicrmw.end
+; RV64-ATOMIC-TRAILING-NEXT:    mv a0, s1
+; RV64-ATOMIC-TRAILING-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-TRAILING-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-TRAILING-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-TRAILING-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-TRAILING-NEXT:    addi sp, sp, 32
+; RV64-ATOMIC-TRAILING-NEXT:    ret
   %v = atomicrmw fsub ptr %p, float 1.0 seq_cst, align 4
   ret float %v
 }
@@ -1681,6 +2267,31 @@ define float @rmw32_fmin_seq_cst(ptr %p) nounwind {
 ; RV32-ATOMIC-NEXT:    addi sp, sp, 16
 ; RV32-ATOMIC-NEXT:    ret
 ;
+; RV32-ATOMIC-TRAILING-LABEL: rmw32_fmin_seq_cst:
+; RV32-ATOMIC-TRAILING:       # %bb.0:
+; RV32-ATOMIC-TRAILING-NEXT:    addi sp, sp, -16
+; RV32-ATOMIC-TRAILING-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-ATOMIC-TRAILING-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32-ATOMIC-TRAILING-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
+; RV32-ATOMIC-TRAILING-NEXT:    mv s0, a0
+; RV32-ATOMIC-TRAILING-NEXT:    lw a0, 0(a0)
+; RV32-ATOMIC-TRAILING-NEXT:  .LBB30_1: # %atomicrmw.start
+; RV32-ATOMIC-TRAILING-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32-ATOMIC-TRAILING-NEXT:    mv s1, a0
+; RV32-ATOMIC-TRAILING-NEXT:    lui a1, 260096
+; RV32-ATOMIC-TRAILING-NEXT:    call fminf@plt
+; RV32-ATOMIC-TRAILING-NEXT:    mv a2, a0
+; RV32-ATOMIC-TRAILING-NEXT:    mv a0, s0
+; RV32-ATOMIC-TRAILING-NEXT:    mv a1, s1
+; RV32-ATOMIC-TRAILING-NEXT:    call __sync_val_compare_and_swap_4@plt
+; RV32-ATOMIC-TRAILING-NEXT:    bne a0, s1, .LBB30_1
+; RV32-ATOMIC-TRAILING-NEXT:  # %bb.2: # %atomicrmw.end
+; RV32-ATOMIC-TRAILING-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-ATOMIC-TRAILING-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32-ATOMIC-TRAILING-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
+; RV32-ATOMIC-TRAILING-NEXT:    addi sp, sp, 16
+; RV32-ATOMIC-TRAILING-NEXT:    ret
+;
 ; RV64-NO-ATOMIC-LABEL: rmw32_fmin_seq_cst:
 ; RV64-NO-ATOMIC:       # %bb.0:
 ; RV64-NO-ATOMIC-NEXT:    addi sp, sp, -32
@@ -1740,6 +2351,36 @@ define float @rmw32_fmin_seq_cst(ptr %p) nounwind {
 ; RV64-ATOMIC-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
 ; RV64-ATOMIC-NEXT:    addi sp, sp, 32
 ; RV64-ATOMIC-NEXT:    ret
+;
+; RV64-ATOMIC-TRAILING-LABEL: rmw32_fmin_seq_cst:
+; RV64-ATOMIC-TRAILING:       # %bb.0:
+; RV64-ATOMIC-TRAILING-NEXT:    addi sp, sp, -32
+; RV64-ATOMIC-TRAILING-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-TRAILING-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-TRAILING-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-TRAILING-NEXT:    sd s2, 0(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-TRAILING-NEXT:    mv s0, a0
+; RV64-ATOMIC-TRAILING-NEXT:    lw s1, 0(a0)
+; RV64-ATOMIC-TRAILING-NEXT:  .LBB30_1: # %atomicrmw.start
+; RV64-ATOMIC-TRAILING-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV64-ATOMIC-TRAILING-NEXT:    lui a1, 260096
+; RV64-ATOMIC-TRAILING-NEXT:    mv a0, s1
+; RV64-ATOMIC-TRAILING-NEXT:    call fminf@plt
+; RV64-ATOMIC-TRAILING-NEXT:    mv a2, a0
+; RV64-ATOMIC-TRAILING-NEXT:    sext.w s2, s1
+; RV64-ATOMIC-TRAILING-NEXT:    mv a0, s0
+; RV64-ATOMIC-TRAILING-NEXT:    mv a1, s2
+; RV64-ATOMIC-TRAILING-NEXT:    call __sync_val_compare_and_swap_4@plt
+; RV64-ATOMIC-TRAILING-NEXT:    mv s1, a0
+; RV64-ATOMIC-TRAILING-NEXT:    bne a0, s2, .LBB30_1
+; RV64-ATOMIC-TRAILING-NEXT:  # %bb.2: # %atomicrmw.end
+; RV64-ATOMIC-TRAILING-NEXT:    mv a0, s1
+; RV64-ATOMIC-TRAILING-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-TRAILING-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-TRAILING-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-TRAILING-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-TRAILING-NEXT:    addi sp, sp, 32
+; RV64-ATOMIC-TRAILING-NEXT:    ret
   %v = atomicrmw fmin ptr %p, float 1.0 seq_cst, align 4
   ret float %v
 }
@@ -1800,6 +2441,31 @@ define float @rmw32_fmax_seq_cst(ptr %p) nounwind {
 ; RV32-ATOMIC-NEXT:    addi sp, sp, 16
 ; RV32-ATOMIC-NEXT:    ret
 ;
+; RV32-ATOMIC-TRAILING-LABEL: rmw32_fmax_seq_cst:
+; RV32-ATOMIC-TRAILING:       # %bb.0:
+; RV32-ATOMIC-TRAILING-NEXT:    addi sp, sp, -16
+; RV32-ATOMIC-TRAILING-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-ATOMIC-TRAILING-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32-ATOMIC-TRAILING-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
+; RV32-ATOMIC-TRAILING-NEXT:    mv s0, a0
+; RV32-ATOMIC-TRAILING-NEXT:    lw a0, 0(a0)
+; RV32-ATOMIC-TRAILING-NEXT:  .LBB31_1: # %atomicrmw.start
+; RV32-ATOMIC-TRAILING-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32-ATOMIC-TRAILING-NEXT:    mv s1, a0
+; RV32-ATOMIC-TRAILING-NEXT:    lui a1, 260096
+; RV32-ATOMIC-TRAILING-NEXT:    call fmaxf@plt
+; RV32-ATOMIC-TRAILING-NEXT:    mv a2, a0
+; RV32-ATOMIC-TRAILING-NEXT:    mv a0, s0
+; RV32-ATOMIC-TRAILING-NEXT:    mv a1, s1
+; RV32-ATOMIC-TRAILING-NEXT:    call __sync_val_compare_and_swap_4@plt
+; RV32-ATOMIC-TRAILING-NEXT:    bne a0, s1, .LBB31_1
+; RV32-ATOMIC-TRAILING-NEXT:  # %bb.2: # %atomicrmw.end
+; RV32-ATOMIC-TRAILING-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-ATOMIC-TRAILING-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32-ATOMIC-TRAILING-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
+; RV32-ATOMIC-TRAILING-NEXT:    addi sp, sp, 16
+; RV32-ATOMIC-TRAILING-NEXT:    ret
+;
 ; RV64-NO-ATOMIC-LABEL: rmw32_fmax_seq_cst:
 ; RV64-NO-ATOMIC:       # %bb.0:
 ; RV64-NO-ATOMIC-NEXT:    addi sp, sp, -32
@@ -1859,6 +2525,36 @@ define float @rmw32_fmax_seq_cst(ptr %p) nounwind {
 ; RV64-ATOMIC-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
 ; RV64-ATOMIC-NEXT:    addi sp, sp, 32
 ; RV64-ATOMIC-NEXT:    ret
+;
+; RV64-ATOMIC-TRAILING-LABEL: rmw32_fmax_seq_cst:
+; RV64-ATOMIC-TRAILING:       # %bb.0:
+; RV64-ATOMIC-TRAILING-NEXT:    addi sp, sp, -32
+; RV64-ATOMIC-TRAILING-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-TRAILING-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-TRAILING-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-TRAILING-NEXT:    sd s2, 0(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-TRAILING-NEXT:    mv s0, a0
+; RV64-ATOMIC-TRAILING-NEXT:    lw s1, 0(a0)
+; RV64-ATOMIC-TRAILING-NEXT:  .LBB31_1: # %atomicrmw.start
+; RV64-ATOMIC-TRAILING-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV64-ATOMIC-TRAILING-NEXT:    lui a1, 260096
+; RV64-ATOMIC-TRAILING-NEXT:    mv a0, s1
+; RV64-ATOMIC-TRAILING-NEXT:    call fmaxf@plt
+; RV64-ATOMIC-TRAILING-NEXT:    mv a2, a0
+; RV64-ATOMIC-TRAILING-NEXT:    sext.w s2, s1
+; RV64-ATOMIC-TRAILING-NEXT:    mv a0, s0
+; RV64-ATOMIC-TRAILING-NEXT:    mv a1, s2
+; RV64-ATOMIC-TRAILING-NEXT:    call __sync_val_compare_and_swap_4@plt
+; RV64-ATOMIC-TRAILING-NEXT:    mv s1, a0
+; RV64-ATOMIC-TRAILING-NEXT:    bne a0, s2, .LBB31_1
+; RV64-ATOMIC-TRAILING-NEXT:  # %bb.2: # %atomicrmw.end
+; RV64-ATOMIC-TRAILING-NEXT:    mv a0, s1
+; RV64-ATOMIC-TRAILING-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-TRAILING-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-TRAILING-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-TRAILING-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-TRAILING-NEXT:    addi sp, sp, 32
+; RV64-ATOMIC-TRAILING-NEXT:    ret
   %v = atomicrmw fmax ptr %p, float 1.0 seq_cst, align 4
   ret float %v
 }
@@ -1890,6 +2586,17 @@ define i32 @cmpxchg32_monotonic(ptr %p) nounwind {
 ; RV32-ATOMIC-NEXT:    addi sp, sp, 16
 ; RV32-ATOMIC-NEXT:    ret
 ;
+; RV32-ATOMIC-TRAILING-LABEL: cmpxchg32_monotonic:
+; RV32-ATOMIC-TRAILING:       # %bb.0:
+; RV32-ATOMIC-TRAILING-NEXT:    addi sp, sp, -16
+; RV32-ATOMIC-TRAILING-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-ATOMIC-TRAILING-NEXT:    li a2, 1
+; RV32-ATOMIC-TRAILING-NEXT:    li a1, 0
+; RV32-ATOMIC-TRAILING-NEXT:    call __sync_val_compare_and_swap_4@plt
+; RV32-ATOMIC-TRAILING-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-ATOMIC-TRAILING-NEXT:    addi sp, sp, 16
+; RV32-ATOMIC-TRAILING-NEXT:    ret
+;
 ; RV64-NO-ATOMIC-LABEL: cmpxchg32_monotonic:
 ; RV64-NO-ATOMIC:       # %bb.0:
 ; RV64-NO-ATOMIC-NEXT:    addi sp, sp, -16
@@ -1915,6 +2622,17 @@ define i32 @cmpxchg32_monotonic(ptr %p) nounwind {
 ; RV64-ATOMIC-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
 ; RV64-ATOMIC-NEXT:    addi sp, sp, 16
 ; RV64-ATOMIC-NEXT:    ret
+;
+; RV64-ATOMIC-TRAILING-LABEL: cmpxchg32_monotonic:
+; RV64-ATOMIC-TRAILING:       # %bb.0:
+; RV64-ATOMIC-TRAILING-NEXT:    addi sp, sp, -16
+; RV64-ATOMIC-TRAILING-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-TRAILING-NEXT:    li a2, 1
+; RV64-ATOMIC-TRAILING-NEXT:    li a1, 0
+; RV64-ATOMIC-TRAILING-NEXT:    call __sync_val_compare_and_swap_4@plt
+; RV64-ATOMIC-TRAILING-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-TRAILING-NEXT:    addi sp, sp, 16
+; RV64-ATOMIC-TRAILING-NEXT:    ret
   %res = cmpxchg ptr %p, i32 0, i32 1 monotonic monotonic
   %res.0 = extractvalue { i32, i1 } %res, 0
   ret i32 %res.0
@@ -1947,6 +2665,17 @@ define i32 @cmpxchg32_seq_cst(ptr %p) nounwind {
 ; RV32-ATOMIC-NEXT:    addi sp, sp, 16
 ; RV32-ATOMIC-NEXT:    ret
 ;
+; RV32-ATOMIC-TRAILING-LABEL: cmpxchg32_seq_cst:
+; RV32-ATOMIC-TRAILING:       # %bb.0:
+; RV32-ATOMIC-TRAILING-NEXT:    addi sp, sp, -16
+; RV32-ATOMIC-TRAILING-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-ATOMIC-TRAILING-NEXT:    li a2, 1
+; RV32-ATOMIC-TRAILING-NEXT:    li a1, 0
+; RV32-ATOMIC-TRAILING-NEXT:    call __sync_val_compare_and_swap_4@plt
+; RV32-ATOMIC-TRAILING-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-ATOMIC-TRAILING-NEXT:    addi sp, sp, 16
+; RV32-ATOMIC-TRAILING-NEXT:    ret
+;
 ; RV64-NO-ATOMIC-LABEL: cmpxchg32_seq_cst:
 ; RV64-NO-ATOMIC:       # %bb.0:
 ; RV64-NO-ATOMIC-NEXT:    addi sp, sp, -16
@@ -1972,6 +2701,17 @@ define i32 @cmpxchg32_seq_cst(ptr %p) nounwind {
 ; RV64-ATOMIC-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
 ; RV64-ATOMIC-NEXT:    addi sp, sp, 16
 ; RV64-ATOMIC-NEXT:    ret
+;
+; RV64-ATOMIC-TRAILING-LABEL: cmpxchg32_seq_cst:
+; RV64-ATOMIC-TRAILING:       # %bb.0:
+; RV64-ATOMIC-TRAILING-NEXT:    addi sp, sp, -16
+; RV64-ATOMIC-TRAILING-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-TRAILING-NEXT:    li a2, 1
+; RV64-ATOMIC-TRAILING-NEXT:    li a1, 0
+; RV64-ATOMIC-TRAILING-NEXT:    call __sync_val_compare_and_swap_4@plt
+; RV64-ATOMIC-TRAILING-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-TRAILING-NEXT:    addi sp, sp, 16
+; RV64-ATOMIC-TRAILING-NEXT:    ret
   %res = cmpxchg ptr %p, i32 0, i32 1 seq_cst seq_cst
   %res.0 = extractvalue { i32, i1 } %res, 0
   ret i32 %res.0
@@ -2002,6 +2742,11 @@ define i64 @load64_unordered(ptr %p) nounwind {
 ; RV64-ATOMIC:       # %bb.0:
 ; RV64-ATOMIC-NEXT:    ld a0, 0(a0)
 ; RV64-ATOMIC-NEXT:    ret
+;
+; RV64-ATOMIC-TRAILING-LABEL: load64_unordered:
+; RV64-ATOMIC-TRAILING:       # %bb.0:
+; RV64-ATOMIC-TRAILING-NEXT:    ld a0, 0(a0)
+; RV64-ATOMIC-TRAILING-NEXT:    ret
   %v = load atomic i64, ptr %p unordered, align 8
   ret i64 %v
 }
@@ -2031,6 +2776,11 @@ define i64 @load64_monotonic(ptr %p) nounwind {
 ; RV64-ATOMIC:       # %bb.0:
 ; RV64-ATOMIC-NEXT:    ld a0, 0(a0)
 ; RV64-ATOMIC-NEXT:    ret
+;
+; RV64-ATOMIC-TRAILING-LABEL: load64_monotonic:
+; RV64-ATOMIC-TRAILING:       # %bb.0:
+; RV64-ATOMIC-TRAILING-NEXT:    ld a0, 0(a0)
+; RV64-ATOMIC-TRAILING-NEXT:    ret
   %v = load atomic i64, ptr %p monotonic, align 8
   ret i64 %v
 }
@@ -2061,6 +2811,12 @@ define i64 @load64_acquire(ptr %p) nounwind {
 ; RV64-ATOMIC-NEXT:    ld a0, 0(a0)
 ; RV64-ATOMIC-NEXT:    fence r, rw
 ; RV64-ATOMIC-NEXT:    ret
+;
+; RV64-ATOMIC-TRAILING-LABEL: load64_acquire:
+; RV64-ATOMIC-TRAILING:       # %bb.0:
+; RV64-ATOMIC-TRAILING-NEXT:    ld a0, 0(a0)
+; RV64-ATOMIC-TRAILING-NEXT:    fence r, rw
+; RV64-ATOMIC-TRAILING-NEXT:    ret
   %v = load atomic i64, ptr %p acquire, align 8
   ret i64 %v
 }
@@ -2092,6 +2848,13 @@ define i64 @load64_seq_cst(ptr %p) nounwind {
 ; RV64-ATOMIC-NEXT:    ld a0, 0(a0)
 ; RV64-ATOMIC-NEXT:    fence r, rw
 ; RV64-ATOMIC-NEXT:    ret
+;
+; RV64-ATOMIC-TRAILING-LABEL: load64_seq_cst:
+; RV64-ATOMIC-TRAILING:       # %bb.0:
+; RV64-ATOMIC-TRAILING-NEXT:    fence rw, rw
+; RV64-ATOMIC-TRAILING-NEXT:    ld a0, 0(a0)
+; RV64-ATOMIC-TRAILING-NEXT:    fence r, rw
+; RV64-ATOMIC-TRAILING-NEXT:    ret
   %v = load atomic i64, ptr %p seq_cst, align 8
   ret i64 %v
 }
@@ -2124,6 +2887,11 @@ define void @store64_unordered(ptr %p) nounwind {
 ; RV64-ATOMIC:       # %bb.0:
 ; RV64-ATOMIC-NEXT:    sd zero, 0(a0)
 ; RV64-ATOMIC-NEXT:    ret
+;
+; RV64-ATOMIC-TRAILING-LABEL: store64_unordered:
+; RV64-ATOMIC-TRAILING:       # %bb.0:
+; RV64-ATOMIC-TRAILING-NEXT:    sd zero, 0(a0)
+; RV64-ATOMIC-TRAILING-NEXT:    ret
   store atomic i64 0, ptr %p unordered, align 8
   ret void
 }
@@ -2156,6 +2924,11 @@ define void @store64_monotonic(ptr %p) nounwind {
 ; RV64-ATOMIC:       # %bb.0:
 ; RV64-ATOMIC-NEXT:    sd zero, 0(a0)
 ; RV64-ATOMIC-NEXT:    ret
+;
+; RV64-ATOMIC-TRAILING-LABEL: store64_monotonic:
+; RV64-ATOMIC-TRAILING:       # %bb.0:
+; RV64-ATOMIC-TRAILING-NEXT:    sd zero, 0(a0)
+; RV64-ATOMIC-TRAILING-NEXT:    ret
   store atomic i64 0, ptr %p monotonic, align 8
   ret void
 }
@@ -2189,6 +2962,12 @@ define void @store64_release(ptr %p) nounwind {
 ; RV64-ATOMIC-NEXT:    fence rw, w
 ; RV64-ATOMIC-NEXT:    sd zero, 0(a0)
 ; RV64-ATOMIC-NEXT:    ret
+;
+; RV64-ATOMIC-TRAILING-LABEL: store64_release:
+; RV64-ATOMIC-TRAILING:       # %bb.0:
+; RV64-ATOMIC-TRAILING-NEXT:    fence rw, w
+; RV64-ATOMIC-TRAILING-NEXT:    sd zero, 0(a0)
+; RV64-ATOMIC-TRAILING-NEXT:    ret
   store atomic i64 0, ptr %p release, align 8
   ret void
 }
@@ -2222,6 +3001,13 @@ define void @store64(ptr %p) nounwind {
 ; RV64-ATOMIC-NEXT:    fence rw, w
 ; RV64-ATOMIC-NEXT:    sd zero, 0(a0)
 ; RV64-ATOMIC-NEXT:    ret
+;
+; RV64-ATOMIC-TRAILING-LABEL: store64:
+; RV64-ATOMIC-TRAILING:       # %bb.0:
+; RV64-ATOMIC-TRAILING-NEXT:    fence rw, w
+; RV64-ATOMIC-TRAILING-NEXT:    sd zero, 0(a0)
+; RV64-ATOMIC-TRAILING-NEXT:    fence rw, rw
+; RV64-ATOMIC-TRAILING-NEXT:    ret
   store atomic i64 0, ptr %p seq_cst, align 8
   ret void
 }
@@ -2259,6 +3045,16 @@ define i64 @rmw64_monotonic(ptr %p) nounwind {
 ; RV64-ATOMIC-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
 ; RV64-ATOMIC-NEXT:    addi sp, sp, 16
 ; RV64-ATOMIC-NEXT:    ret
+;
+; RV64-ATOMIC-TRAILING-LABEL: rmw64_monotonic:
+; RV64-ATOMIC-TRAILING:       # %bb.0:
+; RV64-ATOMIC-TRAILING-NEXT:    addi sp, sp, -16
+; RV64-ATOMIC-TRAILING-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-TRAILING-NEXT:    li a1, 1
+; RV64-ATOMIC-TRAILING-NEXT:    call __sync_fetch_and_add_8@plt
+; RV64-ATOMIC-TRAILING-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-TRAILING-NEXT:    addi sp, sp, 16
+; RV64-ATOMIC-TRAILING-NEXT:    ret
   %v = atomicrmw add ptr %p, i64 1 monotonic, align 8
   ret i64 %v
 }
@@ -2296,6 +3092,16 @@ define i64 @rmw64_add_seq_cst(ptr %p) nounwind {
 ; RV64-ATOMIC-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
 ; RV64-ATOMIC-NEXT:    addi sp, sp, 16
 ; RV64-ATOMIC-NEXT:    ret
+;
+; RV64-ATOMIC-TRAILING-LABEL: rmw64_add_seq_cst:
+; RV64-ATOMIC-TRAILING:       # %bb.0:
+; RV64-ATOMIC-TRAILING-NEXT:    addi sp, sp, -16
+; RV64-ATOMIC-TRAILING-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-TRAILING-NEXT:    li a1, 1
+; RV64-ATOMIC-TRAILING-NEXT:    call __sync_fetch_and_add_8@plt
+; RV64-ATOMIC-TRAILING-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-TRAILING-NEXT:    addi sp, sp, 16
+; RV64-ATOMIC-TRAILING-NEXT:    ret
   %v = atomicrmw add ptr %p, i64 1 seq_cst, align 8
   ret i64 %v
 }
@@ -2333,6 +3139,16 @@ define i64 @rmw64_sub_seq_cst(ptr %p) nounwind {
 ; RV64-ATOMIC-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
 ; RV64-ATOMIC-NEXT:    addi sp, sp, 16
 ; RV64-ATOMIC-NEXT:    ret
+;
+; RV64-ATOMIC-TRAILING-LABEL: rmw64_sub_seq_cst:
+; RV64-ATOMIC-TRAILING:       # %bb.0:
+; RV64-ATOMIC-TRAILING-NEXT:    addi sp, sp, -16
+; RV64-ATOMIC-TRAILING-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-TRAILING-NEXT:    li a1, 1
+; RV64-ATOMIC-TRAILING-NEXT:    call __sync_fetch_and_sub_8@plt
+; RV64-ATOMIC-TRAILING-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-TRAILING-NEXT:    addi sp, sp, 16
+; RV64-ATOMIC-TRAILING-NEXT:    ret
   %v = atomicrmw sub ptr %p, i64 1 seq_cst, align 8
   ret i64 %v
 }
@@ -2370,6 +3186,16 @@ define i64 @rmw64_and_seq_cst(ptr %p) nounwind {
 ; RV64-ATOMIC-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
 ; RV64-ATOMIC-NEXT:    addi sp, sp, 16
 ; RV64-ATOMIC-NEXT:    ret
+;
+; RV64-ATOMIC-TRAILING-LABEL: rmw64_and_seq_cst:
+; RV64-ATOMIC-TRAILING:       # %bb.0:
+; RV64-ATOMIC-TRAILING-NEXT:    addi sp, sp, -16
+; RV64-ATOMIC-TRAILING-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-TRAILING-NEXT:    li a1, 1
+; RV64-ATOMIC-TRAILING-NEXT:    call __sync_fetch_and_and_8@plt
+; RV64-ATOMIC-TRAILING-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-TRAILING-NEXT:    addi sp, sp, 16
+; RV64-ATOMIC-TRAILING-NEXT:    ret
   %v = atomicrmw and ptr %p, i64 1 seq_cst, align 8
   ret i64 %v
 }
@@ -2407,6 +3233,16 @@ define i64 @rmw64_nand_seq_cst(ptr %p) nounwind {
 ; RV64-ATOMIC-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
 ; RV64-ATOMIC-NEXT:    addi sp, sp, 16
 ; RV64-ATOMIC-NEXT:    ret
+;
+; RV64-ATOMIC-TRAILING-LABEL: rmw64_nand_seq_cst:
+; RV64-ATOMIC-TRAILING:       # %bb.0:
+; RV64-ATOMIC-TRAILING-NEXT:    addi sp, sp, -16
+; RV64-ATOMIC-TRAILING-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-TRAILING-NEXT:    li a1, 1
+; RV64-ATOMIC-TRAILING-NEXT:    call __sync_fetch_and_nand_8@plt
+; RV64-ATOMIC-TRAILING-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-TRAILING-NEXT:    addi sp, sp, 16
+; RV64-ATOMIC-TRAILING-NEXT:    ret
   %v = atomicrmw nand ptr %p, i64 1 seq_cst, align 8
   ret i64 %v
 }
@@ -2444,6 +3280,16 @@ define i64 @rmw64_or_seq_cst(ptr %p) nounwind {
 ; RV64-ATOMIC-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
 ; RV64-ATOMIC-NEXT:    addi sp, sp, 16
 ; RV64-ATOMIC-NEXT:    ret
+;
+; RV64-ATOMIC-TRAILING-LABEL: rmw64_or_seq_cst:
+; RV64-ATOMIC-TRAILING:       # %bb.0:
+; RV64-ATOMIC-TRAILING-NEXT:    addi sp, sp, -16
+; RV64-ATOMIC-TRAILING-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-TRAILING-NEXT:    li a1, 1
+; RV64-ATOMIC-TRAILING-NEXT:    call __sync_fetch_and_or_8@plt
+; RV64-ATOMIC-TRAILING-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-TRAILING-NEXT:    addi sp, sp, 16
+; RV64-ATOMIC-TRAILING-NEXT:    ret
   %v = atomicrmw or ptr %p, i64 1 seq_cst, align 8
   ret i64 %v
 }
@@ -2481,6 +3327,16 @@ define i64 @rmw64_xor_seq_cst(ptr %p) nounwind {
 ; RV64-ATOMIC-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
 ; RV64-ATOMIC-NEXT:    addi sp, sp, 16
 ; RV64-ATOMIC-NEXT:    ret
+;
+; RV64-ATOMIC-TRAILING-LABEL: rmw64_xor_seq_cst:
+; RV64-ATOMIC-TRAILING:       # %bb.0:
+; RV64-ATOMIC-TRAILING-NEXT:    addi sp, sp, -16
+; RV64-ATOMIC-TRAILING-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-TRAILING-NEXT:    li a1, 1
+; RV64-ATOMIC-TRAILING-NEXT:    call __sync_fetch_and_xor_8@plt
+; RV64-ATOMIC-TRAILING-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-TRAILING-NEXT:    addi sp, sp, 16
+; RV64-ATOMIC-TRAILING-NEXT:    ret
   %v = atomicrmw xor ptr %p, i64 1 seq_cst, align 8
   ret i64 %v
 }
@@ -2576,6 +3432,16 @@ define i64 @rmw64_max_seq_cst(ptr %p) nounwind {
 ; RV64-ATOMIC-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
 ; RV64-ATOMIC-NEXT:    addi sp, sp, 16
 ; RV64-ATOMIC-NEXT:    ret
+;
+; RV64-ATOMIC-TRAILING-LABEL: rmw64_max_seq_cst:
+; RV64-ATOMIC-TRAILING:       # %bb.0:
+; RV64-ATOMIC-TRAILING-NEXT:    addi sp, sp, -16
+; RV64-ATOMIC-TRAILING-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-TRAILING-NEXT:    li a1, 1
+; RV64-ATOMIC-TRAILING-NEXT:    call __sync_fetch_and_max_8@plt
+; RV64-ATOMIC-TRAILING-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-TRAILING-NEXT:    addi sp, sp, 16
+; RV64-ATOMIC-TRAILING-NEXT:    ret
   %v = atomicrmw max ptr %p, i64 1 seq_cst, align 8
   ret i64 %v
 }
@@ -2673,6 +3539,16 @@ define i64 @rmw64_min_seq_cst(ptr %p) nounwind {
 ; RV64-ATOMIC-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
 ; RV64-ATOMIC-NEXT:    addi sp, sp, 16
 ; RV64-ATOMIC-NEXT:    ret
+;
+; RV64-ATOMIC-TRAILING-LABEL: rmw64_min_seq_cst:
+; RV64-ATOMIC-TRAILING:       # %bb.0:
+; RV64-ATOMIC-TRAILING-NEXT:    addi sp, sp, -16
+; RV64-ATOMIC-TRAILING-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-TRAILING-NEXT:    li a1, 1
+; RV64-ATOMIC-TRAILING-NEXT:    call __sync_fetch_and_min_8@plt
+; RV64-ATOMIC-TRAILING-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-TRAILING-NEXT:    addi sp, sp, 16
+; RV64-ATOMIC-TRAILING-NEXT:    ret
   %v = atomicrmw min ptr %p, i64 1 seq_cst, align 8
   ret i64 %v
 }
@@ -2755,6 +3631,16 @@ define i64 @rmw64_umax_seq_cst(ptr %p) nounwind {
 ; RV64-ATOMIC-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
 ; RV64-ATOMIC-NEXT:    addi sp, sp, 16
 ; RV64-ATOMIC-NEXT:    ret
+;
+; RV64-ATOMIC-TRAILING-LABEL: rmw64_umax_seq_cst:
+; RV64-ATOMIC-TRAILING:       # %bb.0:
+; RV64-ATOMIC-TRAILING-NEXT:    addi sp, sp, -16
+; RV64-ATOMIC-TRAILING-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-TRAILING-NEXT:    li a1, 1
+; RV64-ATOMIC-TRAILING-NEXT:    call __sync_fetch_and_umax_8@plt
+; RV64-ATOMIC-TRAILING-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-TRAILING-NEXT:    addi sp, sp, 16
+; RV64-ATOMIC-TRAILING-NEXT:    ret
   %v = atomicrmw umax ptr %p, i64 1 seq_cst, align 8
   ret i64 %v
 }
@@ -2846,6 +3732,16 @@ define i64 @rmw64_umin_seq_cst(ptr %p) nounwind {
 ; RV64-ATOMIC-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
 ; RV64-ATOMIC-NEXT:    addi sp, sp, 16
 ; RV64-ATOMIC-NEXT:    ret
+;
+; RV64-ATOMIC-TRAILING-LABEL: rmw64_umin_seq_cst:
+; RV64-ATOMIC-TRAILING:       # %bb.0:
+; RV64-ATOMIC-TRAILING-NEXT:    addi sp, sp, -16
+; RV64-ATOMIC-TRAILING-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-TRAILING-NEXT:    li a1, 1
+; RV64-ATOMIC-TRAILING-NEXT:    call __sync_fetch_and_umin_8@plt
+; RV64-ATOMIC-TRAILING-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-TRAILING-NEXT:    addi sp, sp, 16
+; RV64-ATOMIC-TRAILING-NEXT:    ret
   %v = atomicrmw umin ptr %p, i64 1 seq_cst, align 8
   ret i64 %v
 }
@@ -2883,6 +3779,16 @@ define i64 @rmw64_xchg_seq_cst(ptr %p) nounwind {
 ; RV64-ATOMIC-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
 ; RV64-ATOMIC-NEXT:    addi sp, sp, 16
 ; RV64-ATOMIC-NEXT:    ret
+;
+; RV64-ATOMIC-TRAILING-LABEL: rmw64_xchg_seq_cst:
+; RV64-ATOMIC-TRAILING:       # %bb.0:
+; RV64-ATOMIC-TRAILING-NEXT:    addi sp, sp, -16
+; RV64-ATOMIC-TRAILING-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-TRAILING-NEXT:    li a1, 1
+; RV64-ATOMIC-TRAILING-NEXT:    call __sync_lock_test_and_set_8@plt
+; RV64-ATOMIC-TRAILING-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-TRAILING-NEXT:    addi sp, sp, 16
+; RV64-ATOMIC-TRAILING-NEXT:    ret
   %v = atomicrmw xchg ptr %p, i64 1 seq_cst, align 8
   ret i64 %v
 }
@@ -2989,6 +3895,35 @@ define double @rmw64_fadd_seq_cst(ptr %p) nounwind {
 ; RV64-ATOMIC-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
 ; RV64-ATOMIC-NEXT:    addi sp, sp, 32
 ; RV64-ATOMIC-NEXT:    ret
+;
+; RV64-ATOMIC-TRAILING-LABEL: rmw64_fadd_seq_cst:
+; RV64-ATOMIC-TRAILING:       # %bb.0:
+; RV64-ATOMIC-TRAILING-NEXT:    addi sp, sp, -32
+; RV64-ATOMIC-TRAILING-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-TRAILING-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-TRAILING-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-TRAILING-NEXT:    sd s2, 0(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-TRAILING-NEXT:    mv s0, a0
+; RV64-ATOMIC-TRAILING-NEXT:    ld a0, 0(a0)
+; RV64-ATOMIC-TRAILING-NEXT:    li s1, 1023
+; RV64-ATOMIC-TRAILING-NEXT:    slli s1, s1, 52
+; RV64-ATOMIC-TRAILING-NEXT:  .LBB54_1: # %atomicrmw.start
+; RV64-ATOMIC-TRAILING-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV64-ATOMIC-TRAILING-NEXT:    mv s2, a0
+; RV64-ATOMIC-TRAILING-NEXT:    mv a1, s1
+; RV64-ATOMIC-TRAILING-NEXT:    call __adddf3@plt
+; RV64-ATOMIC-TRAILING-NEXT:    mv a2, a0
+; RV64-ATOMIC-TRAILING-NEXT:    mv a0, s0
+; RV64-ATOMIC-TRAILING-NEXT:    mv a1, s2
+; RV64-ATOMIC-TRAILING-NEXT:    call __sync_val_compare_and_swap_8@plt
+; RV64-ATOMIC-TRAILING-NEXT:    bne a0, s2, .LBB54_1
+; RV64-ATOMIC-TRAILING-NEXT:  # %bb.2: # %atomicrmw.end
+; RV64-ATOMIC-TRAILING-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-TRAILING-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-TRAILING-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-TRAILING-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-TRAILING-NEXT:    addi sp, sp, 32
+; RV64-ATOMIC-TRAILING-NEXT:    ret
   %v = atomicrmw fadd ptr %p, double 1.0 seq_cst, align 8
   ret double %v
 }
@@ -3095,6 +4030,35 @@ define double @rmw64_fsub_seq_cst(ptr %p) nounwind {
 ; RV64-ATOMIC-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
 ; RV64-ATOMIC-NEXT:    addi sp, sp, 32
 ; RV64-ATOMIC-NEXT:    ret
+;
+; RV64-ATOMIC-TRAILING-LABEL: rmw64_fsub_seq_cst:
+; RV64-ATOMIC-TRAILING:       # %bb.0:
+; RV64-ATOMIC-TRAILING-NEXT:    addi sp, sp, -32
+; RV64-ATOMIC-TRAILING-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-TRAILING-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-TRAILING-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-TRAILING-NEXT:    sd s2, 0(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-TRAILING-NEXT:    mv s0, a0
+; RV64-ATOMIC-TRAILING-NEXT:    ld a0, 0(a0)
+; RV64-ATOMIC-TRAILING-NEXT:    li s1, -1025
+; RV64-ATOMIC-TRAILING-NEXT:    slli s1, s1, 52
+; RV64-ATOMIC-TRAILING-NEXT:  .LBB55_1: # %atomicrmw.start
+; RV64-ATOMIC-TRAILING-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV64-ATOMIC-TRAILING-NEXT:    mv s2, a0
+; RV64-ATOMIC-TRAILING-NEXT:    mv a1, s1
+; RV64-ATOMIC-TRAILING-NEXT:    call __adddf3@plt
+; RV64-ATOMIC-TRAILING-NEXT:    mv a2, a0
+; RV64-ATOMIC-TRAILING-NEXT:    mv a0, s0
+; RV64-ATOMIC-TRAILING-NEXT:    mv a1, s2
+; RV64-ATOMIC-TRAILING-NEXT:    call __sync_val_compare_and_swap_8@plt
+; RV64-ATOMIC-TRAILING-NEXT:    bne a0, s2, .LBB55_1
+; RV64-ATOMIC-TRAILING-NEXT:  # %bb.2: # %atomicrmw.end
+; RV64-ATOMIC-TRAILING-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-TRAILING-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-TRAILING-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-TRAILING-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-TRAILING-NEXT:    addi sp, sp, 32
+; RV64-ATOMIC-TRAILING-NEXT:    ret
   %v = atomicrmw fsub ptr %p, double 1.0 seq_cst, align 8
   ret double %v
 }
@@ -3201,6 +4165,35 @@ define double @rmw64_fmin_seq_cst(ptr %p) nounwind {
 ; RV64-ATOMIC-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
 ; RV64-ATOMIC-NEXT:    addi sp, sp, 32
 ; RV64-ATOMIC-NEXT:    ret
+;
+; RV64-ATOMIC-TRAILING-LABEL: rmw64_fmin_seq_cst:
+; RV64-ATOMIC-TRAILING:       # %bb.0:
+; RV64-ATOMIC-TRAILING-NEXT:    addi sp, sp, -32
+; RV64-ATOMIC-TRAILING-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-TRAILING-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-TRAILING-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-TRAILING-NEXT:    sd s2, 0(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-TRAILING-NEXT:    mv s0, a0
+; RV64-ATOMIC-TRAILING-NEXT:    ld a0, 0(a0)
+; RV64-ATOMIC-TRAILING-NEXT:    li s1, 1023
+; RV64-ATOMIC-TRAILING-NEXT:    slli s1, s1, 52
+; RV64-ATOMIC-TRAILING-NEXT:  .LBB56_1: # %atomicrmw.start
+; RV64-ATOMIC-TRAILING-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV64-ATOMIC-TRAILING-NEXT:    mv s2, a0
+; RV64-ATOMIC-TRAILING-NEXT:    mv a1, s1
+; RV64-ATOMIC-TRAILING-NEXT:    call fmin@plt
+; RV64-ATOMIC-TRAILING-NEXT:    mv a2, a0
+; RV64-ATOMIC-TRAILING-NEXT:    mv a0, s0
+; RV64-ATOMIC-TRAILING-NEXT:    mv a1, s2
+; RV64-ATOMIC-TRAILING-NEXT:    call __sync_val_compare_and_swap_8@plt
+; RV64-ATOMIC-TRAILING-NEXT:    bne a0, s2, .LBB56_1
+; RV64-ATOMIC-TRAILING-NEXT:  # %bb.2: # %atomicrmw.end
+; RV64-ATOMIC-TRAILING-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-TRAILING-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-TRAILING-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-TRAILING-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-TRAILING-NEXT:    addi sp, sp, 32
+; RV64-ATOMIC-TRAILING-NEXT:    ret
   %v = atomicrmw fmin ptr %p, double 1.0 seq_cst, align 8
   ret double %v
 }
@@ -3307,6 +4300,35 @@ define double @rmw64_fmax_seq_cst(ptr %p) nounwind {
 ; RV64-ATOMIC-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
 ; RV64-ATOMIC-NEXT:    addi sp, sp, 32
 ; RV64-ATOMIC-NEXT:    ret
+;
+; RV64-ATOMIC-TRAILING-LABEL: rmw64_fmax_seq_cst:
+; RV64-ATOMIC-TRAILING:       # %bb.0:
+; RV64-ATOMIC-TRAILING-NEXT:    addi sp, sp, -32
+; RV64-ATOMIC-TRAILING-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-TRAILING-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-TRAILING-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-TRAILING-NEXT:    sd s2, 0(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-TRAILING-NEXT:    mv s0, a0
+; RV64-ATOMIC-TRAILING-NEXT:    ld a0, 0(a0)
+; RV64-ATOMIC-TRAILING-NEXT:    li s1, 1023
+; RV64-ATOMIC-TRAILING-NEXT:    slli s1, s1, 52
+; RV64-ATOMIC-TRAILING-NEXT:  .LBB57_1: # %atomicrmw.start
+; RV64-ATOMIC-TRAILING-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV64-ATOMIC-TRAILING-NEXT:    mv s2, a0
+; RV64-ATOMIC-TRAILING-NEXT:    mv a1, s1
+; RV64-ATOMIC-TRAILING-NEXT:    call fmax@plt
+; RV64-ATOMIC-TRAILING-NEXT:    mv a2, a0
+; RV64-ATOMIC-TRAILING-NEXT:    mv a0, s0
+; RV64-ATOMIC-TRAILING-NEXT:    mv a1, s2
+; RV64-ATOMIC-TRAILING-NEXT:    call __sync_val_compare_and_swap_8@plt
+; RV64-ATOMIC-TRAILING-NEXT:    bne a0, s2, .LBB57_1
+; RV64-ATOMIC-TRAILING-NEXT:  # %bb.2: # %atomicrmw.end
+; RV64-ATOMIC-TRAILING-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-TRAILING-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-TRAILING-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-TRAILING-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-TRAILING-NEXT:    addi sp, sp, 32
+; RV64-ATOMIC-TRAILING-NEXT:    ret
   %v = atomicrmw fmax ptr %p, double 1.0 seq_cst, align 8
   ret double %v
 }
@@ -3355,6 +4377,17 @@ define i64 @cmpxchg64_monotonic(ptr %p) nounwind {
 ; RV64-ATOMIC-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
 ; RV64-ATOMIC-NEXT:    addi sp, sp, 16
 ; RV64-ATOMIC-NEXT:    ret
+;
+; RV64-ATOMIC-TRAILING-LABEL: cmpxchg64_monotonic:
+; RV64-ATOMIC-TRAILING:       # %bb.0:
+; RV64-ATOMIC-TRAILING-NEXT:    addi sp, sp, -16
+; RV64-ATOMIC-TRAILING-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-TRAILING-NEXT:    li a2, 1
+; RV64-ATOMIC-TRAILING-NEXT:    li a1, 0
+; RV64-ATOMIC-TRAILING-NEXT:    call __sync_val_compare_and_swap_8@plt
+; RV64-ATOMIC-TRAILING-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-TRAILING-NEXT:    addi sp, sp, 16
+; RV64-ATOMIC-TRAILING-NEXT:    ret
   %res = cmpxchg ptr %p, i64 0, i64 1 monotonic monotonic
   %res.0 = extractvalue { i64, i1 } %res, 0
   ret i64 %res.0
@@ -3404,6 +4437,17 @@ define i64 @cmpxchg64_seq_cst(ptr %p) nounwind {
 ; RV64-ATOMIC-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
 ; RV64-ATOMIC-NEXT:    addi sp, sp, 16
 ; RV64-ATOMIC-NEXT:    ret
+;
+; RV64-ATOMIC-TRAILING-LABEL: cmpxchg64_seq_cst:
+; RV64-ATOMIC-TRAILING:       # %bb.0:
+; RV64-ATOMIC-TRAILING-NEXT:    addi sp, sp, -16
+; RV64-ATOMIC-TRAILING-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-TRAILING-NEXT:    li a2, 1
+; RV64-ATOMIC-TRAILING-NEXT:    li a1, 0
+; RV64-ATOMIC-TRAILING-NEXT:    call __sync_val_compare_and_swap_8@plt
+; RV64-ATOMIC-TRAILING-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-TRAILING-NEXT:    addi sp, sp, 16
+; RV64-ATOMIC-TRAILING-NEXT:    ret
   %res = cmpxchg ptr %p, i64 0, i64 1 seq_cst seq_cst
   %res.0 = extractvalue { i64, i1 } %res, 0
   ret i64 %res.0