[llvm] [M68k] Add remaining addressing modes for Atomic operations (PR #115523)

via llvm-commits llvm-commits at lists.llvm.org
Fri Nov 8 10:19:47 PST 2024


https://github.com/knickish updated https://github.com/llvm/llvm-project/pull/115523

>From 05a269c62be0f4a7bf2c11717b6dde09d7c72763 Mon Sep 17 00:00:00 2001
From: kirk <knickish at gmail.com>
Date: Thu, 7 Nov 2024 22:16:06 +0000
Subject: [PATCH 1/2] [M68k] add tests for atomics under large code model, with
 and without pic

---
 .../M68k/CodeModel/Large/Atomics/cmpxchg.ll   |  310 ++++
 .../M68k/CodeModel/Large/Atomics/fence.ll     |   41 +
 .../CodeModel/Large/Atomics/load-store.ll     | 1161 ++++++++++++++
 .../M68k/CodeModel/Large/Atomics/rmw.ll       | 1390 +++++++++++++++++
 .../M68k/CodeModel/{ => Large}/large-pic.ll   |    0
 .../{ => Large}/large-pie-global-access.ll    |    0
 .../M68k/CodeModel/{ => Large}/large-pie.ll   |    0
 .../CodeModel/{ => Large}/large-static.ll     |    0
 .../M68k/CodeModel/{ => Medium}/medium-pic.ll |    0
 .../{ => Medium}/medium-pie-global-access.ll  |    0
 .../M68k/CodeModel/{ => Medium}/medium-pie.ll |    0
 .../CodeModel/{ => Medium}/medium-static.ll   |    0
 .../M68k/CodeModel/{ => Small}/small-pic.ll   |    0
 .../{ => Small}/small-pie-global-access.ll    |    0
 .../M68k/CodeModel/{ => Small}/small-pie.ll   |    0
 .../CodeModel/{ => Small}/small-static.ll     |    0
 16 files changed, 2902 insertions(+)
 create mode 100644 llvm/test/CodeGen/M68k/CodeModel/Large/Atomics/cmpxchg.ll
 create mode 100644 llvm/test/CodeGen/M68k/CodeModel/Large/Atomics/fence.ll
 create mode 100644 llvm/test/CodeGen/M68k/CodeModel/Large/Atomics/load-store.ll
 create mode 100644 llvm/test/CodeGen/M68k/CodeModel/Large/Atomics/rmw.ll
 rename llvm/test/CodeGen/M68k/CodeModel/{ => Large}/large-pic.ll (100%)
 rename llvm/test/CodeGen/M68k/CodeModel/{ => Large}/large-pie-global-access.ll (100%)
 rename llvm/test/CodeGen/M68k/CodeModel/{ => Large}/large-pie.ll (100%)
 rename llvm/test/CodeGen/M68k/CodeModel/{ => Large}/large-static.ll (100%)
 rename llvm/test/CodeGen/M68k/CodeModel/{ => Medium}/medium-pic.ll (100%)
 rename llvm/test/CodeGen/M68k/CodeModel/{ => Medium}/medium-pie-global-access.ll (100%)
 rename llvm/test/CodeGen/M68k/CodeModel/{ => Medium}/medium-pie.ll (100%)
 rename llvm/test/CodeGen/M68k/CodeModel/{ => Medium}/medium-static.ll (100%)
 rename llvm/test/CodeGen/M68k/CodeModel/{ => Small}/small-pic.ll (100%)
 rename llvm/test/CodeGen/M68k/CodeModel/{ => Small}/small-pie-global-access.ll (100%)
 rename llvm/test/CodeGen/M68k/CodeModel/{ => Small}/small-pie.ll (100%)
 rename llvm/test/CodeGen/M68k/CodeModel/{ => Small}/small-static.ll (100%)

diff --git a/llvm/test/CodeGen/M68k/CodeModel/Large/Atomics/cmpxchg.ll b/llvm/test/CodeGen/M68k/CodeModel/Large/Atomics/cmpxchg.ll
new file mode 100644
index 00000000000000..37ddc8e56dcdaf
--- /dev/null
+++ b/llvm/test/CodeGen/M68k/CodeModel/Large/Atomics/cmpxchg.ll
@@ -0,0 +1,310 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc %s -o - -mtriple=m68k -mcpu=M68000 --code-model=large | FileCheck %s --check-prefix=NO-ATOMIC
+; RUN: llc %s -o - -mtriple=m68k -mcpu=M68010 --code-model=large | FileCheck %s --check-prefix=NO-ATOMIC
+; RUN: llc %s -o - -mtriple=m68k -mcpu=M68000 --code-model=large --relocation-model=pic | FileCheck %s --check-prefix=NO-ATOMIC-PIC
+; RUN: llc %s -o - -mtriple=m68k -mcpu=M68010 --code-model=large --relocation-model=pic | FileCheck %s --check-prefix=NO-ATOMIC-PIC
+; RUN: llc %s -o - -mtriple=m68k -mcpu=M68020 --code-model=large | FileCheck %s --check-prefix=ATOMIC
+; RUN: llc %s -o - -mtriple=m68k -mcpu=M68030 --code-model=large | FileCheck %s --check-prefix=ATOMIC
+; RUN: llc %s -o - -mtriple=m68k -mcpu=M68040 --code-model=large | FileCheck %s --check-prefix=ATOMIC
+; RUN: llc %s -o - -mtriple=m68k -mcpu=M68020 --code-model=large --relocation-model=pic | FileCheck %s --check-prefix=ATOMIC-PIC
+; RUN: llc %s -o - -mtriple=m68k -mcpu=M68030 --code-model=large --relocation-model=pic | FileCheck %s --check-prefix=ATOMIC-PIC
+; RUN: llc %s -o - -mtriple=m68k -mcpu=M68040 --code-model=large --relocation-model=pic | FileCheck %s --check-prefix=ATOMIC-PIC
+
+@thread_id = internal global <{ [5 x i8] }> <{ [5 x i8] zeroinitializer}>, align 4
+
+define { i32, i1 } @std_thread_new() {
+; NO-ATOMIC-LABEL: std_thread_new:
+; NO-ATOMIC:         .cfi_startproc
+; NO-ATOMIC-NEXT:  ; %bb.0: ; %start
+; NO-ATOMIC-NEXT:    suba.l #12, %sp
+; NO-ATOMIC-NEXT:    .cfi_def_cfa_offset -16
+; NO-ATOMIC-NEXT:    lea (thread_id,%pc), %a0
+; NO-ATOMIC-NEXT:    move.l %a0, (%sp)
+; NO-ATOMIC-NEXT:    move.l #1, (8,%sp)
+; NO-ATOMIC-NEXT:    move.l #0, (4,%sp)
+; NO-ATOMIC-NEXT:    jsr __sync_val_compare_and_swap_4
+; NO-ATOMIC-NEXT:    cmpi.l #0, %d0
+; NO-ATOMIC-NEXT:    seq %d1
+; NO-ATOMIC-NEXT:    adda.l #12, %sp
+; NO-ATOMIC-NEXT:    rts
+;
+; NO-ATOMIC-PIC-LABEL: std_thread_new:
+; NO-ATOMIC-PIC:         .cfi_startproc
+; NO-ATOMIC-PIC-NEXT:  ; %bb.0: ; %start
+; NO-ATOMIC-PIC-NEXT:    suba.l #12, %sp
+; NO-ATOMIC-PIC-NEXT:    .cfi_def_cfa_offset -16
+; NO-ATOMIC-PIC-NEXT:    lea (thread_id,%pc), %a0
+; NO-ATOMIC-PIC-NEXT:    move.l %a0, (%sp)
+; NO-ATOMIC-PIC-NEXT:    move.l #1, (8,%sp)
+; NO-ATOMIC-PIC-NEXT:    move.l #0, (4,%sp)
+; NO-ATOMIC-PIC-NEXT:    jsr (__sync_val_compare_and_swap_4@PLT,%pc)
+; NO-ATOMIC-PIC-NEXT:    cmpi.l #0, %d0
+; NO-ATOMIC-PIC-NEXT:    seq %d1
+; NO-ATOMIC-PIC-NEXT:    adda.l #12, %sp
+; NO-ATOMIC-PIC-NEXT:    rts
+;
+; ATOMIC-LABEL: std_thread_new:
+; ATOMIC:         .cfi_startproc
+; ATOMIC-NEXT:  ; %bb.0: ; %start
+; ATOMIC-NEXT:    lea (thread_id,%pc), %a0
+; ATOMIC-NEXT:    moveq #1, %d1
+; ATOMIC-NEXT:    moveq #0, %d0
+; ATOMIC-NEXT:    cas.l %d0, %d1, (%a0)
+; ATOMIC-NEXT:    cmpi.l #0, %d0
+; ATOMIC-NEXT:    seq %d1
+; ATOMIC-NEXT:    rts
+;
+; ATOMIC-PIC-LABEL: std_thread_new:
+; ATOMIC-PIC:         .cfi_startproc
+; ATOMIC-PIC-NEXT:  ; %bb.0: ; %start
+; ATOMIC-PIC-NEXT:    lea (thread_id,%pc), %a0
+; ATOMIC-PIC-NEXT:    moveq #1, %d1
+; ATOMIC-PIC-NEXT:    moveq #0, %d0
+; ATOMIC-PIC-NEXT:    cas.l %d0, %d1, (%a0)
+; ATOMIC-PIC-NEXT:    cmpi.l #0, %d0
+; ATOMIC-PIC-NEXT:    seq %d1
+; ATOMIC-PIC-NEXT:    rts
+start:
+  %1 = cmpxchg ptr @thread_id, i32 0, i32 1 acquire monotonic, align 4
+  ret { i32, i1 } %1
+}
+
+define i1 @cmpxchg_i8_monotonic_monotonic(i8 %cmp, i8 %new, ptr %mem) nounwind {
+; NO-ATOMIC-LABEL: cmpxchg_i8_monotonic_monotonic:
+; NO-ATOMIC:       ; %bb.0:
+; NO-ATOMIC-NEXT:    suba.l #20, %sp
+; NO-ATOMIC-NEXT:    movem.l %d2, (16,%sp) ; 8-byte Folded Spill
+; NO-ATOMIC-NEXT:    move.b (31,%sp), %d0
+; NO-ATOMIC-NEXT:    and.l #255, %d0
+; NO-ATOMIC-NEXT:    move.l %d0, (8,%sp)
+; NO-ATOMIC-NEXT:    move.b (27,%sp), %d2
+; NO-ATOMIC-NEXT:    move.l %d2, %d0
+; NO-ATOMIC-NEXT:    and.l #255, %d0
+; NO-ATOMIC-NEXT:    move.l %d0, (4,%sp)
+; NO-ATOMIC-NEXT:    move.l (32,%sp), (%sp)
+; NO-ATOMIC-NEXT:    jsr __sync_val_compare_and_swap_1
+; NO-ATOMIC-NEXT:    sub.b %d2, %d0
+; NO-ATOMIC-NEXT:    seq %d0
+; NO-ATOMIC-NEXT:    movem.l (16,%sp), %d2 ; 8-byte Folded Reload
+; NO-ATOMIC-NEXT:    adda.l #20, %sp
+; NO-ATOMIC-NEXT:    rts
+;
+; NO-ATOMIC-PIC-LABEL: cmpxchg_i8_monotonic_monotonic:
+; NO-ATOMIC-PIC:       ; %bb.0:
+; NO-ATOMIC-PIC-NEXT:    suba.l #20, %sp
+; NO-ATOMIC-PIC-NEXT:    movem.l %d2, (16,%sp) ; 8-byte Folded Spill
+; NO-ATOMIC-PIC-NEXT:    move.b (31,%sp), %d0
+; NO-ATOMIC-PIC-NEXT:    and.l #255, %d0
+; NO-ATOMIC-PIC-NEXT:    move.l %d0, (8,%sp)
+; NO-ATOMIC-PIC-NEXT:    move.b (27,%sp), %d2
+; NO-ATOMIC-PIC-NEXT:    move.l %d2, %d0
+; NO-ATOMIC-PIC-NEXT:    and.l #255, %d0
+; NO-ATOMIC-PIC-NEXT:    move.l %d0, (4,%sp)
+; NO-ATOMIC-PIC-NEXT:    move.l (32,%sp), (%sp)
+; NO-ATOMIC-PIC-NEXT:    jsr (__sync_val_compare_and_swap_1@PLT,%pc)
+; NO-ATOMIC-PIC-NEXT:    sub.b %d2, %d0
+; NO-ATOMIC-PIC-NEXT:    seq %d0
+; NO-ATOMIC-PIC-NEXT:    movem.l (16,%sp), %d2 ; 8-byte Folded Reload
+; NO-ATOMIC-PIC-NEXT:    adda.l #20, %sp
+; NO-ATOMIC-PIC-NEXT:    rts
+;
+; ATOMIC-LABEL: cmpxchg_i8_monotonic_monotonic:
+; ATOMIC:       ; %bb.0:
+; ATOMIC-NEXT:    suba.l #4, %sp
+; ATOMIC-NEXT:    movem.l %d2, (0,%sp) ; 8-byte Folded Spill
+; ATOMIC-NEXT:    move.l (16,%sp), %a0
+; ATOMIC-NEXT:    move.b (15,%sp), %d0
+; ATOMIC-NEXT:    move.b (11,%sp), %d1
+; ATOMIC-NEXT:    move.b %d1, %d2
+; ATOMIC-NEXT:    cas.b %d2, %d0, (%a0)
+; ATOMIC-NEXT:    sub.b %d1, %d2
+; ATOMIC-NEXT:    seq %d0
+; ATOMIC-NEXT:    movem.l (0,%sp), %d2 ; 8-byte Folded Reload
+; ATOMIC-NEXT:    adda.l #4, %sp
+; ATOMIC-NEXT:    rts
+;
+; ATOMIC-PIC-LABEL: cmpxchg_i8_monotonic_monotonic:
+; ATOMIC-PIC:       ; %bb.0:
+; ATOMIC-PIC-NEXT:    suba.l #4, %sp
+; ATOMIC-PIC-NEXT:    movem.l %d2, (0,%sp) ; 8-byte Folded Spill
+; ATOMIC-PIC-NEXT:    move.l (16,%sp), %a0
+; ATOMIC-PIC-NEXT:    move.b (15,%sp), %d0
+; ATOMIC-PIC-NEXT:    move.b (11,%sp), %d1
+; ATOMIC-PIC-NEXT:    move.b %d1, %d2
+; ATOMIC-PIC-NEXT:    cas.b %d2, %d0, (%a0)
+; ATOMIC-PIC-NEXT:    sub.b %d1, %d2
+; ATOMIC-PIC-NEXT:    seq %d0
+; ATOMIC-PIC-NEXT:    movem.l (0,%sp), %d2 ; 8-byte Folded Reload
+; ATOMIC-PIC-NEXT:    adda.l #4, %sp
+; ATOMIC-PIC-NEXT:    rts
+  %res = cmpxchg ptr %mem, i8 %cmp, i8 %new monotonic monotonic
+  %val = extractvalue {i8, i1} %res, 1
+  ret i1 %val
+}
+
+define i16 @cmpxchg_i16_release_monotonic(i16 %cmp, i16 %new, ptr %mem) nounwind {
+; NO-ATOMIC-LABEL: cmpxchg_i16_release_monotonic:
+; NO-ATOMIC:       ; %bb.0:
+; NO-ATOMIC-NEXT:    suba.l #12, %sp
+; NO-ATOMIC-NEXT:    move.w (22,%sp), %d0
+; NO-ATOMIC-NEXT:    and.l #65535, %d0
+; NO-ATOMIC-NEXT:    move.l %d0, (8,%sp)
+; NO-ATOMIC-NEXT:    move.w (18,%sp), %d0
+; NO-ATOMIC-NEXT:    and.l #65535, %d0
+; NO-ATOMIC-NEXT:    move.l %d0, (4,%sp)
+; NO-ATOMIC-NEXT:    move.l (24,%sp), (%sp)
+; NO-ATOMIC-NEXT:    jsr __sync_val_compare_and_swap_2
+; NO-ATOMIC-NEXT:    adda.l #12, %sp
+; NO-ATOMIC-NEXT:    rts
+;
+; NO-ATOMIC-PIC-LABEL: cmpxchg_i16_release_monotonic:
+; NO-ATOMIC-PIC:       ; %bb.0:
+; NO-ATOMIC-PIC-NEXT:    suba.l #12, %sp
+; NO-ATOMIC-PIC-NEXT:    move.w (22,%sp), %d0
+; NO-ATOMIC-PIC-NEXT:    and.l #65535, %d0
+; NO-ATOMIC-PIC-NEXT:    move.l %d0, (8,%sp)
+; NO-ATOMIC-PIC-NEXT:    move.w (18,%sp), %d0
+; NO-ATOMIC-PIC-NEXT:    and.l #65535, %d0
+; NO-ATOMIC-PIC-NEXT:    move.l %d0, (4,%sp)
+; NO-ATOMIC-PIC-NEXT:    move.l (24,%sp), (%sp)
+; NO-ATOMIC-PIC-NEXT:    jsr (__sync_val_compare_and_swap_2@PLT,%pc)
+; NO-ATOMIC-PIC-NEXT:    adda.l #12, %sp
+; NO-ATOMIC-PIC-NEXT:    rts
+;
+; ATOMIC-LABEL: cmpxchg_i16_release_monotonic:
+; ATOMIC:       ; %bb.0:
+; ATOMIC-NEXT:    move.l (12,%sp), %a0
+; ATOMIC-NEXT:    move.w (10,%sp), %d1
+; ATOMIC-NEXT:    move.w (6,%sp), %d0
+; ATOMIC-NEXT:    cas.w %d0, %d1, (%a0)
+; ATOMIC-NEXT:    rts
+;
+; ATOMIC-PIC-LABEL: cmpxchg_i16_release_monotonic:
+; ATOMIC-PIC:       ; %bb.0:
+; ATOMIC-PIC-NEXT:    move.l (12,%sp), %a0
+; ATOMIC-PIC-NEXT:    move.w (10,%sp), %d1
+; ATOMIC-PIC-NEXT:    move.w (6,%sp), %d0
+; ATOMIC-PIC-NEXT:    cas.w %d0, %d1, (%a0)
+; ATOMIC-PIC-NEXT:    rts
+  %res = cmpxchg ptr %mem, i16 %cmp, i16 %new release monotonic
+  %val = extractvalue {i16, i1} %res, 0
+  ret i16 %val
+}
+
+define i32 @cmpxchg_i32_release_acquire(i32 %cmp, i32 %new, ptr %mem) nounwind {
+; NO-ATOMIC-LABEL: cmpxchg_i32_release_acquire:
+; NO-ATOMIC:       ; %bb.0:
+; NO-ATOMIC-NEXT:    suba.l #12, %sp
+; NO-ATOMIC-NEXT:    move.l (20,%sp), (8,%sp)
+; NO-ATOMIC-NEXT:    move.l (16,%sp), (4,%sp)
+; NO-ATOMIC-NEXT:    move.l (24,%sp), (%sp)
+; NO-ATOMIC-NEXT:    jsr __sync_val_compare_and_swap_4
+; NO-ATOMIC-NEXT:    adda.l #12, %sp
+; NO-ATOMIC-NEXT:    rts
+;
+; NO-ATOMIC-PIC-LABEL: cmpxchg_i32_release_acquire:
+; NO-ATOMIC-PIC:       ; %bb.0:
+; NO-ATOMIC-PIC-NEXT:    suba.l #12, %sp
+; NO-ATOMIC-PIC-NEXT:    move.l (20,%sp), (8,%sp)
+; NO-ATOMIC-PIC-NEXT:    move.l (16,%sp), (4,%sp)
+; NO-ATOMIC-PIC-NEXT:    move.l (24,%sp), (%sp)
+; NO-ATOMIC-PIC-NEXT:    jsr (__sync_val_compare_and_swap_4@PLT,%pc)
+; NO-ATOMIC-PIC-NEXT:    adda.l #12, %sp
+; NO-ATOMIC-PIC-NEXT:    rts
+;
+; ATOMIC-LABEL: cmpxchg_i32_release_acquire:
+; ATOMIC:       ; %bb.0:
+; ATOMIC-NEXT:    move.l (12,%sp), %a0
+; ATOMIC-NEXT:    move.l (8,%sp), %d1
+; ATOMIC-NEXT:    move.l (4,%sp), %d0
+; ATOMIC-NEXT:    cas.l %d0, %d1, (%a0)
+; ATOMIC-NEXT:    rts
+;
+; ATOMIC-PIC-LABEL: cmpxchg_i32_release_acquire:
+; ATOMIC-PIC:       ; %bb.0:
+; ATOMIC-PIC-NEXT:    move.l (12,%sp), %a0
+; ATOMIC-PIC-NEXT:    move.l (8,%sp), %d1
+; ATOMIC-PIC-NEXT:    move.l (4,%sp), %d0
+; ATOMIC-PIC-NEXT:    cas.l %d0, %d1, (%a0)
+; ATOMIC-PIC-NEXT:    rts
+  %res = cmpxchg ptr %mem, i32 %cmp, i32 %new release acquire
+  %val = extractvalue {i32, i1} %res, 0
+  ret i32 %val
+}
+
+define i64 @cmpxchg_i64_seqcst_seqcst(i64 %cmp, i64 %new, ptr %mem) nounwind {
+; NO-ATOMIC-LABEL: cmpxchg_i64_seqcst_seqcst:
+; NO-ATOMIC:       ; %bb.0:
+; NO-ATOMIC-NEXT:    suba.l #36, %sp
+; NO-ATOMIC-NEXT:    move.l (44,%sp), (28,%sp)
+; NO-ATOMIC-NEXT:    move.l (40,%sp), (24,%sp)
+; NO-ATOMIC-NEXT:    lea (24,%sp), %a0
+; NO-ATOMIC-NEXT:    move.l %a0, (4,%sp)
+; NO-ATOMIC-NEXT:    move.l #5, (20,%sp)
+; NO-ATOMIC-NEXT:    move.l #5, (16,%sp)
+; NO-ATOMIC-NEXT:    move.l (52,%sp), (12,%sp)
+; NO-ATOMIC-NEXT:    move.l (48,%sp), (8,%sp)
+; NO-ATOMIC-NEXT:    move.l (56,%sp), (%sp)
+; NO-ATOMIC-NEXT:    jsr __atomic_compare_exchange_8
+; NO-ATOMIC-NEXT:    move.l (28,%sp), %d1
+; NO-ATOMIC-NEXT:    move.l (24,%sp), %d0
+; NO-ATOMIC-NEXT:    adda.l #36, %sp
+; NO-ATOMIC-NEXT:    rts
+;
+; NO-ATOMIC-PIC-LABEL: cmpxchg_i64_seqcst_seqcst:
+; NO-ATOMIC-PIC:       ; %bb.0:
+; NO-ATOMIC-PIC-NEXT:    suba.l #36, %sp
+; NO-ATOMIC-PIC-NEXT:    move.l (44,%sp), (28,%sp)
+; NO-ATOMIC-PIC-NEXT:    move.l (40,%sp), (24,%sp)
+; NO-ATOMIC-PIC-NEXT:    lea (24,%sp), %a0
+; NO-ATOMIC-PIC-NEXT:    move.l %a0, (4,%sp)
+; NO-ATOMIC-PIC-NEXT:    move.l #5, (20,%sp)
+; NO-ATOMIC-PIC-NEXT:    move.l #5, (16,%sp)
+; NO-ATOMIC-PIC-NEXT:    move.l (52,%sp), (12,%sp)
+; NO-ATOMIC-PIC-NEXT:    move.l (48,%sp), (8,%sp)
+; NO-ATOMIC-PIC-NEXT:    move.l (56,%sp), (%sp)
+; NO-ATOMIC-PIC-NEXT:    jsr (__atomic_compare_exchange_8@PLT,%pc)
+; NO-ATOMIC-PIC-NEXT:    move.l (28,%sp), %d1
+; NO-ATOMIC-PIC-NEXT:    move.l (24,%sp), %d0
+; NO-ATOMIC-PIC-NEXT:    adda.l #36, %sp
+; NO-ATOMIC-PIC-NEXT:    rts
+;
+; ATOMIC-LABEL: cmpxchg_i64_seqcst_seqcst:
+; ATOMIC:       ; %bb.0:
+; ATOMIC-NEXT:    suba.l #36, %sp
+; ATOMIC-NEXT:    move.l (44,%sp), (28,%sp)
+; ATOMIC-NEXT:    move.l (40,%sp), (24,%sp)
+; ATOMIC-NEXT:    lea (24,%sp), %a0
+; ATOMIC-NEXT:    move.l %a0, (4,%sp)
+; ATOMIC-NEXT:    move.l #5, (20,%sp)
+; ATOMIC-NEXT:    move.l #5, (16,%sp)
+; ATOMIC-NEXT:    move.l (52,%sp), (12,%sp)
+; ATOMIC-NEXT:    move.l (48,%sp), (8,%sp)
+; ATOMIC-NEXT:    move.l (56,%sp), (%sp)
+; ATOMIC-NEXT:    jsr __atomic_compare_exchange_8
+; ATOMIC-NEXT:    move.l (28,%sp), %d1
+; ATOMIC-NEXT:    move.l (24,%sp), %d0
+; ATOMIC-NEXT:    adda.l #36, %sp
+; ATOMIC-NEXT:    rts
+;
+; ATOMIC-PIC-LABEL: cmpxchg_i64_seqcst_seqcst:
+; ATOMIC-PIC:       ; %bb.0:
+; ATOMIC-PIC-NEXT:    suba.l #36, %sp
+; ATOMIC-PIC-NEXT:    move.l (44,%sp), (28,%sp)
+; ATOMIC-PIC-NEXT:    move.l (40,%sp), (24,%sp)
+; ATOMIC-PIC-NEXT:    lea (24,%sp), %a0
+; ATOMIC-PIC-NEXT:    move.l %a0, (4,%sp)
+; ATOMIC-PIC-NEXT:    move.l #5, (20,%sp)
+; ATOMIC-PIC-NEXT:    move.l #5, (16,%sp)
+; ATOMIC-PIC-NEXT:    move.l (52,%sp), (12,%sp)
+; ATOMIC-PIC-NEXT:    move.l (48,%sp), (8,%sp)
+; ATOMIC-PIC-NEXT:    move.l (56,%sp), (%sp)
+; ATOMIC-PIC-NEXT:    jsr (__atomic_compare_exchange_8@PLT,%pc)
+; ATOMIC-PIC-NEXT:    move.l (28,%sp), %d1
+; ATOMIC-PIC-NEXT:    move.l (24,%sp), %d0
+; ATOMIC-PIC-NEXT:    adda.l #36, %sp
+; ATOMIC-PIC-NEXT:    rts
+  %res = cmpxchg ptr %mem, i64 %cmp, i64 %new seq_cst seq_cst
+  %val = extractvalue {i64, i1} %res, 0
+  ret i64 %val
+}
diff --git a/llvm/test/CodeGen/M68k/CodeModel/Large/Atomics/fence.ll b/llvm/test/CodeGen/M68k/CodeModel/Large/Atomics/fence.ll
new file mode 100644
index 00000000000000..727c4d1192b87b
--- /dev/null
+++ b/llvm/test/CodeGen/M68k/CodeModel/Large/Atomics/fence.ll
@@ -0,0 +1,41 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -mtriple=m68k-linux-gnu --code-model=large < %s | FileCheck %s
+; RUN: llc -mtriple=m68k-linux-gnu --code-model=large --relocation-model=pic < %s | FileCheck %s --check-prefix=PIC
+
+; M68k's libgcc does NOT have __sync_synchronize so we shouldn't
+; lower to that.
+
+define void @atomic_fence() {
+; CHECK-LABEL: atomic_fence:
+; CHECK:         .cfi_startproc
+; CHECK-NEXT:  ; %bb.0: ; %entry
+; CHECK-NEXT:    ;APP
+; CHECK-NEXT:    ;NO_APP
+; CHECK-NEXT:    ;APP
+; CHECK-NEXT:    ;NO_APP
+; CHECK-NEXT:    ;APP
+; CHECK-NEXT:    ;NO_APP
+; CHECK-NEXT:    ;APP
+; CHECK-NEXT:    ;NO_APP
+; CHECK-NEXT:    rts
+;
+; PIC-LABEL: atomic_fence:
+; PIC:         .cfi_startproc
+; PIC-NEXT:  ; %bb.0: ; %entry
+; PIC-NEXT:    ;APP
+; PIC-NEXT:    ;NO_APP
+; PIC-NEXT:    ;APP
+; PIC-NEXT:    ;NO_APP
+; PIC-NEXT:    ;APP
+; PIC-NEXT:    ;NO_APP
+; PIC-NEXT:    ;APP
+; PIC-NEXT:    ;NO_APP
+; PIC-NEXT:    rts
+entry:
+  fence acquire
+  fence release
+  fence acq_rel
+  fence seq_cst
+  ret void
+}
+
diff --git a/llvm/test/CodeGen/M68k/CodeModel/Large/Atomics/load-store.ll b/llvm/test/CodeGen/M68k/CodeModel/Large/Atomics/load-store.ll
new file mode 100644
index 00000000000000..a59a40d8e9fd23
--- /dev/null
+++ b/llvm/test/CodeGen/M68k/CodeModel/Large/Atomics/load-store.ll
@@ -0,0 +1,1161 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc %s -o - -mtriple=m68k -mcpu=M68000 --code-model=large | FileCheck %s --check-prefix=NO-ATOMIC
+; RUN: llc %s -o - -mtriple=m68k -mcpu=M68010 --code-model=large | FileCheck %s --check-prefix=NO-ATOMIC
+; RUN: llc %s -o - -mtriple=m68k -mcpu=M68000 --code-model=large --relocation-model=pic | FileCheck %s --check-prefix=NO-ATOMIC-PIC
+; RUN: llc %s -o - -mtriple=m68k -mcpu=M68010 --code-model=large --relocation-model=pic | FileCheck %s --check-prefix=NO-ATOMIC-PIC
+; RUN: llc %s -o - -mtriple=m68k -mcpu=M68020 --code-model=large | FileCheck %s --check-prefix=ATOMIC
+; RUN: llc %s -o - -mtriple=m68k -mcpu=M68030 --code-model=large | FileCheck %s --check-prefix=ATOMIC
+; RUN: llc %s -o - -mtriple=m68k -mcpu=M68040 --code-model=large | FileCheck %s --check-prefix=ATOMIC
+; RUN: llc %s -o - -mtriple=m68k -mcpu=M68020 --code-model=large --relocation-model=pic | FileCheck %s --check-prefix=ATOMIC-PIC
+; RUN: llc %s -o - -mtriple=m68k -mcpu=M68030 --code-model=large --relocation-model=pic | FileCheck %s --check-prefix=ATOMIC-PIC
+; RUN: llc %s -o - -mtriple=m68k -mcpu=M68040 --code-model=large --relocation-model=pic | FileCheck %s --check-prefix=ATOMIC-PIC
+
+define i8 @atomic_load_i8_unordered(ptr %a) nounwind {
+; NO-ATOMIC-LABEL: atomic_load_i8_unordered:
+; NO-ATOMIC:       ; %bb.0:
+; NO-ATOMIC-NEXT:    move.l (4,%sp), %a0
+; NO-ATOMIC-NEXT:    move.b (%a0), %d0
+; NO-ATOMIC-NEXT:    rts
+;
+; NO-ATOMIC-PIC-LABEL: atomic_load_i8_unordered:
+; NO-ATOMIC-PIC:       ; %bb.0:
+; NO-ATOMIC-PIC-NEXT:    move.l (4,%sp), %a0
+; NO-ATOMIC-PIC-NEXT:    move.b (%a0), %d0
+; NO-ATOMIC-PIC-NEXT:    rts
+;
+; ATOMIC-LABEL: atomic_load_i8_unordered:
+; ATOMIC:       ; %bb.0:
+; ATOMIC-NEXT:    move.l (4,%sp), %a0
+; ATOMIC-NEXT:    move.b (%a0), %d0
+; ATOMIC-NEXT:    rts
+;
+; ATOMIC-PIC-LABEL: atomic_load_i8_unordered:
+; ATOMIC-PIC:       ; %bb.0:
+; ATOMIC-PIC-NEXT:    move.l (4,%sp), %a0
+; ATOMIC-PIC-NEXT:    move.b (%a0), %d0
+; ATOMIC-PIC-NEXT:    rts
+  %1 = load atomic i8, ptr %a unordered, align 1
+  ret i8 %1
+}
+
+define i8 @atomic_load_i8_monotonic(ptr %a) nounwind {
+; NO-ATOMIC-LABEL: atomic_load_i8_monotonic:
+; NO-ATOMIC:       ; %bb.0:
+; NO-ATOMIC-NEXT:    move.l (4,%sp), %a0
+; NO-ATOMIC-NEXT:    move.b (%a0), %d0
+; NO-ATOMIC-NEXT:    rts
+;
+; NO-ATOMIC-PIC-LABEL: atomic_load_i8_monotonic:
+; NO-ATOMIC-PIC:       ; %bb.0:
+; NO-ATOMIC-PIC-NEXT:    move.l (4,%sp), %a0
+; NO-ATOMIC-PIC-NEXT:    move.b (%a0), %d0
+; NO-ATOMIC-PIC-NEXT:    rts
+;
+; ATOMIC-LABEL: atomic_load_i8_monotonic:
+; ATOMIC:       ; %bb.0:
+; ATOMIC-NEXT:    move.l (4,%sp), %a0
+; ATOMIC-NEXT:    move.b (%a0), %d0
+; ATOMIC-NEXT:    rts
+;
+; ATOMIC-PIC-LABEL: atomic_load_i8_monotonic:
+; ATOMIC-PIC:       ; %bb.0:
+; ATOMIC-PIC-NEXT:    move.l (4,%sp), %a0
+; ATOMIC-PIC-NEXT:    move.b (%a0), %d0
+; ATOMIC-PIC-NEXT:    rts
+  %1 = load atomic i8, ptr %a monotonic, align 1
+  ret i8 %1
+}
+
+define i8 @atomic_load_i8_acquire(ptr %a) nounwind {
+; NO-ATOMIC-LABEL: atomic_load_i8_acquire:
+; NO-ATOMIC:       ; %bb.0:
+; NO-ATOMIC-NEXT:    move.l (4,%sp), %a0
+; NO-ATOMIC-NEXT:    move.b (%a0), %d0
+; NO-ATOMIC-NEXT:    rts
+;
+; NO-ATOMIC-PIC-LABEL: atomic_load_i8_acquire:
+; NO-ATOMIC-PIC:       ; %bb.0:
+; NO-ATOMIC-PIC-NEXT:    move.l (4,%sp), %a0
+; NO-ATOMIC-PIC-NEXT:    move.b (%a0), %d0
+; NO-ATOMIC-PIC-NEXT:    rts
+;
+; ATOMIC-LABEL: atomic_load_i8_acquire:
+; ATOMIC:       ; %bb.0:
+; ATOMIC-NEXT:    move.l (4,%sp), %a0
+; ATOMIC-NEXT:    move.b (%a0), %d0
+; ATOMIC-NEXT:    rts
+;
+; ATOMIC-PIC-LABEL: atomic_load_i8_acquire:
+; ATOMIC-PIC:       ; %bb.0:
+; ATOMIC-PIC-NEXT:    move.l (4,%sp), %a0
+; ATOMIC-PIC-NEXT:    move.b (%a0), %d0
+; ATOMIC-PIC-NEXT:    rts
+  %1 = load atomic i8, ptr %a acquire, align 1
+  ret i8 %1
+}
+
+define i8 @atomic_load_i8_seq_cst(ptr %a) nounwind {
+; NO-ATOMIC-LABEL: atomic_load_i8_seq_cst:
+; NO-ATOMIC:       ; %bb.0:
+; NO-ATOMIC-NEXT:    move.l (4,%sp), %a0
+; NO-ATOMIC-NEXT:    move.b (%a0), %d0
+; NO-ATOMIC-NEXT:    rts
+;
+; NO-ATOMIC-PIC-LABEL: atomic_load_i8_seq_cst:
+; NO-ATOMIC-PIC:       ; %bb.0:
+; NO-ATOMIC-PIC-NEXT:    move.l (4,%sp), %a0
+; NO-ATOMIC-PIC-NEXT:    move.b (%a0), %d0
+; NO-ATOMIC-PIC-NEXT:    rts
+;
+; ATOMIC-LABEL: atomic_load_i8_seq_cst:
+; ATOMIC:       ; %bb.0:
+; ATOMIC-NEXT:    move.l (4,%sp), %a0
+; ATOMIC-NEXT:    move.b (%a0), %d0
+; ATOMIC-NEXT:    rts
+;
+; ATOMIC-PIC-LABEL: atomic_load_i8_seq_cst:
+; ATOMIC-PIC:       ; %bb.0:
+; ATOMIC-PIC-NEXT:    move.l (4,%sp), %a0
+; ATOMIC-PIC-NEXT:    move.b (%a0), %d0
+; ATOMIC-PIC-NEXT:    rts
+  %1 = load atomic i8, ptr %a seq_cst, align 1
+  ret i8 %1
+}
+
+define i16 @atomic_load_i16_unordered(ptr %a) nounwind {
+; NO-ATOMIC-LABEL: atomic_load_i16_unordered:
+; NO-ATOMIC:       ; %bb.0:
+; NO-ATOMIC-NEXT:    move.l (4,%sp), %a0
+; NO-ATOMIC-NEXT:    move.w (%a0), %d0
+; NO-ATOMIC-NEXT:    rts
+;
+; NO-ATOMIC-PIC-LABEL: atomic_load_i16_unordered:
+; NO-ATOMIC-PIC:       ; %bb.0:
+; NO-ATOMIC-PIC-NEXT:    move.l (4,%sp), %a0
+; NO-ATOMIC-PIC-NEXT:    move.w (%a0), %d0
+; NO-ATOMIC-PIC-NEXT:    rts
+;
+; ATOMIC-LABEL: atomic_load_i16_unordered:
+; ATOMIC:       ; %bb.0:
+; ATOMIC-NEXT:    move.l (4,%sp), %a0
+; ATOMIC-NEXT:    move.w (%a0), %d0
+; ATOMIC-NEXT:    rts
+;
+; ATOMIC-PIC-LABEL: atomic_load_i16_unordered:
+; ATOMIC-PIC:       ; %bb.0:
+; ATOMIC-PIC-NEXT:    move.l (4,%sp), %a0
+; ATOMIC-PIC-NEXT:    move.w (%a0), %d0
+; ATOMIC-PIC-NEXT:    rts
+  %1 = load atomic i16, ptr %a unordered, align 2
+  ret i16 %1
+}
+
+define i16 @atomic_load_i16_monotonic(ptr %a) nounwind {
+; NO-ATOMIC-LABEL: atomic_load_i16_monotonic:
+; NO-ATOMIC:       ; %bb.0:
+; NO-ATOMIC-NEXT:    move.l (4,%sp), %a0
+; NO-ATOMIC-NEXT:    move.w (%a0), %d0
+; NO-ATOMIC-NEXT:    rts
+;
+; NO-ATOMIC-PIC-LABEL: atomic_load_i16_monotonic:
+; NO-ATOMIC-PIC:       ; %bb.0:
+; NO-ATOMIC-PIC-NEXT:    move.l (4,%sp), %a0
+; NO-ATOMIC-PIC-NEXT:    move.w (%a0), %d0
+; NO-ATOMIC-PIC-NEXT:    rts
+;
+; ATOMIC-LABEL: atomic_load_i16_monotonic:
+; ATOMIC:       ; %bb.0:
+; ATOMIC-NEXT:    move.l (4,%sp), %a0
+; ATOMIC-NEXT:    move.w (%a0), %d0
+; ATOMIC-NEXT:    rts
+;
+; ATOMIC-PIC-LABEL: atomic_load_i16_monotonic:
+; ATOMIC-PIC:       ; %bb.0:
+; ATOMIC-PIC-NEXT:    move.l (4,%sp), %a0
+; ATOMIC-PIC-NEXT:    move.w (%a0), %d0
+; ATOMIC-PIC-NEXT:    rts
+  %1 = load atomic i16, ptr %a monotonic, align 2
+  ret i16 %1
+}
+
+define i16 @atomic_load_i16_acquire(ptr %a) nounwind {
+; NO-ATOMIC-LABEL: atomic_load_i16_acquire:
+; NO-ATOMIC:       ; %bb.0:
+; NO-ATOMIC-NEXT:    move.l (4,%sp), %a0
+; NO-ATOMIC-NEXT:    move.w (%a0), %d0
+; NO-ATOMIC-NEXT:    rts
+;
+; NO-ATOMIC-PIC-LABEL: atomic_load_i16_acquire:
+; NO-ATOMIC-PIC:       ; %bb.0:
+; NO-ATOMIC-PIC-NEXT:    move.l (4,%sp), %a0
+; NO-ATOMIC-PIC-NEXT:    move.w (%a0), %d0
+; NO-ATOMIC-PIC-NEXT:    rts
+;
+; ATOMIC-LABEL: atomic_load_i16_acquire:
+; ATOMIC:       ; %bb.0:
+; ATOMIC-NEXT:    move.l (4,%sp), %a0
+; ATOMIC-NEXT:    move.w (%a0), %d0
+; ATOMIC-NEXT:    rts
+;
+; ATOMIC-PIC-LABEL: atomic_load_i16_acquire:
+; ATOMIC-PIC:       ; %bb.0:
+; ATOMIC-PIC-NEXT:    move.l (4,%sp), %a0
+; ATOMIC-PIC-NEXT:    move.w (%a0), %d0
+; ATOMIC-PIC-NEXT:    rts
+  %1 = load atomic i16, ptr %a acquire, align 2
+  ret i16 %1
+}
+
+define i16 @atomic_load_i16_seq_cst(ptr %a) nounwind {
+; NO-ATOMIC-LABEL: atomic_load_i16_seq_cst:
+; NO-ATOMIC:       ; %bb.0:
+; NO-ATOMIC-NEXT:    move.l (4,%sp), %a0
+; NO-ATOMIC-NEXT:    move.w (%a0), %d0
+; NO-ATOMIC-NEXT:    rts
+;
+; NO-ATOMIC-PIC-LABEL: atomic_load_i16_seq_cst:
+; NO-ATOMIC-PIC:       ; %bb.0:
+; NO-ATOMIC-PIC-NEXT:    move.l (4,%sp), %a0
+; NO-ATOMIC-PIC-NEXT:    move.w (%a0), %d0
+; NO-ATOMIC-PIC-NEXT:    rts
+;
+; ATOMIC-LABEL: atomic_load_i16_seq_cst:
+; ATOMIC:       ; %bb.0:
+; ATOMIC-NEXT:    move.l (4,%sp), %a0
+; ATOMIC-NEXT:    move.w (%a0), %d0
+; ATOMIC-NEXT:    rts
+;
+; ATOMIC-PIC-LABEL: atomic_load_i16_seq_cst:
+; ATOMIC-PIC:       ; %bb.0:
+; ATOMIC-PIC-NEXT:    move.l (4,%sp), %a0
+; ATOMIC-PIC-NEXT:    move.w (%a0), %d0
+; ATOMIC-PIC-NEXT:    rts
+  %1 = load atomic i16, ptr %a seq_cst, align 2
+  ret i16 %1
+}
+
+define i32 @atomic_load_i32_unordered(ptr %a) nounwind {
+; NO-ATOMIC-LABEL: atomic_load_i32_unordered:
+; NO-ATOMIC:       ; %bb.0:
+; NO-ATOMIC-NEXT:    move.l (4,%sp), %a0
+; NO-ATOMIC-NEXT:    move.l (%a0), %d0
+; NO-ATOMIC-NEXT:    rts
+;
+; NO-ATOMIC-PIC-LABEL: atomic_load_i32_unordered:
+; NO-ATOMIC-PIC:       ; %bb.0:
+; NO-ATOMIC-PIC-NEXT:    move.l (4,%sp), %a0
+; NO-ATOMIC-PIC-NEXT:    move.l (%a0), %d0
+; NO-ATOMIC-PIC-NEXT:    rts
+;
+; ATOMIC-LABEL: atomic_load_i32_unordered:
+; ATOMIC:       ; %bb.0:
+; ATOMIC-NEXT:    move.l (4,%sp), %a0
+; ATOMIC-NEXT:    move.l (%a0), %d0
+; ATOMIC-NEXT:    rts
+;
+; ATOMIC-PIC-LABEL: atomic_load_i32_unordered:
+; ATOMIC-PIC:       ; %bb.0:
+; ATOMIC-PIC-NEXT:    move.l (4,%sp), %a0
+; ATOMIC-PIC-NEXT:    move.l (%a0), %d0
+; ATOMIC-PIC-NEXT:    rts
+  %1 = load atomic i32, ptr %a unordered, align 4
+  ret i32 %1
+}
+
+define i32 @atomic_load_i32_monotonic(ptr %a) nounwind {
+; NO-ATOMIC-LABEL: atomic_load_i32_monotonic:
+; NO-ATOMIC:       ; %bb.0:
+; NO-ATOMIC-NEXT:    move.l (4,%sp), %a0
+; NO-ATOMIC-NEXT:    move.l (%a0), %d0
+; NO-ATOMIC-NEXT:    rts
+;
+; NO-ATOMIC-PIC-LABEL: atomic_load_i32_monotonic:
+; NO-ATOMIC-PIC:       ; %bb.0:
+; NO-ATOMIC-PIC-NEXT:    move.l (4,%sp), %a0
+; NO-ATOMIC-PIC-NEXT:    move.l (%a0), %d0
+; NO-ATOMIC-PIC-NEXT:    rts
+;
+; ATOMIC-LABEL: atomic_load_i32_monotonic:
+; ATOMIC:       ; %bb.0:
+; ATOMIC-NEXT:    move.l (4,%sp), %a0
+; ATOMIC-NEXT:    move.l (%a0), %d0
+; ATOMIC-NEXT:    rts
+;
+; ATOMIC-PIC-LABEL: atomic_load_i32_monotonic:
+; ATOMIC-PIC:       ; %bb.0:
+; ATOMIC-PIC-NEXT:    move.l (4,%sp), %a0
+; ATOMIC-PIC-NEXT:    move.l (%a0), %d0
+; ATOMIC-PIC-NEXT:    rts
+  %1 = load atomic i32, ptr %a monotonic, align 4
+  ret i32 %1
+}
+
+define i32 @atomic_load_i32_acquire(ptr %a) nounwind {
+; NO-ATOMIC-LABEL: atomic_load_i32_acquire:
+; NO-ATOMIC:       ; %bb.0:
+; NO-ATOMIC-NEXT:    move.l (4,%sp), %a0
+; NO-ATOMIC-NEXT:    move.l (%a0), %d0
+; NO-ATOMIC-NEXT:    rts
+;
+; NO-ATOMIC-PIC-LABEL: atomic_load_i32_acquire:
+; NO-ATOMIC-PIC:       ; %bb.0:
+; NO-ATOMIC-PIC-NEXT:    move.l (4,%sp), %a0
+; NO-ATOMIC-PIC-NEXT:    move.l (%a0), %d0
+; NO-ATOMIC-PIC-NEXT:    rts
+;
+; ATOMIC-LABEL: atomic_load_i32_acquire:
+; ATOMIC:       ; %bb.0:
+; ATOMIC-NEXT:    move.l (4,%sp), %a0
+; ATOMIC-NEXT:    move.l (%a0), %d0
+; ATOMIC-NEXT:    rts
+;
+; ATOMIC-PIC-LABEL: atomic_load_i32_acquire:
+; ATOMIC-PIC:       ; %bb.0:
+; ATOMIC-PIC-NEXT:    move.l (4,%sp), %a0
+; ATOMIC-PIC-NEXT:    move.l (%a0), %d0
+; ATOMIC-PIC-NEXT:    rts
+  %1 = load atomic i32, ptr %a acquire, align 4
+  ret i32 %1
+}
+
+define i32 @atomic_load_i32_seq_cst(ptr %a) nounwind {
+; NO-ATOMIC-LABEL: atomic_load_i32_seq_cst:
+; NO-ATOMIC:       ; %bb.0:
+; NO-ATOMIC-NEXT:    move.l (4,%sp), %a0
+; NO-ATOMIC-NEXT:    move.l (%a0), %d0
+; NO-ATOMIC-NEXT:    rts
+;
+; NO-ATOMIC-PIC-LABEL: atomic_load_i32_seq_cst:
+; NO-ATOMIC-PIC:       ; %bb.0:
+; NO-ATOMIC-PIC-NEXT:    move.l (4,%sp), %a0
+; NO-ATOMIC-PIC-NEXT:    move.l (%a0), %d0
+; NO-ATOMIC-PIC-NEXT:    rts
+;
+; ATOMIC-LABEL: atomic_load_i32_seq_cst:
+; ATOMIC:       ; %bb.0:
+; ATOMIC-NEXT:    move.l (4,%sp), %a0
+; ATOMIC-NEXT:    move.l (%a0), %d0
+; ATOMIC-NEXT:    rts
+;
+; ATOMIC-PIC-LABEL: atomic_load_i32_seq_cst:
+; ATOMIC-PIC:       ; %bb.0:
+; ATOMIC-PIC-NEXT:    move.l (4,%sp), %a0
+; ATOMIC-PIC-NEXT:    move.l (%a0), %d0
+; ATOMIC-PIC-NEXT:    rts
+  %1 = load atomic i32, ptr %a seq_cst, align 4
+  ret i32 %1
+}
+
+define i64 @atomic_load_i64_unordered(ptr %a) nounwind {
+; NO-ATOMIC-LABEL: atomic_load_i64_unordered:
+; NO-ATOMIC:       ; %bb.0:
+; NO-ATOMIC-NEXT:    suba.l #12, %sp
+; NO-ATOMIC-NEXT:    move.l #0, (4,%sp)
+; NO-ATOMIC-NEXT:    move.l (16,%sp), (%sp)
+; NO-ATOMIC-NEXT:    jsr __atomic_load_8
+; NO-ATOMIC-NEXT:    adda.l #12, %sp
+; NO-ATOMIC-NEXT:    rts
+;
+; NO-ATOMIC-PIC-LABEL: atomic_load_i64_unordered:
+; NO-ATOMIC-PIC:       ; %bb.0:
+; NO-ATOMIC-PIC-NEXT:    suba.l #12, %sp
+; NO-ATOMIC-PIC-NEXT:    move.l #0, (4,%sp)
+; NO-ATOMIC-PIC-NEXT:    move.l (16,%sp), (%sp)
+; NO-ATOMIC-PIC-NEXT:    jsr (__atomic_load_8@PLT,%pc)
+; NO-ATOMIC-PIC-NEXT:    adda.l #12, %sp
+; NO-ATOMIC-PIC-NEXT:    rts
+;
+; ATOMIC-LABEL: atomic_load_i64_unordered:
+; ATOMIC:       ; %bb.0:
+; ATOMIC-NEXT:    suba.l #12, %sp
+; ATOMIC-NEXT:    move.l #0, (4,%sp)
+; ATOMIC-NEXT:    move.l (16,%sp), (%sp)
+; ATOMIC-NEXT:    jsr __atomic_load_8
+; ATOMIC-NEXT:    adda.l #12, %sp
+; ATOMIC-NEXT:    rts
+;
+; ATOMIC-PIC-LABEL: atomic_load_i64_unordered:
+; ATOMIC-PIC:       ; %bb.0:
+; ATOMIC-PIC-NEXT:    suba.l #12, %sp
+; ATOMIC-PIC-NEXT:    move.l #0, (4,%sp)
+; ATOMIC-PIC-NEXT:    move.l (16,%sp), (%sp)
+; ATOMIC-PIC-NEXT:    jsr (__atomic_load_8@PLT,%pc)
+; ATOMIC-PIC-NEXT:    adda.l #12, %sp
+; ATOMIC-PIC-NEXT:    rts
+  %1 = load atomic i64, ptr %a unordered, align 8
+  ret i64 %1
+}
+
+define i64 @atomic_load_i64_monotonic(ptr %a) nounwind {
+; NO-ATOMIC-LABEL: atomic_load_i64_monotonic:
+; NO-ATOMIC:       ; %bb.0:
+; NO-ATOMIC-NEXT:    suba.l #12, %sp
+; NO-ATOMIC-NEXT:    move.l #0, (4,%sp)
+; NO-ATOMIC-NEXT:    move.l (16,%sp), (%sp)
+; NO-ATOMIC-NEXT:    jsr __atomic_load_8
+; NO-ATOMIC-NEXT:    adda.l #12, %sp
+; NO-ATOMIC-NEXT:    rts
+;
+; NO-ATOMIC-PIC-LABEL: atomic_load_i64_monotonic:
+; NO-ATOMIC-PIC:       ; %bb.0:
+; NO-ATOMIC-PIC-NEXT:    suba.l #12, %sp
+; NO-ATOMIC-PIC-NEXT:    move.l #0, (4,%sp)
+; NO-ATOMIC-PIC-NEXT:    move.l (16,%sp), (%sp)
+; NO-ATOMIC-PIC-NEXT:    jsr (__atomic_load_8@PLT,%pc)
+; NO-ATOMIC-PIC-NEXT:    adda.l #12, %sp
+; NO-ATOMIC-PIC-NEXT:    rts
+;
+; ATOMIC-LABEL: atomic_load_i64_monotonic:
+; ATOMIC:       ; %bb.0:
+; ATOMIC-NEXT:    suba.l #12, %sp
+; ATOMIC-NEXT:    move.l #0, (4,%sp)
+; ATOMIC-NEXT:    move.l (16,%sp), (%sp)
+; ATOMIC-NEXT:    jsr __atomic_load_8
+; ATOMIC-NEXT:    adda.l #12, %sp
+; ATOMIC-NEXT:    rts
+;
+; ATOMIC-PIC-LABEL: atomic_load_i64_monotonic:
+; ATOMIC-PIC:       ; %bb.0:
+; ATOMIC-PIC-NEXT:    suba.l #12, %sp
+; ATOMIC-PIC-NEXT:    move.l #0, (4,%sp)
+; ATOMIC-PIC-NEXT:    move.l (16,%sp), (%sp)
+; ATOMIC-PIC-NEXT:    jsr (__atomic_load_8@PLT,%pc)
+; ATOMIC-PIC-NEXT:    adda.l #12, %sp
+; ATOMIC-PIC-NEXT:    rts
+  %1 = load atomic i64, ptr %a monotonic, align 8
+  ret i64 %1
+}
+
+define i64 @atomic_load_i64_acquire(ptr %a) nounwind {
+; NO-ATOMIC-LABEL: atomic_load_i64_acquire:
+; NO-ATOMIC:       ; %bb.0:
+; NO-ATOMIC-NEXT:    suba.l #12, %sp
+; NO-ATOMIC-NEXT:    move.l #2, (4,%sp)
+; NO-ATOMIC-NEXT:    move.l (16,%sp), (%sp)
+; NO-ATOMIC-NEXT:    jsr __atomic_load_8
+; NO-ATOMIC-NEXT:    adda.l #12, %sp
+; NO-ATOMIC-NEXT:    rts
+;
+; NO-ATOMIC-PIC-LABEL: atomic_load_i64_acquire:
+; NO-ATOMIC-PIC:       ; %bb.0:
+; NO-ATOMIC-PIC-NEXT:    suba.l #12, %sp
+; NO-ATOMIC-PIC-NEXT:    move.l #2, (4,%sp)
+; NO-ATOMIC-PIC-NEXT:    move.l (16,%sp), (%sp)
+; NO-ATOMIC-PIC-NEXT:    jsr (__atomic_load_8@PLT,%pc)
+; NO-ATOMIC-PIC-NEXT:    adda.l #12, %sp
+; NO-ATOMIC-PIC-NEXT:    rts
+;
+; ATOMIC-LABEL: atomic_load_i64_acquire:
+; ATOMIC:       ; %bb.0:
+; ATOMIC-NEXT:    suba.l #12, %sp
+; ATOMIC-NEXT:    move.l #2, (4,%sp)
+; ATOMIC-NEXT:    move.l (16,%sp), (%sp)
+; ATOMIC-NEXT:    jsr __atomic_load_8
+; ATOMIC-NEXT:    adda.l #12, %sp
+; ATOMIC-NEXT:    rts
+;
+; ATOMIC-PIC-LABEL: atomic_load_i64_acquire:
+; ATOMIC-PIC:       ; %bb.0:
+; ATOMIC-PIC-NEXT:    suba.l #12, %sp
+; ATOMIC-PIC-NEXT:    move.l #2, (4,%sp)
+; ATOMIC-PIC-NEXT:    move.l (16,%sp), (%sp)
+; ATOMIC-PIC-NEXT:    jsr (__atomic_load_8@PLT,%pc)
+; ATOMIC-PIC-NEXT:    adda.l #12, %sp
+; ATOMIC-PIC-NEXT:    rts
+  %1 = load atomic i64, ptr %a acquire, align 8 ; every check prefix expects a __atomic_load_8 libcall; #2 is stacked as its second (ordering) operand
+  ret i64 %1
+}
+
+define i64 @atomic_load_i64_seq_cst(ptr %a) nounwind {
+; NO-ATOMIC-LABEL: atomic_load_i64_seq_cst:
+; NO-ATOMIC:       ; %bb.0:
+; NO-ATOMIC-NEXT:    suba.l #12, %sp
+; NO-ATOMIC-NEXT:    move.l #5, (4,%sp)
+; NO-ATOMIC-NEXT:    move.l (16,%sp), (%sp)
+; NO-ATOMIC-NEXT:    jsr __atomic_load_8
+; NO-ATOMIC-NEXT:    adda.l #12, %sp
+; NO-ATOMIC-NEXT:    rts
+;
+; NO-ATOMIC-PIC-LABEL: atomic_load_i64_seq_cst:
+; NO-ATOMIC-PIC:       ; %bb.0:
+; NO-ATOMIC-PIC-NEXT:    suba.l #12, %sp
+; NO-ATOMIC-PIC-NEXT:    move.l #5, (4,%sp)
+; NO-ATOMIC-PIC-NEXT:    move.l (16,%sp), (%sp)
+; NO-ATOMIC-PIC-NEXT:    jsr (__atomic_load_8@PLT,%pc)
+; NO-ATOMIC-PIC-NEXT:    adda.l #12, %sp
+; NO-ATOMIC-PIC-NEXT:    rts
+;
+; ATOMIC-LABEL: atomic_load_i64_seq_cst:
+; ATOMIC:       ; %bb.0:
+; ATOMIC-NEXT:    suba.l #12, %sp
+; ATOMIC-NEXT:    move.l #5, (4,%sp)
+; ATOMIC-NEXT:    move.l (16,%sp), (%sp)
+; ATOMIC-NEXT:    jsr __atomic_load_8
+; ATOMIC-NEXT:    adda.l #12, %sp
+; ATOMIC-NEXT:    rts
+;
+; ATOMIC-PIC-LABEL: atomic_load_i64_seq_cst:
+; ATOMIC-PIC:       ; %bb.0:
+; ATOMIC-PIC-NEXT:    suba.l #12, %sp
+; ATOMIC-PIC-NEXT:    move.l #5, (4,%sp)
+; ATOMIC-PIC-NEXT:    move.l (16,%sp), (%sp)
+; ATOMIC-PIC-NEXT:    jsr (__atomic_load_8@PLT,%pc)
+; ATOMIC-PIC-NEXT:    adda.l #12, %sp
+; ATOMIC-PIC-NEXT:    rts
+  %1 = load atomic i64, ptr %a seq_cst, align 8 ; every check prefix expects a __atomic_load_8 libcall; #5 is stacked as its second (ordering) operand
+  ret i64 %1
+}
+
+define void @atomic_store_i8_unordered(ptr %a, i8 %val) nounwind {
+; NO-ATOMIC-LABEL: atomic_store_i8_unordered:
+; NO-ATOMIC:       ; %bb.0:
+; NO-ATOMIC-NEXT:    move.b (11,%sp), %d0
+; NO-ATOMIC-NEXT:    move.l (4,%sp), %a0
+; NO-ATOMIC-NEXT:    move.b %d0, (%a0)
+; NO-ATOMIC-NEXT:    rts
+;
+; NO-ATOMIC-PIC-LABEL: atomic_store_i8_unordered:
+; NO-ATOMIC-PIC:       ; %bb.0:
+; NO-ATOMIC-PIC-NEXT:    move.b (11,%sp), %d0
+; NO-ATOMIC-PIC-NEXT:    move.l (4,%sp), %a0
+; NO-ATOMIC-PIC-NEXT:    move.b %d0, (%a0)
+; NO-ATOMIC-PIC-NEXT:    rts
+;
+; ATOMIC-LABEL: atomic_store_i8_unordered:
+; ATOMIC:       ; %bb.0:
+; ATOMIC-NEXT:    move.b (11,%sp), %d0
+; ATOMIC-NEXT:    move.l (4,%sp), %a0
+; ATOMIC-NEXT:    move.b %d0, (%a0)
+; ATOMIC-NEXT:    rts
+;
+; ATOMIC-PIC-LABEL: atomic_store_i8_unordered:
+; ATOMIC-PIC:       ; %bb.0:
+; ATOMIC-PIC-NEXT:    move.b (11,%sp), %d0
+; ATOMIC-PIC-NEXT:    move.l (4,%sp), %a0
+; ATOMIC-PIC-NEXT:    move.b %d0, (%a0)
+; ATOMIC-PIC-NEXT:    rts
+  store atomic i8 %val, ptr %a unordered, align 1 ; every check prefix expects a plain move.b through %a0, with no libcall
+  ret void
+}
+
+define void @atomic_store_i8_monotonic(ptr %a, i8 %val) nounwind {
+; NO-ATOMIC-LABEL: atomic_store_i8_monotonic:
+; NO-ATOMIC:       ; %bb.0:
+; NO-ATOMIC-NEXT:    move.b (11,%sp), %d0
+; NO-ATOMIC-NEXT:    move.l (4,%sp), %a0
+; NO-ATOMIC-NEXT:    move.b %d0, (%a0)
+; NO-ATOMIC-NEXT:    rts
+;
+; NO-ATOMIC-PIC-LABEL: atomic_store_i8_monotonic:
+; NO-ATOMIC-PIC:       ; %bb.0:
+; NO-ATOMIC-PIC-NEXT:    move.b (11,%sp), %d0
+; NO-ATOMIC-PIC-NEXT:    move.l (4,%sp), %a0
+; NO-ATOMIC-PIC-NEXT:    move.b %d0, (%a0)
+; NO-ATOMIC-PIC-NEXT:    rts
+;
+; ATOMIC-LABEL: atomic_store_i8_monotonic:
+; ATOMIC:       ; %bb.0:
+; ATOMIC-NEXT:    move.b (11,%sp), %d0
+; ATOMIC-NEXT:    move.l (4,%sp), %a0
+; ATOMIC-NEXT:    move.b %d0, (%a0)
+; ATOMIC-NEXT:    rts
+;
+; ATOMIC-PIC-LABEL: atomic_store_i8_monotonic:
+; ATOMIC-PIC:       ; %bb.0:
+; ATOMIC-PIC-NEXT:    move.b (11,%sp), %d0
+; ATOMIC-PIC-NEXT:    move.l (4,%sp), %a0
+; ATOMIC-PIC-NEXT:    move.b %d0, (%a0)
+; ATOMIC-PIC-NEXT:    rts
+  store atomic i8 %val, ptr %a monotonic, align 1 ; every check prefix expects a plain move.b through %a0, with no libcall
+  ret void
+}
+
+define void @atomic_store_i8_release(ptr %a, i8 %val) nounwind {
+; NO-ATOMIC-LABEL: atomic_store_i8_release:
+; NO-ATOMIC:       ; %bb.0:
+; NO-ATOMIC-NEXT:    move.b (11,%sp), %d0
+; NO-ATOMIC-NEXT:    move.l (4,%sp), %a0
+; NO-ATOMIC-NEXT:    move.b %d0, (%a0)
+; NO-ATOMIC-NEXT:    rts
+;
+; NO-ATOMIC-PIC-LABEL: atomic_store_i8_release:
+; NO-ATOMIC-PIC:       ; %bb.0:
+; NO-ATOMIC-PIC-NEXT:    move.b (11,%sp), %d0
+; NO-ATOMIC-PIC-NEXT:    move.l (4,%sp), %a0
+; NO-ATOMIC-PIC-NEXT:    move.b %d0, (%a0)
+; NO-ATOMIC-PIC-NEXT:    rts
+;
+; ATOMIC-LABEL: atomic_store_i8_release:
+; ATOMIC:       ; %bb.0:
+; ATOMIC-NEXT:    move.b (11,%sp), %d0
+; ATOMIC-NEXT:    move.l (4,%sp), %a0
+; ATOMIC-NEXT:    move.b %d0, (%a0)
+; ATOMIC-NEXT:    rts
+;
+; ATOMIC-PIC-LABEL: atomic_store_i8_release:
+; ATOMIC-PIC:       ; %bb.0:
+; ATOMIC-PIC-NEXT:    move.b (11,%sp), %d0
+; ATOMIC-PIC-NEXT:    move.l (4,%sp), %a0
+; ATOMIC-PIC-NEXT:    move.b %d0, (%a0)
+; ATOMIC-PIC-NEXT:    rts
+  store atomic i8 %val, ptr %a release, align 1 ; every check prefix expects a plain move.b through %a0, with no libcall
+  ret void
+}
+
+define void @atomic_store_i8_seq_cst(ptr %a, i8 %val) nounwind {
+; NO-ATOMIC-LABEL: atomic_store_i8_seq_cst:
+; NO-ATOMIC:       ; %bb.0:
+; NO-ATOMIC-NEXT:    move.b (11,%sp), %d0
+; NO-ATOMIC-NEXT:    move.l (4,%sp), %a0
+; NO-ATOMIC-NEXT:    move.b %d0, (%a0)
+; NO-ATOMIC-NEXT:    rts
+;
+; NO-ATOMIC-PIC-LABEL: atomic_store_i8_seq_cst:
+; NO-ATOMIC-PIC:       ; %bb.0:
+; NO-ATOMIC-PIC-NEXT:    move.b (11,%sp), %d0
+; NO-ATOMIC-PIC-NEXT:    move.l (4,%sp), %a0
+; NO-ATOMIC-PIC-NEXT:    move.b %d0, (%a0)
+; NO-ATOMIC-PIC-NEXT:    rts
+;
+; ATOMIC-LABEL: atomic_store_i8_seq_cst:
+; ATOMIC:       ; %bb.0:
+; ATOMIC-NEXT:    move.b (11,%sp), %d0
+; ATOMIC-NEXT:    move.l (4,%sp), %a0
+; ATOMIC-NEXT:    move.b %d0, (%a0)
+; ATOMIC-NEXT:    rts
+;
+; ATOMIC-PIC-LABEL: atomic_store_i8_seq_cst:
+; ATOMIC-PIC:       ; %bb.0:
+; ATOMIC-PIC-NEXT:    move.b (11,%sp), %d0
+; ATOMIC-PIC-NEXT:    move.l (4,%sp), %a0
+; ATOMIC-PIC-NEXT:    move.b %d0, (%a0)
+; ATOMIC-PIC-NEXT:    rts
+  store atomic i8 %val, ptr %a seq_cst, align 1 ; every check prefix expects a plain move.b through %a0, with no libcall
+  ret void
+}
+
+define void @atomic_store_i16_unordered(ptr %a, i16 %val) nounwind {
+; NO-ATOMIC-LABEL: atomic_store_i16_unordered:
+; NO-ATOMIC:       ; %bb.0:
+; NO-ATOMIC-NEXT:    move.w (10,%sp), %d0
+; NO-ATOMIC-NEXT:    move.l (4,%sp), %a0
+; NO-ATOMIC-NEXT:    move.w %d0, (%a0)
+; NO-ATOMIC-NEXT:    rts
+;
+; NO-ATOMIC-PIC-LABEL: atomic_store_i16_unordered:
+; NO-ATOMIC-PIC:       ; %bb.0:
+; NO-ATOMIC-PIC-NEXT:    move.w (10,%sp), %d0
+; NO-ATOMIC-PIC-NEXT:    move.l (4,%sp), %a0
+; NO-ATOMIC-PIC-NEXT:    move.w %d0, (%a0)
+; NO-ATOMIC-PIC-NEXT:    rts
+;
+; ATOMIC-LABEL: atomic_store_i16_unordered:
+; ATOMIC:       ; %bb.0:
+; ATOMIC-NEXT:    move.w (10,%sp), %d0
+; ATOMIC-NEXT:    move.l (4,%sp), %a0
+; ATOMIC-NEXT:    move.w %d0, (%a0)
+; ATOMIC-NEXT:    rts
+;
+; ATOMIC-PIC-LABEL: atomic_store_i16_unordered:
+; ATOMIC-PIC:       ; %bb.0:
+; ATOMIC-PIC-NEXT:    move.w (10,%sp), %d0
+; ATOMIC-PIC-NEXT:    move.l (4,%sp), %a0
+; ATOMIC-PIC-NEXT:    move.w %d0, (%a0)
+; ATOMIC-PIC-NEXT:    rts
+  store atomic i16 %val, ptr %a unordered, align 2 ; every check prefix expects a plain move.w through %a0, with no libcall
+  ret void
+}
+
+define void @atomic_store_i16_monotonic(ptr %a, i16 %val) nounwind {
+; NO-ATOMIC-LABEL: atomic_store_i16_monotonic:
+; NO-ATOMIC:       ; %bb.0:
+; NO-ATOMIC-NEXT:    move.w (10,%sp), %d0
+; NO-ATOMIC-NEXT:    move.l (4,%sp), %a0
+; NO-ATOMIC-NEXT:    move.w %d0, (%a0)
+; NO-ATOMIC-NEXT:    rts
+;
+; NO-ATOMIC-PIC-LABEL: atomic_store_i16_monotonic:
+; NO-ATOMIC-PIC:       ; %bb.0:
+; NO-ATOMIC-PIC-NEXT:    move.w (10,%sp), %d0
+; NO-ATOMIC-PIC-NEXT:    move.l (4,%sp), %a0
+; NO-ATOMIC-PIC-NEXT:    move.w %d0, (%a0)
+; NO-ATOMIC-PIC-NEXT:    rts
+;
+; ATOMIC-LABEL: atomic_store_i16_monotonic:
+; ATOMIC:       ; %bb.0:
+; ATOMIC-NEXT:    move.w (10,%sp), %d0
+; ATOMIC-NEXT:    move.l (4,%sp), %a0
+; ATOMIC-NEXT:    move.w %d0, (%a0)
+; ATOMIC-NEXT:    rts
+;
+; ATOMIC-PIC-LABEL: atomic_store_i16_monotonic:
+; ATOMIC-PIC:       ; %bb.0:
+; ATOMIC-PIC-NEXT:    move.w (10,%sp), %d0
+; ATOMIC-PIC-NEXT:    move.l (4,%sp), %a0
+; ATOMIC-PIC-NEXT:    move.w %d0, (%a0)
+; ATOMIC-PIC-NEXT:    rts
+  store atomic i16 %val, ptr %a monotonic, align 2 ; every check prefix expects a plain move.w through %a0, with no libcall
+  ret void
+}
+
+define void @atomic_store_i16_release(ptr %a, i16 %val) nounwind {
+; NO-ATOMIC-LABEL: atomic_store_i16_release:
+; NO-ATOMIC:       ; %bb.0:
+; NO-ATOMIC-NEXT:    move.w (10,%sp), %d0
+; NO-ATOMIC-NEXT:    move.l (4,%sp), %a0
+; NO-ATOMIC-NEXT:    move.w %d0, (%a0)
+; NO-ATOMIC-NEXT:    rts
+;
+; NO-ATOMIC-PIC-LABEL: atomic_store_i16_release:
+; NO-ATOMIC-PIC:       ; %bb.0:
+; NO-ATOMIC-PIC-NEXT:    move.w (10,%sp), %d0
+; NO-ATOMIC-PIC-NEXT:    move.l (4,%sp), %a0
+; NO-ATOMIC-PIC-NEXT:    move.w %d0, (%a0)
+; NO-ATOMIC-PIC-NEXT:    rts
+;
+; ATOMIC-LABEL: atomic_store_i16_release:
+; ATOMIC:       ; %bb.0:
+; ATOMIC-NEXT:    move.w (10,%sp), %d0
+; ATOMIC-NEXT:    move.l (4,%sp), %a0
+; ATOMIC-NEXT:    move.w %d0, (%a0)
+; ATOMIC-NEXT:    rts
+;
+; ATOMIC-PIC-LABEL: atomic_store_i16_release:
+; ATOMIC-PIC:       ; %bb.0:
+; ATOMIC-PIC-NEXT:    move.w (10,%sp), %d0
+; ATOMIC-PIC-NEXT:    move.l (4,%sp), %a0
+; ATOMIC-PIC-NEXT:    move.w %d0, (%a0)
+; ATOMIC-PIC-NEXT:    rts
+  store atomic i16 %val, ptr %a release, align 2 ; every check prefix expects a plain move.w through %a0, with no libcall
+  ret void
+}
+
+define void @atomic_store_i16_seq_cst(ptr %a, i16 %val) nounwind {
+; NO-ATOMIC-LABEL: atomic_store_i16_seq_cst:
+; NO-ATOMIC:       ; %bb.0:
+; NO-ATOMIC-NEXT:    move.w (10,%sp), %d0
+; NO-ATOMIC-NEXT:    move.l (4,%sp), %a0
+; NO-ATOMIC-NEXT:    move.w %d0, (%a0)
+; NO-ATOMIC-NEXT:    rts
+;
+; NO-ATOMIC-PIC-LABEL: atomic_store_i16_seq_cst:
+; NO-ATOMIC-PIC:       ; %bb.0:
+; NO-ATOMIC-PIC-NEXT:    move.w (10,%sp), %d0
+; NO-ATOMIC-PIC-NEXT:    move.l (4,%sp), %a0
+; NO-ATOMIC-PIC-NEXT:    move.w %d0, (%a0)
+; NO-ATOMIC-PIC-NEXT:    rts
+;
+; ATOMIC-LABEL: atomic_store_i16_seq_cst:
+; ATOMIC:       ; %bb.0:
+; ATOMIC-NEXT:    move.w (10,%sp), %d0
+; ATOMIC-NEXT:    move.l (4,%sp), %a0
+; ATOMIC-NEXT:    move.w %d0, (%a0)
+; ATOMIC-NEXT:    rts
+;
+; ATOMIC-PIC-LABEL: atomic_store_i16_seq_cst:
+; ATOMIC-PIC:       ; %bb.0:
+; ATOMIC-PIC-NEXT:    move.w (10,%sp), %d0
+; ATOMIC-PIC-NEXT:    move.l (4,%sp), %a0
+; ATOMIC-PIC-NEXT:    move.w %d0, (%a0)
+; ATOMIC-PIC-NEXT:    rts
+  store atomic i16 %val, ptr %a seq_cst, align 2 ; every check prefix expects a plain move.w through %a0, with no libcall
+  ret void
+}
+
+define void @atomic_store_i32_unordered(ptr %a, i32 %val) nounwind {
+; NO-ATOMIC-LABEL: atomic_store_i32_unordered:
+; NO-ATOMIC:       ; %bb.0:
+; NO-ATOMIC-NEXT:    move.l (8,%sp), %d0
+; NO-ATOMIC-NEXT:    move.l (4,%sp), %a0
+; NO-ATOMIC-NEXT:    move.l %d0, (%a0)
+; NO-ATOMIC-NEXT:    rts
+;
+; NO-ATOMIC-PIC-LABEL: atomic_store_i32_unordered:
+; NO-ATOMIC-PIC:       ; %bb.0:
+; NO-ATOMIC-PIC-NEXT:    move.l (8,%sp), %d0
+; NO-ATOMIC-PIC-NEXT:    move.l (4,%sp), %a0
+; NO-ATOMIC-PIC-NEXT:    move.l %d0, (%a0)
+; NO-ATOMIC-PIC-NEXT:    rts
+;
+; ATOMIC-LABEL: atomic_store_i32_unordered:
+; ATOMIC:       ; %bb.0:
+; ATOMIC-NEXT:    move.l (8,%sp), %d0
+; ATOMIC-NEXT:    move.l (4,%sp), %a0
+; ATOMIC-NEXT:    move.l %d0, (%a0)
+; ATOMIC-NEXT:    rts
+;
+; ATOMIC-PIC-LABEL: atomic_store_i32_unordered:
+; ATOMIC-PIC:       ; %bb.0:
+; ATOMIC-PIC-NEXT:    move.l (8,%sp), %d0
+; ATOMIC-PIC-NEXT:    move.l (4,%sp), %a0
+; ATOMIC-PIC-NEXT:    move.l %d0, (%a0)
+; ATOMIC-PIC-NEXT:    rts
+  store atomic i32 %val, ptr %a unordered, align 4 ; every check prefix expects a plain move.l through %a0, with no libcall
+  ret void
+}
+
+define void @atomic_store_i32_monotonic(ptr %a, i32 %val) nounwind {
+; NO-ATOMIC-LABEL: atomic_store_i32_monotonic:
+; NO-ATOMIC:       ; %bb.0:
+; NO-ATOMIC-NEXT:    move.l (8,%sp), %d0
+; NO-ATOMIC-NEXT:    move.l (4,%sp), %a0
+; NO-ATOMIC-NEXT:    move.l %d0, (%a0)
+; NO-ATOMIC-NEXT:    rts
+;
+; NO-ATOMIC-PIC-LABEL: atomic_store_i32_monotonic:
+; NO-ATOMIC-PIC:       ; %bb.0:
+; NO-ATOMIC-PIC-NEXT:    move.l (8,%sp), %d0
+; NO-ATOMIC-PIC-NEXT:    move.l (4,%sp), %a0
+; NO-ATOMIC-PIC-NEXT:    move.l %d0, (%a0)
+; NO-ATOMIC-PIC-NEXT:    rts
+;
+; ATOMIC-LABEL: atomic_store_i32_monotonic:
+; ATOMIC:       ; %bb.0:
+; ATOMIC-NEXT:    move.l (8,%sp), %d0
+; ATOMIC-NEXT:    move.l (4,%sp), %a0
+; ATOMIC-NEXT:    move.l %d0, (%a0)
+; ATOMIC-NEXT:    rts
+;
+; ATOMIC-PIC-LABEL: atomic_store_i32_monotonic:
+; ATOMIC-PIC:       ; %bb.0:
+; ATOMIC-PIC-NEXT:    move.l (8,%sp), %d0
+; ATOMIC-PIC-NEXT:    move.l (4,%sp), %a0
+; ATOMIC-PIC-NEXT:    move.l %d0, (%a0)
+; ATOMIC-PIC-NEXT:    rts
+  store atomic i32 %val, ptr %a monotonic, align 4 ; every check prefix expects a plain move.l through %a0, with no libcall
+  ret void
+}
+
+define void @atomic_store_i32_release(ptr %a, i32 %val) nounwind {
+; NO-ATOMIC-LABEL: atomic_store_i32_release:
+; NO-ATOMIC:       ; %bb.0:
+; NO-ATOMIC-NEXT:    move.l (8,%sp), %d0
+; NO-ATOMIC-NEXT:    move.l (4,%sp), %a0
+; NO-ATOMIC-NEXT:    move.l %d0, (%a0)
+; NO-ATOMIC-NEXT:    rts
+;
+; NO-ATOMIC-PIC-LABEL: atomic_store_i32_release:
+; NO-ATOMIC-PIC:       ; %bb.0:
+; NO-ATOMIC-PIC-NEXT:    move.l (8,%sp), %d0
+; NO-ATOMIC-PIC-NEXT:    move.l (4,%sp), %a0
+; NO-ATOMIC-PIC-NEXT:    move.l %d0, (%a0)
+; NO-ATOMIC-PIC-NEXT:    rts
+;
+; ATOMIC-LABEL: atomic_store_i32_release:
+; ATOMIC:       ; %bb.0:
+; ATOMIC-NEXT:    move.l (8,%sp), %d0
+; ATOMIC-NEXT:    move.l (4,%sp), %a0
+; ATOMIC-NEXT:    move.l %d0, (%a0)
+; ATOMIC-NEXT:    rts
+;
+; ATOMIC-PIC-LABEL: atomic_store_i32_release:
+; ATOMIC-PIC:       ; %bb.0:
+; ATOMIC-PIC-NEXT:    move.l (8,%sp), %d0
+; ATOMIC-PIC-NEXT:    move.l (4,%sp), %a0
+; ATOMIC-PIC-NEXT:    move.l %d0, (%a0)
+; ATOMIC-PIC-NEXT:    rts
+  store atomic i32 %val, ptr %a release, align 4 ; every check prefix expects a plain move.l through %a0, with no libcall
+  ret void
+}
+
+define void @atomic_store_i32_seq_cst(ptr %a, i32 %val) nounwind {
+; NO-ATOMIC-LABEL: atomic_store_i32_seq_cst:
+; NO-ATOMIC:       ; %bb.0:
+; NO-ATOMIC-NEXT:    move.l (8,%sp), %d0
+; NO-ATOMIC-NEXT:    move.l (4,%sp), %a0
+; NO-ATOMIC-NEXT:    move.l %d0, (%a0)
+; NO-ATOMIC-NEXT:    rts
+;
+; NO-ATOMIC-PIC-LABEL: atomic_store_i32_seq_cst:
+; NO-ATOMIC-PIC:       ; %bb.0:
+; NO-ATOMIC-PIC-NEXT:    move.l (8,%sp), %d0
+; NO-ATOMIC-PIC-NEXT:    move.l (4,%sp), %a0
+; NO-ATOMIC-PIC-NEXT:    move.l %d0, (%a0)
+; NO-ATOMIC-PIC-NEXT:    rts
+;
+; ATOMIC-LABEL: atomic_store_i32_seq_cst:
+; ATOMIC:       ; %bb.0:
+; ATOMIC-NEXT:    move.l (8,%sp), %d0
+; ATOMIC-NEXT:    move.l (4,%sp), %a0
+; ATOMIC-NEXT:    move.l %d0, (%a0)
+; ATOMIC-NEXT:    rts
+;
+; ATOMIC-PIC-LABEL: atomic_store_i32_seq_cst:
+; ATOMIC-PIC:       ; %bb.0:
+; ATOMIC-PIC-NEXT:    move.l (8,%sp), %d0
+; ATOMIC-PIC-NEXT:    move.l (4,%sp), %a0
+; ATOMIC-PIC-NEXT:    move.l %d0, (%a0)
+; ATOMIC-PIC-NEXT:    rts
+  store atomic i32 %val, ptr %a seq_cst, align 4 ; every check prefix expects a plain move.l through %a0, with no libcall
+  ret void
+}
+
+define void @atomic_store_i64_unordered(ptr %a, i64 %val) nounwind {
+; NO-ATOMIC-LABEL: atomic_store_i64_unordered:
+; NO-ATOMIC:       ; %bb.0:
+; NO-ATOMIC-NEXT:    suba.l #20, %sp
+; NO-ATOMIC-NEXT:    move.l #0, (12,%sp)
+; NO-ATOMIC-NEXT:    move.l (32,%sp), (8,%sp)
+; NO-ATOMIC-NEXT:    move.l (28,%sp), (4,%sp)
+; NO-ATOMIC-NEXT:    move.l (24,%sp), (%sp)
+; NO-ATOMIC-NEXT:    jsr __atomic_store_8
+; NO-ATOMIC-NEXT:    adda.l #20, %sp
+; NO-ATOMIC-NEXT:    rts
+;
+; NO-ATOMIC-PIC-LABEL: atomic_store_i64_unordered:
+; NO-ATOMIC-PIC:       ; %bb.0:
+; NO-ATOMIC-PIC-NEXT:    suba.l #20, %sp
+; NO-ATOMIC-PIC-NEXT:    move.l #0, (12,%sp)
+; NO-ATOMIC-PIC-NEXT:    move.l (32,%sp), (8,%sp)
+; NO-ATOMIC-PIC-NEXT:    move.l (28,%sp), (4,%sp)
+; NO-ATOMIC-PIC-NEXT:    move.l (24,%sp), (%sp)
+; NO-ATOMIC-PIC-NEXT:    jsr (__atomic_store_8@PLT,%pc)
+; NO-ATOMIC-PIC-NEXT:    adda.l #20, %sp
+; NO-ATOMIC-PIC-NEXT:    rts
+;
+; ATOMIC-LABEL: atomic_store_i64_unordered:
+; ATOMIC:       ; %bb.0:
+; ATOMIC-NEXT:    suba.l #20, %sp
+; ATOMIC-NEXT:    move.l #0, (12,%sp)
+; ATOMIC-NEXT:    move.l (32,%sp), (8,%sp)
+; ATOMIC-NEXT:    move.l (28,%sp), (4,%sp)
+; ATOMIC-NEXT:    move.l (24,%sp), (%sp)
+; ATOMIC-NEXT:    jsr __atomic_store_8
+; ATOMIC-NEXT:    adda.l #20, %sp
+; ATOMIC-NEXT:    rts
+;
+; ATOMIC-PIC-LABEL: atomic_store_i64_unordered:
+; ATOMIC-PIC:       ; %bb.0:
+; ATOMIC-PIC-NEXT:    suba.l #20, %sp
+; ATOMIC-PIC-NEXT:    move.l #0, (12,%sp)
+; ATOMIC-PIC-NEXT:    move.l (32,%sp), (8,%sp)
+; ATOMIC-PIC-NEXT:    move.l (28,%sp), (4,%sp)
+; ATOMIC-PIC-NEXT:    move.l (24,%sp), (%sp)
+; ATOMIC-PIC-NEXT:    jsr (__atomic_store_8@PLT,%pc)
+; ATOMIC-PIC-NEXT:    adda.l #20, %sp
+; ATOMIC-PIC-NEXT:    rts
+  store atomic i64 %val, ptr %a unordered, align 8 ; every check prefix expects a __atomic_store_8 libcall; #0 is stacked last as its ordering operand
+  ret void
+}
+
+define void @atomic_store_i64_monotonic(ptr %a, i64 %val) nounwind {
+; NO-ATOMIC-LABEL: atomic_store_i64_monotonic:
+; NO-ATOMIC:       ; %bb.0:
+; NO-ATOMIC-NEXT:    suba.l #20, %sp
+; NO-ATOMIC-NEXT:    move.l #0, (12,%sp)
+; NO-ATOMIC-NEXT:    move.l (32,%sp), (8,%sp)
+; NO-ATOMIC-NEXT:    move.l (28,%sp), (4,%sp)
+; NO-ATOMIC-NEXT:    move.l (24,%sp), (%sp)
+; NO-ATOMIC-NEXT:    jsr __atomic_store_8
+; NO-ATOMIC-NEXT:    adda.l #20, %sp
+; NO-ATOMIC-NEXT:    rts
+;
+; NO-ATOMIC-PIC-LABEL: atomic_store_i64_monotonic:
+; NO-ATOMIC-PIC:       ; %bb.0:
+; NO-ATOMIC-PIC-NEXT:    suba.l #20, %sp
+; NO-ATOMIC-PIC-NEXT:    move.l #0, (12,%sp)
+; NO-ATOMIC-PIC-NEXT:    move.l (32,%sp), (8,%sp)
+; NO-ATOMIC-PIC-NEXT:    move.l (28,%sp), (4,%sp)
+; NO-ATOMIC-PIC-NEXT:    move.l (24,%sp), (%sp)
+; NO-ATOMIC-PIC-NEXT:    jsr (__atomic_store_8@PLT,%pc)
+; NO-ATOMIC-PIC-NEXT:    adda.l #20, %sp
+; NO-ATOMIC-PIC-NEXT:    rts
+;
+; ATOMIC-LABEL: atomic_store_i64_monotonic:
+; ATOMIC:       ; %bb.0:
+; ATOMIC-NEXT:    suba.l #20, %sp
+; ATOMIC-NEXT:    move.l #0, (12,%sp)
+; ATOMIC-NEXT:    move.l (32,%sp), (8,%sp)
+; ATOMIC-NEXT:    move.l (28,%sp), (4,%sp)
+; ATOMIC-NEXT:    move.l (24,%sp), (%sp)
+; ATOMIC-NEXT:    jsr __atomic_store_8
+; ATOMIC-NEXT:    adda.l #20, %sp
+; ATOMIC-NEXT:    rts
+;
+; ATOMIC-PIC-LABEL: atomic_store_i64_monotonic:
+; ATOMIC-PIC:       ; %bb.0:
+; ATOMIC-PIC-NEXT:    suba.l #20, %sp
+; ATOMIC-PIC-NEXT:    move.l #0, (12,%sp)
+; ATOMIC-PIC-NEXT:    move.l (32,%sp), (8,%sp)
+; ATOMIC-PIC-NEXT:    move.l (28,%sp), (4,%sp)
+; ATOMIC-PIC-NEXT:    move.l (24,%sp), (%sp)
+; ATOMIC-PIC-NEXT:    jsr (__atomic_store_8@PLT,%pc)
+; ATOMIC-PIC-NEXT:    adda.l #20, %sp
+; ATOMIC-PIC-NEXT:    rts
+  store atomic i64 %val, ptr %a monotonic, align 8 ; every check prefix expects a __atomic_store_8 libcall; #0 is stacked last as its ordering operand
+  ret void
+}
+
+define void @atomic_store_i64_release(ptr %a, i64 %val) nounwind {
+; NO-ATOMIC-LABEL: atomic_store_i64_release:
+; NO-ATOMIC:       ; %bb.0:
+; NO-ATOMIC-NEXT:    suba.l #20, %sp
+; NO-ATOMIC-NEXT:    move.l #3, (12,%sp)
+; NO-ATOMIC-NEXT:    move.l (32,%sp), (8,%sp)
+; NO-ATOMIC-NEXT:    move.l (28,%sp), (4,%sp)
+; NO-ATOMIC-NEXT:    move.l (24,%sp), (%sp)
+; NO-ATOMIC-NEXT:    jsr __atomic_store_8
+; NO-ATOMIC-NEXT:    adda.l #20, %sp
+; NO-ATOMIC-NEXT:    rts
+;
+; NO-ATOMIC-PIC-LABEL: atomic_store_i64_release:
+; NO-ATOMIC-PIC:       ; %bb.0:
+; NO-ATOMIC-PIC-NEXT:    suba.l #20, %sp
+; NO-ATOMIC-PIC-NEXT:    move.l #3, (12,%sp)
+; NO-ATOMIC-PIC-NEXT:    move.l (32,%sp), (8,%sp)
+; NO-ATOMIC-PIC-NEXT:    move.l (28,%sp), (4,%sp)
+; NO-ATOMIC-PIC-NEXT:    move.l (24,%sp), (%sp)
+; NO-ATOMIC-PIC-NEXT:    jsr (__atomic_store_8@PLT,%pc)
+; NO-ATOMIC-PIC-NEXT:    adda.l #20, %sp
+; NO-ATOMIC-PIC-NEXT:    rts
+;
+; ATOMIC-LABEL: atomic_store_i64_release:
+; ATOMIC:       ; %bb.0:
+; ATOMIC-NEXT:    suba.l #20, %sp
+; ATOMIC-NEXT:    move.l #3, (12,%sp)
+; ATOMIC-NEXT:    move.l (32,%sp), (8,%sp)
+; ATOMIC-NEXT:    move.l (28,%sp), (4,%sp)
+; ATOMIC-NEXT:    move.l (24,%sp), (%sp)
+; ATOMIC-NEXT:    jsr __atomic_store_8
+; ATOMIC-NEXT:    adda.l #20, %sp
+; ATOMIC-NEXT:    rts
+;
+; ATOMIC-PIC-LABEL: atomic_store_i64_release:
+; ATOMIC-PIC:       ; %bb.0:
+; ATOMIC-PIC-NEXT:    suba.l #20, %sp
+; ATOMIC-PIC-NEXT:    move.l #3, (12,%sp)
+; ATOMIC-PIC-NEXT:    move.l (32,%sp), (8,%sp)
+; ATOMIC-PIC-NEXT:    move.l (28,%sp), (4,%sp)
+; ATOMIC-PIC-NEXT:    move.l (24,%sp), (%sp)
+; ATOMIC-PIC-NEXT:    jsr (__atomic_store_8@PLT,%pc)
+; ATOMIC-PIC-NEXT:    adda.l #20, %sp
+; ATOMIC-PIC-NEXT:    rts
+  store atomic i64 %val, ptr %a release, align 8 ; every check prefix expects a __atomic_store_8 libcall; #3 is stacked last as its ordering operand
+  ret void
+}
+
+define void @atomic_store_i64_seq_cst(ptr %a, i64 %val) nounwind {
+; NO-ATOMIC-LABEL: atomic_store_i64_seq_cst:
+; NO-ATOMIC:       ; %bb.0:
+; NO-ATOMIC-NEXT:    suba.l #20, %sp
+; NO-ATOMIC-NEXT:    move.l #5, (12,%sp)
+; NO-ATOMIC-NEXT:    move.l (32,%sp), (8,%sp)
+; NO-ATOMIC-NEXT:    move.l (28,%sp), (4,%sp)
+; NO-ATOMIC-NEXT:    move.l (24,%sp), (%sp)
+; NO-ATOMIC-NEXT:    jsr __atomic_store_8
+; NO-ATOMIC-NEXT:    adda.l #20, %sp
+; NO-ATOMIC-NEXT:    rts
+;
+; NO-ATOMIC-PIC-LABEL: atomic_store_i64_seq_cst:
+; NO-ATOMIC-PIC:       ; %bb.0:
+; NO-ATOMIC-PIC-NEXT:    suba.l #20, %sp
+; NO-ATOMIC-PIC-NEXT:    move.l #5, (12,%sp)
+; NO-ATOMIC-PIC-NEXT:    move.l (32,%sp), (8,%sp)
+; NO-ATOMIC-PIC-NEXT:    move.l (28,%sp), (4,%sp)
+; NO-ATOMIC-PIC-NEXT:    move.l (24,%sp), (%sp)
+; NO-ATOMIC-PIC-NEXT:    jsr (__atomic_store_8@PLT,%pc)
+; NO-ATOMIC-PIC-NEXT:    adda.l #20, %sp
+; NO-ATOMIC-PIC-NEXT:    rts
+;
+; ATOMIC-LABEL: atomic_store_i64_seq_cst:
+; ATOMIC:       ; %bb.0:
+; ATOMIC-NEXT:    suba.l #20, %sp
+; ATOMIC-NEXT:    move.l #5, (12,%sp)
+; ATOMIC-NEXT:    move.l (32,%sp), (8,%sp)
+; ATOMIC-NEXT:    move.l (28,%sp), (4,%sp)
+; ATOMIC-NEXT:    move.l (24,%sp), (%sp)
+; ATOMIC-NEXT:    jsr __atomic_store_8
+; ATOMIC-NEXT:    adda.l #20, %sp
+; ATOMIC-NEXT:    rts
+;
+; ATOMIC-PIC-LABEL: atomic_store_i64_seq_cst:
+; ATOMIC-PIC:       ; %bb.0:
+; ATOMIC-PIC-NEXT:    suba.l #20, %sp
+; ATOMIC-PIC-NEXT:    move.l #5, (12,%sp)
+; ATOMIC-PIC-NEXT:    move.l (32,%sp), (8,%sp)
+; ATOMIC-PIC-NEXT:    move.l (28,%sp), (4,%sp)
+; ATOMIC-PIC-NEXT:    move.l (24,%sp), (%sp)
+; ATOMIC-PIC-NEXT:    jsr (__atomic_store_8@PLT,%pc)
+; ATOMIC-PIC-NEXT:    adda.l #20, %sp
+; ATOMIC-PIC-NEXT:    rts
+  store atomic i64 %val, ptr %a seq_cst, align 8 ; every check prefix expects a __atomic_store_8 libcall; #5 is stacked last as its ordering operand
+  ret void
+}
+
+define void @store_arid(ptr nonnull align 4 %a) { ; NOTE(review): "arid" presumably names the address-register-indirect-with-displacement mode -- confirm
+; NO-ATOMIC-LABEL: store_arid:
+; NO-ATOMIC:         .cfi_startproc
+; NO-ATOMIC-NEXT:  ; %bb.0: ; %start
+; NO-ATOMIC-NEXT:    moveq #1, %d0
+; NO-ATOMIC-NEXT:    move.l (4,%sp), %a0
+; NO-ATOMIC-NEXT:    move.l %d0, (32,%a0)
+; NO-ATOMIC-NEXT:    rts
+;
+; NO-ATOMIC-PIC-LABEL: store_arid:
+; NO-ATOMIC-PIC:         .cfi_startproc
+; NO-ATOMIC-PIC-NEXT:  ; %bb.0: ; %start
+; NO-ATOMIC-PIC-NEXT:    moveq #1, %d0
+; NO-ATOMIC-PIC-NEXT:    move.l (4,%sp), %a0
+; NO-ATOMIC-PIC-NEXT:    move.l %d0, (32,%a0)
+; NO-ATOMIC-PIC-NEXT:    rts
+;
+; ATOMIC-LABEL: store_arid:
+; ATOMIC:         .cfi_startproc
+; ATOMIC-NEXT:  ; %bb.0: ; %start
+; ATOMIC-NEXT:    moveq #1, %d0
+; ATOMIC-NEXT:    move.l (4,%sp), %a0
+; ATOMIC-NEXT:    move.l %d0, (32,%a0)
+; ATOMIC-NEXT:    rts
+;
+; ATOMIC-PIC-LABEL: store_arid:
+; ATOMIC-PIC:         .cfi_startproc
+; ATOMIC-PIC-NEXT:  ; %bb.0: ; %start
+; ATOMIC-PIC-NEXT:    moveq #1, %d0
+; ATOMIC-PIC-NEXT:    move.l (4,%sp), %a0
+; ATOMIC-PIC-NEXT:    move.l %d0, (32,%a0)
+; ATOMIC-PIC-NEXT:    rts
+start:
+  %1 = getelementptr inbounds i32, ptr %a, i32 8 ; i32 index 8 = byte offset 32, the displacement in the expected (32,%a0) operand
+  store atomic i32 1, ptr %1 seq_cst, align 4 ; every check prefix expects the constant stored by one move.l to (32,%a0), with no libcall
+  br label %exit
+
+exit:                                              ; preds = %start
+  ret void
+}
+
+define i32 @load_arid(ptr nonnull align 4 %a) { ; NOTE(review): "arid" presumably names the address-register-indirect-with-displacement mode -- confirm
+; NO-ATOMIC-LABEL: load_arid:
+; NO-ATOMIC:         .cfi_startproc
+; NO-ATOMIC-NEXT:  ; %bb.0: ; %start
+; NO-ATOMIC-NEXT:    move.l (4,%sp), %a0
+; NO-ATOMIC-NEXT:    move.l (32,%a0), %d0
+; NO-ATOMIC-NEXT:    rts
+;
+; NO-ATOMIC-PIC-LABEL: load_arid:
+; NO-ATOMIC-PIC:         .cfi_startproc
+; NO-ATOMIC-PIC-NEXT:  ; %bb.0: ; %start
+; NO-ATOMIC-PIC-NEXT:    move.l (4,%sp), %a0
+; NO-ATOMIC-PIC-NEXT:    move.l (32,%a0), %d0
+; NO-ATOMIC-PIC-NEXT:    rts
+;
+; ATOMIC-LABEL: load_arid:
+; ATOMIC:         .cfi_startproc
+; ATOMIC-NEXT:  ; %bb.0: ; %start
+; ATOMIC-NEXT:    move.l (4,%sp), %a0
+; ATOMIC-NEXT:    move.l (32,%a0), %d0
+; ATOMIC-NEXT:    rts
+;
+; ATOMIC-PIC-LABEL: load_arid:
+; ATOMIC-PIC:         .cfi_startproc
+; ATOMIC-PIC-NEXT:  ; %bb.0: ; %start
+; ATOMIC-PIC-NEXT:    move.l (4,%sp), %a0
+; ATOMIC-PIC-NEXT:    move.l (32,%a0), %d0
+; ATOMIC-PIC-NEXT:    rts
+start:
+  %1 = getelementptr inbounds i32, ptr %a, i32 8 ; i32 index 8 = byte offset 32, the displacement in the expected (32,%a0) operand
+  %2 = load atomic i32, ptr %1 seq_cst, align 4 ; every check prefix expects one move.l from (32,%a0) into %d0, with no libcall
+  br label %exit
+
+exit:                                              ; preds = %start
+  ret i32 %2
+}
diff --git a/llvm/test/CodeGen/M68k/CodeModel/Large/Atomics/rmw.ll b/llvm/test/CodeGen/M68k/CodeModel/Large/Atomics/rmw.ll
new file mode 100644
index 00000000000000..b4c2bb1d223c97
--- /dev/null
+++ b/llvm/test/CodeGen/M68k/CodeModel/Large/Atomics/rmw.ll
@@ -0,0 +1,1390 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc %s -o - -mtriple=m68k -mcpu=M68000 -verify-machineinstrs --code-model=large | FileCheck %s --check-prefix=NO-ATOMIC
+; RUN: llc %s -o - -mtriple=m68k -mcpu=M68010 -verify-machineinstrs --code-model=large | FileCheck %s --check-prefix=NO-ATOMIC
+; RUN: llc %s -o - -mtriple=m68k -mcpu=M68000 -verify-machineinstrs --code-model=large --relocation-model=pic | FileCheck %s --check-prefix=NO-ATOMIC-PIC
+; RUN: llc %s -o - -mtriple=m68k -mcpu=M68010 -verify-machineinstrs --code-model=large --relocation-model=pic | FileCheck %s --check-prefix=NO-ATOMIC-PIC
+; RUN: llc %s -o - -mtriple=m68k -mcpu=M68020 -verify-machineinstrs --code-model=large | FileCheck %s --check-prefix=ATOMIC
+; RUN: llc %s -o - -mtriple=m68k -mcpu=M68030 -verify-machineinstrs --code-model=large | FileCheck %s --check-prefix=ATOMIC
+; RUN: llc %s -o - -mtriple=m68k -mcpu=M68040 -verify-machineinstrs --code-model=large | FileCheck %s --check-prefix=ATOMIC
+; RUN: llc %s -o - -mtriple=m68k -mcpu=M68020 -verify-machineinstrs --code-model=large --relocation-model=pic | FileCheck %s --check-prefix=ATOMIC-PIC
+; RUN: llc %s -o - -mtriple=m68k -mcpu=M68030 -verify-machineinstrs --code-model=large --relocation-model=pic | FileCheck %s --check-prefix=ATOMIC-PIC
+; RUN: llc %s -o - -mtriple=m68k -mcpu=M68040 -verify-machineinstrs --code-model=large --relocation-model=pic | FileCheck %s --check-prefix=ATOMIC-PIC
+
+define i8 @atomicrmw_add_i8(i8 %val, ptr %ptr) { ; monotonic 8-bit atomic add on %ptr; returns the atomicrmw result
+; NO-ATOMIC-LABEL: atomicrmw_add_i8:
+; NO-ATOMIC:         .cfi_startproc
+; NO-ATOMIC-NEXT:  ; %bb.0:
+; NO-ATOMIC-NEXT:    suba.l #12, %sp
+; NO-ATOMIC-NEXT:    .cfi_def_cfa_offset -16
+; NO-ATOMIC-NEXT:    move.b (19,%sp), %d0
+; NO-ATOMIC-NEXT:    and.l #255, %d0
+; NO-ATOMIC-NEXT:    move.l %d0, (4,%sp)
+; NO-ATOMIC-NEXT:    move.l (20,%sp), (%sp)
+; NO-ATOMIC-NEXT:    jsr __sync_fetch_and_add_1
+; NO-ATOMIC-NEXT:    adda.l #12, %sp
+; NO-ATOMIC-NEXT:    rts
+;
+; NO-ATOMIC-PIC-LABEL: atomicrmw_add_i8:
+; NO-ATOMIC-PIC:         .cfi_startproc
+; NO-ATOMIC-PIC-NEXT:  ; %bb.0:
+; NO-ATOMIC-PIC-NEXT:    suba.l #12, %sp
+; NO-ATOMIC-PIC-NEXT:    .cfi_def_cfa_offset -16
+; NO-ATOMIC-PIC-NEXT:    move.b (19,%sp), %d0
+; NO-ATOMIC-PIC-NEXT:    and.l #255, %d0
+; NO-ATOMIC-PIC-NEXT:    move.l %d0, (4,%sp)
+; NO-ATOMIC-PIC-NEXT:    move.l (20,%sp), (%sp)
+; NO-ATOMIC-PIC-NEXT:    jsr (__sync_fetch_and_add_1 at PLT,%pc)
+; NO-ATOMIC-PIC-NEXT:    adda.l #12, %sp
+; NO-ATOMIC-PIC-NEXT:    rts
+;
+; ATOMIC-LABEL: atomicrmw_add_i8:
+; ATOMIC:         .cfi_startproc
+; ATOMIC-NEXT:  ; %bb.0:
+; ATOMIC-NEXT:    suba.l #8, %sp
+; ATOMIC-NEXT:    .cfi_def_cfa_offset -12
+; ATOMIC-NEXT:    movem.l %d2-%d3, (0,%sp) ; 12-byte Folded Spill
+; ATOMIC-NEXT:    move.b (15,%sp), %d1
+; ATOMIC-NEXT:    move.l (16,%sp), %a0
+; ATOMIC-NEXT:    move.b (%a0), %d2
+; ATOMIC-NEXT:    move.b %d2, %d0
+; ATOMIC-NEXT:  .LBB0_1: ; %atomicrmw.start
+; ATOMIC-NEXT:    ; =>This Inner Loop Header: Depth=1
+; ATOMIC-NEXT:    move.b %d2, %d3
+; ATOMIC-NEXT:    add.b %d1, %d3
+; ATOMIC-NEXT:    cas.b %d0, %d3, (%a0)
+; ATOMIC-NEXT:    move.b %d0, %d3
+; ATOMIC-NEXT:    sub.b %d2, %d3
+; ATOMIC-NEXT:    seq %d2
+; ATOMIC-NEXT:    sub.b #1, %d2
+; ATOMIC-NEXT:    move.b %d0, %d2
+; ATOMIC-NEXT:    bne .LBB0_1
+; ATOMIC-NEXT:  ; %bb.2: ; %atomicrmw.end
+; ATOMIC-NEXT:    movem.l (0,%sp), %d2-%d3 ; 12-byte Folded Reload
+; ATOMIC-NEXT:    adda.l #8, %sp
+; ATOMIC-NEXT:    rts
+;
+; ATOMIC-PIC-LABEL: atomicrmw_add_i8:
+; ATOMIC-PIC:         .cfi_startproc
+; ATOMIC-PIC-NEXT:  ; %bb.0:
+; ATOMIC-PIC-NEXT:    suba.l #8, %sp
+; ATOMIC-PIC-NEXT:    .cfi_def_cfa_offset -12
+; ATOMIC-PIC-NEXT:    movem.l %d2-%d3, (0,%sp) ; 12-byte Folded Spill
+; ATOMIC-PIC-NEXT:    move.b (15,%sp), %d1
+; ATOMIC-PIC-NEXT:    move.l (16,%sp), %a0
+; ATOMIC-PIC-NEXT:    move.b (%a0), %d2
+; ATOMIC-PIC-NEXT:    move.b %d2, %d0
+; ATOMIC-PIC-NEXT:  .LBB0_1: ; %atomicrmw.start
+; ATOMIC-PIC-NEXT:    ; =>This Inner Loop Header: Depth=1
+; ATOMIC-PIC-NEXT:    move.b %d2, %d3
+; ATOMIC-PIC-NEXT:    add.b %d1, %d3
+; ATOMIC-PIC-NEXT:    cas.b %d0, %d3, (%a0)
+; ATOMIC-PIC-NEXT:    move.b %d0, %d3
+; ATOMIC-PIC-NEXT:    sub.b %d2, %d3
+; ATOMIC-PIC-NEXT:    seq %d2
+; ATOMIC-PIC-NEXT:    sub.b #1, %d2
+; ATOMIC-PIC-NEXT:    move.b %d0, %d2
+; ATOMIC-PIC-NEXT:    bne .LBB0_1
+; ATOMIC-PIC-NEXT:  ; %bb.2: ; %atomicrmw.end
+; ATOMIC-PIC-NEXT:    movem.l (0,%sp), %d2-%d3 ; 12-byte Folded Reload
+; ATOMIC-PIC-NEXT:    adda.l #8, %sp
+; ATOMIC-PIC-NEXT:    rts
+  %old = atomicrmw add ptr %ptr, i8 %val monotonic ; M68000/M68010 runs expect a jsr to __sync_fetch_and_add_1; M68020/030/040 runs expect an inline cas.b retry loop
+  ret i8 %old
+}
+
+define i16 @atomicrmw_sub_i16(i16 %val, ptr %ptr) { ; acquire 16-bit atomic subtract on %ptr; returns the atomicrmw result
+; NO-ATOMIC-LABEL: atomicrmw_sub_i16:
+; NO-ATOMIC:         .cfi_startproc
+; NO-ATOMIC-NEXT:  ; %bb.0:
+; NO-ATOMIC-NEXT:    suba.l #12, %sp
+; NO-ATOMIC-NEXT:    .cfi_def_cfa_offset -16
+; NO-ATOMIC-NEXT:    move.w (18,%sp), %d0
+; NO-ATOMIC-NEXT:    and.l #65535, %d0
+; NO-ATOMIC-NEXT:    move.l %d0, (4,%sp)
+; NO-ATOMIC-NEXT:    move.l (20,%sp), (%sp)
+; NO-ATOMIC-NEXT:    jsr __sync_fetch_and_sub_2
+; NO-ATOMIC-NEXT:    adda.l #12, %sp
+; NO-ATOMIC-NEXT:    rts
+;
+; NO-ATOMIC-PIC-LABEL: atomicrmw_sub_i16:
+; NO-ATOMIC-PIC:         .cfi_startproc
+; NO-ATOMIC-PIC-NEXT:  ; %bb.0:
+; NO-ATOMIC-PIC-NEXT:    suba.l #12, %sp
+; NO-ATOMIC-PIC-NEXT:    .cfi_def_cfa_offset -16
+; NO-ATOMIC-PIC-NEXT:    move.w (18,%sp), %d0
+; NO-ATOMIC-PIC-NEXT:    and.l #65535, %d0
+; NO-ATOMIC-PIC-NEXT:    move.l %d0, (4,%sp)
+; NO-ATOMIC-PIC-NEXT:    move.l (20,%sp), (%sp)
+; NO-ATOMIC-PIC-NEXT:    jsr (__sync_fetch_and_sub_2 at PLT,%pc)
+; NO-ATOMIC-PIC-NEXT:    adda.l #12, %sp
+; NO-ATOMIC-PIC-NEXT:    rts
+;
+; ATOMIC-LABEL: atomicrmw_sub_i16:
+; ATOMIC:         .cfi_startproc
+; ATOMIC-NEXT:  ; %bb.0:
+; ATOMIC-NEXT:    suba.l #8, %sp
+; ATOMIC-NEXT:    .cfi_def_cfa_offset -12
+; ATOMIC-NEXT:    movem.l %d2-%d3, (0,%sp) ; 12-byte Folded Spill
+; ATOMIC-NEXT:    move.w (14,%sp), %d1
+; ATOMIC-NEXT:    move.l (16,%sp), %a0
+; ATOMIC-NEXT:    move.w (%a0), %d2
+; ATOMIC-NEXT:    move.w %d2, %d0
+; ATOMIC-NEXT:  .LBB1_1: ; %atomicrmw.start
+; ATOMIC-NEXT:    ; =>This Inner Loop Header: Depth=1
+; ATOMIC-NEXT:    move.w %d2, %d3
+; ATOMIC-NEXT:    sub.w %d1, %d3
+; ATOMIC-NEXT:    cas.w %d0, %d3, (%a0)
+; ATOMIC-NEXT:    move.w %d0, %d3
+; ATOMIC-NEXT:    sub.w %d2, %d3
+; ATOMIC-NEXT:    seq %d2
+; ATOMIC-NEXT:    sub.b #1, %d2
+; ATOMIC-NEXT:    move.w %d0, %d2
+; ATOMIC-NEXT:    bne .LBB1_1
+; ATOMIC-NEXT:  ; %bb.2: ; %atomicrmw.end
+; ATOMIC-NEXT:    movem.l (0,%sp), %d2-%d3 ; 12-byte Folded Reload
+; ATOMIC-NEXT:    adda.l #8, %sp
+; ATOMIC-NEXT:    rts
+;
+; ATOMIC-PIC-LABEL: atomicrmw_sub_i16:
+; ATOMIC-PIC:         .cfi_startproc
+; ATOMIC-PIC-NEXT:  ; %bb.0:
+; ATOMIC-PIC-NEXT:    suba.l #8, %sp
+; ATOMIC-PIC-NEXT:    .cfi_def_cfa_offset -12
+; ATOMIC-PIC-NEXT:    movem.l %d2-%d3, (0,%sp) ; 12-byte Folded Spill
+; ATOMIC-PIC-NEXT:    move.w (14,%sp), %d1
+; ATOMIC-PIC-NEXT:    move.l (16,%sp), %a0
+; ATOMIC-PIC-NEXT:    move.w (%a0), %d2
+; ATOMIC-PIC-NEXT:    move.w %d2, %d0
+; ATOMIC-PIC-NEXT:  .LBB1_1: ; %atomicrmw.start
+; ATOMIC-PIC-NEXT:    ; =>This Inner Loop Header: Depth=1
+; ATOMIC-PIC-NEXT:    move.w %d2, %d3
+; ATOMIC-PIC-NEXT:    sub.w %d1, %d3
+; ATOMIC-PIC-NEXT:    cas.w %d0, %d3, (%a0)
+; ATOMIC-PIC-NEXT:    move.w %d0, %d3
+; ATOMIC-PIC-NEXT:    sub.w %d2, %d3
+; ATOMIC-PIC-NEXT:    seq %d2
+; ATOMIC-PIC-NEXT:    sub.b #1, %d2
+; ATOMIC-PIC-NEXT:    move.w %d0, %d2
+; ATOMIC-PIC-NEXT:    bne .LBB1_1
+; ATOMIC-PIC-NEXT:  ; %bb.2: ; %atomicrmw.end
+; ATOMIC-PIC-NEXT:    movem.l (0,%sp), %d2-%d3 ; 12-byte Folded Reload
+; ATOMIC-PIC-NEXT:    adda.l #8, %sp
+; ATOMIC-PIC-NEXT:    rts
+  %old = atomicrmw sub ptr %ptr, i16 %val acquire ; M68000/M68010 runs expect a jsr to __sync_fetch_and_sub_2; M68020/030/040 runs expect an inline cas.w retry loop
+  ret i16 %old
+}
+
+define i32 @atomicrmw_and_i32(i32 %val, ptr %ptr) { ; seq_cst 32-bit atomic AND on %ptr; returns the atomicrmw result
+; NO-ATOMIC-LABEL: atomicrmw_and_i32:
+; NO-ATOMIC:         .cfi_startproc
+; NO-ATOMIC-NEXT:  ; %bb.0:
+; NO-ATOMIC-NEXT:    suba.l #12, %sp
+; NO-ATOMIC-NEXT:    .cfi_def_cfa_offset -16
+; NO-ATOMIC-NEXT:    move.l (16,%sp), (4,%sp)
+; NO-ATOMIC-NEXT:    move.l (20,%sp), (%sp)
+; NO-ATOMIC-NEXT:    jsr __sync_fetch_and_and_4
+; NO-ATOMIC-NEXT:    adda.l #12, %sp
+; NO-ATOMIC-NEXT:    rts
+;
+; NO-ATOMIC-PIC-LABEL: atomicrmw_and_i32:
+; NO-ATOMIC-PIC:         .cfi_startproc
+; NO-ATOMIC-PIC-NEXT:  ; %bb.0:
+; NO-ATOMIC-PIC-NEXT:    suba.l #12, %sp
+; NO-ATOMIC-PIC-NEXT:    .cfi_def_cfa_offset -16
+; NO-ATOMIC-PIC-NEXT:    move.l (16,%sp), (4,%sp)
+; NO-ATOMIC-PIC-NEXT:    move.l (20,%sp), (%sp)
+; NO-ATOMIC-PIC-NEXT:    jsr (__sync_fetch_and_and_4 at PLT,%pc)
+; NO-ATOMIC-PIC-NEXT:    adda.l #12, %sp
+; NO-ATOMIC-PIC-NEXT:    rts
+;
+; ATOMIC-LABEL: atomicrmw_and_i32:
+; ATOMIC:         .cfi_startproc
+; ATOMIC-NEXT:  ; %bb.0:
+; ATOMIC-NEXT:    suba.l #8, %sp
+; ATOMIC-NEXT:    .cfi_def_cfa_offset -12
+; ATOMIC-NEXT:    movem.l %d2-%d3, (0,%sp) ; 12-byte Folded Spill
+; ATOMIC-NEXT:    move.l (12,%sp), %d1
+; ATOMIC-NEXT:    move.l (16,%sp), %a0
+; ATOMIC-NEXT:    move.l (%a0), %d2
+; ATOMIC-NEXT:    move.l %d2, %d0
+; ATOMIC-NEXT:  .LBB2_1: ; %atomicrmw.start
+; ATOMIC-NEXT:    ; =>This Inner Loop Header: Depth=1
+; ATOMIC-NEXT:    move.l %d2, %d3
+; ATOMIC-NEXT:    and.l %d1, %d3
+; ATOMIC-NEXT:    cas.l %d0, %d3, (%a0)
+; ATOMIC-NEXT:    move.l %d0, %d3
+; ATOMIC-NEXT:    sub.l %d2, %d3
+; ATOMIC-NEXT:    seq %d2
+; ATOMIC-NEXT:    sub.b #1, %d2
+; ATOMIC-NEXT:    move.l %d0, %d2
+; ATOMIC-NEXT:    bne .LBB2_1
+; ATOMIC-NEXT:  ; %bb.2: ; %atomicrmw.end
+; ATOMIC-NEXT:    movem.l (0,%sp), %d2-%d3 ; 12-byte Folded Reload
+; ATOMIC-NEXT:    adda.l #8, %sp
+; ATOMIC-NEXT:    rts
+;
+; ATOMIC-PIC-LABEL: atomicrmw_and_i32:
+; ATOMIC-PIC:         .cfi_startproc
+; ATOMIC-PIC-NEXT:  ; %bb.0:
+; ATOMIC-PIC-NEXT:    suba.l #8, %sp
+; ATOMIC-PIC-NEXT:    .cfi_def_cfa_offset -12
+; ATOMIC-PIC-NEXT:    movem.l %d2-%d3, (0,%sp) ; 12-byte Folded Spill
+; ATOMIC-PIC-NEXT:    move.l (12,%sp), %d1
+; ATOMIC-PIC-NEXT:    move.l (16,%sp), %a0
+; ATOMIC-PIC-NEXT:    move.l (%a0), %d2
+; ATOMIC-PIC-NEXT:    move.l %d2, %d0
+; ATOMIC-PIC-NEXT:  .LBB2_1: ; %atomicrmw.start
+; ATOMIC-PIC-NEXT:    ; =>This Inner Loop Header: Depth=1
+; ATOMIC-PIC-NEXT:    move.l %d2, %d3
+; ATOMIC-PIC-NEXT:    and.l %d1, %d3
+; ATOMIC-PIC-NEXT:    cas.l %d0, %d3, (%a0)
+; ATOMIC-PIC-NEXT:    move.l %d0, %d3
+; ATOMIC-PIC-NEXT:    sub.l %d2, %d3
+; ATOMIC-PIC-NEXT:    seq %d2
+; ATOMIC-PIC-NEXT:    sub.b #1, %d2
+; ATOMIC-PIC-NEXT:    move.l %d0, %d2
+; ATOMIC-PIC-NEXT:    bne .LBB2_1
+; ATOMIC-PIC-NEXT:  ; %bb.2: ; %atomicrmw.end
+; ATOMIC-PIC-NEXT:    movem.l (0,%sp), %d2-%d3 ; 12-byte Folded Reload
+; ATOMIC-PIC-NEXT:    adda.l #8, %sp
+; ATOMIC-PIC-NEXT:    rts
+  %old = atomicrmw and ptr %ptr, i32 %val seq_cst ; M68000/M68010 runs expect a jsr to __sync_fetch_and_and_4; M68020/030/040 runs expect an inline cas.l retry loop
+  ret i32 %old
+}
+
+define i64 @atomicrmw_xor_i64(i64 %val, ptr %ptr) { ; release 64-bit atomic XOR on %ptr; returns the atomicrmw result
+; NO-ATOMIC-LABEL: atomicrmw_xor_i64:
+; NO-ATOMIC:         .cfi_startproc
+; NO-ATOMIC-NEXT:  ; %bb.0:
+; NO-ATOMIC-NEXT:    suba.l #20, %sp
+; NO-ATOMIC-NEXT:    .cfi_def_cfa_offset -24
+; NO-ATOMIC-NEXT:    move.l #3, (12,%sp)
+; NO-ATOMIC-NEXT:    move.l (28,%sp), (8,%sp)
+; NO-ATOMIC-NEXT:    move.l (24,%sp), (4,%sp)
+; NO-ATOMIC-NEXT:    move.l (32,%sp), (%sp)
+; NO-ATOMIC-NEXT:    jsr __atomic_fetch_xor_8
+; NO-ATOMIC-NEXT:    adda.l #20, %sp
+; NO-ATOMIC-NEXT:    rts
+;
+; NO-ATOMIC-PIC-LABEL: atomicrmw_xor_i64:
+; NO-ATOMIC-PIC:         .cfi_startproc
+; NO-ATOMIC-PIC-NEXT:  ; %bb.0:
+; NO-ATOMIC-PIC-NEXT:    suba.l #20, %sp
+; NO-ATOMIC-PIC-NEXT:    .cfi_def_cfa_offset -24
+; NO-ATOMIC-PIC-NEXT:    move.l #3, (12,%sp)
+; NO-ATOMIC-PIC-NEXT:    move.l (28,%sp), (8,%sp)
+; NO-ATOMIC-PIC-NEXT:    move.l (24,%sp), (4,%sp)
+; NO-ATOMIC-PIC-NEXT:    move.l (32,%sp), (%sp)
+; NO-ATOMIC-PIC-NEXT:    jsr (__atomic_fetch_xor_8 at PLT,%pc)
+; NO-ATOMIC-PIC-NEXT:    adda.l #20, %sp
+; NO-ATOMIC-PIC-NEXT:    rts
+;
+; ATOMIC-LABEL: atomicrmw_xor_i64:
+; ATOMIC:         .cfi_startproc
+; ATOMIC-NEXT:  ; %bb.0:
+; ATOMIC-NEXT:    suba.l #20, %sp
+; ATOMIC-NEXT:    .cfi_def_cfa_offset -24
+; ATOMIC-NEXT:    move.l #3, (12,%sp)
+; ATOMIC-NEXT:    move.l (28,%sp), (8,%sp)
+; ATOMIC-NEXT:    move.l (24,%sp), (4,%sp)
+; ATOMIC-NEXT:    move.l (32,%sp), (%sp)
+; ATOMIC-NEXT:    jsr __atomic_fetch_xor_8
+; ATOMIC-NEXT:    adda.l #20, %sp
+; ATOMIC-NEXT:    rts
+;
+; ATOMIC-PIC-LABEL: atomicrmw_xor_i64:
+; ATOMIC-PIC:         .cfi_startproc
+; ATOMIC-PIC-NEXT:  ; %bb.0:
+; ATOMIC-PIC-NEXT:    suba.l #20, %sp
+; ATOMIC-PIC-NEXT:    .cfi_def_cfa_offset -24
+; ATOMIC-PIC-NEXT:    move.l #3, (12,%sp)
+; ATOMIC-PIC-NEXT:    move.l (28,%sp), (8,%sp)
+; ATOMIC-PIC-NEXT:    move.l (24,%sp), (4,%sp)
+; ATOMIC-PIC-NEXT:    move.l (32,%sp), (%sp)
+; ATOMIC-PIC-NEXT:    jsr (__atomic_fetch_xor_8 at PLT,%pc)
+; ATOMIC-PIC-NEXT:    adda.l #20, %sp
+; ATOMIC-PIC-NEXT:    rts
+  %old = atomicrmw xor ptr %ptr, i64 %val release ; every run expects a jsr to __atomic_fetch_xor_8, with no inline cas; the #3 stored at (12,%sp) is presumably the release memory-order argument - confirm
+  ret i64 %old
+}
+
+define i8 @atomicrmw_or_i8(i8 %val, ptr %ptr) { ; monotonic 8-bit atomic OR on %ptr; returns the atomicrmw result
+; NO-ATOMIC-LABEL: atomicrmw_or_i8:
+; NO-ATOMIC:         .cfi_startproc
+; NO-ATOMIC-NEXT:  ; %bb.0:
+; NO-ATOMIC-NEXT:    suba.l #12, %sp
+; NO-ATOMIC-NEXT:    .cfi_def_cfa_offset -16
+; NO-ATOMIC-NEXT:    move.b (19,%sp), %d0
+; NO-ATOMIC-NEXT:    and.l #255, %d0
+; NO-ATOMIC-NEXT:    move.l %d0, (4,%sp)
+; NO-ATOMIC-NEXT:    move.l (20,%sp), (%sp)
+; NO-ATOMIC-NEXT:    jsr __sync_fetch_and_or_1
+; NO-ATOMIC-NEXT:    adda.l #12, %sp
+; NO-ATOMIC-NEXT:    rts
+;
+; NO-ATOMIC-PIC-LABEL: atomicrmw_or_i8:
+; NO-ATOMIC-PIC:         .cfi_startproc
+; NO-ATOMIC-PIC-NEXT:  ; %bb.0:
+; NO-ATOMIC-PIC-NEXT:    suba.l #12, %sp
+; NO-ATOMIC-PIC-NEXT:    .cfi_def_cfa_offset -16
+; NO-ATOMIC-PIC-NEXT:    move.b (19,%sp), %d0
+; NO-ATOMIC-PIC-NEXT:    and.l #255, %d0
+; NO-ATOMIC-PIC-NEXT:    move.l %d0, (4,%sp)
+; NO-ATOMIC-PIC-NEXT:    move.l (20,%sp), (%sp)
+; NO-ATOMIC-PIC-NEXT:    jsr (__sync_fetch_and_or_1 at PLT,%pc)
+; NO-ATOMIC-PIC-NEXT:    adda.l #12, %sp
+; NO-ATOMIC-PIC-NEXT:    rts
+;
+; ATOMIC-LABEL: atomicrmw_or_i8:
+; ATOMIC:         .cfi_startproc
+; ATOMIC-NEXT:  ; %bb.0:
+; ATOMIC-NEXT:    suba.l #8, %sp
+; ATOMIC-NEXT:    .cfi_def_cfa_offset -12
+; ATOMIC-NEXT:    movem.l %d2-%d3, (0,%sp) ; 12-byte Folded Spill
+; ATOMIC-NEXT:    move.b (15,%sp), %d1
+; ATOMIC-NEXT:    move.l (16,%sp), %a0
+; ATOMIC-NEXT:    move.b (%a0), %d2
+; ATOMIC-NEXT:    move.b %d2, %d0
+; ATOMIC-NEXT:  .LBB4_1: ; %atomicrmw.start
+; ATOMIC-NEXT:    ; =>This Inner Loop Header: Depth=1
+; ATOMIC-NEXT:    move.b %d2, %d3
+; ATOMIC-NEXT:    or.b %d1, %d3
+; ATOMIC-NEXT:    cas.b %d0, %d3, (%a0)
+; ATOMIC-NEXT:    move.b %d0, %d3
+; ATOMIC-NEXT:    sub.b %d2, %d3
+; ATOMIC-NEXT:    seq %d2
+; ATOMIC-NEXT:    sub.b #1, %d2
+; ATOMIC-NEXT:    move.b %d0, %d2
+; ATOMIC-NEXT:    bne .LBB4_1
+; ATOMIC-NEXT:  ; %bb.2: ; %atomicrmw.end
+; ATOMIC-NEXT:    movem.l (0,%sp), %d2-%d3 ; 12-byte Folded Reload
+; ATOMIC-NEXT:    adda.l #8, %sp
+; ATOMIC-NEXT:    rts
+;
+; ATOMIC-PIC-LABEL: atomicrmw_or_i8:
+; ATOMIC-PIC:         .cfi_startproc
+; ATOMIC-PIC-NEXT:  ; %bb.0:
+; ATOMIC-PIC-NEXT:    suba.l #8, %sp
+; ATOMIC-PIC-NEXT:    .cfi_def_cfa_offset -12
+; ATOMIC-PIC-NEXT:    movem.l %d2-%d3, (0,%sp) ; 12-byte Folded Spill
+; ATOMIC-PIC-NEXT:    move.b (15,%sp), %d1
+; ATOMIC-PIC-NEXT:    move.l (16,%sp), %a0
+; ATOMIC-PIC-NEXT:    move.b (%a0), %d2
+; ATOMIC-PIC-NEXT:    move.b %d2, %d0
+; ATOMIC-PIC-NEXT:  .LBB4_1: ; %atomicrmw.start
+; ATOMIC-PIC-NEXT:    ; =>This Inner Loop Header: Depth=1
+; ATOMIC-PIC-NEXT:    move.b %d2, %d3
+; ATOMIC-PIC-NEXT:    or.b %d1, %d3
+; ATOMIC-PIC-NEXT:    cas.b %d0, %d3, (%a0)
+; ATOMIC-PIC-NEXT:    move.b %d0, %d3
+; ATOMIC-PIC-NEXT:    sub.b %d2, %d3
+; ATOMIC-PIC-NEXT:    seq %d2
+; ATOMIC-PIC-NEXT:    sub.b #1, %d2
+; ATOMIC-PIC-NEXT:    move.b %d0, %d2
+; ATOMIC-PIC-NEXT:    bne .LBB4_1
+; ATOMIC-PIC-NEXT:  ; %bb.2: ; %atomicrmw.end
+; ATOMIC-PIC-NEXT:    movem.l (0,%sp), %d2-%d3 ; 12-byte Folded Reload
+; ATOMIC-PIC-NEXT:    adda.l #8, %sp
+; ATOMIC-PIC-NEXT:    rts
+  %old = atomicrmw or ptr %ptr, i8 %val monotonic ; M68000/M68010 runs expect a jsr to __sync_fetch_and_or_1; M68020/030/040 runs expect an inline cas.b retry loop
+  ret i8 %old
+}
+
+define i16 @atmoicrmw_nand_i16(i16 %val, ptr %ptr) { ; seq_cst 16-bit atomic NAND on %ptr. NOTE(review): "atmoicrmw" looks like a typo for "atomicrmw"; renaming would also require updating every -LABEL check below
+; NO-ATOMIC-LABEL: atmoicrmw_nand_i16:
+; NO-ATOMIC:         .cfi_startproc
+; NO-ATOMIC-NEXT:  ; %bb.0:
+; NO-ATOMIC-NEXT:    suba.l #12, %sp
+; NO-ATOMIC-NEXT:    .cfi_def_cfa_offset -16
+; NO-ATOMIC-NEXT:    movem.l %d2, (8,%sp) ; 8-byte Folded Spill
+; NO-ATOMIC-NEXT:    move.w (18,%sp), %d2
+; NO-ATOMIC-NEXT:    move.l %d2, %d0
+; NO-ATOMIC-NEXT:    and.l #65535, %d0
+; NO-ATOMIC-NEXT:    move.l %d0, (4,%sp)
+; NO-ATOMIC-NEXT:    move.l (20,%sp), (%sp)
+; NO-ATOMIC-NEXT:    jsr __sync_fetch_and_nand_2
+; NO-ATOMIC-NEXT:    move.w %d2, %d0
+; NO-ATOMIC-NEXT:    movem.l (8,%sp), %d2 ; 8-byte Folded Reload
+; NO-ATOMIC-NEXT:    adda.l #12, %sp
+; NO-ATOMIC-NEXT:    rts
+;
+; NO-ATOMIC-PIC-LABEL: atmoicrmw_nand_i16:
+; NO-ATOMIC-PIC:         .cfi_startproc
+; NO-ATOMIC-PIC-NEXT:  ; %bb.0:
+; NO-ATOMIC-PIC-NEXT:    suba.l #12, %sp
+; NO-ATOMIC-PIC-NEXT:    .cfi_def_cfa_offset -16
+; NO-ATOMIC-PIC-NEXT:    movem.l %d2, (8,%sp) ; 8-byte Folded Spill
+; NO-ATOMIC-PIC-NEXT:    move.w (18,%sp), %d2
+; NO-ATOMIC-PIC-NEXT:    move.l %d2, %d0
+; NO-ATOMIC-PIC-NEXT:    and.l #65535, %d0
+; NO-ATOMIC-PIC-NEXT:    move.l %d0, (4,%sp)
+; NO-ATOMIC-PIC-NEXT:    move.l (20,%sp), (%sp)
+; NO-ATOMIC-PIC-NEXT:    jsr (__sync_fetch_and_nand_2 at PLT,%pc)
+; NO-ATOMIC-PIC-NEXT:    move.w %d2, %d0
+; NO-ATOMIC-PIC-NEXT:    movem.l (8,%sp), %d2 ; 8-byte Folded Reload
+; NO-ATOMIC-PIC-NEXT:    adda.l #12, %sp
+; NO-ATOMIC-PIC-NEXT:    rts
+;
+; ATOMIC-LABEL: atmoicrmw_nand_i16:
+; ATOMIC:         .cfi_startproc
+; ATOMIC-NEXT:  ; %bb.0:
+; ATOMIC-NEXT:    suba.l #8, %sp
+; ATOMIC-NEXT:    .cfi_def_cfa_offset -12
+; ATOMIC-NEXT:    movem.l %d2-%d3, (0,%sp) ; 12-byte Folded Spill
+; ATOMIC-NEXT:    move.w (14,%sp), %d0
+; ATOMIC-NEXT:    move.l (16,%sp), %a0
+; ATOMIC-NEXT:    move.w (%a0), %d2
+; ATOMIC-NEXT:    move.w %d2, %d1
+; ATOMIC-NEXT:  .LBB5_1: ; %atomicrmw.start
+; ATOMIC-NEXT:    ; =>This Inner Loop Header: Depth=1
+; ATOMIC-NEXT:    move.w %d2, %d3
+; ATOMIC-NEXT:    and.w %d0, %d3
+; ATOMIC-NEXT:    not.w %d3
+; ATOMIC-NEXT:    cas.w %d1, %d3, (%a0)
+; ATOMIC-NEXT:    move.w %d1, %d3
+; ATOMIC-NEXT:    sub.w %d2, %d3
+; ATOMIC-NEXT:    seq %d2
+; ATOMIC-NEXT:    sub.b #1, %d2
+; ATOMIC-NEXT:    move.w %d1, %d2
+; ATOMIC-NEXT:    bne .LBB5_1
+; ATOMIC-NEXT:  ; %bb.2: ; %atomicrmw.end
+; ATOMIC-NEXT:    movem.l (0,%sp), %d2-%d3 ; 12-byte Folded Reload
+; ATOMIC-NEXT:    adda.l #8, %sp
+; ATOMIC-NEXT:    rts
+;
+; ATOMIC-PIC-LABEL: atmoicrmw_nand_i16:
+; ATOMIC-PIC:         .cfi_startproc
+; ATOMIC-PIC-NEXT:  ; %bb.0:
+; ATOMIC-PIC-NEXT:    suba.l #8, %sp
+; ATOMIC-PIC-NEXT:    .cfi_def_cfa_offset -12
+; ATOMIC-PIC-NEXT:    movem.l %d2-%d3, (0,%sp) ; 12-byte Folded Spill
+; ATOMIC-PIC-NEXT:    move.w (14,%sp), %d0
+; ATOMIC-PIC-NEXT:    move.l (16,%sp), %a0
+; ATOMIC-PIC-NEXT:    move.w (%a0), %d2
+; ATOMIC-PIC-NEXT:    move.w %d2, %d1
+; ATOMIC-PIC-NEXT:  .LBB5_1: ; %atomicrmw.start
+; ATOMIC-PIC-NEXT:    ; =>This Inner Loop Header: Depth=1
+; ATOMIC-PIC-NEXT:    move.w %d2, %d3
+; ATOMIC-PIC-NEXT:    and.w %d0, %d3
+; ATOMIC-PIC-NEXT:    not.w %d3
+; ATOMIC-PIC-NEXT:    cas.w %d1, %d3, (%a0)
+; ATOMIC-PIC-NEXT:    move.w %d1, %d3
+; ATOMIC-PIC-NEXT:    sub.w %d2, %d3
+; ATOMIC-PIC-NEXT:    seq %d2
+; ATOMIC-PIC-NEXT:    sub.b #1, %d2
+; ATOMIC-PIC-NEXT:    move.w %d1, %d2
+; ATOMIC-PIC-NEXT:    bne .LBB5_1
+; ATOMIC-PIC-NEXT:  ; %bb.2: ; %atomicrmw.end
+; ATOMIC-PIC-NEXT:    movem.l (0,%sp), %d2-%d3 ; 12-byte Folded Reload
+; ATOMIC-PIC-NEXT:    adda.l #8, %sp
+; ATOMIC-PIC-NEXT:    rts
+  %old = atomicrmw nand ptr %ptr, i16 %val seq_cst ; M68000/M68010 runs expect a jsr to __sync_fetch_and_nand_2; M68020/030/040 runs expect a cas.w retry loop computing and.w followed by not.w
+  ret i16 %val ; NOTE(review): returns the input %val, so %old is unused, unlike the sibling tests that return %old - confirm this is intended; the checks above encode it
+}
+
+define i32 @atomicrmw_min_i32(i32 %val, ptr %ptr) { ; acquire 32-bit signed atomic min on %ptr; returns the atomicrmw result
+; NO-ATOMIC-LABEL: atomicrmw_min_i32:
+; NO-ATOMIC:         .cfi_startproc
+; NO-ATOMIC-NEXT:  ; %bb.0:
+; NO-ATOMIC-NEXT:    suba.l #12, %sp
+; NO-ATOMIC-NEXT:    .cfi_def_cfa_offset -16
+; NO-ATOMIC-NEXT:    move.l (16,%sp), (4,%sp)
+; NO-ATOMIC-NEXT:    move.l (20,%sp), (%sp)
+; NO-ATOMIC-NEXT:    jsr __sync_fetch_and_min_4
+; NO-ATOMIC-NEXT:    adda.l #12, %sp
+; NO-ATOMIC-NEXT:    rts
+;
+; NO-ATOMIC-PIC-LABEL: atomicrmw_min_i32:
+; NO-ATOMIC-PIC:         .cfi_startproc
+; NO-ATOMIC-PIC-NEXT:  ; %bb.0:
+; NO-ATOMIC-PIC-NEXT:    suba.l #12, %sp
+; NO-ATOMIC-PIC-NEXT:    .cfi_def_cfa_offset -16
+; NO-ATOMIC-PIC-NEXT:    move.l (16,%sp), (4,%sp)
+; NO-ATOMIC-PIC-NEXT:    move.l (20,%sp), (%sp)
+; NO-ATOMIC-PIC-NEXT:    jsr (__sync_fetch_and_min_4 at PLT,%pc)
+; NO-ATOMIC-PIC-NEXT:    adda.l #12, %sp
+; NO-ATOMIC-PIC-NEXT:    rts
+;
+; ATOMIC-LABEL: atomicrmw_min_i32:
+; ATOMIC:         .cfi_startproc
+; ATOMIC-NEXT:  ; %bb.0:
+; ATOMIC-NEXT:    suba.l #8, %sp
+; ATOMIC-NEXT:    .cfi_def_cfa_offset -12
+; ATOMIC-NEXT:    movem.l %d2-%d3, (0,%sp) ; 12-byte Folded Spill
+; ATOMIC-NEXT:    move.l (12,%sp), %d1
+; ATOMIC-NEXT:    move.l (16,%sp), %a0
+; ATOMIC-NEXT:    move.l (%a0), %d2
+; ATOMIC-NEXT:    bra .LBB6_1
+; ATOMIC-NEXT:  .LBB6_3: ; %atomicrmw.start
+; ATOMIC-NEXT:    ; in Loop: Header=BB6_1 Depth=1
+; ATOMIC-NEXT:    move.l %d2, %d0
+; ATOMIC-NEXT:    cas.l %d0, %d3, (%a0)
+; ATOMIC-NEXT:    move.l %d0, %d3
+; ATOMIC-NEXT:    sub.l %d2, %d3
+; ATOMIC-NEXT:    seq %d2
+; ATOMIC-NEXT:    sub.b #1, %d2
+; ATOMIC-NEXT:    move.l %d0, %d2
+; ATOMIC-NEXT:    beq .LBB6_4
+; ATOMIC-NEXT:  .LBB6_1: ; %atomicrmw.start
+; ATOMIC-NEXT:    ; =>This Inner Loop Header: Depth=1
+; ATOMIC-NEXT:    move.l %d2, %d0
+; ATOMIC-NEXT:    sub.l %d1, %d0
+; ATOMIC-NEXT:    move.l %d2, %d3
+; ATOMIC-NEXT:    ble .LBB6_3
+; ATOMIC-NEXT:  ; %bb.2: ; %atomicrmw.start
+; ATOMIC-NEXT:    ; in Loop: Header=BB6_1 Depth=1
+; ATOMIC-NEXT:    move.l %d1, %d3
+; ATOMIC-NEXT:    bra .LBB6_3
+; ATOMIC-NEXT:  .LBB6_4: ; %atomicrmw.end
+; ATOMIC-NEXT:    movem.l (0,%sp), %d2-%d3 ; 12-byte Folded Reload
+; ATOMIC-NEXT:    adda.l #8, %sp
+; ATOMIC-NEXT:    rts
+;
+; ATOMIC-PIC-LABEL: atomicrmw_min_i32:
+; ATOMIC-PIC:         .cfi_startproc
+; ATOMIC-PIC-NEXT:  ; %bb.0:
+; ATOMIC-PIC-NEXT:    suba.l #8, %sp
+; ATOMIC-PIC-NEXT:    .cfi_def_cfa_offset -12
+; ATOMIC-PIC-NEXT:    movem.l %d2-%d3, (0,%sp) ; 12-byte Folded Spill
+; ATOMIC-PIC-NEXT:    move.l (12,%sp), %d1
+; ATOMIC-PIC-NEXT:    move.l (16,%sp), %a0
+; ATOMIC-PIC-NEXT:    move.l (%a0), %d2
+; ATOMIC-PIC-NEXT:    bra .LBB6_1
+; ATOMIC-PIC-NEXT:  .LBB6_3: ; %atomicrmw.start
+; ATOMIC-PIC-NEXT:    ; in Loop: Header=BB6_1 Depth=1
+; ATOMIC-PIC-NEXT:    move.l %d2, %d0
+; ATOMIC-PIC-NEXT:    cas.l %d0, %d3, (%a0)
+; ATOMIC-PIC-NEXT:    move.l %d0, %d3
+; ATOMIC-PIC-NEXT:    sub.l %d2, %d3
+; ATOMIC-PIC-NEXT:    seq %d2
+; ATOMIC-PIC-NEXT:    sub.b #1, %d2
+; ATOMIC-PIC-NEXT:    move.l %d0, %d2
+; ATOMIC-PIC-NEXT:    beq .LBB6_4
+; ATOMIC-PIC-NEXT:  .LBB6_1: ; %atomicrmw.start
+; ATOMIC-PIC-NEXT:    ; =>This Inner Loop Header: Depth=1
+; ATOMIC-PIC-NEXT:    move.l %d2, %d0
+; ATOMIC-PIC-NEXT:    sub.l %d1, %d0
+; ATOMIC-PIC-NEXT:    move.l %d2, %d3
+; ATOMIC-PIC-NEXT:    ble .LBB6_3
+; ATOMIC-PIC-NEXT:  ; %bb.2: ; %atomicrmw.start
+; ATOMIC-PIC-NEXT:    ; in Loop: Header=BB6_1 Depth=1
+; ATOMIC-PIC-NEXT:    move.l %d1, %d3
+; ATOMIC-PIC-NEXT:    bra .LBB6_3
+; ATOMIC-PIC-NEXT:  .LBB6_4: ; %atomicrmw.end
+; ATOMIC-PIC-NEXT:    movem.l (0,%sp), %d2-%d3 ; 12-byte Folded Reload
+; ATOMIC-PIC-NEXT:    adda.l #8, %sp
+; ATOMIC-PIC-NEXT:    rts
+  %old = atomicrmw min ptr %ptr, i32 %val acquire ; M68000/M68010 runs expect a jsr to __sync_fetch_and_min_4; M68020/030/040 runs expect a cas.l retry loop whose ble branch selects between the loaded value and %val
+  ret i32 %old
+}
+
+define i64 @atomicrmw_max_i64(i64 %val, ptr %ptr) { ; seq_cst 64-bit signed atomic max on %ptr; returns the atomicrmw result
+; NO-ATOMIC-LABEL: atomicrmw_max_i64:
+; NO-ATOMIC:         .cfi_startproc
+; NO-ATOMIC-NEXT:  ; %bb.0:
+; NO-ATOMIC-NEXT:    suba.l #52, %sp
+; NO-ATOMIC-NEXT:    .cfi_def_cfa_offset -56
+; NO-ATOMIC-NEXT:    movem.l %d2-%d4/%a2-%a3, (32,%sp) ; 24-byte Folded Spill
+; NO-ATOMIC-NEXT:    move.l (60,%sp), %d3
+; NO-ATOMIC-NEXT:    move.l (56,%sp), %d4
+; NO-ATOMIC-NEXT:    move.l (64,%sp), %a2
+; NO-ATOMIC-NEXT:    move.l (4,%a2), %d1
+; NO-ATOMIC-NEXT:    move.l (%a2), %d0
+; NO-ATOMIC-NEXT:    lea (24,%sp), %a3
+; NO-ATOMIC-NEXT:    bra .LBB7_1
+; NO-ATOMIC-NEXT:  .LBB7_3: ; %atomicrmw.start
+; NO-ATOMIC-NEXT:    ; in Loop: Header=BB7_1 Depth=1
+; NO-ATOMIC-NEXT:    move.l %d1, (12,%sp)
+; NO-ATOMIC-NEXT:    move.l %d0, (8,%sp)
+; NO-ATOMIC-NEXT:    move.l #5, (20,%sp)
+; NO-ATOMIC-NEXT:    move.l #5, (16,%sp)
+; NO-ATOMIC-NEXT:    jsr __atomic_compare_exchange_8
+; NO-ATOMIC-NEXT:    move.b %d0, %d2
+; NO-ATOMIC-NEXT:    move.l (28,%sp), %d1
+; NO-ATOMIC-NEXT:    move.l (24,%sp), %d0
+; NO-ATOMIC-NEXT:    cmpi.b #0, %d2
+; NO-ATOMIC-NEXT:    bne .LBB7_4
+; NO-ATOMIC-NEXT:  .LBB7_1: ; %atomicrmw.start
+; NO-ATOMIC-NEXT:    ; =>This Inner Loop Header: Depth=1
+; NO-ATOMIC-NEXT:    move.l %d0, (24,%sp)
+; NO-ATOMIC-NEXT:    move.l %d1, (28,%sp)
+; NO-ATOMIC-NEXT:    move.l %a2, (%sp)
+; NO-ATOMIC-NEXT:    move.l %a3, (4,%sp)
+; NO-ATOMIC-NEXT:    move.l %d3, %d2
+; NO-ATOMIC-NEXT:    sub.l %d1, %d2
+; NO-ATOMIC-NEXT:    move.l %d4, %d2
+; NO-ATOMIC-NEXT:    subx.l %d0, %d2
+; NO-ATOMIC-NEXT:    slt %d2
+; NO-ATOMIC-NEXT:    cmpi.b #0, %d2
+; NO-ATOMIC-NEXT:    bne .LBB7_3
+; NO-ATOMIC-NEXT:  ; %bb.2: ; %atomicrmw.start
+; NO-ATOMIC-NEXT:    ; in Loop: Header=BB7_1 Depth=1
+; NO-ATOMIC-NEXT:    move.l %d3, %d1
+; NO-ATOMIC-NEXT:    move.l %d4, %d0
+; NO-ATOMIC-NEXT:    bra .LBB7_3
+; NO-ATOMIC-NEXT:  .LBB7_4: ; %atomicrmw.end
+; NO-ATOMIC-NEXT:    movem.l (32,%sp), %d2-%d4/%a2-%a3 ; 24-byte Folded Reload
+; NO-ATOMIC-NEXT:    adda.l #52, %sp
+; NO-ATOMIC-NEXT:    rts
+;
+; NO-ATOMIC-PIC-LABEL: atomicrmw_max_i64:
+; NO-ATOMIC-PIC:         .cfi_startproc
+; NO-ATOMIC-PIC-NEXT:  ; %bb.0:
+; NO-ATOMIC-PIC-NEXT:    suba.l #52, %sp
+; NO-ATOMIC-PIC-NEXT:    .cfi_def_cfa_offset -56
+; NO-ATOMIC-PIC-NEXT:    movem.l %d2-%d4/%a2-%a3, (32,%sp) ; 24-byte Folded Spill
+; NO-ATOMIC-PIC-NEXT:    move.l (60,%sp), %d3
+; NO-ATOMIC-PIC-NEXT:    move.l (56,%sp), %d4
+; NO-ATOMIC-PIC-NEXT:    move.l (64,%sp), %a2
+; NO-ATOMIC-PIC-NEXT:    move.l (4,%a2), %d1
+; NO-ATOMIC-PIC-NEXT:    move.l (%a2), %d0
+; NO-ATOMIC-PIC-NEXT:    lea (24,%sp), %a3
+; NO-ATOMIC-PIC-NEXT:    bra .LBB7_1
+; NO-ATOMIC-PIC-NEXT:  .LBB7_3: ; %atomicrmw.start
+; NO-ATOMIC-PIC-NEXT:    ; in Loop: Header=BB7_1 Depth=1
+; NO-ATOMIC-PIC-NEXT:    move.l %d1, (12,%sp)
+; NO-ATOMIC-PIC-NEXT:    move.l %d0, (8,%sp)
+; NO-ATOMIC-PIC-NEXT:    move.l #5, (20,%sp)
+; NO-ATOMIC-PIC-NEXT:    move.l #5, (16,%sp)
+; NO-ATOMIC-PIC-NEXT:    jsr (__atomic_compare_exchange_8 at PLT,%pc)
+; NO-ATOMIC-PIC-NEXT:    move.b %d0, %d2
+; NO-ATOMIC-PIC-NEXT:    move.l (28,%sp), %d1
+; NO-ATOMIC-PIC-NEXT:    move.l (24,%sp), %d0
+; NO-ATOMIC-PIC-NEXT:    cmpi.b #0, %d2
+; NO-ATOMIC-PIC-NEXT:    bne .LBB7_4
+; NO-ATOMIC-PIC-NEXT:  .LBB7_1: ; %atomicrmw.start
+; NO-ATOMIC-PIC-NEXT:    ; =>This Inner Loop Header: Depth=1
+; NO-ATOMIC-PIC-NEXT:    move.l %d0, (24,%sp)
+; NO-ATOMIC-PIC-NEXT:    move.l %d1, (28,%sp)
+; NO-ATOMIC-PIC-NEXT:    move.l %a2, (%sp)
+; NO-ATOMIC-PIC-NEXT:    move.l %a3, (4,%sp)
+; NO-ATOMIC-PIC-NEXT:    move.l %d3, %d2
+; NO-ATOMIC-PIC-NEXT:    sub.l %d1, %d2
+; NO-ATOMIC-PIC-NEXT:    move.l %d4, %d2
+; NO-ATOMIC-PIC-NEXT:    subx.l %d0, %d2
+; NO-ATOMIC-PIC-NEXT:    slt %d2
+; NO-ATOMIC-PIC-NEXT:    cmpi.b #0, %d2
+; NO-ATOMIC-PIC-NEXT:    bne .LBB7_3
+; NO-ATOMIC-PIC-NEXT:  ; %bb.2: ; %atomicrmw.start
+; NO-ATOMIC-PIC-NEXT:    ; in Loop: Header=BB7_1 Depth=1
+; NO-ATOMIC-PIC-NEXT:    move.l %d3, %d1
+; NO-ATOMIC-PIC-NEXT:    move.l %d4, %d0
+; NO-ATOMIC-PIC-NEXT:    bra .LBB7_3
+; NO-ATOMIC-PIC-NEXT:  .LBB7_4: ; %atomicrmw.end
+; NO-ATOMIC-PIC-NEXT:    movem.l (32,%sp), %d2-%d4/%a2-%a3 ; 24-byte Folded Reload
+; NO-ATOMIC-PIC-NEXT:    adda.l #52, %sp
+; NO-ATOMIC-PIC-NEXT:    rts
+;
+; ATOMIC-LABEL: atomicrmw_max_i64:
+; ATOMIC:         .cfi_startproc
+; ATOMIC-NEXT:  ; %bb.0:
+; ATOMIC-NEXT:    suba.l #52, %sp
+; ATOMIC-NEXT:    .cfi_def_cfa_offset -56
+; ATOMIC-NEXT:    movem.l %d2-%d4/%a2-%a3, (32,%sp) ; 24-byte Folded Spill
+; ATOMIC-NEXT:    move.l (60,%sp), %d3
+; ATOMIC-NEXT:    move.l (56,%sp), %d4
+; ATOMIC-NEXT:    move.l (64,%sp), %a2
+; ATOMIC-NEXT:    move.l (4,%a2), %d1
+; ATOMIC-NEXT:    move.l (%a2), %d0
+; ATOMIC-NEXT:    lea (24,%sp), %a3
+; ATOMIC-NEXT:    bra .LBB7_1
+; ATOMIC-NEXT:  .LBB7_3: ; %atomicrmw.start
+; ATOMIC-NEXT:    ; in Loop: Header=BB7_1 Depth=1
+; ATOMIC-NEXT:    move.l %d1, (12,%sp)
+; ATOMIC-NEXT:    move.l %d0, (8,%sp)
+; ATOMIC-NEXT:    move.l #5, (20,%sp)
+; ATOMIC-NEXT:    move.l #5, (16,%sp)
+; ATOMIC-NEXT:    jsr __atomic_compare_exchange_8
+; ATOMIC-NEXT:    move.b %d0, %d2
+; ATOMIC-NEXT:    move.l (28,%sp), %d1
+; ATOMIC-NEXT:    move.l (24,%sp), %d0
+; ATOMIC-NEXT:    cmpi.b #0, %d2
+; ATOMIC-NEXT:    bne .LBB7_4
+; ATOMIC-NEXT:  .LBB7_1: ; %atomicrmw.start
+; ATOMIC-NEXT:    ; =>This Inner Loop Header: Depth=1
+; ATOMIC-NEXT:    move.l %d0, (24,%sp)
+; ATOMIC-NEXT:    move.l %d1, (28,%sp)
+; ATOMIC-NEXT:    move.l %a2, (%sp)
+; ATOMIC-NEXT:    move.l %a3, (4,%sp)
+; ATOMIC-NEXT:    move.l %d3, %d2
+; ATOMIC-NEXT:    sub.l %d1, %d2
+; ATOMIC-NEXT:    move.l %d4, %d2
+; ATOMIC-NEXT:    subx.l %d0, %d2
+; ATOMIC-NEXT:    slt %d2
+; ATOMIC-NEXT:    cmpi.b #0, %d2
+; ATOMIC-NEXT:    bne .LBB7_3
+; ATOMIC-NEXT:  ; %bb.2: ; %atomicrmw.start
+; ATOMIC-NEXT:    ; in Loop: Header=BB7_1 Depth=1
+; ATOMIC-NEXT:    move.l %d3, %d1
+; ATOMIC-NEXT:    move.l %d4, %d0
+; ATOMIC-NEXT:    bra .LBB7_3
+; ATOMIC-NEXT:  .LBB7_4: ; %atomicrmw.end
+; ATOMIC-NEXT:    movem.l (32,%sp), %d2-%d4/%a2-%a3 ; 24-byte Folded Reload
+; ATOMIC-NEXT:    adda.l #52, %sp
+; ATOMIC-NEXT:    rts
+;
+; ATOMIC-PIC-LABEL: atomicrmw_max_i64:
+; ATOMIC-PIC:         .cfi_startproc
+; ATOMIC-PIC-NEXT:  ; %bb.0:
+; ATOMIC-PIC-NEXT:    suba.l #52, %sp
+; ATOMIC-PIC-NEXT:    .cfi_def_cfa_offset -56
+; ATOMIC-PIC-NEXT:    movem.l %d2-%d4/%a2-%a3, (32,%sp) ; 24-byte Folded Spill
+; ATOMIC-PIC-NEXT:    move.l (60,%sp), %d3
+; ATOMIC-PIC-NEXT:    move.l (56,%sp), %d4
+; ATOMIC-PIC-NEXT:    move.l (64,%sp), %a2
+; ATOMIC-PIC-NEXT:    move.l (4,%a2), %d1
+; ATOMIC-PIC-NEXT:    move.l (%a2), %d0
+; ATOMIC-PIC-NEXT:    lea (24,%sp), %a3
+; ATOMIC-PIC-NEXT:    bra .LBB7_1
+; ATOMIC-PIC-NEXT:  .LBB7_3: ; %atomicrmw.start
+; ATOMIC-PIC-NEXT:    ; in Loop: Header=BB7_1 Depth=1
+; ATOMIC-PIC-NEXT:    move.l %d1, (12,%sp)
+; ATOMIC-PIC-NEXT:    move.l %d0, (8,%sp)
+; ATOMIC-PIC-NEXT:    move.l #5, (20,%sp)
+; ATOMIC-PIC-NEXT:    move.l #5, (16,%sp)
+; ATOMIC-PIC-NEXT:    jsr (__atomic_compare_exchange_8 at PLT,%pc)
+; ATOMIC-PIC-NEXT:    move.b %d0, %d2
+; ATOMIC-PIC-NEXT:    move.l (28,%sp), %d1
+; ATOMIC-PIC-NEXT:    move.l (24,%sp), %d0
+; ATOMIC-PIC-NEXT:    cmpi.b #0, %d2
+; ATOMIC-PIC-NEXT:    bne .LBB7_4
+; ATOMIC-PIC-NEXT:  .LBB7_1: ; %atomicrmw.start
+; ATOMIC-PIC-NEXT:    ; =>This Inner Loop Header: Depth=1
+; ATOMIC-PIC-NEXT:    move.l %d0, (24,%sp)
+; ATOMIC-PIC-NEXT:    move.l %d1, (28,%sp)
+; ATOMIC-PIC-NEXT:    move.l %a2, (%sp)
+; ATOMIC-PIC-NEXT:    move.l %a3, (4,%sp)
+; ATOMIC-PIC-NEXT:    move.l %d3, %d2
+; ATOMIC-PIC-NEXT:    sub.l %d1, %d2
+; ATOMIC-PIC-NEXT:    move.l %d4, %d2
+; ATOMIC-PIC-NEXT:    subx.l %d0, %d2
+; ATOMIC-PIC-NEXT:    slt %d2
+; ATOMIC-PIC-NEXT:    cmpi.b #0, %d2
+; ATOMIC-PIC-NEXT:    bne .LBB7_3
+; ATOMIC-PIC-NEXT:  ; %bb.2: ; %atomicrmw.start
+; ATOMIC-PIC-NEXT:    ; in Loop: Header=BB7_1 Depth=1
+; ATOMIC-PIC-NEXT:    move.l %d3, %d1
+; ATOMIC-PIC-NEXT:    move.l %d4, %d0
+; ATOMIC-PIC-NEXT:    bra .LBB7_3
+; ATOMIC-PIC-NEXT:  .LBB7_4: ; %atomicrmw.end
+; ATOMIC-PIC-NEXT:    movem.l (32,%sp), %d2-%d4/%a2-%a3 ; 24-byte Folded Reload
+; ATOMIC-PIC-NEXT:    adda.l #52, %sp
+; ATOMIC-PIC-NEXT:    rts
+  %old = atomicrmw max ptr %ptr, i64 %val seq_cst ; every run expects a retry loop around a jsr to __atomic_compare_exchange_8, with no inline cas; the two #5 stores are presumably the seq_cst success/failure order arguments - confirm
+  ret i64 %old
+}
+
+define i8 @atomicrmw_i8_umin(i8 %val, ptr %ptr) {
+; NO-ATOMIC-LABEL: atomicrmw_i8_umin:
+; NO-ATOMIC:         .cfi_startproc
+; NO-ATOMIC-NEXT:  ; %bb.0:
+; NO-ATOMIC-NEXT:    suba.l #12, %sp
+; NO-ATOMIC-NEXT:    .cfi_def_cfa_offset -16
+; NO-ATOMIC-NEXT:    move.b (19,%sp), %d0
+; NO-ATOMIC-NEXT:    and.l #255, %d0
+; NO-ATOMIC-NEXT:    move.l %d0, (4,%sp)
+; NO-ATOMIC-NEXT:    move.l (20,%sp), (%sp)
+; NO-ATOMIC-NEXT:    jsr __sync_fetch_and_umin_1
+; NO-ATOMIC-NEXT:    adda.l #12, %sp
+; NO-ATOMIC-NEXT:    rts
+;
+; NO-ATOMIC-PIC-LABEL: atomicrmw_i8_umin:
+; NO-ATOMIC-PIC:         .cfi_startproc
+; NO-ATOMIC-PIC-NEXT:  ; %bb.0:
+; NO-ATOMIC-PIC-NEXT:    suba.l #12, %sp
+; NO-ATOMIC-PIC-NEXT:    .cfi_def_cfa_offset -16
+; NO-ATOMIC-PIC-NEXT:    move.b (19,%sp), %d0
+; NO-ATOMIC-PIC-NEXT:    and.l #255, %d0
+; NO-ATOMIC-PIC-NEXT:    move.l %d0, (4,%sp)
+; NO-ATOMIC-PIC-NEXT:    move.l (20,%sp), (%sp)
+; NO-ATOMIC-PIC-NEXT:    jsr (__sync_fetch_and_umin_1@PLT,%pc)
+; NO-ATOMIC-PIC-NEXT:    adda.l #12, %sp
+; NO-ATOMIC-PIC-NEXT:    rts
+;
+; ATOMIC-LABEL: atomicrmw_i8_umin:
+; ATOMIC:         .cfi_startproc
+; ATOMIC-NEXT:  ; %bb.0:
+; ATOMIC-NEXT:    suba.l #8, %sp
+; ATOMIC-NEXT:    .cfi_def_cfa_offset -12
+; ATOMIC-NEXT:    movem.l %d2-%d3, (0,%sp) ; 12-byte Folded Spill
+; ATOMIC-NEXT:    move.b (15,%sp), %d1
+; ATOMIC-NEXT:    move.l (16,%sp), %a0
+; ATOMIC-NEXT:    move.b (%a0), %d2
+; ATOMIC-NEXT:    bra .LBB8_1
+; ATOMIC-NEXT:  .LBB8_3: ; %atomicrmw.start
+; ATOMIC-NEXT:    ; in Loop: Header=BB8_1 Depth=1
+; ATOMIC-NEXT:    move.b %d2, %d0
+; ATOMIC-NEXT:    cas.b %d0, %d3, (%a0)
+; ATOMIC-NEXT:    move.b %d0, %d3
+; ATOMIC-NEXT:    sub.b %d2, %d3
+; ATOMIC-NEXT:    seq %d2
+; ATOMIC-NEXT:    sub.b #1, %d2
+; ATOMIC-NEXT:    move.b %d0, %d2
+; ATOMIC-NEXT:    beq .LBB8_4
+; ATOMIC-NEXT:  .LBB8_1: ; %atomicrmw.start
+; ATOMIC-NEXT:    ; =>This Inner Loop Header: Depth=1
+; ATOMIC-NEXT:    move.b %d2, %d0
+; ATOMIC-NEXT:    sub.b %d1, %d0
+; ATOMIC-NEXT:    move.b %d2, %d3
+; ATOMIC-NEXT:    bls .LBB8_3
+; ATOMIC-NEXT:  ; %bb.2: ; %atomicrmw.start
+; ATOMIC-NEXT:    ; in Loop: Header=BB8_1 Depth=1
+; ATOMIC-NEXT:    move.b %d1, %d3
+; ATOMIC-NEXT:    bra .LBB8_3
+; ATOMIC-NEXT:  .LBB8_4: ; %atomicrmw.end
+; ATOMIC-NEXT:    movem.l (0,%sp), %d2-%d3 ; 12-byte Folded Reload
+; ATOMIC-NEXT:    adda.l #8, %sp
+; ATOMIC-NEXT:    rts
+;
+; ATOMIC-PIC-LABEL: atomicrmw_i8_umin:
+; ATOMIC-PIC:         .cfi_startproc
+; ATOMIC-PIC-NEXT:  ; %bb.0:
+; ATOMIC-PIC-NEXT:    suba.l #8, %sp
+; ATOMIC-PIC-NEXT:    .cfi_def_cfa_offset -12
+; ATOMIC-PIC-NEXT:    movem.l %d2-%d3, (0,%sp) ; 12-byte Folded Spill
+; ATOMIC-PIC-NEXT:    move.b (15,%sp), %d1
+; ATOMIC-PIC-NEXT:    move.l (16,%sp), %a0
+; ATOMIC-PIC-NEXT:    move.b (%a0), %d2
+; ATOMIC-PIC-NEXT:    bra .LBB8_1
+; ATOMIC-PIC-NEXT:  .LBB8_3: ; %atomicrmw.start
+; ATOMIC-PIC-NEXT:    ; in Loop: Header=BB8_1 Depth=1
+; ATOMIC-PIC-NEXT:    move.b %d2, %d0
+; ATOMIC-PIC-NEXT:    cas.b %d0, %d3, (%a0)
+; ATOMIC-PIC-NEXT:    move.b %d0, %d3
+; ATOMIC-PIC-NEXT:    sub.b %d2, %d3
+; ATOMIC-PIC-NEXT:    seq %d2
+; ATOMIC-PIC-NEXT:    sub.b #1, %d2
+; ATOMIC-PIC-NEXT:    move.b %d0, %d2
+; ATOMIC-PIC-NEXT:    beq .LBB8_4
+; ATOMIC-PIC-NEXT:  .LBB8_1: ; %atomicrmw.start
+; ATOMIC-PIC-NEXT:    ; =>This Inner Loop Header: Depth=1
+; ATOMIC-PIC-NEXT:    move.b %d2, %d0
+; ATOMIC-PIC-NEXT:    sub.b %d1, %d0
+; ATOMIC-PIC-NEXT:    move.b %d2, %d3
+; ATOMIC-PIC-NEXT:    bls .LBB8_3
+; ATOMIC-PIC-NEXT:  ; %bb.2: ; %atomicrmw.start
+; ATOMIC-PIC-NEXT:    ; in Loop: Header=BB8_1 Depth=1
+; ATOMIC-PIC-NEXT:    move.b %d1, %d3
+; ATOMIC-PIC-NEXT:    bra .LBB8_3
+; ATOMIC-PIC-NEXT:  .LBB8_4: ; %atomicrmw.end
+; ATOMIC-PIC-NEXT:    movem.l (0,%sp), %d2-%d3 ; 12-byte Folded Reload
+; ATOMIC-PIC-NEXT:    adda.l #8, %sp
+; ATOMIC-PIC-NEXT:    rts
+  %old = atomicrmw umin ptr %ptr, i8 %val release
+  ret i8 %old
+}
+
+define i16 @atomicrmw_umax_i16(i16 %val, ptr %ptr) {
+; NO-ATOMIC-LABEL: atomicrmw_umax_i16:
+; NO-ATOMIC:         .cfi_startproc
+; NO-ATOMIC-NEXT:  ; %bb.0:
+; NO-ATOMIC-NEXT:    suba.l #12, %sp
+; NO-ATOMIC-NEXT:    .cfi_def_cfa_offset -16
+; NO-ATOMIC-NEXT:    move.w (18,%sp), %d0
+; NO-ATOMIC-NEXT:    and.l #65535, %d0
+; NO-ATOMIC-NEXT:    move.l %d0, (4,%sp)
+; NO-ATOMIC-NEXT:    move.l (20,%sp), (%sp)
+; NO-ATOMIC-NEXT:    jsr __sync_fetch_and_umax_2
+; NO-ATOMIC-NEXT:    adda.l #12, %sp
+; NO-ATOMIC-NEXT:    rts
+;
+; NO-ATOMIC-PIC-LABEL: atomicrmw_umax_i16:
+; NO-ATOMIC-PIC:         .cfi_startproc
+; NO-ATOMIC-PIC-NEXT:  ; %bb.0:
+; NO-ATOMIC-PIC-NEXT:    suba.l #12, %sp
+; NO-ATOMIC-PIC-NEXT:    .cfi_def_cfa_offset -16
+; NO-ATOMIC-PIC-NEXT:    move.w (18,%sp), %d0
+; NO-ATOMIC-PIC-NEXT:    and.l #65535, %d0
+; NO-ATOMIC-PIC-NEXT:    move.l %d0, (4,%sp)
+; NO-ATOMIC-PIC-NEXT:    move.l (20,%sp), (%sp)
+; NO-ATOMIC-PIC-NEXT:    jsr (__sync_fetch_and_umax_2@PLT,%pc)
+; NO-ATOMIC-PIC-NEXT:    adda.l #12, %sp
+; NO-ATOMIC-PIC-NEXT:    rts
+;
+; ATOMIC-LABEL: atomicrmw_umax_i16:
+; ATOMIC:         .cfi_startproc
+; ATOMIC-NEXT:  ; %bb.0:
+; ATOMIC-NEXT:    suba.l #8, %sp
+; ATOMIC-NEXT:    .cfi_def_cfa_offset -12
+; ATOMIC-NEXT:    movem.l %d2-%d3, (0,%sp) ; 12-byte Folded Spill
+; ATOMIC-NEXT:    move.w (14,%sp), %d1
+; ATOMIC-NEXT:    move.l (16,%sp), %a0
+; ATOMIC-NEXT:    move.w (%a0), %d2
+; ATOMIC-NEXT:    bra .LBB9_1
+; ATOMIC-NEXT:  .LBB9_3: ; %atomicrmw.start
+; ATOMIC-NEXT:    ; in Loop: Header=BB9_1 Depth=1
+; ATOMIC-NEXT:    move.w %d2, %d0
+; ATOMIC-NEXT:    cas.w %d0, %d3, (%a0)
+; ATOMIC-NEXT:    move.w %d0, %d3
+; ATOMIC-NEXT:    sub.w %d2, %d3
+; ATOMIC-NEXT:    seq %d2
+; ATOMIC-NEXT:    sub.b #1, %d2
+; ATOMIC-NEXT:    move.w %d0, %d2
+; ATOMIC-NEXT:    beq .LBB9_4
+; ATOMIC-NEXT:  .LBB9_1: ; %atomicrmw.start
+; ATOMIC-NEXT:    ; =>This Inner Loop Header: Depth=1
+; ATOMIC-NEXT:    move.w %d2, %d0
+; ATOMIC-NEXT:    sub.w %d1, %d0
+; ATOMIC-NEXT:    move.w %d2, %d3
+; ATOMIC-NEXT:    bhi .LBB9_3
+; ATOMIC-NEXT:  ; %bb.2: ; %atomicrmw.start
+; ATOMIC-NEXT:    ; in Loop: Header=BB9_1 Depth=1
+; ATOMIC-NEXT:    move.w %d1, %d3
+; ATOMIC-NEXT:    bra .LBB9_3
+; ATOMIC-NEXT:  .LBB9_4: ; %atomicrmw.end
+; ATOMIC-NEXT:    movem.l (0,%sp), %d2-%d3 ; 12-byte Folded Reload
+; ATOMIC-NEXT:    adda.l #8, %sp
+; ATOMIC-NEXT:    rts
+;
+; ATOMIC-PIC-LABEL: atomicrmw_umax_i16:
+; ATOMIC-PIC:         .cfi_startproc
+; ATOMIC-PIC-NEXT:  ; %bb.0:
+; ATOMIC-PIC-NEXT:    suba.l #8, %sp
+; ATOMIC-PIC-NEXT:    .cfi_def_cfa_offset -12
+; ATOMIC-PIC-NEXT:    movem.l %d2-%d3, (0,%sp) ; 12-byte Folded Spill
+; ATOMIC-PIC-NEXT:    move.w (14,%sp), %d1
+; ATOMIC-PIC-NEXT:    move.l (16,%sp), %a0
+; ATOMIC-PIC-NEXT:    move.w (%a0), %d2
+; ATOMIC-PIC-NEXT:    bra .LBB9_1
+; ATOMIC-PIC-NEXT:  .LBB9_3: ; %atomicrmw.start
+; ATOMIC-PIC-NEXT:    ; in Loop: Header=BB9_1 Depth=1
+; ATOMIC-PIC-NEXT:    move.w %d2, %d0
+; ATOMIC-PIC-NEXT:    cas.w %d0, %d3, (%a0)
+; ATOMIC-PIC-NEXT:    move.w %d0, %d3
+; ATOMIC-PIC-NEXT:    sub.w %d2, %d3
+; ATOMIC-PIC-NEXT:    seq %d2
+; ATOMIC-PIC-NEXT:    sub.b #1, %d2
+; ATOMIC-PIC-NEXT:    move.w %d0, %d2
+; ATOMIC-PIC-NEXT:    beq .LBB9_4
+; ATOMIC-PIC-NEXT:  .LBB9_1: ; %atomicrmw.start
+; ATOMIC-PIC-NEXT:    ; =>This Inner Loop Header: Depth=1
+; ATOMIC-PIC-NEXT:    move.w %d2, %d0
+; ATOMIC-PIC-NEXT:    sub.w %d1, %d0
+; ATOMIC-PIC-NEXT:    move.w %d2, %d3
+; ATOMIC-PIC-NEXT:    bhi .LBB9_3
+; ATOMIC-PIC-NEXT:  ; %bb.2: ; %atomicrmw.start
+; ATOMIC-PIC-NEXT:    ; in Loop: Header=BB9_1 Depth=1
+; ATOMIC-PIC-NEXT:    move.w %d1, %d3
+; ATOMIC-PIC-NEXT:    bra .LBB9_3
+; ATOMIC-PIC-NEXT:  .LBB9_4: ; %atomicrmw.end
+; ATOMIC-PIC-NEXT:    movem.l (0,%sp), %d2-%d3 ; 12-byte Folded Reload
+; ATOMIC-PIC-NEXT:    adda.l #8, %sp
+; ATOMIC-PIC-NEXT:    rts
+  %old = atomicrmw umax ptr %ptr, i16 %val seq_cst
+  ret i16 %old
+}
+
+define i16 @atomicrmw_xchg_i16(i16 %val, ptr %ptr) {
+; NO-ATOMIC-LABEL: atomicrmw_xchg_i16:
+; NO-ATOMIC:         .cfi_startproc
+; NO-ATOMIC-NEXT:  ; %bb.0: ; %entry
+; NO-ATOMIC-NEXT:    suba.l #12, %sp
+; NO-ATOMIC-NEXT:    .cfi_def_cfa_offset -16
+; NO-ATOMIC-NEXT:    move.w (18,%sp), %d0
+; NO-ATOMIC-NEXT:    and.l #65535, %d0
+; NO-ATOMIC-NEXT:    move.l %d0, (4,%sp)
+; NO-ATOMIC-NEXT:    move.l (20,%sp), (%sp)
+; NO-ATOMIC-NEXT:    jsr __sync_lock_test_and_set_2
+; NO-ATOMIC-NEXT:    adda.l #12, %sp
+; NO-ATOMIC-NEXT:    rts
+;
+; NO-ATOMIC-PIC-LABEL: atomicrmw_xchg_i16:
+; NO-ATOMIC-PIC:         .cfi_startproc
+; NO-ATOMIC-PIC-NEXT:  ; %bb.0: ; %entry
+; NO-ATOMIC-PIC-NEXT:    suba.l #12, %sp
+; NO-ATOMIC-PIC-NEXT:    .cfi_def_cfa_offset -16
+; NO-ATOMIC-PIC-NEXT:    move.w (18,%sp), %d0
+; NO-ATOMIC-PIC-NEXT:    and.l #65535, %d0
+; NO-ATOMIC-PIC-NEXT:    move.l %d0, (4,%sp)
+; NO-ATOMIC-PIC-NEXT:    move.l (20,%sp), (%sp)
+; NO-ATOMIC-PIC-NEXT:    jsr (__sync_lock_test_and_set_2@PLT,%pc)
+; NO-ATOMIC-PIC-NEXT:    adda.l #12, %sp
+; NO-ATOMIC-PIC-NEXT:    rts
+;
+; ATOMIC-LABEL: atomicrmw_xchg_i16:
+; ATOMIC:         .cfi_startproc
+; ATOMIC-NEXT:  ; %bb.0: ; %entry
+; ATOMIC-NEXT:    suba.l #8, %sp
+; ATOMIC-NEXT:    .cfi_def_cfa_offset -12
+; ATOMIC-NEXT:    movem.l %d2-%d3, (0,%sp) ; 12-byte Folded Spill
+; ATOMIC-NEXT:    move.w (14,%sp), %d1
+; ATOMIC-NEXT:    move.l (16,%sp), %a0
+; ATOMIC-NEXT:    move.w (%a0), %d2
+; ATOMIC-NEXT:    move.w %d2, %d0
+; ATOMIC-NEXT:  .LBB10_1: ; %atomicrmw.start
+; ATOMIC-NEXT:    ; =>This Inner Loop Header: Depth=1
+; ATOMIC-NEXT:    cas.w %d0, %d1, (%a0)
+; ATOMIC-NEXT:    move.w %d0, %d3
+; ATOMIC-NEXT:    sub.w %d2, %d3
+; ATOMIC-NEXT:    seq %d2
+; ATOMIC-NEXT:    sub.b #1, %d2
+; ATOMIC-NEXT:    move.w %d0, %d2
+; ATOMIC-NEXT:    bne .LBB10_1
+; ATOMIC-NEXT:  ; %bb.2: ; %atomicrmw.end
+; ATOMIC-NEXT:    movem.l (0,%sp), %d2-%d3 ; 12-byte Folded Reload
+; ATOMIC-NEXT:    adda.l #8, %sp
+; ATOMIC-NEXT:    rts
+;
+; ATOMIC-PIC-LABEL: atomicrmw_xchg_i16:
+; ATOMIC-PIC:         .cfi_startproc
+; ATOMIC-PIC-NEXT:  ; %bb.0: ; %entry
+; ATOMIC-PIC-NEXT:    suba.l #8, %sp
+; ATOMIC-PIC-NEXT:    .cfi_def_cfa_offset -12
+; ATOMIC-PIC-NEXT:    movem.l %d2-%d3, (0,%sp) ; 12-byte Folded Spill
+; ATOMIC-PIC-NEXT:    move.w (14,%sp), %d1
+; ATOMIC-PIC-NEXT:    move.l (16,%sp), %a0
+; ATOMIC-PIC-NEXT:    move.w (%a0), %d2
+; ATOMIC-PIC-NEXT:    move.w %d2, %d0
+; ATOMIC-PIC-NEXT:  .LBB10_1: ; %atomicrmw.start
+; ATOMIC-PIC-NEXT:    ; =>This Inner Loop Header: Depth=1
+; ATOMIC-PIC-NEXT:    cas.w %d0, %d1, (%a0)
+; ATOMIC-PIC-NEXT:    move.w %d0, %d3
+; ATOMIC-PIC-NEXT:    sub.w %d2, %d3
+; ATOMIC-PIC-NEXT:    seq %d2
+; ATOMIC-PIC-NEXT:    sub.b #1, %d2
+; ATOMIC-PIC-NEXT:    move.w %d0, %d2
+; ATOMIC-PIC-NEXT:    bne .LBB10_1
+; ATOMIC-PIC-NEXT:  ; %bb.2: ; %atomicrmw.end
+; ATOMIC-PIC-NEXT:    movem.l (0,%sp), %d2-%d3 ; 12-byte Folded Reload
+; ATOMIC-PIC-NEXT:    adda.l #8, %sp
+; ATOMIC-PIC-NEXT:    rts
+entry:
+  %old = atomicrmw xchg ptr %ptr, i16 %val monotonic
+  ret i16 %old
+}
+
+define i32 @atomicrmw_xchg_i32(i32 %val, ptr %ptr) {
+; NO-ATOMIC-LABEL: atomicrmw_xchg_i32:
+; NO-ATOMIC:         .cfi_startproc
+; NO-ATOMIC-NEXT:  ; %bb.0: ; %entry
+; NO-ATOMIC-NEXT:    suba.l #12, %sp
+; NO-ATOMIC-NEXT:    .cfi_def_cfa_offset -16
+; NO-ATOMIC-NEXT:    move.l (16,%sp), (4,%sp)
+; NO-ATOMIC-NEXT:    move.l (20,%sp), (%sp)
+; NO-ATOMIC-NEXT:    jsr __sync_lock_test_and_set_4
+; NO-ATOMIC-NEXT:    adda.l #12, %sp
+; NO-ATOMIC-NEXT:    rts
+;
+; NO-ATOMIC-PIC-LABEL: atomicrmw_xchg_i32:
+; NO-ATOMIC-PIC:         .cfi_startproc
+; NO-ATOMIC-PIC-NEXT:  ; %bb.0: ; %entry
+; NO-ATOMIC-PIC-NEXT:    suba.l #12, %sp
+; NO-ATOMIC-PIC-NEXT:    .cfi_def_cfa_offset -16
+; NO-ATOMIC-PIC-NEXT:    move.l (16,%sp), (4,%sp)
+; NO-ATOMIC-PIC-NEXT:    move.l (20,%sp), (%sp)
+; NO-ATOMIC-PIC-NEXT:    jsr (__sync_lock_test_and_set_4@PLT,%pc)
+; NO-ATOMIC-PIC-NEXT:    adda.l #12, %sp
+; NO-ATOMIC-PIC-NEXT:    rts
+;
+; ATOMIC-LABEL: atomicrmw_xchg_i32:
+; ATOMIC:         .cfi_startproc
+; ATOMIC-NEXT:  ; %bb.0: ; %entry
+; ATOMIC-NEXT:    suba.l #8, %sp
+; ATOMIC-NEXT:    .cfi_def_cfa_offset -12
+; ATOMIC-NEXT:    movem.l %d2-%d3, (0,%sp) ; 12-byte Folded Spill
+; ATOMIC-NEXT:    move.l (12,%sp), %d1
+; ATOMIC-NEXT:    move.l (16,%sp), %a0
+; ATOMIC-NEXT:    move.l (%a0), %d2
+; ATOMIC-NEXT:    move.l %d2, %d0
+; ATOMIC-NEXT:  .LBB11_1: ; %atomicrmw.start
+; ATOMIC-NEXT:    ; =>This Inner Loop Header: Depth=1
+; ATOMIC-NEXT:    cas.l %d0, %d1, (%a0)
+; ATOMIC-NEXT:    move.l %d0, %d3
+; ATOMIC-NEXT:    sub.l %d2, %d3
+; ATOMIC-NEXT:    seq %d2
+; ATOMIC-NEXT:    sub.b #1, %d2
+; ATOMIC-NEXT:    move.l %d0, %d2
+; ATOMIC-NEXT:    bne .LBB11_1
+; ATOMIC-NEXT:  ; %bb.2: ; %atomicrmw.end
+; ATOMIC-NEXT:    movem.l (0,%sp), %d2-%d3 ; 12-byte Folded Reload
+; ATOMIC-NEXT:    adda.l #8, %sp
+; ATOMIC-NEXT:    rts
+;
+; ATOMIC-PIC-LABEL: atomicrmw_xchg_i32:
+; ATOMIC-PIC:         .cfi_startproc
+; ATOMIC-PIC-NEXT:  ; %bb.0: ; %entry
+; ATOMIC-PIC-NEXT:    suba.l #8, %sp
+; ATOMIC-PIC-NEXT:    .cfi_def_cfa_offset -12
+; ATOMIC-PIC-NEXT:    movem.l %d2-%d3, (0,%sp) ; 12-byte Folded Spill
+; ATOMIC-PIC-NEXT:    move.l (12,%sp), %d1
+; ATOMIC-PIC-NEXT:    move.l (16,%sp), %a0
+; ATOMIC-PIC-NEXT:    move.l (%a0), %d2
+; ATOMIC-PIC-NEXT:    move.l %d2, %d0
+; ATOMIC-PIC-NEXT:  .LBB11_1: ; %atomicrmw.start
+; ATOMIC-PIC-NEXT:    ; =>This Inner Loop Header: Depth=1
+; ATOMIC-PIC-NEXT:    cas.l %d0, %d1, (%a0)
+; ATOMIC-PIC-NEXT:    move.l %d0, %d3
+; ATOMIC-PIC-NEXT:    sub.l %d2, %d3
+; ATOMIC-PIC-NEXT:    seq %d2
+; ATOMIC-PIC-NEXT:    sub.b #1, %d2
+; ATOMIC-PIC-NEXT:    move.l %d0, %d2
+; ATOMIC-PIC-NEXT:    bne .LBB11_1
+; ATOMIC-PIC-NEXT:  ; %bb.2: ; %atomicrmw.end
+; ATOMIC-PIC-NEXT:    movem.l (0,%sp), %d2-%d3 ; 12-byte Folded Reload
+; ATOMIC-PIC-NEXT:    adda.l #8, %sp
+; ATOMIC-PIC-NEXT:    rts
+entry:
+  %old = atomicrmw xchg ptr %ptr, i32 %val monotonic
+  ret i32 %old
+}
+
+define i8 @atomicrmw_sub_i8_arid(ptr align 2 %self) {
+; NO-ATOMIC-LABEL: atomicrmw_sub_i8_arid:
+; NO-ATOMIC:         .cfi_startproc
+; NO-ATOMIC-NEXT:  ; %bb.0: ; %start
+; NO-ATOMIC-NEXT:    suba.l #12, %sp
+; NO-ATOMIC-NEXT:    .cfi_def_cfa_offset -16
+; NO-ATOMIC-NEXT:    move.l (16,%sp), %a0
+; NO-ATOMIC-NEXT:    move.l (%a0), %d0
+; NO-ATOMIC-NEXT:    add.l #4, %d0
+; NO-ATOMIC-NEXT:    move.l %d0, (%sp)
+; NO-ATOMIC-NEXT:    move.l #1, (4,%sp)
+; NO-ATOMIC-NEXT:    jsr __sync_fetch_and_sub_1
+; NO-ATOMIC-NEXT:    adda.l #12, %sp
+; NO-ATOMIC-NEXT:    rts
+;
+; NO-ATOMIC-PIC-LABEL: atomicrmw_sub_i8_arid:
+; NO-ATOMIC-PIC:         .cfi_startproc
+; NO-ATOMIC-PIC-NEXT:  ; %bb.0: ; %start
+; NO-ATOMIC-PIC-NEXT:    suba.l #12, %sp
+; NO-ATOMIC-PIC-NEXT:    .cfi_def_cfa_offset -16
+; NO-ATOMIC-PIC-NEXT:    move.l (16,%sp), %a0
+; NO-ATOMIC-PIC-NEXT:    move.l (%a0), %d0
+; NO-ATOMIC-PIC-NEXT:    add.l #4, %d0
+; NO-ATOMIC-PIC-NEXT:    move.l %d0, (%sp)
+; NO-ATOMIC-PIC-NEXT:    move.l #1, (4,%sp)
+; NO-ATOMIC-PIC-NEXT:    jsr (__sync_fetch_and_sub_1@PLT,%pc)
+; NO-ATOMIC-PIC-NEXT:    adda.l #12, %sp
+; NO-ATOMIC-PIC-NEXT:    rts
+;
+; ATOMIC-LABEL: atomicrmw_sub_i8_arid:
+; ATOMIC:         .cfi_startproc
+; ATOMIC-NEXT:  ; %bb.0: ; %start
+; ATOMIC-NEXT:    suba.l #4, %sp
+; ATOMIC-NEXT:    .cfi_def_cfa_offset -8
+; ATOMIC-NEXT:    movem.l %d2, (0,%sp) ; 8-byte Folded Spill
+; ATOMIC-NEXT:    move.l (8,%sp), %a0
+; ATOMIC-NEXT:    move.l (%a0), %a0
+; ATOMIC-NEXT:    move.b (4,%a0), %d1
+; ATOMIC-NEXT:    move.b %d1, %d0
+; ATOMIC-NEXT:  .LBB12_1: ; %atomicrmw.start
+; ATOMIC-NEXT:    ; =>This Inner Loop Header: Depth=1
+; ATOMIC-NEXT:    move.b %d1, %d2
+; ATOMIC-NEXT:    add.b #-1, %d2
+; ATOMIC-NEXT:    cas.b %d0, %d2, (4,%a0)
+; ATOMIC-NEXT:    move.b %d0, %d2
+; ATOMIC-NEXT:    sub.b %d1, %d2
+; ATOMIC-NEXT:    seq %d1
+; ATOMIC-NEXT:    sub.b #1, %d1
+; ATOMIC-NEXT:    move.b %d0, %d1
+; ATOMIC-NEXT:    bne .LBB12_1
+; ATOMIC-NEXT:  ; %bb.2: ; %atomicrmw.end
+; ATOMIC-NEXT:    movem.l (0,%sp), %d2 ; 8-byte Folded Reload
+; ATOMIC-NEXT:    adda.l #4, %sp
+; ATOMIC-NEXT:    rts
+;
+; ATOMIC-PIC-LABEL: atomicrmw_sub_i8_arid:
+; ATOMIC-PIC:         .cfi_startproc
+; ATOMIC-PIC-NEXT:  ; %bb.0: ; %start
+; ATOMIC-PIC-NEXT:    suba.l #4, %sp
+; ATOMIC-PIC-NEXT:    .cfi_def_cfa_offset -8
+; ATOMIC-PIC-NEXT:    movem.l %d2, (0,%sp) ; 8-byte Folded Spill
+; ATOMIC-PIC-NEXT:    move.l (8,%sp), %a0
+; ATOMIC-PIC-NEXT:    move.l (%a0), %a0
+; ATOMIC-PIC-NEXT:    move.b (4,%a0), %d1
+; ATOMIC-PIC-NEXT:    move.b %d1, %d0
+; ATOMIC-PIC-NEXT:  .LBB12_1: ; %atomicrmw.start
+; ATOMIC-PIC-NEXT:    ; =>This Inner Loop Header: Depth=1
+; ATOMIC-PIC-NEXT:    move.b %d1, %d2
+; ATOMIC-PIC-NEXT:    add.b #-1, %d2
+; ATOMIC-PIC-NEXT:    cas.b %d0, %d2, (4,%a0)
+; ATOMIC-PIC-NEXT:    move.b %d0, %d2
+; ATOMIC-PIC-NEXT:    sub.b %d1, %d2
+; ATOMIC-PIC-NEXT:    seq %d1
+; ATOMIC-PIC-NEXT:    sub.b #1, %d1
+; ATOMIC-PIC-NEXT:    move.b %d0, %d1
+; ATOMIC-PIC-NEXT:    bne .LBB12_1
+; ATOMIC-PIC-NEXT:  ; %bb.2: ; %atomicrmw.end
+; ATOMIC-PIC-NEXT:    movem.l (0,%sp), %d2 ; 8-byte Folded Reload
+; ATOMIC-PIC-NEXT:    adda.l #4, %sp
+; ATOMIC-PIC-NEXT:    rts
+start:
+  %self1 = load ptr, ptr %self, align 2
+  %_18.i.i = getelementptr inbounds i8, ptr %self1, i32 4
+  %6 = atomicrmw sub ptr %_18.i.i, i8 1 release, align 4
+  ret i8 %6
+}
+
+define i16 @atomicrmw_sub_i16_arid(ptr align 2 %self) {
+; NO-ATOMIC-LABEL: atomicrmw_sub_i16_arid:
+; NO-ATOMIC:         .cfi_startproc
+; NO-ATOMIC-NEXT:  ; %bb.0: ; %start
+; NO-ATOMIC-NEXT:    suba.l #12, %sp
+; NO-ATOMIC-NEXT:    .cfi_def_cfa_offset -16
+; NO-ATOMIC-NEXT:    move.l (16,%sp), %a0
+; NO-ATOMIC-NEXT:    move.l (%a0), %d0
+; NO-ATOMIC-NEXT:    add.l #4, %d0
+; NO-ATOMIC-NEXT:    move.l %d0, (%sp)
+; NO-ATOMIC-NEXT:    move.l #1, (4,%sp)
+; NO-ATOMIC-NEXT:    jsr __sync_fetch_and_sub_2
+; NO-ATOMIC-NEXT:    adda.l #12, %sp
+; NO-ATOMIC-NEXT:    rts
+;
+; NO-ATOMIC-PIC-LABEL: atomicrmw_sub_i16_arid:
+; NO-ATOMIC-PIC:         .cfi_startproc
+; NO-ATOMIC-PIC-NEXT:  ; %bb.0: ; %start
+; NO-ATOMIC-PIC-NEXT:    suba.l #12, %sp
+; NO-ATOMIC-PIC-NEXT:    .cfi_def_cfa_offset -16
+; NO-ATOMIC-PIC-NEXT:    move.l (16,%sp), %a0
+; NO-ATOMIC-PIC-NEXT:    move.l (%a0), %d0
+; NO-ATOMIC-PIC-NEXT:    add.l #4, %d0
+; NO-ATOMIC-PIC-NEXT:    move.l %d0, (%sp)
+; NO-ATOMIC-PIC-NEXT:    move.l #1, (4,%sp)
+; NO-ATOMIC-PIC-NEXT:    jsr (__sync_fetch_and_sub_2@PLT,%pc)
+; NO-ATOMIC-PIC-NEXT:    adda.l #12, %sp
+; NO-ATOMIC-PIC-NEXT:    rts
+;
+; ATOMIC-LABEL: atomicrmw_sub_i16_arid:
+; ATOMIC:         .cfi_startproc
+; ATOMIC-NEXT:  ; %bb.0: ; %start
+; ATOMIC-NEXT:    suba.l #4, %sp
+; ATOMIC-NEXT:    .cfi_def_cfa_offset -8
+; ATOMIC-NEXT:    movem.l %d2, (0,%sp) ; 8-byte Folded Spill
+; ATOMIC-NEXT:    move.l (8,%sp), %a0
+; ATOMIC-NEXT:    move.l (%a0), %a0
+; ATOMIC-NEXT:    move.w (4,%a0), %d1
+; ATOMIC-NEXT:    move.w %d1, %d0
+; ATOMIC-NEXT:  .LBB13_1: ; %atomicrmw.start
+; ATOMIC-NEXT:    ; =>This Inner Loop Header: Depth=1
+; ATOMIC-NEXT:    move.w %d1, %d2
+; ATOMIC-NEXT:    add.w #-1, %d2
+; ATOMIC-NEXT:    cas.w %d0, %d2, (4,%a0)
+; ATOMIC-NEXT:    move.w %d0, %d2
+; ATOMIC-NEXT:    sub.w %d1, %d2
+; ATOMIC-NEXT:    seq %d1
+; ATOMIC-NEXT:    sub.b #1, %d1
+; ATOMIC-NEXT:    move.w %d0, %d1
+; ATOMIC-NEXT:    bne .LBB13_1
+; ATOMIC-NEXT:  ; %bb.2: ; %atomicrmw.end
+; ATOMIC-NEXT:    movem.l (0,%sp), %d2 ; 8-byte Folded Reload
+; ATOMIC-NEXT:    adda.l #4, %sp
+; ATOMIC-NEXT:    rts
+;
+; ATOMIC-PIC-LABEL: atomicrmw_sub_i16_arid:
+; ATOMIC-PIC:         .cfi_startproc
+; ATOMIC-PIC-NEXT:  ; %bb.0: ; %start
+; ATOMIC-PIC-NEXT:    suba.l #4, %sp
+; ATOMIC-PIC-NEXT:    .cfi_def_cfa_offset -8
+; ATOMIC-PIC-NEXT:    movem.l %d2, (0,%sp) ; 8-byte Folded Spill
+; ATOMIC-PIC-NEXT:    move.l (8,%sp), %a0
+; ATOMIC-PIC-NEXT:    move.l (%a0), %a0
+; ATOMIC-PIC-NEXT:    move.w (4,%a0), %d1
+; ATOMIC-PIC-NEXT:    move.w %d1, %d0
+; ATOMIC-PIC-NEXT:  .LBB13_1: ; %atomicrmw.start
+; ATOMIC-PIC-NEXT:    ; =>This Inner Loop Header: Depth=1
+; ATOMIC-PIC-NEXT:    move.w %d1, %d2
+; ATOMIC-PIC-NEXT:    add.w #-1, %d2
+; ATOMIC-PIC-NEXT:    cas.w %d0, %d2, (4,%a0)
+; ATOMIC-PIC-NEXT:    move.w %d0, %d2
+; ATOMIC-PIC-NEXT:    sub.w %d1, %d2
+; ATOMIC-PIC-NEXT:    seq %d1
+; ATOMIC-PIC-NEXT:    sub.b #1, %d1
+; ATOMIC-PIC-NEXT:    move.w %d0, %d1
+; ATOMIC-PIC-NEXT:    bne .LBB13_1
+; ATOMIC-PIC-NEXT:  ; %bb.2: ; %atomicrmw.end
+; ATOMIC-PIC-NEXT:    movem.l (0,%sp), %d2 ; 8-byte Folded Reload
+; ATOMIC-PIC-NEXT:    adda.l #4, %sp
+; ATOMIC-PIC-NEXT:    rts
+start:
+  %self1 = load ptr, ptr %self, align 2
+  %_18.i.i = getelementptr inbounds i8, ptr %self1, i32 4
+  %6 = atomicrmw sub ptr %_18.i.i, i16 1 release, align 4
+  ret i16 %6
+}
+
+define i32 @atomicrmw_sub_i32_arid(ptr align 2 %self) {
+; NO-ATOMIC-LABEL: atomicrmw_sub_i32_arid:
+; NO-ATOMIC:         .cfi_startproc
+; NO-ATOMIC-NEXT:  ; %bb.0: ; %start
+; NO-ATOMIC-NEXT:    suba.l #12, %sp
+; NO-ATOMIC-NEXT:    .cfi_def_cfa_offset -16
+; NO-ATOMIC-NEXT:    move.l (16,%sp), %a0
+; NO-ATOMIC-NEXT:    move.l (%a0), %d0
+; NO-ATOMIC-NEXT:    add.l #4, %d0
+; NO-ATOMIC-NEXT:    move.l %d0, (%sp)
+; NO-ATOMIC-NEXT:    move.l #1, (4,%sp)
+; NO-ATOMIC-NEXT:    jsr __sync_fetch_and_sub_4
+; NO-ATOMIC-NEXT:    adda.l #12, %sp
+; NO-ATOMIC-NEXT:    rts
+;
+; NO-ATOMIC-PIC-LABEL: atomicrmw_sub_i32_arid:
+; NO-ATOMIC-PIC:         .cfi_startproc
+; NO-ATOMIC-PIC-NEXT:  ; %bb.0: ; %start
+; NO-ATOMIC-PIC-NEXT:    suba.l #12, %sp
+; NO-ATOMIC-PIC-NEXT:    .cfi_def_cfa_offset -16
+; NO-ATOMIC-PIC-NEXT:    move.l (16,%sp), %a0
+; NO-ATOMIC-PIC-NEXT:    move.l (%a0), %d0
+; NO-ATOMIC-PIC-NEXT:    add.l #4, %d0
+; NO-ATOMIC-PIC-NEXT:    move.l %d0, (%sp)
+; NO-ATOMIC-PIC-NEXT:    move.l #1, (4,%sp)
+; NO-ATOMIC-PIC-NEXT:    jsr (__sync_fetch_and_sub_4@PLT,%pc)
+; NO-ATOMIC-PIC-NEXT:    adda.l #12, %sp
+; NO-ATOMIC-PIC-NEXT:    rts
+;
+; ATOMIC-LABEL: atomicrmw_sub_i32_arid:
+; ATOMIC:         .cfi_startproc
+; ATOMIC-NEXT:  ; %bb.0: ; %start
+; ATOMIC-NEXT:    suba.l #4, %sp
+; ATOMIC-NEXT:    .cfi_def_cfa_offset -8
+; ATOMIC-NEXT:    movem.l %d2, (0,%sp) ; 8-byte Folded Spill
+; ATOMIC-NEXT:    move.l (8,%sp), %a0
+; ATOMIC-NEXT:    move.l (%a0), %a0
+; ATOMIC-NEXT:    move.l (4,%a0), %d1
+; ATOMIC-NEXT:    move.l %d1, %d0
+; ATOMIC-NEXT:  .LBB14_1: ; %atomicrmw.start
+; ATOMIC-NEXT:    ; =>This Inner Loop Header: Depth=1
+; ATOMIC-NEXT:    move.l %d1, %d2
+; ATOMIC-NEXT:    add.l #-1, %d2
+; ATOMIC-NEXT:    cas.l %d0, %d2, (4,%a0)
+; ATOMIC-NEXT:    move.l %d0, %d2
+; ATOMIC-NEXT:    sub.l %d1, %d2
+; ATOMIC-NEXT:    seq %d1
+; ATOMIC-NEXT:    sub.b #1, %d1
+; ATOMIC-NEXT:    move.l %d0, %d1
+; ATOMIC-NEXT:    bne .LBB14_1
+; ATOMIC-NEXT:  ; %bb.2: ; %atomicrmw.end
+; ATOMIC-NEXT:    movem.l (0,%sp), %d2 ; 8-byte Folded Reload
+; ATOMIC-NEXT:    adda.l #4, %sp
+; ATOMIC-NEXT:    rts
+;
+; ATOMIC-PIC-LABEL: atomicrmw_sub_i32_arid:
+; ATOMIC-PIC:         .cfi_startproc
+; ATOMIC-PIC-NEXT:  ; %bb.0: ; %start
+; ATOMIC-PIC-NEXT:    suba.l #4, %sp
+; ATOMIC-PIC-NEXT:    .cfi_def_cfa_offset -8
+; ATOMIC-PIC-NEXT:    movem.l %d2, (0,%sp) ; 8-byte Folded Spill
+; ATOMIC-PIC-NEXT:    move.l (8,%sp), %a0
+; ATOMIC-PIC-NEXT:    move.l (%a0), %a0
+; ATOMIC-PIC-NEXT:    move.l (4,%a0), %d1
+; ATOMIC-PIC-NEXT:    move.l %d1, %d0
+; ATOMIC-PIC-NEXT:  .LBB14_1: ; %atomicrmw.start
+; ATOMIC-PIC-NEXT:    ; =>This Inner Loop Header: Depth=1
+; ATOMIC-PIC-NEXT:    move.l %d1, %d2
+; ATOMIC-PIC-NEXT:    add.l #-1, %d2
+; ATOMIC-PIC-NEXT:    cas.l %d0, %d2, (4,%a0)
+; ATOMIC-PIC-NEXT:    move.l %d0, %d2
+; ATOMIC-PIC-NEXT:    sub.l %d1, %d2
+; ATOMIC-PIC-NEXT:    seq %d1
+; ATOMIC-PIC-NEXT:    sub.b #1, %d1
+; ATOMIC-PIC-NEXT:    move.l %d0, %d1
+; ATOMIC-PIC-NEXT:    bne .LBB14_1
+; ATOMIC-PIC-NEXT:  ; %bb.2: ; %atomicrmw.end
+; ATOMIC-PIC-NEXT:    movem.l (0,%sp), %d2 ; 8-byte Folded Reload
+; ATOMIC-PIC-NEXT:    adda.l #4, %sp
+; ATOMIC-PIC-NEXT:    rts
+start:
+  %self1 = load ptr, ptr %self, align 2
+  %_18.i.i = getelementptr inbounds i8, ptr %self1, i32 4
+  %6 = atomicrmw sub ptr %_18.i.i, i32 1 release, align 4
+  ret i32 %6
+}
diff --git a/llvm/test/CodeGen/M68k/CodeModel/large-pic.ll b/llvm/test/CodeGen/M68k/CodeModel/Large/large-pic.ll
similarity index 100%
rename from llvm/test/CodeGen/M68k/CodeModel/large-pic.ll
rename to llvm/test/CodeGen/M68k/CodeModel/Large/large-pic.ll
diff --git a/llvm/test/CodeGen/M68k/CodeModel/large-pie-global-access.ll b/llvm/test/CodeGen/M68k/CodeModel/Large/large-pie-global-access.ll
similarity index 100%
rename from llvm/test/CodeGen/M68k/CodeModel/large-pie-global-access.ll
rename to llvm/test/CodeGen/M68k/CodeModel/Large/large-pie-global-access.ll
diff --git a/llvm/test/CodeGen/M68k/CodeModel/large-pie.ll b/llvm/test/CodeGen/M68k/CodeModel/Large/large-pie.ll
similarity index 100%
rename from llvm/test/CodeGen/M68k/CodeModel/large-pie.ll
rename to llvm/test/CodeGen/M68k/CodeModel/Large/large-pie.ll
diff --git a/llvm/test/CodeGen/M68k/CodeModel/large-static.ll b/llvm/test/CodeGen/M68k/CodeModel/Large/large-static.ll
similarity index 100%
rename from llvm/test/CodeGen/M68k/CodeModel/large-static.ll
rename to llvm/test/CodeGen/M68k/CodeModel/Large/large-static.ll
diff --git a/llvm/test/CodeGen/M68k/CodeModel/medium-pic.ll b/llvm/test/CodeGen/M68k/CodeModel/Medium/medium-pic.ll
similarity index 100%
rename from llvm/test/CodeGen/M68k/CodeModel/medium-pic.ll
rename to llvm/test/CodeGen/M68k/CodeModel/Medium/medium-pic.ll
diff --git a/llvm/test/CodeGen/M68k/CodeModel/medium-pie-global-access.ll b/llvm/test/CodeGen/M68k/CodeModel/Medium/medium-pie-global-access.ll
similarity index 100%
rename from llvm/test/CodeGen/M68k/CodeModel/medium-pie-global-access.ll
rename to llvm/test/CodeGen/M68k/CodeModel/Medium/medium-pie-global-access.ll
diff --git a/llvm/test/CodeGen/M68k/CodeModel/medium-pie.ll b/llvm/test/CodeGen/M68k/CodeModel/Medium/medium-pie.ll
similarity index 100%
rename from llvm/test/CodeGen/M68k/CodeModel/medium-pie.ll
rename to llvm/test/CodeGen/M68k/CodeModel/Medium/medium-pie.ll
diff --git a/llvm/test/CodeGen/M68k/CodeModel/medium-static.ll b/llvm/test/CodeGen/M68k/CodeModel/Medium/medium-static.ll
similarity index 100%
rename from llvm/test/CodeGen/M68k/CodeModel/medium-static.ll
rename to llvm/test/CodeGen/M68k/CodeModel/Medium/medium-static.ll
diff --git a/llvm/test/CodeGen/M68k/CodeModel/small-pic.ll b/llvm/test/CodeGen/M68k/CodeModel/Small/small-pic.ll
similarity index 100%
rename from llvm/test/CodeGen/M68k/CodeModel/small-pic.ll
rename to llvm/test/CodeGen/M68k/CodeModel/Small/small-pic.ll
diff --git a/llvm/test/CodeGen/M68k/CodeModel/small-pie-global-access.ll b/llvm/test/CodeGen/M68k/CodeModel/Small/small-pie-global-access.ll
similarity index 100%
rename from llvm/test/CodeGen/M68k/CodeModel/small-pie-global-access.ll
rename to llvm/test/CodeGen/M68k/CodeModel/Small/small-pie-global-access.ll
diff --git a/llvm/test/CodeGen/M68k/CodeModel/small-pie.ll b/llvm/test/CodeGen/M68k/CodeModel/Small/small-pie.ll
similarity index 100%
rename from llvm/test/CodeGen/M68k/CodeModel/small-pie.ll
rename to llvm/test/CodeGen/M68k/CodeModel/Small/small-pie.ll
diff --git a/llvm/test/CodeGen/M68k/CodeModel/small-static.ll b/llvm/test/CodeGen/M68k/CodeModel/Small/small-static.ll
similarity index 100%
rename from llvm/test/CodeGen/M68k/CodeModel/small-static.ll
rename to llvm/test/CodeGen/M68k/CodeModel/Small/small-static.ll

>From e5a800c7b96811c79a5340332221099dc9e16b79 Mon Sep 17 00:00:00 2001
From: kirk <knickish at gmail.com>
Date: Mon, 4 Nov 2024 19:19:23 +0000
Subject: [PATCH 2/2] [M68k] add all remaining addressing modes for atomic ops

---
 .../M68k/Disassembler/M68kDisassembler.cpp    |   8 +-
 llvm/lib/Target/M68k/M68kISelDAGToDAG.cpp     |  17 +-
 llvm/lib/Target/M68k/M68kInstrAtomics.td      | 146 +++++++++++++++++-
 llvm/test/CodeGen/M68k/Atomics/load-store.ll  |  48 ++++++
 llvm/test/CodeGen/M68k/Atomics/rmw.ll         | 141 +++++++++++++++++
 .../M68k/CodeModel/Large/Atomics/cmpxchg.ll   |  23 ++-
 .../CodeGen/M68k/Control/non-cmov-switch.ll   | 126 +++++++++++++++
 llvm/test/CodeGen/M68k/TLS/tls-arid.ll        |  19 +++
 8 files changed, 511 insertions(+), 17 deletions(-)
 create mode 100644 llvm/test/CodeGen/M68k/Control/non-cmov-switch.ll
 create mode 100644 llvm/test/CodeGen/M68k/TLS/tls-arid.ll

diff --git a/llvm/lib/Target/M68k/Disassembler/M68kDisassembler.cpp b/llvm/lib/Target/M68k/Disassembler/M68kDisassembler.cpp
index 7f0f737faccd0d..ce069ced66579a 100644
--- a/llvm/lib/Target/M68k/Disassembler/M68kDisassembler.cpp
+++ b/llvm/lib/Target/M68k/Disassembler/M68kDisassembler.cpp
@@ -19,8 +19,8 @@
 
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCDisassembler/MCDisassembler.h"
 #include "llvm/MC/MCDecoderOps.h"
+#include "llvm/MC/MCDisassembler/MCDisassembler.h"
 #include "llvm/MC/MCInst.h"
 #include "llvm/MC/TargetRegistry.h"
 #include "llvm/Support/Endian.h"
@@ -83,6 +83,12 @@ static DecodeStatus DecodeXR32RegisterClass(MCInst &Inst, uint64_t RegNo,
   return DecodeRegisterClass(Inst, RegNo, Address, Decoder);
 }
 
+static DecodeStatus DecodeXR32RegisterClass(MCInst &Inst, APInt RegNo,
+                                            uint64_t Address,
+                                            const void *Decoder) {
+  return DecodeRegisterClass(Inst, RegNo.getZExtValue(), Address, Decoder);
+}
+
 static DecodeStatus DecodeXR16RegisterClass(MCInst &Inst, uint64_t RegNo,
                                             uint64_t Address,
                                             const void *Decoder) {
diff --git a/llvm/lib/Target/M68k/M68kISelDAGToDAG.cpp b/llvm/lib/Target/M68k/M68kISelDAGToDAG.cpp
index f496085c88356a..2bb674cf4bacf4 100644
--- a/llvm/lib/Target/M68k/M68kISelDAGToDAG.cpp
+++ b/llvm/lib/Target/M68k/M68kISelDAGToDAG.cpp
@@ -708,6 +708,20 @@ bool M68kDAGToDAGISel::SelectARIPD(SDNode *Parent, SDValue N, SDValue &Base) {
   return false;
 }
 
+/// Assert-only check: is \p Parent a plain or atomic load/store node?
+[[maybe_unused]] static bool allowARIDWithDisp(SDNode *Parent) {
+  if (!Parent)
+    return false;
+  switch (Parent->getOpcode()) {
+  case ISD::LOAD:
+  case ISD::STORE:
+  case ISD::ATOMIC_LOAD:
+  case ISD::ATOMIC_STORE:
+    return true;
+  }
+  return false;
+}
+
 bool M68kDAGToDAGISel::SelectARID(SDNode *Parent, SDValue N, SDValue &Disp,
                                   SDValue &Base) {
   LLVM_DEBUG(dbgs() << "Selecting AddrType::ARID: ");
@@ -740,7 +754,8 @@ bool M68kDAGToDAGISel::SelectARID(SDNode *Parent, SDValue N, SDValue &Disp,
   Base = AM.BaseReg;
 
   if (getSymbolicDisplacement(AM, SDLoc(N), Disp)) {
-    assert(!AM.Disp && "Should not be any displacement");
+    assert((!AM.Disp || allowARIDWithDisp(Parent)) &&
+           "Should not be any displacement");
     LLVM_DEBUG(dbgs() << "SUCCESS, matched Symbol\n");
     return true;
   }
diff --git a/llvm/lib/Target/M68k/M68kInstrAtomics.td b/llvm/lib/Target/M68k/M68kInstrAtomics.td
index 9203a3ef4ed093..a2ccd88573f4bd 100644
--- a/llvm/lib/Target/M68k/M68kInstrAtomics.td
+++ b/llvm/lib/Target/M68k/M68kInstrAtomics.td
@@ -13,6 +13,15 @@ foreach size = [8, 16, 32] in {
   def : Pat<(!cast<SDPatternOperator>("atomic_load_"#size) MxCP_ARII:$ptr),
             (!cast<MxInst>("MOV"#size#"df") !cast<MxMemOp>("MxARII"#size):$ptr)>;
 
+  def : Pat<(!cast<SDPatternOperator>("atomic_load_"#size) MxCP_ARID:$ptr),
+            (!cast<MxInst>("MOV"#size#"dp") !cast<MxMemOp>("MxARID"#size):$ptr)>;
+
+  def : Pat<(!cast<SDPatternOperator>("atomic_load_"#size) MxCP_PCD:$ptr),
+            (!cast<MxInst>("MOV"#size#"dq") !cast<MxMemOp>("MxPCD"#size):$ptr)>;
+
+  def : Pat<(!cast<SDPatternOperator>("atomic_load_"#size) MxCP_PCI:$ptr),
+            (!cast<MxInst>("MOV"#size#"dk") !cast<MxMemOp>("MxPCI"#size):$ptr)>;
+
   def : Pat<(!cast<SDPatternOperator>("atomic_store_"#size) !cast<MxRegOp>("MxDRD"#size):$val, MxCP_ARI:$ptr),
             (!cast<MxInst>("MOV"#size#"jd") !cast<MxMemOp>("MxARI"#size):$ptr,
                                             !cast<MxRegOp>("MxDRD"#size):$val)>;
@@ -20,10 +29,22 @@ foreach size = [8, 16, 32] in {
   def : Pat<(!cast<SDPatternOperator>("atomic_store_"#size) !cast<MxRegOp>("MxDRD"#size):$val, MxCP_ARII:$ptr),
             (!cast<MxInst>("MOV"#size#"fd") !cast<MxMemOp>("MxARII"#size):$ptr,
                                             !cast<MxRegOp>("MxDRD"#size):$val)>;
+
+  def : Pat<(!cast<SDPatternOperator>("atomic_store_"#size) !cast<MxRegOp>("MxDRD"#size):$val, MxCP_ARID:$ptr),
+            (!cast<MxInst>("MOV"#size#"pd") !cast<MxMemOp>("MxARID"#size):$ptr,
+                                            !cast<MxRegOp>("MxDRD"#size):$val)>;
+
+  def : Pat<(!cast<SDPatternOperator>("atomic_store_"#size) !cast<MxRegOp>("MxDRD"#size):$val, MxCP_PCD:$ptr),
+            (!cast<MxInst>("MOV"#size#"qd") !cast<MxMemOp>("MxPCD"#size):$ptr,
+                                            !cast<MxRegOp>("MxDRD"#size):$val)>;                                   
+
+  def : Pat<(!cast<SDPatternOperator>("atomic_store_"#size) !cast<MxRegOp>("MxDRD"#size):$val, MxCP_PCI:$ptr),
+            (!cast<MxInst>("MOV"#size#"kd") !cast<MxMemOp>("MxPCI"#size):$ptr,
+                                            !cast<MxRegOp>("MxDRD"#size):$val)>;                               
 }
 
 let Predicates = [AtLeastM68020] in {
-class MxCASOp<bits<2> size_encoding, MxType type>
+class MxCASARIOp<bits<2> size_encoding, MxType type>
     : MxInst<(outs type.ROp:$out),
              (ins type.ROp:$dc, type.ROp:$du, !cast<MxMemOp>("MxARI"#type.Size):$mem),
              "cas."#type.Prefix#" $dc, $du, $mem"> {
@@ -36,17 +57,128 @@ class MxCASOp<bits<2> size_encoding, MxType type>
   let mayStore = 1;
 }
 
-def CAS8  : MxCASOp<0x1, MxType8d>;
-def CAS16 : MxCASOp<0x2, MxType16d>;
-def CAS32 : MxCASOp<0x3, MxType32d>;
+def CASARI8  : MxCASARIOp<0x1, MxType8d>;
+def CASARI16 : MxCASARIOp<0x2, MxType16d>;
+def CASARI32 : MxCASARIOp<0x3, MxType32d>;
+
+class MxCASARIDOp<bits<2> size_encoding, MxType type>
+    : MxInst<(outs type.ROp:$out),
+             (ins type.ROp:$dc, type.ROp:$du, !cast<MxMemOp>("MxARID"#type.Size):$mem),
+             "cas."#type.Prefix#" $dc, $du, $mem"> {
+  let Inst = (ascend
+                (descend 0b00001, size_encoding, 0b011, MxEncAddrMode_p<"mem">.EA),
+                (descend 0b0000000, (operand "$du", 3), 0b000, (operand "$dc", 3))
+              );
+  let Constraints = "$out = $dc";
+  let mayLoad = 1;
+  let mayStore = 1;
+}
+
+def CASARID8  : MxCASARIDOp<0x1, MxType8d>;
+def CASARID16 : MxCASARIDOp<0x2, MxType16d>;
+def CASARID32 : MxCASARIDOp<0x3, MxType32d>;
+
+class MxCASARIIOp<bits<2> size_encoding, MxType type>
+    : MxInst<(outs type.ROp:$out),
+             (ins type.ROp:$dc, type.ROp:$du, !cast<MxMemOp>("MxARII"#type.Size):$mem),
+             "cas."#type.Prefix#" $dc, $du, $mem"> {
+  let Inst = (ascend
+                (descend 0b00001, size_encoding, 0b011, MxEncAddrMode_f<"mem">.EA),
+                (descend 0b0000000, (operand "$du", 3), 0b000, (operand "$dc", 3))
+              );
+  let Constraints = "$out = $dc";
+  let mayLoad = 1;
+  let mayStore = 1;
+}
+
+def CASARII8  : MxCASARIIOp<0x1, MxType8d>;
+def CASARII16 : MxCASARIIOp<0x2, MxType16d>;
+def CASARII32 : MxCASARIIOp<0x3, MxType32d>;
 
+class MxCASPCIOp<bits<2> size_encoding, MxType type>
+    : MxInst<(outs type.ROp:$out),
+             (ins type.ROp:$dc, type.ROp:$du, !cast<MxMemOp>("MxPCI"#type.Size):$mem),
+             "cas."#type.Prefix#" $dc, $du, $mem"> {
+  let Inst = (ascend
+                (descend 0b00001, size_encoding, 0b011, MxEncAddrMode_k<"mem">.EA),
+                (descend 0b0000000, (operand "$du", 3), 0b000, (operand "$dc", 3))
+              );
+  let Constraints = "$out = $dc";
+  let mayLoad = 1;
+  let mayStore = 1;
+}
 
+def CASPCI8  : MxCASPCIOp<0x1, MxType8d>;
+def CASPCI16 : MxCASPCIOp<0x2, MxType16d>;
+def CASPCI32 : MxCASPCIOp<0x3, MxType32d>;
+
+class MxCASPCDOp<bits<2> size_encoding, MxType type>
+    : MxInst<(outs type.ROp:$out),
+             (ins type.ROp:$dc, type.ROp:$du, !cast<MxMemOp>("MxPCD"#type.Size):$mem),
+             "cas."#type.Prefix#" $dc, $du, $mem"> {
+  let Inst = (ascend
+                (descend 0b00001, size_encoding, 0b011, MxEncAddrMode_q<"mem">.EA),
+                (descend 0b0000000, (operand "$du", 3), 0b000, (operand "$dc", 3))
+              );
+  let Constraints = "$out = $dc";
+  let mayLoad = 1;
+  let mayStore = 1;
+}
+
+def CASPCD8  : MxCASPCDOp<0x1, MxType8d>;
+def CASPCD16 : MxCASPCDOp<0x2, MxType16d>;
+def CASPCD32 : MxCASPCDOp<0x3, MxType32d>;
+
+class MxCASALOp<bits<2> size_encoding, MxType type>
+    : MxInst<(outs type.ROp:$out),
+             (ins type.ROp:$dc, type.ROp:$du, !cast<MxMemOp>("MxAL"#type.Size):$mem),
+             "cas."#type.Prefix#" $dc, $du, $mem"> {
+  let Inst = (ascend
+                (descend 0b00001, size_encoding, 0b011, MxEncAddrMode_abs<"mem">.EA),
+                (descend 0b0000000, (operand "$du", 3), 0b000, (operand "$dc", 3))
+              );
+  let Constraints = "$out = $dc";
+  let mayLoad = 1;
+  let mayStore = 1;
+}
+
+def CASAL8  : MxCASALOp<0x1, MxType8d>;
+def CASAL16 : MxCASALOp<0x2, MxType16d>;
+def CASAL32 : MxCASALOp<0x3, MxType32d>;
+
+foreach mode = ["ARI", "ARII", "ARID", "PCI", "PCD", "AL"] in {
 foreach size = [8, 16, 32] in {
-  def : Pat<(!cast<SDPatternOperator>("atomic_cmp_swap_i"#size) MxCP_ARI:$ptr,
+  def : Pat<(!cast<SDPatternOperator>("atomic_cmp_swap_i"#size) !cast<ComplexPattern>("MxCP_"#mode):$ptr,
                                                                 !cast<MxRegOp>("MxDRD"#size):$cmp,
                                                                 !cast<MxRegOp>("MxDRD"#size):$new),
-            (!cast<MxInst>("CAS"#size) !cast<MxRegOp>("MxDRD"#size):$cmp,
+            (!cast<MxInst>("CAS"#mode#size) !cast<MxRegOp>("MxDRD"#size):$cmp,
                                        !cast<MxRegOp>("MxDRD"#size):$new,
-                                       !cast<MxMemOp>("MxARI"#size):$ptr)>;
+                                       !cast<MxMemOp>("Mx"#mode#size):$ptr)>;
+} // size
+} // addr mode
+
+class MxCASARDOp<bits<2> size_encoding, MxType type>
+    : MxInst<(outs type.ROp:$out),
+             (ins type.ROp:$dc, type.ROp:$du, !cast<MxRegOp>("MxARD"#type.Size):$mem),
+             "cas."#type.Prefix#" $dc, $du, $mem"> {
+  let Inst = (ascend
+                (descend 0b00001, size_encoding, 0b011, MxEncAddrMode_a<"mem">.EA),
+                (descend 0b0000000, (operand "$du", 3), 0b000, (operand "$dc", 3))
+              );
+  let Constraints = "$out = $dc";
+  let mayLoad = 1;
+  let mayStore = 1;
 }
+
+def CASARD16 : MxCASARDOp<0x2, MxType16a>;
+def CASARD32 : MxCASARDOp<0x3, MxType32a>;
+
+foreach size = [16, 32] in {
+  def : Pat<(!cast<SDPatternOperator>("atomic_cmp_swap_i"#size) !cast<MxRegOp>("MxARD"#size):$ptr,
+                                                                !cast<MxRegOp>("MxDRD"#size):$cmp,
+                                                                !cast<MxRegOp>("MxDRD"#size):$new),
+            (!cast<MxInst>("CASARD"#size) !cast<MxRegOp>("MxDRD"#size):$cmp,
+                                       !cast<MxRegOp>("MxDRD"#size):$new,
+                                       !cast<MxRegOp>("MxARD"#size):$ptr)>;
+} // size
 } // let Predicates = [AtLeastM68020]
diff --git a/llvm/test/CodeGen/M68k/Atomics/load-store.ll b/llvm/test/CodeGen/M68k/Atomics/load-store.ll
index 23fdfad05cab5d..c00a1faf2634b4 100644
--- a/llvm/test/CodeGen/M68k/Atomics/load-store.ll
+++ b/llvm/test/CodeGen/M68k/Atomics/load-store.ll
@@ -604,3 +604,51 @@ define void @atomic_store_i64_seq_cst(ptr %a, i64 %val) nounwind {
   store atomic i64 %val, ptr %a seq_cst, align 8
   ret void
 }
+
+define void @store_arid(ptr nonnull align 4 %a) {
+; NO-ATOMIC-LABEL: store_arid:
+; NO-ATOMIC:         .cfi_startproc
+; NO-ATOMIC-NEXT:  ; %bb.0: ; %start
+; NO-ATOMIC-NEXT:    moveq #1, %d0
+; NO-ATOMIC-NEXT:    move.l (4,%sp), %a0
+; NO-ATOMIC-NEXT:    move.l %d0, (32,%a0)
+; NO-ATOMIC-NEXT:    rts
+;
+; ATOMIC-LABEL: store_arid:
+; ATOMIC:         .cfi_startproc
+; ATOMIC-NEXT:  ; %bb.0: ; %start
+; ATOMIC-NEXT:    moveq #1, %d0
+; ATOMIC-NEXT:    move.l (4,%sp), %a0
+; ATOMIC-NEXT:    move.l %d0, (32,%a0)
+; ATOMIC-NEXT:    rts
+start:
+  %1 = getelementptr inbounds i32, ptr %a, i32 8
+  store atomic i32 1, ptr %1 seq_cst, align 4
+  br label %exit
+
+exit:                                              ; preds = %start
+  ret void
+}
+
+define i32 @load_arid(ptr nonnull align 4 %a) {
+; NO-ATOMIC-LABEL: load_arid:
+; NO-ATOMIC:         .cfi_startproc
+; NO-ATOMIC-NEXT:  ; %bb.0: ; %start
+; NO-ATOMIC-NEXT:    move.l (4,%sp), %a0
+; NO-ATOMIC-NEXT:    move.l (32,%a0), %d0
+; NO-ATOMIC-NEXT:    rts
+;
+; ATOMIC-LABEL: load_arid:
+; ATOMIC:         .cfi_startproc
+; ATOMIC-NEXT:  ; %bb.0: ; %start
+; ATOMIC-NEXT:    move.l (4,%sp), %a0
+; ATOMIC-NEXT:    move.l (32,%a0), %d0
+; ATOMIC-NEXT:    rts
+start:
+  %1 = getelementptr inbounds i32, ptr %a, i32 8
+  %2 = load atomic i32, ptr %1 seq_cst, align 4
+  br label %exit
+
+exit:                                              ; preds = %start
+  ret i32 %2
+}
diff --git a/llvm/test/CodeGen/M68k/Atomics/rmw.ll b/llvm/test/CodeGen/M68k/Atomics/rmw.ll
index ce456f0960eec1..a277b8fe72ae47 100644
--- a/llvm/test/CodeGen/M68k/Atomics/rmw.ll
+++ b/llvm/test/CodeGen/M68k/Atomics/rmw.ll
@@ -588,3 +588,144 @@ entry:
   %old = atomicrmw xchg ptr %ptr, i32 %val monotonic
   ret i32 %old
 }
+
+define i8 @atomicrmw_sub_i8_arid(ptr align 2 %self) {
+; NO-ATOMIC-LABEL: atomicrmw_sub_i8_arid:
+; NO-ATOMIC:         .cfi_startproc
+; NO-ATOMIC-NEXT:  ; %bb.0: ; %start
+; NO-ATOMIC-NEXT:    suba.l #12, %sp
+; NO-ATOMIC-NEXT:    .cfi_def_cfa_offset -16
+; NO-ATOMIC-NEXT:    move.l (16,%sp), %a0
+; NO-ATOMIC-NEXT:    move.l (%a0), %d0
+; NO-ATOMIC-NEXT:    add.l #4, %d0
+; NO-ATOMIC-NEXT:    move.l %d0, (%sp)
+; NO-ATOMIC-NEXT:    move.l #1, (4,%sp)
+; NO-ATOMIC-NEXT:    jsr __sync_fetch_and_sub_1
+; NO-ATOMIC-NEXT:    adda.l #12, %sp
+; NO-ATOMIC-NEXT:    rts
+;
+; ATOMIC-LABEL: atomicrmw_sub_i8_arid:
+; ATOMIC:         .cfi_startproc
+; ATOMIC-NEXT:  ; %bb.0: ; %start
+; ATOMIC-NEXT:    suba.l #4, %sp
+; ATOMIC-NEXT:    .cfi_def_cfa_offset -8
+; ATOMIC-NEXT:    movem.l %d2, (0,%sp) ; 8-byte Folded Spill
+; ATOMIC-NEXT:    move.l (8,%sp), %a0
+; ATOMIC-NEXT:    move.l (%a0), %a0
+; ATOMIC-NEXT:    move.b (4,%a0), %d1
+; ATOMIC-NEXT:    move.b %d1, %d0
+; ATOMIC-NEXT:  .LBB12_1: ; %atomicrmw.start
+; ATOMIC-NEXT:    ; =>This Inner Loop Header: Depth=1
+; ATOMIC-NEXT:    move.b %d1, %d2
+; ATOMIC-NEXT:    add.b #-1, %d2
+; ATOMIC-NEXT:    cas.b %d0, %d2, (4,%a0)
+; ATOMIC-NEXT:    move.b %d0, %d2
+; ATOMIC-NEXT:    sub.b %d1, %d2
+; ATOMIC-NEXT:    seq %d1
+; ATOMIC-NEXT:    sub.b #1, %d1
+; ATOMIC-NEXT:    move.b %d0, %d1
+; ATOMIC-NEXT:    bne .LBB12_1
+; ATOMIC-NEXT:  ; %bb.2: ; %atomicrmw.end
+; ATOMIC-NEXT:    movem.l (0,%sp), %d2 ; 8-byte Folded Reload
+; ATOMIC-NEXT:    adda.l #4, %sp
+; ATOMIC-NEXT:    rts
+start:
+  %self1 = load ptr, ptr %self, align 2
+  %_18.i.i = getelementptr inbounds i8, ptr %self1, i32 4
+  %6 = atomicrmw sub ptr %_18.i.i, i8 1 release, align 4
+  ret i8 %6
+}
+
+define i16 @atomicrmw_sub_i16_arid(ptr align 2 %self) {
+; NO-ATOMIC-LABEL: atomicrmw_sub_i16_arid:
+; NO-ATOMIC:         .cfi_startproc
+; NO-ATOMIC-NEXT:  ; %bb.0: ; %start
+; NO-ATOMIC-NEXT:    suba.l #12, %sp
+; NO-ATOMIC-NEXT:    .cfi_def_cfa_offset -16
+; NO-ATOMIC-NEXT:    move.l (16,%sp), %a0
+; NO-ATOMIC-NEXT:    move.l (%a0), %d0
+; NO-ATOMIC-NEXT:    add.l #4, %d0
+; NO-ATOMIC-NEXT:    move.l %d0, (%sp)
+; NO-ATOMIC-NEXT:    move.l #1, (4,%sp)
+; NO-ATOMIC-NEXT:    jsr __sync_fetch_and_sub_2
+; NO-ATOMIC-NEXT:    adda.l #12, %sp
+; NO-ATOMIC-NEXT:    rts
+;
+; ATOMIC-LABEL: atomicrmw_sub_i16_arid:
+; ATOMIC:         .cfi_startproc
+; ATOMIC-NEXT:  ; %bb.0: ; %start
+; ATOMIC-NEXT:    suba.l #4, %sp
+; ATOMIC-NEXT:    .cfi_def_cfa_offset -8
+; ATOMIC-NEXT:    movem.l %d2, (0,%sp) ; 8-byte Folded Spill
+; ATOMIC-NEXT:    move.l (8,%sp), %a0
+; ATOMIC-NEXT:    move.l (%a0), %a0
+; ATOMIC-NEXT:    move.w (4,%a0), %d1
+; ATOMIC-NEXT:    move.w %d1, %d0
+; ATOMIC-NEXT:  .LBB13_1: ; %atomicrmw.start
+; ATOMIC-NEXT:    ; =>This Inner Loop Header: Depth=1
+; ATOMIC-NEXT:    move.w %d1, %d2
+; ATOMIC-NEXT:    add.w #-1, %d2
+; ATOMIC-NEXT:    cas.w %d0, %d2, (4,%a0)
+; ATOMIC-NEXT:    move.w %d0, %d2
+; ATOMIC-NEXT:    sub.w %d1, %d2
+; ATOMIC-NEXT:    seq %d1
+; ATOMIC-NEXT:    sub.b #1, %d1
+; ATOMIC-NEXT:    move.w %d0, %d1
+; ATOMIC-NEXT:    bne .LBB13_1
+; ATOMIC-NEXT:  ; %bb.2: ; %atomicrmw.end
+; ATOMIC-NEXT:    movem.l (0,%sp), %d2 ; 8-byte Folded Reload
+; ATOMIC-NEXT:    adda.l #4, %sp
+; ATOMIC-NEXT:    rts
+start:
+  %self1 = load ptr, ptr %self, align 2
+  %_18.i.i = getelementptr inbounds i8, ptr %self1, i32 4
+  %6 = atomicrmw sub ptr %_18.i.i, i16 1 release, align 4
+  ret i16 %6
+}
+
+define i32 @atomicrmw_sub_i32_arid(ptr align 2 %self) {
+; NO-ATOMIC-LABEL: atomicrmw_sub_i32_arid:
+; NO-ATOMIC:         .cfi_startproc
+; NO-ATOMIC-NEXT:  ; %bb.0: ; %start
+; NO-ATOMIC-NEXT:    suba.l #12, %sp
+; NO-ATOMIC-NEXT:    .cfi_def_cfa_offset -16
+; NO-ATOMIC-NEXT:    move.l (16,%sp), %a0
+; NO-ATOMIC-NEXT:    move.l (%a0), %d0
+; NO-ATOMIC-NEXT:    add.l #4, %d0
+; NO-ATOMIC-NEXT:    move.l %d0, (%sp)
+; NO-ATOMIC-NEXT:    move.l #1, (4,%sp)
+; NO-ATOMIC-NEXT:    jsr __sync_fetch_and_sub_4
+; NO-ATOMIC-NEXT:    adda.l #12, %sp
+; NO-ATOMIC-NEXT:    rts
+;
+; ATOMIC-LABEL: atomicrmw_sub_i32_arid:
+; ATOMIC:         .cfi_startproc
+; ATOMIC-NEXT:  ; %bb.0: ; %start
+; ATOMIC-NEXT:    suba.l #4, %sp
+; ATOMIC-NEXT:    .cfi_def_cfa_offset -8
+; ATOMIC-NEXT:    movem.l %d2, (0,%sp) ; 8-byte Folded Spill
+; ATOMIC-NEXT:    move.l (8,%sp), %a0
+; ATOMIC-NEXT:    move.l (%a0), %a0
+; ATOMIC-NEXT:    move.l (4,%a0), %d1
+; ATOMIC-NEXT:    move.l %d1, %d0
+; ATOMIC-NEXT:  .LBB14_1: ; %atomicrmw.start
+; ATOMIC-NEXT:    ; =>This Inner Loop Header: Depth=1
+; ATOMIC-NEXT:    move.l %d1, %d2
+; ATOMIC-NEXT:    add.l #-1, %d2
+; ATOMIC-NEXT:    cas.l %d0, %d2, (4,%a0)
+; ATOMIC-NEXT:    move.l %d0, %d2
+; ATOMIC-NEXT:    sub.l %d1, %d2
+; ATOMIC-NEXT:    seq %d1
+; ATOMIC-NEXT:    sub.b #1, %d1
+; ATOMIC-NEXT:    move.l %d0, %d1
+; ATOMIC-NEXT:    bne .LBB14_1
+; ATOMIC-NEXT:  ; %bb.2: ; %atomicrmw.end
+; ATOMIC-NEXT:    movem.l (0,%sp), %d2 ; 8-byte Folded Reload
+; ATOMIC-NEXT:    adda.l #4, %sp
+; ATOMIC-NEXT:    rts
+start:
+  %self1 = load ptr, ptr %self, align 2
+  %_18.i.i = getelementptr inbounds i8, ptr %self1, i32 4
+  %6 = atomicrmw sub ptr %_18.i.i, i32 1 release, align 4
+  ret i32 %6
+}
diff --git a/llvm/test/CodeGen/M68k/CodeModel/Large/Atomics/cmpxchg.ll b/llvm/test/CodeGen/M68k/CodeModel/Large/Atomics/cmpxchg.ll
index 37ddc8e56dcdaf..e21364a8d71186 100644
--- a/llvm/test/CodeGen/M68k/CodeModel/Large/Atomics/cmpxchg.ll
+++ b/llvm/test/CodeGen/M68k/CodeModel/Large/Atomics/cmpxchg.ll
@@ -18,10 +18,9 @@ define { i32, i1 } @std_thread_new() {
 ; NO-ATOMIC-NEXT:  ; %bb.0: ; %start
 ; NO-ATOMIC-NEXT:    suba.l #12, %sp
 ; NO-ATOMIC-NEXT:    .cfi_def_cfa_offset -16
-; NO-ATOMIC-NEXT:    lea (thread_id,%pc), %a0
-; NO-ATOMIC-NEXT:    move.l %a0, (%sp)
 ; NO-ATOMIC-NEXT:    move.l #1, (8,%sp)
 ; NO-ATOMIC-NEXT:    move.l #0, (4,%sp)
+; NO-ATOMIC-NEXT:    move.l #thread_id, (%sp)
 ; NO-ATOMIC-NEXT:    jsr __sync_val_compare_and_swap_4
 ; NO-ATOMIC-NEXT:    cmpi.l #0, %d0
 ; NO-ATOMIC-NEXT:    seq %d1
@@ -33,7 +32,8 @@ define { i32, i1 } @std_thread_new() {
 ; NO-ATOMIC-PIC-NEXT:  ; %bb.0: ; %start
 ; NO-ATOMIC-PIC-NEXT:    suba.l #12, %sp
 ; NO-ATOMIC-PIC-NEXT:    .cfi_def_cfa_offset -16
-; NO-ATOMIC-PIC-NEXT:    lea (thread_id,%pc), %a0
+; NO-ATOMIC-PIC-NEXT:    lea (_GLOBAL_OFFSET_TABLE_@GOTPCREL,%pc), %a0
+; NO-ATOMIC-PIC-NEXT:    adda.l #thread_id@GOTOFF, %a0
 ; NO-ATOMIC-PIC-NEXT:    move.l %a0, (%sp)
 ; NO-ATOMIC-PIC-NEXT:    move.l #1, (8,%sp)
 ; NO-ATOMIC-PIC-NEXT:    move.l #0, (4,%sp)
@@ -46,7 +46,7 @@ define { i32, i1 } @std_thread_new() {
 ; ATOMIC-LABEL: std_thread_new:
 ; ATOMIC:         .cfi_startproc
 ; ATOMIC-NEXT:  ; %bb.0: ; %start
-; ATOMIC-NEXT:    lea (thread_id,%pc), %a0
+; ATOMIC-NEXT:    move.l #thread_id, %a0
 ; ATOMIC-NEXT:    moveq #1, %d1
 ; ATOMIC-NEXT:    moveq #0, %d0
 ; ATOMIC-NEXT:    cas.l %d0, %d1, (%a0)
@@ -57,12 +57,19 @@ define { i32, i1 } @std_thread_new() {
 ; ATOMIC-PIC-LABEL: std_thread_new:
 ; ATOMIC-PIC:         .cfi_startproc
 ; ATOMIC-PIC-NEXT:  ; %bb.0: ; %start
-; ATOMIC-PIC-NEXT:    lea (thread_id,%pc), %a0
-; ATOMIC-PIC-NEXT:    moveq #1, %d1
-; ATOMIC-PIC-NEXT:    moveq #0, %d0
-; ATOMIC-PIC-NEXT:    cas.l %d0, %d1, (%a0)
+; ATOMIC-PIC-NEXT:    suba.l #4, %sp
+; ATOMIC-PIC-NEXT:    .cfi_def_cfa_offset -8
+; ATOMIC-PIC-NEXT:    movem.l %a2, (0,%sp) ; 8-byte Folded Spill
+; ATOMIC-PIC-NEXT:    lea (_GLOBAL_OFFSET_TABLE_@GOTPCREL,%pc), %a0
+; ATOMIC-PIC-NEXT:    adda.l #thread_id@GOTOFF, %a0
+; ATOMIC-PIC-NEXT:    move.w #1, %a1
+; ATOMIC-PIC-NEXT:    suba.l %a2, %a2
+; ATOMIC-PIC-NEXT:    cas.l %a2, %a1, %a0
+; ATOMIC-PIC-NEXT:    move.l %a2, %d0
 ; ATOMIC-PIC-NEXT:    cmpi.l #0, %d0
 ; ATOMIC-PIC-NEXT:    seq %d1
+; ATOMIC-PIC-NEXT:    movem.l (0,%sp), %a2 ; 8-byte Folded Reload
+; ATOMIC-PIC-NEXT:    adda.l #4, %sp
 ; ATOMIC-PIC-NEXT:    rts
 start:
   %1 = cmpxchg ptr @thread_id, i32 0, i32 1 acquire monotonic, align 4
diff --git a/llvm/test/CodeGen/M68k/Control/non-cmov-switch.ll b/llvm/test/CodeGen/M68k/Control/non-cmov-switch.ll
new file mode 100644
index 00000000000000..90d2be017ecdb2
--- /dev/null
+++ b/llvm/test/CodeGen/M68k/Control/non-cmov-switch.ll
@@ -0,0 +1,126 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=m68k-linux  -mcpu=M68020 --verify-machineinstrs | FileCheck %s
+
+define internal void @select_i32(i32 %self, ptr nonnull %value) {
+; CHECK-LABEL: select_i32:
+; CHECK:         .cfi_startproc
+; CHECK-NEXT:  ; %bb.0: ; %start
+; CHECK-NEXT:    suba.l #4, %sp
+; CHECK-NEXT:    .cfi_def_cfa_offset -8
+; CHECK-NEXT:    movem.l %d2, (0,%sp) ; 8-byte Folded Spill
+; CHECK-NEXT:    cmpi.l #0, (8,%sp)
+; CHECK-NEXT:    move.w %ccr, %d2
+; CHECK-NEXT:    sne %d1
+; CHECK-NEXT:    move.l (12,%sp), %d0
+; CHECK-NEXT:    move.w %d2, %ccr
+; CHECK-NEXT:    bne .LBB0_2
+; CHECK-NEXT:  ; %bb.1: ; %start
+; CHECK-NEXT:    and.l #255, %d1
+; CHECK-NEXT:    cmpi.l #0, %d1
+; CHECK-NEXT:    bne .LBB0_3
+; CHECK-NEXT:  .LBB0_2: ; %null
+; CHECK-NEXT:    suba.l %a0, %a0
+; CHECK-NEXT:    move.l %d0, (%a0)
+; CHECK-NEXT:  .LBB0_3: ; %exit
+; CHECK-NEXT:    movem.l (0,%sp), %d2 ; 8-byte Folded Reload
+; CHECK-NEXT:    adda.l #4, %sp
+; CHECK-NEXT:    rts
+start:
+  %2 = icmp eq i32 %self, 0
+  %3 = select i1 %2, i32 0, i32 1
+  switch i32 %3, label %exit [
+    i32 0, label %nonnull
+    i32 1, label %null
+  ]
+
+nonnull:                                              ; preds = %start
+  store ptr %value, ptr null, align 2
+  br label %exit
+
+null:                                              ; preds = %start
+  store ptr %value, ptr null, align 2
+  br label %exit
+
+exit:                                              ; preds = %nonnull, %null
+  ret void
+}
+
+define internal void @select_i16(i16 %self, ptr nonnull %value) {
+; CHECK-LABEL: select_i16:
+; CHECK:         .cfi_startproc
+; CHECK-NEXT:  ; %bb.0: ; %start
+; CHECK-NEXT:    suba.l #4, %sp
+; CHECK-NEXT:    .cfi_def_cfa_offset -8
+; CHECK-NEXT:    movem.l %d2, (0,%sp) ; 8-byte Folded Spill
+; CHECK-NEXT:    cmpi.w #0, (10,%sp)
+; CHECK-NEXT:    move.w %ccr, %d2
+; CHECK-NEXT:    sne %d1
+; CHECK-NEXT:    move.l (12,%sp), %d0
+; CHECK-NEXT:    move.w %d2, %ccr
+; CHECK-NEXT:    bne .LBB1_2
+; CHECK-NEXT:  ; %bb.1: ; %start
+; CHECK-NEXT:    and.l #255, %d1
+; CHECK-NEXT:    cmpi.w #0, %d1
+; CHECK-NEXT:    bne .LBB1_3
+; CHECK-NEXT:  .LBB1_2: ; %null
+; CHECK-NEXT:    suba.l %a0, %a0
+; CHECK-NEXT:    move.l %d0, (%a0)
+; CHECK-NEXT:  .LBB1_3: ; %exit
+; CHECK-NEXT:    movem.l (0,%sp), %d2 ; 8-byte Folded Reload
+; CHECK-NEXT:    adda.l #4, %sp
+; CHECK-NEXT:    rts
+start:
+  %2 = icmp eq i16 %self, 0
+  %3 = select i1 %2, i16 0, i16 1
+  switch i16 %3, label %exit [
+    i16 0, label %nonnull
+    i16 1, label %null
+  ]
+
+nonnull:                                              ; preds = %start
+  store ptr %value, ptr null, align 2
+  br label %exit
+
+null:                                              ; preds = %start
+  store ptr %value, ptr null, align 2
+  br label %exit
+
+exit:                                              ; preds = %nonnull, %null
+  ret void
+}
+
+define internal void @select_i8(i8 %self, ptr nonnull %value) {
+; CHECK-LABEL: select_i8:
+; CHECK:         .cfi_startproc
+; CHECK-NEXT:  ; %bb.0: ; %start
+; CHECK-NEXT:    move.l (8,%sp), %d0
+; CHECK-NEXT:    cmpi.b #0, (7,%sp)
+; CHECK-NEXT:    sne %d1
+; CHECK-NEXT:    bne .LBB2_2
+; CHECK-NEXT:  ; %bb.1: ; %start
+; CHECK-NEXT:    cmpi.b #0, %d1
+; CHECK-NEXT:    bne .LBB2_3
+; CHECK-NEXT:  .LBB2_2: ; %null
+; CHECK-NEXT:    suba.l %a0, %a0
+; CHECK-NEXT:    move.l %d0, (%a0)
+; CHECK-NEXT:  .LBB2_3: ; %exit
+; CHECK-NEXT:    rts
+start:
+  %2 = icmp eq i8 %self, 0
+  %3 = select i1 %2, i8 0, i8 1
+  switch i8 %3, label %exit [
+    i8 0, label %nonnull
+    i8 1, label %null
+  ]
+
+nonnull:                                              ; preds = %start
+  store ptr %value, ptr null, align 2
+  br label %exit
+
+null:                                              ; preds = %start
+  store ptr %value, ptr null, align 2
+  br label %exit
+
+exit:                                              ; preds = %nonnull, %null
+  ret void
+}
diff --git a/llvm/test/CodeGen/M68k/TLS/tls-arid.ll b/llvm/test/CodeGen/M68k/TLS/tls-arid.ll
new file mode 100644
index 00000000000000..88189f648854b6
--- /dev/null
+++ b/llvm/test/CodeGen/M68k/TLS/tls-arid.ll
@@ -0,0 +1,19 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=m68k < %s | FileCheck %s
+
+@tls = internal thread_local global <{ [5 x i8], [1 x i8] }> <{ [5 x i8] zeroinitializer, [1 x i8] undef }>, align 4
+
+define i8 @tls_arid(ptr noundef nonnull %0) unnamed_addr #2 {
+; CHECK-LABEL: tls_arid:
+; CHECK:         .cfi_startproc
+; CHECK-NEXT:  ; %bb.0: ; %start
+; CHECK-NEXT:    suba.l #4, %sp
+; CHECK-NEXT:    .cfi_def_cfa_offset -8
+; CHECK-NEXT:    jsr __m68k_read_tp
+; CHECK-NEXT:    move.b (tls@TPOFF+4,%a0), %d0
+; CHECK-NEXT:    adda.l #4, %sp
+; CHECK-NEXT:    rts
+start:
+  %1 = load i8, ptr getelementptr inbounds (i8, ptr @tls, i32 4), align 4
+  ret i8 %1
+}



More information about the llvm-commits mailing list