[llvm] [AArch64][ISel] Add support for v8.4a RCpc `ldapur`/`stlur` (PR #67879)

Antonio Frighetto via llvm-commits llvm-commits at lists.llvm.org
Mon Oct 30 01:38:08 PDT 2023


https://github.com/antoniofrighetto updated https://github.com/llvm/llvm-project/pull/67879

From 9c86449b988e29535a1135f578ff397044f6be62 Mon Sep 17 00:00:00 2001
From: Antonio Frighetto <me at antoniofrighetto.com>
Date: Fri, 6 Oct 2023 15:06:58 +0000
Subject: [PATCH 1/2] [AArch64] Introduce tests for PR67879 (NFC)

---
 .../Atomics/aarch64-atomic-load-rcpc_immo.ll  | 956 ++++++++++++++++++
 .../Atomics/aarch64-atomic-store-rcpc_immo.ll | 385 +++++++
 2 files changed, 1341 insertions(+)
 create mode 100644 llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-load-rcpc_immo.ll
 create mode 100644 llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-store-rcpc_immo.ll

diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-load-rcpc_immo.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-load-rcpc_immo.ll
new file mode 100644
index 000000000000000..a03ced98c1a9f4b
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-load-rcpc_immo.ll
@@ -0,0 +1,956 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --filter-out "\b(sp)\b" --filter "^\s*(ld|st[^r]|swp|cas|bl|add|and|eor|orn|orr|sub|mvn|sxt|cmp|ccmp|csel|dmb)"
+; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+v8.4a -mattr=+rcpc-immo -O0 | FileCheck %s --check-prefixes=CHECK,O0-SDAG
+; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+v8.4a -mattr=+rcpc-immo -O1 | FileCheck %s --check-prefixes=CHECK,O1-SDAG
+; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+v8.4a -mattr=+rcpc-immo -global-isel -global-isel-abort=2 -O0 | FileCheck %s --check-prefixes=CHECK,GISEL
+
+define dso_local i8 @load_atomic_i8_aligned_unordered(ptr %ptr) {
+; CHECK-LABEL: load_atomic_i8_aligned_unordered:
+; CHECK:    ldrb w0, [x0, #4]
+    %gep = getelementptr inbounds i8, ptr %ptr, i32 4
+    %r = load atomic i8, ptr %gep unordered, align 1
+    ret i8 %r
+}
+
+define dso_local i8 @load_atomic_i8_aligned_unordered_const(ptr readonly %ptr) {
+; CHECK-LABEL: load_atomic_i8_aligned_unordered_const:
+; CHECK:    ldrb w0, [x0, #4]
+    %gep = getelementptr inbounds i8, ptr %ptr, i32 4
+    %r = load atomic i8, ptr %gep unordered, align 1
+    ret i8 %r
+}
+
+define dso_local i8 @load_atomic_i8_aligned_monotonic(ptr %ptr) {
+; CHECK-LABEL: load_atomic_i8_aligned_monotonic:
+; CHECK:    ldrb w0, [x0, #4]
+    %gep = getelementptr inbounds i8, ptr %ptr, i32 4
+    %r = load atomic i8, ptr %gep monotonic, align 1
+    ret i8 %r
+}
+
+define dso_local i8 @load_atomic_i8_aligned_monotonic_const(ptr readonly %ptr) {
+; CHECK-LABEL: load_atomic_i8_aligned_monotonic_const:
+; CHECK:    ldrb w0, [x0, #4]
+    %gep = getelementptr inbounds i8, ptr %ptr, i32 4
+    %r = load atomic i8, ptr %gep monotonic, align 1
+    ret i8 %r
+}
+
+define dso_local i8 @load_atomic_i8_aligned_acquire(ptr %ptr) {
+; CHECK-LABEL: load_atomic_i8_aligned_acquire:
+; CHECK:    add x8, x0, #4
+; CHECK:    ldaprb w0, [x8]
+    %gep = getelementptr inbounds i8, ptr %ptr, i32 4
+    %r = load atomic i8, ptr %gep acquire, align 1
+    ret i8 %r
+}
+
+define dso_local i8 @load_atomic_i8_aligned_acquire_const(ptr readonly %ptr) {
+; CHECK-LABEL: load_atomic_i8_aligned_acquire_const:
+; CHECK:    add x8, x0, #4
+; CHECK:    ldaprb w0, [x8]
+    %gep = getelementptr inbounds i8, ptr %ptr, i32 4
+    %r = load atomic i8, ptr %gep acquire, align 1
+    ret i8 %r
+}
+
+define dso_local i8 @load_atomic_i8_aligned_seq_cst(ptr %ptr) {
+; CHECK-LABEL: load_atomic_i8_aligned_seq_cst:
+; CHECK:    add x8, x0, #4
+; CHECK:    ldarb w0, [x8]
+    %gep = getelementptr inbounds i8, ptr %ptr, i32 4
+    %r = load atomic i8, ptr %gep seq_cst, align 1
+    ret i8 %r
+}
+
+define dso_local i8 @load_atomic_i8_aligned_seq_cst_const(ptr readonly %ptr) {
+; CHECK-LABEL: load_atomic_i8_aligned_seq_cst_const:
+; CHECK:    add x8, x0, #4
+; CHECK:    ldarb w0, [x8]
+    %gep = getelementptr inbounds i8, ptr %ptr, i32 4
+    %r = load atomic i8, ptr %gep seq_cst, align 1
+    ret i8 %r
+}
+
+define dso_local i16 @load_atomic_i16_aligned_unordered(ptr %ptr) {
+; CHECK-LABEL: load_atomic_i16_aligned_unordered:
+; CHECK:    ldrh w0, [x0, #8]
+    %gep = getelementptr inbounds i16, ptr %ptr, i32 4
+    %r = load atomic i16, ptr %gep unordered, align 2
+    ret i16 %r
+}
+
+define dso_local i16 @load_atomic_i16_aligned_unordered_const(ptr readonly %ptr) {
+; CHECK-LABEL: load_atomic_i16_aligned_unordered_const:
+; CHECK:    ldrh w0, [x0, #8]
+    %gep = getelementptr inbounds i16, ptr %ptr, i32 4
+    %r = load atomic i16, ptr %gep unordered, align 2
+    ret i16 %r
+}
+
+define dso_local i16 @load_atomic_i16_aligned_monotonic(ptr %ptr) {
+; CHECK-LABEL: load_atomic_i16_aligned_monotonic:
+; CHECK:    ldrh w0, [x0, #8]
+    %gep = getelementptr inbounds i16, ptr %ptr, i32 4
+    %r = load atomic i16, ptr %gep monotonic, align 2
+    ret i16 %r
+}
+
+define dso_local i16 @load_atomic_i16_aligned_monotonic_const(ptr readonly %ptr) {
+; CHECK-LABEL: load_atomic_i16_aligned_monotonic_const:
+; CHECK:    ldrh w0, [x0, #8]
+    %gep = getelementptr inbounds i16, ptr %ptr, i32 4
+    %r = load atomic i16, ptr %gep monotonic, align 2
+    ret i16 %r
+}
+
+define dso_local i16 @load_atomic_i16_aligned_acquire(ptr %ptr) {
+; CHECK-LABEL: load_atomic_i16_aligned_acquire:
+; CHECK:    add x8, x0, #8
+; CHECK:    ldaprh w0, [x8]
+    %gep = getelementptr inbounds i16, ptr %ptr, i32 4
+    %r = load atomic i16, ptr %gep acquire, align 2
+    ret i16 %r
+}
+
+define dso_local i16 @load_atomic_i16_aligned_acquire_const(ptr readonly %ptr) {
+; CHECK-LABEL: load_atomic_i16_aligned_acquire_const:
+; CHECK:    add x8, x0, #8
+; CHECK:    ldaprh w0, [x8]
+    %gep = getelementptr inbounds i16, ptr %ptr, i32 4
+    %r = load atomic i16, ptr %gep acquire, align 2
+    ret i16 %r
+}
+
+define dso_local i16 @load_atomic_i16_aligned_seq_cst(ptr %ptr) {
+; CHECK-LABEL: load_atomic_i16_aligned_seq_cst:
+; CHECK:    add x8, x0, #8
+; CHECK:    ldarh w0, [x8]
+    %gep = getelementptr inbounds i16, ptr %ptr, i32 4
+    %r = load atomic i16, ptr %gep seq_cst, align 2
+    ret i16 %r
+}
+
+define dso_local i16 @load_atomic_i16_aligned_seq_cst_const(ptr readonly %ptr) {
+; CHECK-LABEL: load_atomic_i16_aligned_seq_cst_const:
+; CHECK:    add x8, x0, #8
+; CHECK:    ldarh w0, [x8]
+    %gep = getelementptr inbounds i16, ptr %ptr, i32 4
+    %r = load atomic i16, ptr %gep seq_cst, align 2
+    ret i16 %r
+}
+
+define dso_local i32 @load_atomic_i32_aligned_unordered(ptr %ptr) {
+; CHECK-LABEL: load_atomic_i32_aligned_unordered:
+; CHECK:    ldr w0, [x0, #16]
+    %gep = getelementptr inbounds i32, ptr %ptr, i32 4
+    %r = load atomic i32, ptr %gep unordered, align 4
+    ret i32 %r
+}
+
+define dso_local i32 @load_atomic_i32_aligned_unordered_const(ptr readonly %ptr) {
+; CHECK-LABEL: load_atomic_i32_aligned_unordered_const:
+; CHECK:    ldr w0, [x0, #16]
+    %gep = getelementptr inbounds i32, ptr %ptr, i32 4
+    %r = load atomic i32, ptr %gep unordered, align 4
+    ret i32 %r
+}
+
+define dso_local i32 @load_atomic_i32_aligned_monotonic(ptr %ptr) {
+; CHECK-LABEL: load_atomic_i32_aligned_monotonic:
+; CHECK:    ldr w0, [x0, #16]
+    %gep = getelementptr inbounds i32, ptr %ptr, i32 4
+    %r = load atomic i32, ptr %gep monotonic, align 4
+    ret i32 %r
+}
+
+define dso_local i32 @load_atomic_i32_aligned_monotonic_const(ptr readonly %ptr) {
+; CHECK-LABEL: load_atomic_i32_aligned_monotonic_const:
+; CHECK:    ldr w0, [x0, #16]
+    %gep = getelementptr inbounds i32, ptr %ptr, i32 4
+    %r = load atomic i32, ptr %gep monotonic, align 4
+    ret i32 %r
+}
+
+define dso_local i32 @load_atomic_i32_aligned_acquire(ptr %ptr) {
+; CHECK-LABEL: load_atomic_i32_aligned_acquire:
+; CHECK:    add x8, x0, #16
+; CHECK:    ldapr w0, [x8]
+    %gep = getelementptr inbounds i32, ptr %ptr, i32 4
+    %r = load atomic i32, ptr %gep acquire, align 4
+    ret i32 %r
+}
+
+define dso_local i32 @load_atomic_i32_aligned_acquire_const(ptr readonly %ptr) {
+; CHECK-LABEL: load_atomic_i32_aligned_acquire_const:
+; CHECK:    add x8, x0, #16
+; CHECK:    ldapr w0, [x8]
+    %gep = getelementptr inbounds i32, ptr %ptr, i32 4
+    %r = load atomic i32, ptr %gep acquire, align 4
+    ret i32 %r
+}
+
+define dso_local i32 @load_atomic_i32_aligned_seq_cst(ptr %ptr) {
+; CHECK-LABEL: load_atomic_i32_aligned_seq_cst:
+; CHECK:    add x8, x0, #16
+; CHECK:    ldar w0, [x8]
+    %gep = getelementptr inbounds i32, ptr %ptr, i32 4
+    %r = load atomic i32, ptr %gep seq_cst, align 4
+    ret i32 %r
+}
+
+define dso_local i32 @load_atomic_i32_aligned_seq_cst_const(ptr readonly %ptr) {
+; CHECK-LABEL: load_atomic_i32_aligned_seq_cst_const:
+; CHECK:    add x8, x0, #16
+; CHECK:    ldar w0, [x8]
+    %gep = getelementptr inbounds i32, ptr %ptr, i32 4
+    %r = load atomic i32, ptr %gep seq_cst, align 4
+    ret i32 %r
+}
+
+define dso_local i64 @load_atomic_i64_aligned_unordered(ptr %ptr) {
+; CHECK-LABEL: load_atomic_i64_aligned_unordered:
+; CHECK:    ldr x0, [x0, #32]
+    %gep = getelementptr inbounds i64, ptr %ptr, i32 4
+    %r = load atomic i64, ptr %gep unordered, align 8
+    ret i64 %r
+}
+
+define dso_local i64 @load_atomic_i64_aligned_unordered_const(ptr readonly %ptr) {
+; CHECK-LABEL: load_atomic_i64_aligned_unordered_const:
+; CHECK:    ldr x0, [x0, #32]
+    %gep = getelementptr inbounds i64, ptr %ptr, i32 4
+    %r = load atomic i64, ptr %gep unordered, align 8
+    ret i64 %r
+}
+
+define dso_local i64 @load_atomic_i64_aligned_monotonic(ptr %ptr) {
+; CHECK-LABEL: load_atomic_i64_aligned_monotonic:
+; CHECK:    ldr x0, [x0, #32]
+    %gep = getelementptr inbounds i64, ptr %ptr, i32 4
+    %r = load atomic i64, ptr %gep monotonic, align 8
+    ret i64 %r
+}
+
+define dso_local i64 @load_atomic_i64_aligned_monotonic_const(ptr readonly %ptr) {
+; CHECK-LABEL: load_atomic_i64_aligned_monotonic_const:
+; CHECK:    ldr x0, [x0, #32]
+    %gep = getelementptr inbounds i64, ptr %ptr, i32 4
+    %r = load atomic i64, ptr %gep monotonic, align 8
+    ret i64 %r
+}
+
+define dso_local i64 @load_atomic_i64_aligned_acquire(ptr %ptr) {
+; CHECK-LABEL: load_atomic_i64_aligned_acquire:
+; CHECK:    add x8, x0, #32
+; CHECK:    ldapr x0, [x8]
+    %gep = getelementptr inbounds i64, ptr %ptr, i32 4
+    %r = load atomic i64, ptr %gep acquire, align 8
+    ret i64 %r
+}
+
+define dso_local i64 @load_atomic_i64_aligned_acquire_const(ptr readonly %ptr) {
+; CHECK-LABEL: load_atomic_i64_aligned_acquire_const:
+; CHECK:    add x8, x0, #32
+; CHECK:    ldapr x0, [x8]
+    %gep = getelementptr inbounds i64, ptr %ptr, i32 4
+    %r = load atomic i64, ptr %gep acquire, align 8
+    ret i64 %r
+}
+
+define dso_local i64 @load_atomic_i64_aligned_seq_cst(ptr %ptr) {
+; CHECK-LABEL: load_atomic_i64_aligned_seq_cst:
+; CHECK:    add x8, x0, #32
+; CHECK:    ldar x0, [x8]
+    %gep = getelementptr inbounds i64, ptr %ptr, i32 4
+    %r = load atomic i64, ptr %gep seq_cst, align 8
+    ret i64 %r
+}
+
+define dso_local i64 @load_atomic_i64_aligned_seq_cst_const(ptr readonly %ptr) {
+; CHECK-LABEL: load_atomic_i64_aligned_seq_cst_const:
+; CHECK:    add x8, x0, #32
+; CHECK:    ldar x0, [x8]
+    %gep = getelementptr inbounds i64, ptr %ptr, i32 4
+    %r = load atomic i64, ptr %gep seq_cst, align 8
+    ret i64 %r
+}
+
+define dso_local i128 @load_atomic_i128_aligned_unordered(ptr %ptr) {
+; CHECK-LABEL: load_atomic_i128_aligned_unordered:
+; CHECK:    ldp x0, x1, [x0, #64]
+    %gep = getelementptr inbounds i128, ptr %ptr, i32 4
+    %r = load atomic i128, ptr %gep unordered, align 16
+    ret i128 %r
+}
+
+define dso_local i128 @load_atomic_i128_aligned_unordered_const(ptr readonly %ptr) {
+; CHECK-LABEL: load_atomic_i128_aligned_unordered_const:
+; CHECK:    ldp x0, x1, [x0, #64]
+    %gep = getelementptr inbounds i128, ptr %ptr, i32 4
+    %r = load atomic i128, ptr %gep unordered, align 16
+    ret i128 %r
+}
+
+define dso_local i128 @load_atomic_i128_aligned_monotonic(ptr %ptr) {
+; CHECK-LABEL: load_atomic_i128_aligned_monotonic:
+; CHECK:    ldp x0, x1, [x0, #64]
+    %gep = getelementptr inbounds i128, ptr %ptr, i32 4
+    %r = load atomic i128, ptr %gep monotonic, align 16
+    ret i128 %r
+}
+
+define dso_local i128 @load_atomic_i128_aligned_monotonic_const(ptr readonly %ptr) {
+; CHECK-LABEL: load_atomic_i128_aligned_monotonic_const:
+; CHECK:    ldp x0, x1, [x0, #64]
+    %gep = getelementptr inbounds i128, ptr %ptr, i32 4
+    %r = load atomic i128, ptr %gep monotonic, align 16
+    ret i128 %r
+}
+
+define dso_local i128 @load_atomic_i128_aligned_acquire(ptr %ptr) {
+; CHECK-LABEL: load_atomic_i128_aligned_acquire:
+; CHECK:    ldp x0, x1, [x0, #64]
+; CHECK:    dmb ishld
+    %gep = getelementptr inbounds i128, ptr %ptr, i32 4
+    %r = load atomic i128, ptr %gep acquire, align 16
+    ret i128 %r
+}
+
+define dso_local i128 @load_atomic_i128_aligned_acquire_const(ptr readonly %ptr) {
+; CHECK-LABEL: load_atomic_i128_aligned_acquire_const:
+; CHECK:    ldp x0, x1, [x0, #64]
+; CHECK:    dmb ishld
+    %gep = getelementptr inbounds i128, ptr %ptr, i32 4
+    %r = load atomic i128, ptr %gep acquire, align 16
+    ret i128 %r
+}
+
+define dso_local i128 @load_atomic_i128_aligned_seq_cst(ptr %ptr) {
+; CHECK-LABEL: load_atomic_i128_aligned_seq_cst:
+; CHECK:    ldp x0, x1, [x0, #64]
+; CHECK:    dmb ish
+    %gep = getelementptr inbounds i128, ptr %ptr, i32 4
+    %r = load atomic i128, ptr %gep seq_cst, align 16
+    ret i128 %r
+}
+
+define dso_local i128 @load_atomic_i128_aligned_seq_cst_const(ptr readonly %ptr) {
+; CHECK-LABEL: load_atomic_i128_aligned_seq_cst_const:
+; CHECK:    ldp x0, x1, [x0, #64]
+; CHECK:    dmb ish
+    %gep = getelementptr inbounds i128, ptr %ptr, i32 4
+    %r = load atomic i128, ptr %gep seq_cst, align 16
+    ret i128 %r
+}
+
+define dso_local i8 @load_atomic_i8_unaligned_unordered(ptr %ptr) {
+; CHECK-LABEL: load_atomic_i8_unaligned_unordered:
+; CHECK:    ldrb w0, [x0, #4]
+    %gep = getelementptr inbounds i8, ptr %ptr, i32 4
+    %r = load atomic i8, ptr %gep unordered, align 1
+    ret i8 %r
+}
+
+define dso_local i8 @load_atomic_i8_unaligned_unordered_const(ptr readonly %ptr) {
+; CHECK-LABEL: load_atomic_i8_unaligned_unordered_const:
+; CHECK:    ldrb w0, [x0, #4]
+    %gep = getelementptr inbounds i8, ptr %ptr, i32 4
+    %r = load atomic i8, ptr %gep unordered, align 1
+    ret i8 %r
+}
+
+define dso_local i8 @load_atomic_i8_unaligned_monotonic(ptr %ptr) {
+; CHECK-LABEL: load_atomic_i8_unaligned_monotonic:
+; CHECK:    ldrb w0, [x0, #4]
+    %gep = getelementptr inbounds i8, ptr %ptr, i32 4
+    %r = load atomic i8, ptr %gep monotonic, align 1
+    ret i8 %r
+}
+
+define dso_local i8 @load_atomic_i8_unaligned_monotonic_const(ptr readonly %ptr) {
+; CHECK-LABEL: load_atomic_i8_unaligned_monotonic_const:
+; CHECK:    ldrb w0, [x0, #4]
+    %gep = getelementptr inbounds i8, ptr %ptr, i32 4
+    %r = load atomic i8, ptr %gep monotonic, align 1
+    ret i8 %r
+}
+
+define dso_local i8 @load_atomic_i8_unaligned_acquire(ptr %ptr) {
+; CHECK-LABEL: load_atomic_i8_unaligned_acquire:
+; CHECK:    add x8, x0, #4
+; CHECK:    ldaprb w0, [x8]
+    %gep = getelementptr inbounds i8, ptr %ptr, i32 4
+    %r = load atomic i8, ptr %gep acquire, align 1
+    ret i8 %r
+}
+
+define dso_local i8 @load_atomic_i8_unaligned_acquire_const(ptr readonly %ptr) {
+; CHECK-LABEL: load_atomic_i8_unaligned_acquire_const:
+; CHECK:    add x8, x0, #4
+; CHECK:    ldaprb w0, [x8]
+    %gep = getelementptr inbounds i8, ptr %ptr, i32 4
+    %r = load atomic i8, ptr %gep acquire, align 1
+    ret i8 %r
+}
+
+define dso_local i8 @load_atomic_i8_unaligned_seq_cst(ptr %ptr) {
+; CHECK-LABEL: load_atomic_i8_unaligned_seq_cst:
+; CHECK:    add x8, x0, #4
+; CHECK:    ldarb w0, [x8]
+    %gep = getelementptr inbounds i8, ptr %ptr, i32 4
+    %r = load atomic i8, ptr %gep seq_cst, align 1
+    ret i8 %r
+}
+
+define dso_local i8 @load_atomic_i8_unaligned_seq_cst_const(ptr readonly %ptr) {
+; CHECK-LABEL: load_atomic_i8_unaligned_seq_cst_const:
+; CHECK:    add x8, x0, #4
+; CHECK:    ldarb w0, [x8]
+    %gep = getelementptr inbounds i8, ptr %ptr, i32 4
+    %r = load atomic i8, ptr %gep seq_cst, align 1
+    ret i8 %r
+}
+
+define dso_local i16 @load_atomic_i16_unaligned_unordered(ptr %ptr) {
+; O0-SDAG-LABEL: load_atomic_i16_unaligned_unordered:
+; O0-SDAG:    add x1, x8, #4
+; O0-SDAG:    bl __atomic_load
+;
+; O1-SDAG-LABEL: load_atomic_i16_unaligned_unordered:
+; O1-SDAG:    add x1, x0, #4
+; O1-SDAG:    bl __atomic_load
+;
+; GISEL-LABEL: load_atomic_i16_unaligned_unordered:
+; GISEL:    add x1, x8, #4
+; GISEL:    bl __atomic_load
+    %gep = getelementptr inbounds i8, ptr %ptr, i32 4
+    %r = load atomic i16, ptr %gep unordered, align 1
+    ret i16 %r
+}
+
+define dso_local i16 @load_atomic_i16_unaligned_unordered_const(ptr readonly %ptr) {
+; O0-SDAG-LABEL: load_atomic_i16_unaligned_unordered_const:
+; O0-SDAG:    add x1, x8, #4
+; O0-SDAG:    bl __atomic_load
+;
+; O1-SDAG-LABEL: load_atomic_i16_unaligned_unordered_const:
+; O1-SDAG:    add x1, x0, #4
+; O1-SDAG:    bl __atomic_load
+;
+; GISEL-LABEL: load_atomic_i16_unaligned_unordered_const:
+; GISEL:    add x1, x8, #4
+; GISEL:    bl __atomic_load
+    %gep = getelementptr inbounds i8, ptr %ptr, i32 4
+    %r = load atomic i16, ptr %gep unordered, align 1
+    ret i16 %r
+}
+
+define dso_local i16 @load_atomic_i16_unaligned_monotonic(ptr %ptr) {
+; O0-SDAG-LABEL: load_atomic_i16_unaligned_monotonic:
+; O0-SDAG:    add x1, x8, #8
+; O0-SDAG:    bl __atomic_load
+;
+; O1-SDAG-LABEL: load_atomic_i16_unaligned_monotonic:
+; O1-SDAG:    add x1, x0, #8
+; O1-SDAG:    bl __atomic_load
+;
+; GISEL-LABEL: load_atomic_i16_unaligned_monotonic:
+; GISEL:    add x1, x8, #8
+; GISEL:    bl __atomic_load
+    %gep = getelementptr inbounds i16, ptr %ptr, i32 4
+    %r = load atomic i16, ptr %gep monotonic, align 1
+    ret i16 %r
+}
+
+define dso_local i16 @load_atomic_i16_unaligned_monotonic_const(ptr readonly %ptr) {
+; O0-SDAG-LABEL: load_atomic_i16_unaligned_monotonic_const:
+; O0-SDAG:    add x1, x8, #8
+; O0-SDAG:    bl __atomic_load
+;
+; O1-SDAG-LABEL: load_atomic_i16_unaligned_monotonic_const:
+; O1-SDAG:    add x1, x0, #8
+; O1-SDAG:    bl __atomic_load
+;
+; GISEL-LABEL: load_atomic_i16_unaligned_monotonic_const:
+; GISEL:    add x1, x8, #8
+; GISEL:    bl __atomic_load
+    %gep = getelementptr inbounds i16, ptr %ptr, i32 4
+    %r = load atomic i16, ptr %gep monotonic, align 1
+    ret i16 %r
+}
+
+define dso_local i16 @load_atomic_i16_unaligned_acquire(ptr %ptr) {
+; O0-SDAG-LABEL: load_atomic_i16_unaligned_acquire:
+; O0-SDAG:    add x1, x8, #8
+; O0-SDAG:    bl __atomic_load
+;
+; O1-SDAG-LABEL: load_atomic_i16_unaligned_acquire:
+; O1-SDAG:    add x1, x0, #8
+; O1-SDAG:    bl __atomic_load
+;
+; GISEL-LABEL: load_atomic_i16_unaligned_acquire:
+; GISEL:    add x1, x8, #8
+; GISEL:    bl __atomic_load
+    %gep = getelementptr inbounds i16, ptr %ptr, i32 4
+    %r = load atomic i16, ptr %gep acquire, align 1
+    ret i16 %r
+}
+
+define dso_local i16 @load_atomic_i16_unaligned_acquire_const(ptr readonly %ptr) {
+; O0-SDAG-LABEL: load_atomic_i16_unaligned_acquire_const:
+; O0-SDAG:    add x1, x8, #8
+; O0-SDAG:    bl __atomic_load
+;
+; O1-SDAG-LABEL: load_atomic_i16_unaligned_acquire_const:
+; O1-SDAG:    add x1, x0, #8
+; O1-SDAG:    bl __atomic_load
+;
+; GISEL-LABEL: load_atomic_i16_unaligned_acquire_const:
+; GISEL:    add x1, x8, #8
+; GISEL:    bl __atomic_load
+    %gep = getelementptr inbounds i16, ptr %ptr, i32 4
+    %r = load atomic i16, ptr %gep acquire, align 1
+    ret i16 %r
+}
+
+define dso_local i16 @load_atomic_i16_unaligned_seq_cst(ptr %ptr) {
+; O0-SDAG-LABEL: load_atomic_i16_unaligned_seq_cst:
+; O0-SDAG:    add x1, x8, #8
+; O0-SDAG:    bl __atomic_load
+;
+; O1-SDAG-LABEL: load_atomic_i16_unaligned_seq_cst:
+; O1-SDAG:    add x1, x0, #8
+; O1-SDAG:    bl __atomic_load
+;
+; GISEL-LABEL: load_atomic_i16_unaligned_seq_cst:
+; GISEL:    add x1, x8, #8
+; GISEL:    bl __atomic_load
+    %gep = getelementptr inbounds i16, ptr %ptr, i32 4
+    %r = load atomic i16, ptr %gep seq_cst, align 1
+    ret i16 %r
+}
+
+define dso_local i16 @load_atomic_i16_unaligned_seq_cst_const(ptr readonly %ptr) {
+; O0-SDAG-LABEL: load_atomic_i16_unaligned_seq_cst_const:
+; O0-SDAG:    add x1, x8, #8
+; O0-SDAG:    bl __atomic_load
+;
+; O1-SDAG-LABEL: load_atomic_i16_unaligned_seq_cst_const:
+; O1-SDAG:    add x1, x0, #8
+; O1-SDAG:    bl __atomic_load
+;
+; GISEL-LABEL: load_atomic_i16_unaligned_seq_cst_const:
+; GISEL:    add x1, x8, #8
+; GISEL:    bl __atomic_load
+    %gep = getelementptr inbounds i16, ptr %ptr, i32 4
+    %r = load atomic i16, ptr %gep seq_cst, align 1
+    ret i16 %r
+}
+
+define dso_local i32 @load_atomic_i32_unaligned_unordered(ptr %ptr) {
+; O0-SDAG-LABEL: load_atomic_i32_unaligned_unordered:
+; O0-SDAG:    add x1, x8, #16
+; O0-SDAG:    bl __atomic_load
+;
+; O1-SDAG-LABEL: load_atomic_i32_unaligned_unordered:
+; O1-SDAG:    add x1, x0, #16
+; O1-SDAG:    bl __atomic_load
+;
+; GISEL-LABEL: load_atomic_i32_unaligned_unordered:
+; GISEL:    add x1, x8, #16
+; GISEL:    bl __atomic_load
+    %gep = getelementptr inbounds i32, ptr %ptr, i32 4
+    %r = load atomic i32, ptr %gep unordered, align 1
+    ret i32 %r
+}
+
+define dso_local i32 @load_atomic_i32_unaligned_unordered_const(ptr readonly %ptr) {
+; O0-SDAG-LABEL: load_atomic_i32_unaligned_unordered_const:
+; O0-SDAG:    add x1, x8, #16
+; O0-SDAG:    bl __atomic_load
+;
+; O1-SDAG-LABEL: load_atomic_i32_unaligned_unordered_const:
+; O1-SDAG:    add x1, x0, #16
+; O1-SDAG:    bl __atomic_load
+;
+; GISEL-LABEL: load_atomic_i32_unaligned_unordered_const:
+; GISEL:    add x1, x8, #16
+; GISEL:    bl __atomic_load
+    %gep = getelementptr inbounds i32, ptr %ptr, i32 4
+    %r = load atomic i32, ptr %gep unordered, align 1
+    ret i32 %r
+}
+
+define dso_local i32 @load_atomic_i32_unaligned_monotonic(ptr %ptr) {
+; O0-SDAG-LABEL: load_atomic_i32_unaligned_monotonic:
+; O0-SDAG:    add x1, x8, #16
+; O0-SDAG:    bl __atomic_load
+;
+; O1-SDAG-LABEL: load_atomic_i32_unaligned_monotonic:
+; O1-SDAG:    add x1, x0, #16
+; O1-SDAG:    bl __atomic_load
+;
+; GISEL-LABEL: load_atomic_i32_unaligned_monotonic:
+; GISEL:    add x1, x8, #16
+; GISEL:    bl __atomic_load
+    %gep = getelementptr inbounds i32, ptr %ptr, i32 4
+    %r = load atomic i32, ptr %gep monotonic, align 1
+    ret i32 %r
+}
+
+define dso_local i32 @load_atomic_i32_unaligned_monotonic_const(ptr readonly %ptr) {
+; O0-SDAG-LABEL: load_atomic_i32_unaligned_monotonic_const:
+; O0-SDAG:    add x1, x8, #16
+; O0-SDAG:    bl __atomic_load
+;
+; O1-SDAG-LABEL: load_atomic_i32_unaligned_monotonic_const:
+; O1-SDAG:    add x1, x0, #16
+; O1-SDAG:    bl __atomic_load
+;
+; GISEL-LABEL: load_atomic_i32_unaligned_monotonic_const:
+; GISEL:    add x1, x8, #16
+; GISEL:    bl __atomic_load
+    %gep = getelementptr inbounds i32, ptr %ptr, i32 4
+    %r = load atomic i32, ptr %gep monotonic, align 1
+    ret i32 %r
+}
+
+define dso_local i32 @load_atomic_i32_unaligned_acquire(ptr %ptr) {
+; O0-SDAG-LABEL: load_atomic_i32_unaligned_acquire:
+; O0-SDAG:    add x1, x8, #16
+; O0-SDAG:    bl __atomic_load
+;
+; O1-SDAG-LABEL: load_atomic_i32_unaligned_acquire:
+; O1-SDAG:    add x1, x0, #16
+; O1-SDAG:    bl __atomic_load
+;
+; GISEL-LABEL: load_atomic_i32_unaligned_acquire:
+; GISEL:    add x1, x8, #16
+; GISEL:    bl __atomic_load
+    %gep = getelementptr inbounds i32, ptr %ptr, i32 4
+    %r = load atomic i32, ptr %gep acquire, align 1
+    ret i32 %r
+}
+
+define dso_local i32 @load_atomic_i32_unaligned_acquire_const(ptr readonly %ptr) {
+; O0-SDAG-LABEL: load_atomic_i32_unaligned_acquire_const:
+; O0-SDAG:    add x1, x8, #16
+; O0-SDAG:    bl __atomic_load
+;
+; O1-SDAG-LABEL: load_atomic_i32_unaligned_acquire_const:
+; O1-SDAG:    add x1, x0, #16
+; O1-SDAG:    bl __atomic_load
+;
+; GISEL-LABEL: load_atomic_i32_unaligned_acquire_const:
+; GISEL:    add x1, x8, #16
+; GISEL:    bl __atomic_load
+    %gep = getelementptr inbounds i32, ptr %ptr, i32 4
+    %r = load atomic i32, ptr %gep acquire, align 1
+    ret i32 %r
+}
+
+define dso_local i32 @load_atomic_i32_unaligned_seq_cst(ptr %ptr) {
+; O0-SDAG-LABEL: load_atomic_i32_unaligned_seq_cst:
+; O0-SDAG:    add x1, x8, #16
+; O0-SDAG:    bl __atomic_load
+;
+; O1-SDAG-LABEL: load_atomic_i32_unaligned_seq_cst:
+; O1-SDAG:    add x1, x0, #16
+; O1-SDAG:    bl __atomic_load
+;
+; GISEL-LABEL: load_atomic_i32_unaligned_seq_cst:
+; GISEL:    add x1, x8, #16
+; GISEL:    bl __atomic_load
+    %gep = getelementptr inbounds i32, ptr %ptr, i32 4
+    %r = load atomic i32, ptr %gep seq_cst, align 1
+    ret i32 %r
+}
+
+define dso_local i32 @load_atomic_i32_unaligned_seq_cst_const(ptr readonly %ptr) {
+; O0-SDAG-LABEL: load_atomic_i32_unaligned_seq_cst_const:
+; O0-SDAG:    add x1, x8, #16
+; O0-SDAG:    bl __atomic_load
+;
+; O1-SDAG-LABEL: load_atomic_i32_unaligned_seq_cst_const:
+; O1-SDAG:    add x1, x0, #16
+; O1-SDAG:    bl __atomic_load
+;
+; GISEL-LABEL: load_atomic_i32_unaligned_seq_cst_const:
+; GISEL:    add x1, x8, #16
+; GISEL:    bl __atomic_load
+    %gep = getelementptr inbounds i32, ptr %ptr, i32 4
+    %r = load atomic i32, ptr %gep seq_cst, align 1
+    ret i32 %r
+}
+
+define dso_local i64 @load_atomic_i64_unaligned_unordered(ptr %ptr) {
+; O0-SDAG-LABEL: load_atomic_i64_unaligned_unordered:
+; O0-SDAG:    add x1, x8, #32
+; O0-SDAG:    bl __atomic_load
+;
+; O1-SDAG-LABEL: load_atomic_i64_unaligned_unordered:
+; O1-SDAG:    add x1, x0, #32
+; O1-SDAG:    bl __atomic_load
+;
+; GISEL-LABEL: load_atomic_i64_unaligned_unordered:
+; GISEL:    add x1, x8, #32
+; GISEL:    bl __atomic_load
+    %gep = getelementptr inbounds i64, ptr %ptr, i32 4
+    %r = load atomic i64, ptr %gep unordered, align 1
+    ret i64 %r
+}
+
+define dso_local i64 @load_atomic_i64_unaligned_unordered_const(ptr readonly %ptr) {
+; O0-SDAG-LABEL: load_atomic_i64_unaligned_unordered_const:
+; O0-SDAG:    add x1, x8, #32
+; O0-SDAG:    bl __atomic_load
+;
+; O1-SDAG-LABEL: load_atomic_i64_unaligned_unordered_const:
+; O1-SDAG:    add x1, x0, #32
+; O1-SDAG:    bl __atomic_load
+;
+; GISEL-LABEL: load_atomic_i64_unaligned_unordered_const:
+; GISEL:    add x1, x8, #32
+; GISEL:    bl __atomic_load
+    %gep = getelementptr inbounds i64, ptr %ptr, i32 4
+    %r = load atomic i64, ptr %gep unordered, align 1
+    ret i64 %r
+}
+
+define dso_local i64 @load_atomic_i64_unaligned_monotonic(ptr %ptr) {
+; O0-SDAG-LABEL: load_atomic_i64_unaligned_monotonic:
+; O0-SDAG:    add x1, x8, #32
+; O0-SDAG:    bl __atomic_load
+;
+; O1-SDAG-LABEL: load_atomic_i64_unaligned_monotonic:
+; O1-SDAG:    add x1, x0, #32
+; O1-SDAG:    bl __atomic_load
+;
+; GISEL-LABEL: load_atomic_i64_unaligned_monotonic:
+; GISEL:    add x1, x8, #32
+; GISEL:    bl __atomic_load
+    %gep = getelementptr inbounds i64, ptr %ptr, i32 4
+    %r = load atomic i64, ptr %gep monotonic, align 1
+    ret i64 %r
+}
+
+define dso_local i64 @load_atomic_i64_unaligned_monotonic_const(ptr readonly %ptr) {
+; O0-SDAG-LABEL: load_atomic_i64_unaligned_monotonic_const:
+; O0-SDAG:    add x1, x8, #32
+; O0-SDAG:    bl __atomic_load
+;
+; O1-SDAG-LABEL: load_atomic_i64_unaligned_monotonic_const:
+; O1-SDAG:    add x1, x0, #32
+; O1-SDAG:    bl __atomic_load
+;
+; GISEL-LABEL: load_atomic_i64_unaligned_monotonic_const:
+; GISEL:    add x1, x8, #32
+; GISEL:    bl __atomic_load
+    %gep = getelementptr inbounds i64, ptr %ptr, i32 4
+    %r = load atomic i64, ptr %gep monotonic, align 1
+    ret i64 %r
+}
+
+define dso_local i64 @load_atomic_i64_unaligned_acquire(ptr %ptr) {
+; O0-SDAG-LABEL: load_atomic_i64_unaligned_acquire:
+; O0-SDAG:    add x1, x8, #32
+; O0-SDAG:    bl __atomic_load
+;
+; O1-SDAG-LABEL: load_atomic_i64_unaligned_acquire:
+; O1-SDAG:    add x1, x0, #32
+; O1-SDAG:    bl __atomic_load
+;
+; GISEL-LABEL: load_atomic_i64_unaligned_acquire:
+; GISEL:    add x1, x8, #32
+; GISEL:    bl __atomic_load
+    %gep = getelementptr inbounds i64, ptr %ptr, i32 4
+    %r = load atomic i64, ptr %gep acquire, align 1
+    ret i64 %r
+}
+
+define dso_local i64 @load_atomic_i64_unaligned_acquire_const(ptr readonly %ptr) {
+; O0-SDAG-LABEL: load_atomic_i64_unaligned_acquire_const:
+; O0-SDAG:    add x1, x8, #32
+; O0-SDAG:    bl __atomic_load
+;
+; O1-SDAG-LABEL: load_atomic_i64_unaligned_acquire_const:
+; O1-SDAG:    add x1, x0, #32
+; O1-SDAG:    bl __atomic_load
+;
+; GISEL-LABEL: load_atomic_i64_unaligned_acquire_const:
+; GISEL:    add x1, x8, #32
+; GISEL:    bl __atomic_load
+    %gep = getelementptr inbounds i64, ptr %ptr, i32 4
+    %r = load atomic i64, ptr %gep acquire, align 1
+    ret i64 %r
+}
+
+define dso_local i64 @load_atomic_i64_unaligned_seq_cst(ptr %ptr) {
+; O0-SDAG-LABEL: load_atomic_i64_unaligned_seq_cst:
+; O0-SDAG:    add x1, x8, #32
+; O0-SDAG:    bl __atomic_load
+;
+; O1-SDAG-LABEL: load_atomic_i64_unaligned_seq_cst:
+; O1-SDAG:    add x1, x0, #32
+; O1-SDAG:    bl __atomic_load
+;
+; GISEL-LABEL: load_atomic_i64_unaligned_seq_cst:
+; GISEL:    add x1, x8, #32
+; GISEL:    bl __atomic_load
+    %gep = getelementptr inbounds i64, ptr %ptr, i32 4
+    %r = load atomic i64, ptr %gep seq_cst, align 1
+    ret i64 %r
+}
+
+define dso_local i64 @load_atomic_i64_unaligned_seq_cst_const(ptr readonly %ptr) {
+; O0-SDAG-LABEL: load_atomic_i64_unaligned_seq_cst_const:
+; O0-SDAG:    add x1, x8, #32
+; O0-SDAG:    bl __atomic_load
+;
+; O1-SDAG-LABEL: load_atomic_i64_unaligned_seq_cst_const:
+; O1-SDAG:    add x1, x0, #32
+; O1-SDAG:    bl __atomic_load
+;
+; GISEL-LABEL: load_atomic_i64_unaligned_seq_cst_const:
+; GISEL:    add x1, x8, #32
+; GISEL:    bl __atomic_load
+    %gep = getelementptr inbounds i64, ptr %ptr, i32 4
+    %r = load atomic i64, ptr %gep seq_cst, align 1
+    ret i64 %r
+}
+
+define dso_local i128 @load_atomic_i128_unaligned_unordered(ptr %ptr) {
+; O0-SDAG-LABEL: load_atomic_i128_unaligned_unordered:
+; O0-SDAG:    add x1, x8, #64
+; O0-SDAG:    bl __atomic_load
+;
+; O1-SDAG-LABEL: load_atomic_i128_unaligned_unordered:
+; O1-SDAG:    add x1, x0, #64
+; O1-SDAG:    bl __atomic_load
+;
+; GISEL-LABEL: load_atomic_i128_unaligned_unordered:
+; GISEL:    add x1, x8, #64
+; GISEL:    bl __atomic_load
+    %gep = getelementptr inbounds i128, ptr %ptr, i32 4
+    %r = load atomic i128, ptr %gep unordered, align 1
+    ret i128 %r
+}
+
+define dso_local i128 @load_atomic_i128_unaligned_unordered_const(ptr readonly %ptr) {
+; O0-SDAG-LABEL: load_atomic_i128_unaligned_unordered_const:
+; O0-SDAG:    add x1, x8, #64
+; O0-SDAG:    bl __atomic_load
+;
+; O1-SDAG-LABEL: load_atomic_i128_unaligned_unordered_const:
+; O1-SDAG:    add x1, x0, #64
+; O1-SDAG:    bl __atomic_load
+;
+; GISEL-LABEL: load_atomic_i128_unaligned_unordered_const:
+; GISEL:    add x1, x8, #64
+; GISEL:    bl __atomic_load
+    %gep = getelementptr inbounds i128, ptr %ptr, i32 4
+    %r = load atomic i128, ptr %gep unordered, align 1
+    ret i128 %r
+}
+
+define dso_local i128 @load_atomic_i128_unaligned_monotonic(ptr %ptr) {
+; O0-SDAG-LABEL: load_atomic_i128_unaligned_monotonic:
+; O0-SDAG:    add x1, x8, #64
+; O0-SDAG:    bl __atomic_load
+;
+; O1-SDAG-LABEL: load_atomic_i128_unaligned_monotonic:
+; O1-SDAG:    add x1, x0, #64
+; O1-SDAG:    bl __atomic_load
+;
+; GISEL-LABEL: load_atomic_i128_unaligned_monotonic:
+; GISEL:    add x1, x8, #64
+; GISEL:    bl __atomic_load
+    %gep = getelementptr inbounds i128, ptr %ptr, i32 4
+    %r = load atomic i128, ptr %gep monotonic, align 1
+    ret i128 %r
+}
+
+define dso_local i128 @load_atomic_i128_unaligned_monotonic_const(ptr readonly %ptr) {
+; O0-SDAG-LABEL: load_atomic_i128_unaligned_monotonic_const:
+; O0-SDAG:    add x1, x8, #64
+; O0-SDAG:    bl __atomic_load
+;
+; O1-SDAG-LABEL: load_atomic_i128_unaligned_monotonic_const:
+; O1-SDAG:    add x1, x0, #64
+; O1-SDAG:    bl __atomic_load
+;
+; GISEL-LABEL: load_atomic_i128_unaligned_monotonic_const:
+; GISEL:    add x1, x8, #64
+; GISEL:    bl __atomic_load
+    %gep = getelementptr inbounds i128, ptr %ptr, i32 4
+    %r = load atomic i128, ptr %gep monotonic, align 1
+    ret i128 %r
+}
+
+define dso_local i128 @load_atomic_i128_unaligned_acquire(ptr %ptr) {
+; O0-SDAG-LABEL: load_atomic_i128_unaligned_acquire:
+; O0-SDAG:    add x1, x8, #64
+; O0-SDAG:    bl __atomic_load
+;
+; O1-SDAG-LABEL: load_atomic_i128_unaligned_acquire:
+; O1-SDAG:    add x1, x0, #64
+; O1-SDAG:    bl __atomic_load
+;
+; GISEL-LABEL: load_atomic_i128_unaligned_acquire:
+; GISEL:    add x1, x8, #64
+; GISEL:    bl __atomic_load
+    %gep = getelementptr inbounds i128, ptr %ptr, i32 4
+    %r = load atomic i128, ptr %gep acquire, align 1
+    ret i128 %r
+}
+
+define dso_local i128 @load_atomic_i128_unaligned_acquire_const(ptr readonly %ptr) {
+; O0-SDAG-LABEL: load_atomic_i128_unaligned_acquire_const:
+; O0-SDAG:    add x1, x8, #64
+; O0-SDAG:    bl __atomic_load
+;
+; O1-SDAG-LABEL: load_atomic_i128_unaligned_acquire_const:
+; O1-SDAG:    add x1, x0, #64
+; O1-SDAG:    bl __atomic_load
+;
+; GISEL-LABEL: load_atomic_i128_unaligned_acquire_const:
+; GISEL:    add x1, x8, #64
+; GISEL:    bl __atomic_load
+    %gep = getelementptr inbounds i128, ptr %ptr, i32 4
+    %r = load atomic i128, ptr %gep acquire, align 1
+    ret i128 %r
+}
+
+define dso_local i128 @load_atomic_i128_unaligned_seq_cst(ptr %ptr) {
+; O0-SDAG-LABEL: load_atomic_i128_unaligned_seq_cst:
+; O0-SDAG:    add x1, x8, #64
+; O0-SDAG:    bl __atomic_load
+;
+; O1-SDAG-LABEL: load_atomic_i128_unaligned_seq_cst:
+; O1-SDAG:    add x1, x0, #64
+; O1-SDAG:    bl __atomic_load
+;
+; GISEL-LABEL: load_atomic_i128_unaligned_seq_cst:
+; GISEL:    add x1, x8, #64
+; GISEL:    bl __atomic_load
+    %gep = getelementptr inbounds i128, ptr %ptr, i32 4
+    %r = load atomic i128, ptr %gep seq_cst, align 1
+    ret i128 %r
+}
+
+define dso_local i128 @load_atomic_i128_unaligned_seq_cst_const(ptr readonly %ptr) {
+; O0-SDAG-LABEL: load_atomic_i128_unaligned_seq_cst_const:
+; O0-SDAG:    add x1, x8, #64
+; O0-SDAG:    bl __atomic_load
+;
+; O1-SDAG-LABEL: load_atomic_i128_unaligned_seq_cst_const:
+; O1-SDAG:    add x1, x0, #64
+; O1-SDAG:    bl __atomic_load
+;
+; GISEL-LABEL: load_atomic_i128_unaligned_seq_cst_const:
+; GISEL:    add x1, x8, #64
+; GISEL:    bl __atomic_load
+    %gep = getelementptr inbounds i128, ptr %ptr, i32 4
+    %r = load atomic i128, ptr %gep seq_cst, align 1
+    ret i128 %r
+}
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-store-rcpc_immo.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-store-rcpc_immo.ll
new file mode 100644
index 000000000000000..6b99d21fdec3ecd
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-store-rcpc_immo.ll
@@ -0,0 +1,385 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --filter-out "\b(sp)\b" --filter "^\s*(ld[^r]|st|swp|cas|bl|add|and|eor|orn|orr|sub|mvn|sxt|cmp|ccmp|csel|dmb)"
+; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+v8.4a -mattr=+rcpc-immo -O0 | FileCheck %s --check-prefixes=CHECK,O0-SDAG
+; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+v8.4a -mattr=+rcpc-immo -O1 | FileCheck %s --check-prefixes=CHECK,O1-SDAG
+; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+v8.4a -mattr=+rcpc-immo -global-isel -global-isel-abort=2 -O0 | FileCheck %s --check-prefixes=CHECK,GISEL
+
+define dso_local void @store_atomic_i8_aligned_unordered(i8 %value, ptr %ptr) {
+; CHECK-LABEL: store_atomic_i8_aligned_unordered:
+; CHECK:    strb w0, [x1, #4]
+    %gep = getelementptr inbounds i8, ptr %ptr, i32 4
+    store atomic i8 %value, ptr %gep unordered, align 1
+    ret void
+}
+
+define dso_local void @store_atomic_i8_aligned_monotonic(i8 %value, ptr %ptr) {
+; CHECK-LABEL: store_atomic_i8_aligned_monotonic:
+; CHECK:    strb w0, [x1, #4]
+    %gep = getelementptr inbounds i8, ptr %ptr, i32 4
+    store atomic i8 %value, ptr %gep monotonic, align 1
+    ret void
+}
+
+define dso_local void @store_atomic_i8_aligned_release(i8 %value, ptr %ptr) {
+; CHECK-LABEL: store_atomic_i8_aligned_release:
+; CHECK:    add x8, x1, #4
+; CHECK:    stlrb w0, [x8]
+    %gep = getelementptr inbounds i8, ptr %ptr, i32 4
+    store atomic i8 %value, ptr %gep release, align 1
+    ret void
+}
+
+define dso_local void @store_atomic_i8_aligned_seq_cst(i8 %value, ptr %ptr) {
+; CHECK-LABEL: store_atomic_i8_aligned_seq_cst:
+; CHECK:    add x8, x1, #4
+; CHECK:    stlrb w0, [x8]
+    %gep = getelementptr inbounds i8, ptr %ptr, i32 4
+    store atomic i8 %value, ptr %gep seq_cst, align 1
+    ret void
+}
+
+define dso_local void @store_atomic_i16_aligned_unordered(i16 %value, ptr %ptr) {
+; CHECK-LABEL: store_atomic_i16_aligned_unordered:
+; CHECK:    strh w0, [x1, #8]
+    %gep = getelementptr inbounds i16, ptr %ptr, i32 4
+    store atomic i16 %value, ptr %gep unordered, align 2
+    ret void
+}
+
+define dso_local void @store_atomic_i16_aligned_monotonic(i16 %value, ptr %ptr) {
+; CHECK-LABEL: store_atomic_i16_aligned_monotonic:
+; CHECK:    strh w0, [x1, #8]
+    %gep = getelementptr inbounds i16, ptr %ptr, i32 4
+    store atomic i16 %value, ptr %gep monotonic, align 2
+    ret void
+}
+
+define dso_local void @store_atomic_i16_aligned_release(i16 %value, ptr %ptr) {
+; CHECK-LABEL: store_atomic_i16_aligned_release:
+; CHECK:    add x8, x1, #8
+; CHECK:    stlrh w0, [x8]
+    %gep = getelementptr inbounds i16, ptr %ptr, i32 4
+    store atomic i16 %value, ptr %gep release, align 2
+    ret void
+}
+
+define dso_local void @store_atomic_i16_aligned_seq_cst(i16 %value, ptr %ptr) {
+; CHECK-LABEL: store_atomic_i16_aligned_seq_cst:
+; CHECK:    add x8, x1, #8
+; CHECK:    stlrh w0, [x8]
+    %gep = getelementptr inbounds i16, ptr %ptr, i32 4
+    store atomic i16 %value, ptr %gep seq_cst, align 2
+    ret void
+}
+
+define dso_local void @store_atomic_i32_aligned_unordered(i32 %value, ptr %ptr) {
+; CHECK-LABEL: store_atomic_i32_aligned_unordered:
+; CHECK:    str w0, [x1, #16]
+    %gep = getelementptr inbounds i32, ptr %ptr, i32 4
+    store atomic i32 %value, ptr %gep unordered, align 4
+    ret void
+}
+
+define dso_local void @store_atomic_i32_aligned_monotonic(i32 %value, ptr %ptr) {
+; CHECK-LABEL: store_atomic_i32_aligned_monotonic:
+; CHECK:    str w0, [x1, #16]
+    %gep = getelementptr inbounds i32, ptr %ptr, i32 4
+    store atomic i32 %value, ptr %gep monotonic, align 4
+    ret void
+}
+
+define dso_local void @store_atomic_i32_aligned_release(i32 %value, ptr %ptr) {
+; CHECK-LABEL: store_atomic_i32_aligned_release:
+; CHECK:    add x8, x1, #16
+; CHECK:    stlr w0, [x8]
+    %gep = getelementptr inbounds i32, ptr %ptr, i32 4
+    store atomic i32 %value, ptr %gep release, align 4
+    ret void
+}
+
+define dso_local void @store_atomic_i32_aligned_seq_cst(i32 %value, ptr %ptr) {
+; CHECK-LABEL: store_atomic_i32_aligned_seq_cst:
+; CHECK:    add x8, x1, #16
+; CHECK:    stlr w0, [x8]
+    %gep = getelementptr inbounds i32, ptr %ptr, i32 4
+    store atomic i32 %value, ptr %gep seq_cst, align 4
+    ret void
+}
+
+define dso_local void @store_atomic_i64_aligned_unordered(i64 %value, ptr %ptr) {
+; CHECK-LABEL: store_atomic_i64_aligned_unordered:
+; CHECK:    str x0, [x1, #32]
+    %gep = getelementptr inbounds i64, ptr %ptr, i32 4
+    store atomic i64 %value, ptr %gep unordered, align 8
+    ret void
+}
+
+define dso_local void @store_atomic_i64_aligned_monotonic(i64 %value, ptr %ptr) {
+; CHECK-LABEL: store_atomic_i64_aligned_monotonic:
+; CHECK:    str x0, [x1, #32]
+    %gep = getelementptr inbounds i64, ptr %ptr, i32 4
+    store atomic i64 %value, ptr %gep monotonic, align 8
+    ret void
+}
+
+define dso_local void @store_atomic_i64_aligned_release(i64 %value, ptr %ptr) {
+; CHECK-LABEL: store_atomic_i64_aligned_release:
+; CHECK:    add x8, x1, #32
+; CHECK:    stlr x0, [x8]
+    %gep = getelementptr inbounds i64, ptr %ptr, i32 4
+    store atomic i64 %value, ptr %gep release, align 8
+    ret void
+}
+
+define dso_local void @store_atomic_i64_aligned_seq_cst(i64 %value, ptr %ptr) {
+; CHECK-LABEL: store_atomic_i64_aligned_seq_cst:
+; CHECK:    add x8, x1, #32
+; CHECK:    stlr x0, [x8]
+    %gep = getelementptr inbounds i64, ptr %ptr, i32 4
+    store atomic i64 %value, ptr %gep seq_cst, align 8
+    ret void
+}
+
+define dso_local void @store_atomic_i128_aligned_unordered(i128 %value, ptr %ptr) {
+; CHECK-LABEL: store_atomic_i128_aligned_unordered:
+; CHECK:    stp x0, x1, [x2, #64]
+    %gep = getelementptr inbounds i128, ptr %ptr, i32 4
+    store atomic i128 %value, ptr %gep unordered, align 16
+    ret void
+}
+
+define dso_local void @store_atomic_i128_aligned_monotonic(i128 %value, ptr %ptr) {
+; CHECK-LABEL: store_atomic_i128_aligned_monotonic:
+; CHECK:    stp x0, x1, [x2, #64]
+    %gep = getelementptr inbounds i128, ptr %ptr, i32 4
+    store atomic i128 %value, ptr %gep monotonic, align 16
+    ret void
+}
+
+define dso_local void @store_atomic_i128_aligned_release(i128 %value, ptr %ptr) {
+; CHECK-LABEL: store_atomic_i128_aligned_release:
+; CHECK:    dmb ish
+; CHECK:    stp x0, x1, [x2, #64]
+    %gep = getelementptr inbounds i128, ptr %ptr, i32 4
+    store atomic i128 %value, ptr %gep release, align 16
+    ret void
+}
+
+define dso_local void @store_atomic_i128_aligned_seq_cst(i128 %value, ptr %ptr) {
+; CHECK-LABEL: store_atomic_i128_aligned_seq_cst:
+; CHECK:    dmb ish
+; CHECK:    stp x0, x1, [x2, #64]
+; CHECK:    dmb ish
+    %gep = getelementptr inbounds i128, ptr %ptr, i32 4
+    store atomic i128 %value, ptr %gep seq_cst, align 16
+    ret void
+}
+
+define dso_local void @store_atomic_i8_unaligned_unordered(i8 %value, ptr %ptr) {
+; CHECK-LABEL: store_atomic_i8_unaligned_unordered:
+; CHECK:    strb w0, [x1, #4]
+    %gep = getelementptr inbounds i8, ptr %ptr, i32 4
+    store atomic i8 %value, ptr %gep unordered, align 1
+    ret void
+}
+
+define dso_local void @store_atomic_i8_unaligned_monotonic(i8 %value, ptr %ptr) {
+; CHECK-LABEL: store_atomic_i8_unaligned_monotonic:
+; CHECK:    strb w0, [x1, #4]
+    %gep = getelementptr inbounds i8, ptr %ptr, i32 4
+    store atomic i8 %value, ptr %gep monotonic, align 1
+    ret void
+}
+
+define dso_local void @store_atomic_i8_unaligned_release(i8 %value, ptr %ptr) {
+; CHECK-LABEL: store_atomic_i8_unaligned_release:
+; CHECK:    add x8, x1, #4
+; CHECK:    stlrb w0, [x8]
+    %gep = getelementptr inbounds i8, ptr %ptr, i32 4
+    store atomic i8 %value, ptr %gep release, align 1
+    ret void
+}
+
+define dso_local void @store_atomic_i8_unaligned_seq_cst(i8 %value, ptr %ptr) {
+; CHECK-LABEL: store_atomic_i8_unaligned_seq_cst:
+; CHECK:    add x8, x1, #4
+; CHECK:    stlrb w0, [x8]
+    %gep = getelementptr inbounds i8, ptr %ptr, i32 4
+    store atomic i8 %value, ptr %gep seq_cst, align 1
+    ret void
+}
+
+define dso_local void @store_atomic_i16_unaligned_unordered(i16 %value, ptr %ptr) {
+; CHECK-LABEL: store_atomic_i16_unaligned_unordered:
+; CHECK:    add x1, x1, #8
+; CHECK:    bl __atomic_store
+    %gep = getelementptr inbounds i16, ptr %ptr, i32 4
+    store atomic i16 %value, ptr %gep unordered, align 1
+    ret void
+}
+
+define dso_local void @store_atomic_i16_unaligned_monotonic(i16 %value, ptr %ptr) {
+; CHECK-LABEL: store_atomic_i16_unaligned_monotonic:
+; CHECK:    add x1, x1, #8
+; CHECK:    bl __atomic_store
+    %gep = getelementptr inbounds i16, ptr %ptr, i32 4
+    store atomic i16 %value, ptr %gep monotonic, align 1
+    ret void
+}
+
+define dso_local void @store_atomic_i16_unaligned_release(i16 %value, ptr %ptr) {
+; CHECK-LABEL: store_atomic_i16_unaligned_release:
+; CHECK:    add x1, x1, #8
+; CHECK:    bl __atomic_store
+    %gep = getelementptr inbounds i16, ptr %ptr, i32 4
+    store atomic i16 %value, ptr %gep release, align 1
+    ret void
+}
+
+define dso_local void @store_atomic_i16_unaligned_seq_cst(i16 %value, ptr %ptr) {
+; CHECK-LABEL: store_atomic_i16_unaligned_seq_cst:
+; CHECK:    add x1, x1, #8
+; CHECK:    bl __atomic_store
+    %gep = getelementptr inbounds i16, ptr %ptr, i32 4
+    store atomic i16 %value, ptr %gep seq_cst, align 1
+    ret void
+}
+
+define dso_local void @store_atomic_i32_unaligned_unordered(i32 %value, ptr %ptr) {
+; CHECK-LABEL: store_atomic_i32_unaligned_unordered:
+; CHECK:    add x1, x1, #16
+; CHECK:    bl __atomic_store
+    %gep = getelementptr inbounds i32, ptr %ptr, i32 4
+    store atomic i32 %value, ptr %gep unordered, align 1
+    ret void
+}
+
+define dso_local void @store_atomic_i32_unaligned_monotonic(i32 %value, ptr %ptr) {
+; CHECK-LABEL: store_atomic_i32_unaligned_monotonic:
+; CHECK:    add x1, x1, #16
+; CHECK:    bl __atomic_store
+    %gep = getelementptr inbounds i32, ptr %ptr, i32 4
+    store atomic i32 %value, ptr %gep monotonic, align 1
+    ret void
+}
+
+define dso_local void @store_atomic_i32_unaligned_release(i32 %value, ptr %ptr) {
+; CHECK-LABEL: store_atomic_i32_unaligned_release:
+; CHECK:    add x1, x1, #16
+; CHECK:    bl __atomic_store
+    %gep = getelementptr inbounds i32, ptr %ptr, i32 4
+    store atomic i32 %value, ptr %gep release, align 1
+    ret void
+}
+
+define dso_local void @store_atomic_i32_unaligned_seq_cst(i32 %value, ptr %ptr) {
+; CHECK-LABEL: store_atomic_i32_unaligned_seq_cst:
+; CHECK:    add x1, x1, #16
+; CHECK:    bl __atomic_store
+    %gep = getelementptr inbounds i32, ptr %ptr, i32 4
+    store atomic i32 %value, ptr %gep seq_cst, align 1
+    ret void
+}
+
+define dso_local void @store_atomic_i64_unaligned_unordered(i64 %value, ptr %ptr) {
+; CHECK-LABEL: store_atomic_i64_unaligned_unordered:
+; CHECK:    add x1, x1, #32
+; CHECK:    bl __atomic_store
+    %gep = getelementptr inbounds i64, ptr %ptr, i32 4
+    store atomic i64 %value, ptr %gep unordered, align 1
+    ret void
+}
+
+define dso_local void @store_atomic_i64_unaligned_monotonic(i64 %value, ptr %ptr) {
+; CHECK-LABEL: store_atomic_i64_unaligned_monotonic:
+; CHECK:    add x1, x1, #32
+; CHECK:    bl __atomic_store
+    %gep = getelementptr inbounds i64, ptr %ptr, i32 4
+    store atomic i64 %value, ptr %gep monotonic, align 1
+    ret void
+}
+
+define dso_local void @store_atomic_i64_unaligned_release(i64 %value, ptr %ptr) {
+; CHECK-LABEL: store_atomic_i64_unaligned_release:
+; CHECK:    add x1, x1, #32
+; CHECK:    bl __atomic_store
+    %gep = getelementptr inbounds i64, ptr %ptr, i32 4
+    store atomic i64 %value, ptr %gep release, align 1
+    ret void
+}
+
+define dso_local void @store_atomic_i64_unaligned_seq_cst(i64 %value, ptr %ptr) {
+; CHECK-LABEL: store_atomic_i64_unaligned_seq_cst:
+; CHECK:    add x1, x1, #32
+; CHECK:    bl __atomic_store
+    %gep = getelementptr inbounds i64, ptr %ptr, i32 4
+    store atomic i64 %value, ptr %gep seq_cst, align 1
+    ret void
+}
+
+define dso_local void @store_atomic_i128_unaligned_unordered(i128 %value, ptr %ptr) {
+; O0-SDAG-LABEL: store_atomic_i128_unaligned_unordered:
+; O0-SDAG:    add x1, x8, #64
+; O0-SDAG:    bl __atomic_store
+;
+; O1-SDAG-LABEL: store_atomic_i128_unaligned_unordered:
+; O1-SDAG:    add x1, x2, #64
+; O1-SDAG:    bl __atomic_store
+;
+; GISEL-LABEL: store_atomic_i128_unaligned_unordered:
+; GISEL:    add x1, x8, #64
+; GISEL:    bl __atomic_store
+    %gep = getelementptr inbounds i128, ptr %ptr, i32 4
+    store atomic i128 %value, ptr %gep unordered, align 1
+    ret void
+}
+
+define dso_local void @store_atomic_i128_unaligned_monotonic(i128 %value, ptr %ptr) {
+; O0-SDAG-LABEL: store_atomic_i128_unaligned_monotonic:
+; O0-SDAG:    add x1, x8, #64
+; O0-SDAG:    bl __atomic_store
+;
+; O1-SDAG-LABEL: store_atomic_i128_unaligned_monotonic:
+; O1-SDAG:    add x1, x2, #64
+; O1-SDAG:    bl __atomic_store
+;
+; GISEL-LABEL: store_atomic_i128_unaligned_monotonic:
+; GISEL:    add x1, x8, #64
+; GISEL:    bl __atomic_store
+    %gep = getelementptr inbounds i128, ptr %ptr, i32 4
+    store atomic i128 %value, ptr %gep monotonic, align 1
+    ret void
+}
+
+define dso_local void @store_atomic_i128_unaligned_release(i128 %value, ptr %ptr) {
+; O0-SDAG-LABEL: store_atomic_i128_unaligned_release:
+; O0-SDAG:    add x1, x8, #64
+; O0-SDAG:    bl __atomic_store
+;
+; O1-SDAG-LABEL: store_atomic_i128_unaligned_release:
+; O1-SDAG:    add x1, x2, #64
+; O1-SDAG:    bl __atomic_store
+;
+; GISEL-LABEL: store_atomic_i128_unaligned_release:
+; GISEL:    add x1, x8, #64
+; GISEL:    bl __atomic_store
+    %gep = getelementptr inbounds i128, ptr %ptr, i32 4
+    store atomic i128 %value, ptr %gep release, align 1
+    ret void
+}
+
+define dso_local void @store_atomic_i128_unaligned_seq_cst(i128 %value, ptr %ptr) {
+; O0-SDAG-LABEL: store_atomic_i128_unaligned_seq_cst:
+; O0-SDAG:    add x1, x8, #64
+; O0-SDAG:    bl __atomic_store
+;
+; O1-SDAG-LABEL: store_atomic_i128_unaligned_seq_cst:
+; O1-SDAG:    add x1, x2, #64
+; O1-SDAG:    bl __atomic_store
+;
+; GISEL-LABEL: store_atomic_i128_unaligned_seq_cst:
+; GISEL:    add x1, x8, #64
+; GISEL:    bl __atomic_store
+    %gep = getelementptr inbounds i128, ptr %ptr, i32 4
+    store atomic i128 %value, ptr %gep seq_cst, align 1
+    ret void
+}

From 211d35eee4a83c5ec63d9257ea496ba3c4209d58 Mon Sep 17 00:00:00 2001
From: Antonio Frighetto <me at antoniofrighetto.com>
Date: Fri, 6 Oct 2023 15:07:27 +0000
Subject: [PATCH 2/2] [AArch64] Add support for v8.4a `ldapur`/`stlur`

The AArch64 backend now supports the v8.4a atomic Load-Acquire
RCpc (`ldapur`) and Store-Release (`stlur`) register unscaled instructions.
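
As a motivating sketch (mirroring the autogenerated tests below; the
function name here is hypothetical), consider an acquire load at a small
constant offset:

    define i8 @load_acquire_at_offset(ptr %ptr) {
        ; address = base + 4, a constant offset encodable as simm9
        %gep = getelementptr inbounds i8, ptr %ptr, i32 4
        ; acquire ordering previously forced the base-register-only form
        %r = load atomic i8, ptr %gep acquire, align 1
        ret i8 %r
    }

Before this patch, isel materialized the address separately and used the
base-register-only instruction (add x8, x0, #4 followed by
ldaprb w0, [x8]); with FEAT_LRCPC2 (-mattr=+rcpc-immo) the offset can
instead be folded into the unscaled-immediate form, ldapurb w0, [x0, #4],
as the updated CHECK lines in the diffs below show. Release stores follow
the same shape with stlurb/stlurh/stlur.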
---
 .../Target/AArch64/AArch64ISelDAGToDAG.cpp    | 18 ++++++-----
 .../lib/Target/AArch64/AArch64InstrAtomics.td | 31 +++++++++++++++++++
 llvm/lib/Target/AArch64/AArch64InstrInfo.td   |  2 +-
 .../GISel/AArch64InstructionSelector.cpp      |  3 --
 .../Atomics/aarch64-atomic-load-rcpc_immo.ll  | 30 ++++++------------
 .../Atomics/aarch64-atomic-store-rcpc_immo.ll | 30 ++++++------------
 6 files changed, 63 insertions(+), 51 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index 38759a2474518fc..7617dccdeee397f 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -997,6 +997,15 @@ static bool isWorthFoldingADDlow(SDValue N) {
   return true;
 }
 
+/// Check if the immediate offset is valid as a scaled immediate.
+static bool isValidAsScaledImmediate(int64_t Offset, unsigned Range,
+                                     unsigned Size) {
+  if ((Offset & (Size - 1)) == 0 && Offset >= 0 &&
+      Offset < (Range << Log2_32(Size)))
+    return true;
+  return false;
+}
+
 /// SelectAddrModeIndexedBitWidth - Select a "register plus scaled (un)signed BW-bit
 /// immediate" address.  The "Size" argument is the size in bytes of the memory
 /// reference, which determines the scale.
@@ -1092,7 +1101,7 @@ bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
     if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
       int64_t RHSC = (int64_t)RHS->getZExtValue();
       unsigned Scale = Log2_32(Size);
-      if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
+      if (isValidAsScaledImmediate(RHSC, 0x1000, Size)) {
         Base = N.getOperand(0);
         if (Base.getOpcode() == ISD::FrameIndex) {
           int FI = cast<FrameIndexSDNode>(Base)->getIndex();
@@ -1130,10 +1139,6 @@ bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size,
     return false;
   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
     int64_t RHSC = RHS->getSExtValue();
-    // If the offset is valid as a scaled immediate, don't match here.
-    if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 &&
-        RHSC < (0x1000 << Log2_32(Size)))
-      return false;
     if (RHSC >= -256 && RHSC < 256) {
       Base = N.getOperand(0);
       if (Base.getOpcode() == ISD::FrameIndex) {
@@ -1312,11 +1317,10 @@ bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
   //     LDR  X2, [BaseReg, X0]
   if (isa<ConstantSDNode>(RHS)) {
     int64_t ImmOff = (int64_t)cast<ConstantSDNode>(RHS)->getZExtValue();
-    unsigned Scale = Log2_32(Size);
     // Skip the immediate can be selected by load/store addressing mode.
     // Also skip the immediate can be encoded by a single ADD (SUB is also
     // checked by using -ImmOff).
-    if ((ImmOff % Size == 0 && ImmOff >= 0 && ImmOff < (0x1000 << Scale)) ||
+    if (isValidAsScaledImmediate(ImmOff, 0x1000, Size) ||
         isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
       return false;
 
diff --git a/llvm/lib/Target/AArch64/AArch64InstrAtomics.td b/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
index fa5a8515ed92eca..0002db52b1995c0 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
@@ -573,3 +573,34 @@ let Predicates = [HasRCPC3, HasNEON] in {
                 (i64 (bitconvert (v1f64 VecListOne64:$Vt)))),
             (STL1 (SUBREG_TO_REG (i64 0), VecListOne64:$Vt, dsub), (i64 0), GPR64sp:$Rn)>;
 }
+
+// v8.4a FEAT_LRCPC2 patterns
+let Predicates = [HasRCPC_IMMO] in {
+  // Load-Acquire RCpc Register unscaled loads
+  def : Pat<(acquiring_load<atomic_load_az_8>
+               (am_unscaled8 GPR64sp:$Rn, simm9:$offset)),
+          (LDAPURBi GPR64sp:$Rn, simm9:$offset)>;
+  def : Pat<(acquiring_load<atomic_load_az_16>
+               (am_unscaled16 GPR64sp:$Rn, simm9:$offset)),
+          (LDAPURHi GPR64sp:$Rn, simm9:$offset)>;
+  def : Pat<(acquiring_load<atomic_load_32>
+               (am_unscaled32 GPR64sp:$Rn, simm9:$offset)),
+          (LDAPURi GPR64sp:$Rn, simm9:$offset)>;
+  def : Pat<(acquiring_load<atomic_load_64>
+               (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
+          (LDAPURXi GPR64sp:$Rn, simm9:$offset)>;
+
+  // Store-Release Register unscaled stores
+  def : Pat<(releasing_store<atomic_store_8>
+               (am_unscaled8 GPR64sp:$Rn, simm9:$offset), GPR32:$val),
+          (STLURBi GPR32:$val, GPR64sp:$Rn, simm9:$offset)>;
+  def : Pat<(releasing_store<atomic_store_16>
+               (am_unscaled16 GPR64sp:$Rn, simm9:$offset), GPR32:$val),
+          (STLURHi GPR32:$val, GPR64sp:$Rn, simm9:$offset)>;
+  def : Pat<(releasing_store<atomic_store_32>
+               (am_unscaled32 GPR64sp:$Rn, simm9:$offset), GPR32:$val),
+          (STLURWi GPR32:$val, GPR64sp:$Rn, simm9:$offset)>;
+  def : Pat<(releasing_store<atomic_store_64>
+               (am_unscaled64 GPR64sp:$Rn, simm9:$offset), GPR64:$val),
+          (STLURXi GPR64:$val, GPR64sp:$Rn, simm9:$offset)>;
+}
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index ee42612c0fcdd2a..069a283dd311e50 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -94,7 +94,7 @@ def HasTLB_RMI          : Predicate<"Subtarget->hasTLB_RMI()">,
 def HasFlagM         : Predicate<"Subtarget->hasFlagM()">,
                        AssemblerPredicateWithAll<(all_of FeatureFlagM), "flagm">;
 
-def HasRCPC_IMMO      : Predicate<"Subtarget->hasRCPCImm()">,
+def HasRCPC_IMMO      : Predicate<"Subtarget->hasRCPC_IMMO()">,
                        AssemblerPredicateWithAll<(all_of FeatureRCPC_IMMO), "rcpc-immo">;
 
 def HasFPARMv8       : Predicate<"Subtarget->hasFPARMv8()">,
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 2089bfba5ff37c6..88516967515a58b 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -7397,9 +7397,6 @@ AArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root,
     return std::nullopt;
   RHSC = RHSOp1.getCImm()->getSExtValue();
 
-  // If the offset is valid as a scaled immediate, don't match here.
-  if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Log2_32(Size)))
-    return std::nullopt;
   if (RHSC >= -256 && RHSC < 256) {
     MachineOperand &Base = RootDef->getOperand(1);
     return {{
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-load-rcpc_immo.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-load-rcpc_immo.ll
index a03ced98c1a9f4b..6912ab31e35b451 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-load-rcpc_immo.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-load-rcpc_immo.ll
@@ -37,8 +37,7 @@ define dso_local i8 @load_atomic_i8_aligned_monotonic_const(ptr readonly %ptr) {
 
 define dso_local i8 @load_atomic_i8_aligned_acquire(ptr %ptr) {
 ; CHECK-LABEL: load_atomic_i8_aligned_acquire:
-; CHECK:    add x8, x0, #4
-; CHECK:    ldaprb w0, [x8]
+; CHECK:    ldapurb w0, [x0, #4]
     %gep = getelementptr inbounds i8, ptr %ptr, i32 4
     %r = load atomic i8, ptr %gep acquire, align 1
     ret i8 %r
@@ -46,8 +45,7 @@ define dso_local i8 @load_atomic_i8_aligned_acquire(ptr %ptr) {
 
 define dso_local i8 @load_atomic_i8_aligned_acquire_const(ptr readonly %ptr) {
 ; CHECK-LABEL: load_atomic_i8_aligned_acquire_const:
-; CHECK:    add x8, x0, #4
-; CHECK:    ldaprb w0, [x8]
+; CHECK:    ldapurb w0, [x0, #4]
     %gep = getelementptr inbounds i8, ptr %ptr, i32 4
     %r = load atomic i8, ptr %gep acquire, align 1
     ret i8 %r
@@ -105,8 +103,7 @@ define dso_local i16 @load_atomic_i16_aligned_monotonic_const(ptr readonly %ptr)
 
 define dso_local i16 @load_atomic_i16_aligned_acquire(ptr %ptr) {
 ; CHECK-LABEL: load_atomic_i16_aligned_acquire:
-; CHECK:    add x8, x0, #8
-; CHECK:    ldaprh w0, [x8]
+; CHECK:    ldapurh w0, [x0, #8]
     %gep = getelementptr inbounds i16, ptr %ptr, i32 4
     %r = load atomic i16, ptr %gep acquire, align 2
     ret i16 %r
@@ -114,8 +111,7 @@ define dso_local i16 @load_atomic_i16_aligned_acquire(ptr %ptr) {
 
 define dso_local i16 @load_atomic_i16_aligned_acquire_const(ptr readonly %ptr) {
 ; CHECK-LABEL: load_atomic_i16_aligned_acquire_const:
-; CHECK:    add x8, x0, #8
-; CHECK:    ldaprh w0, [x8]
+; CHECK:    ldapurh w0, [x0, #8]
     %gep = getelementptr inbounds i16, ptr %ptr, i32 4
     %r = load atomic i16, ptr %gep acquire, align 2
     ret i16 %r
@@ -173,8 +169,7 @@ define dso_local i32 @load_atomic_i32_aligned_monotonic_const(ptr readonly %ptr)
 
 define dso_local i32 @load_atomic_i32_aligned_acquire(ptr %ptr) {
 ; CHECK-LABEL: load_atomic_i32_aligned_acquire:
-; CHECK:    add x8, x0, #16
-; CHECK:    ldapr w0, [x8]
+; CHECK:    ldapur w0, [x0, #16]
     %gep = getelementptr inbounds i32, ptr %ptr, i32 4
     %r = load atomic i32, ptr %gep acquire, align 4
     ret i32 %r
@@ -182,8 +177,7 @@ define dso_local i32 @load_atomic_i32_aligned_acquire(ptr %ptr) {
 
 define dso_local i32 @load_atomic_i32_aligned_acquire_const(ptr readonly %ptr) {
 ; CHECK-LABEL: load_atomic_i32_aligned_acquire_const:
-; CHECK:    add x8, x0, #16
-; CHECK:    ldapr w0, [x8]
+; CHECK:    ldapur w0, [x0, #16]
     %gep = getelementptr inbounds i32, ptr %ptr, i32 4
     %r = load atomic i32, ptr %gep acquire, align 4
     ret i32 %r
@@ -241,8 +235,7 @@ define dso_local i64 @load_atomic_i64_aligned_monotonic_const(ptr readonly %ptr)
 
 define dso_local i64 @load_atomic_i64_aligned_acquire(ptr %ptr) {
 ; CHECK-LABEL: load_atomic_i64_aligned_acquire:
-; CHECK:    add x8, x0, #32
-; CHECK:    ldapr x0, [x8]
+; CHECK:    ldapur x0, [x0, #32]
     %gep = getelementptr inbounds i64, ptr %ptr, i32 4
     %r = load atomic i64, ptr %gep acquire, align 8
     ret i64 %r
@@ -250,8 +243,7 @@ define dso_local i64 @load_atomic_i64_aligned_acquire(ptr %ptr) {
 
 define dso_local i64 @load_atomic_i64_aligned_acquire_const(ptr readonly %ptr) {
 ; CHECK-LABEL: load_atomic_i64_aligned_acquire_const:
-; CHECK:    add x8, x0, #32
-; CHECK:    ldapr x0, [x8]
+; CHECK:    ldapur x0, [x0, #32]
     %gep = getelementptr inbounds i64, ptr %ptr, i32 4
     %r = load atomic i64, ptr %gep acquire, align 8
     ret i64 %r
@@ -377,8 +369,7 @@ define dso_local i8 @load_atomic_i8_unaligned_monotonic_const(ptr readonly %ptr)
 
 define dso_local i8 @load_atomic_i8_unaligned_acquire(ptr %ptr) {
 ; CHECK-LABEL: load_atomic_i8_unaligned_acquire:
-; CHECK:    add x8, x0, #4
-; CHECK:    ldaprb w0, [x8]
+; CHECK:    ldapurb w0, [x0, #4]
     %gep = getelementptr inbounds i8, ptr %ptr, i32 4
     %r = load atomic i8, ptr %gep acquire, align 1
     ret i8 %r
@@ -386,8 +377,7 @@ define dso_local i8 @load_atomic_i8_unaligned_acquire(ptr %ptr) {
 
 define dso_local i8 @load_atomic_i8_unaligned_acquire_const(ptr readonly %ptr) {
 ; CHECK-LABEL: load_atomic_i8_unaligned_acquire_const:
-; CHECK:    add x8, x0, #4
-; CHECK:    ldaprb w0, [x8]
+; CHECK:    ldapurb w0, [x0, #4]
     %gep = getelementptr inbounds i8, ptr %ptr, i32 4
     %r = load atomic i8, ptr %gep acquire, align 1
     ret i8 %r
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-store-rcpc_immo.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-store-rcpc_immo.ll
index 6b99d21fdec3ecd..24617c4c2198daa 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-store-rcpc_immo.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-store-rcpc_immo.ll
@@ -21,8 +21,7 @@ define dso_local void @store_atomic_i8_aligned_monotonic(i8 %value, ptr %ptr) {
 
 define dso_local void @store_atomic_i8_aligned_release(i8 %value, ptr %ptr) {
 ; CHECK-LABEL: store_atomic_i8_aligned_release:
-; CHECK:    add x8, x1, #4
-; CHECK:    stlrb w0, [x8]
+; CHECK:    stlurb w0, [x1, #4]
     %gep = getelementptr inbounds i8, ptr %ptr, i32 4
     store atomic i8 %value, ptr %gep release, align 1
     ret void
@@ -30,8 +29,7 @@ define dso_local void @store_atomic_i8_aligned_release(i8 %value, ptr %ptr) {
 
 define dso_local void @store_atomic_i8_aligned_seq_cst(i8 %value, ptr %ptr) {
 ; CHECK-LABEL: store_atomic_i8_aligned_seq_cst:
-; CHECK:    add x8, x1, #4
-; CHECK:    stlrb w0, [x8]
+; CHECK:    stlurb w0, [x1, #4]
     %gep = getelementptr inbounds i8, ptr %ptr, i32 4
     store atomic i8 %value, ptr %gep seq_cst, align 1
     ret void
@@ -55,8 +53,7 @@ define dso_local void @store_atomic_i16_aligned_monotonic(i16 %value, ptr %ptr)
 
 define dso_local void @store_atomic_i16_aligned_release(i16 %value, ptr %ptr) {
 ; CHECK-LABEL: store_atomic_i16_aligned_release:
-; CHECK:    add x8, x1, #8
-; CHECK:    stlrh w0, [x8]
+; CHECK:    stlurh w0, [x1, #8]
     %gep = getelementptr inbounds i16, ptr %ptr, i32 4
     store atomic i16 %value, ptr %gep release, align 2
     ret void
@@ -64,8 +61,7 @@ define dso_local void @store_atomic_i16_aligned_release(i16 %value, ptr %ptr) {
 
 define dso_local void @store_atomic_i16_aligned_seq_cst(i16 %value, ptr %ptr) {
 ; CHECK-LABEL: store_atomic_i16_aligned_seq_cst:
-; CHECK:    add x8, x1, #8
-; CHECK:    stlrh w0, [x8]
+; CHECK:    stlurh w0, [x1, #8]
     %gep = getelementptr inbounds i16, ptr %ptr, i32 4
     store atomic i16 %value, ptr %gep seq_cst, align 2
     ret void
@@ -89,8 +85,7 @@ define dso_local void @store_atomic_i32_aligned_monotonic(i32 %value, ptr %ptr)
 
 define dso_local void @store_atomic_i32_aligned_release(i32 %value, ptr %ptr) {
 ; CHECK-LABEL: store_atomic_i32_aligned_release:
-; CHECK:    add x8, x1, #16
-; CHECK:    stlr w0, [x8]
+; CHECK:    stlur w0, [x1, #16]
     %gep = getelementptr inbounds i32, ptr %ptr, i32 4
     store atomic i32 %value, ptr %gep release, align 4
     ret void
@@ -98,8 +93,7 @@ define dso_local void @store_atomic_i32_aligned_release(i32 %value, ptr %ptr) {
 
 define dso_local void @store_atomic_i32_aligned_seq_cst(i32 %value, ptr %ptr) {
 ; CHECK-LABEL: store_atomic_i32_aligned_seq_cst:
-; CHECK:    add x8, x1, #16
-; CHECK:    stlr w0, [x8]
+; CHECK:    stlur w0, [x1, #16]
     %gep = getelementptr inbounds i32, ptr %ptr, i32 4
     store atomic i32 %value, ptr %gep seq_cst, align 4
     ret void
@@ -123,8 +117,7 @@ define dso_local void @store_atomic_i64_aligned_monotonic(i64 %value, ptr %ptr)
 
 define dso_local void @store_atomic_i64_aligned_release(i64 %value, ptr %ptr) {
 ; CHECK-LABEL: store_atomic_i64_aligned_release:
-; CHECK:    add x8, x1, #32
-; CHECK:    stlr x0, [x8]
+; CHECK:    stlur x0, [x1, #32]
     %gep = getelementptr inbounds i64, ptr %ptr, i32 4
     store atomic i64 %value, ptr %gep release, align 8
     ret void
@@ -132,8 +125,7 @@ define dso_local void @store_atomic_i64_aligned_release(i64 %value, ptr %ptr) {
 
 define dso_local void @store_atomic_i64_aligned_seq_cst(i64 %value, ptr %ptr) {
 ; CHECK-LABEL: store_atomic_i64_aligned_seq_cst:
-; CHECK:    add x8, x1, #32
-; CHECK:    stlr x0, [x8]
+; CHECK:    stlur x0, [x1, #32]
     %gep = getelementptr inbounds i64, ptr %ptr, i32 4
     store atomic i64 %value, ptr %gep seq_cst, align 8
     ret void
@@ -192,8 +184,7 @@ define dso_local void @store_atomic_i8_unaligned_monotonic(i8 %value, ptr %ptr)
 
 define dso_local void @store_atomic_i8_unaligned_release(i8 %value, ptr %ptr) {
 ; CHECK-LABEL: store_atomic_i8_unaligned_release:
-; CHECK:    add x8, x1, #4
-; CHECK:    stlrb w0, [x8]
+; CHECK:    stlurb w0, [x1, #4]
     %gep = getelementptr inbounds i8, ptr %ptr, i32 4
     store atomic i8 %value, ptr %gep release, align 1
     ret void
@@ -201,8 +192,7 @@ define dso_local void @store_atomic_i8_unaligned_release(i8 %value, ptr %ptr) {
 
 define dso_local void @store_atomic_i8_unaligned_seq_cst(i8 %value, ptr %ptr) {
 ; CHECK-LABEL: store_atomic_i8_unaligned_seq_cst:
-; CHECK:    add x8, x1, #4
-; CHECK:    stlrb w0, [x8]
+; CHECK:    stlurb w0, [x1, #4]
     %gep = getelementptr inbounds i8, ptr %ptr, i32 4
     store atomic i8 %value, ptr %gep seq_cst, align 1
     ret void


