[llvm] d9856b1 - [NFC][ARM] Regenerate arm64-atomic.ll test

via llvm-commits llvm-commits at lists.llvm.org
Sat May 1 03:11:14 PDT 2021


Author: LemonBoy
Date: 2021-05-01T12:09:14+02:00
New Revision: d9856b12f2be257f1b7aaccde71eb0421a1aaaf3

URL: https://github.com/llvm/llvm-project/commit/d9856b12f2be257f1b7aaccde71eb0421a1aaaf3
DIFF: https://github.com/llvm/llvm-project/commit/d9856b12f2be257f1b7aaccde71eb0421a1aaaf3.diff

LOG: [NFC][ARM] Regenerate arm64-atomic.ll test

Prerequisite for D101163: the NOLSE-O0 case shows registers being
spilled inside the rmw loop.

Added: 
    

Modified: 
    llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll

Removed: 
    


################################################################################
diff  --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll
index 92cf76a7be45b..7deab9ee1e074 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll
@@ -1,83 +1,108 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=arm64-apple-ios -global-isel -global-isel-abort=1 -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK-NOLSE,CHECK-NOLSE-O1
 ; RUN: llc < %s -mtriple=arm64-apple-ios -global-isel -global-isel-abort=1 -O0 -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK-NOLSE,CHECK-NOLSE-O0
 ; RUN: llc < %s -mtriple=arm64-apple-ios -global-isel -global-isel-abort=1 -mcpu=apple-a13 -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK-LSE,CHECK-LSE-O1
 ; RUN: llc < %s -mtriple=arm64-apple-ios -global-isel -global-isel-abort=1 -mcpu=apple-a13 -O0 -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK-LSE,CHECK-LSE-O0
 
 define i32 @val_compare_and_swap(i32* %p, i32 %cmp, i32 %new) #0 {
-; CHECK-NOLSE-LABEL: val_compare_and_swap:
-; CHECK-NOLSE-O1: LBB0_1:
-; CHECK-NOLSE-O1:     ldaxr   [[VAL:w[0-9]+]], [x0]
-; CHECK-NOLSE-O1:     cmp     [[VAL]], w1
-; CHECK-NOLSE-O1:     b.ne    LBB0_4
-; CHECK-NOLSE-O1:     stxr    [[STATUS:w[0-9]+]], w2, [x0]
-; CHECK-NOLSE-O1:     cbnz    [[STATUS]], LBB0_1
-; CHECK-NOLSE-O1:     mov     w0, [[VAL]]
-; CHECK-NOLSE-O1:     ret
-; CHECK-NOLSE-O1: LBB0_4:
-; CHECK-NOLSE-O1:     clrex
-; CHECK-NOLSE-O1:     mov     w0, [[VAL]]
-; CHECK-NOLSE-O1:     ret
-
-; CHECK-NOLSE-O0:     mov x[[ADDR:[0-9]+]], x0
-; CHECK-NOLSE-O0: LBB0_1:
-; CHECK-NOLSE-O0:     ldaxr   w0, [x[[ADDR]]]
-; CHECK-NOLSE-O0:     cmp     w0, w1
-; CHECK-NOLSE-O0:     b.ne    LBB0_3
-; CHECK-NOLSE-O0:     stlxr    [[STATUS:w[0-9]+]], w2, [x[[ADDR]]]
-; CHECK-NOLSE-O0:     cbnz    [[STATUS]], LBB0_1
-; CHECK-NOLSE-O0: LBB0_3:
-; CHECK-NOLSE-O0:     ret
-
-; CHECK-LSE-LABEL: val_compare_and_swap:
-; CHECK-LSE-O1: casa w1, w2, [x0]
-; CHECK-LSE-O1: mov x0, x1
-
-; CHECK-LSE-O0: mov     x[[ADDR:[0-9]+]], x0
-; CHECK-LSE-O0: mov     x0, x1
-; CHECK-LSE-O0: casa    w0, w2, [x[[ADDR]]]
-
+; CHECK-NOLSE-O1-LABEL: val_compare_and_swap:
+; CHECK-NOLSE-O1:       ; %bb.0:
+; CHECK-NOLSE-O1-NEXT:  LBB0_1: ; %cmpxchg.start
+; CHECK-NOLSE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
+; CHECK-NOLSE-O1-NEXT:    ldaxr w8, [x0]
+; CHECK-NOLSE-O1-NEXT:    cmp w8, w1
+; CHECK-NOLSE-O1-NEXT:    b.ne LBB0_4
+; CHECK-NOLSE-O1-NEXT:  ; %bb.2: ; %cmpxchg.trystore
+; CHECK-NOLSE-O1-NEXT:    ; in Loop: Header=BB0_1 Depth=1
+; CHECK-NOLSE-O1-NEXT:    stxr w9, w2, [x0]
+; CHECK-NOLSE-O1-NEXT:    cbnz w9, LBB0_1
+; CHECK-NOLSE-O1-NEXT:  ; %bb.3: ; %cmpxchg.end
+; CHECK-NOLSE-O1-NEXT:    mov w0, w8
+; CHECK-NOLSE-O1-NEXT:    ret
+; CHECK-NOLSE-O1-NEXT:  LBB0_4: ; %cmpxchg.nostore
+; CHECK-NOLSE-O1-NEXT:    clrex
+; CHECK-NOLSE-O1-NEXT:    mov w0, w8
+; CHECK-NOLSE-O1-NEXT:    ret
+;
+; CHECK-NOLSE-O0-LABEL: val_compare_and_swap:
+; CHECK-NOLSE-O0:       ; %bb.0:
+; CHECK-NOLSE-O0-NEXT:    mov x9, x0
+; CHECK-NOLSE-O0-NEXT:  LBB0_1: ; =>This Inner Loop Header: Depth=1
+; CHECK-NOLSE-O0-NEXT:    ldaxr w0, [x9]
+; CHECK-NOLSE-O0-NEXT:    cmp w0, w1
+; CHECK-NOLSE-O0-NEXT:    b.ne LBB0_3
+; CHECK-NOLSE-O0-NEXT:  ; %bb.2: ; in Loop: Header=BB0_1 Depth=1
+; CHECK-NOLSE-O0-NEXT:    stlxr w8, w2, [x9]
+; CHECK-NOLSE-O0-NEXT:    cbnz w8, LBB0_1
+; CHECK-NOLSE-O0-NEXT:  LBB0_3:
+; CHECK-NOLSE-O0-NEXT:    ret
+;
+; CHECK-LSE-O1-LABEL: val_compare_and_swap:
+; CHECK-LSE-O1:       ; %bb.0:
+; CHECK-LSE-O1-NEXT:    casa w1, w2, [x0]
+; CHECK-LSE-O1-NEXT:    mov x0, x1
+; CHECK-LSE-O1-NEXT:    ret
+;
+; CHECK-LSE-O0-LABEL: val_compare_and_swap:
+; CHECK-LSE-O0:       ; %bb.0:
+; CHECK-LSE-O0-NEXT:    mov x8, x0
+; CHECK-LSE-O0-NEXT:    mov x0, x1
+; CHECK-LSE-O0-NEXT:    casa w0, w2, [x8]
+; CHECK-LSE-O0-NEXT:    ret
   %pair = cmpxchg i32* %p, i32 %cmp, i32 %new acquire acquire
   %val = extractvalue { i32, i1 } %pair, 0
   ret i32 %val
 }
 
 define i32 @val_compare_and_swap_from_load(i32* %p, i32 %cmp, i32* %pnew) #0 {
-; CHECK-NOLSE-LABEL: val_compare_and_swap_from_load:
-; CHECK-NOLSE-O1:     ldr [[NEW:w[0-9]+]], [x2]
-; CHECK-NOLSE-O1: LBB1_1:
-; CHECK-NOLSE-O1:     ldaxr   [[VAL:w[0-9]+]], [x0]
-; CHECK-NOLSE-O1:     cmp     [[VAL]], w1
-; CHECK-NOLSE-O1:     b.ne    LBB1_4
-; CHECK-NOLSE-O1:     stxr    [[STATUS:w[0-9]+]], [[NEW]], [x0]
-; CHECK-NOLSE-O1:     cbnz    [[STATUS]], LBB1_1
-; CHECK-NOLSE-O1:     mov     w0, [[VAL]]
-; CHECK-NOLSE-O1:     ret
-; CHECK-NOLSE-O1: LBB1_4:
-; CHECK-NOLSE-O1:     clrex
-; CHECK-NOLSE-O1:     mov     w0, [[VAL]]
-; CHECK-NOLSE-O1:     ret
-
-; CHECK-NOLSE-O0:     mov x[[ADDR:[0-9]+]], x0
-; CHECK-NOLSE-O0:     ldr [[NEW:w[0-9]+]], [x2]
-; CHECK-NOLSE-O0: LBB1_1:
-; CHECK-NOLSE-O0:     ldaxr   w0, [x[[ADDR]]]
-; CHECK-NOLSE-O0:     cmp     w0, w1
-; CHECK-NOLSE-O0:     b.ne    LBB1_3
-; CHECK-NOLSE-O0:     stlxr    [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
-; CHECK-NOLSE-O0:     cbnz    [[STATUS]], LBB1_1
-; CHECK-NOLSE-O0: LBB1_3:
-; CHECK-NOLSE-O0:     ret
-
-; CHECK-LSE-LABEL: val_compare_and_swap_from_load:
-; CHECK-LSE-O1: ldr [[NEW:w[0-9]+]], [x2]
-; CHECK-LSE-O1: casa w1, [[NEW]], [x0]
-; CHECK-LSE-O1: mov x0, x1
-
-; CHECK-LSE-O0: mov     x[[ADDR:[0-9]+]], x0
-; CHECK-LSE-O0: mov     x0, x1
-; CHECK-LSE-O0: ldr [[NEW:w[0-9]+]], [x2]
-; CHECK-LSE-O0: casa    w0, [[NEW]], [x[[ADDR]]]
-
+; CHECK-NOLSE-O1-LABEL: val_compare_and_swap_from_load:
+; CHECK-NOLSE-O1:       ; %bb.0:
+; CHECK-NOLSE-O1-NEXT:    ldr w9, [x2]
+; CHECK-NOLSE-O1-NEXT:  LBB1_1: ; %cmpxchg.start
+; CHECK-NOLSE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
+; CHECK-NOLSE-O1-NEXT:    ldaxr w8, [x0]
+; CHECK-NOLSE-O1-NEXT:    cmp w8, w1
+; CHECK-NOLSE-O1-NEXT:    b.ne LBB1_4
+; CHECK-NOLSE-O1-NEXT:  ; %bb.2: ; %cmpxchg.trystore
+; CHECK-NOLSE-O1-NEXT:    ; in Loop: Header=BB1_1 Depth=1
+; CHECK-NOLSE-O1-NEXT:    stxr w10, w9, [x0]
+; CHECK-NOLSE-O1-NEXT:    cbnz w10, LBB1_1
+; CHECK-NOLSE-O1-NEXT:  ; %bb.3: ; %cmpxchg.end
+; CHECK-NOLSE-O1-NEXT:    mov w0, w8
+; CHECK-NOLSE-O1-NEXT:    ret
+; CHECK-NOLSE-O1-NEXT:  LBB1_4: ; %cmpxchg.nostore
+; CHECK-NOLSE-O1-NEXT:    clrex
+; CHECK-NOLSE-O1-NEXT:    mov w0, w8
+; CHECK-NOLSE-O1-NEXT:    ret
+;
+; CHECK-NOLSE-O0-LABEL: val_compare_and_swap_from_load:
+; CHECK-NOLSE-O0:       ; %bb.0:
+; CHECK-NOLSE-O0-NEXT:    mov x9, x0
+; CHECK-NOLSE-O0-NEXT:    ldr w10, [x2]
+; CHECK-NOLSE-O0-NEXT:  LBB1_1: ; =>This Inner Loop Header: Depth=1
+; CHECK-NOLSE-O0-NEXT:    ldaxr w0, [x9]
+; CHECK-NOLSE-O0-NEXT:    cmp w0, w1
+; CHECK-NOLSE-O0-NEXT:    b.ne LBB1_3
+; CHECK-NOLSE-O0-NEXT:  ; %bb.2: ; in Loop: Header=BB1_1 Depth=1
+; CHECK-NOLSE-O0-NEXT:    stlxr w8, w10, [x9]
+; CHECK-NOLSE-O0-NEXT:    cbnz w8, LBB1_1
+; CHECK-NOLSE-O0-NEXT:  LBB1_3:
+; CHECK-NOLSE-O0-NEXT:    ret
+;
+; CHECK-LSE-O1-LABEL: val_compare_and_swap_from_load:
+; CHECK-LSE-O1:       ; %bb.0:
+; CHECK-LSE-O1-NEXT:    ldr w8, [x2]
+; CHECK-LSE-O1-NEXT:    casa w1, w8, [x0]
+; CHECK-LSE-O1-NEXT:    mov x0, x1
+; CHECK-LSE-O1-NEXT:    ret
+;
+; CHECK-LSE-O0-LABEL: val_compare_and_swap_from_load:
+; CHECK-LSE-O0:       ; %bb.0:
+; CHECK-LSE-O0-NEXT:    mov x9, x0
+; CHECK-LSE-O0-NEXT:    mov x0, x1
+; CHECK-LSE-O0-NEXT:    ldr w8, [x2]
+; CHECK-LSE-O0-NEXT:    casa w0, w8, [x9]
+; CHECK-LSE-O0-NEXT:    ret
   %new = load i32, i32* %pnew
   %pair = cmpxchg i32* %p, i32 %cmp, i32 %new acquire acquire
   %val = extractvalue { i32, i1 } %pair, 0
@@ -85,242 +110,436 @@ define i32 @val_compare_and_swap_from_load(i32* %p, i32 %cmp, i32* %pnew) #0 {
 }
 
 define i32 @val_compare_and_swap_rel(i32* %p, i32 %cmp, i32 %new) #0 {
-; CHECK-NOLSE-LABEL: val_compare_and_swap_rel:
-; CHECK-NOLSE-O1: LBB2_1:
-; CHECK-NOLSE-O1:     ldaxr   [[VAL:w[0-9]+]], [x0]
-; CHECK-NOLSE-O1:     cmp     [[VAL]], w1
-; CHECK-NOLSE-O1:     b.ne    LBB2_4
-; CHECK-NOLSE-O1:     stlxr    [[STATUS:w[0-9]+]], w2, [x0]
-; CHECK-NOLSE-O1:     cbnz    [[STATUS]], LBB2_1
-; CHECK-NOLSE-O1:     mov     w0, [[VAL]]
-; CHECK-NOLSE-O1:     ret
-; CHECK-NOLSE-O1: LBB2_4:
-; CHECK-NOLSE-O1:     clrex
-; CHECK-NOLSE-O1:     mov     w0, [[VAL]]
-; CHECK-NOLSE-O1:     ret
-
-; CHECK-NOLSE-O0:     mov x[[ADDR:[0-9]+]], x0
-; CHECK-NOLSE-O0: LBB2_1:
-; CHECK-NOLSE-O0:     ldaxr   w0, [x[[ADDR]]]
-; CHECK-NOLSE-O0:     cmp     w0, w1
-; CHECK-NOLSE-O0:     b.ne    LBB2_3
-; CHECK-NOLSE-O0:     stlxr    [[STATUS:w[0-9]+]], w2, [x[[ADDR]]]
-; CHECK-NOLSE-O0:     cbnz    [[STATUS]], LBB2_1
-; CHECK-NOLSE-O0: LBB2_3:
-; CHECK-NOLSE-O0:     ret
-
-; CHECK-LSE-LABEL: val_compare_and_swap_rel:
-; CHECK-LSE-O1: casal w1, w2, [x0]
-; CHECK-LSE-O1: mov x0, x1
-
-; CHECK-LSE-O0: mov     x[[ADDR:[0-9]+]], x0
-; CHECK-LSE-O0: mov     x0, x1
-; CHECK-LSE-O0: casal   w0, w2, [x[[ADDR]]]
-
+; CHECK-NOLSE-O1-LABEL: val_compare_and_swap_rel:
+; CHECK-NOLSE-O1:       ; %bb.0:
+; CHECK-NOLSE-O1-NEXT:  LBB2_1: ; %cmpxchg.start
+; CHECK-NOLSE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
+; CHECK-NOLSE-O1-NEXT:    ldaxr w8, [x0]
+; CHECK-NOLSE-O1-NEXT:    cmp w8, w1
+; CHECK-NOLSE-O1-NEXT:    b.ne LBB2_4
+; CHECK-NOLSE-O1-NEXT:  ; %bb.2: ; %cmpxchg.trystore
+; CHECK-NOLSE-O1-NEXT:    ; in Loop: Header=BB2_1 Depth=1
+; CHECK-NOLSE-O1-NEXT:    stlxr w9, w2, [x0]
+; CHECK-NOLSE-O1-NEXT:    cbnz w9, LBB2_1
+; CHECK-NOLSE-O1-NEXT:  ; %bb.3: ; %cmpxchg.end
+; CHECK-NOLSE-O1-NEXT:    mov w0, w8
+; CHECK-NOLSE-O1-NEXT:    ret
+; CHECK-NOLSE-O1-NEXT:  LBB2_4: ; %cmpxchg.nostore
+; CHECK-NOLSE-O1-NEXT:    clrex
+; CHECK-NOLSE-O1-NEXT:    mov w0, w8
+; CHECK-NOLSE-O1-NEXT:    ret
+;
+; CHECK-NOLSE-O0-LABEL: val_compare_and_swap_rel:
+; CHECK-NOLSE-O0:       ; %bb.0:
+; CHECK-NOLSE-O0-NEXT:    mov x9, x0
+; CHECK-NOLSE-O0-NEXT:  LBB2_1: ; =>This Inner Loop Header: Depth=1
+; CHECK-NOLSE-O0-NEXT:    ldaxr w0, [x9]
+; CHECK-NOLSE-O0-NEXT:    cmp w0, w1
+; CHECK-NOLSE-O0-NEXT:    b.ne LBB2_3
+; CHECK-NOLSE-O0-NEXT:  ; %bb.2: ; in Loop: Header=BB2_1 Depth=1
+; CHECK-NOLSE-O0-NEXT:    stlxr w8, w2, [x9]
+; CHECK-NOLSE-O0-NEXT:    cbnz w8, LBB2_1
+; CHECK-NOLSE-O0-NEXT:  LBB2_3:
+; CHECK-NOLSE-O0-NEXT:    ret
+;
+; CHECK-LSE-O1-LABEL: val_compare_and_swap_rel:
+; CHECK-LSE-O1:       ; %bb.0:
+; CHECK-LSE-O1-NEXT:    casal w1, w2, [x0]
+; CHECK-LSE-O1-NEXT:    mov x0, x1
+; CHECK-LSE-O1-NEXT:    ret
+;
+; CHECK-LSE-O0-LABEL: val_compare_and_swap_rel:
+; CHECK-LSE-O0:       ; %bb.0:
+; CHECK-LSE-O0-NEXT:    mov x8, x0
+; CHECK-LSE-O0-NEXT:    mov x0, x1
+; CHECK-LSE-O0-NEXT:    casal w0, w2, [x8]
+; CHECK-LSE-O0-NEXT:    ret
   %pair = cmpxchg i32* %p, i32 %cmp, i32 %new acq_rel monotonic
   %val = extractvalue { i32, i1 } %pair, 0
   ret i32 %val
 }
 
 define i64 @val_compare_and_swap_64(i64* %p, i64 %cmp, i64 %new) #0 {
-; CHECK-NOLSE-LABEL: val_compare_and_swap_64:
-; CHECK-NOLSE-O1: LBB3_1:
-; CHECK-NOLSE-O1:     ldxr   [[VAL:x[0-9]+]], [x0]
-; CHECK-NOLSE-O1:     cmp     [[VAL]], x1
-; CHECK-NOLSE-O1:     b.ne    LBB3_4
-; CHECK-NOLSE-O1:     stxr    [[STATUS:w[0-9]+]], x2, [x0]
-; CHECK-NOLSE-O1:     cbnz    [[STATUS]], LBB3_1
-; CHECK-NOLSE-O1:     mov     x0, [[VAL]]
-; CHECK-NOLSE-O1:     ret
-; CHECK-NOLSE-O1: LBB3_4:
-; CHECK-NOLSE-O1:     clrex
-; CHECK-NOLSE-O1:     mov     x0, [[VAL]]
-; CHECK-NOLSE-O1:     ret
-
-; CHECK-NOLSE-O0:     mov x[[ADDR:[0-9]+]], x0
-; CHECK-NOLSE-O0: LBB3_1:
-; CHECK-NOLSE-O0:     ldaxr   x0, [x[[ADDR]]]
-; CHECK-NOLSE-O0:     cmp     x0, x1
-; CHECK-NOLSE-O0:     b.ne    LBB3_3
-; CHECK-NOLSE-O0:     stlxr    [[STATUS:w[0-9]+]], x2, [x[[ADDR]]]
-; CHECK-NOLSE-O0:     cbnz    [[STATUS]], LBB3_1
-; CHECK-NOLSE-O0: LBB3_3:
-; CHECK-NOLSE-O0:     ret
-
-; CHECK-LSE-LABEL: val_compare_and_swap_64:
-; CHECK-LSE-O1: cas x1, x2, [x0]
-; CHECK-LSE-O1: mov x0, x1
-
-; CHECK-LSE-O0: mov     x[[ADDR:[0-9]+]], x0
-; CHECK-LSE-O0: mov     x0, x1
-; CHECK-LSE-O0: cas     x0, x2, [x[[ADDR]]]
-
+; CHECK-NOLSE-O1-LABEL: val_compare_and_swap_64:
+; CHECK-NOLSE-O1:       ; %bb.0:
+; CHECK-NOLSE-O1-NEXT:  LBB3_1: ; %cmpxchg.start
+; CHECK-NOLSE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
+; CHECK-NOLSE-O1-NEXT:    ldxr x8, [x0]
+; CHECK-NOLSE-O1-NEXT:    cmp x8, x1
+; CHECK-NOLSE-O1-NEXT:    b.ne LBB3_4
+; CHECK-NOLSE-O1-NEXT:  ; %bb.2: ; %cmpxchg.trystore
+; CHECK-NOLSE-O1-NEXT:    ; in Loop: Header=BB3_1 Depth=1
+; CHECK-NOLSE-O1-NEXT:    stxr w9, x2, [x0]
+; CHECK-NOLSE-O1-NEXT:    cbnz w9, LBB3_1
+; CHECK-NOLSE-O1-NEXT:  ; %bb.3: ; %cmpxchg.end
+; CHECK-NOLSE-O1-NEXT:    mov x0, x8
+; CHECK-NOLSE-O1-NEXT:    ret
+; CHECK-NOLSE-O1-NEXT:  LBB3_4: ; %cmpxchg.nostore
+; CHECK-NOLSE-O1-NEXT:    clrex
+; CHECK-NOLSE-O1-NEXT:    mov x0, x8
+; CHECK-NOLSE-O1-NEXT:    ret
+;
+; CHECK-NOLSE-O0-LABEL: val_compare_and_swap_64:
+; CHECK-NOLSE-O0:       ; %bb.0:
+; CHECK-NOLSE-O0-NEXT:    mov x9, x0
+; CHECK-NOLSE-O0-NEXT:  LBB3_1: ; =>This Inner Loop Header: Depth=1
+; CHECK-NOLSE-O0-NEXT:    ldaxr x0, [x9]
+; CHECK-NOLSE-O0-NEXT:    cmp x0, x1
+; CHECK-NOLSE-O0-NEXT:    b.ne LBB3_3
+; CHECK-NOLSE-O0-NEXT:  ; %bb.2: ; in Loop: Header=BB3_1 Depth=1
+; CHECK-NOLSE-O0-NEXT:    stlxr w8, x2, [x9]
+; CHECK-NOLSE-O0-NEXT:    cbnz w8, LBB3_1
+; CHECK-NOLSE-O0-NEXT:  LBB3_3:
+; CHECK-NOLSE-O0-NEXT:    ret
+;
+; CHECK-LSE-O1-LABEL: val_compare_and_swap_64:
+; CHECK-LSE-O1:       ; %bb.0:
+; CHECK-LSE-O1-NEXT:    cas x1, x2, [x0]
+; CHECK-LSE-O1-NEXT:    mov x0, x1
+; CHECK-LSE-O1-NEXT:    ret
+;
+; CHECK-LSE-O0-LABEL: val_compare_and_swap_64:
+; CHECK-LSE-O0:       ; %bb.0:
+; CHECK-LSE-O0-NEXT:    mov x8, x0
+; CHECK-LSE-O0-NEXT:    mov x0, x1
+; CHECK-LSE-O0-NEXT:    cas x0, x2, [x8]
+; CHECK-LSE-O0-NEXT:    ret
   %pair = cmpxchg i64* %p, i64 %cmp, i64 %new monotonic monotonic
   %val = extractvalue { i64, i1 } %pair, 0
   ret i64 %val
 }
 
 define i32 @fetch_and_nand(i32* %p) #0 {
-; CHECK-NOLSE-LABEL: fetch_and_nand:
-; CHECK-NOLSE-O1: LBB4_1:
-; CHECK-NOLSE-O1:     ldxr    [[VAL:w[0-9]+]], [x0]
-; CHECK-NOLSE-O1:     and     [[NEWTMP:w[0-9]+]], [[VAL]], #0x7
-; CHECK-NOLSE-O1:     mvn     [[NEW:w[0-9]+]], [[NEWTMP]]
-; CHECK-NOLSE-O1:     stlxr   [[STATUS:w[0-9]+]], [[NEW]], [x0]
-; CHECK-NOLSE-O1:     cbnz    [[STATUS]], LBB4_1
-; CHECK-NOLSE-O1:     mov     w0, [[VAL]]
-; CHECK-NOLSE-O1:     ret
-
-; CHECK-NOLSE-O0: ldxr
-; CHECK-NOLSE-O0: stlxr
-
-; CHECK-LSE-LABEL: fetch_and_nand:
-; CHECK-LSE-O1: LBB4_1:
-; CHECK-LSE-O1:     ldxr    w[[VAL:[0-9]+]], [x0]
-; CHECK-LSE-O1:     and     [[NEWTMP:w[0-9]+]], w[[VAL]], #0x7
-; CHECK-LSE-O1:     mvn     [[NEW:w[0-9]+]], [[NEWTMP]]
-; CHECK-LSE-O1:     stlxr   [[STATUS:w[0-9]+]], [[NEW]], [x0]
-; CHECK-LSE-O1:     cbnz    [[STATUS]], LBB4_1
-; CHECK-LSE-O1:     mov     x0, x[[VAL]]
-
+; CHECK-NOLSE-O1-LABEL: fetch_and_nand:
+; CHECK-NOLSE-O1:       ; %bb.0:
+; CHECK-NOLSE-O1-NEXT:  LBB4_1: ; %atomicrmw.start
+; CHECK-NOLSE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
+; CHECK-NOLSE-O1-NEXT:    ldxr w8, [x0]
+; CHECK-NOLSE-O1-NEXT:    and w9, w8, #0x7
+; CHECK-NOLSE-O1-NEXT:    mvn w9, w9
+; CHECK-NOLSE-O1-NEXT:    stlxr w10, w9, [x0]
+; CHECK-NOLSE-O1-NEXT:    cbnz w10, LBB4_1
+; CHECK-NOLSE-O1-NEXT:  ; %bb.2: ; %atomicrmw.end
+; CHECK-NOLSE-O1-NEXT:    mov w0, w8
+; CHECK-NOLSE-O1-NEXT:    ret
+;
+; CHECK-NOLSE-O0-LABEL: fetch_and_nand:
+; CHECK-NOLSE-O0:       ; %bb.0:
+; CHECK-NOLSE-O0-NEXT:    sub sp, sp, #16 ; =16
+; CHECK-NOLSE-O0-NEXT:    str x0, [sp, #8] ; 8-byte Folded Spill
+; CHECK-NOLSE-O0-NEXT:  LBB4_1: ; %atomicrmw.start
+; CHECK-NOLSE-O0-NEXT:    ; =>This Inner Loop Header: Depth=1
+; CHECK-NOLSE-O0-NEXT:    ldr x10, [sp, #8] ; 8-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT:    ldxr w8, [x10]
+; CHECK-NOLSE-O0-NEXT:    ; kill: def $x8 killed $w8
+; CHECK-NOLSE-O0-NEXT:    ; kill: def $w8 killed $w8 killed $x8
+; CHECK-NOLSE-O0-NEXT:    str w8, [sp, #4] ; 4-byte Folded Spill
+; CHECK-NOLSE-O0-NEXT:    and w8, w8, #0x7
+; CHECK-NOLSE-O0-NEXT:    mvn w9, w8
+; CHECK-NOLSE-O0-NEXT:    stlxr w8, w9, [x10]
+; CHECK-NOLSE-O0-NEXT:    cbnz w8, LBB4_1
+; CHECK-NOLSE-O0-NEXT:  ; %bb.2: ; %atomicrmw.end
+; CHECK-NOLSE-O0-NEXT:    ldr w0, [sp, #4] ; 4-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT:    add sp, sp, #16 ; =16
+; CHECK-NOLSE-O0-NEXT:    ret
+;
+; CHECK-LSE-O1-LABEL: fetch_and_nand:
+; CHECK-LSE-O1:       ; %bb.0:
+; CHECK-LSE-O1-NEXT:  LBB4_1: ; %atomicrmw.start
+; CHECK-LSE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
+; CHECK-LSE-O1-NEXT:    ldxr w8, [x0]
+; CHECK-LSE-O1-NEXT:    and w9, w8, #0x7
+; CHECK-LSE-O1-NEXT:    mvn w9, w9
+; CHECK-LSE-O1-NEXT:    stlxr w10, w9, [x0]
+; CHECK-LSE-O1-NEXT:    cbnz w10, LBB4_1
+; CHECK-LSE-O1-NEXT:  ; %bb.2: ; %atomicrmw.end
+; CHECK-LSE-O1-NEXT:    mov x0, x8
+; CHECK-LSE-O1-NEXT:    ret
+;
+; CHECK-LSE-O0-LABEL: fetch_and_nand:
+; CHECK-LSE-O0:       ; %bb.0:
+; CHECK-LSE-O0-NEXT:    sub sp, sp, #16 ; =16
+; CHECK-LSE-O0-NEXT:    str x0, [sp, #8] ; 8-byte Folded Spill
+; CHECK-LSE-O0-NEXT:  LBB4_1: ; %atomicrmw.start
+; CHECK-LSE-O0-NEXT:    ; =>This Inner Loop Header: Depth=1
+; CHECK-LSE-O0-NEXT:    ldr x10, [sp, #8] ; 8-byte Folded Reload
+; CHECK-LSE-O0-NEXT:    ldxr w8, [x10]
+; CHECK-LSE-O0-NEXT:    ; kill: def $x8 killed $w8
+; CHECK-LSE-O0-NEXT:    ; kill: def $w8 killed $w8 killed $x8
+; CHECK-LSE-O0-NEXT:    str w8, [sp, #4] ; 4-byte Folded Spill
+; CHECK-LSE-O0-NEXT:    and w8, w8, #0x7
+; CHECK-LSE-O0-NEXT:    mvn w9, w8
+; CHECK-LSE-O0-NEXT:    stlxr w8, w9, [x10]
+; CHECK-LSE-O0-NEXT:    cbnz w8, LBB4_1
+; CHECK-LSE-O0-NEXT:  ; %bb.2: ; %atomicrmw.end
+; CHECK-LSE-O0-NEXT:    ldr w0, [sp, #4] ; 4-byte Folded Reload
+; CHECK-LSE-O0-NEXT:    add sp, sp, #16 ; =16
+; CHECK-LSE-O0-NEXT:    ret
   %val = atomicrmw nand i32* %p, i32 7 release
   ret i32 %val
 }
 
 define i64 @fetch_and_nand_64(i64* %p) #0 {
-; CHECK-NOLSE-LABEL: fetch_and_nand_64
-; CHECK-NOLSE-O1: LBB5_1:
-; CHECK-NOLSE-O1:     ldaxr    [[VAL:x[0-9]+]], [x0]
-; CHECK-NOLSE-O1:     and     [[NEWTMP:x[0-9]+]], [[VAL]], #0x7
-; CHECK-NOLSE-O1:     mvn     [[NEW:x[0-9]+]], [[NEWTMP]]
-; CHECK-NOLSE-O1:     stlxr   [[STATUS:w[0-9]+]], [[NEW]], [x0]
-; CHECK-NOLSE-O1:     cbnz    [[STATUS]], LBB5_1
-; CHECK-NOLSE-O1:     mov     x0, [[VAL]]
-; CHECK-NOLSE-O1:     ret
-
-; CHECK-NOLSE-O0: ldaxr
-; CHECK-NOLSE-O0: stlxr
-
-; CHECK-LSE-LABEL: fetch_and_nand_64:
-; CHECK-LSE-O1: LBB5_1:
-; CHECK-LSE-O1:     ldaxr    [[VAL:x[0-9]+]], [x0]
-; CHECK-LSE-O1:     and     [[NEWTMP:x[0-9]+]], [[VAL]], #0x7
-; CHECK-LSE-O1:     mvn     [[NEW:x[0-9]+]], [[NEWTMP]]
-; CHECK-LSE-O1:     stlxr   [[STATUS:w[0-9]+]], [[NEW]], [x0]
-; CHECK-LSE-O1:     cbnz    [[STATUS]], LBB5_1
-; CHECK-LSE-O1:     mov     x0, [[VAL]]
-
+; CHECK-NOLSE-O1-LABEL: fetch_and_nand_64:
+; CHECK-NOLSE-O1:       ; %bb.0:
+; CHECK-NOLSE-O1-NEXT:  LBB5_1: ; %atomicrmw.start
+; CHECK-NOLSE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
+; CHECK-NOLSE-O1-NEXT:    ldaxr x8, [x0]
+; CHECK-NOLSE-O1-NEXT:    and x9, x8, #0x7
+; CHECK-NOLSE-O1-NEXT:    mvn x9, x9
+; CHECK-NOLSE-O1-NEXT:    stlxr w10, x9, [x0]
+; CHECK-NOLSE-O1-NEXT:    cbnz w10, LBB5_1
+; CHECK-NOLSE-O1-NEXT:  ; %bb.2: ; %atomicrmw.end
+; CHECK-NOLSE-O1-NEXT:    mov x0, x8
+; CHECK-NOLSE-O1-NEXT:    ret
+;
+; CHECK-NOLSE-O0-LABEL: fetch_and_nand_64:
+; CHECK-NOLSE-O0:       ; %bb.0:
+; CHECK-NOLSE-O0-NEXT:    sub sp, sp, #16 ; =16
+; CHECK-NOLSE-O0-NEXT:    str x0, [sp, #8] ; 8-byte Folded Spill
+; CHECK-NOLSE-O0-NEXT:  LBB5_1: ; %atomicrmw.start
+; CHECK-NOLSE-O0-NEXT:    ; =>This Inner Loop Header: Depth=1
+; CHECK-NOLSE-O0-NEXT:    ldr x10, [sp, #8] ; 8-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT:    ldaxr x8, [x10]
+; CHECK-NOLSE-O0-NEXT:    str x8, [sp] ; 8-byte Folded Spill
+; CHECK-NOLSE-O0-NEXT:    and x8, x8, #0x7
+; CHECK-NOLSE-O0-NEXT:    mvn x9, x8
+; CHECK-NOLSE-O0-NEXT:    stlxr w8, x9, [x10]
+; CHECK-NOLSE-O0-NEXT:    cbnz w8, LBB5_1
+; CHECK-NOLSE-O0-NEXT:  ; %bb.2: ; %atomicrmw.end
+; CHECK-NOLSE-O0-NEXT:    ldr x0, [sp] ; 8-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT:    add sp, sp, #16 ; =16
+; CHECK-NOLSE-O0-NEXT:    ret
+;
+; CHECK-LSE-O1-LABEL: fetch_and_nand_64:
+; CHECK-LSE-O1:       ; %bb.0:
+; CHECK-LSE-O1-NEXT:  LBB5_1: ; %atomicrmw.start
+; CHECK-LSE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
+; CHECK-LSE-O1-NEXT:    ldaxr x8, [x0]
+; CHECK-LSE-O1-NEXT:    and x9, x8, #0x7
+; CHECK-LSE-O1-NEXT:    mvn x9, x9
+; CHECK-LSE-O1-NEXT:    stlxr w10, x9, [x0]
+; CHECK-LSE-O1-NEXT:    cbnz w10, LBB5_1
+; CHECK-LSE-O1-NEXT:  ; %bb.2: ; %atomicrmw.end
+; CHECK-LSE-O1-NEXT:    mov x0, x8
+; CHECK-LSE-O1-NEXT:    ret
+;
+; CHECK-LSE-O0-LABEL: fetch_and_nand_64:
+; CHECK-LSE-O0:       ; %bb.0:
+; CHECK-LSE-O0-NEXT:    sub sp, sp, #16 ; =16
+; CHECK-LSE-O0-NEXT:    str x0, [sp, #8] ; 8-byte Folded Spill
+; CHECK-LSE-O0-NEXT:  LBB5_1: ; %atomicrmw.start
+; CHECK-LSE-O0-NEXT:    ; =>This Inner Loop Header: Depth=1
+; CHECK-LSE-O0-NEXT:    ldr x10, [sp, #8] ; 8-byte Folded Reload
+; CHECK-LSE-O0-NEXT:    ldaxr x8, [x10]
+; CHECK-LSE-O0-NEXT:    str x8, [sp] ; 8-byte Folded Spill
+; CHECK-LSE-O0-NEXT:    and x8, x8, #0x7
+; CHECK-LSE-O0-NEXT:    mvn x9, x8
+; CHECK-LSE-O0-NEXT:    stlxr w8, x9, [x10]
+; CHECK-LSE-O0-NEXT:    cbnz w8, LBB5_1
+; CHECK-LSE-O0-NEXT:  ; %bb.2: ; %atomicrmw.end
+; CHECK-LSE-O0-NEXT:    ldr x0, [sp] ; 8-byte Folded Reload
+; CHECK-LSE-O0-NEXT:    add sp, sp, #16 ; =16
+; CHECK-LSE-O0-NEXT:    ret
   %val = atomicrmw nand i64* %p, i64 7 acq_rel
   ret i64 %val
 }
 
 define i32 @fetch_and_or(i32* %p) #0 {
-; CHECK-NOLSE-LABEL: fetch_and_or:
-; CHECK-NOLSE-O1:     mov [[FIVE:w[0-9]+]], #5
-; CHECK-NOLSE-O1: LBB6_1:
-; CHECK-NOLSE-O1:     ldaxr   [[VAL:w[0-9]+]], [x0]
-; CHECK-NOLSE-O1:     orr     [[NEW:w[0-9]+]], [[VAL]], [[FIVE]]
-; CHECK-NOLSE-O1:     stlxr   [[STATUS:w[0-9]+]], [[NEW]], [x0]
-; CHECK-NOLSE-O1:     cbnz    [[STATUS]], LBB6_1
-; CHECK-NOLSE-O1:     mov     w0, [[VAL]]
-; CHECK-NOLSE-O1:     ret
-
-; CHECK-NOLSE-O0: ldaxr
-; CHECK-NOLSE-O0: stlxr
-
+; CHECK-NOLSE-O1-LABEL: fetch_and_or:
+; CHECK-NOLSE-O1:       ; %bb.0:
+; CHECK-NOLSE-O1-NEXT:    mov w9, #5
+; CHECK-NOLSE-O1-NEXT:  LBB6_1: ; %atomicrmw.start
+; CHECK-NOLSE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
+; CHECK-NOLSE-O1-NEXT:    ldaxr w8, [x0]
+; CHECK-NOLSE-O1-NEXT:    orr w10, w8, w9
+; CHECK-NOLSE-O1-NEXT:    stlxr w11, w10, [x0]
+; CHECK-NOLSE-O1-NEXT:    cbnz w11, LBB6_1
+; CHECK-NOLSE-O1-NEXT:  ; %bb.2: ; %atomicrmw.end
+; CHECK-NOLSE-O1-NEXT:    mov w0, w8
+; CHECK-NOLSE-O1-NEXT:    ret
+;
+; CHECK-NOLSE-O0-LABEL: fetch_and_or:
+; CHECK-NOLSE-O0:       ; %bb.0:
+; CHECK-NOLSE-O0-NEXT:    sub sp, sp, #16 ; =16
+; CHECK-NOLSE-O0-NEXT:    str x0, [sp, #8] ; 8-byte Folded Spill
+; CHECK-NOLSE-O0-NEXT:  LBB6_1: ; %atomicrmw.start
+; CHECK-NOLSE-O0-NEXT:    ; =>This Inner Loop Header: Depth=1
+; CHECK-NOLSE-O0-NEXT:    ldr x10, [sp, #8] ; 8-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT:    ldaxr w8, [x10]
+; CHECK-NOLSE-O0-NEXT:    ; kill: def $x8 killed $w8
+; CHECK-NOLSE-O0-NEXT:    ; kill: def $w8 killed $w8 killed $x8
+; CHECK-NOLSE-O0-NEXT:    str w8, [sp, #4] ; 4-byte Folded Spill
+; CHECK-NOLSE-O0-NEXT:    mov w9, #5
+; CHECK-NOLSE-O0-NEXT:    orr w9, w8, w9
+; CHECK-NOLSE-O0-NEXT:    stlxr w8, w9, [x10]
+; CHECK-NOLSE-O0-NEXT:    cbnz w8, LBB6_1
+; CHECK-NOLSE-O0-NEXT:  ; %bb.2: ; %atomicrmw.end
+; CHECK-NOLSE-O0-NEXT:    ldr w0, [sp, #4] ; 4-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT:    add sp, sp, #16 ; =16
+; CHECK-NOLSE-O0-NEXT:    ret
+;
 ; CHECK-LSE-LABEL: fetch_and_or:
 ; CHECK-LSE:       ; %bb.0:
-; CHECK-LSE:    mov w8, #5
-; CHECK-LSE:    ldsetal w8, w0, [x0]
-; CHECK-LSE:    ret
+; CHECK-LSE-NEXT:    mov w8, #5
+; CHECK-LSE-NEXT:    ldsetal w8, w0, [x0]
+; CHECK-LSE-NEXT:    ret
   %val = atomicrmw or i32* %p, i32 5 seq_cst
   ret i32 %val
 }
 
 define i64 @fetch_and_or_64(i64* %p) #0 {
-; CHECK-NOLSE-LABEL: fetch_and_or_64:
-; CHECK-NOLSE-O1: LBB7_1:
-; CHECK-NOLSE-O1:     ldxr   [[VAL:x[0-9]+]], [x0]
-; CHECK-NOLSE-O1:     orr     [[NEW:x[0-9]+]], [[VAL]], #0x7
-; CHECK-NOLSE-O1:     stxr   [[STATUS:w[0-9]+]], [[NEW]], [x0]
-; CHECK-NOLSE-O1:     cbnz    [[STATUS]], LBB7_1
-; CHECK-NOLSE-O1:     mov     x0, [[VAL]]
-; CHECK-NOLSE-O1:     ret
-
-; CHECK-NOLSE-O0: ldxr
-; CHECK-NOLSE-O0: stxr
-
+; CHECK-NOLSE-O1-LABEL: fetch_and_or_64:
+; CHECK-NOLSE-O1:       ; %bb.0:
+; CHECK-NOLSE-O1-NEXT:  LBB7_1: ; %atomicrmw.start
+; CHECK-NOLSE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
+; CHECK-NOLSE-O1-NEXT:    ldxr x8, [x0]
+; CHECK-NOLSE-O1-NEXT:    orr x9, x8, #0x7
+; CHECK-NOLSE-O1-NEXT:    stxr w10, x9, [x0]
+; CHECK-NOLSE-O1-NEXT:    cbnz w10, LBB7_1
+; CHECK-NOLSE-O1-NEXT:  ; %bb.2: ; %atomicrmw.end
+; CHECK-NOLSE-O1-NEXT:    mov x0, x8
+; CHECK-NOLSE-O1-NEXT:    ret
+;
+; CHECK-NOLSE-O0-LABEL: fetch_and_or_64:
+; CHECK-NOLSE-O0:       ; %bb.0:
+; CHECK-NOLSE-O0-NEXT:    sub sp, sp, #16 ; =16
+; CHECK-NOLSE-O0-NEXT:    str x0, [sp, #8] ; 8-byte Folded Spill
+; CHECK-NOLSE-O0-NEXT:  LBB7_1: ; %atomicrmw.start
+; CHECK-NOLSE-O0-NEXT:    ; =>This Inner Loop Header: Depth=1
+; CHECK-NOLSE-O0-NEXT:    ldr x10, [sp, #8] ; 8-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT:    ldxr x8, [x10]
+; CHECK-NOLSE-O0-NEXT:    str x8, [sp] ; 8-byte Folded Spill
+; CHECK-NOLSE-O0-NEXT:    orr x9, x8, #0x7
+; CHECK-NOLSE-O0-NEXT:    stxr w8, x9, [x10]
+; CHECK-NOLSE-O0-NEXT:    cbnz w8, LBB7_1
+; CHECK-NOLSE-O0-NEXT:  ; %bb.2: ; %atomicrmw.end
+; CHECK-NOLSE-O0-NEXT:    ldr x0, [sp] ; 8-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT:    add sp, sp, #16 ; =16
+; CHECK-NOLSE-O0-NEXT:    ret
+;
 ; CHECK-LSE-LABEL: fetch_and_or_64:
-; CHECK-LSE:    mov w[[SEVEN:[0-9]+]], #7
-; CHECK-LSE:    ldset x[[SEVEN]], x0, [x0]
-; CHECK-LSE:    ret
+; CHECK-LSE:       ; %bb.0:
+; CHECK-LSE-NEXT:    mov w8, #7
+; CHECK-LSE-NEXT:    ; kill: def $x8 killed $w8
+; CHECK-LSE-NEXT:    ldset x8, x0, [x0]
+; CHECK-LSE-NEXT:    ret
   %val = atomicrmw or i64* %p, i64 7 monotonic
   ret i64 %val
 }
 
 define void @acquire_fence() #0 {
+;
 ; CHECK-NOLSE-LABEL: acquire_fence:
-; CHECK-NOLSE:    dmb ish
-; CHECK-NOLSE:    ret
+; CHECK-NOLSE:       ; %bb.0:
+; CHECK-NOLSE-NEXT:    dmb ish
+; CHECK-NOLSE-NEXT:    ret
 ;
 ; CHECK-LSE-LABEL: acquire_fence:
-; CHECK-LSE:    dmb ish
-; CHECK-LSE:    ret
+; CHECK-LSE:       ; %bb.0:
+; CHECK-LSE-NEXT:    dmb ish
+; CHECK-LSE-NEXT:    ret
    fence acquire
    ret void
 }
 
 define void @release_fence() #0 {
+;
 ; CHECK-NOLSE-LABEL: release_fence:
-; CHECK-NOLSE:    dmb ish
-; CHECK-NOLSE:    ret
+; CHECK-NOLSE:       ; %bb.0:
+; CHECK-NOLSE-NEXT:    dmb ish
+; CHECK-NOLSE-NEXT:    ret
 ;
 ; CHECK-LSE-LABEL: release_fence:
 ; CHECK-LSE:       ; %bb.0:
-; CHECK-LSE:    dmb ish
-; CHECK-LSE:    ret
+; CHECK-LSE-NEXT:    dmb ish
+; CHECK-LSE-NEXT:    ret
    fence release
    ret void
 }
 
 define void @seq_cst_fence() #0 {
-; CHECK-LABEL: seq_cst_fence:
-; CHECK-NOLSE:    dmb ish
-; CHECK-NOLSE:    ret
+;
+; CHECK-NOLSE-LABEL: seq_cst_fence:
+; CHECK-NOLSE:       ; %bb.0:
+; CHECK-NOLSE-NEXT:    dmb ish
+; CHECK-NOLSE-NEXT:    ret
 ;
 ; CHECK-LSE-LABEL: seq_cst_fence:
 ; CHECK-LSE:       ; %bb.0:
-; CHECK-LSE:    dmb ish
-; CHECK-LSE:    ret
+; CHECK-LSE-NEXT:    dmb ish
+; CHECK-LSE-NEXT:    ret
    fence seq_cst
    ret void
 }
 
 define i32 @atomic_load(i32* %p) #0 {
-; CHECK-LABEL: atomic_load:
-; CHECK-NOLSE:    ldar w0, [x0]
-; CHECK-NOLSE:    ret
+;
+; CHECK-NOLSE-LABEL: atomic_load:
+; CHECK-NOLSE:       ; %bb.0:
+; CHECK-NOLSE-NEXT:    ldar w0, [x0]
+; CHECK-NOLSE-NEXT:    ret
 ;
 ; CHECK-LSE-LABEL: atomic_load:
-; CHECK-LSE:    ldar w0, [x0]
-; CHECK-LSE:    ret
+; CHECK-LSE:       ; %bb.0:
+; CHECK-LSE-NEXT:    ldar w0, [x0]
+; CHECK-LSE-NEXT:    ret
    %r = load atomic i32, i32* %p seq_cst, align 4
    ret i32 %r
 }
 
 define i8 @atomic_load_relaxed_8(i8* %p, i32 %off32) #0 {
-; CHECK-NOLSE-LABEL: atomic_load_relaxed_8:
-; CHECK-NOLSE-O1: ldrb    {{w[0-9]+}}, [x0, #4095]
-; CHECK-NOLSE-O1: ldrb    {{w[0-9]+}}, [x0, w1, sxtw]
-; CHECK-NOLSE-O1: ldurb   {{w[0-9]+}}, [x0, #-256]
-; CHECK-NOLSE-O1: add     x[[ADDR:[0-9]+]], x0, #291, lsl #12
-; CHECK-NOLSE-O1: ldrb    {{w[0-9]+}}, [x[[ADDR]]]
-
-; CHECK-LSE: ldrb
-; CHECK-LSE: ldrb
-; CHECK-LSE: ld{{u?}}rb
-; CHECK-LSE: ldrb
-
+; CHECK-NOLSE-O1-LABEL: atomic_load_relaxed_8:
+; CHECK-NOLSE-O1:       ; %bb.0:
+; CHECK-NOLSE-O1-NEXT:    ldrb w8, [x0, #4095]
+; CHECK-NOLSE-O1-NEXT:    ldrb w9, [x0, w1, sxtw]
+; CHECK-NOLSE-O1-NEXT:    ldurb w10, [x0, #-256]
+; CHECK-NOLSE-O1-NEXT:    add x11, x0, #291, lsl #12 ; =1191936
+; CHECK-NOLSE-O1-NEXT:    ldrb w11, [x11]
+; CHECK-NOLSE-O1-NEXT:    add w8, w8, w9
+; CHECK-NOLSE-O1-NEXT:    add w8, w8, w10
+; CHECK-NOLSE-O1-NEXT:    add w0, w8, w11
+; CHECK-NOLSE-O1-NEXT:    ret
+;
+; CHECK-NOLSE-O0-LABEL: atomic_load_relaxed_8:
+; CHECK-NOLSE-O0:       ; %bb.0:
+; CHECK-NOLSE-O0-NEXT:    ldrb w9, [x0, #4095]
+; CHECK-NOLSE-O0-NEXT:    add x8, x0, w1, sxtw
+; CHECK-NOLSE-O0-NEXT:    ldrb w8, [x8]
+; CHECK-NOLSE-O0-NEXT:    add w8, w8, w9, uxtb
+; CHECK-NOLSE-O0-NEXT:    subs x9, x0, #256 ; =256
+; CHECK-NOLSE-O0-NEXT:    ldrb w9, [x9]
+; CHECK-NOLSE-O0-NEXT:    add w8, w8, w9, uxtb
+; CHECK-NOLSE-O0-NEXT:    add x9, x0, #291, lsl #12 ; =1191936
+; CHECK-NOLSE-O0-NEXT:    ldrb w9, [x9]
+; CHECK-NOLSE-O0-NEXT:    add w0, w8, w9, uxtb
+; CHECK-NOLSE-O0-NEXT:    ret
+;
+; CHECK-LSE-O1-LABEL: atomic_load_relaxed_8:
+; CHECK-LSE-O1:       ; %bb.0:
+; CHECK-LSE-O1-NEXT:    ldrb w8, [x0, #4095]
+; CHECK-LSE-O1-NEXT:    ldrb w9, [x0, w1, sxtw]
+; CHECK-LSE-O1-NEXT:    add w8, w8, w9
+; CHECK-LSE-O1-NEXT:    ldurb w9, [x0, #-256]
+; CHECK-LSE-O1-NEXT:    add w8, w8, w9
+; CHECK-LSE-O1-NEXT:    add x9, x0, #291, lsl #12 ; =1191936
+; CHECK-LSE-O1-NEXT:    ldrb w9, [x9]
+; CHECK-LSE-O1-NEXT:    add w0, w8, w9
+; CHECK-LSE-O1-NEXT:    ret
+;
+; CHECK-LSE-O0-LABEL: atomic_load_relaxed_8:
+; CHECK-LSE-O0:       ; %bb.0:
+; CHECK-LSE-O0-NEXT:    ldrb w9, [x0, #4095]
+; CHECK-LSE-O0-NEXT:    add x8, x0, w1, sxtw
+; CHECK-LSE-O0-NEXT:    ldrb w8, [x8]
+; CHECK-LSE-O0-NEXT:    add w8, w8, w9, uxtb
+; CHECK-LSE-O0-NEXT:    subs x9, x0, #256 ; =256
+; CHECK-LSE-O0-NEXT:    ldrb w9, [x9]
+; CHECK-LSE-O0-NEXT:    add w8, w8, w9, uxtb
+; CHECK-LSE-O0-NEXT:    add x9, x0, #291, lsl #12 ; =1191936
+; CHECK-LSE-O0-NEXT:    ldrb w9, [x9]
+; CHECK-LSE-O0-NEXT:    add w0, w8, w9, uxtb
+; CHECK-LSE-O0-NEXT:    ret
   %ptr_unsigned = getelementptr i8, i8* %p, i32 4095
   %val_unsigned = load atomic i8, i8* %ptr_unsigned monotonic, align 1
 
@@ -340,18 +559,57 @@ define i8 @atomic_load_relaxed_8(i8* %p, i32 %off32) #0 {
 }
 
 define i16 @atomic_load_relaxed_16(i16* %p, i32 %off32) #0 {
-; CHECK-NOLSE-LABEL: atomic_load_relaxed_16:
-; CHECK-NOLSE-O1: ldrh    {{w[0-9]+}}, [x0, #8190]
-; CHECK-NOLSE-O1: ldrh    {{w[0-9]+}}, [x0, w1, sxtw #1]
-; CHECK-NOLSE-O1: ldurh   {{w[0-9]+}}, [x0, #-256]
-; CHECK-NOLSE-O1: add     x[[ADDR:[0-9]+]], x0, #291, lsl #12          ; =1191936
-; CHECK-NOLSE-O1: ldrh    {{w[0-9]+}}, [x[[ADDR]]]
-
-; CHECK-LSE: ldrh
-; CHECK-LSE: ldrh
-; CHECK-LSE: ld{{u?}}rh
-; CHECK-LSE: ldrh
-
+; CHECK-NOLSE-O1-LABEL: atomic_load_relaxed_16:
+; CHECK-NOLSE-O1:       ; %bb.0:
+; CHECK-NOLSE-O1-NEXT:    ldrh w8, [x0, #8190]
+; CHECK-NOLSE-O1-NEXT:    ldrh w9, [x0, w1, sxtw #1]
+; CHECK-NOLSE-O1-NEXT:    ldurh w10, [x0, #-256]
+; CHECK-NOLSE-O1-NEXT:    add x11, x0, #291, lsl #12 ; =1191936
+; CHECK-NOLSE-O1-NEXT:    ldrh w11, [x11]
+; CHECK-NOLSE-O1-NEXT:    add w8, w8, w9
+; CHECK-NOLSE-O1-NEXT:    add w8, w8, w10
+; CHECK-NOLSE-O1-NEXT:    add w0, w8, w11
+; CHECK-NOLSE-O1-NEXT:    ret
+;
+; CHECK-NOLSE-O0-LABEL: atomic_load_relaxed_16:
+; CHECK-NOLSE-O0:       ; %bb.0:
+; CHECK-NOLSE-O0-NEXT:    ldrh w9, [x0, #8190]
+; CHECK-NOLSE-O0-NEXT:    add x8, x0, w1, sxtw #1
+; CHECK-NOLSE-O0-NEXT:    ldrh w8, [x8]
+; CHECK-NOLSE-O0-NEXT:    add w8, w8, w9, uxth
+; CHECK-NOLSE-O0-NEXT:    subs x9, x0, #256 ; =256
+; CHECK-NOLSE-O0-NEXT:    ldrh w9, [x9]
+; CHECK-NOLSE-O0-NEXT:    add w8, w8, w9, uxth
+; CHECK-NOLSE-O0-NEXT:    add x9, x0, #291, lsl #12 ; =1191936
+; CHECK-NOLSE-O0-NEXT:    ldrh w9, [x9]
+; CHECK-NOLSE-O0-NEXT:    add w0, w8, w9, uxth
+; CHECK-NOLSE-O0-NEXT:    ret
+;
+; CHECK-LSE-O1-LABEL: atomic_load_relaxed_16:
+; CHECK-LSE-O1:       ; %bb.0:
+; CHECK-LSE-O1-NEXT:    ldrh w8, [x0, #8190]
+; CHECK-LSE-O1-NEXT:    ldrh w9, [x0, w1, sxtw #1]
+; CHECK-LSE-O1-NEXT:    add w8, w8, w9
+; CHECK-LSE-O1-NEXT:    ldurh w9, [x0, #-256]
+; CHECK-LSE-O1-NEXT:    add w8, w8, w9
+; CHECK-LSE-O1-NEXT:    add x9, x0, #291, lsl #12 ; =1191936
+; CHECK-LSE-O1-NEXT:    ldrh w9, [x9]
+; CHECK-LSE-O1-NEXT:    add w0, w8, w9
+; CHECK-LSE-O1-NEXT:    ret
+;
+; CHECK-LSE-O0-LABEL: atomic_load_relaxed_16:
+; CHECK-LSE-O0:       ; %bb.0:
+; CHECK-LSE-O0-NEXT:    ldrh w9, [x0, #8190]
+; CHECK-LSE-O0-NEXT:    add x8, x0, w1, sxtw #1
+; CHECK-LSE-O0-NEXT:    ldrh w8, [x8]
+; CHECK-LSE-O0-NEXT:    add w8, w8, w9, uxth
+; CHECK-LSE-O0-NEXT:    subs x9, x0, #256 ; =256
+; CHECK-LSE-O0-NEXT:    ldrh w9, [x9]
+; CHECK-LSE-O0-NEXT:    add w8, w8, w9, uxth
+; CHECK-LSE-O0-NEXT:    add x9, x0, #291, lsl #12 ; =1191936
+; CHECK-LSE-O0-NEXT:    ldrh w9, [x9]
+; CHECK-LSE-O0-NEXT:    add w0, w8, w9, uxth
+; CHECK-LSE-O0-NEXT:    ret
   %ptr_unsigned = getelementptr i16, i16* %p, i32 4095
   %val_unsigned = load atomic i16, i16* %ptr_unsigned monotonic, align 2
 
@@ -371,20 +629,41 @@ define i16 @atomic_load_relaxed_16(i16* %p, i32 %off32) #0 {
 }
 
 define i32 @atomic_load_relaxed_32(i32* %p, i32 %off32) #0 {
-; CHECK-NOLSE-LABEL: atomic_load_relaxed_32:
-; CHECK-NOLSE-O1: ldr    {{w[0-9]+}}, [x0, #16380]
-; CHECK-NOLSE-O1: ldr    {{w[0-9]+}}, [x0, w1, sxtw #2]
-; CHECK-NOLSE-O1: ldur   {{w[0-9]+}}, [x0, #-256]
-; CHECK-NOLSE-O1: add     x[[ADDR:[0-9]+]], x0, #291, lsl #12          ; =1191936
-; CHECK-NOLSE-O1: ldr    {{w[0-9]+}}, [x[[ADDR]]]
-
+; CHECK-NOLSE-O1-LABEL: atomic_load_relaxed_32:
+; CHECK-NOLSE-O1:       ; %bb.0:
+; CHECK-NOLSE-O1-NEXT:    ldr w8, [x0, #16380]
+; CHECK-NOLSE-O1-NEXT:    ldr w9, [x0, w1, sxtw #2]
+; CHECK-NOLSE-O1-NEXT:    ldur w10, [x0, #-256]
+; CHECK-NOLSE-O1-NEXT:    add x11, x0, #291, lsl #12 ; =1191936
+; CHECK-NOLSE-O1-NEXT:    ldr w11, [x11]
+; CHECK-NOLSE-O1-NEXT:    add w8, w8, w9
+; CHECK-NOLSE-O1-NEXT:    add w8, w8, w10
+; CHECK-NOLSE-O1-NEXT:    add w0, w8, w11
+; CHECK-NOLSE-O1-NEXT:    ret
+;
+; CHECK-NOLSE-O0-LABEL: atomic_load_relaxed_32:
+; CHECK-NOLSE-O0:       ; %bb.0:
+; CHECK-NOLSE-O0-NEXT:    ldr w8, [x0, #16380]
+; CHECK-NOLSE-O0-NEXT:    ldr w9, [x0, w1, sxtw #2]
+; CHECK-NOLSE-O0-NEXT:    add w8, w8, w9
+; CHECK-NOLSE-O0-NEXT:    ldur w9, [x0, #-256]
+; CHECK-NOLSE-O0-NEXT:    add w8, w8, w9
+; CHECK-NOLSE-O0-NEXT:    add x9, x0, #291, lsl #12 ; =1191936
+; CHECK-NOLSE-O0-NEXT:    ldr w9, [x9]
+; CHECK-NOLSE-O0-NEXT:    add w0, w8, w9
+; CHECK-NOLSE-O0-NEXT:    ret
+;
 ; CHECK-LSE-LABEL: atomic_load_relaxed_32:
-; CHECK-LSE:    ldr {{w[0-9]+}}, [x0, #16380]
-; CHECK-LSE:    ldr {{w[0-9]+}}, [x0, w1, sxtw #2]
-; CHECK-LSE:    ldur {{w[0-9]+}}, [x0, #-256]
-; CHECK-LSE:    add x[[ADDR:[0-9]+]], x0, #291, lsl #12 ; =1191936
-; CHECK-LSE:    ldr {{w[0-9]+}}, [x[[ADDR]]]
-
+; CHECK-LSE:       ; %bb.0:
+; CHECK-LSE-NEXT:    ldr w8, [x0, #16380]
+; CHECK-LSE-NEXT:    ldr w9, [x0, w1, sxtw #2]
+; CHECK-LSE-NEXT:    add w8, w8, w9
+; CHECK-LSE-NEXT:    ldur w9, [x0, #-256]
+; CHECK-LSE-NEXT:    add w8, w8, w9
+; CHECK-LSE-NEXT:    add x9, x0, #291, lsl #12 ; =1191936
+; CHECK-LSE-NEXT:    ldr w9, [x9]
+; CHECK-LSE-NEXT:    add w0, w8, w9
+; CHECK-LSE-NEXT:    ret
   %ptr_unsigned = getelementptr i32, i32* %p, i32 4095
   %val_unsigned = load atomic i32, i32* %ptr_unsigned monotonic, align 4
 
@@ -404,20 +683,41 @@ define i32 @atomic_load_relaxed_32(i32* %p, i32 %off32) #0 {
 }
 
 define i64 @atomic_load_relaxed_64(i64* %p, i32 %off32) #0 {
-; CHECK-NOLSE-LABEL: atomic_load_relaxed_64:
-; CHECK-NOLSE-O1:    ldr {{x[0-9]+}}, [x0, #32760]
-; CHECK-NOLSE-O1:    ldr {{x[0-9]+}}, [x0, w1, sxtw #3]
-; CHECK-NOLSE-O1:    ldur {{x[0-9]+}}, [x0, #-256]
-; CHECK-NOLSE-O1:    add x[[ADDR:[0-9]+]], x0, #291, lsl #12
-; CHECK-NOLSE-O1:    ldr {{x[0-9]+}}, [x[[ADDR]]]
-
+; CHECK-NOLSE-O1-LABEL: atomic_load_relaxed_64:
+; CHECK-NOLSE-O1:       ; %bb.0:
+; CHECK-NOLSE-O1-NEXT:    ldr x8, [x0, #32760]
+; CHECK-NOLSE-O1-NEXT:    ldr x9, [x0, w1, sxtw #3]
+; CHECK-NOLSE-O1-NEXT:    ldur x10, [x0, #-256]
+; CHECK-NOLSE-O1-NEXT:    add x11, x0, #291, lsl #12 ; =1191936
+; CHECK-NOLSE-O1-NEXT:    ldr x11, [x11]
+; CHECK-NOLSE-O1-NEXT:    add x8, x8, x9
+; CHECK-NOLSE-O1-NEXT:    add x8, x8, x10
+; CHECK-NOLSE-O1-NEXT:    add x0, x8, x11
+; CHECK-NOLSE-O1-NEXT:    ret
+;
+; CHECK-NOLSE-O0-LABEL: atomic_load_relaxed_64:
+; CHECK-NOLSE-O0:       ; %bb.0:
+; CHECK-NOLSE-O0-NEXT:    ldr x8, [x0, #32760]
+; CHECK-NOLSE-O0-NEXT:    ldr x9, [x0, w1, sxtw #3]
+; CHECK-NOLSE-O0-NEXT:    add x8, x8, x9
+; CHECK-NOLSE-O0-NEXT:    ldur x9, [x0, #-256]
+; CHECK-NOLSE-O0-NEXT:    add x8, x8, x9
+; CHECK-NOLSE-O0-NEXT:    add x9, x0, #291, lsl #12 ; =1191936
+; CHECK-NOLSE-O0-NEXT:    ldr x9, [x9]
+; CHECK-NOLSE-O0-NEXT:    add x0, x8, x9
+; CHECK-NOLSE-O0-NEXT:    ret
+;
 ; CHECK-LSE-LABEL: atomic_load_relaxed_64:
-; CHECK-LSE:    ldr {{x[0-9]+}}, [x0, #32760]
-; CHECK-LSE:    ldr {{x[0-9]+}}, [x0, w1, sxtw #3]
-; CHECK-LSE:    ldur {{x[0-9]+}}, [x0, #-256]
-; CHECK-LSE:    add x[[ADDR:[0-9]+]], x0, #291, lsl #12
-; CHECK-LSE:    ldr {{x[0-9]+}}, [x[[ADDR]]]
-
+; CHECK-LSE:       ; %bb.0:
+; CHECK-LSE-NEXT:    ldr x8, [x0, #32760]
+; CHECK-LSE-NEXT:    ldr x9, [x0, w1, sxtw #3]
+; CHECK-LSE-NEXT:    add x8, x8, x9
+; CHECK-LSE-NEXT:    ldur x9, [x0, #-256]
+; CHECK-LSE-NEXT:    add x8, x8, x9
+; CHECK-LSE-NEXT:    add x9, x0, #291, lsl #12 ; =1191936
+; CHECK-LSE-NEXT:    ldr x9, [x9]
+; CHECK-LSE-NEXT:    add x0, x8, x9
+; CHECK-LSE-NEXT:    ret
   %ptr_unsigned = getelementptr i64, i64* %p, i32 4095
   %val_unsigned = load atomic i64, i64* %ptr_unsigned monotonic, align 8
 
@@ -438,26 +738,66 @@ define i64 @atomic_load_relaxed_64(i64* %p, i32 %off32) #0 {
 
 
 define void @atomc_store(i32* %p) #0 {
+;
 ; CHECK-NOLSE-LABEL: atomc_store:
-; CHECK-NOLSE:    mov w8, #4
-; CHECK-NOLSE:    stlr w8, [x0]
-; CHECK-NOLSE:    ret
+; CHECK-NOLSE:       ; %bb.0:
+; CHECK-NOLSE-NEXT:    mov w8, #4
+; CHECK-NOLSE-NEXT:    stlr w8, [x0]
+; CHECK-NOLSE-NEXT:    ret
 ;
 ; CHECK-LSE-LABEL: atomc_store:
-; CHECK-LSE:    mov [[FOUR:w[0-9]+]], #4
-; CHECK-LSE:    stlr [[FOUR]], [x0]
-; CHECK-LSE:    ret
+; CHECK-LSE:       ; %bb.0:
+; CHECK-LSE-NEXT:    mov w8, #4
+; CHECK-LSE-NEXT:    stlr w8, [x0]
+; CHECK-LSE-NEXT:    ret
    store atomic i32 4, i32* %p seq_cst, align 4
    ret void
 }
 
 define void @atomic_store_relaxed_8(i8* %p, i32 %off32, i8 %val) #0 {
-; CHECK-NOLSE-LABEL: atomic_store_relaxed_8:
-; CHECK-NOLSE: strb    w2
-; CHECK-NOLSE: strb    w2
-; CHECK-NOLSE: strb    w2
-; CHECK-NOLSE: strb    w2
-
+; CHECK-NOLSE-O1-LABEL: atomic_store_relaxed_8:
+; CHECK-NOLSE-O1:       ; %bb.0:
+; CHECK-NOLSE-O1-NEXT:    add x8, x0, w1, sxtw
+; CHECK-NOLSE-O1-NEXT:    sub x9, x0, #256 ; =256
+; CHECK-NOLSE-O1-NEXT:    add x10, x0, #291, lsl #12 ; =1191936
+; CHECK-NOLSE-O1-NEXT:    strb w2, [x0, #4095]
+; CHECK-NOLSE-O1-NEXT:    strb w2, [x8]
+; CHECK-NOLSE-O1-NEXT:    strb w2, [x9]
+; CHECK-NOLSE-O1-NEXT:    strb w2, [x10]
+; CHECK-NOLSE-O1-NEXT:    ret
+;
+; CHECK-NOLSE-O0-LABEL: atomic_store_relaxed_8:
+; CHECK-NOLSE-O0:       ; %bb.0:
+; CHECK-NOLSE-O0-NEXT:    strb w2, [x0, #4095]
+; CHECK-NOLSE-O0-NEXT:    add x8, x0, w1, sxtw
+; CHECK-NOLSE-O0-NEXT:    strb w2, [x8]
+; CHECK-NOLSE-O0-NEXT:    subs x8, x0, #256 ; =256
+; CHECK-NOLSE-O0-NEXT:    strb w2, [x8]
+; CHECK-NOLSE-O0-NEXT:    add x8, x0, #291, lsl #12 ; =1191936
+; CHECK-NOLSE-O0-NEXT:    strb w2, [x8]
+; CHECK-NOLSE-O0-NEXT:    ret
+;
+; CHECK-LSE-O1-LABEL: atomic_store_relaxed_8:
+; CHECK-LSE-O1:       ; %bb.0:
+; CHECK-LSE-O1-NEXT:    strb w2, [x0, #4095]
+; CHECK-LSE-O1-NEXT:    add x8, x0, w1, sxtw
+; CHECK-LSE-O1-NEXT:    strb w2, [x8]
+; CHECK-LSE-O1-NEXT:    sub x8, x0, #256 ; =256
+; CHECK-LSE-O1-NEXT:    strb w2, [x8]
+; CHECK-LSE-O1-NEXT:    add x8, x0, #291, lsl #12 ; =1191936
+; CHECK-LSE-O1-NEXT:    strb w2, [x8]
+; CHECK-LSE-O1-NEXT:    ret
+;
+; CHECK-LSE-O0-LABEL: atomic_store_relaxed_8:
+; CHECK-LSE-O0:       ; %bb.0:
+; CHECK-LSE-O0-NEXT:    strb w2, [x0, #4095]
+; CHECK-LSE-O0-NEXT:    add x8, x0, w1, sxtw
+; CHECK-LSE-O0-NEXT:    strb w2, [x8]
+; CHECK-LSE-O0-NEXT:    subs x8, x0, #256 ; =256
+; CHECK-LSE-O0-NEXT:    strb w2, [x8]
+; CHECK-LSE-O0-NEXT:    add x8, x0, #291, lsl #12 ; =1191936
+; CHECK-LSE-O0-NEXT:    strb w2, [x8]
+; CHECK-LSE-O0-NEXT:    ret
   %ptr_unsigned = getelementptr i8, i8* %p, i32 4095
   store atomic i8 %val, i8* %ptr_unsigned monotonic, align 1
 
@@ -474,17 +814,49 @@ define void @atomic_store_relaxed_8(i8* %p, i32 %off32, i8 %val) #0 {
 }
 
 define void @atomic_store_relaxed_16(i16* %p, i32 %off32, i16 %val) #0 {
-; CHECK-NOLSE-LABEL: atomic_store_relaxed_16:
-; CHECK-NOLSE: strh    w2
-; CHECK-NOLSE: strh    w2
-; CHECK-NOLSE: strh    w2
-; CHECK-NOLSE: strh    w2
-
-; CHECK-LSE: strh w2
-; CHECK-LSE: strh w2
-; CHECK-LSE: strh w2
-; CHECK-LSE: strh w2
-
+; CHECK-NOLSE-O1-LABEL: atomic_store_relaxed_16:
+; CHECK-NOLSE-O1:       ; %bb.0:
+; CHECK-NOLSE-O1-NEXT:    add x8, x0, w1, sxtw #1
+; CHECK-NOLSE-O1-NEXT:    sub x9, x0, #256 ; =256
+; CHECK-NOLSE-O1-NEXT:    add x10, x0, #291, lsl #12 ; =1191936
+; CHECK-NOLSE-O1-NEXT:    strh w2, [x0, #8190]
+; CHECK-NOLSE-O1-NEXT:    strh w2, [x8]
+; CHECK-NOLSE-O1-NEXT:    strh w2, [x9]
+; CHECK-NOLSE-O1-NEXT:    strh w2, [x10]
+; CHECK-NOLSE-O1-NEXT:    ret
+;
+; CHECK-NOLSE-O0-LABEL: atomic_store_relaxed_16:
+; CHECK-NOLSE-O0:       ; %bb.0:
+; CHECK-NOLSE-O0-NEXT:    strh w2, [x0, #8190]
+; CHECK-NOLSE-O0-NEXT:    add x8, x0, w1, sxtw #1
+; CHECK-NOLSE-O0-NEXT:    strh w2, [x8]
+; CHECK-NOLSE-O0-NEXT:    subs x8, x0, #256 ; =256
+; CHECK-NOLSE-O0-NEXT:    strh w2, [x8]
+; CHECK-NOLSE-O0-NEXT:    add x8, x0, #291, lsl #12 ; =1191936
+; CHECK-NOLSE-O0-NEXT:    strh w2, [x8]
+; CHECK-NOLSE-O0-NEXT:    ret
+;
+; CHECK-LSE-O1-LABEL: atomic_store_relaxed_16:
+; CHECK-LSE-O1:       ; %bb.0:
+; CHECK-LSE-O1-NEXT:    strh w2, [x0, #8190]
+; CHECK-LSE-O1-NEXT:    add x8, x0, w1, sxtw #1
+; CHECK-LSE-O1-NEXT:    strh w2, [x8]
+; CHECK-LSE-O1-NEXT:    sub x8, x0, #256 ; =256
+; CHECK-LSE-O1-NEXT:    strh w2, [x8]
+; CHECK-LSE-O1-NEXT:    add x8, x0, #291, lsl #12 ; =1191936
+; CHECK-LSE-O1-NEXT:    strh w2, [x8]
+; CHECK-LSE-O1-NEXT:    ret
+;
+; CHECK-LSE-O0-LABEL: atomic_store_relaxed_16:
+; CHECK-LSE-O0:       ; %bb.0:
+; CHECK-LSE-O0-NEXT:    strh w2, [x0, #8190]
+; CHECK-LSE-O0-NEXT:    add x8, x0, w1, sxtw #1
+; CHECK-LSE-O0-NEXT:    strh w2, [x8]
+; CHECK-LSE-O0-NEXT:    subs x8, x0, #256 ; =256
+; CHECK-LSE-O0-NEXT:    strh w2, [x8]
+; CHECK-LSE-O0-NEXT:    add x8, x0, #291, lsl #12 ; =1191936
+; CHECK-LSE-O0-NEXT:    strh w2, [x8]
+; CHECK-LSE-O0-NEXT:    ret
   %ptr_unsigned = getelementptr i16, i16* %p, i32 4095
   store atomic i16 %val, i16* %ptr_unsigned monotonic, align 2
 
@@ -501,17 +873,32 @@ define void @atomic_store_relaxed_16(i16* %p, i32 %off32, i16 %val) #0 {
 }
 
 define void @atomic_store_relaxed_32(i32* %p, i32 %off32, i32 %val) #0 {
-; CHECK-NOLSE-LABEL: atomic_store_relaxed_32:
-; CHECK-NOLSE:    str w2
-; CHECK-NOLSE:    str w2
-; CHECK-NOLSE:    stur w2
-; CHECK-NOLSE:    str w2
-
+; CHECK-NOLSE-O1-LABEL: atomic_store_relaxed_32:
+; CHECK-NOLSE-O1:       ; %bb.0:
+; CHECK-NOLSE-O1-NEXT:    add x8, x0, #291, lsl #12 ; =1191936
+; CHECK-NOLSE-O1-NEXT:    str w2, [x0, #16380]
+; CHECK-NOLSE-O1-NEXT:    str w2, [x0, w1, sxtw #2]
+; CHECK-NOLSE-O1-NEXT:    stur w2, [x0, #-256]
+; CHECK-NOLSE-O1-NEXT:    str w2, [x8]
+; CHECK-NOLSE-O1-NEXT:    ret
+;
+; CHECK-NOLSE-O0-LABEL: atomic_store_relaxed_32:
+; CHECK-NOLSE-O0:       ; %bb.0:
+; CHECK-NOLSE-O0-NEXT:    str w2, [x0, #16380]
+; CHECK-NOLSE-O0-NEXT:    str w2, [x0, w1, sxtw #2]
+; CHECK-NOLSE-O0-NEXT:    stur w2, [x0, #-256]
+; CHECK-NOLSE-O0-NEXT:    add x8, x0, #291, lsl #12 ; =1191936
+; CHECK-NOLSE-O0-NEXT:    str w2, [x8]
+; CHECK-NOLSE-O0-NEXT:    ret
+;
 ; CHECK-LSE-LABEL: atomic_store_relaxed_32:
-; CHECK-LSE:    str w2
-; CHECK-LSE:    str w2
-; CHECK-LSE:    stur w2
-; CHECK-LSE:    str w2
+; CHECK-LSE:       ; %bb.0:
+; CHECK-LSE-NEXT:    str w2, [x0, #16380]
+; CHECK-LSE-NEXT:    str w2, [x0, w1, sxtw #2]
+; CHECK-LSE-NEXT:    stur w2, [x0, #-256]
+; CHECK-LSE-NEXT:    add x8, x0, #291, lsl #12 ; =1191936
+; CHECK-LSE-NEXT:    str w2, [x8]
+; CHECK-LSE-NEXT:    ret
   %ptr_unsigned = getelementptr i32, i32* %p, i32 4095
   store atomic i32 %val, i32* %ptr_unsigned monotonic, align 4
 
@@ -528,17 +915,32 @@ define void @atomic_store_relaxed_32(i32* %p, i32 %off32, i32 %val) #0 {
 }
 
 define void @atomic_store_relaxed_64(i64* %p, i32 %off32, i64 %val) #0 {
-; CHECK-NOLSE-LABEL: atomic_store_relaxed_64:
-; CHECK-NOLSE:    str x2
-; CHECK-NOLSE:    str x2
-; CHECK-NOLSE:    stur x2
-; CHECK-NOLSE:    str x2
-
+; CHECK-NOLSE-O1-LABEL: atomic_store_relaxed_64:
+; CHECK-NOLSE-O1:       ; %bb.0:
+; CHECK-NOLSE-O1-NEXT:    add x8, x0, #291, lsl #12 ; =1191936
+; CHECK-NOLSE-O1-NEXT:    str x2, [x0, #32760]
+; CHECK-NOLSE-O1-NEXT:    str x2, [x0, w1, sxtw #3]
+; CHECK-NOLSE-O1-NEXT:    stur x2, [x0, #-256]
+; CHECK-NOLSE-O1-NEXT:    str x2, [x8]
+; CHECK-NOLSE-O1-NEXT:    ret
+;
+; CHECK-NOLSE-O0-LABEL: atomic_store_relaxed_64:
+; CHECK-NOLSE-O0:       ; %bb.0:
+; CHECK-NOLSE-O0-NEXT:    str x2, [x0, #32760]
+; CHECK-NOLSE-O0-NEXT:    str x2, [x0, w1, sxtw #3]
+; CHECK-NOLSE-O0-NEXT:    stur x2, [x0, #-256]
+; CHECK-NOLSE-O0-NEXT:    add x8, x0, #291, lsl #12 ; =1191936
+; CHECK-NOLSE-O0-NEXT:    str x2, [x8]
+; CHECK-NOLSE-O0-NEXT:    ret
+;
 ; CHECK-LSE-LABEL: atomic_store_relaxed_64:
-; CHECK-LSE:    str x2
-; CHECK-LSE:    str x2
-; CHECK-LSE:    stur x2
-; CHECK-LSE:    str x2
+; CHECK-LSE:       ; %bb.0:
+; CHECK-LSE-NEXT:    str x2, [x0, #32760]
+; CHECK-LSE-NEXT:    str x2, [x0, w1, sxtw #3]
+; CHECK-LSE-NEXT:    stur x2, [x0, #-256]
+; CHECK-LSE-NEXT:    add x8, x0, #291, lsl #12 ; =1191936
+; CHECK-LSE-NEXT:    str x2, [x8]
+; CHECK-LSE-NEXT:    ret
   %ptr_unsigned = getelementptr i64, i64* %p, i32 4095
   store atomic i64 %val, i64* %ptr_unsigned monotonic, align 8
 


        


More information about the llvm-commits mailing list