[llvm] 5326614 - AtomicExpand: Really allow incremental legalization (#108613)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 19 21:18:37 PDT 2024
Author: Matt Arsenault
Date: 2024-09-20T08:18:33+04:00
New Revision: 5326614e2f84677515c38a17cf2f30cf66deaadc
URL: https://github.com/llvm/llvm-project/commit/5326614e2f84677515c38a17cf2f30cf66deaadc
DIFF: https://github.com/llvm/llvm-project/commit/5326614e2f84677515c38a17cf2f30cf66deaadc.diff
LOG: AtomicExpand: Really allow incremental legalization (#108613)
Fix up 100d9b89947bb1d42af20010bb594fa4c02542fc. The iterator
fixes in that commit ended up defeating the point, since newly inserted
blocks were not visited. The expansion never erases the current block, so
we can simply not advance the block iterator before processing the block.
The AArch64 FP atomic tests now expand the cmpxchg in the second
round of legalization.
Added:
Modified:
llvm/lib/CodeGen/AtomicExpandPass.cpp
llvm/test/CodeGen/AArch64/atomicrmw-fadd.ll
llvm/test/CodeGen/AArch64/atomicrmw-fmax.ll
llvm/test/CodeGen/AArch64/atomicrmw-fmin.ll
llvm/test/CodeGen/AArch64/atomicrmw-fsub.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp
index 303058416166d3..3d4e2cb196a16a 100644
--- a/llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -351,9 +351,8 @@ bool AtomicExpandImpl::run(Function &F, const TargetMachine *TM) {
bool MadeChange = false;
- for (Function::iterator BBI = F.begin(), BBE = F.end(); BBI != BBE;) {
+ for (Function::iterator BBI = F.begin(), BBE = F.end(); BBI != BBE; ++BBI) {
BasicBlock *BB = &*BBI;
- ++BBI;
BasicBlock::reverse_iterator Next;
@@ -365,14 +364,8 @@ bool AtomicExpandImpl::run(Function &F, const TargetMachine *TM) {
if (processAtomicInstr(&Inst)) {
MadeChange = true;
- // Detect control flow change and resume iteration from the original
- // block to inspect any newly inserted blocks. This allows incremental
- // legalization of atomicrmw and cmpxchg.
- if (Next != E && BB != Next->getParent()) {
- BBI = BB->getIterator();
- BBE = F.end();
- break;
- }
+ // New blocks may have been inserted.
+ BBE = F.end();
}
}
}
diff --git a/llvm/test/CodeGen/AArch64/atomicrmw-fadd.ll b/llvm/test/CodeGen/AArch64/atomicrmw-fadd.ll
index 0d230bb9dcc6e9..ed9c1b037d0cc7 100644
--- a/llvm/test/CodeGen/AArch64/atomicrmw-fadd.ll
+++ b/llvm/test/CodeGen/AArch64/atomicrmw-fadd.ll
@@ -43,46 +43,49 @@ define half @test_atomicrmw_fadd_f16_seq_cst_align2(ptr %ptr, half %value) #0 {
;
; SOFTFP-NOLSE-LABEL: test_atomicrmw_fadd_f16_seq_cst_align2:
; SOFTFP-NOLSE: // %bb.0:
-; SOFTFP-NOLSE-NEXT: stp x30, x23, [sp, #-48]! // 16-byte Folded Spill
+; SOFTFP-NOLSE-NEXT: str x30, [sp, #-48]! // 8-byte Folded Spill
; SOFTFP-NOLSE-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill
-; SOFTFP-NOLSE-NEXT: ldrh w20, [x0]
; SOFTFP-NOLSE-NEXT: mov x19, x0
+; SOFTFP-NOLSE-NEXT: ldrh w0, [x0]
+; SOFTFP-NOLSE-NEXT: mov w20, w1
; SOFTFP-NOLSE-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill
-; SOFTFP-NOLSE-NEXT: mov w21, w1
; SOFTFP-NOLSE-NEXT: b .LBB0_2
-; SOFTFP-NOLSE-NEXT: .LBB0_1: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: .LBB0_1: // %cmpxchg.nostore
; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB0_2 Depth=1
-; SOFTFP-NOLSE-NEXT: cmp w8, w23
-; SOFTFP-NOLSE-NEXT: mov w20, w8
-; SOFTFP-NOLSE-NEXT: b.eq .LBB0_5
+; SOFTFP-NOLSE-NEXT: mov w8, wzr
+; SOFTFP-NOLSE-NEXT: clrex
+; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB0_6
; SOFTFP-NOLSE-NEXT: .LBB0_2: // %atomicrmw.start
; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1
; SOFTFP-NOLSE-NEXT: // Child Loop BB0_3 Depth 2
-; SOFTFP-NOLSE-NEXT: and w0, w21, #0xffff
-; SOFTFP-NOLSE-NEXT: bl __gnu_h2f_ieee
-; SOFTFP-NOLSE-NEXT: and w23, w20, #0xffff
; SOFTFP-NOLSE-NEXT: mov w22, w0
-; SOFTFP-NOLSE-NEXT: mov w0, w23
+; SOFTFP-NOLSE-NEXT: and w0, w20, #0xffff
+; SOFTFP-NOLSE-NEXT: bl __gnu_h2f_ieee
+; SOFTFP-NOLSE-NEXT: mov w21, w0
+; SOFTFP-NOLSE-NEXT: and w0, w22, #0xffff
; SOFTFP-NOLSE-NEXT: bl __gnu_h2f_ieee
-; SOFTFP-NOLSE-NEXT: mov w1, w22
+; SOFTFP-NOLSE-NEXT: mov w1, w21
; SOFTFP-NOLSE-NEXT: bl __addsf3
; SOFTFP-NOLSE-NEXT: bl __gnu_f2h_ieee
-; SOFTFP-NOLSE-NEXT: .LBB0_3: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: mov w8, w0
+; SOFTFP-NOLSE-NEXT: .LBB0_3: // %cmpxchg.start
; SOFTFP-NOLSE-NEXT: // Parent Loop BB0_2 Depth=1
; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2
-; SOFTFP-NOLSE-NEXT: ldaxrh w8, [x19]
-; SOFTFP-NOLSE-NEXT: cmp w8, w20, uxth
+; SOFTFP-NOLSE-NEXT: ldaxrh w0, [x19]
+; SOFTFP-NOLSE-NEXT: cmp w0, w22, uxth
; SOFTFP-NOLSE-NEXT: b.ne .LBB0_1
-; SOFTFP-NOLSE-NEXT: // %bb.4: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore
; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB0_3 Depth=2
-; SOFTFP-NOLSE-NEXT: stlxrh wzr, w0, [x19]
-; SOFTFP-NOLSE-NEXT: cbnz wzr, .LBB0_3
-; SOFTFP-NOLSE-NEXT: b .LBB0_1
-; SOFTFP-NOLSE-NEXT: .LBB0_5: // %atomicrmw.end
-; SOFTFP-NOLSE-NEXT: mov w0, w20
+; SOFTFP-NOLSE-NEXT: stlxrh w9, w8, [x19]
+; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB0_3
+; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB0_2 Depth=1
+; SOFTFP-NOLSE-NEXT: mov w8, #1 // =0x1
+; SOFTFP-NOLSE-NEXT: cbz w8, .LBB0_2
+; SOFTFP-NOLSE-NEXT: .LBB0_6: // %atomicrmw.end
; SOFTFP-NOLSE-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
+; SOFTFP-NOLSE-NEXT: // kill: def $w0 killed $w0 killed $x0
; SOFTFP-NOLSE-NEXT: ldp x22, x21, [sp, #16] // 16-byte Folded Reload
-; SOFTFP-NOLSE-NEXT: ldp x30, x23, [sp], #48 // 16-byte Folded Reload
+; SOFTFP-NOLSE-NEXT: ldr x30, [sp], #48 // 8-byte Folded Reload
; SOFTFP-NOLSE-NEXT: ret
%res = atomicrmw fadd ptr %ptr, half %value seq_cst, align 2
ret half %res
@@ -128,46 +131,49 @@ define half @test_atomicrmw_fadd_f16_seq_cst_align4(ptr %ptr, half %value) #0 {
;
; SOFTFP-NOLSE-LABEL: test_atomicrmw_fadd_f16_seq_cst_align4:
; SOFTFP-NOLSE: // %bb.0:
-; SOFTFP-NOLSE-NEXT: stp x30, x23, [sp, #-48]! // 16-byte Folded Spill
+; SOFTFP-NOLSE-NEXT: str x30, [sp, #-48]! // 8-byte Folded Spill
; SOFTFP-NOLSE-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill
-; SOFTFP-NOLSE-NEXT: ldrh w20, [x0]
; SOFTFP-NOLSE-NEXT: mov x19, x0
+; SOFTFP-NOLSE-NEXT: ldrh w0, [x0]
+; SOFTFP-NOLSE-NEXT: mov w20, w1
; SOFTFP-NOLSE-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill
-; SOFTFP-NOLSE-NEXT: mov w21, w1
; SOFTFP-NOLSE-NEXT: b .LBB1_2
-; SOFTFP-NOLSE-NEXT: .LBB1_1: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: .LBB1_1: // %cmpxchg.nostore
; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB1_2 Depth=1
-; SOFTFP-NOLSE-NEXT: cmp w8, w23
-; SOFTFP-NOLSE-NEXT: mov w20, w8
-; SOFTFP-NOLSE-NEXT: b.eq .LBB1_5
+; SOFTFP-NOLSE-NEXT: mov w8, wzr
+; SOFTFP-NOLSE-NEXT: clrex
+; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB1_6
; SOFTFP-NOLSE-NEXT: .LBB1_2: // %atomicrmw.start
; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1
; SOFTFP-NOLSE-NEXT: // Child Loop BB1_3 Depth 2
-; SOFTFP-NOLSE-NEXT: and w0, w21, #0xffff
-; SOFTFP-NOLSE-NEXT: bl __gnu_h2f_ieee
-; SOFTFP-NOLSE-NEXT: and w23, w20, #0xffff
; SOFTFP-NOLSE-NEXT: mov w22, w0
-; SOFTFP-NOLSE-NEXT: mov w0, w23
+; SOFTFP-NOLSE-NEXT: and w0, w20, #0xffff
+; SOFTFP-NOLSE-NEXT: bl __gnu_h2f_ieee
+; SOFTFP-NOLSE-NEXT: mov w21, w0
+; SOFTFP-NOLSE-NEXT: and w0, w22, #0xffff
; SOFTFP-NOLSE-NEXT: bl __gnu_h2f_ieee
-; SOFTFP-NOLSE-NEXT: mov w1, w22
+; SOFTFP-NOLSE-NEXT: mov w1, w21
; SOFTFP-NOLSE-NEXT: bl __addsf3
; SOFTFP-NOLSE-NEXT: bl __gnu_f2h_ieee
-; SOFTFP-NOLSE-NEXT: .LBB1_3: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: mov w8, w0
+; SOFTFP-NOLSE-NEXT: .LBB1_3: // %cmpxchg.start
; SOFTFP-NOLSE-NEXT: // Parent Loop BB1_2 Depth=1
; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2
-; SOFTFP-NOLSE-NEXT: ldaxrh w8, [x19]
-; SOFTFP-NOLSE-NEXT: cmp w8, w20, uxth
+; SOFTFP-NOLSE-NEXT: ldaxrh w0, [x19]
+; SOFTFP-NOLSE-NEXT: cmp w0, w22, uxth
; SOFTFP-NOLSE-NEXT: b.ne .LBB1_1
-; SOFTFP-NOLSE-NEXT: // %bb.4: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore
; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB1_3 Depth=2
-; SOFTFP-NOLSE-NEXT: stlxrh wzr, w0, [x19]
-; SOFTFP-NOLSE-NEXT: cbnz wzr, .LBB1_3
-; SOFTFP-NOLSE-NEXT: b .LBB1_1
-; SOFTFP-NOLSE-NEXT: .LBB1_5: // %atomicrmw.end
-; SOFTFP-NOLSE-NEXT: mov w0, w20
+; SOFTFP-NOLSE-NEXT: stlxrh w9, w8, [x19]
+; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB1_3
+; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB1_2 Depth=1
+; SOFTFP-NOLSE-NEXT: mov w8, #1 // =0x1
+; SOFTFP-NOLSE-NEXT: cbz w8, .LBB1_2
+; SOFTFP-NOLSE-NEXT: .LBB1_6: // %atomicrmw.end
; SOFTFP-NOLSE-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
+; SOFTFP-NOLSE-NEXT: // kill: def $w0 killed $w0 killed $x0
; SOFTFP-NOLSE-NEXT: ldp x22, x21, [sp, #16] // 16-byte Folded Reload
-; SOFTFP-NOLSE-NEXT: ldp x30, x23, [sp], #48 // 16-byte Folded Reload
+; SOFTFP-NOLSE-NEXT: ldr x30, [sp], #48 // 8-byte Folded Reload
; SOFTFP-NOLSE-NEXT: ret
%res = atomicrmw fadd ptr %ptr, half %value seq_cst, align 4
ret half %res
@@ -232,36 +238,40 @@ define bfloat @test_atomicrmw_fadd_bf16_seq_cst_align2(ptr %ptr, bfloat %value)
; SOFTFP-NOLSE: // %bb.0:
; SOFTFP-NOLSE-NEXT: stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
; SOFTFP-NOLSE-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
-; SOFTFP-NOLSE-NEXT: ldrh w20, [x0]
-; SOFTFP-NOLSE-NEXT: lsl w21, w1, #16
; SOFTFP-NOLSE-NEXT: mov x19, x0
+; SOFTFP-NOLSE-NEXT: ldrh w0, [x0]
+; SOFTFP-NOLSE-NEXT: lsl w20, w1, #16
; SOFTFP-NOLSE-NEXT: b .LBB2_2
-; SOFTFP-NOLSE-NEXT: .LBB2_1: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: .LBB2_1: // %cmpxchg.nostore
; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB2_2 Depth=1
-; SOFTFP-NOLSE-NEXT: cmp w8, w20, uxth
-; SOFTFP-NOLSE-NEXT: mov w20, w8
-; SOFTFP-NOLSE-NEXT: b.eq .LBB2_5
+; SOFTFP-NOLSE-NEXT: mov w8, wzr
+; SOFTFP-NOLSE-NEXT: clrex
+; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB2_6
; SOFTFP-NOLSE-NEXT: .LBB2_2: // %atomicrmw.start
; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1
; SOFTFP-NOLSE-NEXT: // Child Loop BB2_3 Depth 2
-; SOFTFP-NOLSE-NEXT: lsl w0, w20, #16
-; SOFTFP-NOLSE-NEXT: mov w1, w21
+; SOFTFP-NOLSE-NEXT: mov w21, w0
+; SOFTFP-NOLSE-NEXT: lsl w0, w0, #16
+; SOFTFP-NOLSE-NEXT: mov w1, w20
; SOFTFP-NOLSE-NEXT: bl __addsf3
; SOFTFP-NOLSE-NEXT: bl __truncsfbf2
-; SOFTFP-NOLSE-NEXT: .LBB2_3: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: mov w8, w0
+; SOFTFP-NOLSE-NEXT: .LBB2_3: // %cmpxchg.start
; SOFTFP-NOLSE-NEXT: // Parent Loop BB2_2 Depth=1
; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2
-; SOFTFP-NOLSE-NEXT: ldaxrh w8, [x19]
-; SOFTFP-NOLSE-NEXT: cmp w8, w20, uxth
+; SOFTFP-NOLSE-NEXT: ldaxrh w0, [x19]
+; SOFTFP-NOLSE-NEXT: cmp w0, w21, uxth
; SOFTFP-NOLSE-NEXT: b.ne .LBB2_1
-; SOFTFP-NOLSE-NEXT: // %bb.4: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore
; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB2_3 Depth=2
-; SOFTFP-NOLSE-NEXT: stlxrh wzr, w0, [x19]
-; SOFTFP-NOLSE-NEXT: cbnz wzr, .LBB2_3
-; SOFTFP-NOLSE-NEXT: b .LBB2_1
-; SOFTFP-NOLSE-NEXT: .LBB2_5: // %atomicrmw.end
-; SOFTFP-NOLSE-NEXT: mov w0, w20
+; SOFTFP-NOLSE-NEXT: stlxrh w9, w8, [x19]
+; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB2_3
+; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB2_2 Depth=1
+; SOFTFP-NOLSE-NEXT: mov w8, #1 // =0x1
+; SOFTFP-NOLSE-NEXT: cbz w8, .LBB2_2
+; SOFTFP-NOLSE-NEXT: .LBB2_6: // %atomicrmw.end
; SOFTFP-NOLSE-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
+; SOFTFP-NOLSE-NEXT: // kill: def $w0 killed $w0 killed $x0
; SOFTFP-NOLSE-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload
; SOFTFP-NOLSE-NEXT: ret
%res = atomicrmw fadd ptr %ptr, bfloat %value seq_cst, align 2
@@ -327,36 +337,40 @@ define bfloat @test_atomicrmw_fadd_bf16_seq_cst_align4(ptr %ptr, bfloat %value)
; SOFTFP-NOLSE: // %bb.0:
; SOFTFP-NOLSE-NEXT: stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
; SOFTFP-NOLSE-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
-; SOFTFP-NOLSE-NEXT: ldrh w20, [x0]
-; SOFTFP-NOLSE-NEXT: lsl w21, w1, #16
; SOFTFP-NOLSE-NEXT: mov x19, x0
+; SOFTFP-NOLSE-NEXT: ldrh w0, [x0]
+; SOFTFP-NOLSE-NEXT: lsl w20, w1, #16
; SOFTFP-NOLSE-NEXT: b .LBB3_2
-; SOFTFP-NOLSE-NEXT: .LBB3_1: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: .LBB3_1: // %cmpxchg.nostore
; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB3_2 Depth=1
-; SOFTFP-NOLSE-NEXT: cmp w8, w20, uxth
-; SOFTFP-NOLSE-NEXT: mov w20, w8
-; SOFTFP-NOLSE-NEXT: b.eq .LBB3_5
+; SOFTFP-NOLSE-NEXT: mov w8, wzr
+; SOFTFP-NOLSE-NEXT: clrex
+; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB3_6
; SOFTFP-NOLSE-NEXT: .LBB3_2: // %atomicrmw.start
; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1
; SOFTFP-NOLSE-NEXT: // Child Loop BB3_3 Depth 2
-; SOFTFP-NOLSE-NEXT: lsl w0, w20, #16
-; SOFTFP-NOLSE-NEXT: mov w1, w21
+; SOFTFP-NOLSE-NEXT: mov w21, w0
+; SOFTFP-NOLSE-NEXT: lsl w0, w0, #16
+; SOFTFP-NOLSE-NEXT: mov w1, w20
; SOFTFP-NOLSE-NEXT: bl __addsf3
; SOFTFP-NOLSE-NEXT: bl __truncsfbf2
-; SOFTFP-NOLSE-NEXT: .LBB3_3: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: mov w8, w0
+; SOFTFP-NOLSE-NEXT: .LBB3_3: // %cmpxchg.start
; SOFTFP-NOLSE-NEXT: // Parent Loop BB3_2 Depth=1
; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2
-; SOFTFP-NOLSE-NEXT: ldaxrh w8, [x19]
-; SOFTFP-NOLSE-NEXT: cmp w8, w20, uxth
+; SOFTFP-NOLSE-NEXT: ldaxrh w0, [x19]
+; SOFTFP-NOLSE-NEXT: cmp w0, w21, uxth
; SOFTFP-NOLSE-NEXT: b.ne .LBB3_1
-; SOFTFP-NOLSE-NEXT: // %bb.4: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore
; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB3_3 Depth=2
-; SOFTFP-NOLSE-NEXT: stlxrh wzr, w0, [x19]
-; SOFTFP-NOLSE-NEXT: cbnz wzr, .LBB3_3
-; SOFTFP-NOLSE-NEXT: b .LBB3_1
-; SOFTFP-NOLSE-NEXT: .LBB3_5: // %atomicrmw.end
-; SOFTFP-NOLSE-NEXT: mov w0, w20
+; SOFTFP-NOLSE-NEXT: stlxrh w9, w8, [x19]
+; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB3_3
+; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB3_2 Depth=1
+; SOFTFP-NOLSE-NEXT: mov w8, #1 // =0x1
+; SOFTFP-NOLSE-NEXT: cbz w8, .LBB3_2
+; SOFTFP-NOLSE-NEXT: .LBB3_6: // %atomicrmw.end
; SOFTFP-NOLSE-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
+; SOFTFP-NOLSE-NEXT: // kill: def $w0 killed $w0 killed $x0
; SOFTFP-NOLSE-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload
; SOFTFP-NOLSE-NEXT: ret
%res = atomicrmw fadd ptr %ptr, bfloat %value seq_cst, align 4
@@ -399,35 +413,38 @@ define float @test_atomicrmw_fadd_f32_seq_cst_align4(ptr %ptr, float %value) #0
; SOFTFP-NOLSE: // %bb.0:
; SOFTFP-NOLSE-NEXT: stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
; SOFTFP-NOLSE-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
-; SOFTFP-NOLSE-NEXT: ldr w20, [x0]
; SOFTFP-NOLSE-NEXT: mov x19, x0
-; SOFTFP-NOLSE-NEXT: mov w21, w1
+; SOFTFP-NOLSE-NEXT: ldr w0, [x0]
+; SOFTFP-NOLSE-NEXT: mov w20, w1
; SOFTFP-NOLSE-NEXT: b .LBB4_2
-; SOFTFP-NOLSE-NEXT: .LBB4_1: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: .LBB4_1: // %cmpxchg.nostore
; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB4_2 Depth=1
-; SOFTFP-NOLSE-NEXT: cmp w8, w20
-; SOFTFP-NOLSE-NEXT: mov w20, w8
-; SOFTFP-NOLSE-NEXT: b.eq .LBB4_5
+; SOFTFP-NOLSE-NEXT: mov w8, wzr
+; SOFTFP-NOLSE-NEXT: clrex
+; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB4_6
; SOFTFP-NOLSE-NEXT: .LBB4_2: // %atomicrmw.start
; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1
; SOFTFP-NOLSE-NEXT: // Child Loop BB4_3 Depth 2
-; SOFTFP-NOLSE-NEXT: mov w0, w20
-; SOFTFP-NOLSE-NEXT: mov w1, w21
+; SOFTFP-NOLSE-NEXT: mov w1, w20
+; SOFTFP-NOLSE-NEXT: mov w21, w0
; SOFTFP-NOLSE-NEXT: bl __addsf3
-; SOFTFP-NOLSE-NEXT: .LBB4_3: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: mov w8, w0
+; SOFTFP-NOLSE-NEXT: .LBB4_3: // %cmpxchg.start
; SOFTFP-NOLSE-NEXT: // Parent Loop BB4_2 Depth=1
; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2
-; SOFTFP-NOLSE-NEXT: ldaxr w8, [x19]
-; SOFTFP-NOLSE-NEXT: cmp w8, w20
+; SOFTFP-NOLSE-NEXT: ldaxr w0, [x19]
+; SOFTFP-NOLSE-NEXT: cmp w0, w21
; SOFTFP-NOLSE-NEXT: b.ne .LBB4_1
-; SOFTFP-NOLSE-NEXT: // %bb.4: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore
; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB4_3 Depth=2
-; SOFTFP-NOLSE-NEXT: stlxr wzr, w0, [x19]
-; SOFTFP-NOLSE-NEXT: cbnz wzr, .LBB4_3
-; SOFTFP-NOLSE-NEXT: b .LBB4_1
-; SOFTFP-NOLSE-NEXT: .LBB4_5: // %atomicrmw.end
-; SOFTFP-NOLSE-NEXT: mov w0, w20
+; SOFTFP-NOLSE-NEXT: stlxr w9, w8, [x19]
+; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB4_3
+; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB4_2 Depth=1
+; SOFTFP-NOLSE-NEXT: mov w8, #1 // =0x1
+; SOFTFP-NOLSE-NEXT: cbz w8, .LBB4_2
+; SOFTFP-NOLSE-NEXT: .LBB4_6: // %atomicrmw.end
; SOFTFP-NOLSE-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
+; SOFTFP-NOLSE-NEXT: // kill: def $w0 killed $w0 killed $x0
; SOFTFP-NOLSE-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload
; SOFTFP-NOLSE-NEXT: ret
%res = atomicrmw fadd ptr %ptr, float %value seq_cst, align 4
@@ -469,36 +486,40 @@ define double @test_atomicrmw_fadd_f32_seq_cst_align8(ptr %ptr, double %value) #
; SOFTFP-NOLSE-LABEL: test_atomicrmw_fadd_f32_seq_cst_align8:
; SOFTFP-NOLSE: // %bb.0:
; SOFTFP-NOLSE-NEXT: stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
+; SOFTFP-NOLSE-NEXT: ldr x21, [x0]
; SOFTFP-NOLSE-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
-; SOFTFP-NOLSE-NEXT: ldr x20, [x0]
; SOFTFP-NOLSE-NEXT: mov x19, x0
-; SOFTFP-NOLSE-NEXT: mov x21, x1
+; SOFTFP-NOLSE-NEXT: mov x20, x1
; SOFTFP-NOLSE-NEXT: b .LBB5_2
-; SOFTFP-NOLSE-NEXT: .LBB5_1: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: .LBB5_1: // %cmpxchg.nostore
; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB5_2 Depth=1
-; SOFTFP-NOLSE-NEXT: cmp x8, x20
-; SOFTFP-NOLSE-NEXT: mov x20, x8
-; SOFTFP-NOLSE-NEXT: b.eq .LBB5_5
+; SOFTFP-NOLSE-NEXT: mov w9, wzr
+; SOFTFP-NOLSE-NEXT: clrex
+; SOFTFP-NOLSE-NEXT: mov x21, x8
+; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB5_6
; SOFTFP-NOLSE-NEXT: .LBB5_2: // %atomicrmw.start
; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1
; SOFTFP-NOLSE-NEXT: // Child Loop BB5_3 Depth 2
-; SOFTFP-NOLSE-NEXT: mov x0, x20
-; SOFTFP-NOLSE-NEXT: mov x1, x21
+; SOFTFP-NOLSE-NEXT: mov x0, x21
+; SOFTFP-NOLSE-NEXT: mov x1, x20
; SOFTFP-NOLSE-NEXT: bl __adddf3
-; SOFTFP-NOLSE-NEXT: .LBB5_3: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: .LBB5_3: // %cmpxchg.start
; SOFTFP-NOLSE-NEXT: // Parent Loop BB5_2 Depth=1
; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2
; SOFTFP-NOLSE-NEXT: ldaxr x8, [x19]
-; SOFTFP-NOLSE-NEXT: cmp x8, x20
+; SOFTFP-NOLSE-NEXT: cmp x8, x21
; SOFTFP-NOLSE-NEXT: b.ne .LBB5_1
-; SOFTFP-NOLSE-NEXT: // %bb.4: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore
; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB5_3 Depth=2
-; SOFTFP-NOLSE-NEXT: stlxr wzr, x0, [x19]
-; SOFTFP-NOLSE-NEXT: cbnz wzr, .LBB5_3
-; SOFTFP-NOLSE-NEXT: b .LBB5_1
-; SOFTFP-NOLSE-NEXT: .LBB5_5: // %atomicrmw.end
-; SOFTFP-NOLSE-NEXT: mov x0, x20
+; SOFTFP-NOLSE-NEXT: stlxr w9, x0, [x19]
+; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB5_3
+; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB5_2 Depth=1
+; SOFTFP-NOLSE-NEXT: mov w9, #1 // =0x1
+; SOFTFP-NOLSE-NEXT: mov x21, x8
+; SOFTFP-NOLSE-NEXT: cbz w9, .LBB5_2
+; SOFTFP-NOLSE-NEXT: .LBB5_6: // %atomicrmw.end
; SOFTFP-NOLSE-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
+; SOFTFP-NOLSE-NEXT: mov x0, x21
; SOFTFP-NOLSE-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload
; SOFTFP-NOLSE-NEXT: ret
%res = atomicrmw fadd ptr %ptr, double %value seq_cst, align 8
@@ -687,18 +708,18 @@ define <2 x half> @test_atomicrmw_fadd_v2f16_seq_cst_align4(ptr %ptr, <2 x half>
; SOFTFP-NOLSE-NEXT: stp x24, x23, [sp, #16] // 16-byte Folded Spill
; SOFTFP-NOLSE-NEXT: ldrh w23, [x0, #2]
; SOFTFP-NOLSE-NEXT: stp x22, x21, [sp, #32] // 16-byte Folded Spill
-; SOFTFP-NOLSE-NEXT: ldrh w21, [x0]
-; SOFTFP-NOLSE-NEXT: mov w22, w1
+; SOFTFP-NOLSE-NEXT: ldrh w22, [x0]
+; SOFTFP-NOLSE-NEXT: mov w21, w1
; SOFTFP-NOLSE-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill
; SOFTFP-NOLSE-NEXT: mov w19, w2
; SOFTFP-NOLSE-NEXT: mov x20, x0
; SOFTFP-NOLSE-NEXT: b .LBB7_2
-; SOFTFP-NOLSE-NEXT: .LBB7_1: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: .LBB7_1: // %cmpxchg.nostore
; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB7_2 Depth=1
-; SOFTFP-NOLSE-NEXT: lsr w23, w8, #16
-; SOFTFP-NOLSE-NEXT: cmp w8, w21
-; SOFTFP-NOLSE-NEXT: mov w21, w8
-; SOFTFP-NOLSE-NEXT: b.eq .LBB7_5
+; SOFTFP-NOLSE-NEXT: mov w8, wzr
+; SOFTFP-NOLSE-NEXT: clrex
+; SOFTFP-NOLSE-NEXT: lsr w23, w22, #16
+; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB7_6
; SOFTFP-NOLSE-NEXT: .LBB7_2: // %atomicrmw.start
; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1
; SOFTFP-NOLSE-NEXT: // Child Loop BB7_3 Depth 2
@@ -711,29 +732,33 @@ define <2 x half> @test_atomicrmw_fadd_v2f16_seq_cst_align4(ptr %ptr, <2 x half>
; SOFTFP-NOLSE-NEXT: bl __addsf3
; SOFTFP-NOLSE-NEXT: bl __gnu_f2h_ieee
; SOFTFP-NOLSE-NEXT: mov w24, w0
-; SOFTFP-NOLSE-NEXT: and w0, w22, #0xffff
+; SOFTFP-NOLSE-NEXT: and w0, w21, #0xffff
; SOFTFP-NOLSE-NEXT: bl __gnu_h2f_ieee
; SOFTFP-NOLSE-NEXT: mov w25, w0
-; SOFTFP-NOLSE-NEXT: and w0, w21, #0xffff
+; SOFTFP-NOLSE-NEXT: and w0, w22, #0xffff
; SOFTFP-NOLSE-NEXT: bl __gnu_h2f_ieee
; SOFTFP-NOLSE-NEXT: mov w1, w25
; SOFTFP-NOLSE-NEXT: bl __addsf3
; SOFTFP-NOLSE-NEXT: bl __gnu_f2h_ieee
-; SOFTFP-NOLSE-NEXT: bfi w21, w23, #16, #16
+; SOFTFP-NOLSE-NEXT: mov w8, w22
; SOFTFP-NOLSE-NEXT: bfi w0, w24, #16, #16
-; SOFTFP-NOLSE-NEXT: .LBB7_3: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: bfi w8, w23, #16, #16
+; SOFTFP-NOLSE-NEXT: .LBB7_3: // %cmpxchg.start
; SOFTFP-NOLSE-NEXT: // Parent Loop BB7_2 Depth=1
; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2
-; SOFTFP-NOLSE-NEXT: ldaxr w8, [x20]
-; SOFTFP-NOLSE-NEXT: cmp w8, w21
+; SOFTFP-NOLSE-NEXT: ldaxr w22, [x20]
+; SOFTFP-NOLSE-NEXT: cmp w22, w8
; SOFTFP-NOLSE-NEXT: b.ne .LBB7_1
-; SOFTFP-NOLSE-NEXT: // %bb.4: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore
; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB7_3 Depth=2
-; SOFTFP-NOLSE-NEXT: stlxr wzr, w0, [x20]
-; SOFTFP-NOLSE-NEXT: cbnz wzr, .LBB7_3
-; SOFTFP-NOLSE-NEXT: b .LBB7_1
-; SOFTFP-NOLSE-NEXT: .LBB7_5: // %atomicrmw.end
-; SOFTFP-NOLSE-NEXT: mov w0, w21
+; SOFTFP-NOLSE-NEXT: stlxr w9, w0, [x20]
+; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB7_3
+; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB7_2 Depth=1
+; SOFTFP-NOLSE-NEXT: mov w8, #1 // =0x1
+; SOFTFP-NOLSE-NEXT: lsr w23, w22, #16
+; SOFTFP-NOLSE-NEXT: cbz w8, .LBB7_2
+; SOFTFP-NOLSE-NEXT: .LBB7_6: // %atomicrmw.end
+; SOFTFP-NOLSE-NEXT: mov w0, w22
; SOFTFP-NOLSE-NEXT: mov w1, w23
; SOFTFP-NOLSE-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload
; SOFTFP-NOLSE-NEXT: ldp x22, x21, [sp, #32] // 16-byte Folded Reload
@@ -799,17 +824,18 @@ define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_seq_cst_align4(ptr %ptr, <2 x bf
; SOFTFP-NOLSE-NEXT: stp x22, x21, [sp, #32] // 16-byte Folded Spill
; SOFTFP-NOLSE-NEXT: ldrh w1, [x0, #2]
; SOFTFP-NOLSE-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill
-; SOFTFP-NOLSE-NEXT: ldrh w21, [x0]
+; SOFTFP-NOLSE-NEXT: ldrh w22, [x0]
; SOFTFP-NOLSE-NEXT: lsl w20, w2, #16
-; SOFTFP-NOLSE-NEXT: lsl w22, w8, #16
+; SOFTFP-NOLSE-NEXT: lsl w21, w8, #16
; SOFTFP-NOLSE-NEXT: mov x19, x0
; SOFTFP-NOLSE-NEXT: stp x24, x23, [sp, #16] // 16-byte Folded Spill
; SOFTFP-NOLSE-NEXT: b .LBB8_2
-; SOFTFP-NOLSE-NEXT: .LBB8_1: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: .LBB8_1: // %cmpxchg.nostore
; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB8_2 Depth=1
-; SOFTFP-NOLSE-NEXT: lsr w1, w21, #16
-; SOFTFP-NOLSE-NEXT: cmp w21, w23
-; SOFTFP-NOLSE-NEXT: b.eq .LBB8_5
+; SOFTFP-NOLSE-NEXT: mov w8, wzr
+; SOFTFP-NOLSE-NEXT: clrex
+; SOFTFP-NOLSE-NEXT: lsr w1, w22, #16
+; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB8_6
; SOFTFP-NOLSE-NEXT: .LBB8_2: // %atomicrmw.start
; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1
; SOFTFP-NOLSE-NEXT: // Child Loop BB8_3 Depth 2
@@ -819,25 +845,28 @@ define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_seq_cst_align4(ptr %ptr, <2 x bf
; SOFTFP-NOLSE-NEXT: bl __addsf3
; SOFTFP-NOLSE-NEXT: bl __truncsfbf2
; SOFTFP-NOLSE-NEXT: mov w24, w0
-; SOFTFP-NOLSE-NEXT: lsl w0, w21, #16
-; SOFTFP-NOLSE-NEXT: mov w1, w22
+; SOFTFP-NOLSE-NEXT: lsl w0, w22, #16
+; SOFTFP-NOLSE-NEXT: mov w1, w21
; SOFTFP-NOLSE-NEXT: bl __addsf3
; SOFTFP-NOLSE-NEXT: bl __truncsfbf2
-; SOFTFP-NOLSE-NEXT: bfxil w23, w21, #0, #16
+; SOFTFP-NOLSE-NEXT: bfxil w23, w22, #0, #16
; SOFTFP-NOLSE-NEXT: bfi w0, w24, #16, #16
-; SOFTFP-NOLSE-NEXT: .LBB8_3: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: .LBB8_3: // %cmpxchg.start
; SOFTFP-NOLSE-NEXT: // Parent Loop BB8_2 Depth=1
; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2
-; SOFTFP-NOLSE-NEXT: ldaxr w21, [x19]
-; SOFTFP-NOLSE-NEXT: cmp w21, w23
+; SOFTFP-NOLSE-NEXT: ldaxr w22, [x19]
+; SOFTFP-NOLSE-NEXT: cmp w22, w23
; SOFTFP-NOLSE-NEXT: b.ne .LBB8_1
-; SOFTFP-NOLSE-NEXT: // %bb.4: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore
; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB8_3 Depth=2
-; SOFTFP-NOLSE-NEXT: stlxr wzr, w0, [x19]
-; SOFTFP-NOLSE-NEXT: cbnz wzr, .LBB8_3
-; SOFTFP-NOLSE-NEXT: b .LBB8_1
-; SOFTFP-NOLSE-NEXT: .LBB8_5: // %atomicrmw.end
-; SOFTFP-NOLSE-NEXT: mov w0, w21
+; SOFTFP-NOLSE-NEXT: stlxr w8, w0, [x19]
+; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB8_3
+; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB8_2 Depth=1
+; SOFTFP-NOLSE-NEXT: mov w8, #1 // =0x1
+; SOFTFP-NOLSE-NEXT: lsr w1, w22, #16
+; SOFTFP-NOLSE-NEXT: cbz w8, .LBB8_2
+; SOFTFP-NOLSE-NEXT: .LBB8_6: // %atomicrmw.end
+; SOFTFP-NOLSE-NEXT: mov w0, w22
; SOFTFP-NOLSE-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload
; SOFTFP-NOLSE-NEXT: ldp x22, x21, [sp, #32] // 16-byte Folded Reload
; SOFTFP-NOLSE-NEXT: ldp x24, x23, [sp, #16] // 16-byte Folded Reload
@@ -885,45 +914,49 @@ define <2 x float> @test_atomicrmw_fadd_v2f32_seq_cst_align8(ptr %ptr, <2 x floa
; SOFTFP-NOLSE-NEXT: stp x24, x23, [sp, #16] // 16-byte Folded Spill
; SOFTFP-NOLSE-NEXT: stp x22, x21, [sp, #32] // 16-byte Folded Spill
; SOFTFP-NOLSE-NEXT: mov w21, w1
-; SOFTFP-NOLSE-NEXT: ldp w23, w22, [x0]
+; SOFTFP-NOLSE-NEXT: ldp w22, w23, [x0]
; SOFTFP-NOLSE-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill
; SOFTFP-NOLSE-NEXT: mov w19, w2
; SOFTFP-NOLSE-NEXT: mov x20, x0
; SOFTFP-NOLSE-NEXT: b .LBB9_2
-; SOFTFP-NOLSE-NEXT: .LBB9_1: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: .LBB9_1: // %cmpxchg.nostore
; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB9_2 Depth=1
-; SOFTFP-NOLSE-NEXT: lsr x22, x23, #32
-; SOFTFP-NOLSE-NEXT: cmp x23, x8
-; SOFTFP-NOLSE-NEXT: // kill: def $w22 killed $w22 killed $x22 def $x22
-; SOFTFP-NOLSE-NEXT: b.eq .LBB9_5
+; SOFTFP-NOLSE-NEXT: mov w8, wzr
+; SOFTFP-NOLSE-NEXT: clrex
+; SOFTFP-NOLSE-NEXT: lsr x23, x22, #32
+; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB9_6
; SOFTFP-NOLSE-NEXT: .LBB9_2: // %atomicrmw.start
; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1
; SOFTFP-NOLSE-NEXT: // Child Loop BB9_3 Depth 2
-; SOFTFP-NOLSE-NEXT: mov w0, w22
+; SOFTFP-NOLSE-NEXT: mov w0, w23
; SOFTFP-NOLSE-NEXT: mov w1, w19
; SOFTFP-NOLSE-NEXT: bl __addsf3
; SOFTFP-NOLSE-NEXT: mov w24, w0
-; SOFTFP-NOLSE-NEXT: mov w0, w23
+; SOFTFP-NOLSE-NEXT: mov w0, w22
; SOFTFP-NOLSE-NEXT: mov w1, w21
; SOFTFP-NOLSE-NEXT: bl __addsf3
-; SOFTFP-NOLSE-NEXT: mov w8, w23
-; SOFTFP-NOLSE-NEXT: mov w9, w0
-; SOFTFP-NOLSE-NEXT: orr x9, x9, x24, lsl #32
-; SOFTFP-NOLSE-NEXT: orr x8, x8, x22, lsl #32
-; SOFTFP-NOLSE-NEXT: .LBB9_3: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: mov w8, w0
+; SOFTFP-NOLSE-NEXT: mov w9, w22
+; SOFTFP-NOLSE-NEXT: // kill: def $w23 killed $w23 killed $x23 def $x23
+; SOFTFP-NOLSE-NEXT: orr x8, x8, x24, lsl #32
+; SOFTFP-NOLSE-NEXT: orr x9, x9, x23, lsl #32
+; SOFTFP-NOLSE-NEXT: .LBB9_3: // %cmpxchg.start
; SOFTFP-NOLSE-NEXT: // Parent Loop BB9_2 Depth=1
; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2
-; SOFTFP-NOLSE-NEXT: ldaxr x23, [x20]
-; SOFTFP-NOLSE-NEXT: cmp x23, x8
+; SOFTFP-NOLSE-NEXT: ldaxr x22, [x20]
+; SOFTFP-NOLSE-NEXT: cmp x22, x9
; SOFTFP-NOLSE-NEXT: b.ne .LBB9_1
-; SOFTFP-NOLSE-NEXT: // %bb.4: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore
; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB9_3 Depth=2
-; SOFTFP-NOLSE-NEXT: stlxr wzr, x9, [x20]
-; SOFTFP-NOLSE-NEXT: cbnz wzr, .LBB9_3
-; SOFTFP-NOLSE-NEXT: b .LBB9_1
-; SOFTFP-NOLSE-NEXT: .LBB9_5: // %atomicrmw.end
-; SOFTFP-NOLSE-NEXT: mov w0, w23
-; SOFTFP-NOLSE-NEXT: mov w1, w22
+; SOFTFP-NOLSE-NEXT: stlxr w10, x8, [x20]
+; SOFTFP-NOLSE-NEXT: cbnz w10, .LBB9_3
+; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB9_2 Depth=1
+; SOFTFP-NOLSE-NEXT: mov w8, #1 // =0x1
+; SOFTFP-NOLSE-NEXT: lsr x23, x22, #32
+; SOFTFP-NOLSE-NEXT: cbz w8, .LBB9_2
+; SOFTFP-NOLSE-NEXT: .LBB9_6: // %atomicrmw.end
+; SOFTFP-NOLSE-NEXT: mov w0, w22
+; SOFTFP-NOLSE-NEXT: mov w1, w23
; SOFTFP-NOLSE-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload
; SOFTFP-NOLSE-NEXT: ldp x22, x21, [sp, #32] // 16-byte Folded Reload
; SOFTFP-NOLSE-NEXT: ldp x24, x23, [sp, #16] // 16-byte Folded Reload
diff --git a/llvm/test/CodeGen/AArch64/atomicrmw-fmax.ll b/llvm/test/CodeGen/AArch64/atomicrmw-fmax.ll
index bfe0d20ca814bc..888b795876f7df 100644
--- a/llvm/test/CodeGen/AArch64/atomicrmw-fmax.ll
+++ b/llvm/test/CodeGen/AArch64/atomicrmw-fmax.ll
@@ -45,46 +45,49 @@ define half @test_atomicrmw_fmax_f16_seq_cst_align2(ptr %ptr, half %value) #0 {
;
; SOFTFP-NOLSE-LABEL: test_atomicrmw_fmax_f16_seq_cst_align2:
; SOFTFP-NOLSE: // %bb.0:
-; SOFTFP-NOLSE-NEXT: stp x30, x23, [sp, #-48]! // 16-byte Folded Spill
+; SOFTFP-NOLSE-NEXT: str x30, [sp, #-48]! // 8-byte Folded Spill
; SOFTFP-NOLSE-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill
-; SOFTFP-NOLSE-NEXT: ldrh w20, [x0]
; SOFTFP-NOLSE-NEXT: mov x19, x0
+; SOFTFP-NOLSE-NEXT: ldrh w0, [x0]
+; SOFTFP-NOLSE-NEXT: mov w20, w1
; SOFTFP-NOLSE-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill
-; SOFTFP-NOLSE-NEXT: mov w21, w1
; SOFTFP-NOLSE-NEXT: b .LBB0_2
-; SOFTFP-NOLSE-NEXT: .LBB0_1: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: .LBB0_1: // %cmpxchg.nostore
; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB0_2 Depth=1
-; SOFTFP-NOLSE-NEXT: cmp w8, w23
-; SOFTFP-NOLSE-NEXT: mov w20, w8
-; SOFTFP-NOLSE-NEXT: b.eq .LBB0_5
+; SOFTFP-NOLSE-NEXT: mov w8, wzr
+; SOFTFP-NOLSE-NEXT: clrex
+; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB0_6
; SOFTFP-NOLSE-NEXT: .LBB0_2: // %atomicrmw.start
; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1
; SOFTFP-NOLSE-NEXT: // Child Loop BB0_3 Depth 2
-; SOFTFP-NOLSE-NEXT: and w0, w21, #0xffff
-; SOFTFP-NOLSE-NEXT: bl __gnu_h2f_ieee
-; SOFTFP-NOLSE-NEXT: and w23, w20, #0xffff
; SOFTFP-NOLSE-NEXT: mov w22, w0
-; SOFTFP-NOLSE-NEXT: mov w0, w23
+; SOFTFP-NOLSE-NEXT: and w0, w20, #0xffff
+; SOFTFP-NOLSE-NEXT: bl __gnu_h2f_ieee
+; SOFTFP-NOLSE-NEXT: mov w21, w0
+; SOFTFP-NOLSE-NEXT: and w0, w22, #0xffff
; SOFTFP-NOLSE-NEXT: bl __gnu_h2f_ieee
-; SOFTFP-NOLSE-NEXT: mov w1, w22
+; SOFTFP-NOLSE-NEXT: mov w1, w21
; SOFTFP-NOLSE-NEXT: bl fmaxf
; SOFTFP-NOLSE-NEXT: bl __gnu_f2h_ieee
-; SOFTFP-NOLSE-NEXT: .LBB0_3: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: mov w8, w0
+; SOFTFP-NOLSE-NEXT: .LBB0_3: // %cmpxchg.start
; SOFTFP-NOLSE-NEXT: // Parent Loop BB0_2 Depth=1
; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2
-; SOFTFP-NOLSE-NEXT: ldaxrh w8, [x19]
-; SOFTFP-NOLSE-NEXT: cmp w8, w20, uxth
+; SOFTFP-NOLSE-NEXT: ldaxrh w0, [x19]
+; SOFTFP-NOLSE-NEXT: cmp w0, w22, uxth
; SOFTFP-NOLSE-NEXT: b.ne .LBB0_1
-; SOFTFP-NOLSE-NEXT: // %bb.4: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore
; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB0_3 Depth=2
-; SOFTFP-NOLSE-NEXT: stlxrh wzr, w0, [x19]
-; SOFTFP-NOLSE-NEXT: cbnz wzr, .LBB0_3
-; SOFTFP-NOLSE-NEXT: b .LBB0_1
-; SOFTFP-NOLSE-NEXT: .LBB0_5: // %atomicrmw.end
-; SOFTFP-NOLSE-NEXT: mov w0, w20
+; SOFTFP-NOLSE-NEXT: stlxrh w9, w8, [x19]
+; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB0_3
+; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB0_2 Depth=1
+; SOFTFP-NOLSE-NEXT: mov w8, #1 // =0x1
+; SOFTFP-NOLSE-NEXT: cbz w8, .LBB0_2
+; SOFTFP-NOLSE-NEXT: .LBB0_6: // %atomicrmw.end
; SOFTFP-NOLSE-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
+; SOFTFP-NOLSE-NEXT: // kill: def $w0 killed $w0 killed $x0
; SOFTFP-NOLSE-NEXT: ldp x22, x21, [sp, #16] // 16-byte Folded Reload
-; SOFTFP-NOLSE-NEXT: ldp x30, x23, [sp], #48 // 16-byte Folded Reload
+; SOFTFP-NOLSE-NEXT: ldr x30, [sp], #48 // 8-byte Folded Reload
; SOFTFP-NOLSE-NEXT: ret
%res = atomicrmw fmax ptr %ptr, half %value seq_cst, align 2
ret half %res
@@ -130,46 +133,49 @@ define half @test_atomicrmw_fmax_f16_seq_cst_align4(ptr %ptr, half %value) #0 {
;
; SOFTFP-NOLSE-LABEL: test_atomicrmw_fmax_f16_seq_cst_align4:
; SOFTFP-NOLSE: // %bb.0:
-; SOFTFP-NOLSE-NEXT: stp x30, x23, [sp, #-48]! // 16-byte Folded Spill
+; SOFTFP-NOLSE-NEXT: str x30, [sp, #-48]! // 8-byte Folded Spill
; SOFTFP-NOLSE-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill
-; SOFTFP-NOLSE-NEXT: ldrh w20, [x0]
; SOFTFP-NOLSE-NEXT: mov x19, x0
+; SOFTFP-NOLSE-NEXT: ldrh w0, [x0]
+; SOFTFP-NOLSE-NEXT: mov w20, w1
; SOFTFP-NOLSE-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill
-; SOFTFP-NOLSE-NEXT: mov w21, w1
; SOFTFP-NOLSE-NEXT: b .LBB1_2
-; SOFTFP-NOLSE-NEXT: .LBB1_1: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: .LBB1_1: // %cmpxchg.nostore
; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB1_2 Depth=1
-; SOFTFP-NOLSE-NEXT: cmp w8, w23
-; SOFTFP-NOLSE-NEXT: mov w20, w8
-; SOFTFP-NOLSE-NEXT: b.eq .LBB1_5
+; SOFTFP-NOLSE-NEXT: mov w8, wzr
+; SOFTFP-NOLSE-NEXT: clrex
+; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB1_6
; SOFTFP-NOLSE-NEXT: .LBB1_2: // %atomicrmw.start
; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1
; SOFTFP-NOLSE-NEXT: // Child Loop BB1_3 Depth 2
-; SOFTFP-NOLSE-NEXT: and w0, w21, #0xffff
-; SOFTFP-NOLSE-NEXT: bl __gnu_h2f_ieee
-; SOFTFP-NOLSE-NEXT: and w23, w20, #0xffff
; SOFTFP-NOLSE-NEXT: mov w22, w0
-; SOFTFP-NOLSE-NEXT: mov w0, w23
+; SOFTFP-NOLSE-NEXT: and w0, w20, #0xffff
; SOFTFP-NOLSE-NEXT: bl __gnu_h2f_ieee
-; SOFTFP-NOLSE-NEXT: mov w1, w22
+; SOFTFP-NOLSE-NEXT: mov w21, w0
+; SOFTFP-NOLSE-NEXT: and w0, w22, #0xffff
+; SOFTFP-NOLSE-NEXT: bl __gnu_h2f_ieee
+; SOFTFP-NOLSE-NEXT: mov w1, w21
; SOFTFP-NOLSE-NEXT: bl fmaxf
; SOFTFP-NOLSE-NEXT: bl __gnu_f2h_ieee
-; SOFTFP-NOLSE-NEXT: .LBB1_3: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: mov w8, w0
+; SOFTFP-NOLSE-NEXT: .LBB1_3: // %cmpxchg.start
; SOFTFP-NOLSE-NEXT: // Parent Loop BB1_2 Depth=1
; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2
-; SOFTFP-NOLSE-NEXT: ldaxrh w8, [x19]
-; SOFTFP-NOLSE-NEXT: cmp w8, w20, uxth
+; SOFTFP-NOLSE-NEXT: ldaxrh w0, [x19]
+; SOFTFP-NOLSE-NEXT: cmp w0, w22, uxth
; SOFTFP-NOLSE-NEXT: b.ne .LBB1_1
-; SOFTFP-NOLSE-NEXT: // %bb.4: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore
; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB1_3 Depth=2
-; SOFTFP-NOLSE-NEXT: stlxrh wzr, w0, [x19]
-; SOFTFP-NOLSE-NEXT: cbnz wzr, .LBB1_3
-; SOFTFP-NOLSE-NEXT: b .LBB1_1
-; SOFTFP-NOLSE-NEXT: .LBB1_5: // %atomicrmw.end
-; SOFTFP-NOLSE-NEXT: mov w0, w20
+; SOFTFP-NOLSE-NEXT: stlxrh w9, w8, [x19]
+; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB1_3
+; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB1_2 Depth=1
+; SOFTFP-NOLSE-NEXT: mov w8, #1 // =0x1
+; SOFTFP-NOLSE-NEXT: cbz w8, .LBB1_2
+; SOFTFP-NOLSE-NEXT: .LBB1_6: // %atomicrmw.end
; SOFTFP-NOLSE-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
+; SOFTFP-NOLSE-NEXT: // kill: def $w0 killed $w0 killed $x0
; SOFTFP-NOLSE-NEXT: ldp x22, x21, [sp, #16] // 16-byte Folded Reload
-; SOFTFP-NOLSE-NEXT: ldp x30, x23, [sp], #48 // 16-byte Folded Reload
+; SOFTFP-NOLSE-NEXT: ldr x30, [sp], #48 // 8-byte Folded Reload
; SOFTFP-NOLSE-NEXT: ret
%res = atomicrmw fmax ptr %ptr, half %value seq_cst, align 4
ret half %res
@@ -234,36 +240,40 @@ define bfloat @test_atomicrmw_fmax_bf16_seq_cst_align2(ptr %ptr, bfloat %value)
; SOFTFP-NOLSE: // %bb.0:
; SOFTFP-NOLSE-NEXT: stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
; SOFTFP-NOLSE-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
-; SOFTFP-NOLSE-NEXT: ldrh w20, [x0]
-; SOFTFP-NOLSE-NEXT: lsl w21, w1, #16
; SOFTFP-NOLSE-NEXT: mov x19, x0
+; SOFTFP-NOLSE-NEXT: ldrh w0, [x0]
+; SOFTFP-NOLSE-NEXT: lsl w20, w1, #16
; SOFTFP-NOLSE-NEXT: b .LBB2_2
-; SOFTFP-NOLSE-NEXT: .LBB2_1: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: .LBB2_1: // %cmpxchg.nostore
; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB2_2 Depth=1
-; SOFTFP-NOLSE-NEXT: cmp w8, w20, uxth
-; SOFTFP-NOLSE-NEXT: mov w20, w8
-; SOFTFP-NOLSE-NEXT: b.eq .LBB2_5
+; SOFTFP-NOLSE-NEXT: mov w8, wzr
+; SOFTFP-NOLSE-NEXT: clrex
+; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB2_6
; SOFTFP-NOLSE-NEXT: .LBB2_2: // %atomicrmw.start
; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1
; SOFTFP-NOLSE-NEXT: // Child Loop BB2_3 Depth 2
-; SOFTFP-NOLSE-NEXT: lsl w0, w20, #16
-; SOFTFP-NOLSE-NEXT: mov w1, w21
+; SOFTFP-NOLSE-NEXT: mov w21, w0
+; SOFTFP-NOLSE-NEXT: lsl w0, w0, #16
+; SOFTFP-NOLSE-NEXT: mov w1, w20
; SOFTFP-NOLSE-NEXT: bl fmaxf
; SOFTFP-NOLSE-NEXT: bl __truncsfbf2
-; SOFTFP-NOLSE-NEXT: .LBB2_3: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: mov w8, w0
+; SOFTFP-NOLSE-NEXT: .LBB2_3: // %cmpxchg.start
; SOFTFP-NOLSE-NEXT: // Parent Loop BB2_2 Depth=1
; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2
-; SOFTFP-NOLSE-NEXT: ldaxrh w8, [x19]
-; SOFTFP-NOLSE-NEXT: cmp w8, w20, uxth
+; SOFTFP-NOLSE-NEXT: ldaxrh w0, [x19]
+; SOFTFP-NOLSE-NEXT: cmp w0, w21, uxth
; SOFTFP-NOLSE-NEXT: b.ne .LBB2_1
-; SOFTFP-NOLSE-NEXT: // %bb.4: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore
; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB2_3 Depth=2
-; SOFTFP-NOLSE-NEXT: stlxrh wzr, w0, [x19]
-; SOFTFP-NOLSE-NEXT: cbnz wzr, .LBB2_3
-; SOFTFP-NOLSE-NEXT: b .LBB2_1
-; SOFTFP-NOLSE-NEXT: .LBB2_5: // %atomicrmw.end
-; SOFTFP-NOLSE-NEXT: mov w0, w20
+; SOFTFP-NOLSE-NEXT: stlxrh w9, w8, [x19]
+; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB2_3
+; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB2_2 Depth=1
+; SOFTFP-NOLSE-NEXT: mov w8, #1 // =0x1
+; SOFTFP-NOLSE-NEXT: cbz w8, .LBB2_2
+; SOFTFP-NOLSE-NEXT: .LBB2_6: // %atomicrmw.end
; SOFTFP-NOLSE-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
+; SOFTFP-NOLSE-NEXT: // kill: def $w0 killed $w0 killed $x0
; SOFTFP-NOLSE-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload
; SOFTFP-NOLSE-NEXT: ret
%res = atomicrmw fmax ptr %ptr, bfloat %value seq_cst, align 2
@@ -329,36 +339,40 @@ define bfloat @test_atomicrmw_fmax_bf16_seq_cst_align4(ptr %ptr, bfloat %value)
; SOFTFP-NOLSE: // %bb.0:
; SOFTFP-NOLSE-NEXT: stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
; SOFTFP-NOLSE-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
-; SOFTFP-NOLSE-NEXT: ldrh w20, [x0]
-; SOFTFP-NOLSE-NEXT: lsl w21, w1, #16
; SOFTFP-NOLSE-NEXT: mov x19, x0
+; SOFTFP-NOLSE-NEXT: ldrh w0, [x0]
+; SOFTFP-NOLSE-NEXT: lsl w20, w1, #16
; SOFTFP-NOLSE-NEXT: b .LBB3_2
-; SOFTFP-NOLSE-NEXT: .LBB3_1: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: .LBB3_1: // %cmpxchg.nostore
; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB3_2 Depth=1
-; SOFTFP-NOLSE-NEXT: cmp w8, w20, uxth
-; SOFTFP-NOLSE-NEXT: mov w20, w8
-; SOFTFP-NOLSE-NEXT: b.eq .LBB3_5
+; SOFTFP-NOLSE-NEXT: mov w8, wzr
+; SOFTFP-NOLSE-NEXT: clrex
+; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB3_6
; SOFTFP-NOLSE-NEXT: .LBB3_2: // %atomicrmw.start
; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1
; SOFTFP-NOLSE-NEXT: // Child Loop BB3_3 Depth 2
-; SOFTFP-NOLSE-NEXT: lsl w0, w20, #16
-; SOFTFP-NOLSE-NEXT: mov w1, w21
+; SOFTFP-NOLSE-NEXT: mov w21, w0
+; SOFTFP-NOLSE-NEXT: lsl w0, w0, #16
+; SOFTFP-NOLSE-NEXT: mov w1, w20
; SOFTFP-NOLSE-NEXT: bl fmaxf
; SOFTFP-NOLSE-NEXT: bl __truncsfbf2
-; SOFTFP-NOLSE-NEXT: .LBB3_3: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: mov w8, w0
+; SOFTFP-NOLSE-NEXT: .LBB3_3: // %cmpxchg.start
; SOFTFP-NOLSE-NEXT: // Parent Loop BB3_2 Depth=1
; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2
-; SOFTFP-NOLSE-NEXT: ldaxrh w8, [x19]
-; SOFTFP-NOLSE-NEXT: cmp w8, w20, uxth
+; SOFTFP-NOLSE-NEXT: ldaxrh w0, [x19]
+; SOFTFP-NOLSE-NEXT: cmp w0, w21, uxth
; SOFTFP-NOLSE-NEXT: b.ne .LBB3_1
-; SOFTFP-NOLSE-NEXT: // %bb.4: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore
; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB3_3 Depth=2
-; SOFTFP-NOLSE-NEXT: stlxrh wzr, w0, [x19]
-; SOFTFP-NOLSE-NEXT: cbnz wzr, .LBB3_3
-; SOFTFP-NOLSE-NEXT: b .LBB3_1
-; SOFTFP-NOLSE-NEXT: .LBB3_5: // %atomicrmw.end
-; SOFTFP-NOLSE-NEXT: mov w0, w20
+; SOFTFP-NOLSE-NEXT: stlxrh w9, w8, [x19]
+; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB3_3
+; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB3_2 Depth=1
+; SOFTFP-NOLSE-NEXT: mov w8, #1 // =0x1
+; SOFTFP-NOLSE-NEXT: cbz w8, .LBB3_2
+; SOFTFP-NOLSE-NEXT: .LBB3_6: // %atomicrmw.end
; SOFTFP-NOLSE-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
+; SOFTFP-NOLSE-NEXT: // kill: def $w0 killed $w0 killed $x0
; SOFTFP-NOLSE-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload
; SOFTFP-NOLSE-NEXT: ret
%res = atomicrmw fmax ptr %ptr, bfloat %value seq_cst, align 4
@@ -401,35 +415,38 @@ define float @test_atomicrmw_fmax_f32_seq_cst_align4(ptr %ptr, float %value) #0
; SOFTFP-NOLSE: // %bb.0:
; SOFTFP-NOLSE-NEXT: stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
; SOFTFP-NOLSE-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
-; SOFTFP-NOLSE-NEXT: ldr w20, [x0]
; SOFTFP-NOLSE-NEXT: mov x19, x0
-; SOFTFP-NOLSE-NEXT: mov w21, w1
+; SOFTFP-NOLSE-NEXT: ldr w0, [x0]
+; SOFTFP-NOLSE-NEXT: mov w20, w1
; SOFTFP-NOLSE-NEXT: b .LBB4_2
-; SOFTFP-NOLSE-NEXT: .LBB4_1: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: .LBB4_1: // %cmpxchg.nostore
; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB4_2 Depth=1
-; SOFTFP-NOLSE-NEXT: cmp w8, w20
-; SOFTFP-NOLSE-NEXT: mov w20, w8
-; SOFTFP-NOLSE-NEXT: b.eq .LBB4_5
+; SOFTFP-NOLSE-NEXT: mov w8, wzr
+; SOFTFP-NOLSE-NEXT: clrex
+; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB4_6
; SOFTFP-NOLSE-NEXT: .LBB4_2: // %atomicrmw.start
; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1
; SOFTFP-NOLSE-NEXT: // Child Loop BB4_3 Depth 2
-; SOFTFP-NOLSE-NEXT: mov w0, w20
-; SOFTFP-NOLSE-NEXT: mov w1, w21
+; SOFTFP-NOLSE-NEXT: mov w1, w20
+; SOFTFP-NOLSE-NEXT: mov w21, w0
; SOFTFP-NOLSE-NEXT: bl fmaxf
-; SOFTFP-NOLSE-NEXT: .LBB4_3: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: mov w8, w0
+; SOFTFP-NOLSE-NEXT: .LBB4_3: // %cmpxchg.start
; SOFTFP-NOLSE-NEXT: // Parent Loop BB4_2 Depth=1
; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2
-; SOFTFP-NOLSE-NEXT: ldaxr w8, [x19]
-; SOFTFP-NOLSE-NEXT: cmp w8, w20
+; SOFTFP-NOLSE-NEXT: ldaxr w0, [x19]
+; SOFTFP-NOLSE-NEXT: cmp w0, w21
; SOFTFP-NOLSE-NEXT: b.ne .LBB4_1
-; SOFTFP-NOLSE-NEXT: // %bb.4: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore
; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB4_3 Depth=2
-; SOFTFP-NOLSE-NEXT: stlxr wzr, w0, [x19]
-; SOFTFP-NOLSE-NEXT: cbnz wzr, .LBB4_3
-; SOFTFP-NOLSE-NEXT: b .LBB4_1
-; SOFTFP-NOLSE-NEXT: .LBB4_5: // %atomicrmw.end
-; SOFTFP-NOLSE-NEXT: mov w0, w20
+; SOFTFP-NOLSE-NEXT: stlxr w9, w8, [x19]
+; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB4_3
+; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB4_2 Depth=1
+; SOFTFP-NOLSE-NEXT: mov w8, #1 // =0x1
+; SOFTFP-NOLSE-NEXT: cbz w8, .LBB4_2
+; SOFTFP-NOLSE-NEXT: .LBB4_6: // %atomicrmw.end
; SOFTFP-NOLSE-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
+; SOFTFP-NOLSE-NEXT: // kill: def $w0 killed $w0 killed $x0
; SOFTFP-NOLSE-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload
; SOFTFP-NOLSE-NEXT: ret
%res = atomicrmw fmax ptr %ptr, float %value seq_cst, align 4
@@ -471,36 +488,40 @@ define double @test_atomicrmw_fmax_f32_seq_cst_align8(ptr %ptr, double %value) #
; SOFTFP-NOLSE-LABEL: test_atomicrmw_fmax_f32_seq_cst_align8:
; SOFTFP-NOLSE: // %bb.0:
; SOFTFP-NOLSE-NEXT: stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
+; SOFTFP-NOLSE-NEXT: ldr x21, [x0]
; SOFTFP-NOLSE-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
-; SOFTFP-NOLSE-NEXT: ldr x20, [x0]
; SOFTFP-NOLSE-NEXT: mov x19, x0
-; SOFTFP-NOLSE-NEXT: mov x21, x1
+; SOFTFP-NOLSE-NEXT: mov x20, x1
; SOFTFP-NOLSE-NEXT: b .LBB5_2
-; SOFTFP-NOLSE-NEXT: .LBB5_1: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: .LBB5_1: // %cmpxchg.nostore
; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB5_2 Depth=1
-; SOFTFP-NOLSE-NEXT: cmp x8, x20
-; SOFTFP-NOLSE-NEXT: mov x20, x8
-; SOFTFP-NOLSE-NEXT: b.eq .LBB5_5
+; SOFTFP-NOLSE-NEXT: mov w9, wzr
+; SOFTFP-NOLSE-NEXT: clrex
+; SOFTFP-NOLSE-NEXT: mov x21, x8
+; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB5_6
; SOFTFP-NOLSE-NEXT: .LBB5_2: // %atomicrmw.start
; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1
; SOFTFP-NOLSE-NEXT: // Child Loop BB5_3 Depth 2
-; SOFTFP-NOLSE-NEXT: mov x0, x20
-; SOFTFP-NOLSE-NEXT: mov x1, x21
+; SOFTFP-NOLSE-NEXT: mov x0, x21
+; SOFTFP-NOLSE-NEXT: mov x1, x20
; SOFTFP-NOLSE-NEXT: bl fmax
-; SOFTFP-NOLSE-NEXT: .LBB5_3: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: .LBB5_3: // %cmpxchg.start
; SOFTFP-NOLSE-NEXT: // Parent Loop BB5_2 Depth=1
; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2
; SOFTFP-NOLSE-NEXT: ldaxr x8, [x19]
-; SOFTFP-NOLSE-NEXT: cmp x8, x20
+; SOFTFP-NOLSE-NEXT: cmp x8, x21
; SOFTFP-NOLSE-NEXT: b.ne .LBB5_1
-; SOFTFP-NOLSE-NEXT: // %bb.4: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore
; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB5_3 Depth=2
-; SOFTFP-NOLSE-NEXT: stlxr wzr, x0, [x19]
-; SOFTFP-NOLSE-NEXT: cbnz wzr, .LBB5_3
-; SOFTFP-NOLSE-NEXT: b .LBB5_1
-; SOFTFP-NOLSE-NEXT: .LBB5_5: // %atomicrmw.end
-; SOFTFP-NOLSE-NEXT: mov x0, x20
+; SOFTFP-NOLSE-NEXT: stlxr w9, x0, [x19]
+; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB5_3
+; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB5_2 Depth=1
+; SOFTFP-NOLSE-NEXT: mov w9, #1 // =0x1
+; SOFTFP-NOLSE-NEXT: mov x21, x8
+; SOFTFP-NOLSE-NEXT: cbz w9, .LBB5_2
+; SOFTFP-NOLSE-NEXT: .LBB5_6: // %atomicrmw.end
; SOFTFP-NOLSE-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
+; SOFTFP-NOLSE-NEXT: mov x0, x21
; SOFTFP-NOLSE-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload
; SOFTFP-NOLSE-NEXT: ret
%res = atomicrmw fmax ptr %ptr, double %value seq_cst, align 8
@@ -567,18 +588,18 @@ define <2 x half> @test_atomicrmw_fmax_v2f16_seq_cst_align4(ptr %ptr, <2 x half>
; SOFTFP-NOLSE-NEXT: stp x24, x23, [sp, #16] // 16-byte Folded Spill
; SOFTFP-NOLSE-NEXT: ldrh w23, [x0, #2]
; SOFTFP-NOLSE-NEXT: stp x22, x21, [sp, #32] // 16-byte Folded Spill
-; SOFTFP-NOLSE-NEXT: ldrh w21, [x0]
-; SOFTFP-NOLSE-NEXT: mov w22, w1
+; SOFTFP-NOLSE-NEXT: ldrh w22, [x0]
+; SOFTFP-NOLSE-NEXT: mov w21, w1
; SOFTFP-NOLSE-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill
; SOFTFP-NOLSE-NEXT: mov w19, w2
; SOFTFP-NOLSE-NEXT: mov x20, x0
; SOFTFP-NOLSE-NEXT: b .LBB6_2
-; SOFTFP-NOLSE-NEXT: .LBB6_1: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: .LBB6_1: // %cmpxchg.nostore
; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB6_2 Depth=1
-; SOFTFP-NOLSE-NEXT: lsr w23, w8, #16
-; SOFTFP-NOLSE-NEXT: cmp w8, w21
-; SOFTFP-NOLSE-NEXT: mov w21, w8
-; SOFTFP-NOLSE-NEXT: b.eq .LBB6_5
+; SOFTFP-NOLSE-NEXT: mov w8, wzr
+; SOFTFP-NOLSE-NEXT: clrex
+; SOFTFP-NOLSE-NEXT: lsr w23, w22, #16
+; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB6_6
; SOFTFP-NOLSE-NEXT: .LBB6_2: // %atomicrmw.start
; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1
; SOFTFP-NOLSE-NEXT: // Child Loop BB6_3 Depth 2
@@ -591,29 +612,33 @@ define <2 x half> @test_atomicrmw_fmax_v2f16_seq_cst_align4(ptr %ptr, <2 x half>
; SOFTFP-NOLSE-NEXT: bl fmaxf
; SOFTFP-NOLSE-NEXT: bl __gnu_f2h_ieee
; SOFTFP-NOLSE-NEXT: mov w24, w0
-; SOFTFP-NOLSE-NEXT: and w0, w22, #0xffff
+; SOFTFP-NOLSE-NEXT: and w0, w21, #0xffff
; SOFTFP-NOLSE-NEXT: bl __gnu_h2f_ieee
; SOFTFP-NOLSE-NEXT: mov w25, w0
-; SOFTFP-NOLSE-NEXT: and w0, w21, #0xffff
+; SOFTFP-NOLSE-NEXT: and w0, w22, #0xffff
; SOFTFP-NOLSE-NEXT: bl __gnu_h2f_ieee
; SOFTFP-NOLSE-NEXT: mov w1, w25
; SOFTFP-NOLSE-NEXT: bl fmaxf
; SOFTFP-NOLSE-NEXT: bl __gnu_f2h_ieee
-; SOFTFP-NOLSE-NEXT: bfi w21, w23, #16, #16
+; SOFTFP-NOLSE-NEXT: mov w8, w22
; SOFTFP-NOLSE-NEXT: bfi w0, w24, #16, #16
-; SOFTFP-NOLSE-NEXT: .LBB6_3: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: bfi w8, w23, #16, #16
+; SOFTFP-NOLSE-NEXT: .LBB6_3: // %cmpxchg.start
; SOFTFP-NOLSE-NEXT: // Parent Loop BB6_2 Depth=1
; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2
-; SOFTFP-NOLSE-NEXT: ldaxr w8, [x20]
-; SOFTFP-NOLSE-NEXT: cmp w8, w21
+; SOFTFP-NOLSE-NEXT: ldaxr w22, [x20]
+; SOFTFP-NOLSE-NEXT: cmp w22, w8
; SOFTFP-NOLSE-NEXT: b.ne .LBB6_1
-; SOFTFP-NOLSE-NEXT: // %bb.4: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore
; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB6_3 Depth=2
-; SOFTFP-NOLSE-NEXT: stlxr wzr, w0, [x20]
-; SOFTFP-NOLSE-NEXT: cbnz wzr, .LBB6_3
-; SOFTFP-NOLSE-NEXT: b .LBB6_1
-; SOFTFP-NOLSE-NEXT: .LBB6_5: // %atomicrmw.end
-; SOFTFP-NOLSE-NEXT: mov w0, w21
+; SOFTFP-NOLSE-NEXT: stlxr w9, w0, [x20]
+; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB6_3
+; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB6_2 Depth=1
+; SOFTFP-NOLSE-NEXT: mov w8, #1 // =0x1
+; SOFTFP-NOLSE-NEXT: lsr w23, w22, #16
+; SOFTFP-NOLSE-NEXT: cbz w8, .LBB6_2
+; SOFTFP-NOLSE-NEXT: .LBB6_6: // %atomicrmw.end
+; SOFTFP-NOLSE-NEXT: mov w0, w22
; SOFTFP-NOLSE-NEXT: mov w1, w23
; SOFTFP-NOLSE-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload
; SOFTFP-NOLSE-NEXT: ldp x22, x21, [sp, #32] // 16-byte Folded Reload
@@ -723,17 +748,18 @@ define <2 x bfloat> @test_atomicrmw_fmax_v2bf16_seq_cst_align4(ptr %ptr, <2 x bf
; SOFTFP-NOLSE-NEXT: stp x22, x21, [sp, #32] // 16-byte Folded Spill
; SOFTFP-NOLSE-NEXT: ldrh w1, [x0, #2]
; SOFTFP-NOLSE-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill
-; SOFTFP-NOLSE-NEXT: ldrh w21, [x0]
+; SOFTFP-NOLSE-NEXT: ldrh w22, [x0]
; SOFTFP-NOLSE-NEXT: lsl w20, w2, #16
-; SOFTFP-NOLSE-NEXT: lsl w22, w8, #16
+; SOFTFP-NOLSE-NEXT: lsl w21, w8, #16
; SOFTFP-NOLSE-NEXT: mov x19, x0
; SOFTFP-NOLSE-NEXT: stp x24, x23, [sp, #16] // 16-byte Folded Spill
; SOFTFP-NOLSE-NEXT: b .LBB7_2
-; SOFTFP-NOLSE-NEXT: .LBB7_1: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: .LBB7_1: // %cmpxchg.nostore
; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB7_2 Depth=1
-; SOFTFP-NOLSE-NEXT: lsr w1, w21, #16
-; SOFTFP-NOLSE-NEXT: cmp w21, w23
-; SOFTFP-NOLSE-NEXT: b.eq .LBB7_5
+; SOFTFP-NOLSE-NEXT: mov w8, wzr
+; SOFTFP-NOLSE-NEXT: clrex
+; SOFTFP-NOLSE-NEXT: lsr w1, w22, #16
+; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB7_6
; SOFTFP-NOLSE-NEXT: .LBB7_2: // %atomicrmw.start
; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1
; SOFTFP-NOLSE-NEXT: // Child Loop BB7_3 Depth 2
@@ -743,25 +769,28 @@ define <2 x bfloat> @test_atomicrmw_fmax_v2bf16_seq_cst_align4(ptr %ptr, <2 x bf
; SOFTFP-NOLSE-NEXT: bl fmaxf
; SOFTFP-NOLSE-NEXT: bl __truncsfbf2
; SOFTFP-NOLSE-NEXT: mov w24, w0
-; SOFTFP-NOLSE-NEXT: lsl w0, w21, #16
-; SOFTFP-NOLSE-NEXT: mov w1, w22
+; SOFTFP-NOLSE-NEXT: lsl w0, w22, #16
+; SOFTFP-NOLSE-NEXT: mov w1, w21
; SOFTFP-NOLSE-NEXT: bl fmaxf
; SOFTFP-NOLSE-NEXT: bl __truncsfbf2
-; SOFTFP-NOLSE-NEXT: bfxil w23, w21, #0, #16
+; SOFTFP-NOLSE-NEXT: bfxil w23, w22, #0, #16
; SOFTFP-NOLSE-NEXT: bfi w0, w24, #16, #16
-; SOFTFP-NOLSE-NEXT: .LBB7_3: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: .LBB7_3: // %cmpxchg.start
; SOFTFP-NOLSE-NEXT: // Parent Loop BB7_2 Depth=1
; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2
-; SOFTFP-NOLSE-NEXT: ldaxr w21, [x19]
-; SOFTFP-NOLSE-NEXT: cmp w21, w23
+; SOFTFP-NOLSE-NEXT: ldaxr w22, [x19]
+; SOFTFP-NOLSE-NEXT: cmp w22, w23
; SOFTFP-NOLSE-NEXT: b.ne .LBB7_1
-; SOFTFP-NOLSE-NEXT: // %bb.4: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore
; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB7_3 Depth=2
-; SOFTFP-NOLSE-NEXT: stlxr wzr, w0, [x19]
-; SOFTFP-NOLSE-NEXT: cbnz wzr, .LBB7_3
-; SOFTFP-NOLSE-NEXT: b .LBB7_1
-; SOFTFP-NOLSE-NEXT: .LBB7_5: // %atomicrmw.end
-; SOFTFP-NOLSE-NEXT: mov w0, w21
+; SOFTFP-NOLSE-NEXT: stlxr w8, w0, [x19]
+; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB7_3
+; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB7_2 Depth=1
+; SOFTFP-NOLSE-NEXT: mov w8, #1 // =0x1
+; SOFTFP-NOLSE-NEXT: lsr w1, w22, #16
+; SOFTFP-NOLSE-NEXT: cbz w8, .LBB7_2
+; SOFTFP-NOLSE-NEXT: .LBB7_6: // %atomicrmw.end
+; SOFTFP-NOLSE-NEXT: mov w0, w22
; SOFTFP-NOLSE-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload
; SOFTFP-NOLSE-NEXT: ldp x22, x21, [sp, #32] // 16-byte Folded Reload
; SOFTFP-NOLSE-NEXT: ldp x24, x23, [sp, #16] // 16-byte Folded Reload
@@ -809,45 +838,49 @@ define <2 x float> @test_atomicrmw_fmax_v2f32_seq_cst_align8(ptr %ptr, <2 x floa
; SOFTFP-NOLSE-NEXT: stp x24, x23, [sp, #16] // 16-byte Folded Spill
; SOFTFP-NOLSE-NEXT: stp x22, x21, [sp, #32] // 16-byte Folded Spill
; SOFTFP-NOLSE-NEXT: mov w21, w1
-; SOFTFP-NOLSE-NEXT: ldp w23, w22, [x0]
+; SOFTFP-NOLSE-NEXT: ldp w22, w23, [x0]
; SOFTFP-NOLSE-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill
; SOFTFP-NOLSE-NEXT: mov w19, w2
; SOFTFP-NOLSE-NEXT: mov x20, x0
; SOFTFP-NOLSE-NEXT: b .LBB8_2
-; SOFTFP-NOLSE-NEXT: .LBB8_1: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: .LBB8_1: // %cmpxchg.nostore
; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB8_2 Depth=1
-; SOFTFP-NOLSE-NEXT: lsr x22, x23, #32
-; SOFTFP-NOLSE-NEXT: cmp x23, x8
-; SOFTFP-NOLSE-NEXT: // kill: def $w22 killed $w22 killed $x22 def $x22
-; SOFTFP-NOLSE-NEXT: b.eq .LBB8_5
+; SOFTFP-NOLSE-NEXT: mov w8, wzr
+; SOFTFP-NOLSE-NEXT: clrex
+; SOFTFP-NOLSE-NEXT: lsr x23, x22, #32
+; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB8_6
; SOFTFP-NOLSE-NEXT: .LBB8_2: // %atomicrmw.start
; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1
; SOFTFP-NOLSE-NEXT: // Child Loop BB8_3 Depth 2
-; SOFTFP-NOLSE-NEXT: mov w0, w22
+; SOFTFP-NOLSE-NEXT: mov w0, w23
; SOFTFP-NOLSE-NEXT: mov w1, w19
; SOFTFP-NOLSE-NEXT: bl fmaxf
; SOFTFP-NOLSE-NEXT: mov w24, w0
-; SOFTFP-NOLSE-NEXT: mov w0, w23
+; SOFTFP-NOLSE-NEXT: mov w0, w22
; SOFTFP-NOLSE-NEXT: mov w1, w21
; SOFTFP-NOLSE-NEXT: bl fmaxf
-; SOFTFP-NOLSE-NEXT: mov w8, w23
-; SOFTFP-NOLSE-NEXT: mov w9, w0
-; SOFTFP-NOLSE-NEXT: orr x9, x9, x24, lsl #32
-; SOFTFP-NOLSE-NEXT: orr x8, x8, x22, lsl #32
-; SOFTFP-NOLSE-NEXT: .LBB8_3: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: mov w8, w0
+; SOFTFP-NOLSE-NEXT: mov w9, w22
+; SOFTFP-NOLSE-NEXT: // kill: def $w23 killed $w23 killed $x23 def $x23
+; SOFTFP-NOLSE-NEXT: orr x8, x8, x24, lsl #32
+; SOFTFP-NOLSE-NEXT: orr x9, x9, x23, lsl #32
+; SOFTFP-NOLSE-NEXT: .LBB8_3: // %cmpxchg.start
; SOFTFP-NOLSE-NEXT: // Parent Loop BB8_2 Depth=1
; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2
-; SOFTFP-NOLSE-NEXT: ldaxr x23, [x20]
-; SOFTFP-NOLSE-NEXT: cmp x23, x8
+; SOFTFP-NOLSE-NEXT: ldaxr x22, [x20]
+; SOFTFP-NOLSE-NEXT: cmp x22, x9
; SOFTFP-NOLSE-NEXT: b.ne .LBB8_1
-; SOFTFP-NOLSE-NEXT: // %bb.4: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore
; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB8_3 Depth=2
-; SOFTFP-NOLSE-NEXT: stlxr wzr, x9, [x20]
-; SOFTFP-NOLSE-NEXT: cbnz wzr, .LBB8_3
-; SOFTFP-NOLSE-NEXT: b .LBB8_1
-; SOFTFP-NOLSE-NEXT: .LBB8_5: // %atomicrmw.end
-; SOFTFP-NOLSE-NEXT: mov w0, w23
-; SOFTFP-NOLSE-NEXT: mov w1, w22
+; SOFTFP-NOLSE-NEXT: stlxr w10, x8, [x20]
+; SOFTFP-NOLSE-NEXT: cbnz w10, .LBB8_3
+; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB8_2 Depth=1
+; SOFTFP-NOLSE-NEXT: mov w8, #1 // =0x1
+; SOFTFP-NOLSE-NEXT: lsr x23, x22, #32
+; SOFTFP-NOLSE-NEXT: cbz w8, .LBB8_2
+; SOFTFP-NOLSE-NEXT: .LBB8_6: // %atomicrmw.end
+; SOFTFP-NOLSE-NEXT: mov w0, w22
+; SOFTFP-NOLSE-NEXT: mov w1, w23
; SOFTFP-NOLSE-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload
; SOFTFP-NOLSE-NEXT: ldp x22, x21, [sp, #32] // 16-byte Folded Reload
; SOFTFP-NOLSE-NEXT: ldp x24, x23, [sp, #16] // 16-byte Folded Reload
diff --git a/llvm/test/CodeGen/AArch64/atomicrmw-fmin.ll b/llvm/test/CodeGen/AArch64/atomicrmw-fmin.ll
index 6b7d2df044460a..a3665c6e428608 100644
--- a/llvm/test/CodeGen/AArch64/atomicrmw-fmin.ll
+++ b/llvm/test/CodeGen/AArch64/atomicrmw-fmin.ll
@@ -45,46 +45,49 @@ define half @test_atomicrmw_fmin_f16_seq_cst_align2(ptr %ptr, half %value) #0 {
;
; SOFTFP-NOLSE-LABEL: test_atomicrmw_fmin_f16_seq_cst_align2:
; SOFTFP-NOLSE: // %bb.0:
-; SOFTFP-NOLSE-NEXT: stp x30, x23, [sp, #-48]! // 16-byte Folded Spill
+; SOFTFP-NOLSE-NEXT: str x30, [sp, #-48]! // 8-byte Folded Spill
; SOFTFP-NOLSE-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill
-; SOFTFP-NOLSE-NEXT: ldrh w20, [x0]
; SOFTFP-NOLSE-NEXT: mov x19, x0
+; SOFTFP-NOLSE-NEXT: ldrh w0, [x0]
+; SOFTFP-NOLSE-NEXT: mov w20, w1
; SOFTFP-NOLSE-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill
-; SOFTFP-NOLSE-NEXT: mov w21, w1
; SOFTFP-NOLSE-NEXT: b .LBB0_2
-; SOFTFP-NOLSE-NEXT: .LBB0_1: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: .LBB0_1: // %cmpxchg.nostore
; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB0_2 Depth=1
-; SOFTFP-NOLSE-NEXT: cmp w8, w23
-; SOFTFP-NOLSE-NEXT: mov w20, w8
-; SOFTFP-NOLSE-NEXT: b.eq .LBB0_5
+; SOFTFP-NOLSE-NEXT: mov w8, wzr
+; SOFTFP-NOLSE-NEXT: clrex
+; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB0_6
; SOFTFP-NOLSE-NEXT: .LBB0_2: // %atomicrmw.start
; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1
; SOFTFP-NOLSE-NEXT: // Child Loop BB0_3 Depth 2
-; SOFTFP-NOLSE-NEXT: and w0, w21, #0xffff
-; SOFTFP-NOLSE-NEXT: bl __gnu_h2f_ieee
-; SOFTFP-NOLSE-NEXT: and w23, w20, #0xffff
; SOFTFP-NOLSE-NEXT: mov w22, w0
-; SOFTFP-NOLSE-NEXT: mov w0, w23
+; SOFTFP-NOLSE-NEXT: and w0, w20, #0xffff
+; SOFTFP-NOLSE-NEXT: bl __gnu_h2f_ieee
+; SOFTFP-NOLSE-NEXT: mov w21, w0
+; SOFTFP-NOLSE-NEXT: and w0, w22, #0xffff
; SOFTFP-NOLSE-NEXT: bl __gnu_h2f_ieee
-; SOFTFP-NOLSE-NEXT: mov w1, w22
+; SOFTFP-NOLSE-NEXT: mov w1, w21
; SOFTFP-NOLSE-NEXT: bl fminf
; SOFTFP-NOLSE-NEXT: bl __gnu_f2h_ieee
-; SOFTFP-NOLSE-NEXT: .LBB0_3: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: mov w8, w0
+; SOFTFP-NOLSE-NEXT: .LBB0_3: // %cmpxchg.start
; SOFTFP-NOLSE-NEXT: // Parent Loop BB0_2 Depth=1
; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2
-; SOFTFP-NOLSE-NEXT: ldaxrh w8, [x19]
-; SOFTFP-NOLSE-NEXT: cmp w8, w20, uxth
+; SOFTFP-NOLSE-NEXT: ldaxrh w0, [x19]
+; SOFTFP-NOLSE-NEXT: cmp w0, w22, uxth
; SOFTFP-NOLSE-NEXT: b.ne .LBB0_1
-; SOFTFP-NOLSE-NEXT: // %bb.4: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore
; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB0_3 Depth=2
-; SOFTFP-NOLSE-NEXT: stlxrh wzr, w0, [x19]
-; SOFTFP-NOLSE-NEXT: cbnz wzr, .LBB0_3
-; SOFTFP-NOLSE-NEXT: b .LBB0_1
-; SOFTFP-NOLSE-NEXT: .LBB0_5: // %atomicrmw.end
-; SOFTFP-NOLSE-NEXT: mov w0, w20
+; SOFTFP-NOLSE-NEXT: stlxrh w9, w8, [x19]
+; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB0_3
+; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB0_2 Depth=1
+; SOFTFP-NOLSE-NEXT: mov w8, #1 // =0x1
+; SOFTFP-NOLSE-NEXT: cbz w8, .LBB0_2
+; SOFTFP-NOLSE-NEXT: .LBB0_6: // %atomicrmw.end
; SOFTFP-NOLSE-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
+; SOFTFP-NOLSE-NEXT: // kill: def $w0 killed $w0 killed $x0
; SOFTFP-NOLSE-NEXT: ldp x22, x21, [sp, #16] // 16-byte Folded Reload
-; SOFTFP-NOLSE-NEXT: ldp x30, x23, [sp], #48 // 16-byte Folded Reload
+; SOFTFP-NOLSE-NEXT: ldr x30, [sp], #48 // 8-byte Folded Reload
; SOFTFP-NOLSE-NEXT: ret
%res = atomicrmw fmin ptr %ptr, half %value seq_cst, align 2
ret half %res
@@ -130,46 +133,49 @@ define half @test_atomicrmw_fmin_f16_seq_cst_align4(ptr %ptr, half %value) #0 {
;
; SOFTFP-NOLSE-LABEL: test_atomicrmw_fmin_f16_seq_cst_align4:
; SOFTFP-NOLSE: // %bb.0:
-; SOFTFP-NOLSE-NEXT: stp x30, x23, [sp, #-48]! // 16-byte Folded Spill
+; SOFTFP-NOLSE-NEXT: str x30, [sp, #-48]! // 8-byte Folded Spill
; SOFTFP-NOLSE-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill
-; SOFTFP-NOLSE-NEXT: ldrh w20, [x0]
; SOFTFP-NOLSE-NEXT: mov x19, x0
+; SOFTFP-NOLSE-NEXT: ldrh w0, [x0]
+; SOFTFP-NOLSE-NEXT: mov w20, w1
; SOFTFP-NOLSE-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill
-; SOFTFP-NOLSE-NEXT: mov w21, w1
; SOFTFP-NOLSE-NEXT: b .LBB1_2
-; SOFTFP-NOLSE-NEXT: .LBB1_1: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: .LBB1_1: // %cmpxchg.nostore
; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB1_2 Depth=1
-; SOFTFP-NOLSE-NEXT: cmp w8, w23
-; SOFTFP-NOLSE-NEXT: mov w20, w8
-; SOFTFP-NOLSE-NEXT: b.eq .LBB1_5
+; SOFTFP-NOLSE-NEXT: mov w8, wzr
+; SOFTFP-NOLSE-NEXT: clrex
+; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB1_6
; SOFTFP-NOLSE-NEXT: .LBB1_2: // %atomicrmw.start
; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1
; SOFTFP-NOLSE-NEXT: // Child Loop BB1_3 Depth 2
-; SOFTFP-NOLSE-NEXT: and w0, w21, #0xffff
-; SOFTFP-NOLSE-NEXT: bl __gnu_h2f_ieee
-; SOFTFP-NOLSE-NEXT: and w23, w20, #0xffff
; SOFTFP-NOLSE-NEXT: mov w22, w0
-; SOFTFP-NOLSE-NEXT: mov w0, w23
+; SOFTFP-NOLSE-NEXT: and w0, w20, #0xffff
; SOFTFP-NOLSE-NEXT: bl __gnu_h2f_ieee
-; SOFTFP-NOLSE-NEXT: mov w1, w22
+; SOFTFP-NOLSE-NEXT: mov w21, w0
+; SOFTFP-NOLSE-NEXT: and w0, w22, #0xffff
+; SOFTFP-NOLSE-NEXT: bl __gnu_h2f_ieee
+; SOFTFP-NOLSE-NEXT: mov w1, w21
; SOFTFP-NOLSE-NEXT: bl fminf
; SOFTFP-NOLSE-NEXT: bl __gnu_f2h_ieee
-; SOFTFP-NOLSE-NEXT: .LBB1_3: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: mov w8, w0
+; SOFTFP-NOLSE-NEXT: .LBB1_3: // %cmpxchg.start
; SOFTFP-NOLSE-NEXT: // Parent Loop BB1_2 Depth=1
; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2
-; SOFTFP-NOLSE-NEXT: ldaxrh w8, [x19]
-; SOFTFP-NOLSE-NEXT: cmp w8, w20, uxth
+; SOFTFP-NOLSE-NEXT: ldaxrh w0, [x19]
+; SOFTFP-NOLSE-NEXT: cmp w0, w22, uxth
; SOFTFP-NOLSE-NEXT: b.ne .LBB1_1
-; SOFTFP-NOLSE-NEXT: // %bb.4: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore
; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB1_3 Depth=2
-; SOFTFP-NOLSE-NEXT: stlxrh wzr, w0, [x19]
-; SOFTFP-NOLSE-NEXT: cbnz wzr, .LBB1_3
-; SOFTFP-NOLSE-NEXT: b .LBB1_1
-; SOFTFP-NOLSE-NEXT: .LBB1_5: // %atomicrmw.end
-; SOFTFP-NOLSE-NEXT: mov w0, w20
+; SOFTFP-NOLSE-NEXT: stlxrh w9, w8, [x19]
+; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB1_3
+; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB1_2 Depth=1
+; SOFTFP-NOLSE-NEXT: mov w8, #1 // =0x1
+; SOFTFP-NOLSE-NEXT: cbz w8, .LBB1_2
+; SOFTFP-NOLSE-NEXT: .LBB1_6: // %atomicrmw.end
; SOFTFP-NOLSE-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
+; SOFTFP-NOLSE-NEXT: // kill: def $w0 killed $w0 killed $x0
; SOFTFP-NOLSE-NEXT: ldp x22, x21, [sp, #16] // 16-byte Folded Reload
-; SOFTFP-NOLSE-NEXT: ldp x30, x23, [sp], #48 // 16-byte Folded Reload
+; SOFTFP-NOLSE-NEXT: ldr x30, [sp], #48 // 8-byte Folded Reload
; SOFTFP-NOLSE-NEXT: ret
%res = atomicrmw fmin ptr %ptr, half %value seq_cst, align 4
ret half %res
@@ -234,36 +240,40 @@ define bfloat @test_atomicrmw_fmin_bf16_seq_cst_align2(ptr %ptr, bfloat %value)
; SOFTFP-NOLSE: // %bb.0:
; SOFTFP-NOLSE-NEXT: stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
; SOFTFP-NOLSE-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
-; SOFTFP-NOLSE-NEXT: ldrh w20, [x0]
-; SOFTFP-NOLSE-NEXT: lsl w21, w1, #16
; SOFTFP-NOLSE-NEXT: mov x19, x0
+; SOFTFP-NOLSE-NEXT: ldrh w0, [x0]
+; SOFTFP-NOLSE-NEXT: lsl w20, w1, #16
; SOFTFP-NOLSE-NEXT: b .LBB2_2
-; SOFTFP-NOLSE-NEXT: .LBB2_1: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: .LBB2_1: // %cmpxchg.nostore
; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB2_2 Depth=1
-; SOFTFP-NOLSE-NEXT: cmp w8, w20, uxth
-; SOFTFP-NOLSE-NEXT: mov w20, w8
-; SOFTFP-NOLSE-NEXT: b.eq .LBB2_5
+; SOFTFP-NOLSE-NEXT: mov w8, wzr
+; SOFTFP-NOLSE-NEXT: clrex
+; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB2_6
; SOFTFP-NOLSE-NEXT: .LBB2_2: // %atomicrmw.start
; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1
; SOFTFP-NOLSE-NEXT: // Child Loop BB2_3 Depth 2
-; SOFTFP-NOLSE-NEXT: lsl w0, w20, #16
-; SOFTFP-NOLSE-NEXT: mov w1, w21
+; SOFTFP-NOLSE-NEXT: mov w21, w0
+; SOFTFP-NOLSE-NEXT: lsl w0, w0, #16
+; SOFTFP-NOLSE-NEXT: mov w1, w20
; SOFTFP-NOLSE-NEXT: bl fminf
; SOFTFP-NOLSE-NEXT: bl __truncsfbf2
-; SOFTFP-NOLSE-NEXT: .LBB2_3: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: mov w8, w0
+; SOFTFP-NOLSE-NEXT: .LBB2_3: // %cmpxchg.start
; SOFTFP-NOLSE-NEXT: // Parent Loop BB2_2 Depth=1
; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2
-; SOFTFP-NOLSE-NEXT: ldaxrh w8, [x19]
-; SOFTFP-NOLSE-NEXT: cmp w8, w20, uxth
+; SOFTFP-NOLSE-NEXT: ldaxrh w0, [x19]
+; SOFTFP-NOLSE-NEXT: cmp w0, w21, uxth
; SOFTFP-NOLSE-NEXT: b.ne .LBB2_1
-; SOFTFP-NOLSE-NEXT: // %bb.4: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore
; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB2_3 Depth=2
-; SOFTFP-NOLSE-NEXT: stlxrh wzr, w0, [x19]
-; SOFTFP-NOLSE-NEXT: cbnz wzr, .LBB2_3
-; SOFTFP-NOLSE-NEXT: b .LBB2_1
-; SOFTFP-NOLSE-NEXT: .LBB2_5: // %atomicrmw.end
-; SOFTFP-NOLSE-NEXT: mov w0, w20
+; SOFTFP-NOLSE-NEXT: stlxrh w9, w8, [x19]
+; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB2_3
+; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB2_2 Depth=1
+; SOFTFP-NOLSE-NEXT: mov w8, #1 // =0x1
+; SOFTFP-NOLSE-NEXT: cbz w8, .LBB2_2
+; SOFTFP-NOLSE-NEXT: .LBB2_6: // %atomicrmw.end
; SOFTFP-NOLSE-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
+; SOFTFP-NOLSE-NEXT: // kill: def $w0 killed $w0 killed $x0
; SOFTFP-NOLSE-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload
; SOFTFP-NOLSE-NEXT: ret
%res = atomicrmw fmin ptr %ptr, bfloat %value seq_cst, align 2
@@ -329,36 +339,40 @@ define bfloat @test_atomicrmw_fmin_bf16_seq_cst_align4(ptr %ptr, bfloat %value)
; SOFTFP-NOLSE: // %bb.0:
; SOFTFP-NOLSE-NEXT: stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
; SOFTFP-NOLSE-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
-; SOFTFP-NOLSE-NEXT: ldrh w20, [x0]
-; SOFTFP-NOLSE-NEXT: lsl w21, w1, #16
; SOFTFP-NOLSE-NEXT: mov x19, x0
+; SOFTFP-NOLSE-NEXT: ldrh w0, [x0]
+; SOFTFP-NOLSE-NEXT: lsl w20, w1, #16
; SOFTFP-NOLSE-NEXT: b .LBB3_2
-; SOFTFP-NOLSE-NEXT: .LBB3_1: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: .LBB3_1: // %cmpxchg.nostore
; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB3_2 Depth=1
-; SOFTFP-NOLSE-NEXT: cmp w8, w20, uxth
-; SOFTFP-NOLSE-NEXT: mov w20, w8
-; SOFTFP-NOLSE-NEXT: b.eq .LBB3_5
+; SOFTFP-NOLSE-NEXT: mov w8, wzr
+; SOFTFP-NOLSE-NEXT: clrex
+; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB3_6
; SOFTFP-NOLSE-NEXT: .LBB3_2: // %atomicrmw.start
; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1
; SOFTFP-NOLSE-NEXT: // Child Loop BB3_3 Depth 2
-; SOFTFP-NOLSE-NEXT: lsl w0, w20, #16
-; SOFTFP-NOLSE-NEXT: mov w1, w21
+; SOFTFP-NOLSE-NEXT: mov w21, w0
+; SOFTFP-NOLSE-NEXT: lsl w0, w0, #16
+; SOFTFP-NOLSE-NEXT: mov w1, w20
; SOFTFP-NOLSE-NEXT: bl fminf
; SOFTFP-NOLSE-NEXT: bl __truncsfbf2
-; SOFTFP-NOLSE-NEXT: .LBB3_3: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: mov w8, w0
+; SOFTFP-NOLSE-NEXT: .LBB3_3: // %cmpxchg.start
; SOFTFP-NOLSE-NEXT: // Parent Loop BB3_2 Depth=1
; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2
-; SOFTFP-NOLSE-NEXT: ldaxrh w8, [x19]
-; SOFTFP-NOLSE-NEXT: cmp w8, w20, uxth
+; SOFTFP-NOLSE-NEXT: ldaxrh w0, [x19]
+; SOFTFP-NOLSE-NEXT: cmp w0, w21, uxth
; SOFTFP-NOLSE-NEXT: b.ne .LBB3_1
-; SOFTFP-NOLSE-NEXT: // %bb.4: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore
; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB3_3 Depth=2
-; SOFTFP-NOLSE-NEXT: stlxrh wzr, w0, [x19]
-; SOFTFP-NOLSE-NEXT: cbnz wzr, .LBB3_3
-; SOFTFP-NOLSE-NEXT: b .LBB3_1
-; SOFTFP-NOLSE-NEXT: .LBB3_5: // %atomicrmw.end
-; SOFTFP-NOLSE-NEXT: mov w0, w20
+; SOFTFP-NOLSE-NEXT: stlxrh w9, w8, [x19]
+; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB3_3
+; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB3_2 Depth=1
+; SOFTFP-NOLSE-NEXT: mov w8, #1 // =0x1
+; SOFTFP-NOLSE-NEXT: cbz w8, .LBB3_2
+; SOFTFP-NOLSE-NEXT: .LBB3_6: // %atomicrmw.end
; SOFTFP-NOLSE-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
+; SOFTFP-NOLSE-NEXT: // kill: def $w0 killed $w0 killed $x0
; SOFTFP-NOLSE-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload
; SOFTFP-NOLSE-NEXT: ret
%res = atomicrmw fmin ptr %ptr, bfloat %value seq_cst, align 4
@@ -401,35 +415,38 @@ define float @test_atomicrmw_fmin_f32_seq_cst_align4(ptr %ptr, float %value) #0
; SOFTFP-NOLSE: // %bb.0:
; SOFTFP-NOLSE-NEXT: stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
; SOFTFP-NOLSE-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
-; SOFTFP-NOLSE-NEXT: ldr w20, [x0]
; SOFTFP-NOLSE-NEXT: mov x19, x0
-; SOFTFP-NOLSE-NEXT: mov w21, w1
+; SOFTFP-NOLSE-NEXT: ldr w0, [x0]
+; SOFTFP-NOLSE-NEXT: mov w20, w1
; SOFTFP-NOLSE-NEXT: b .LBB4_2
-; SOFTFP-NOLSE-NEXT: .LBB4_1: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: .LBB4_1: // %cmpxchg.nostore
; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB4_2 Depth=1
-; SOFTFP-NOLSE-NEXT: cmp w8, w20
-; SOFTFP-NOLSE-NEXT: mov w20, w8
-; SOFTFP-NOLSE-NEXT: b.eq .LBB4_5
+; SOFTFP-NOLSE-NEXT: mov w8, wzr
+; SOFTFP-NOLSE-NEXT: clrex
+; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB4_6
; SOFTFP-NOLSE-NEXT: .LBB4_2: // %atomicrmw.start
; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1
; SOFTFP-NOLSE-NEXT: // Child Loop BB4_3 Depth 2
-; SOFTFP-NOLSE-NEXT: mov w0, w20
-; SOFTFP-NOLSE-NEXT: mov w1, w21
+; SOFTFP-NOLSE-NEXT: mov w1, w20
+; SOFTFP-NOLSE-NEXT: mov w21, w0
; SOFTFP-NOLSE-NEXT: bl fminf
-; SOFTFP-NOLSE-NEXT: .LBB4_3: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: mov w8, w0
+; SOFTFP-NOLSE-NEXT: .LBB4_3: // %cmpxchg.start
; SOFTFP-NOLSE-NEXT: // Parent Loop BB4_2 Depth=1
; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2
-; SOFTFP-NOLSE-NEXT: ldaxr w8, [x19]
-; SOFTFP-NOLSE-NEXT: cmp w8, w20
+; SOFTFP-NOLSE-NEXT: ldaxr w0, [x19]
+; SOFTFP-NOLSE-NEXT: cmp w0, w21
; SOFTFP-NOLSE-NEXT: b.ne .LBB4_1
-; SOFTFP-NOLSE-NEXT: // %bb.4: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore
; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB4_3 Depth=2
-; SOFTFP-NOLSE-NEXT: stlxr wzr, w0, [x19]
-; SOFTFP-NOLSE-NEXT: cbnz wzr, .LBB4_3
-; SOFTFP-NOLSE-NEXT: b .LBB4_1
-; SOFTFP-NOLSE-NEXT: .LBB4_5: // %atomicrmw.end
-; SOFTFP-NOLSE-NEXT: mov w0, w20
+; SOFTFP-NOLSE-NEXT: stlxr w9, w8, [x19]
+; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB4_3
+; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB4_2 Depth=1
+; SOFTFP-NOLSE-NEXT: mov w8, #1 // =0x1
+; SOFTFP-NOLSE-NEXT: cbz w8, .LBB4_2
+; SOFTFP-NOLSE-NEXT: .LBB4_6: // %atomicrmw.end
; SOFTFP-NOLSE-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
+; SOFTFP-NOLSE-NEXT: // kill: def $w0 killed $w0 killed $x0
; SOFTFP-NOLSE-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload
; SOFTFP-NOLSE-NEXT: ret
%res = atomicrmw fmin ptr %ptr, float %value seq_cst, align 4
@@ -471,36 +488,40 @@ define double @test_atomicrmw_fmin_f32_seq_cst_align8(ptr %ptr, double %value) #
; SOFTFP-NOLSE-LABEL: test_atomicrmw_fmin_f32_seq_cst_align8:
; SOFTFP-NOLSE: // %bb.0:
; SOFTFP-NOLSE-NEXT: stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
+; SOFTFP-NOLSE-NEXT: ldr x21, [x0]
; SOFTFP-NOLSE-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
-; SOFTFP-NOLSE-NEXT: ldr x20, [x0]
; SOFTFP-NOLSE-NEXT: mov x19, x0
-; SOFTFP-NOLSE-NEXT: mov x21, x1
+; SOFTFP-NOLSE-NEXT: mov x20, x1
; SOFTFP-NOLSE-NEXT: b .LBB5_2
-; SOFTFP-NOLSE-NEXT: .LBB5_1: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: .LBB5_1: // %cmpxchg.nostore
; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB5_2 Depth=1
-; SOFTFP-NOLSE-NEXT: cmp x8, x20
-; SOFTFP-NOLSE-NEXT: mov x20, x8
-; SOFTFP-NOLSE-NEXT: b.eq .LBB5_5
+; SOFTFP-NOLSE-NEXT: mov w9, wzr
+; SOFTFP-NOLSE-NEXT: clrex
+; SOFTFP-NOLSE-NEXT: mov x21, x8
+; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB5_6
; SOFTFP-NOLSE-NEXT: .LBB5_2: // %atomicrmw.start
; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1
; SOFTFP-NOLSE-NEXT: // Child Loop BB5_3 Depth 2
-; SOFTFP-NOLSE-NEXT: mov x0, x20
-; SOFTFP-NOLSE-NEXT: mov x1, x21
+; SOFTFP-NOLSE-NEXT: mov x0, x21
+; SOFTFP-NOLSE-NEXT: mov x1, x20
; SOFTFP-NOLSE-NEXT: bl fmin
-; SOFTFP-NOLSE-NEXT: .LBB5_3: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: .LBB5_3: // %cmpxchg.start
; SOFTFP-NOLSE-NEXT: // Parent Loop BB5_2 Depth=1
; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2
; SOFTFP-NOLSE-NEXT: ldaxr x8, [x19]
-; SOFTFP-NOLSE-NEXT: cmp x8, x20
+; SOFTFP-NOLSE-NEXT: cmp x8, x21
; SOFTFP-NOLSE-NEXT: b.ne .LBB5_1
-; SOFTFP-NOLSE-NEXT: // %bb.4: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore
; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB5_3 Depth=2
-; SOFTFP-NOLSE-NEXT: stlxr wzr, x0, [x19]
-; SOFTFP-NOLSE-NEXT: cbnz wzr, .LBB5_3
-; SOFTFP-NOLSE-NEXT: b .LBB5_1
-; SOFTFP-NOLSE-NEXT: .LBB5_5: // %atomicrmw.end
-; SOFTFP-NOLSE-NEXT: mov x0, x20
+; SOFTFP-NOLSE-NEXT: stlxr w9, x0, [x19]
+; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB5_3
+; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB5_2 Depth=1
+; SOFTFP-NOLSE-NEXT: mov w9, #1 // =0x1
+; SOFTFP-NOLSE-NEXT: mov x21, x8
+; SOFTFP-NOLSE-NEXT: cbz w9, .LBB5_2
+; SOFTFP-NOLSE-NEXT: .LBB5_6: // %atomicrmw.end
; SOFTFP-NOLSE-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
+; SOFTFP-NOLSE-NEXT: mov x0, x21
; SOFTFP-NOLSE-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload
; SOFTFP-NOLSE-NEXT: ret
%res = atomicrmw fmin ptr %ptr, double %value seq_cst, align 8
@@ -567,18 +588,18 @@ define <2 x half> @test_atomicrmw_fmin_v2f16_seq_cst_align4(ptr %ptr, <2 x half>
; SOFTFP-NOLSE-NEXT: stp x24, x23, [sp, #16] // 16-byte Folded Spill
; SOFTFP-NOLSE-NEXT: ldrh w23, [x0, #2]
; SOFTFP-NOLSE-NEXT: stp x22, x21, [sp, #32] // 16-byte Folded Spill
-; SOFTFP-NOLSE-NEXT: ldrh w21, [x0]
-; SOFTFP-NOLSE-NEXT: mov w22, w1
+; SOFTFP-NOLSE-NEXT: ldrh w22, [x0]
+; SOFTFP-NOLSE-NEXT: mov w21, w1
; SOFTFP-NOLSE-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill
; SOFTFP-NOLSE-NEXT: mov w19, w2
; SOFTFP-NOLSE-NEXT: mov x20, x0
; SOFTFP-NOLSE-NEXT: b .LBB6_2
-; SOFTFP-NOLSE-NEXT: .LBB6_1: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: .LBB6_1: // %cmpxchg.nostore
; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB6_2 Depth=1
-; SOFTFP-NOLSE-NEXT: lsr w23, w8, #16
-; SOFTFP-NOLSE-NEXT: cmp w8, w21
-; SOFTFP-NOLSE-NEXT: mov w21, w8
-; SOFTFP-NOLSE-NEXT: b.eq .LBB6_5
+; SOFTFP-NOLSE-NEXT: mov w8, wzr
+; SOFTFP-NOLSE-NEXT: clrex
+; SOFTFP-NOLSE-NEXT: lsr w23, w22, #16
+; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB6_6
; SOFTFP-NOLSE-NEXT: .LBB6_2: // %atomicrmw.start
; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1
; SOFTFP-NOLSE-NEXT: // Child Loop BB6_3 Depth 2
@@ -591,29 +612,33 @@ define <2 x half> @test_atomicrmw_fmin_v2f16_seq_cst_align4(ptr %ptr, <2 x half>
; SOFTFP-NOLSE-NEXT: bl fminf
; SOFTFP-NOLSE-NEXT: bl __gnu_f2h_ieee
; SOFTFP-NOLSE-NEXT: mov w24, w0
-; SOFTFP-NOLSE-NEXT: and w0, w22, #0xffff
+; SOFTFP-NOLSE-NEXT: and w0, w21, #0xffff
; SOFTFP-NOLSE-NEXT: bl __gnu_h2f_ieee
; SOFTFP-NOLSE-NEXT: mov w25, w0
-; SOFTFP-NOLSE-NEXT: and w0, w21, #0xffff
+; SOFTFP-NOLSE-NEXT: and w0, w22, #0xffff
; SOFTFP-NOLSE-NEXT: bl __gnu_h2f_ieee
; SOFTFP-NOLSE-NEXT: mov w1, w25
; SOFTFP-NOLSE-NEXT: bl fminf
; SOFTFP-NOLSE-NEXT: bl __gnu_f2h_ieee
-; SOFTFP-NOLSE-NEXT: bfi w21, w23, #16, #16
+; SOFTFP-NOLSE-NEXT: mov w8, w22
; SOFTFP-NOLSE-NEXT: bfi w0, w24, #16, #16
-; SOFTFP-NOLSE-NEXT: .LBB6_3: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: bfi w8, w23, #16, #16
+; SOFTFP-NOLSE-NEXT: .LBB6_3: // %cmpxchg.start
; SOFTFP-NOLSE-NEXT: // Parent Loop BB6_2 Depth=1
; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2
-; SOFTFP-NOLSE-NEXT: ldaxr w8, [x20]
-; SOFTFP-NOLSE-NEXT: cmp w8, w21
+; SOFTFP-NOLSE-NEXT: ldaxr w22, [x20]
+; SOFTFP-NOLSE-NEXT: cmp w22, w8
; SOFTFP-NOLSE-NEXT: b.ne .LBB6_1
-; SOFTFP-NOLSE-NEXT: // %bb.4: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore
; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB6_3 Depth=2
-; SOFTFP-NOLSE-NEXT: stlxr wzr, w0, [x20]
-; SOFTFP-NOLSE-NEXT: cbnz wzr, .LBB6_3
-; SOFTFP-NOLSE-NEXT: b .LBB6_1
-; SOFTFP-NOLSE-NEXT: .LBB6_5: // %atomicrmw.end
-; SOFTFP-NOLSE-NEXT: mov w0, w21
+; SOFTFP-NOLSE-NEXT: stlxr w9, w0, [x20]
+; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB6_3
+; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB6_2 Depth=1
+; SOFTFP-NOLSE-NEXT: mov w8, #1 // =0x1
+; SOFTFP-NOLSE-NEXT: lsr w23, w22, #16
+; SOFTFP-NOLSE-NEXT: cbz w8, .LBB6_2
+; SOFTFP-NOLSE-NEXT: .LBB6_6: // %atomicrmw.end
+; SOFTFP-NOLSE-NEXT: mov w0, w22
; SOFTFP-NOLSE-NEXT: mov w1, w23
; SOFTFP-NOLSE-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload
; SOFTFP-NOLSE-NEXT: ldp x22, x21, [sp, #32] // 16-byte Folded Reload
@@ -723,17 +748,18 @@ define <2 x bfloat> @test_atomicrmw_fmin_v2bf16_seq_cst_align4(ptr %ptr, <2 x bf
; SOFTFP-NOLSE-NEXT: stp x22, x21, [sp, #32] // 16-byte Folded Spill
; SOFTFP-NOLSE-NEXT: ldrh w1, [x0, #2]
; SOFTFP-NOLSE-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill
-; SOFTFP-NOLSE-NEXT: ldrh w21, [x0]
+; SOFTFP-NOLSE-NEXT: ldrh w22, [x0]
; SOFTFP-NOLSE-NEXT: lsl w20, w2, #16
-; SOFTFP-NOLSE-NEXT: lsl w22, w8, #16
+; SOFTFP-NOLSE-NEXT: lsl w21, w8, #16
; SOFTFP-NOLSE-NEXT: mov x19, x0
; SOFTFP-NOLSE-NEXT: stp x24, x23, [sp, #16] // 16-byte Folded Spill
; SOFTFP-NOLSE-NEXT: b .LBB7_2
-; SOFTFP-NOLSE-NEXT: .LBB7_1: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: .LBB7_1: // %cmpxchg.nostore
; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB7_2 Depth=1
-; SOFTFP-NOLSE-NEXT: lsr w1, w21, #16
-; SOFTFP-NOLSE-NEXT: cmp w21, w23
-; SOFTFP-NOLSE-NEXT: b.eq .LBB7_5
+; SOFTFP-NOLSE-NEXT: mov w8, wzr
+; SOFTFP-NOLSE-NEXT: clrex
+; SOFTFP-NOLSE-NEXT: lsr w1, w22, #16
+; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB7_6
; SOFTFP-NOLSE-NEXT: .LBB7_2: // %atomicrmw.start
; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1
; SOFTFP-NOLSE-NEXT: // Child Loop BB7_3 Depth 2
@@ -743,25 +769,28 @@ define <2 x bfloat> @test_atomicrmw_fmin_v2bf16_seq_cst_align4(ptr %ptr, <2 x bf
; SOFTFP-NOLSE-NEXT: bl fminf
; SOFTFP-NOLSE-NEXT: bl __truncsfbf2
; SOFTFP-NOLSE-NEXT: mov w24, w0
-; SOFTFP-NOLSE-NEXT: lsl w0, w21, #16
-; SOFTFP-NOLSE-NEXT: mov w1, w22
+; SOFTFP-NOLSE-NEXT: lsl w0, w22, #16
+; SOFTFP-NOLSE-NEXT: mov w1, w21
; SOFTFP-NOLSE-NEXT: bl fminf
; SOFTFP-NOLSE-NEXT: bl __truncsfbf2
-; SOFTFP-NOLSE-NEXT: bfxil w23, w21, #0, #16
+; SOFTFP-NOLSE-NEXT: bfxil w23, w22, #0, #16
; SOFTFP-NOLSE-NEXT: bfi w0, w24, #16, #16
-; SOFTFP-NOLSE-NEXT: .LBB7_3: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: .LBB7_3: // %cmpxchg.start
; SOFTFP-NOLSE-NEXT: // Parent Loop BB7_2 Depth=1
; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2
-; SOFTFP-NOLSE-NEXT: ldaxr w21, [x19]
-; SOFTFP-NOLSE-NEXT: cmp w21, w23
+; SOFTFP-NOLSE-NEXT: ldaxr w22, [x19]
+; SOFTFP-NOLSE-NEXT: cmp w22, w23
; SOFTFP-NOLSE-NEXT: b.ne .LBB7_1
-; SOFTFP-NOLSE-NEXT: // %bb.4: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore
; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB7_3 Depth=2
-; SOFTFP-NOLSE-NEXT: stlxr wzr, w0, [x19]
-; SOFTFP-NOLSE-NEXT: cbnz wzr, .LBB7_3
-; SOFTFP-NOLSE-NEXT: b .LBB7_1
-; SOFTFP-NOLSE-NEXT: .LBB7_5: // %atomicrmw.end
-; SOFTFP-NOLSE-NEXT: mov w0, w21
+; SOFTFP-NOLSE-NEXT: stlxr w8, w0, [x19]
+; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB7_3
+; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB7_2 Depth=1
+; SOFTFP-NOLSE-NEXT: mov w8, #1 // =0x1
+; SOFTFP-NOLSE-NEXT: lsr w1, w22, #16
+; SOFTFP-NOLSE-NEXT: cbz w8, .LBB7_2
+; SOFTFP-NOLSE-NEXT: .LBB7_6: // %atomicrmw.end
+; SOFTFP-NOLSE-NEXT: mov w0, w22
; SOFTFP-NOLSE-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload
; SOFTFP-NOLSE-NEXT: ldp x22, x21, [sp, #32] // 16-byte Folded Reload
; SOFTFP-NOLSE-NEXT: ldp x24, x23, [sp, #16] // 16-byte Folded Reload
@@ -809,45 +838,49 @@ define <2 x float> @test_atomicrmw_fmin_v2f32_seq_cst_align8(ptr %ptr, <2 x floa
; SOFTFP-NOLSE-NEXT: stp x24, x23, [sp, #16] // 16-byte Folded Spill
; SOFTFP-NOLSE-NEXT: stp x22, x21, [sp, #32] // 16-byte Folded Spill
; SOFTFP-NOLSE-NEXT: mov w21, w1
-; SOFTFP-NOLSE-NEXT: ldp w23, w22, [x0]
+; SOFTFP-NOLSE-NEXT: ldp w22, w23, [x0]
; SOFTFP-NOLSE-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill
; SOFTFP-NOLSE-NEXT: mov w19, w2
; SOFTFP-NOLSE-NEXT: mov x20, x0
; SOFTFP-NOLSE-NEXT: b .LBB8_2
-; SOFTFP-NOLSE-NEXT: .LBB8_1: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: .LBB8_1: // %cmpxchg.nostore
; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB8_2 Depth=1
-; SOFTFP-NOLSE-NEXT: lsr x22, x23, #32
-; SOFTFP-NOLSE-NEXT: cmp x23, x8
-; SOFTFP-NOLSE-NEXT: // kill: def $w22 killed $w22 killed $x22 def $x22
-; SOFTFP-NOLSE-NEXT: b.eq .LBB8_5
+; SOFTFP-NOLSE-NEXT: mov w8, wzr
+; SOFTFP-NOLSE-NEXT: clrex
+; SOFTFP-NOLSE-NEXT: lsr x23, x22, #32
+; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB8_6
; SOFTFP-NOLSE-NEXT: .LBB8_2: // %atomicrmw.start
; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1
; SOFTFP-NOLSE-NEXT: // Child Loop BB8_3 Depth 2
-; SOFTFP-NOLSE-NEXT: mov w0, w22
+; SOFTFP-NOLSE-NEXT: mov w0, w23
; SOFTFP-NOLSE-NEXT: mov w1, w19
; SOFTFP-NOLSE-NEXT: bl fminf
; SOFTFP-NOLSE-NEXT: mov w24, w0
-; SOFTFP-NOLSE-NEXT: mov w0, w23
+; SOFTFP-NOLSE-NEXT: mov w0, w22
; SOFTFP-NOLSE-NEXT: mov w1, w21
; SOFTFP-NOLSE-NEXT: bl fminf
-; SOFTFP-NOLSE-NEXT: mov w8, w23
-; SOFTFP-NOLSE-NEXT: mov w9, w0
-; SOFTFP-NOLSE-NEXT: orr x9, x9, x24, lsl #32
-; SOFTFP-NOLSE-NEXT: orr x8, x8, x22, lsl #32
-; SOFTFP-NOLSE-NEXT: .LBB8_3: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: mov w8, w0
+; SOFTFP-NOLSE-NEXT: mov w9, w22
+; SOFTFP-NOLSE-NEXT: // kill: def $w23 killed $w23 killed $x23 def $x23
+; SOFTFP-NOLSE-NEXT: orr x8, x8, x24, lsl #32
+; SOFTFP-NOLSE-NEXT: orr x9, x9, x23, lsl #32
+; SOFTFP-NOLSE-NEXT: .LBB8_3: // %cmpxchg.start
; SOFTFP-NOLSE-NEXT: // Parent Loop BB8_2 Depth=1
; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2
-; SOFTFP-NOLSE-NEXT: ldaxr x23, [x20]
-; SOFTFP-NOLSE-NEXT: cmp x23, x8
+; SOFTFP-NOLSE-NEXT: ldaxr x22, [x20]
+; SOFTFP-NOLSE-NEXT: cmp x22, x9
; SOFTFP-NOLSE-NEXT: b.ne .LBB8_1
-; SOFTFP-NOLSE-NEXT: // %bb.4: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore
; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB8_3 Depth=2
-; SOFTFP-NOLSE-NEXT: stlxr wzr, x9, [x20]
-; SOFTFP-NOLSE-NEXT: cbnz wzr, .LBB8_3
-; SOFTFP-NOLSE-NEXT: b .LBB8_1
-; SOFTFP-NOLSE-NEXT: .LBB8_5: // %atomicrmw.end
-; SOFTFP-NOLSE-NEXT: mov w0, w23
-; SOFTFP-NOLSE-NEXT: mov w1, w22
+; SOFTFP-NOLSE-NEXT: stlxr w10, x8, [x20]
+; SOFTFP-NOLSE-NEXT: cbnz w10, .LBB8_3
+; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB8_2 Depth=1
+; SOFTFP-NOLSE-NEXT: mov w8, #1 // =0x1
+; SOFTFP-NOLSE-NEXT: lsr x23, x22, #32
+; SOFTFP-NOLSE-NEXT: cbz w8, .LBB8_2
+; SOFTFP-NOLSE-NEXT: .LBB8_6: // %atomicrmw.end
+; SOFTFP-NOLSE-NEXT: mov w0, w22
+; SOFTFP-NOLSE-NEXT: mov w1, w23
; SOFTFP-NOLSE-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload
; SOFTFP-NOLSE-NEXT: ldp x22, x21, [sp, #32] // 16-byte Folded Reload
; SOFTFP-NOLSE-NEXT: ldp x24, x23, [sp, #16] // 16-byte Folded Reload
diff --git a/llvm/test/CodeGen/AArch64/atomicrmw-fsub.ll b/llvm/test/CodeGen/AArch64/atomicrmw-fsub.ll
index 67e164037d5ce7..7725ce0e731859 100644
--- a/llvm/test/CodeGen/AArch64/atomicrmw-fsub.ll
+++ b/llvm/test/CodeGen/AArch64/atomicrmw-fsub.ll
@@ -43,46 +43,49 @@ define half @test_atomicrmw_fsub_f16_seq_cst_align2(ptr %ptr, half %value) #0 {
;
; SOFTFP-NOLSE-LABEL: test_atomicrmw_fsub_f16_seq_cst_align2:
; SOFTFP-NOLSE: // %bb.0:
-; SOFTFP-NOLSE-NEXT: stp x30, x23, [sp, #-48]! // 16-byte Folded Spill
+; SOFTFP-NOLSE-NEXT: str x30, [sp, #-48]! // 8-byte Folded Spill
; SOFTFP-NOLSE-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill
-; SOFTFP-NOLSE-NEXT: ldrh w20, [x0]
; SOFTFP-NOLSE-NEXT: mov x19, x0
+; SOFTFP-NOLSE-NEXT: ldrh w0, [x0]
+; SOFTFP-NOLSE-NEXT: mov w20, w1
; SOFTFP-NOLSE-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill
-; SOFTFP-NOLSE-NEXT: mov w21, w1
; SOFTFP-NOLSE-NEXT: b .LBB0_2
-; SOFTFP-NOLSE-NEXT: .LBB0_1: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: .LBB0_1: // %cmpxchg.nostore
; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB0_2 Depth=1
-; SOFTFP-NOLSE-NEXT: cmp w8, w23
-; SOFTFP-NOLSE-NEXT: mov w20, w8
-; SOFTFP-NOLSE-NEXT: b.eq .LBB0_5
+; SOFTFP-NOLSE-NEXT: mov w8, wzr
+; SOFTFP-NOLSE-NEXT: clrex
+; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB0_6
; SOFTFP-NOLSE-NEXT: .LBB0_2: // %atomicrmw.start
; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1
; SOFTFP-NOLSE-NEXT: // Child Loop BB0_3 Depth 2
-; SOFTFP-NOLSE-NEXT: and w0, w21, #0xffff
-; SOFTFP-NOLSE-NEXT: bl __gnu_h2f_ieee
-; SOFTFP-NOLSE-NEXT: and w23, w20, #0xffff
; SOFTFP-NOLSE-NEXT: mov w22, w0
-; SOFTFP-NOLSE-NEXT: mov w0, w23
+; SOFTFP-NOLSE-NEXT: and w0, w20, #0xffff
+; SOFTFP-NOLSE-NEXT: bl __gnu_h2f_ieee
+; SOFTFP-NOLSE-NEXT: mov w21, w0
+; SOFTFP-NOLSE-NEXT: and w0, w22, #0xffff
; SOFTFP-NOLSE-NEXT: bl __gnu_h2f_ieee
-; SOFTFP-NOLSE-NEXT: mov w1, w22
+; SOFTFP-NOLSE-NEXT: mov w1, w21
; SOFTFP-NOLSE-NEXT: bl __subsf3
; SOFTFP-NOLSE-NEXT: bl __gnu_f2h_ieee
-; SOFTFP-NOLSE-NEXT: .LBB0_3: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: mov w8, w0
+; SOFTFP-NOLSE-NEXT: .LBB0_3: // %cmpxchg.start
; SOFTFP-NOLSE-NEXT: // Parent Loop BB0_2 Depth=1
; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2
-; SOFTFP-NOLSE-NEXT: ldaxrh w8, [x19]
-; SOFTFP-NOLSE-NEXT: cmp w8, w20, uxth
+; SOFTFP-NOLSE-NEXT: ldaxrh w0, [x19]
+; SOFTFP-NOLSE-NEXT: cmp w0, w22, uxth
; SOFTFP-NOLSE-NEXT: b.ne .LBB0_1
-; SOFTFP-NOLSE-NEXT: // %bb.4: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore
; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB0_3 Depth=2
-; SOFTFP-NOLSE-NEXT: stlxrh wzr, w0, [x19]
-; SOFTFP-NOLSE-NEXT: cbnz wzr, .LBB0_3
-; SOFTFP-NOLSE-NEXT: b .LBB0_1
-; SOFTFP-NOLSE-NEXT: .LBB0_5: // %atomicrmw.end
-; SOFTFP-NOLSE-NEXT: mov w0, w20
+; SOFTFP-NOLSE-NEXT: stlxrh w9, w8, [x19]
+; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB0_3
+; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB0_2 Depth=1
+; SOFTFP-NOLSE-NEXT: mov w8, #1 // =0x1
+; SOFTFP-NOLSE-NEXT: cbz w8, .LBB0_2
+; SOFTFP-NOLSE-NEXT: .LBB0_6: // %atomicrmw.end
; SOFTFP-NOLSE-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
+; SOFTFP-NOLSE-NEXT: // kill: def $w0 killed $w0 killed $x0
; SOFTFP-NOLSE-NEXT: ldp x22, x21, [sp, #16] // 16-byte Folded Reload
-; SOFTFP-NOLSE-NEXT: ldp x30, x23, [sp], #48 // 16-byte Folded Reload
+; SOFTFP-NOLSE-NEXT: ldr x30, [sp], #48 // 8-byte Folded Reload
; SOFTFP-NOLSE-NEXT: ret
%res = atomicrmw fsub ptr %ptr, half %value seq_cst, align 2
ret half %res
@@ -128,46 +131,49 @@ define half @test_atomicrmw_fsub_f16_seq_cst_align4(ptr %ptr, half %value) #0 {
;
; SOFTFP-NOLSE-LABEL: test_atomicrmw_fsub_f16_seq_cst_align4:
; SOFTFP-NOLSE: // %bb.0:
-; SOFTFP-NOLSE-NEXT: stp x30, x23, [sp, #-48]! // 16-byte Folded Spill
+; SOFTFP-NOLSE-NEXT: str x30, [sp, #-48]! // 8-byte Folded Spill
; SOFTFP-NOLSE-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill
-; SOFTFP-NOLSE-NEXT: ldrh w20, [x0]
; SOFTFP-NOLSE-NEXT: mov x19, x0
+; SOFTFP-NOLSE-NEXT: ldrh w0, [x0]
+; SOFTFP-NOLSE-NEXT: mov w20, w1
; SOFTFP-NOLSE-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill
-; SOFTFP-NOLSE-NEXT: mov w21, w1
; SOFTFP-NOLSE-NEXT: b .LBB1_2
-; SOFTFP-NOLSE-NEXT: .LBB1_1: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: .LBB1_1: // %cmpxchg.nostore
; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB1_2 Depth=1
-; SOFTFP-NOLSE-NEXT: cmp w8, w23
-; SOFTFP-NOLSE-NEXT: mov w20, w8
-; SOFTFP-NOLSE-NEXT: b.eq .LBB1_5
+; SOFTFP-NOLSE-NEXT: mov w8, wzr
+; SOFTFP-NOLSE-NEXT: clrex
+; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB1_6
; SOFTFP-NOLSE-NEXT: .LBB1_2: // %atomicrmw.start
; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1
; SOFTFP-NOLSE-NEXT: // Child Loop BB1_3 Depth 2
-; SOFTFP-NOLSE-NEXT: and w0, w21, #0xffff
-; SOFTFP-NOLSE-NEXT: bl __gnu_h2f_ieee
-; SOFTFP-NOLSE-NEXT: and w23, w20, #0xffff
; SOFTFP-NOLSE-NEXT: mov w22, w0
-; SOFTFP-NOLSE-NEXT: mov w0, w23
+; SOFTFP-NOLSE-NEXT: and w0, w20, #0xffff
+; SOFTFP-NOLSE-NEXT: bl __gnu_h2f_ieee
+; SOFTFP-NOLSE-NEXT: mov w21, w0
+; SOFTFP-NOLSE-NEXT: and w0, w22, #0xffff
; SOFTFP-NOLSE-NEXT: bl __gnu_h2f_ieee
-; SOFTFP-NOLSE-NEXT: mov w1, w22
+; SOFTFP-NOLSE-NEXT: mov w1, w21
; SOFTFP-NOLSE-NEXT: bl __subsf3
; SOFTFP-NOLSE-NEXT: bl __gnu_f2h_ieee
-; SOFTFP-NOLSE-NEXT: .LBB1_3: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: mov w8, w0
+; SOFTFP-NOLSE-NEXT: .LBB1_3: // %cmpxchg.start
; SOFTFP-NOLSE-NEXT: // Parent Loop BB1_2 Depth=1
; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2
-; SOFTFP-NOLSE-NEXT: ldaxrh w8, [x19]
-; SOFTFP-NOLSE-NEXT: cmp w8, w20, uxth
+; SOFTFP-NOLSE-NEXT: ldaxrh w0, [x19]
+; SOFTFP-NOLSE-NEXT: cmp w0, w22, uxth
; SOFTFP-NOLSE-NEXT: b.ne .LBB1_1
-; SOFTFP-NOLSE-NEXT: // %bb.4: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore
; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB1_3 Depth=2
-; SOFTFP-NOLSE-NEXT: stlxrh wzr, w0, [x19]
-; SOFTFP-NOLSE-NEXT: cbnz wzr, .LBB1_3
-; SOFTFP-NOLSE-NEXT: b .LBB1_1
-; SOFTFP-NOLSE-NEXT: .LBB1_5: // %atomicrmw.end
-; SOFTFP-NOLSE-NEXT: mov w0, w20
+; SOFTFP-NOLSE-NEXT: stlxrh w9, w8, [x19]
+; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB1_3
+; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB1_2 Depth=1
+; SOFTFP-NOLSE-NEXT: mov w8, #1 // =0x1
+; SOFTFP-NOLSE-NEXT: cbz w8, .LBB1_2
+; SOFTFP-NOLSE-NEXT: .LBB1_6: // %atomicrmw.end
; SOFTFP-NOLSE-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
+; SOFTFP-NOLSE-NEXT: // kill: def $w0 killed $w0 killed $x0
; SOFTFP-NOLSE-NEXT: ldp x22, x21, [sp, #16] // 16-byte Folded Reload
-; SOFTFP-NOLSE-NEXT: ldp x30, x23, [sp], #48 // 16-byte Folded Reload
+; SOFTFP-NOLSE-NEXT: ldr x30, [sp], #48 // 8-byte Folded Reload
; SOFTFP-NOLSE-NEXT: ret
%res = atomicrmw fsub ptr %ptr, half %value seq_cst, align 4
ret half %res
@@ -232,36 +238,40 @@ define bfloat @test_atomicrmw_fsub_bf16_seq_cst_align2(ptr %ptr, bfloat %value)
; SOFTFP-NOLSE: // %bb.0:
; SOFTFP-NOLSE-NEXT: stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
; SOFTFP-NOLSE-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
-; SOFTFP-NOLSE-NEXT: ldrh w20, [x0]
-; SOFTFP-NOLSE-NEXT: lsl w21, w1, #16
; SOFTFP-NOLSE-NEXT: mov x19, x0
+; SOFTFP-NOLSE-NEXT: ldrh w0, [x0]
+; SOFTFP-NOLSE-NEXT: lsl w20, w1, #16
; SOFTFP-NOLSE-NEXT: b .LBB2_2
-; SOFTFP-NOLSE-NEXT: .LBB2_1: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: .LBB2_1: // %cmpxchg.nostore
; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB2_2 Depth=1
-; SOFTFP-NOLSE-NEXT: cmp w8, w20, uxth
-; SOFTFP-NOLSE-NEXT: mov w20, w8
-; SOFTFP-NOLSE-NEXT: b.eq .LBB2_5
+; SOFTFP-NOLSE-NEXT: mov w8, wzr
+; SOFTFP-NOLSE-NEXT: clrex
+; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB2_6
; SOFTFP-NOLSE-NEXT: .LBB2_2: // %atomicrmw.start
; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1
; SOFTFP-NOLSE-NEXT: // Child Loop BB2_3 Depth 2
-; SOFTFP-NOLSE-NEXT: lsl w0, w20, #16
-; SOFTFP-NOLSE-NEXT: mov w1, w21
+; SOFTFP-NOLSE-NEXT: mov w21, w0
+; SOFTFP-NOLSE-NEXT: lsl w0, w0, #16
+; SOFTFP-NOLSE-NEXT: mov w1, w20
; SOFTFP-NOLSE-NEXT: bl __subsf3
; SOFTFP-NOLSE-NEXT: bl __truncsfbf2
-; SOFTFP-NOLSE-NEXT: .LBB2_3: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: mov w8, w0
+; SOFTFP-NOLSE-NEXT: .LBB2_3: // %cmpxchg.start
; SOFTFP-NOLSE-NEXT: // Parent Loop BB2_2 Depth=1
; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2
-; SOFTFP-NOLSE-NEXT: ldaxrh w8, [x19]
-; SOFTFP-NOLSE-NEXT: cmp w8, w20, uxth
+; SOFTFP-NOLSE-NEXT: ldaxrh w0, [x19]
+; SOFTFP-NOLSE-NEXT: cmp w0, w21, uxth
; SOFTFP-NOLSE-NEXT: b.ne .LBB2_1
-; SOFTFP-NOLSE-NEXT: // %bb.4: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore
; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB2_3 Depth=2
-; SOFTFP-NOLSE-NEXT: stlxrh wzr, w0, [x19]
-; SOFTFP-NOLSE-NEXT: cbnz wzr, .LBB2_3
-; SOFTFP-NOLSE-NEXT: b .LBB2_1
-; SOFTFP-NOLSE-NEXT: .LBB2_5: // %atomicrmw.end
-; SOFTFP-NOLSE-NEXT: mov w0, w20
+; SOFTFP-NOLSE-NEXT: stlxrh w9, w8, [x19]
+; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB2_3
+; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB2_2 Depth=1
+; SOFTFP-NOLSE-NEXT: mov w8, #1 // =0x1
+; SOFTFP-NOLSE-NEXT: cbz w8, .LBB2_2
+; SOFTFP-NOLSE-NEXT: .LBB2_6: // %atomicrmw.end
; SOFTFP-NOLSE-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
+; SOFTFP-NOLSE-NEXT: // kill: def $w0 killed $w0 killed $x0
; SOFTFP-NOLSE-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload
; SOFTFP-NOLSE-NEXT: ret
%res = atomicrmw fsub ptr %ptr, bfloat %value seq_cst, align 2
@@ -327,36 +337,40 @@ define bfloat @test_atomicrmw_fsub_bf16_seq_cst_align4(ptr %ptr, bfloat %value)
; SOFTFP-NOLSE: // %bb.0:
; SOFTFP-NOLSE-NEXT: stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
; SOFTFP-NOLSE-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
-; SOFTFP-NOLSE-NEXT: ldrh w20, [x0]
-; SOFTFP-NOLSE-NEXT: lsl w21, w1, #16
; SOFTFP-NOLSE-NEXT: mov x19, x0
+; SOFTFP-NOLSE-NEXT: ldrh w0, [x0]
+; SOFTFP-NOLSE-NEXT: lsl w20, w1, #16
; SOFTFP-NOLSE-NEXT: b .LBB3_2
-; SOFTFP-NOLSE-NEXT: .LBB3_1: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: .LBB3_1: // %cmpxchg.nostore
; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB3_2 Depth=1
-; SOFTFP-NOLSE-NEXT: cmp w8, w20, uxth
-; SOFTFP-NOLSE-NEXT: mov w20, w8
-; SOFTFP-NOLSE-NEXT: b.eq .LBB3_5
+; SOFTFP-NOLSE-NEXT: mov w8, wzr
+; SOFTFP-NOLSE-NEXT: clrex
+; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB3_6
; SOFTFP-NOLSE-NEXT: .LBB3_2: // %atomicrmw.start
; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1
; SOFTFP-NOLSE-NEXT: // Child Loop BB3_3 Depth 2
-; SOFTFP-NOLSE-NEXT: lsl w0, w20, #16
-; SOFTFP-NOLSE-NEXT: mov w1, w21
+; SOFTFP-NOLSE-NEXT: mov w21, w0
+; SOFTFP-NOLSE-NEXT: lsl w0, w0, #16
+; SOFTFP-NOLSE-NEXT: mov w1, w20
; SOFTFP-NOLSE-NEXT: bl __subsf3
; SOFTFP-NOLSE-NEXT: bl __truncsfbf2
-; SOFTFP-NOLSE-NEXT: .LBB3_3: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: mov w8, w0
+; SOFTFP-NOLSE-NEXT: .LBB3_3: // %cmpxchg.start
; SOFTFP-NOLSE-NEXT: // Parent Loop BB3_2 Depth=1
; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2
-; SOFTFP-NOLSE-NEXT: ldaxrh w8, [x19]
-; SOFTFP-NOLSE-NEXT: cmp w8, w20, uxth
+; SOFTFP-NOLSE-NEXT: ldaxrh w0, [x19]
+; SOFTFP-NOLSE-NEXT: cmp w0, w21, uxth
; SOFTFP-NOLSE-NEXT: b.ne .LBB3_1
-; SOFTFP-NOLSE-NEXT: // %bb.4: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore
; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB3_3 Depth=2
-; SOFTFP-NOLSE-NEXT: stlxrh wzr, w0, [x19]
-; SOFTFP-NOLSE-NEXT: cbnz wzr, .LBB3_3
-; SOFTFP-NOLSE-NEXT: b .LBB3_1
-; SOFTFP-NOLSE-NEXT: .LBB3_5: // %atomicrmw.end
-; SOFTFP-NOLSE-NEXT: mov w0, w20
+; SOFTFP-NOLSE-NEXT: stlxrh w9, w8, [x19]
+; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB3_3
+; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB3_2 Depth=1
+; SOFTFP-NOLSE-NEXT: mov w8, #1 // =0x1
+; SOFTFP-NOLSE-NEXT: cbz w8, .LBB3_2
+; SOFTFP-NOLSE-NEXT: .LBB3_6: // %atomicrmw.end
; SOFTFP-NOLSE-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
+; SOFTFP-NOLSE-NEXT: // kill: def $w0 killed $w0 killed $x0
; SOFTFP-NOLSE-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload
; SOFTFP-NOLSE-NEXT: ret
%res = atomicrmw fsub ptr %ptr, bfloat %value seq_cst, align 4
@@ -399,35 +413,38 @@ define float @test_atomicrmw_fsub_f32_seq_cst_align4(ptr %ptr, float %value) #0
; SOFTFP-NOLSE: // %bb.0:
; SOFTFP-NOLSE-NEXT: stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
; SOFTFP-NOLSE-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
-; SOFTFP-NOLSE-NEXT: ldr w20, [x0]
; SOFTFP-NOLSE-NEXT: mov x19, x0
-; SOFTFP-NOLSE-NEXT: mov w21, w1
+; SOFTFP-NOLSE-NEXT: ldr w0, [x0]
+; SOFTFP-NOLSE-NEXT: mov w20, w1
; SOFTFP-NOLSE-NEXT: b .LBB4_2
-; SOFTFP-NOLSE-NEXT: .LBB4_1: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: .LBB4_1: // %cmpxchg.nostore
; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB4_2 Depth=1
-; SOFTFP-NOLSE-NEXT: cmp w8, w20
-; SOFTFP-NOLSE-NEXT: mov w20, w8
-; SOFTFP-NOLSE-NEXT: b.eq .LBB4_5
+; SOFTFP-NOLSE-NEXT: mov w8, wzr
+; SOFTFP-NOLSE-NEXT: clrex
+; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB4_6
; SOFTFP-NOLSE-NEXT: .LBB4_2: // %atomicrmw.start
; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1
; SOFTFP-NOLSE-NEXT: // Child Loop BB4_3 Depth 2
-; SOFTFP-NOLSE-NEXT: mov w0, w20
-; SOFTFP-NOLSE-NEXT: mov w1, w21
+; SOFTFP-NOLSE-NEXT: mov w1, w20
+; SOFTFP-NOLSE-NEXT: mov w21, w0
; SOFTFP-NOLSE-NEXT: bl __subsf3
-; SOFTFP-NOLSE-NEXT: .LBB4_3: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: mov w8, w0
+; SOFTFP-NOLSE-NEXT: .LBB4_3: // %cmpxchg.start
; SOFTFP-NOLSE-NEXT: // Parent Loop BB4_2 Depth=1
; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2
-; SOFTFP-NOLSE-NEXT: ldaxr w8, [x19]
-; SOFTFP-NOLSE-NEXT: cmp w8, w20
+; SOFTFP-NOLSE-NEXT: ldaxr w0, [x19]
+; SOFTFP-NOLSE-NEXT: cmp w0, w21
; SOFTFP-NOLSE-NEXT: b.ne .LBB4_1
-; SOFTFP-NOLSE-NEXT: // %bb.4: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore
; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB4_3 Depth=2
-; SOFTFP-NOLSE-NEXT: stlxr wzr, w0, [x19]
-; SOFTFP-NOLSE-NEXT: cbnz wzr, .LBB4_3
-; SOFTFP-NOLSE-NEXT: b .LBB4_1
-; SOFTFP-NOLSE-NEXT: .LBB4_5: // %atomicrmw.end
-; SOFTFP-NOLSE-NEXT: mov w0, w20
+; SOFTFP-NOLSE-NEXT: stlxr w9, w8, [x19]
+; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB4_3
+; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB4_2 Depth=1
+; SOFTFP-NOLSE-NEXT: mov w8, #1 // =0x1
+; SOFTFP-NOLSE-NEXT: cbz w8, .LBB4_2
+; SOFTFP-NOLSE-NEXT: .LBB4_6: // %atomicrmw.end
; SOFTFP-NOLSE-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
+; SOFTFP-NOLSE-NEXT: // kill: def $w0 killed $w0 killed $x0
; SOFTFP-NOLSE-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload
; SOFTFP-NOLSE-NEXT: ret
%res = atomicrmw fsub ptr %ptr, float %value seq_cst, align 4
@@ -469,36 +486,40 @@ define double @test_atomicrmw_fsub_f32_seq_cst_align8(ptr %ptr, double %value) #
; SOFTFP-NOLSE-LABEL: test_atomicrmw_fsub_f32_seq_cst_align8:
; SOFTFP-NOLSE: // %bb.0:
; SOFTFP-NOLSE-NEXT: stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
+; SOFTFP-NOLSE-NEXT: ldr x21, [x0]
; SOFTFP-NOLSE-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
-; SOFTFP-NOLSE-NEXT: ldr x20, [x0]
; SOFTFP-NOLSE-NEXT: mov x19, x0
-; SOFTFP-NOLSE-NEXT: mov x21, x1
+; SOFTFP-NOLSE-NEXT: mov x20, x1
; SOFTFP-NOLSE-NEXT: b .LBB5_2
-; SOFTFP-NOLSE-NEXT: .LBB5_1: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: .LBB5_1: // %cmpxchg.nostore
; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB5_2 Depth=1
-; SOFTFP-NOLSE-NEXT: cmp x8, x20
-; SOFTFP-NOLSE-NEXT: mov x20, x8
-; SOFTFP-NOLSE-NEXT: b.eq .LBB5_5
+; SOFTFP-NOLSE-NEXT: mov w9, wzr
+; SOFTFP-NOLSE-NEXT: clrex
+; SOFTFP-NOLSE-NEXT: mov x21, x8
+; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB5_6
; SOFTFP-NOLSE-NEXT: .LBB5_2: // %atomicrmw.start
; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1
; SOFTFP-NOLSE-NEXT: // Child Loop BB5_3 Depth 2
-; SOFTFP-NOLSE-NEXT: mov x0, x20
-; SOFTFP-NOLSE-NEXT: mov x1, x21
+; SOFTFP-NOLSE-NEXT: mov x0, x21
+; SOFTFP-NOLSE-NEXT: mov x1, x20
; SOFTFP-NOLSE-NEXT: bl __subdf3
-; SOFTFP-NOLSE-NEXT: .LBB5_3: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: .LBB5_3: // %cmpxchg.start
; SOFTFP-NOLSE-NEXT: // Parent Loop BB5_2 Depth=1
; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2
; SOFTFP-NOLSE-NEXT: ldaxr x8, [x19]
-; SOFTFP-NOLSE-NEXT: cmp x8, x20
+; SOFTFP-NOLSE-NEXT: cmp x8, x21
; SOFTFP-NOLSE-NEXT: b.ne .LBB5_1
-; SOFTFP-NOLSE-NEXT: // %bb.4: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore
; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB5_3 Depth=2
-; SOFTFP-NOLSE-NEXT: stlxr wzr, x0, [x19]
-; SOFTFP-NOLSE-NEXT: cbnz wzr, .LBB5_3
-; SOFTFP-NOLSE-NEXT: b .LBB5_1
-; SOFTFP-NOLSE-NEXT: .LBB5_5: // %atomicrmw.end
-; SOFTFP-NOLSE-NEXT: mov x0, x20
+; SOFTFP-NOLSE-NEXT: stlxr w9, x0, [x19]
+; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB5_3
+; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB5_2 Depth=1
+; SOFTFP-NOLSE-NEXT: mov w9, #1 // =0x1
+; SOFTFP-NOLSE-NEXT: mov x21, x8
+; SOFTFP-NOLSE-NEXT: cbz w9, .LBB5_2
+; SOFTFP-NOLSE-NEXT: .LBB5_6: // %atomicrmw.end
; SOFTFP-NOLSE-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
+; SOFTFP-NOLSE-NEXT: mov x0, x21
; SOFTFP-NOLSE-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload
; SOFTFP-NOLSE-NEXT: ret
%res = atomicrmw fsub ptr %ptr, double %value seq_cst, align 8
@@ -687,18 +708,18 @@ define <2 x half> @test_atomicrmw_fsub_v2f16_seq_cst_align4(ptr %ptr, <2 x half>
; SOFTFP-NOLSE-NEXT: stp x24, x23, [sp, #16] // 16-byte Folded Spill
; SOFTFP-NOLSE-NEXT: ldrh w23, [x0, #2]
; SOFTFP-NOLSE-NEXT: stp x22, x21, [sp, #32] // 16-byte Folded Spill
-; SOFTFP-NOLSE-NEXT: ldrh w21, [x0]
-; SOFTFP-NOLSE-NEXT: mov w22, w1
+; SOFTFP-NOLSE-NEXT: ldrh w22, [x0]
+; SOFTFP-NOLSE-NEXT: mov w21, w1
; SOFTFP-NOLSE-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill
; SOFTFP-NOLSE-NEXT: mov w19, w2
; SOFTFP-NOLSE-NEXT: mov x20, x0
; SOFTFP-NOLSE-NEXT: b .LBB7_2
-; SOFTFP-NOLSE-NEXT: .LBB7_1: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: .LBB7_1: // %cmpxchg.nostore
; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB7_2 Depth=1
-; SOFTFP-NOLSE-NEXT: lsr w23, w8, #16
-; SOFTFP-NOLSE-NEXT: cmp w8, w21
-; SOFTFP-NOLSE-NEXT: mov w21, w8
-; SOFTFP-NOLSE-NEXT: b.eq .LBB7_5
+; SOFTFP-NOLSE-NEXT: mov w8, wzr
+; SOFTFP-NOLSE-NEXT: clrex
+; SOFTFP-NOLSE-NEXT: lsr w23, w22, #16
+; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB7_6
; SOFTFP-NOLSE-NEXT: .LBB7_2: // %atomicrmw.start
; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1
; SOFTFP-NOLSE-NEXT: // Child Loop BB7_3 Depth 2
@@ -711,29 +732,33 @@ define <2 x half> @test_atomicrmw_fsub_v2f16_seq_cst_align4(ptr %ptr, <2 x half>
; SOFTFP-NOLSE-NEXT: bl __subsf3
; SOFTFP-NOLSE-NEXT: bl __gnu_f2h_ieee
; SOFTFP-NOLSE-NEXT: mov w24, w0
-; SOFTFP-NOLSE-NEXT: and w0, w22, #0xffff
+; SOFTFP-NOLSE-NEXT: and w0, w21, #0xffff
; SOFTFP-NOLSE-NEXT: bl __gnu_h2f_ieee
; SOFTFP-NOLSE-NEXT: mov w25, w0
-; SOFTFP-NOLSE-NEXT: and w0, w21, #0xffff
+; SOFTFP-NOLSE-NEXT: and w0, w22, #0xffff
; SOFTFP-NOLSE-NEXT: bl __gnu_h2f_ieee
; SOFTFP-NOLSE-NEXT: mov w1, w25
; SOFTFP-NOLSE-NEXT: bl __subsf3
; SOFTFP-NOLSE-NEXT: bl __gnu_f2h_ieee
-; SOFTFP-NOLSE-NEXT: bfi w21, w23, #16, #16
+; SOFTFP-NOLSE-NEXT: mov w8, w22
; SOFTFP-NOLSE-NEXT: bfi w0, w24, #16, #16
-; SOFTFP-NOLSE-NEXT: .LBB7_3: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: bfi w8, w23, #16, #16
+; SOFTFP-NOLSE-NEXT: .LBB7_3: // %cmpxchg.start
; SOFTFP-NOLSE-NEXT: // Parent Loop BB7_2 Depth=1
; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2
-; SOFTFP-NOLSE-NEXT: ldaxr w8, [x20]
-; SOFTFP-NOLSE-NEXT: cmp w8, w21
+; SOFTFP-NOLSE-NEXT: ldaxr w22, [x20]
+; SOFTFP-NOLSE-NEXT: cmp w22, w8
; SOFTFP-NOLSE-NEXT: b.ne .LBB7_1
-; SOFTFP-NOLSE-NEXT: // %bb.4: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore
; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB7_3 Depth=2
-; SOFTFP-NOLSE-NEXT: stlxr wzr, w0, [x20]
-; SOFTFP-NOLSE-NEXT: cbnz wzr, .LBB7_3
-; SOFTFP-NOLSE-NEXT: b .LBB7_1
-; SOFTFP-NOLSE-NEXT: .LBB7_5: // %atomicrmw.end
-; SOFTFP-NOLSE-NEXT: mov w0, w21
+; SOFTFP-NOLSE-NEXT: stlxr w9, w0, [x20]
+; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB7_3
+; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB7_2 Depth=1
+; SOFTFP-NOLSE-NEXT: mov w8, #1 // =0x1
+; SOFTFP-NOLSE-NEXT: lsr w23, w22, #16
+; SOFTFP-NOLSE-NEXT: cbz w8, .LBB7_2
+; SOFTFP-NOLSE-NEXT: .LBB7_6: // %atomicrmw.end
+; SOFTFP-NOLSE-NEXT: mov w0, w22
; SOFTFP-NOLSE-NEXT: mov w1, w23
; SOFTFP-NOLSE-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload
; SOFTFP-NOLSE-NEXT: ldp x22, x21, [sp, #32] // 16-byte Folded Reload
@@ -799,17 +824,18 @@ define <2 x bfloat> @test_atomicrmw_fsub_v2bf16_seq_cst_align4(ptr %ptr, <2 x bf
; SOFTFP-NOLSE-NEXT: stp x22, x21, [sp, #32] // 16-byte Folded Spill
; SOFTFP-NOLSE-NEXT: ldrh w1, [x0, #2]
; SOFTFP-NOLSE-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill
-; SOFTFP-NOLSE-NEXT: ldrh w21, [x0]
+; SOFTFP-NOLSE-NEXT: ldrh w22, [x0]
; SOFTFP-NOLSE-NEXT: lsl w20, w2, #16
-; SOFTFP-NOLSE-NEXT: lsl w22, w8, #16
+; SOFTFP-NOLSE-NEXT: lsl w21, w8, #16
; SOFTFP-NOLSE-NEXT: mov x19, x0
; SOFTFP-NOLSE-NEXT: stp x24, x23, [sp, #16] // 16-byte Folded Spill
; SOFTFP-NOLSE-NEXT: b .LBB8_2
-; SOFTFP-NOLSE-NEXT: .LBB8_1: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: .LBB8_1: // %cmpxchg.nostore
; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB8_2 Depth=1
-; SOFTFP-NOLSE-NEXT: lsr w1, w21, #16
-; SOFTFP-NOLSE-NEXT: cmp w21, w23
-; SOFTFP-NOLSE-NEXT: b.eq .LBB8_5
+; SOFTFP-NOLSE-NEXT: mov w8, wzr
+; SOFTFP-NOLSE-NEXT: clrex
+; SOFTFP-NOLSE-NEXT: lsr w1, w22, #16
+; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB8_6
; SOFTFP-NOLSE-NEXT: .LBB8_2: // %atomicrmw.start
; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1
; SOFTFP-NOLSE-NEXT: // Child Loop BB8_3 Depth 2
@@ -819,25 +845,28 @@ define <2 x bfloat> @test_atomicrmw_fsub_v2bf16_seq_cst_align4(ptr %ptr, <2 x bf
; SOFTFP-NOLSE-NEXT: bl __subsf3
; SOFTFP-NOLSE-NEXT: bl __truncsfbf2
; SOFTFP-NOLSE-NEXT: mov w24, w0
-; SOFTFP-NOLSE-NEXT: lsl w0, w21, #16
-; SOFTFP-NOLSE-NEXT: mov w1, w22
+; SOFTFP-NOLSE-NEXT: lsl w0, w22, #16
+; SOFTFP-NOLSE-NEXT: mov w1, w21
; SOFTFP-NOLSE-NEXT: bl __subsf3
; SOFTFP-NOLSE-NEXT: bl __truncsfbf2
-; SOFTFP-NOLSE-NEXT: bfxil w23, w21, #0, #16
+; SOFTFP-NOLSE-NEXT: bfxil w23, w22, #0, #16
; SOFTFP-NOLSE-NEXT: bfi w0, w24, #16, #16
-; SOFTFP-NOLSE-NEXT: .LBB8_3: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: .LBB8_3: // %cmpxchg.start
; SOFTFP-NOLSE-NEXT: // Parent Loop BB8_2 Depth=1
; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2
-; SOFTFP-NOLSE-NEXT: ldaxr w21, [x19]
-; SOFTFP-NOLSE-NEXT: cmp w21, w23
+; SOFTFP-NOLSE-NEXT: ldaxr w22, [x19]
+; SOFTFP-NOLSE-NEXT: cmp w22, w23
; SOFTFP-NOLSE-NEXT: b.ne .LBB8_1
-; SOFTFP-NOLSE-NEXT: // %bb.4: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore
; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB8_3 Depth=2
-; SOFTFP-NOLSE-NEXT: stlxr wzr, w0, [x19]
-; SOFTFP-NOLSE-NEXT: cbnz wzr, .LBB8_3
-; SOFTFP-NOLSE-NEXT: b .LBB8_1
-; SOFTFP-NOLSE-NEXT: .LBB8_5: // %atomicrmw.end
-; SOFTFP-NOLSE-NEXT: mov w0, w21
+; SOFTFP-NOLSE-NEXT: stlxr w8, w0, [x19]
+; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB8_3
+; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB8_2 Depth=1
+; SOFTFP-NOLSE-NEXT: mov w8, #1 // =0x1
+; SOFTFP-NOLSE-NEXT: lsr w1, w22, #16
+; SOFTFP-NOLSE-NEXT: cbz w8, .LBB8_2
+; SOFTFP-NOLSE-NEXT: .LBB8_6: // %atomicrmw.end
+; SOFTFP-NOLSE-NEXT: mov w0, w22
; SOFTFP-NOLSE-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload
; SOFTFP-NOLSE-NEXT: ldp x22, x21, [sp, #32] // 16-byte Folded Reload
; SOFTFP-NOLSE-NEXT: ldp x24, x23, [sp, #16] // 16-byte Folded Reload
@@ -885,45 +914,49 @@ define <2 x float> @test_atomicrmw_fsub_v2f32_seq_cst_align8(ptr %ptr, <2 x floa
; SOFTFP-NOLSE-NEXT: stp x24, x23, [sp, #16] // 16-byte Folded Spill
; SOFTFP-NOLSE-NEXT: stp x22, x21, [sp, #32] // 16-byte Folded Spill
; SOFTFP-NOLSE-NEXT: mov w21, w1
-; SOFTFP-NOLSE-NEXT: ldp w23, w22, [x0]
+; SOFTFP-NOLSE-NEXT: ldp w22, w23, [x0]
; SOFTFP-NOLSE-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill
; SOFTFP-NOLSE-NEXT: mov w19, w2
; SOFTFP-NOLSE-NEXT: mov x20, x0
; SOFTFP-NOLSE-NEXT: b .LBB9_2
-; SOFTFP-NOLSE-NEXT: .LBB9_1: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: .LBB9_1: // %cmpxchg.nostore
; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB9_2 Depth=1
-; SOFTFP-NOLSE-NEXT: lsr x22, x23, #32
-; SOFTFP-NOLSE-NEXT: cmp x23, x8
-; SOFTFP-NOLSE-NEXT: // kill: def $w22 killed $w22 killed $x22 def $x22
-; SOFTFP-NOLSE-NEXT: b.eq .LBB9_5
+; SOFTFP-NOLSE-NEXT: mov w8, wzr
+; SOFTFP-NOLSE-NEXT: clrex
+; SOFTFP-NOLSE-NEXT: lsr x23, x22, #32
+; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB9_6
; SOFTFP-NOLSE-NEXT: .LBB9_2: // %atomicrmw.start
; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1
; SOFTFP-NOLSE-NEXT: // Child Loop BB9_3 Depth 2
-; SOFTFP-NOLSE-NEXT: mov w0, w22
+; SOFTFP-NOLSE-NEXT: mov w0, w23
; SOFTFP-NOLSE-NEXT: mov w1, w19
; SOFTFP-NOLSE-NEXT: bl __subsf3
; SOFTFP-NOLSE-NEXT: mov w24, w0
-; SOFTFP-NOLSE-NEXT: mov w0, w23
+; SOFTFP-NOLSE-NEXT: mov w0, w22
; SOFTFP-NOLSE-NEXT: mov w1, w21
; SOFTFP-NOLSE-NEXT: bl __subsf3
-; SOFTFP-NOLSE-NEXT: mov w8, w23
-; SOFTFP-NOLSE-NEXT: mov w9, w0
-; SOFTFP-NOLSE-NEXT: orr x9, x9, x24, lsl #32
-; SOFTFP-NOLSE-NEXT: orr x8, x8, x22, lsl #32
-; SOFTFP-NOLSE-NEXT: .LBB9_3: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: mov w8, w0
+; SOFTFP-NOLSE-NEXT: mov w9, w22
+; SOFTFP-NOLSE-NEXT: // kill: def $w23 killed $w23 killed $x23 def $x23
+; SOFTFP-NOLSE-NEXT: orr x8, x8, x24, lsl #32
+; SOFTFP-NOLSE-NEXT: orr x9, x9, x23, lsl #32
+; SOFTFP-NOLSE-NEXT: .LBB9_3: // %cmpxchg.start
; SOFTFP-NOLSE-NEXT: // Parent Loop BB9_2 Depth=1
; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2
-; SOFTFP-NOLSE-NEXT: ldaxr x23, [x20]
-; SOFTFP-NOLSE-NEXT: cmp x23, x8
+; SOFTFP-NOLSE-NEXT: ldaxr x22, [x20]
+; SOFTFP-NOLSE-NEXT: cmp x22, x9
; SOFTFP-NOLSE-NEXT: b.ne .LBB9_1
-; SOFTFP-NOLSE-NEXT: // %bb.4: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore
; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB9_3 Depth=2
-; SOFTFP-NOLSE-NEXT: stlxr wzr, x9, [x20]
-; SOFTFP-NOLSE-NEXT: cbnz wzr, .LBB9_3
-; SOFTFP-NOLSE-NEXT: b .LBB9_1
-; SOFTFP-NOLSE-NEXT: .LBB9_5: // %atomicrmw.end
-; SOFTFP-NOLSE-NEXT: mov w0, w23
-; SOFTFP-NOLSE-NEXT: mov w1, w22
+; SOFTFP-NOLSE-NEXT: stlxr w10, x8, [x20]
+; SOFTFP-NOLSE-NEXT: cbnz w10, .LBB9_3
+; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB9_2 Depth=1
+; SOFTFP-NOLSE-NEXT: mov w8, #1 // =0x1
+; SOFTFP-NOLSE-NEXT: lsr x23, x22, #32
+; SOFTFP-NOLSE-NEXT: cbz w8, .LBB9_2
+; SOFTFP-NOLSE-NEXT: .LBB9_6: // %atomicrmw.end
+; SOFTFP-NOLSE-NEXT: mov w0, w22
+; SOFTFP-NOLSE-NEXT: mov w1, w23
; SOFTFP-NOLSE-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload
; SOFTFP-NOLSE-NEXT: ldp x22, x21, [sp, #32] // 16-byte Folded Reload
; SOFTFP-NOLSE-NEXT: ldp x24, x23, [sp, #16] // 16-byte Folded Reload
More information about the llvm-commits
mailing list