[llvm] 8a645fc - [AArch64] Enable type promotion for AArch64

David Green via llvm-commits llvm-commits at lists.llvm.org
Wed Sep 29 07:13:18 PDT 2021


Author: David Green
Date: 2021-09-29T15:13:12+01:00
New Revision: 8a645fc44b545acdea9fb3daf8642ab4b9e219a0

URL: https://github.com/llvm/llvm-project/commit/8a645fc44b545acdea9fb3daf8642ab4b9e219a0
DIFF: https://github.com/llvm/llvm-project/commit/8a645fc44b545acdea9fb3daf8642ab4b9e219a0.diff

LOG: [AArch64] Enable type promotion for AArch64

This enables the type promotion pass for AArch64, which acts as a
CodeGenPrepare pass to promote illegal integers to legal ones,
especially useful for removing extends that would otherwise require
cross-basic-block analysis.

I have enabled this generally, for both ISel and GlobalISel. In some
quick experiments it appeared to help GlobalISel remove extra extends in
places too, but that might just be missing optimizations that are better
left for later. We can disable it again if required.

In my experiments, this can improvement performance in some cases, and
codesize was a small improvement. SPEC was a very small improvement,
within the noise. Some of the test cases show extends being moved out of
loops, often when the extend would be part of a cmp operand, but that
should reduce the latency of the instruction in the loop on many cpus.
The signed-truncation-check tests are increasing as they are no longer
matching specific DAG combines.

We also hope to add some additional improvements to the pass in the near
future, to capture more cases of promoting extends through phis that
have come up in a few places lately.

Differential Revision: https://reviews.llvm.org/D110239

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
    llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll
    llvm/test/CodeGen/AArch64/O3-pipeline.ll
    llvm/test/CodeGen/AArch64/and-mask-removal.ll
    llvm/test/CodeGen/AArch64/atomic-ops-lse.ll
    llvm/test/CodeGen/AArch64/atomic-ops.ll
    llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll
    llvm/test/CodeGen/AArch64/lack-of-signed-truncation-check.ll
    llvm/test/CodeGen/AArch64/signed-truncation-check.ll
    llvm/test/CodeGen/AArch64/typepromotion-overflow.ll
    llvm/test/CodeGen/AArch64/typepromotion-phisret.ll
    llvm/test/CodeGen/AArch64/typepromotion-signed.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
index 99bcb2f4649a..6127f890118f 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -471,6 +471,7 @@ class AArch64PassConfig : public TargetPassConfig {
 
   void addIRPasses()  override;
   bool addPreISel() override;
+  void addCodeGenPrepare() override;
   bool addInstSelector() override;
   bool addIRTranslator() override;
   void addPreLegalizeMachineIR() override;
@@ -597,6 +598,12 @@ bool AArch64PassConfig::addPreISel() {
   return false;
 }
 
+void AArch64PassConfig::addCodeGenPrepare() {
+  if (getOptLevel() != CodeGenOpt::None)
+    addPass(createTypePromotionPass());
+  TargetPassConfig::addCodeGenPrepare();
+}
+
 bool AArch64PassConfig::addInstSelector() {
   addPass(createAArch64ISelDag(getAArch64TargetMachine(), getOptLevel()));
 

diff  --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll
index 633c6e8c2032..fd3a0072d2a8 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll
@@ -1877,14 +1877,15 @@ define i8 @atomicrmw_max_i8(i8* %ptr, i8 %rhs) {
 define i8 @atomicrmw_umin_i8(i8* %ptr, i8 %rhs) {
 ; CHECK-NOLSE-O1-LABEL: atomicrmw_umin_i8:
 ; CHECK-NOLSE-O1:       ; %bb.0:
+; CHECK-NOLSE-O1-NEXT:    and w9, w1, #0xff
 ; CHECK-NOLSE-O1-NEXT:  LBB35_1: ; %atomicrmw.start
 ; CHECK-NOLSE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; CHECK-NOLSE-O1-NEXT:    ldaxrb w8, [x0]
-; CHECK-NOLSE-O1-NEXT:    and w9, w8, #0xff
-; CHECK-NOLSE-O1-NEXT:    cmp w9, w1, uxtb
-; CHECK-NOLSE-O1-NEXT:    csel w9, w8, w1, ls
-; CHECK-NOLSE-O1-NEXT:    stlxrb w10, w9, [x0]
-; CHECK-NOLSE-O1-NEXT:    cbnz w10, LBB35_1
+; CHECK-NOLSE-O1-NEXT:    and w10, w8, #0xff
+; CHECK-NOLSE-O1-NEXT:    cmp w10, w9
+; CHECK-NOLSE-O1-NEXT:    csel w10, w10, w9, ls
+; CHECK-NOLSE-O1-NEXT:    stlxrb w11, w10, [x0]
+; CHECK-NOLSE-O1-NEXT:    cbnz w11, LBB35_1
 ; CHECK-NOLSE-O1-NEXT:  ; %bb.2: ; %atomicrmw.end
 ; CHECK-NOLSE-O1-NEXT:    mov w0, w8
 ; CHECK-NOLSE-O1-NEXT:    ret
@@ -1947,14 +1948,15 @@ define i8 @atomicrmw_umin_i8(i8* %ptr, i8 %rhs) {
 define i8 @atomicrmw_umax_i8(i8* %ptr, i8 %rhs) {
 ; CHECK-NOLSE-O1-LABEL: atomicrmw_umax_i8:
 ; CHECK-NOLSE-O1:       ; %bb.0:
+; CHECK-NOLSE-O1-NEXT:    and w9, w1, #0xff
 ; CHECK-NOLSE-O1-NEXT:  LBB36_1: ; %atomicrmw.start
 ; CHECK-NOLSE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; CHECK-NOLSE-O1-NEXT:    ldxrb w8, [x0]
-; CHECK-NOLSE-O1-NEXT:    and w9, w8, #0xff
-; CHECK-NOLSE-O1-NEXT:    cmp w9, w1, uxtb
-; CHECK-NOLSE-O1-NEXT:    csel w9, w8, w1, hi
-; CHECK-NOLSE-O1-NEXT:    stxrb w10, w9, [x0]
-; CHECK-NOLSE-O1-NEXT:    cbnz w10, LBB36_1
+; CHECK-NOLSE-O1-NEXT:    and w10, w8, #0xff
+; CHECK-NOLSE-O1-NEXT:    cmp w10, w9
+; CHECK-NOLSE-O1-NEXT:    csel w10, w10, w9, hi
+; CHECK-NOLSE-O1-NEXT:    stxrb w11, w10, [x0]
+; CHECK-NOLSE-O1-NEXT:    cbnz w11, LBB36_1
 ; CHECK-NOLSE-O1-NEXT:  ; %bb.2: ; %atomicrmw.end
 ; CHECK-NOLSE-O1-NEXT:    mov w0, w8
 ; CHECK-NOLSE-O1-NEXT:    ret
@@ -2556,14 +2558,15 @@ define i16 @atomicrmw_max_i16(i16* %ptr, i16 %rhs) {
 define i16 @atomicrmw_umin_i16(i16* %ptr, i16 %rhs) {
 ; CHECK-NOLSE-O1-LABEL: atomicrmw_umin_i16:
 ; CHECK-NOLSE-O1:       ; %bb.0:
+; CHECK-NOLSE-O1-NEXT:    and w9, w1, #0xffff
 ; CHECK-NOLSE-O1-NEXT:  LBB45_1: ; %atomicrmw.start
 ; CHECK-NOLSE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; CHECK-NOLSE-O1-NEXT:    ldaxrh w8, [x0]
-; CHECK-NOLSE-O1-NEXT:    and w9, w8, #0xffff
-; CHECK-NOLSE-O1-NEXT:    cmp w9, w1, uxth
-; CHECK-NOLSE-O1-NEXT:    csel w9, w8, w1, ls
-; CHECK-NOLSE-O1-NEXT:    stlxrh w10, w9, [x0]
-; CHECK-NOLSE-O1-NEXT:    cbnz w10, LBB45_1
+; CHECK-NOLSE-O1-NEXT:    and w10, w8, #0xffff
+; CHECK-NOLSE-O1-NEXT:    cmp w10, w9
+; CHECK-NOLSE-O1-NEXT:    csel w10, w10, w9, ls
+; CHECK-NOLSE-O1-NEXT:    stlxrh w11, w10, [x0]
+; CHECK-NOLSE-O1-NEXT:    cbnz w11, LBB45_1
 ; CHECK-NOLSE-O1-NEXT:  ; %bb.2: ; %atomicrmw.end
 ; CHECK-NOLSE-O1-NEXT:    mov w0, w8
 ; CHECK-NOLSE-O1-NEXT:    ret
@@ -2626,14 +2629,15 @@ define i16 @atomicrmw_umin_i16(i16* %ptr, i16 %rhs) {
 define i16 @atomicrmw_umax_i16(i16* %ptr, i16 %rhs) {
 ; CHECK-NOLSE-O1-LABEL: atomicrmw_umax_i16:
 ; CHECK-NOLSE-O1:       ; %bb.0:
+; CHECK-NOLSE-O1-NEXT:    and w9, w1, #0xffff
 ; CHECK-NOLSE-O1-NEXT:  LBB46_1: ; %atomicrmw.start
 ; CHECK-NOLSE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; CHECK-NOLSE-O1-NEXT:    ldxrh w8, [x0]
-; CHECK-NOLSE-O1-NEXT:    and w9, w8, #0xffff
-; CHECK-NOLSE-O1-NEXT:    cmp w9, w1, uxth
-; CHECK-NOLSE-O1-NEXT:    csel w9, w8, w1, hi
-; CHECK-NOLSE-O1-NEXT:    stxrh w10, w9, [x0]
-; CHECK-NOLSE-O1-NEXT:    cbnz w10, LBB46_1
+; CHECK-NOLSE-O1-NEXT:    and w10, w8, #0xffff
+; CHECK-NOLSE-O1-NEXT:    cmp w10, w9
+; CHECK-NOLSE-O1-NEXT:    csel w10, w10, w9, hi
+; CHECK-NOLSE-O1-NEXT:    stxrh w11, w10, [x0]
+; CHECK-NOLSE-O1-NEXT:    cbnz w11, LBB46_1
 ; CHECK-NOLSE-O1-NEXT:  ; %bb.2: ; %atomicrmw.end
 ; CHECK-NOLSE-O1-NEXT:    mov w0, w8
 ; CHECK-NOLSE-O1-NEXT:    ret

diff  --git a/llvm/test/CodeGen/AArch64/O3-pipeline.ll b/llvm/test/CodeGen/AArch64/O3-pipeline.ll
index 1d6eca60838f..95816bd9d326 100644
--- a/llvm/test/CodeGen/AArch64/O3-pipeline.ll
+++ b/llvm/test/CodeGen/AArch64/O3-pipeline.ll
@@ -76,6 +76,7 @@
 ; CHECK-NEXT:       Interleaved Load Combine Pass
 ; CHECK-NEXT:       Dominator Tree Construction
 ; CHECK-NEXT:       Interleaved Access Pass
+; CHECK-NEXT:       Type Promotion
 ; CHECK-NEXT:       Natural Loop Information
 ; CHECK-NEXT:       CodeGen Prepare
 ; CHECK-NEXT:       Dominator Tree Construction

diff  --git a/llvm/test/CodeGen/AArch64/and-mask-removal.ll b/llvm/test/CodeGen/AArch64/and-mask-removal.ll
index 665ba38055d4..3ad77b38376e 100644
--- a/llvm/test/CodeGen/AArch64/and-mask-removal.ll
+++ b/llvm/test/CodeGen/AArch64/and-mask-removal.ll
@@ -149,7 +149,6 @@ define zeroext i1 @test8_6(i8 zeroext %x)  align 2 {
 ; CHECK-LABEL: test8_6:
 ; CHECK:       ; %bb.0: ; %entry
 ; CHECK-NEXT:    sub w8, w0, #58
-; CHECK-NEXT:    and w8, w8, #0xff
 ; CHECK-NEXT:    cmp w8, #154
 ; CHECK-NEXT:    cset w0, hi
 ; CHECK-NEXT:    ret
@@ -293,8 +292,8 @@ define zeroext i1 @test16_6(i16 zeroext %x)  align 2 {
 ; CHECK-NEXT:    mov w8, #-32194
 ; CHECK-NEXT:    add w8, w0, w8
 ; CHECK-NEXT:    mov w9, #24320
-; CHECK-NEXT:    cmp w9, w8, uxth
-; CHECK-NEXT:    cset w0, lo
+; CHECK-NEXT:    cmp w8, w9
+; CHECK-NEXT:    cset w0, hi
 ; CHECK-NEXT:    ret
 entry:
   %0 = add i16 %x, -32194

diff  --git a/llvm/test/CodeGen/AArch64/atomic-ops-lse.ll b/llvm/test/CodeGen/AArch64/atomic-ops-lse.ll
index 1bc8a9606ca2..f63b5d1f4528 100644
--- a/llvm/test/CodeGen/AArch64/atomic-ops-lse.ll
+++ b/llvm/test/CodeGen/AArch64/atomic-ops-lse.ll
@@ -552,17 +552,18 @@ define dso_local i8 @test_atomic_load_umin_i8(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umin_i8:
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i8:
 ; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    and w8, w0, #0xff
 ; OUTLINE-ATOMICS-NEXT:    adrp x9, var8
 ; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var8
 ; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
-; OUTLINE-ATOMICS-NEXT:    ldaxrb w8, [x9]
-; OUTLINE-ATOMICS-NEXT:    cmp w8, w0, uxtb
-; OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, ls
+; OUTLINE-ATOMICS-NEXT:    ldaxrb w0, [x9]
+; OUTLINE-ATOMICS-NEXT:    cmp w0, w8
+; OUTLINE-ATOMICS-NEXT:    csel w10, w0, w8, ls
 ; OUTLINE-ATOMICS-NEXT:    stlxrb w11, w10, [x9]
 ; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
-; OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umin i8* @var8, i8 %offset seq_cst
 ; CHECK-NOT: dmb
@@ -579,17 +580,18 @@ define dso_local i16 @test_atomic_load_umin_i16(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umin_i16:
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i16:
 ; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    and w8, w0, #0xffff
 ; OUTLINE-ATOMICS-NEXT:    adrp x9, var16
 ; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var16
 ; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
-; OUTLINE-ATOMICS-NEXT:    ldaxrh w8, [x9]
-; OUTLINE-ATOMICS-NEXT:    cmp w8, w0, uxth
-; OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, ls
+; OUTLINE-ATOMICS-NEXT:    ldaxrh w0, [x9]
+; OUTLINE-ATOMICS-NEXT:    cmp w0, w8
+; OUTLINE-ATOMICS-NEXT:    csel w10, w0, w8, ls
 ; OUTLINE-ATOMICS-NEXT:    stlxrh w11, w10, [x9]
 ; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
-; OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umin i16* @var16, i16 %offset seq_cst
 ; CHECK-NOT: dmb
@@ -870,17 +872,18 @@ define dso_local i8 @test_atomic_load_umax_i8(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i8:
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i8:
 ; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    and w8, w0, #0xff
 ; OUTLINE-ATOMICS-NEXT:    adrp x9, var8
 ; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var8
 ; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
-; OUTLINE-ATOMICS-NEXT:    ldaxrb w8, [x9]
-; OUTLINE-ATOMICS-NEXT:    cmp w8, w0, uxtb
-; OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, hi
+; OUTLINE-ATOMICS-NEXT:    ldaxrb w0, [x9]
+; OUTLINE-ATOMICS-NEXT:    cmp w0, w8
+; OUTLINE-ATOMICS-NEXT:    csel w10, w0, w8, hi
 ; OUTLINE-ATOMICS-NEXT:    stlxrb w11, w10, [x9]
 ; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
-; OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umax i8* @var8, i8 %offset seq_cst
 ; CHECK-NOT: dmb
@@ -897,17 +900,18 @@ define dso_local i16 @test_atomic_load_umax_i16(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i16:
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i16:
 ; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    and w8, w0, #0xffff
 ; OUTLINE-ATOMICS-NEXT:    adrp x9, var16
 ; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var16
 ; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
-; OUTLINE-ATOMICS-NEXT:    ldaxrh w8, [x9]
-; OUTLINE-ATOMICS-NEXT:    cmp w8, w0, uxth
-; OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, hi
+; OUTLINE-ATOMICS-NEXT:    ldaxrh w0, [x9]
+; OUTLINE-ATOMICS-NEXT:    cmp w0, w8
+; OUTLINE-ATOMICS-NEXT:    csel w10, w0, w8, hi
 ; OUTLINE-ATOMICS-NEXT:    stlxrh w11, w10, [x9]
 ; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
-; OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umax i16* @var16, i16 %offset seq_cst
 ; CHECK-NOT: dmb
@@ -7216,17 +7220,18 @@ define dso_local i8 @test_atomic_load_umax_i8_acq_rel(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i8_acq_rel:
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i8_acq_rel:
 ; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    and w8, w0, #0xff
 ; OUTLINE-ATOMICS-NEXT:    adrp x9, var8
 ; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var8
 ; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
-; OUTLINE-ATOMICS-NEXT:    ldaxrb w8, [x9]
-; OUTLINE-ATOMICS-NEXT:    cmp w8, w0, uxtb
-; OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, hi
+; OUTLINE-ATOMICS-NEXT:    ldaxrb w0, [x9]
+; OUTLINE-ATOMICS-NEXT:    cmp w0, w8
+; OUTLINE-ATOMICS-NEXT:    csel w10, w0, w8, hi
 ; OUTLINE-ATOMICS-NEXT:    stlxrb w11, w10, [x9]
 ; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
-; OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umax i8* @var8, i8 %offset acq_rel
 ; CHECK-NOT: dmb
@@ -7243,17 +7248,18 @@ define dso_local i16 @test_atomic_load_umax_i16_acq_rel(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i16_acq_rel:
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i16_acq_rel:
 ; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    and w8, w0, #0xffff
 ; OUTLINE-ATOMICS-NEXT:    adrp x9, var16
 ; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var16
 ; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
-; OUTLINE-ATOMICS-NEXT:    ldaxrh w8, [x9]
-; OUTLINE-ATOMICS-NEXT:    cmp w8, w0, uxth
-; OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, hi
+; OUTLINE-ATOMICS-NEXT:    ldaxrh w0, [x9]
+; OUTLINE-ATOMICS-NEXT:    cmp w0, w8
+; OUTLINE-ATOMICS-NEXT:    csel w10, w0, w8, hi
 ; OUTLINE-ATOMICS-NEXT:    stlxrh w11, w10, [x9]
 ; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
-; OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umax i16* @var16, i16 %offset acq_rel
 ; CHECK-NOT: dmb
@@ -7374,17 +7380,18 @@ define dso_local i8 @test_atomic_load_umax_i8_acquire(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i8_acquire:
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i8_acquire:
 ; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    and w8, w0, #0xff
 ; OUTLINE-ATOMICS-NEXT:    adrp x9, var8
 ; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var8
 ; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
-; OUTLINE-ATOMICS-NEXT:    ldaxrb w8, [x9]
-; OUTLINE-ATOMICS-NEXT:    cmp w8, w0, uxtb
-; OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, hi
+; OUTLINE-ATOMICS-NEXT:    ldaxrb w0, [x9]
+; OUTLINE-ATOMICS-NEXT:    cmp w0, w8
+; OUTLINE-ATOMICS-NEXT:    csel w10, w0, w8, hi
 ; OUTLINE-ATOMICS-NEXT:    stxrb w11, w10, [x9]
 ; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
-; OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umax i8* @var8, i8 %offset acquire
 ; CHECK-NOT: dmb
@@ -7401,17 +7408,18 @@ define dso_local i16 @test_atomic_load_umax_i16_acquire(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i16_acquire:
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i16_acquire:
 ; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    and w8, w0, #0xffff
 ; OUTLINE-ATOMICS-NEXT:    adrp x9, var16
 ; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var16
 ; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
-; OUTLINE-ATOMICS-NEXT:    ldaxrh w8, [x9]
-; OUTLINE-ATOMICS-NEXT:    cmp w8, w0, uxth
-; OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, hi
+; OUTLINE-ATOMICS-NEXT:    ldaxrh w0, [x9]
+; OUTLINE-ATOMICS-NEXT:    cmp w0, w8
+; OUTLINE-ATOMICS-NEXT:    csel w10, w0, w8, hi
 ; OUTLINE-ATOMICS-NEXT:    stxrh w11, w10, [x9]
 ; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
-; OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umax i16* @var16, i16 %offset acquire
 ; CHECK-NOT: dmb
@@ -7532,17 +7540,18 @@ define dso_local i8 @test_atomic_load_umax_i8_monotonic(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i8_monotonic:
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i8_monotonic:
 ; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    and w8, w0, #0xff
 ; OUTLINE-ATOMICS-NEXT:    adrp x9, var8
 ; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var8
 ; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
-; OUTLINE-ATOMICS-NEXT:    ldxrb w8, [x9]
-; OUTLINE-ATOMICS-NEXT:    cmp w8, w0, uxtb
-; OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, hi
+; OUTLINE-ATOMICS-NEXT:    ldxrb w0, [x9]
+; OUTLINE-ATOMICS-NEXT:    cmp w0, w8
+; OUTLINE-ATOMICS-NEXT:    csel w10, w0, w8, hi
 ; OUTLINE-ATOMICS-NEXT:    stxrb w11, w10, [x9]
 ; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
-; OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umax i8* @var8, i8 %offset monotonic
 ; CHECK-NOT: dmb
@@ -7559,17 +7568,18 @@ define dso_local i16 @test_atomic_load_umax_i16_monotonic(i16 %offset) nounwind
 ; CHECK-LABEL: test_atomic_load_umax_i16_monotonic:
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i16_monotonic:
 ; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    and w8, w0, #0xffff
 ; OUTLINE-ATOMICS-NEXT:    adrp x9, var16
 ; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var16
 ; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
-; OUTLINE-ATOMICS-NEXT:    ldxrh w8, [x9]
-; OUTLINE-ATOMICS-NEXT:    cmp w8, w0, uxth
-; OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, hi
+; OUTLINE-ATOMICS-NEXT:    ldxrh w0, [x9]
+; OUTLINE-ATOMICS-NEXT:    cmp w0, w8
+; OUTLINE-ATOMICS-NEXT:    csel w10, w0, w8, hi
 ; OUTLINE-ATOMICS-NEXT:    stxrh w11, w10, [x9]
 ; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
-; OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umax i16* @var16, i16 %offset monotonic
 ; CHECK-NOT: dmb
@@ -7690,17 +7700,18 @@ define dso_local i8 @test_atomic_load_umax_i8_release(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i8_release:
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i8_release:
 ; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    and w8, w0, #0xff
 ; OUTLINE-ATOMICS-NEXT:    adrp x9, var8
 ; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var8
 ; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
-; OUTLINE-ATOMICS-NEXT:    ldxrb w8, [x9]
-; OUTLINE-ATOMICS-NEXT:    cmp w8, w0, uxtb
-; OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, hi
+; OUTLINE-ATOMICS-NEXT:    ldxrb w0, [x9]
+; OUTLINE-ATOMICS-NEXT:    cmp w0, w8
+; OUTLINE-ATOMICS-NEXT:    csel w10, w0, w8, hi
 ; OUTLINE-ATOMICS-NEXT:    stlxrb w11, w10, [x9]
 ; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
-; OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umax i8* @var8, i8 %offset release
 ; CHECK-NOT: dmb
@@ -7717,17 +7728,18 @@ define dso_local i16 @test_atomic_load_umax_i16_release(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i16_release:
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i16_release:
 ; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    and w8, w0, #0xffff
 ; OUTLINE-ATOMICS-NEXT:    adrp x9, var16
 ; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var16
 ; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
-; OUTLINE-ATOMICS-NEXT:    ldxrh w8, [x9]
-; OUTLINE-ATOMICS-NEXT:    cmp w8, w0, uxth
-; OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, hi
+; OUTLINE-ATOMICS-NEXT:    ldxrh w0, [x9]
+; OUTLINE-ATOMICS-NEXT:    cmp w0, w8
+; OUTLINE-ATOMICS-NEXT:    csel w10, w0, w8, hi
 ; OUTLINE-ATOMICS-NEXT:    stlxrh w11, w10, [x9]
 ; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
-; OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umax i16* @var16, i16 %offset release
 ; CHECK-NOT: dmb
@@ -7848,17 +7860,18 @@ define dso_local i8 @test_atomic_load_umax_i8_seq_cst(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i8_seq_cst:
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i8_seq_cst:
 ; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    and w8, w0, #0xff
 ; OUTLINE-ATOMICS-NEXT:    adrp x9, var8
 ; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var8
 ; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
-; OUTLINE-ATOMICS-NEXT:    ldaxrb w8, [x9]
-; OUTLINE-ATOMICS-NEXT:    cmp w8, w0, uxtb
-; OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, hi
+; OUTLINE-ATOMICS-NEXT:    ldaxrb w0, [x9]
+; OUTLINE-ATOMICS-NEXT:    cmp w0, w8
+; OUTLINE-ATOMICS-NEXT:    csel w10, w0, w8, hi
 ; OUTLINE-ATOMICS-NEXT:    stlxrb w11, w10, [x9]
 ; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
-; OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umax i8* @var8, i8 %offset seq_cst
 ; CHECK-NOT: dmb
@@ -7875,17 +7888,18 @@ define dso_local i16 @test_atomic_load_umax_i16_seq_cst(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i16_seq_cst:
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i16_seq_cst:
 ; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    and w8, w0, #0xffff
 ; OUTLINE-ATOMICS-NEXT:    adrp x9, var16
 ; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var16
 ; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
-; OUTLINE-ATOMICS-NEXT:    ldaxrh w8, [x9]
-; OUTLINE-ATOMICS-NEXT:    cmp w8, w0, uxth
-; OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, hi
+; OUTLINE-ATOMICS-NEXT:    ldaxrh w0, [x9]
+; OUTLINE-ATOMICS-NEXT:    cmp w0, w8
+; OUTLINE-ATOMICS-NEXT:    csel w10, w0, w8, hi
 ; OUTLINE-ATOMICS-NEXT:    stlxrh w11, w10, [x9]
 ; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
-; OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umax i16* @var16, i16 %offset seq_cst
 ; CHECK-NOT: dmb
@@ -8006,17 +8020,18 @@ define dso_local i8 @test_atomic_load_umin_i8_acq_rel(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umin_i8_acq_rel:
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i8_acq_rel:
 ; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    and w8, w0, #0xff
 ; OUTLINE-ATOMICS-NEXT:    adrp x9, var8
 ; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var8
 ; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
-; OUTLINE-ATOMICS-NEXT:    ldaxrb w8, [x9]
-; OUTLINE-ATOMICS-NEXT:    cmp w8, w0, uxtb
-; OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, ls
+; OUTLINE-ATOMICS-NEXT:    ldaxrb w0, [x9]
+; OUTLINE-ATOMICS-NEXT:    cmp w0, w8
+; OUTLINE-ATOMICS-NEXT:    csel w10, w0, w8, ls
 ; OUTLINE-ATOMICS-NEXT:    stlxrb w11, w10, [x9]
 ; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
-; OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umin i8* @var8, i8 %offset acq_rel
 ; CHECK-NOT: dmb
@@ -8033,17 +8048,18 @@ define dso_local i16 @test_atomic_load_umin_i16_acq_rel(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umin_i16_acq_rel:
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i16_acq_rel:
 ; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    and w8, w0, #0xffff
 ; OUTLINE-ATOMICS-NEXT:    adrp x9, var16
 ; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var16
 ; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
-; OUTLINE-ATOMICS-NEXT:    ldaxrh w8, [x9]
-; OUTLINE-ATOMICS-NEXT:    cmp w8, w0, uxth
-; OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, ls
+; OUTLINE-ATOMICS-NEXT:    ldaxrh w0, [x9]
+; OUTLINE-ATOMICS-NEXT:    cmp w0, w8
+; OUTLINE-ATOMICS-NEXT:    csel w10, w0, w8, ls
 ; OUTLINE-ATOMICS-NEXT:    stlxrh w11, w10, [x9]
 ; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
-; OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umin i16* @var16, i16 %offset acq_rel
 ; CHECK-NOT: dmb
@@ -8164,17 +8180,18 @@ define dso_local i8 @test_atomic_load_umin_i8_acquire(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umin_i8_acquire:
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i8_acquire:
 ; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    and w8, w0, #0xff
 ; OUTLINE-ATOMICS-NEXT:    adrp x9, var8
 ; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var8
 ; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
-; OUTLINE-ATOMICS-NEXT:    ldaxrb w8, [x9]
-; OUTLINE-ATOMICS-NEXT:    cmp w8, w0, uxtb
-; OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, ls
+; OUTLINE-ATOMICS-NEXT:    ldaxrb w0, [x9]
+; OUTLINE-ATOMICS-NEXT:    cmp w0, w8
+; OUTLINE-ATOMICS-NEXT:    csel w10, w0, w8, ls
 ; OUTLINE-ATOMICS-NEXT:    stxrb w11, w10, [x9]
 ; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
-; OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umin i8* @var8, i8 %offset acquire
 ; CHECK-NOT: dmb
@@ -8191,17 +8208,18 @@ define dso_local i16 @test_atomic_load_umin_i16_acquire(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umin_i16_acquire:
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i16_acquire:
 ; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    and w8, w0, #0xffff
 ; OUTLINE-ATOMICS-NEXT:    adrp x9, var16
 ; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var16
 ; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
-; OUTLINE-ATOMICS-NEXT:    ldaxrh w8, [x9]
-; OUTLINE-ATOMICS-NEXT:    cmp w8, w0, uxth
-; OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, ls
+; OUTLINE-ATOMICS-NEXT:    ldaxrh w0, [x9]
+; OUTLINE-ATOMICS-NEXT:    cmp w0, w8
+; OUTLINE-ATOMICS-NEXT:    csel w10, w0, w8, ls
 ; OUTLINE-ATOMICS-NEXT:    stxrh w11, w10, [x9]
 ; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
-; OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umin i16* @var16, i16 %offset acquire
 ; CHECK-NOT: dmb
@@ -8322,17 +8340,18 @@ define dso_local i8 @test_atomic_load_umin_i8_monotonic(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umin_i8_monotonic:
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i8_monotonic:
 ; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    and w8, w0, #0xff
 ; OUTLINE-ATOMICS-NEXT:    adrp x9, var8
 ; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var8
 ; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
-; OUTLINE-ATOMICS-NEXT:    ldxrb w8, [x9]
-; OUTLINE-ATOMICS-NEXT:    cmp w8, w0, uxtb
-; OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, ls
+; OUTLINE-ATOMICS-NEXT:    ldxrb w0, [x9]
+; OUTLINE-ATOMICS-NEXT:    cmp w0, w8
+; OUTLINE-ATOMICS-NEXT:    csel w10, w0, w8, ls
 ; OUTLINE-ATOMICS-NEXT:    stxrb w11, w10, [x9]
 ; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
-; OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umin i8* @var8, i8 %offset monotonic
 ; CHECK-NOT: dmb
@@ -8349,17 +8368,18 @@ define dso_local i16 @test_atomic_load_umin_i16_monotonic(i16 %offset) nounwind
 ; CHECK-LABEL: test_atomic_load_umin_i16_monotonic:
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i16_monotonic:
 ; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    and w8, w0, #0xffff
 ; OUTLINE-ATOMICS-NEXT:    adrp x9, var16
 ; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var16
 ; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
-; OUTLINE-ATOMICS-NEXT:    ldxrh w8, [x9]
-; OUTLINE-ATOMICS-NEXT:    cmp w8, w0, uxth
-; OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, ls
+; OUTLINE-ATOMICS-NEXT:    ldxrh w0, [x9]
+; OUTLINE-ATOMICS-NEXT:    cmp w0, w8
+; OUTLINE-ATOMICS-NEXT:    csel w10, w0, w8, ls
 ; OUTLINE-ATOMICS-NEXT:    stxrh w11, w10, [x9]
 ; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
-; OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umin i16* @var16, i16 %offset monotonic
 ; CHECK-NOT: dmb
@@ -8480,17 +8500,18 @@ define dso_local i8 @test_atomic_load_umin_i8_release(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umin_i8_release:
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i8_release:
 ; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    and w8, w0, #0xff
 ; OUTLINE-ATOMICS-NEXT:    adrp x9, var8
 ; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var8
 ; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
-; OUTLINE-ATOMICS-NEXT:    ldxrb w8, [x9]
-; OUTLINE-ATOMICS-NEXT:    cmp w8, w0, uxtb
-; OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, ls
+; OUTLINE-ATOMICS-NEXT:    ldxrb w0, [x9]
+; OUTLINE-ATOMICS-NEXT:    cmp w0, w8
+; OUTLINE-ATOMICS-NEXT:    csel w10, w0, w8, ls
 ; OUTLINE-ATOMICS-NEXT:    stlxrb w11, w10, [x9]
 ; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
-; OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umin i8* @var8, i8 %offset release
 ; CHECK-NOT: dmb
@@ -8507,17 +8528,18 @@ define dso_local i16 @test_atomic_load_umin_i16_release(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umin_i16_release:
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i16_release:
 ; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    and w8, w0, #0xffff
 ; OUTLINE-ATOMICS-NEXT:    adrp x9, var16
 ; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var16
 ; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
-; OUTLINE-ATOMICS-NEXT:    ldxrh w8, [x9]
-; OUTLINE-ATOMICS-NEXT:    cmp w8, w0, uxth
-; OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, ls
+; OUTLINE-ATOMICS-NEXT:    ldxrh w0, [x9]
+; OUTLINE-ATOMICS-NEXT:    cmp w0, w8
+; OUTLINE-ATOMICS-NEXT:    csel w10, w0, w8, ls
 ; OUTLINE-ATOMICS-NEXT:    stlxrh w11, w10, [x9]
 ; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
-; OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umin i16* @var16, i16 %offset release
 ; CHECK-NOT: dmb
@@ -8638,17 +8660,18 @@ define dso_local i8 @test_atomic_load_umin_i8_seq_cst(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umin_i8_seq_cst:
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i8_seq_cst:
 ; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    and w8, w0, #0xff
 ; OUTLINE-ATOMICS-NEXT:    adrp x9, var8
 ; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var8
 ; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
-; OUTLINE-ATOMICS-NEXT:    ldaxrb w8, [x9]
-; OUTLINE-ATOMICS-NEXT:    cmp w8, w0, uxtb
-; OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, ls
+; OUTLINE-ATOMICS-NEXT:    ldaxrb w0, [x9]
+; OUTLINE-ATOMICS-NEXT:    cmp w0, w8
+; OUTLINE-ATOMICS-NEXT:    csel w10, w0, w8, ls
 ; OUTLINE-ATOMICS-NEXT:    stlxrb w11, w10, [x9]
 ; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
-; OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umin i8* @var8, i8 %offset seq_cst
 ; CHECK-NOT: dmb
@@ -8665,17 +8688,18 @@ define dso_local i16 @test_atomic_load_umin_i16_seq_cst(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umin_i16_seq_cst:
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i16_seq_cst:
 ; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    and w8, w0, #0xffff
 ; OUTLINE-ATOMICS-NEXT:    adrp x9, var16
 ; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var16
 ; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
-; OUTLINE-ATOMICS-NEXT:    ldaxrh w8, [x9]
-; OUTLINE-ATOMICS-NEXT:    cmp w8, w0, uxth
-; OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, ls
+; OUTLINE-ATOMICS-NEXT:    ldaxrh w0, [x9]
+; OUTLINE-ATOMICS-NEXT:    cmp w0, w8
+; OUTLINE-ATOMICS-NEXT:    csel w10, w0, w8, ls
 ; OUTLINE-ATOMICS-NEXT:    stlxrh w11, w10, [x9]
 ; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
-; OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umin i16* @var16, i16 %offset seq_cst
 ; CHECK-NOT: dmb

diff  --git a/llvm/test/CodeGen/AArch64/atomic-ops.ll b/llvm/test/CodeGen/AArch64/atomic-ops.ll
index e1fdf4c16f45..4f8b17c93509 100644
--- a/llvm/test/CodeGen/AArch64/atomic-ops.ll
+++ b/llvm/test/CodeGen/AArch64/atomic-ops.ll
@@ -822,17 +822,18 @@ define dso_local i64 @test_atomic_load_max_i64(i64 %offset) nounwind {
 define dso_local i8 @test_atomic_load_umin_i8(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umin_i8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    and w8, w0, #0xff
 ; CHECK-NEXT:    adrp x9, var8
 ; CHECK-NEXT:    add x9, x9, :lo12:var8
 ; CHECK-NEXT:  .LBB32_1: // %atomicrmw.start
 ; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldxrb w8, [x9]
-; CHECK-NEXT:    cmp w8, w0, uxtb
-; CHECK-NEXT:    csel w10, w8, w0, ls
+; CHECK-NEXT:    ldxrb w0, [x9]
+; CHECK-NEXT:    cmp w0, w8
+; CHECK-NEXT:    csel w10, w0, w8, ls
 ; CHECK-NEXT:    stxrb w11, w10, [x9]
 ; CHECK-NEXT:    cbnz w11, .LBB32_1
 ; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
-; CHECK-NEXT:    mov w0, w8
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
    %old = atomicrmw umin i8* @var8, i8 %offset monotonic
    ret i8 %old
@@ -841,17 +842,18 @@ define dso_local i8 @test_atomic_load_umin_i8(i8 %offset) nounwind {
 define dso_local i16 @test_atomic_load_umin_i16(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umin_i16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    and w8, w0, #0xffff
 ; CHECK-NEXT:    adrp x9, var16
 ; CHECK-NEXT:    add x9, x9, :lo12:var16
 ; CHECK-NEXT:  .LBB33_1: // %atomicrmw.start
 ; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldaxrh w8, [x9]
-; CHECK-NEXT:    cmp w8, w0, uxth
-; CHECK-NEXT:    csel w10, w8, w0, ls
+; CHECK-NEXT:    ldaxrh w0, [x9]
+; CHECK-NEXT:    cmp w0, w8
+; CHECK-NEXT:    csel w10, w0, w8, ls
 ; CHECK-NEXT:    stxrh w11, w10, [x9]
 ; CHECK-NEXT:    cbnz w11, .LBB33_1
 ; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
-; CHECK-NEXT:    mov w0, w8
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
    %old = atomicrmw umin i16* @var16, i16 %offset acquire
    ret i16 %old
@@ -898,17 +900,18 @@ define dso_local i64 @test_atomic_load_umin_i64(i64 %offset) nounwind {
 define dso_local i8 @test_atomic_load_umax_i8(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    and w8, w0, #0xff
 ; CHECK-NEXT:    adrp x9, var8
 ; CHECK-NEXT:    add x9, x9, :lo12:var8
 ; CHECK-NEXT:  .LBB36_1: // %atomicrmw.start
 ; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldaxrb w8, [x9]
-; CHECK-NEXT:    cmp w8, w0, uxtb
-; CHECK-NEXT:    csel w10, w8, w0, hi
+; CHECK-NEXT:    ldaxrb w0, [x9]
+; CHECK-NEXT:    cmp w0, w8
+; CHECK-NEXT:    csel w10, w0, w8, hi
 ; CHECK-NEXT:    stlxrb w11, w10, [x9]
 ; CHECK-NEXT:    cbnz w11, .LBB36_1
 ; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
-; CHECK-NEXT:    mov w0, w8
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
    %old = atomicrmw umax i8* @var8, i8 %offset acq_rel
    ret i8 %old
@@ -917,17 +920,18 @@ define dso_local i8 @test_atomic_load_umax_i8(i8 %offset) nounwind {
 define dso_local i16 @test_atomic_load_umax_i16(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    and w8, w0, #0xffff
 ; CHECK-NEXT:    adrp x9, var16
 ; CHECK-NEXT:    add x9, x9, :lo12:var16
 ; CHECK-NEXT:  .LBB37_1: // %atomicrmw.start
 ; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldxrh w8, [x9]
-; CHECK-NEXT:    cmp w8, w0, uxth
-; CHECK-NEXT:    csel w10, w8, w0, hi
+; CHECK-NEXT:    ldxrh w0, [x9]
+; CHECK-NEXT:    cmp w0, w8
+; CHECK-NEXT:    csel w10, w0, w8, hi
 ; CHECK-NEXT:    stxrh w11, w10, [x9]
 ; CHECK-NEXT:    cbnz w11, .LBB37_1
 ; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
-; CHECK-NEXT:    mov w0, w8
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
    %old = atomicrmw umax i16* @var16, i16 %offset monotonic
    ret i16 %old
@@ -975,23 +979,24 @@ define dso_local i8 @test_atomic_cmpxchg_i8(i8 %wanted, i8 %new) nounwind {
 ; INLINE_ATOMICS-LABEL: test_atomic_cmpxchg_i8:
 ; INLINE_ATOMICS:       // %bb.0:
 ; INLINE_ATOMICS-NEXT:    // kill: def $w1 killed $w1 def $x1
+; INLINE_ATOMICS-NEXT:    and w8, w0, #0xff
 ; INLINE_ATOMICS-NEXT:    adrp x9, var8
 ; INLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var8
 ; INLINE_ATOMICS-NEXT:  .LBB40_1: // %cmpxchg.start
 ; INLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
-; INLINE_ATOMICS-NEXT:    ldaxrb w8, [x9]
-; INLINE_ATOMICS-NEXT:    cmp w8, w0, uxtb
+; INLINE_ATOMICS-NEXT:    ldaxrb w0, [x9]
+; INLINE_ATOMICS-NEXT:    cmp w0, w8
 ; INLINE_ATOMICS-NEXT:    b.ne .LBB40_4
 ; INLINE_ATOMICS-NEXT:  // %bb.2: // %cmpxchg.trystore
 ; INLINE_ATOMICS-NEXT:    // in Loop: Header=BB40_1 Depth=1
 ; INLINE_ATOMICS-NEXT:    stxrb w10, w1, [x9]
 ; INLINE_ATOMICS-NEXT:    cbnz w10, .LBB40_1
 ; INLINE_ATOMICS-NEXT:  // %bb.3: // %cmpxchg.end
-; INLINE_ATOMICS-NEXT:    mov w0, w8
+; INLINE_ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; INLINE_ATOMICS-NEXT:    ret
 ; INLINE_ATOMICS-NEXT:  .LBB40_4: // %cmpxchg.nostore
 ; INLINE_ATOMICS-NEXT:    clrex
-; INLINE_ATOMICS-NEXT:    mov w0, w8
+; INLINE_ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; INLINE_ATOMICS-NEXT:    ret
 ;
 ; OUTLINE_ATOMICS-LABEL: test_atomic_cmpxchg_i8:
@@ -1011,23 +1016,24 @@ define dso_local i16 @test_atomic_cmpxchg_i16(i16 %wanted, i16 %new) nounwind {
 ; INLINE_ATOMICS-LABEL: test_atomic_cmpxchg_i16:
 ; INLINE_ATOMICS:       // %bb.0:
 ; INLINE_ATOMICS-NEXT:    // kill: def $w1 killed $w1 def $x1
+; INLINE_ATOMICS-NEXT:    and w8, w0, #0xffff
 ; INLINE_ATOMICS-NEXT:    adrp x9, var16
 ; INLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var16
 ; INLINE_ATOMICS-NEXT:  .LBB41_1: // %cmpxchg.start
 ; INLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
-; INLINE_ATOMICS-NEXT:    ldaxrh w8, [x9]
-; INLINE_ATOMICS-NEXT:    cmp w8, w0, uxth
+; INLINE_ATOMICS-NEXT:    ldaxrh w0, [x9]
+; INLINE_ATOMICS-NEXT:    cmp w0, w8
 ; INLINE_ATOMICS-NEXT:    b.ne .LBB41_4
 ; INLINE_ATOMICS-NEXT:  // %bb.2: // %cmpxchg.trystore
 ; INLINE_ATOMICS-NEXT:    // in Loop: Header=BB41_1 Depth=1
 ; INLINE_ATOMICS-NEXT:    stlxrh w10, w1, [x9]
 ; INLINE_ATOMICS-NEXT:    cbnz w10, .LBB41_1
 ; INLINE_ATOMICS-NEXT:  // %bb.3: // %cmpxchg.end
-; INLINE_ATOMICS-NEXT:    mov w0, w8
+; INLINE_ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; INLINE_ATOMICS-NEXT:    ret
 ; INLINE_ATOMICS-NEXT:  .LBB41_4: // %cmpxchg.nostore
 ; INLINE_ATOMICS-NEXT:    clrex
-; INLINE_ATOMICS-NEXT:    mov w0, w8
+; INLINE_ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; INLINE_ATOMICS-NEXT:    ret
 ;
 ; OUTLINE_ATOMICS-LABEL: test_atomic_cmpxchg_i16:

diff  --git a/llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll b/llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll
index 12d5b20409cf..c981941bdbe8 100644
--- a/llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll
+++ b/llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll
@@ -52,16 +52,17 @@ define i32 @test_return(i32* %p, i32 %oldval, i32 %newval) {
 define i1 @test_return_bool(i8* %value, i8 %oldValue, i8 %newValue) {
 ; CHECK-LABEL: test_return_bool:
 ; CHECK:       ; %bb.0:
+; CHECK-NEXT:    and w8, w1, #0xff
 ; CHECK-NEXT:    ; kill: def $w2 killed $w2 def $x2
 ; CHECK-NEXT:  LBB1_1: ; %cmpxchg.start
 ; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldaxrb w8, [x0]
-; CHECK-NEXT:    cmp w8, w1, uxtb
+; CHECK-NEXT:    ldaxrb w9, [x0]
+; CHECK-NEXT:    cmp w9, w8
 ; CHECK-NEXT:    b.ne LBB1_4
 ; CHECK-NEXT:  ; %bb.2: ; %cmpxchg.trystore
 ; CHECK-NEXT:    ; in Loop: Header=BB1_1 Depth=1
-; CHECK-NEXT:    stlxrb w8, w2, [x0]
-; CHECK-NEXT:    cbnz w8, LBB1_1
+; CHECK-NEXT:    stlxrb w9, w2, [x0]
+; CHECK-NEXT:    cbnz w9, LBB1_1
 ; CHECK-NEXT:  ; %bb.3:
 ; CHECK-NEXT:    mov w8, #1
 ; CHECK-NEXT:    eor w0, w8, #0x1

diff  --git a/llvm/test/CodeGen/AArch64/lack-of-signed-truncation-check.ll b/llvm/test/CodeGen/AArch64/lack-of-signed-truncation-check.ll
index 6b6682cd9219..56a18b00a974 100644
--- a/llvm/test/CodeGen/AArch64/lack-of-signed-truncation-check.ll
+++ b/llvm/test/CodeGen/AArch64/lack-of-signed-truncation-check.ll
@@ -99,10 +99,11 @@ define i1 @shifts_necmp_i64_i8(i64 %x) nounwind {
 define i1 @add_ultcmp_i16_i8(i16 %x) nounwind {
 ; CHECK-LABEL: add_ultcmp_i16_i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sxtb w8, w0
-; CHECK-NEXT:    and w8, w8, #0xffff
-; CHECK-NEXT:    cmp w8, w0, uxth
-; CHECK-NEXT:    cset w0, ne
+; CHECK-NEXT:    and w8, w0, #0xffff
+; CHECK-NEXT:    sub w8, w8, #128
+; CHECK-NEXT:    lsr w8, w8, #8
+; CHECK-NEXT:    cmp w8, #255
+; CHECK-NEXT:    cset w0, lo
 ; CHECK-NEXT:    ret
   %tmp0 = add i16 %x, -128 ; ~0U << (8-1)
   %tmp1 = icmp ult i16 %tmp0, -256 ; ~0U << 8
@@ -168,10 +169,11 @@ define i1 @add_ultcmp_i64_i8(i64 %x) nounwind {
 define i1 @add_ulecmp_i16_i8(i16 %x) nounwind {
 ; CHECK-LABEL: add_ulecmp_i16_i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sxtb w8, w0
-; CHECK-NEXT:    and w8, w8, #0xffff
-; CHECK-NEXT:    cmp w8, w0, uxth
-; CHECK-NEXT:    cset w0, ne
+; CHECK-NEXT:    and w8, w0, #0xffff
+; CHECK-NEXT:    sub w8, w8, #128
+; CHECK-NEXT:    lsr w8, w8, #8
+; CHECK-NEXT:    cmp w8, #255
+; CHECK-NEXT:    cset w0, lo
 ; CHECK-NEXT:    ret
   %tmp0 = add i16 %x, -128 ; ~0U << (8-1)
   %tmp1 = icmp ule i16 %tmp0, -257 ; ~0U << 8 - 1

diff  --git a/llvm/test/CodeGen/AArch64/signed-truncation-check.ll b/llvm/test/CodeGen/AArch64/signed-truncation-check.ll
index 4a3002554cde..ab42e6463fee 100644
--- a/llvm/test/CodeGen/AArch64/signed-truncation-check.ll
+++ b/llvm/test/CodeGen/AArch64/signed-truncation-check.ll
@@ -99,10 +99,11 @@ define i1 @shifts_eqcmp_i64_i8(i64 %x) nounwind {
 define i1 @add_ugecmp_i16_i8(i16 %x) nounwind {
 ; CHECK-LABEL: add_ugecmp_i16_i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sxtb w8, w0
-; CHECK-NEXT:    and w8, w8, #0xffff
-; CHECK-NEXT:    cmp w8, w0, uxth
-; CHECK-NEXT:    cset w0, eq
+; CHECK-NEXT:    and w8, w0, #0xffff
+; CHECK-NEXT:    sub w8, w8, #128
+; CHECK-NEXT:    lsr w8, w8, #8
+; CHECK-NEXT:    cmp w8, #254
+; CHECK-NEXT:    cset w0, hi
 ; CHECK-NEXT:    ret
   %tmp0 = add i16 %x, -128 ; ~0U << (8-1)
   %tmp1 = icmp uge i16 %tmp0, -256 ; ~0U << 8
@@ -181,10 +182,11 @@ define i1 @add_ugecmp_i64_i8(i64 %x) nounwind {
 define i1 @add_ugtcmp_i16_i8(i16 %x) nounwind {
 ; CHECK-LABEL: add_ugtcmp_i16_i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sxtb w8, w0
-; CHECK-NEXT:    and w8, w8, #0xffff
-; CHECK-NEXT:    cmp w8, w0, uxth
-; CHECK-NEXT:    cset w0, eq
+; CHECK-NEXT:    and w8, w0, #0xffff
+; CHECK-NEXT:    sub w8, w8, #128
+; CHECK-NEXT:    lsr w8, w8, #8
+; CHECK-NEXT:    cmp w8, #254
+; CHECK-NEXT:    cset w0, hi
 ; CHECK-NEXT:    ret
   %tmp0 = add i16 %x, -128 ; ~0U << (8-1)
   %tmp1 = icmp ugt i16 %tmp0, -257 ; ~0U << 8 - 1

diff  --git a/llvm/test/CodeGen/AArch64/typepromotion-overflow.ll b/llvm/test/CodeGen/AArch64/typepromotion-overflow.ll
index c9efc1d0418e..c8321aa0dfd2 100644
--- a/llvm/test/CodeGen/AArch64/typepromotion-overflow.ll
+++ b/llvm/test/CodeGen/AArch64/typepromotion-overflow.ll
@@ -148,7 +148,6 @@ define i32 @safe_add_underflow_neg(i8 zeroext %a) {
 ; CHECK-LABEL: safe_add_underflow_neg:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub w8, w0, #2
-; CHECK-NEXT:    and w8, w8, #0xff
 ; CHECK-NEXT:    mov w9, #16
 ; CHECK-NEXT:    cmp w8, #251
 ; CHECK-NEXT:    mov w8, #8
@@ -207,7 +206,6 @@ define i32 @safe_sub_underflow_neg(i8 zeroext %a) {
 ; CHECK-LABEL: safe_sub_underflow_neg:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub w8, w0, #4
-; CHECK-NEXT:    and w8, w8, #0xff
 ; CHECK-NEXT:    mov w9, #16
 ; CHECK-NEXT:    cmp w8, #250
 ; CHECK-NEXT:    mov w8, #8
@@ -284,12 +282,12 @@ define i8 @convert_add_order(i8 zeroext %arg) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    orr w8, w0, #0x1
 ; CHECK-NEXT:    sub w10, w8, #40
-; CHECK-NEXT:    and w10, w10, #0xff
 ; CHECK-NEXT:    mov w9, #1
 ; CHECK-NEXT:    cmp w10, #20
 ; CHECK-NEXT:    cinc w9, w9, hs
 ; CHECK-NEXT:    cmp w8, #50
-; CHECK-NEXT:    csinv w8, w9, wzr, lo
+; CHECK-NEXT:    mov w8, #255
+; CHECK-NEXT:    csel w8, w9, w8, lo
 ; CHECK-NEXT:    and w0, w8, w0
 ; CHECK-NEXT:    ret
   %shl = or i8 %arg, 1
@@ -308,9 +306,8 @@ define i8 @underflow_if_sub(i32 %arg, i8 zeroext %arg1) {
 ; CHECK-NEXT:    cmp w0, #0
 ; CHECK-NEXT:    cset w8, gt
 ; CHECK-NEXT:    and w8, w8, w0
-; CHECK-NEXT:    sub w8, w8, #11
-; CHECK-NEXT:    and w9, w8, #0xff
-; CHECK-NEXT:    cmp w9, w1
+; CHECK-NEXT:    add w8, w8, #245
+; CHECK-NEXT:    cmp w8, w1
 ; CHECK-NEXT:    mov w9, #100
 ; CHECK-NEXT:    csel w0, w8, w9, lo
 ; CHECK-NEXT:    ret
@@ -330,9 +327,8 @@ define i8 @underflow_if_sub_signext(i32 %arg, i8 signext %arg1) {
 ; CHECK-NEXT:    cmp w0, #0
 ; CHECK-NEXT:    cset w8, gt
 ; CHECK-NEXT:    and w8, w8, w0
-; CHECK-NEXT:    sub w8, w8, #11
-; CHECK-NEXT:    and w9, w8, #0xff
-; CHECK-NEXT:    cmp w9, w1, uxtb
+; CHECK-NEXT:    add w8, w8, #245
+; CHECK-NEXT:    cmp w8, w1, uxtb
 ; CHECK-NEXT:    mov w9, #100
 ; CHECK-NEXT:    csel w0, w8, w9, lo
 ; CHECK-NEXT:    ret

diff  --git a/llvm/test/CodeGen/AArch64/typepromotion-phisret.ll b/llvm/test/CodeGen/AArch64/typepromotion-phisret.ll
index 7ef74ffc5be8..34d0d35cd5a9 100644
--- a/llvm/test/CodeGen/AArch64/typepromotion-phisret.ll
+++ b/llvm/test/CodeGen/AArch64/typepromotion-phisret.ll
@@ -5,17 +5,16 @@ define void @phi_feeding_phi_args(i8 %a, i8 %b) {
 ; CHECK-LABEL: phi_feeding_phi_args:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    and w8, w0, #0xff
-; CHECK-NEXT:    cmp w8, w1, uxtb
-; CHECK-NEXT:    csel w8, w0, w1, hi
+; CHECK-NEXT:    and w9, w1, #0xff
+; CHECK-NEXT:    cmp w8, w9
+; CHECK-NEXT:    csel w8, w8, w9, hi
 ; CHECK-NEXT:  .LBB0_1: // %loop
 ; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    and w9, w8, #0xff
-; CHECK-NEXT:    sub w10, w8, #2
-; CHECK-NEXT:    lsl w8, w8, #1
-; CHECK-NEXT:    cmp w9, #254
-; CHECK-NEXT:    csel w8, w10, w8, lo
-; CHECK-NEXT:    mvn w9, w8
-; CHECK-NEXT:    tst w9, #0xff
+; CHECK-NEXT:    sub w9, w8, #2
+; CHECK-NEXT:    lsl w10, w8, #1
+; CHECK-NEXT:    cmp w8, #254
+; CHECK-NEXT:    csel w8, w9, w10, lo
+; CHECK-NEXT:    cmp w8, #255
 ; CHECK-NEXT:    b.ne .LBB0_1
 ; CHECK-NEXT:  // %bb.2: // %exit
 ; CHECK-NEXT:    ret
@@ -59,13 +58,11 @@ define void @phi_feeding_phi_zeroext_args(i8 zeroext %a, i8 zeroext %b) {
 ; CHECK-NEXT:    csel w8, w0, w1, hi
 ; CHECK-NEXT:  .LBB1_1: // %loop
 ; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    and w9, w8, #0xff
-; CHECK-NEXT:    sub w10, w8, #2
-; CHECK-NEXT:    lsl w8, w8, #1
-; CHECK-NEXT:    cmp w9, #254
-; CHECK-NEXT:    csel w8, w10, w8, lo
-; CHECK-NEXT:    mvn w9, w8
-; CHECK-NEXT:    tst w9, #0xff
+; CHECK-NEXT:    sub w9, w8, #2
+; CHECK-NEXT:    lsl w10, w8, #1
+; CHECK-NEXT:    cmp w8, #254
+; CHECK-NEXT:    csel w8, w9, w10, lo
+; CHECK-NEXT:    cmp w8, #255
 ; CHECK-NEXT:    b.ne .LBB1_1
 ; CHECK-NEXT:  // %bb.2: // %exit
 ; CHECK-NEXT:    ret
@@ -109,12 +106,10 @@ define void @phi_i16() {
 ; CHECK-NEXT:    mov w9, #1
 ; CHECK-NEXT:  .LBB2_1: // %loop
 ; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    and w10, w8, #0xffff
-; CHECK-NEXT:    cmp w10, #128
+; CHECK-NEXT:    cmp w8, #128
 ; CHECK-NEXT:    cinc w10, w9, lo
 ; CHECK-NEXT:    add w8, w8, w10
-; CHECK-NEXT:    and w10, w8, #0xffff
-; CHECK-NEXT:    cmp w10, #253
+; CHECK-NEXT:    cmp w8, #253
 ; CHECK-NEXT:    b.lo .LBB2_1
 ; CHECK-NEXT:  // %bb.2: // %exit
 ; CHECK-NEXT:    ret
@@ -150,12 +145,10 @@ define i8 @ret_i8() {
 ; CHECK-NEXT:    mov w8, #1
 ; CHECK-NEXT:  .LBB3_1: // %loop
 ; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    sxtb w9, w0
-; CHECK-NEXT:    cmp w9, #0
-; CHECK-NEXT:    cinc w9, w8, ge
+; CHECK-NEXT:    cmp w0, #128
+; CHECK-NEXT:    cinc w9, w8, lo
 ; CHECK-NEXT:    add w0, w0, w9
-; CHECK-NEXT:    and w9, w0, #0xff
-; CHECK-NEXT:    cmp w9, #252
+; CHECK-NEXT:    cmp w0, #252
 ; CHECK-NEXT:    b.hi .LBB3_1
 ; CHECK-NEXT:  // %bb.2: // %exit
 ; CHECK-NEXT:    ret
@@ -191,12 +184,10 @@ define i16 @phi_multiple_undefs(i16 zeroext %arg) {
 ; CHECK-NEXT:    // implicit-def: $w9
 ; CHECK-NEXT:  .LBB4_1: // %loop
 ; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    and w10, w9, #0xffff
-; CHECK-NEXT:    cmp w10, #128
+; CHECK-NEXT:    cmp w9, #128
 ; CHECK-NEXT:    cinc w10, w8, lo
 ; CHECK-NEXT:    add w9, w9, w10
-; CHECK-NEXT:    and w10, w9, #0xffff
-; CHECK-NEXT:    cmp w10, #253
+; CHECK-NEXT:    cmp w9, #253
 ; CHECK-NEXT:    b.lo .LBB4_1
 ; CHECK-NEXT:  // %bb.2: // %exit
 ; CHECK-NEXT:    ret
@@ -231,8 +222,8 @@ define i16 @promote_arg_return(i16 zeroext %arg1, i16 zeroext %arg2, i8* %res) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    add w8, w0, w0, lsl #1
 ; CHECK-NEXT:    add w8, w8, #45
-; CHECK-NEXT:    cmp w1, w8, uxth
-; CHECK-NEXT:    cset w8, hi
+; CHECK-NEXT:    cmp w8, w1
+; CHECK-NEXT:    cset w8, lo
 ; CHECK-NEXT:    strb w8, [x2]
 ; CHECK-NEXT:    ret
   %add = add nuw i16 %arg1, 15
@@ -246,27 +237,25 @@ define i16 @promote_arg_return(i16 zeroext %arg1, i16 zeroext %arg2, i8* %res) {
 define i16 @signext_bitcast_phi_select(i16 signext %start, i16* %in) {
 ; CHECK-LABEL: signext_bitcast_phi_select:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    and w8, w0, #0xffff
 ; CHECK-NEXT:    mov w9, #-1
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
-; CHECK-NEXT:    cmp w9, w0, sxth
+; CHECK-NEXT:    cmp w9, w8, sxth
 ; CHECK-NEXT:    b.lt .LBB6_3
 ; CHECK-NEXT:  .LBB6_1: // %if.then
 ; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    sxth x8, w0
-; CHECK-NEXT:    ldrh w8, [x1, x8, lsl #1]
-; CHECK-NEXT:    cmp w8, w0, uxth
+; CHECK-NEXT:    ldrh w0, [x1, w8, sxtw #1]
+; CHECK-NEXT:    cmp w0, w8
 ; CHECK-NEXT:    b.eq .LBB6_4
 ; CHECK-NEXT:  // %bb.2: // %if.else
 ; CHECK-NEXT:    // in Loop: Header=BB6_1 Depth=1
-; CHECK-NEXT:    mvn w8, w0
-; CHECK-NEXT:    and w8, w8, #0x8000
-; CHECK-NEXT:    add w0, w0, w8, lsr #15
-; CHECK-NEXT:    cmp w9, w0, sxth
+; CHECK-NEXT:    lsr w10, w8, #15
+; CHECK-NEXT:    eor w10, w10, #0x1
+; CHECK-NEXT:    add w8, w10, w8
+; CHECK-NEXT:    cmp w9, w8, sxth
 ; CHECK-NEXT:    b.ge .LBB6_1
 ; CHECK-NEXT:  .LBB6_3:
-; CHECK-NEXT:    mov w8, wzr
+; CHECK-NEXT:    mov w0, wzr
 ; CHECK-NEXT:  .LBB6_4: // %exit
-; CHECK-NEXT:    mov w0, w8
 ; CHECK-NEXT:    ret
 entry:
   %const = bitcast i16 -1 to i16

diff  --git a/llvm/test/CodeGen/AArch64/typepromotion-signed.ll b/llvm/test/CodeGen/AArch64/typepromotion-signed.ll
index e59f409c0187..805b80c1133c 100644
--- a/llvm/test/CodeGen/AArch64/typepromotion-signed.ll
+++ b/llvm/test/CodeGen/AArch64/typepromotion-signed.ll
@@ -77,8 +77,7 @@ define i32 @test_signext_b_ult_slt(i8* nocapture readonly %ptr, i8 signext %arg)
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ldrb w8, [x0]
 ; CHECK-NEXT:    mov w9, #57
-; CHECK-NEXT:    add w10, w8, w1
-; CHECK-NEXT:    and w10, w10, #0xff
+; CHECK-NEXT:    add w10, w8, w1, uxtb
 ; CHECK-NEXT:    cmp w10, #127
 ; CHECK-NEXT:    ccmp w8, #0, #0, ne
 ; CHECK-NEXT:    mov w8, #42


        


More information about the llvm-commits mailing list