[llvm] r363487 - [RISCV] Regenerate remat.ll and atomic-rmw.ll after D43256

Fangrui Song via llvm-commits llvm-commits at lists.llvm.org
Sat Jun 15 00:49:14 PDT 2019


Author: maskray
Date: Sat Jun 15 00:49:14 2019
New Revision: 363487

URL: http://llvm.org/viewvc/llvm-project?rev=363487&view=rev
Log:
[RISCV] Regenerate remat.ll and atomic-rmw.ll after D43256

Modified:
    llvm/trunk/test/CodeGen/RISCV/atomic-rmw.ll
    llvm/trunk/test/CodeGen/RISCV/remat.ll

Modified: llvm/trunk/test/CodeGen/RISCV/atomic-rmw.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/RISCV/atomic-rmw.ll?rev=363487&r1=363486&r2=363487&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/RISCV/atomic-rmw.ll (original)
+++ llvm/trunk/test/CodeGen/RISCV/atomic-rmw.ll Sat Jun 15 00:49:14 2019
@@ -2083,17 +2083,9 @@ define i8 @atomicrmw_max_i8_monotonic(i8
 ; RV32I-NEXT:    slli a1, a1, 24
 ; RV32I-NEXT:    srai s0, a1, 24
 ; RV32I-NEXT:    addi s3, sp, 11
+; RV32I-NEXT:    j .LBB35_2
 ; RV32I-NEXT:  .LBB35_1: # %atomicrmw.start
-; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT:    slli a1, a0, 24
-; RV32I-NEXT:    srai a1, a1, 24
-; RV32I-NEXT:    mv a2, a0
-; RV32I-NEXT:    blt s0, a1, .LBB35_3
-; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB35_1 Depth=1
-; RV32I-NEXT:    mv a2, s2
-; RV32I-NEXT:  .LBB35_3: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB35_1 Depth=1
+; RV32I-NEXT:    # in Loop: Header=BB35_2 Depth=1
 ; RV32I-NEXT:    sb a0, 11(sp)
 ; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    mv a1, s3
@@ -2102,8 +2094,18 @@ define i8 @atomicrmw_max_i8_monotonic(i8
 ; RV32I-NEXT:    call __atomic_compare_exchange_1
 ; RV32I-NEXT:    mv a1, a0
 ; RV32I-NEXT:    lb a0, 11(sp)
-; RV32I-NEXT:    beqz a1, .LBB35_1
-; RV32I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV32I-NEXT:    bnez a1, .LBB35_4
+; RV32I-NEXT:  .LBB35_2: # %atomicrmw.start
+; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32I-NEXT:    slli a1, a0, 24
+; RV32I-NEXT:    srai a1, a1, 24
+; RV32I-NEXT:    mv a2, a0
+; RV32I-NEXT:    blt s0, a1, .LBB35_1
+; RV32I-NEXT:  # %bb.3: # %atomicrmw.start
+; RV32I-NEXT:    # in Loop: Header=BB35_2 Depth=1
+; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    j .LBB35_1
+; RV32I-NEXT:  .LBB35_4: # %atomicrmw.end
 ; RV32I-NEXT:    lw s3, 12(sp)
 ; RV32I-NEXT:    lw s2, 16(sp)
 ; RV32I-NEXT:    lw s1, 20(sp)
@@ -2156,17 +2158,9 @@ define i8 @atomicrmw_max_i8_monotonic(i8
 ; RV64I-NEXT:    slli a1, a1, 56
 ; RV64I-NEXT:    srai s0, a1, 56
 ; RV64I-NEXT:    addi s3, sp, 7
+; RV64I-NEXT:    j .LBB35_2
 ; RV64I-NEXT:  .LBB35_1: # %atomicrmw.start
-; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT:    slli a1, a0, 56
-; RV64I-NEXT:    srai a1, a1, 56
-; RV64I-NEXT:    mv a2, a0
-; RV64I-NEXT:    blt s0, a1, .LBB35_3
-; RV64I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB35_1 Depth=1
-; RV64I-NEXT:    mv a2, s2
-; RV64I-NEXT:  .LBB35_3: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB35_1 Depth=1
+; RV64I-NEXT:    # in Loop: Header=BB35_2 Depth=1
 ; RV64I-NEXT:    sb a0, 7(sp)
 ; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    mv a1, s3
@@ -2175,8 +2169,18 @@ define i8 @atomicrmw_max_i8_monotonic(i8
 ; RV64I-NEXT:    call __atomic_compare_exchange_1
 ; RV64I-NEXT:    mv a1, a0
 ; RV64I-NEXT:    lb a0, 7(sp)
-; RV64I-NEXT:    beqz a1, .LBB35_1
-; RV64I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV64I-NEXT:    bnez a1, .LBB35_4
+; RV64I-NEXT:  .LBB35_2: # %atomicrmw.start
+; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV64I-NEXT:    slli a1, a0, 56
+; RV64I-NEXT:    srai a1, a1, 56
+; RV64I-NEXT:    mv a2, a0
+; RV64I-NEXT:    blt s0, a1, .LBB35_1
+; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
+; RV64I-NEXT:    # in Loop: Header=BB35_2 Depth=1
+; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    j .LBB35_1
+; RV64I-NEXT:  .LBB35_4: # %atomicrmw.end
 ; RV64I-NEXT:    ld s3, 8(sp)
 ; RV64I-NEXT:    ld s2, 16(sp)
 ; RV64I-NEXT:    ld s1, 24(sp)
@@ -2233,17 +2237,9 @@ define i8 @atomicrmw_max_i8_acquire(i8 *
 ; RV32I-NEXT:    slli a1, a1, 24
 ; RV32I-NEXT:    srai s0, a1, 24
 ; RV32I-NEXT:    addi s3, sp, 11
+; RV32I-NEXT:    j .LBB36_2
 ; RV32I-NEXT:  .LBB36_1: # %atomicrmw.start
-; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT:    slli a1, a0, 24
-; RV32I-NEXT:    srai a1, a1, 24
-; RV32I-NEXT:    mv a2, a0
-; RV32I-NEXT:    blt s0, a1, .LBB36_3
-; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB36_1 Depth=1
-; RV32I-NEXT:    mv a2, s2
-; RV32I-NEXT:  .LBB36_3: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB36_1 Depth=1
+; RV32I-NEXT:    # in Loop: Header=BB36_2 Depth=1
 ; RV32I-NEXT:    sb a0, 11(sp)
 ; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    mv a1, s3
@@ -2252,8 +2248,18 @@ define i8 @atomicrmw_max_i8_acquire(i8 *
 ; RV32I-NEXT:    call __atomic_compare_exchange_1
 ; RV32I-NEXT:    mv a1, a0
 ; RV32I-NEXT:    lb a0, 11(sp)
-; RV32I-NEXT:    beqz a1, .LBB36_1
-; RV32I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV32I-NEXT:    bnez a1, .LBB36_4
+; RV32I-NEXT:  .LBB36_2: # %atomicrmw.start
+; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32I-NEXT:    slli a1, a0, 24
+; RV32I-NEXT:    srai a1, a1, 24
+; RV32I-NEXT:    mv a2, a0
+; RV32I-NEXT:    blt s0, a1, .LBB36_1
+; RV32I-NEXT:  # %bb.3: # %atomicrmw.start
+; RV32I-NEXT:    # in Loop: Header=BB36_2 Depth=1
+; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    j .LBB36_1
+; RV32I-NEXT:  .LBB36_4: # %atomicrmw.end
 ; RV32I-NEXT:    lw s3, 12(sp)
 ; RV32I-NEXT:    lw s2, 16(sp)
 ; RV32I-NEXT:    lw s1, 20(sp)
@@ -2306,17 +2312,9 @@ define i8 @atomicrmw_max_i8_acquire(i8 *
 ; RV64I-NEXT:    slli a1, a1, 56
 ; RV64I-NEXT:    srai s0, a1, 56
 ; RV64I-NEXT:    addi s3, sp, 7
+; RV64I-NEXT:    j .LBB36_2
 ; RV64I-NEXT:  .LBB36_1: # %atomicrmw.start
-; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT:    slli a1, a0, 56
-; RV64I-NEXT:    srai a1, a1, 56
-; RV64I-NEXT:    mv a2, a0
-; RV64I-NEXT:    blt s0, a1, .LBB36_3
-; RV64I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB36_1 Depth=1
-; RV64I-NEXT:    mv a2, s2
-; RV64I-NEXT:  .LBB36_3: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB36_1 Depth=1
+; RV64I-NEXT:    # in Loop: Header=BB36_2 Depth=1
 ; RV64I-NEXT:    sb a0, 7(sp)
 ; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    mv a1, s3
@@ -2325,8 +2323,18 @@ define i8 @atomicrmw_max_i8_acquire(i8 *
 ; RV64I-NEXT:    call __atomic_compare_exchange_1
 ; RV64I-NEXT:    mv a1, a0
 ; RV64I-NEXT:    lb a0, 7(sp)
-; RV64I-NEXT:    beqz a1, .LBB36_1
-; RV64I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV64I-NEXT:    bnez a1, .LBB36_4
+; RV64I-NEXT:  .LBB36_2: # %atomicrmw.start
+; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV64I-NEXT:    slli a1, a0, 56
+; RV64I-NEXT:    srai a1, a1, 56
+; RV64I-NEXT:    mv a2, a0
+; RV64I-NEXT:    blt s0, a1, .LBB36_1
+; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
+; RV64I-NEXT:    # in Loop: Header=BB36_2 Depth=1
+; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    j .LBB36_1
+; RV64I-NEXT:  .LBB36_4: # %atomicrmw.end
 ; RV64I-NEXT:    ld s3, 8(sp)
 ; RV64I-NEXT:    ld s2, 16(sp)
 ; RV64I-NEXT:    ld s1, 24(sp)
@@ -2383,17 +2391,9 @@ define i8 @atomicrmw_max_i8_release(i8 *
 ; RV32I-NEXT:    slli a1, a1, 24
 ; RV32I-NEXT:    srai s0, a1, 24
 ; RV32I-NEXT:    addi s3, sp, 11
+; RV32I-NEXT:    j .LBB37_2
 ; RV32I-NEXT:  .LBB37_1: # %atomicrmw.start
-; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT:    slli a1, a0, 24
-; RV32I-NEXT:    srai a1, a1, 24
-; RV32I-NEXT:    mv a2, a0
-; RV32I-NEXT:    blt s0, a1, .LBB37_3
-; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB37_1 Depth=1
-; RV32I-NEXT:    mv a2, s2
-; RV32I-NEXT:  .LBB37_3: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB37_1 Depth=1
+; RV32I-NEXT:    # in Loop: Header=BB37_2 Depth=1
 ; RV32I-NEXT:    sb a0, 11(sp)
 ; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    mv a1, s3
@@ -2402,8 +2402,18 @@ define i8 @atomicrmw_max_i8_release(i8 *
 ; RV32I-NEXT:    call __atomic_compare_exchange_1
 ; RV32I-NEXT:    mv a1, a0
 ; RV32I-NEXT:    lb a0, 11(sp)
-; RV32I-NEXT:    beqz a1, .LBB37_1
-; RV32I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV32I-NEXT:    bnez a1, .LBB37_4
+; RV32I-NEXT:  .LBB37_2: # %atomicrmw.start
+; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32I-NEXT:    slli a1, a0, 24
+; RV32I-NEXT:    srai a1, a1, 24
+; RV32I-NEXT:    mv a2, a0
+; RV32I-NEXT:    blt s0, a1, .LBB37_1
+; RV32I-NEXT:  # %bb.3: # %atomicrmw.start
+; RV32I-NEXT:    # in Loop: Header=BB37_2 Depth=1
+; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    j .LBB37_1
+; RV32I-NEXT:  .LBB37_4: # %atomicrmw.end
 ; RV32I-NEXT:    lw s3, 12(sp)
 ; RV32I-NEXT:    lw s2, 16(sp)
 ; RV32I-NEXT:    lw s1, 20(sp)
@@ -2456,17 +2466,9 @@ define i8 @atomicrmw_max_i8_release(i8 *
 ; RV64I-NEXT:    slli a1, a1, 56
 ; RV64I-NEXT:    srai s0, a1, 56
 ; RV64I-NEXT:    addi s3, sp, 7
+; RV64I-NEXT:    j .LBB37_2
 ; RV64I-NEXT:  .LBB37_1: # %atomicrmw.start
-; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT:    slli a1, a0, 56
-; RV64I-NEXT:    srai a1, a1, 56
-; RV64I-NEXT:    mv a2, a0
-; RV64I-NEXT:    blt s0, a1, .LBB37_3
-; RV64I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB37_1 Depth=1
-; RV64I-NEXT:    mv a2, s2
-; RV64I-NEXT:  .LBB37_3: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB37_1 Depth=1
+; RV64I-NEXT:    # in Loop: Header=BB37_2 Depth=1
 ; RV64I-NEXT:    sb a0, 7(sp)
 ; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    mv a1, s3
@@ -2475,8 +2477,18 @@ define i8 @atomicrmw_max_i8_release(i8 *
 ; RV64I-NEXT:    call __atomic_compare_exchange_1
 ; RV64I-NEXT:    mv a1, a0
 ; RV64I-NEXT:    lb a0, 7(sp)
-; RV64I-NEXT:    beqz a1, .LBB37_1
-; RV64I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV64I-NEXT:    bnez a1, .LBB37_4
+; RV64I-NEXT:  .LBB37_2: # %atomicrmw.start
+; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV64I-NEXT:    slli a1, a0, 56
+; RV64I-NEXT:    srai a1, a1, 56
+; RV64I-NEXT:    mv a2, a0
+; RV64I-NEXT:    blt s0, a1, .LBB37_1
+; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
+; RV64I-NEXT:    # in Loop: Header=BB37_2 Depth=1
+; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    j .LBB37_1
+; RV64I-NEXT:  .LBB37_4: # %atomicrmw.end
 ; RV64I-NEXT:    ld s3, 8(sp)
 ; RV64I-NEXT:    ld s2, 16(sp)
 ; RV64I-NEXT:    ld s1, 24(sp)
@@ -2533,17 +2545,9 @@ define i8 @atomicrmw_max_i8_acq_rel(i8 *
 ; RV32I-NEXT:    slli a1, a1, 24
 ; RV32I-NEXT:    srai s0, a1, 24
 ; RV32I-NEXT:    addi s3, sp, 11
+; RV32I-NEXT:    j .LBB38_2
 ; RV32I-NEXT:  .LBB38_1: # %atomicrmw.start
-; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT:    slli a1, a0, 24
-; RV32I-NEXT:    srai a1, a1, 24
-; RV32I-NEXT:    mv a2, a0
-; RV32I-NEXT:    blt s0, a1, .LBB38_3
-; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB38_1 Depth=1
-; RV32I-NEXT:    mv a2, s2
-; RV32I-NEXT:  .LBB38_3: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB38_1 Depth=1
+; RV32I-NEXT:    # in Loop: Header=BB38_2 Depth=1
 ; RV32I-NEXT:    sb a0, 11(sp)
 ; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    mv a1, s3
@@ -2552,8 +2556,18 @@ define i8 @atomicrmw_max_i8_acq_rel(i8 *
 ; RV32I-NEXT:    call __atomic_compare_exchange_1
 ; RV32I-NEXT:    mv a1, a0
 ; RV32I-NEXT:    lb a0, 11(sp)
-; RV32I-NEXT:    beqz a1, .LBB38_1
-; RV32I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV32I-NEXT:    bnez a1, .LBB38_4
+; RV32I-NEXT:  .LBB38_2: # %atomicrmw.start
+; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32I-NEXT:    slli a1, a0, 24
+; RV32I-NEXT:    srai a1, a1, 24
+; RV32I-NEXT:    mv a2, a0
+; RV32I-NEXT:    blt s0, a1, .LBB38_1
+; RV32I-NEXT:  # %bb.3: # %atomicrmw.start
+; RV32I-NEXT:    # in Loop: Header=BB38_2 Depth=1
+; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    j .LBB38_1
+; RV32I-NEXT:  .LBB38_4: # %atomicrmw.end
 ; RV32I-NEXT:    lw s3, 12(sp)
 ; RV32I-NEXT:    lw s2, 16(sp)
 ; RV32I-NEXT:    lw s1, 20(sp)
@@ -2606,17 +2620,9 @@ define i8 @atomicrmw_max_i8_acq_rel(i8 *
 ; RV64I-NEXT:    slli a1, a1, 56
 ; RV64I-NEXT:    srai s0, a1, 56
 ; RV64I-NEXT:    addi s3, sp, 7
+; RV64I-NEXT:    j .LBB38_2
 ; RV64I-NEXT:  .LBB38_1: # %atomicrmw.start
-; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT:    slli a1, a0, 56
-; RV64I-NEXT:    srai a1, a1, 56
-; RV64I-NEXT:    mv a2, a0
-; RV64I-NEXT:    blt s0, a1, .LBB38_3
-; RV64I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB38_1 Depth=1
-; RV64I-NEXT:    mv a2, s2
-; RV64I-NEXT:  .LBB38_3: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB38_1 Depth=1
+; RV64I-NEXT:    # in Loop: Header=BB38_2 Depth=1
 ; RV64I-NEXT:    sb a0, 7(sp)
 ; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    mv a1, s3
@@ -2625,8 +2631,18 @@ define i8 @atomicrmw_max_i8_acq_rel(i8 *
 ; RV64I-NEXT:    call __atomic_compare_exchange_1
 ; RV64I-NEXT:    mv a1, a0
 ; RV64I-NEXT:    lb a0, 7(sp)
-; RV64I-NEXT:    beqz a1, .LBB38_1
-; RV64I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV64I-NEXT:    bnez a1, .LBB38_4
+; RV64I-NEXT:  .LBB38_2: # %atomicrmw.start
+; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV64I-NEXT:    slli a1, a0, 56
+; RV64I-NEXT:    srai a1, a1, 56
+; RV64I-NEXT:    mv a2, a0
+; RV64I-NEXT:    blt s0, a1, .LBB38_1
+; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
+; RV64I-NEXT:    # in Loop: Header=BB38_2 Depth=1
+; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    j .LBB38_1
+; RV64I-NEXT:  .LBB38_4: # %atomicrmw.end
 ; RV64I-NEXT:    ld s3, 8(sp)
 ; RV64I-NEXT:    ld s2, 16(sp)
 ; RV64I-NEXT:    ld s1, 24(sp)
@@ -2683,17 +2699,9 @@ define i8 @atomicrmw_max_i8_seq_cst(i8 *
 ; RV32I-NEXT:    slli a1, a1, 24
 ; RV32I-NEXT:    srai s0, a1, 24
 ; RV32I-NEXT:    addi s3, sp, 11
+; RV32I-NEXT:    j .LBB39_2
 ; RV32I-NEXT:  .LBB39_1: # %atomicrmw.start
-; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT:    slli a1, a0, 24
-; RV32I-NEXT:    srai a1, a1, 24
-; RV32I-NEXT:    mv a2, a0
-; RV32I-NEXT:    blt s0, a1, .LBB39_3
-; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB39_1 Depth=1
-; RV32I-NEXT:    mv a2, s2
-; RV32I-NEXT:  .LBB39_3: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB39_1 Depth=1
+; RV32I-NEXT:    # in Loop: Header=BB39_2 Depth=1
 ; RV32I-NEXT:    sb a0, 11(sp)
 ; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    mv a1, s3
@@ -2702,8 +2710,18 @@ define i8 @atomicrmw_max_i8_seq_cst(i8 *
 ; RV32I-NEXT:    call __atomic_compare_exchange_1
 ; RV32I-NEXT:    mv a1, a0
 ; RV32I-NEXT:    lb a0, 11(sp)
-; RV32I-NEXT:    beqz a1, .LBB39_1
-; RV32I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV32I-NEXT:    bnez a1, .LBB39_4
+; RV32I-NEXT:  .LBB39_2: # %atomicrmw.start
+; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32I-NEXT:    slli a1, a0, 24
+; RV32I-NEXT:    srai a1, a1, 24
+; RV32I-NEXT:    mv a2, a0
+; RV32I-NEXT:    blt s0, a1, .LBB39_1
+; RV32I-NEXT:  # %bb.3: # %atomicrmw.start
+; RV32I-NEXT:    # in Loop: Header=BB39_2 Depth=1
+; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    j .LBB39_1
+; RV32I-NEXT:  .LBB39_4: # %atomicrmw.end
 ; RV32I-NEXT:    lw s3, 12(sp)
 ; RV32I-NEXT:    lw s2, 16(sp)
 ; RV32I-NEXT:    lw s1, 20(sp)
@@ -2756,17 +2774,9 @@ define i8 @atomicrmw_max_i8_seq_cst(i8 *
 ; RV64I-NEXT:    slli a1, a1, 56
 ; RV64I-NEXT:    srai s0, a1, 56
 ; RV64I-NEXT:    addi s3, sp, 7
+; RV64I-NEXT:    j .LBB39_2
 ; RV64I-NEXT:  .LBB39_1: # %atomicrmw.start
-; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT:    slli a1, a0, 56
-; RV64I-NEXT:    srai a1, a1, 56
-; RV64I-NEXT:    mv a2, a0
-; RV64I-NEXT:    blt s0, a1, .LBB39_3
-; RV64I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB39_1 Depth=1
-; RV64I-NEXT:    mv a2, s2
-; RV64I-NEXT:  .LBB39_3: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB39_1 Depth=1
+; RV64I-NEXT:    # in Loop: Header=BB39_2 Depth=1
 ; RV64I-NEXT:    sb a0, 7(sp)
 ; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    mv a1, s3
@@ -2775,8 +2785,18 @@ define i8 @atomicrmw_max_i8_seq_cst(i8 *
 ; RV64I-NEXT:    call __atomic_compare_exchange_1
 ; RV64I-NEXT:    mv a1, a0
 ; RV64I-NEXT:    lb a0, 7(sp)
-; RV64I-NEXT:    beqz a1, .LBB39_1
-; RV64I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV64I-NEXT:    bnez a1, .LBB39_4
+; RV64I-NEXT:  .LBB39_2: # %atomicrmw.start
+; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV64I-NEXT:    slli a1, a0, 56
+; RV64I-NEXT:    srai a1, a1, 56
+; RV64I-NEXT:    mv a2, a0
+; RV64I-NEXT:    blt s0, a1, .LBB39_1
+; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
+; RV64I-NEXT:    # in Loop: Header=BB39_2 Depth=1
+; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    j .LBB39_1
+; RV64I-NEXT:  .LBB39_4: # %atomicrmw.end
 ; RV64I-NEXT:    ld s3, 8(sp)
 ; RV64I-NEXT:    ld s2, 16(sp)
 ; RV64I-NEXT:    ld s1, 24(sp)
@@ -2833,17 +2853,9 @@ define i8 @atomicrmw_min_i8_monotonic(i8
 ; RV32I-NEXT:    slli a1, a1, 24
 ; RV32I-NEXT:    srai s0, a1, 24
 ; RV32I-NEXT:    addi s3, sp, 11
+; RV32I-NEXT:    j .LBB40_2
 ; RV32I-NEXT:  .LBB40_1: # %atomicrmw.start
-; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT:    slli a1, a0, 24
-; RV32I-NEXT:    srai a1, a1, 24
-; RV32I-NEXT:    mv a2, a0
-; RV32I-NEXT:    bge s0, a1, .LBB40_3
-; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB40_1 Depth=1
-; RV32I-NEXT:    mv a2, s2
-; RV32I-NEXT:  .LBB40_3: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB40_1 Depth=1
+; RV32I-NEXT:    # in Loop: Header=BB40_2 Depth=1
 ; RV32I-NEXT:    sb a0, 11(sp)
 ; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    mv a1, s3
@@ -2852,8 +2864,18 @@ define i8 @atomicrmw_min_i8_monotonic(i8
 ; RV32I-NEXT:    call __atomic_compare_exchange_1
 ; RV32I-NEXT:    mv a1, a0
 ; RV32I-NEXT:    lb a0, 11(sp)
-; RV32I-NEXT:    beqz a1, .LBB40_1
-; RV32I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV32I-NEXT:    bnez a1, .LBB40_4
+; RV32I-NEXT:  .LBB40_2: # %atomicrmw.start
+; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32I-NEXT:    slli a1, a0, 24
+; RV32I-NEXT:    srai a1, a1, 24
+; RV32I-NEXT:    mv a2, a0
+; RV32I-NEXT:    bge s0, a1, .LBB40_1
+; RV32I-NEXT:  # %bb.3: # %atomicrmw.start
+; RV32I-NEXT:    # in Loop: Header=BB40_2 Depth=1
+; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    j .LBB40_1
+; RV32I-NEXT:  .LBB40_4: # %atomicrmw.end
 ; RV32I-NEXT:    lw s3, 12(sp)
 ; RV32I-NEXT:    lw s2, 16(sp)
 ; RV32I-NEXT:    lw s1, 20(sp)
@@ -2906,17 +2928,9 @@ define i8 @atomicrmw_min_i8_monotonic(i8
 ; RV64I-NEXT:    slli a1, a1, 56
 ; RV64I-NEXT:    srai s0, a1, 56
 ; RV64I-NEXT:    addi s3, sp, 7
+; RV64I-NEXT:    j .LBB40_2
 ; RV64I-NEXT:  .LBB40_1: # %atomicrmw.start
-; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT:    slli a1, a0, 56
-; RV64I-NEXT:    srai a1, a1, 56
-; RV64I-NEXT:    mv a2, a0
-; RV64I-NEXT:    bge s0, a1, .LBB40_3
-; RV64I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB40_1 Depth=1
-; RV64I-NEXT:    mv a2, s2
-; RV64I-NEXT:  .LBB40_3: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB40_1 Depth=1
+; RV64I-NEXT:    # in Loop: Header=BB40_2 Depth=1
 ; RV64I-NEXT:    sb a0, 7(sp)
 ; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    mv a1, s3
@@ -2925,8 +2939,18 @@ define i8 @atomicrmw_min_i8_monotonic(i8
 ; RV64I-NEXT:    call __atomic_compare_exchange_1
 ; RV64I-NEXT:    mv a1, a0
 ; RV64I-NEXT:    lb a0, 7(sp)
-; RV64I-NEXT:    beqz a1, .LBB40_1
-; RV64I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV64I-NEXT:    bnez a1, .LBB40_4
+; RV64I-NEXT:  .LBB40_2: # %atomicrmw.start
+; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV64I-NEXT:    slli a1, a0, 56
+; RV64I-NEXT:    srai a1, a1, 56
+; RV64I-NEXT:    mv a2, a0
+; RV64I-NEXT:    bge s0, a1, .LBB40_1
+; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
+; RV64I-NEXT:    # in Loop: Header=BB40_2 Depth=1
+; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    j .LBB40_1
+; RV64I-NEXT:  .LBB40_4: # %atomicrmw.end
 ; RV64I-NEXT:    ld s3, 8(sp)
 ; RV64I-NEXT:    ld s2, 16(sp)
 ; RV64I-NEXT:    ld s1, 24(sp)
@@ -2983,17 +3007,9 @@ define i8 @atomicrmw_min_i8_acquire(i8 *
 ; RV32I-NEXT:    slli a1, a1, 24
 ; RV32I-NEXT:    srai s0, a1, 24
 ; RV32I-NEXT:    addi s3, sp, 11
+; RV32I-NEXT:    j .LBB41_2
 ; RV32I-NEXT:  .LBB41_1: # %atomicrmw.start
-; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT:    slli a1, a0, 24
-; RV32I-NEXT:    srai a1, a1, 24
-; RV32I-NEXT:    mv a2, a0
-; RV32I-NEXT:    bge s0, a1, .LBB41_3
-; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB41_1 Depth=1
-; RV32I-NEXT:    mv a2, s2
-; RV32I-NEXT:  .LBB41_3: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB41_1 Depth=1
+; RV32I-NEXT:    # in Loop: Header=BB41_2 Depth=1
 ; RV32I-NEXT:    sb a0, 11(sp)
 ; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    mv a1, s3
@@ -3002,8 +3018,18 @@ define i8 @atomicrmw_min_i8_acquire(i8 *
 ; RV32I-NEXT:    call __atomic_compare_exchange_1
 ; RV32I-NEXT:    mv a1, a0
 ; RV32I-NEXT:    lb a0, 11(sp)
-; RV32I-NEXT:    beqz a1, .LBB41_1
-; RV32I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV32I-NEXT:    bnez a1, .LBB41_4
+; RV32I-NEXT:  .LBB41_2: # %atomicrmw.start
+; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32I-NEXT:    slli a1, a0, 24
+; RV32I-NEXT:    srai a1, a1, 24
+; RV32I-NEXT:    mv a2, a0
+; RV32I-NEXT:    bge s0, a1, .LBB41_1
+; RV32I-NEXT:  # %bb.3: # %atomicrmw.start
+; RV32I-NEXT:    # in Loop: Header=BB41_2 Depth=1
+; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    j .LBB41_1
+; RV32I-NEXT:  .LBB41_4: # %atomicrmw.end
 ; RV32I-NEXT:    lw s3, 12(sp)
 ; RV32I-NEXT:    lw s2, 16(sp)
 ; RV32I-NEXT:    lw s1, 20(sp)
@@ -3056,17 +3082,9 @@ define i8 @atomicrmw_min_i8_acquire(i8 *
 ; RV64I-NEXT:    slli a1, a1, 56
 ; RV64I-NEXT:    srai s0, a1, 56
 ; RV64I-NEXT:    addi s3, sp, 7
+; RV64I-NEXT:    j .LBB41_2
 ; RV64I-NEXT:  .LBB41_1: # %atomicrmw.start
-; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT:    slli a1, a0, 56
-; RV64I-NEXT:    srai a1, a1, 56
-; RV64I-NEXT:    mv a2, a0
-; RV64I-NEXT:    bge s0, a1, .LBB41_3
-; RV64I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB41_1 Depth=1
-; RV64I-NEXT:    mv a2, s2
-; RV64I-NEXT:  .LBB41_3: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB41_1 Depth=1
+; RV64I-NEXT:    # in Loop: Header=BB41_2 Depth=1
 ; RV64I-NEXT:    sb a0, 7(sp)
 ; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    mv a1, s3
@@ -3075,8 +3093,18 @@ define i8 @atomicrmw_min_i8_acquire(i8 *
 ; RV64I-NEXT:    call __atomic_compare_exchange_1
 ; RV64I-NEXT:    mv a1, a0
 ; RV64I-NEXT:    lb a0, 7(sp)
-; RV64I-NEXT:    beqz a1, .LBB41_1
-; RV64I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV64I-NEXT:    bnez a1, .LBB41_4
+; RV64I-NEXT:  .LBB41_2: # %atomicrmw.start
+; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV64I-NEXT:    slli a1, a0, 56
+; RV64I-NEXT:    srai a1, a1, 56
+; RV64I-NEXT:    mv a2, a0
+; RV64I-NEXT:    bge s0, a1, .LBB41_1
+; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
+; RV64I-NEXT:    # in Loop: Header=BB41_2 Depth=1
+; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    j .LBB41_1
+; RV64I-NEXT:  .LBB41_4: # %atomicrmw.end
 ; RV64I-NEXT:    ld s3, 8(sp)
 ; RV64I-NEXT:    ld s2, 16(sp)
 ; RV64I-NEXT:    ld s1, 24(sp)
@@ -3133,17 +3161,9 @@ define i8 @atomicrmw_min_i8_release(i8 *
 ; RV32I-NEXT:    slli a1, a1, 24
 ; RV32I-NEXT:    srai s0, a1, 24
 ; RV32I-NEXT:    addi s3, sp, 11
+; RV32I-NEXT:    j .LBB42_2
 ; RV32I-NEXT:  .LBB42_1: # %atomicrmw.start
-; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT:    slli a1, a0, 24
-; RV32I-NEXT:    srai a1, a1, 24
-; RV32I-NEXT:    mv a2, a0
-; RV32I-NEXT:    bge s0, a1, .LBB42_3
-; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB42_1 Depth=1
-; RV32I-NEXT:    mv a2, s2
-; RV32I-NEXT:  .LBB42_3: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB42_1 Depth=1
+; RV32I-NEXT:    # in Loop: Header=BB42_2 Depth=1
 ; RV32I-NEXT:    sb a0, 11(sp)
 ; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    mv a1, s3
@@ -3152,8 +3172,18 @@ define i8 @atomicrmw_min_i8_release(i8 *
 ; RV32I-NEXT:    call __atomic_compare_exchange_1
 ; RV32I-NEXT:    mv a1, a0
 ; RV32I-NEXT:    lb a0, 11(sp)
-; RV32I-NEXT:    beqz a1, .LBB42_1
-; RV32I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV32I-NEXT:    bnez a1, .LBB42_4
+; RV32I-NEXT:  .LBB42_2: # %atomicrmw.start
+; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32I-NEXT:    slli a1, a0, 24
+; RV32I-NEXT:    srai a1, a1, 24
+; RV32I-NEXT:    mv a2, a0
+; RV32I-NEXT:    bge s0, a1, .LBB42_1
+; RV32I-NEXT:  # %bb.3: # %atomicrmw.start
+; RV32I-NEXT:    # in Loop: Header=BB42_2 Depth=1
+; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    j .LBB42_1
+; RV32I-NEXT:  .LBB42_4: # %atomicrmw.end
 ; RV32I-NEXT:    lw s3, 12(sp)
 ; RV32I-NEXT:    lw s2, 16(sp)
 ; RV32I-NEXT:    lw s1, 20(sp)
@@ -3206,17 +3236,9 @@ define i8 @atomicrmw_min_i8_release(i8 *
 ; RV64I-NEXT:    slli a1, a1, 56
 ; RV64I-NEXT:    srai s0, a1, 56
 ; RV64I-NEXT:    addi s3, sp, 7
+; RV64I-NEXT:    j .LBB42_2
 ; RV64I-NEXT:  .LBB42_1: # %atomicrmw.start
-; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT:    slli a1, a0, 56
-; RV64I-NEXT:    srai a1, a1, 56
-; RV64I-NEXT:    mv a2, a0
-; RV64I-NEXT:    bge s0, a1, .LBB42_3
-; RV64I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB42_1 Depth=1
-; RV64I-NEXT:    mv a2, s2
-; RV64I-NEXT:  .LBB42_3: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB42_1 Depth=1
+; RV64I-NEXT:    # in Loop: Header=BB42_2 Depth=1
 ; RV64I-NEXT:    sb a0, 7(sp)
 ; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    mv a1, s3
@@ -3225,8 +3247,18 @@ define i8 @atomicrmw_min_i8_release(i8 *
 ; RV64I-NEXT:    call __atomic_compare_exchange_1
 ; RV64I-NEXT:    mv a1, a0
 ; RV64I-NEXT:    lb a0, 7(sp)
-; RV64I-NEXT:    beqz a1, .LBB42_1
-; RV64I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV64I-NEXT:    bnez a1, .LBB42_4
+; RV64I-NEXT:  .LBB42_2: # %atomicrmw.start
+; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV64I-NEXT:    slli a1, a0, 56
+; RV64I-NEXT:    srai a1, a1, 56
+; RV64I-NEXT:    mv a2, a0
+; RV64I-NEXT:    bge s0, a1, .LBB42_1
+; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
+; RV64I-NEXT:    # in Loop: Header=BB42_2 Depth=1
+; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    j .LBB42_1
+; RV64I-NEXT:  .LBB42_4: # %atomicrmw.end
 ; RV64I-NEXT:    ld s3, 8(sp)
 ; RV64I-NEXT:    ld s2, 16(sp)
 ; RV64I-NEXT:    ld s1, 24(sp)
@@ -3283,17 +3315,9 @@ define i8 @atomicrmw_min_i8_acq_rel(i8 *
 ; RV32I-NEXT:    slli a1, a1, 24
 ; RV32I-NEXT:    srai s0, a1, 24
 ; RV32I-NEXT:    addi s3, sp, 11
+; RV32I-NEXT:    j .LBB43_2
 ; RV32I-NEXT:  .LBB43_1: # %atomicrmw.start
-; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT:    slli a1, a0, 24
-; RV32I-NEXT:    srai a1, a1, 24
-; RV32I-NEXT:    mv a2, a0
-; RV32I-NEXT:    bge s0, a1, .LBB43_3
-; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB43_1 Depth=1
-; RV32I-NEXT:    mv a2, s2
-; RV32I-NEXT:  .LBB43_3: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB43_1 Depth=1
+; RV32I-NEXT:    # in Loop: Header=BB43_2 Depth=1
 ; RV32I-NEXT:    sb a0, 11(sp)
 ; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    mv a1, s3
@@ -3302,8 +3326,18 @@ define i8 @atomicrmw_min_i8_acq_rel(i8 *
 ; RV32I-NEXT:    call __atomic_compare_exchange_1
 ; RV32I-NEXT:    mv a1, a0
 ; RV32I-NEXT:    lb a0, 11(sp)
-; RV32I-NEXT:    beqz a1, .LBB43_1
-; RV32I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV32I-NEXT:    bnez a1, .LBB43_4
+; RV32I-NEXT:  .LBB43_2: # %atomicrmw.start
+; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32I-NEXT:    slli a1, a0, 24
+; RV32I-NEXT:    srai a1, a1, 24
+; RV32I-NEXT:    mv a2, a0
+; RV32I-NEXT:    bge s0, a1, .LBB43_1
+; RV32I-NEXT:  # %bb.3: # %atomicrmw.start
+; RV32I-NEXT:    # in Loop: Header=BB43_2 Depth=1
+; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    j .LBB43_1
+; RV32I-NEXT:  .LBB43_4: # %atomicrmw.end
 ; RV32I-NEXT:    lw s3, 12(sp)
 ; RV32I-NEXT:    lw s2, 16(sp)
 ; RV32I-NEXT:    lw s1, 20(sp)
@@ -3356,17 +3390,9 @@ define i8 @atomicrmw_min_i8_acq_rel(i8 *
 ; RV64I-NEXT:    slli a1, a1, 56
 ; RV64I-NEXT:    srai s0, a1, 56
 ; RV64I-NEXT:    addi s3, sp, 7
+; RV64I-NEXT:    j .LBB43_2
 ; RV64I-NEXT:  .LBB43_1: # %atomicrmw.start
-; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT:    slli a1, a0, 56
-; RV64I-NEXT:    srai a1, a1, 56
-; RV64I-NEXT:    mv a2, a0
-; RV64I-NEXT:    bge s0, a1, .LBB43_3
-; RV64I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB43_1 Depth=1
-; RV64I-NEXT:    mv a2, s2
-; RV64I-NEXT:  .LBB43_3: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB43_1 Depth=1
+; RV64I-NEXT:    # in Loop: Header=BB43_2 Depth=1
 ; RV64I-NEXT:    sb a0, 7(sp)
 ; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    mv a1, s3
@@ -3375,8 +3401,18 @@ define i8 @atomicrmw_min_i8_acq_rel(i8 *
 ; RV64I-NEXT:    call __atomic_compare_exchange_1
 ; RV64I-NEXT:    mv a1, a0
 ; RV64I-NEXT:    lb a0, 7(sp)
-; RV64I-NEXT:    beqz a1, .LBB43_1
-; RV64I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV64I-NEXT:    bnez a1, .LBB43_4
+; RV64I-NEXT:  .LBB43_2: # %atomicrmw.start
+; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV64I-NEXT:    slli a1, a0, 56
+; RV64I-NEXT:    srai a1, a1, 56
+; RV64I-NEXT:    mv a2, a0
+; RV64I-NEXT:    bge s0, a1, .LBB43_1
+; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
+; RV64I-NEXT:    # in Loop: Header=BB43_2 Depth=1
+; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    j .LBB43_1
+; RV64I-NEXT:  .LBB43_4: # %atomicrmw.end
 ; RV64I-NEXT:    ld s3, 8(sp)
 ; RV64I-NEXT:    ld s2, 16(sp)
 ; RV64I-NEXT:    ld s1, 24(sp)
@@ -3433,17 +3469,9 @@ define i8 @atomicrmw_min_i8_seq_cst(i8 *
 ; RV32I-NEXT:    slli a1, a1, 24
 ; RV32I-NEXT:    srai s0, a1, 24
 ; RV32I-NEXT:    addi s3, sp, 11
+; RV32I-NEXT:    j .LBB44_2
 ; RV32I-NEXT:  .LBB44_1: # %atomicrmw.start
-; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT:    slli a1, a0, 24
-; RV32I-NEXT:    srai a1, a1, 24
-; RV32I-NEXT:    mv a2, a0
-; RV32I-NEXT:    bge s0, a1, .LBB44_3
-; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB44_1 Depth=1
-; RV32I-NEXT:    mv a2, s2
-; RV32I-NEXT:  .LBB44_3: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB44_1 Depth=1
+; RV32I-NEXT:    # in Loop: Header=BB44_2 Depth=1
 ; RV32I-NEXT:    sb a0, 11(sp)
 ; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    mv a1, s3
@@ -3452,8 +3480,18 @@ define i8 @atomicrmw_min_i8_seq_cst(i8 *
 ; RV32I-NEXT:    call __atomic_compare_exchange_1
 ; RV32I-NEXT:    mv a1, a0
 ; RV32I-NEXT:    lb a0, 11(sp)
-; RV32I-NEXT:    beqz a1, .LBB44_1
-; RV32I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV32I-NEXT:    bnez a1, .LBB44_4
+; RV32I-NEXT:  .LBB44_2: # %atomicrmw.start
+; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32I-NEXT:    slli a1, a0, 24
+; RV32I-NEXT:    srai a1, a1, 24
+; RV32I-NEXT:    mv a2, a0
+; RV32I-NEXT:    bge s0, a1, .LBB44_1
+; RV32I-NEXT:  # %bb.3: # %atomicrmw.start
+; RV32I-NEXT:    # in Loop: Header=BB44_2 Depth=1
+; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    j .LBB44_1
+; RV32I-NEXT:  .LBB44_4: # %atomicrmw.end
 ; RV32I-NEXT:    lw s3, 12(sp)
 ; RV32I-NEXT:    lw s2, 16(sp)
 ; RV32I-NEXT:    lw s1, 20(sp)
@@ -3506,17 +3544,9 @@ define i8 @atomicrmw_min_i8_seq_cst(i8 *
 ; RV64I-NEXT:    slli a1, a1, 56
 ; RV64I-NEXT:    srai s0, a1, 56
 ; RV64I-NEXT:    addi s3, sp, 7
+; RV64I-NEXT:    j .LBB44_2
 ; RV64I-NEXT:  .LBB44_1: # %atomicrmw.start
-; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT:    slli a1, a0, 56
-; RV64I-NEXT:    srai a1, a1, 56
-; RV64I-NEXT:    mv a2, a0
-; RV64I-NEXT:    bge s0, a1, .LBB44_3
-; RV64I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB44_1 Depth=1
-; RV64I-NEXT:    mv a2, s2
-; RV64I-NEXT:  .LBB44_3: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB44_1 Depth=1
+; RV64I-NEXT:    # in Loop: Header=BB44_2 Depth=1
 ; RV64I-NEXT:    sb a0, 7(sp)
 ; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    mv a1, s3
@@ -3525,8 +3555,18 @@ define i8 @atomicrmw_min_i8_seq_cst(i8 *
 ; RV64I-NEXT:    call __atomic_compare_exchange_1
 ; RV64I-NEXT:    mv a1, a0
 ; RV64I-NEXT:    lb a0, 7(sp)
-; RV64I-NEXT:    beqz a1, .LBB44_1
-; RV64I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV64I-NEXT:    bnez a1, .LBB44_4
+; RV64I-NEXT:  .LBB44_2: # %atomicrmw.start
+; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV64I-NEXT:    slli a1, a0, 56
+; RV64I-NEXT:    srai a1, a1, 56
+; RV64I-NEXT:    mv a2, a0
+; RV64I-NEXT:    bge s0, a1, .LBB44_1
+; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
+; RV64I-NEXT:    # in Loop: Header=BB44_2 Depth=1
+; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    j .LBB44_1
+; RV64I-NEXT:  .LBB44_4: # %atomicrmw.end
 ; RV64I-NEXT:    ld s3, 8(sp)
 ; RV64I-NEXT:    ld s2, 16(sp)
 ; RV64I-NEXT:    ld s1, 24(sp)
@@ -3582,16 +3622,9 @@ define i8 @atomicrmw_umax_i8_monotonic(i
 ; RV32I-NEXT:    lbu a0, 0(a0)
 ; RV32I-NEXT:    andi s0, a1, 255
 ; RV32I-NEXT:    addi s3, sp, 11
+; RV32I-NEXT:    j .LBB45_2
 ; RV32I-NEXT:  .LBB45_1: # %atomicrmw.start
-; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT:    andi a1, a0, 255
-; RV32I-NEXT:    mv a2, a0
-; RV32I-NEXT:    bltu s0, a1, .LBB45_3
-; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB45_1 Depth=1
-; RV32I-NEXT:    mv a2, s2
-; RV32I-NEXT:  .LBB45_3: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB45_1 Depth=1
+; RV32I-NEXT:    # in Loop: Header=BB45_2 Depth=1
 ; RV32I-NEXT:    sb a0, 11(sp)
 ; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    mv a1, s3
@@ -3600,8 +3633,17 @@ define i8 @atomicrmw_umax_i8_monotonic(i
 ; RV32I-NEXT:    call __atomic_compare_exchange_1
 ; RV32I-NEXT:    mv a1, a0
 ; RV32I-NEXT:    lb a0, 11(sp)
-; RV32I-NEXT:    beqz a1, .LBB45_1
-; RV32I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV32I-NEXT:    bnez a1, .LBB45_4
+; RV32I-NEXT:  .LBB45_2: # %atomicrmw.start
+; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32I-NEXT:    andi a1, a0, 255
+; RV32I-NEXT:    mv a2, a0
+; RV32I-NEXT:    bltu s0, a1, .LBB45_1
+; RV32I-NEXT:  # %bb.3: # %atomicrmw.start
+; RV32I-NEXT:    # in Loop: Header=BB45_2 Depth=1
+; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    j .LBB45_1
+; RV32I-NEXT:  .LBB45_4: # %atomicrmw.end
 ; RV32I-NEXT:    lw s3, 12(sp)
 ; RV32I-NEXT:    lw s2, 16(sp)
 ; RV32I-NEXT:    lw s1, 20(sp)
@@ -3648,16 +3690,9 @@ define i8 @atomicrmw_umax_i8_monotonic(i
 ; RV64I-NEXT:    lbu a0, 0(a0)
 ; RV64I-NEXT:    andi s0, a1, 255
 ; RV64I-NEXT:    addi s3, sp, 7
+; RV64I-NEXT:    j .LBB45_2
 ; RV64I-NEXT:  .LBB45_1: # %atomicrmw.start
-; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT:    andi a1, a0, 255
-; RV64I-NEXT:    mv a2, a0
-; RV64I-NEXT:    bltu s0, a1, .LBB45_3
-; RV64I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB45_1 Depth=1
-; RV64I-NEXT:    mv a2, s2
-; RV64I-NEXT:  .LBB45_3: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB45_1 Depth=1
+; RV64I-NEXT:    # in Loop: Header=BB45_2 Depth=1
 ; RV64I-NEXT:    sb a0, 7(sp)
 ; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    mv a1, s3
@@ -3666,8 +3701,17 @@ define i8 @atomicrmw_umax_i8_monotonic(i
 ; RV64I-NEXT:    call __atomic_compare_exchange_1
 ; RV64I-NEXT:    mv a1, a0
 ; RV64I-NEXT:    lb a0, 7(sp)
-; RV64I-NEXT:    beqz a1, .LBB45_1
-; RV64I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV64I-NEXT:    bnez a1, .LBB45_4
+; RV64I-NEXT:  .LBB45_2: # %atomicrmw.start
+; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV64I-NEXT:    andi a1, a0, 255
+; RV64I-NEXT:    mv a2, a0
+; RV64I-NEXT:    bltu s0, a1, .LBB45_1
+; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
+; RV64I-NEXT:    # in Loop: Header=BB45_2 Depth=1
+; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    j .LBB45_1
+; RV64I-NEXT:  .LBB45_4: # %atomicrmw.end
 ; RV64I-NEXT:    ld s3, 8(sp)
 ; RV64I-NEXT:    ld s2, 16(sp)
 ; RV64I-NEXT:    ld s1, 24(sp)
@@ -3718,16 +3762,9 @@ define i8 @atomicrmw_umax_i8_acquire(i8
 ; RV32I-NEXT:    lbu a0, 0(a0)
 ; RV32I-NEXT:    andi s0, a1, 255
 ; RV32I-NEXT:    addi s3, sp, 11
+; RV32I-NEXT:    j .LBB46_2
 ; RV32I-NEXT:  .LBB46_1: # %atomicrmw.start
-; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT:    andi a1, a0, 255
-; RV32I-NEXT:    mv a2, a0
-; RV32I-NEXT:    bltu s0, a1, .LBB46_3
-; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB46_1 Depth=1
-; RV32I-NEXT:    mv a2, s2
-; RV32I-NEXT:  .LBB46_3: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB46_1 Depth=1
+; RV32I-NEXT:    # in Loop: Header=BB46_2 Depth=1
 ; RV32I-NEXT:    sb a0, 11(sp)
 ; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    mv a1, s3
@@ -3736,8 +3773,17 @@ define i8 @atomicrmw_umax_i8_acquire(i8
 ; RV32I-NEXT:    call __atomic_compare_exchange_1
 ; RV32I-NEXT:    mv a1, a0
 ; RV32I-NEXT:    lb a0, 11(sp)
-; RV32I-NEXT:    beqz a1, .LBB46_1
-; RV32I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV32I-NEXT:    bnez a1, .LBB46_4
+; RV32I-NEXT:  .LBB46_2: # %atomicrmw.start
+; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32I-NEXT:    andi a1, a0, 255
+; RV32I-NEXT:    mv a2, a0
+; RV32I-NEXT:    bltu s0, a1, .LBB46_1
+; RV32I-NEXT:  # %bb.3: # %atomicrmw.start
+; RV32I-NEXT:    # in Loop: Header=BB46_2 Depth=1
+; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    j .LBB46_1
+; RV32I-NEXT:  .LBB46_4: # %atomicrmw.end
 ; RV32I-NEXT:    lw s3, 12(sp)
 ; RV32I-NEXT:    lw s2, 16(sp)
 ; RV32I-NEXT:    lw s1, 20(sp)
@@ -3784,16 +3830,9 @@ define i8 @atomicrmw_umax_i8_acquire(i8
 ; RV64I-NEXT:    lbu a0, 0(a0)
 ; RV64I-NEXT:    andi s0, a1, 255
 ; RV64I-NEXT:    addi s3, sp, 7
+; RV64I-NEXT:    j .LBB46_2
 ; RV64I-NEXT:  .LBB46_1: # %atomicrmw.start
-; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT:    andi a1, a0, 255
-; RV64I-NEXT:    mv a2, a0
-; RV64I-NEXT:    bltu s0, a1, .LBB46_3
-; RV64I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB46_1 Depth=1
-; RV64I-NEXT:    mv a2, s2
-; RV64I-NEXT:  .LBB46_3: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB46_1 Depth=1
+; RV64I-NEXT:    # in Loop: Header=BB46_2 Depth=1
 ; RV64I-NEXT:    sb a0, 7(sp)
 ; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    mv a1, s3
@@ -3802,8 +3841,17 @@ define i8 @atomicrmw_umax_i8_acquire(i8
 ; RV64I-NEXT:    call __atomic_compare_exchange_1
 ; RV64I-NEXT:    mv a1, a0
 ; RV64I-NEXT:    lb a0, 7(sp)
-; RV64I-NEXT:    beqz a1, .LBB46_1
-; RV64I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV64I-NEXT:    bnez a1, .LBB46_4
+; RV64I-NEXT:  .LBB46_2: # %atomicrmw.start
+; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV64I-NEXT:    andi a1, a0, 255
+; RV64I-NEXT:    mv a2, a0
+; RV64I-NEXT:    bltu s0, a1, .LBB46_1
+; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
+; RV64I-NEXT:    # in Loop: Header=BB46_2 Depth=1
+; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    j .LBB46_1
+; RV64I-NEXT:  .LBB46_4: # %atomicrmw.end
 ; RV64I-NEXT:    ld s3, 8(sp)
 ; RV64I-NEXT:    ld s2, 16(sp)
 ; RV64I-NEXT:    ld s1, 24(sp)
@@ -3854,16 +3902,9 @@ define i8 @atomicrmw_umax_i8_release(i8
 ; RV32I-NEXT:    lbu a0, 0(a0)
 ; RV32I-NEXT:    andi s0, a1, 255
 ; RV32I-NEXT:    addi s3, sp, 11
+; RV32I-NEXT:    j .LBB47_2
 ; RV32I-NEXT:  .LBB47_1: # %atomicrmw.start
-; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT:    andi a1, a0, 255
-; RV32I-NEXT:    mv a2, a0
-; RV32I-NEXT:    bltu s0, a1, .LBB47_3
-; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB47_1 Depth=1
-; RV32I-NEXT:    mv a2, s2
-; RV32I-NEXT:  .LBB47_3: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB47_1 Depth=1
+; RV32I-NEXT:    # in Loop: Header=BB47_2 Depth=1
 ; RV32I-NEXT:    sb a0, 11(sp)
 ; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    mv a1, s3
@@ -3872,8 +3913,17 @@ define i8 @atomicrmw_umax_i8_release(i8
 ; RV32I-NEXT:    call __atomic_compare_exchange_1
 ; RV32I-NEXT:    mv a1, a0
 ; RV32I-NEXT:    lb a0, 11(sp)
-; RV32I-NEXT:    beqz a1, .LBB47_1
-; RV32I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV32I-NEXT:    bnez a1, .LBB47_4
+; RV32I-NEXT:  .LBB47_2: # %atomicrmw.start
+; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32I-NEXT:    andi a1, a0, 255
+; RV32I-NEXT:    mv a2, a0
+; RV32I-NEXT:    bltu s0, a1, .LBB47_1
+; RV32I-NEXT:  # %bb.3: # %atomicrmw.start
+; RV32I-NEXT:    # in Loop: Header=BB47_2 Depth=1
+; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    j .LBB47_1
+; RV32I-NEXT:  .LBB47_4: # %atomicrmw.end
 ; RV32I-NEXT:    lw s3, 12(sp)
 ; RV32I-NEXT:    lw s2, 16(sp)
 ; RV32I-NEXT:    lw s1, 20(sp)
@@ -3920,16 +3970,9 @@ define i8 @atomicrmw_umax_i8_release(i8
 ; RV64I-NEXT:    lbu a0, 0(a0)
 ; RV64I-NEXT:    andi s0, a1, 255
 ; RV64I-NEXT:    addi s3, sp, 7
+; RV64I-NEXT:    j .LBB47_2
 ; RV64I-NEXT:  .LBB47_1: # %atomicrmw.start
-; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT:    andi a1, a0, 255
-; RV64I-NEXT:    mv a2, a0
-; RV64I-NEXT:    bltu s0, a1, .LBB47_3
-; RV64I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB47_1 Depth=1
-; RV64I-NEXT:    mv a2, s2
-; RV64I-NEXT:  .LBB47_3: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB47_1 Depth=1
+; RV64I-NEXT:    # in Loop: Header=BB47_2 Depth=1
 ; RV64I-NEXT:    sb a0, 7(sp)
 ; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    mv a1, s3
@@ -3938,8 +3981,17 @@ define i8 @atomicrmw_umax_i8_release(i8
 ; RV64I-NEXT:    call __atomic_compare_exchange_1
 ; RV64I-NEXT:    mv a1, a0
 ; RV64I-NEXT:    lb a0, 7(sp)
-; RV64I-NEXT:    beqz a1, .LBB47_1
-; RV64I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV64I-NEXT:    bnez a1, .LBB47_4
+; RV64I-NEXT:  .LBB47_2: # %atomicrmw.start
+; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV64I-NEXT:    andi a1, a0, 255
+; RV64I-NEXT:    mv a2, a0
+; RV64I-NEXT:    bltu s0, a1, .LBB47_1
+; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
+; RV64I-NEXT:    # in Loop: Header=BB47_2 Depth=1
+; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    j .LBB47_1
+; RV64I-NEXT:  .LBB47_4: # %atomicrmw.end
 ; RV64I-NEXT:    ld s3, 8(sp)
 ; RV64I-NEXT:    ld s2, 16(sp)
 ; RV64I-NEXT:    ld s1, 24(sp)
@@ -3990,16 +4042,9 @@ define i8 @atomicrmw_umax_i8_acq_rel(i8
 ; RV32I-NEXT:    lbu a0, 0(a0)
 ; RV32I-NEXT:    andi s0, a1, 255
 ; RV32I-NEXT:    addi s3, sp, 11
+; RV32I-NEXT:    j .LBB48_2
 ; RV32I-NEXT:  .LBB48_1: # %atomicrmw.start
-; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT:    andi a1, a0, 255
-; RV32I-NEXT:    mv a2, a0
-; RV32I-NEXT:    bltu s0, a1, .LBB48_3
-; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB48_1 Depth=1
-; RV32I-NEXT:    mv a2, s2
-; RV32I-NEXT:  .LBB48_3: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB48_1 Depth=1
+; RV32I-NEXT:    # in Loop: Header=BB48_2 Depth=1
 ; RV32I-NEXT:    sb a0, 11(sp)
 ; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    mv a1, s3
@@ -4008,8 +4053,17 @@ define i8 @atomicrmw_umax_i8_acq_rel(i8
 ; RV32I-NEXT:    call __atomic_compare_exchange_1
 ; RV32I-NEXT:    mv a1, a0
 ; RV32I-NEXT:    lb a0, 11(sp)
-; RV32I-NEXT:    beqz a1, .LBB48_1
-; RV32I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV32I-NEXT:    bnez a1, .LBB48_4
+; RV32I-NEXT:  .LBB48_2: # %atomicrmw.start
+; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32I-NEXT:    andi a1, a0, 255
+; RV32I-NEXT:    mv a2, a0
+; RV32I-NEXT:    bltu s0, a1, .LBB48_1
+; RV32I-NEXT:  # %bb.3: # %atomicrmw.start
+; RV32I-NEXT:    # in Loop: Header=BB48_2 Depth=1
+; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    j .LBB48_1
+; RV32I-NEXT:  .LBB48_4: # %atomicrmw.end
 ; RV32I-NEXT:    lw s3, 12(sp)
 ; RV32I-NEXT:    lw s2, 16(sp)
 ; RV32I-NEXT:    lw s1, 20(sp)
@@ -4056,16 +4110,9 @@ define i8 @atomicrmw_umax_i8_acq_rel(i8
 ; RV64I-NEXT:    lbu a0, 0(a0)
 ; RV64I-NEXT:    andi s0, a1, 255
 ; RV64I-NEXT:    addi s3, sp, 7
+; RV64I-NEXT:    j .LBB48_2
 ; RV64I-NEXT:  .LBB48_1: # %atomicrmw.start
-; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT:    andi a1, a0, 255
-; RV64I-NEXT:    mv a2, a0
-; RV64I-NEXT:    bltu s0, a1, .LBB48_3
-; RV64I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB48_1 Depth=1
-; RV64I-NEXT:    mv a2, s2
-; RV64I-NEXT:  .LBB48_3: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB48_1 Depth=1
+; RV64I-NEXT:    # in Loop: Header=BB48_2 Depth=1
 ; RV64I-NEXT:    sb a0, 7(sp)
 ; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    mv a1, s3
@@ -4074,8 +4121,17 @@ define i8 @atomicrmw_umax_i8_acq_rel(i8
 ; RV64I-NEXT:    call __atomic_compare_exchange_1
 ; RV64I-NEXT:    mv a1, a0
 ; RV64I-NEXT:    lb a0, 7(sp)
-; RV64I-NEXT:    beqz a1, .LBB48_1
-; RV64I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV64I-NEXT:    bnez a1, .LBB48_4
+; RV64I-NEXT:  .LBB48_2: # %atomicrmw.start
+; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV64I-NEXT:    andi a1, a0, 255
+; RV64I-NEXT:    mv a2, a0
+; RV64I-NEXT:    bltu s0, a1, .LBB48_1
+; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
+; RV64I-NEXT:    # in Loop: Header=BB48_2 Depth=1
+; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    j .LBB48_1
+; RV64I-NEXT:  .LBB48_4: # %atomicrmw.end
 ; RV64I-NEXT:    ld s3, 8(sp)
 ; RV64I-NEXT:    ld s2, 16(sp)
 ; RV64I-NEXT:    ld s1, 24(sp)
@@ -4126,16 +4182,9 @@ define i8 @atomicrmw_umax_i8_seq_cst(i8
 ; RV32I-NEXT:    lbu a0, 0(a0)
 ; RV32I-NEXT:    andi s0, a1, 255
 ; RV32I-NEXT:    addi s3, sp, 11
+; RV32I-NEXT:    j .LBB49_2
 ; RV32I-NEXT:  .LBB49_1: # %atomicrmw.start
-; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT:    andi a1, a0, 255
-; RV32I-NEXT:    mv a2, a0
-; RV32I-NEXT:    bltu s0, a1, .LBB49_3
-; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB49_1 Depth=1
-; RV32I-NEXT:    mv a2, s2
-; RV32I-NEXT:  .LBB49_3: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB49_1 Depth=1
+; RV32I-NEXT:    # in Loop: Header=BB49_2 Depth=1
 ; RV32I-NEXT:    sb a0, 11(sp)
 ; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    mv a1, s3
@@ -4144,8 +4193,17 @@ define i8 @atomicrmw_umax_i8_seq_cst(i8
 ; RV32I-NEXT:    call __atomic_compare_exchange_1
 ; RV32I-NEXT:    mv a1, a0
 ; RV32I-NEXT:    lb a0, 11(sp)
-; RV32I-NEXT:    beqz a1, .LBB49_1
-; RV32I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV32I-NEXT:    bnez a1, .LBB49_4
+; RV32I-NEXT:  .LBB49_2: # %atomicrmw.start
+; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32I-NEXT:    andi a1, a0, 255
+; RV32I-NEXT:    mv a2, a0
+; RV32I-NEXT:    bltu s0, a1, .LBB49_1
+; RV32I-NEXT:  # %bb.3: # %atomicrmw.start
+; RV32I-NEXT:    # in Loop: Header=BB49_2 Depth=1
+; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    j .LBB49_1
+; RV32I-NEXT:  .LBB49_4: # %atomicrmw.end
 ; RV32I-NEXT:    lw s3, 12(sp)
 ; RV32I-NEXT:    lw s2, 16(sp)
 ; RV32I-NEXT:    lw s1, 20(sp)
@@ -4192,16 +4250,9 @@ define i8 @atomicrmw_umax_i8_seq_cst(i8
 ; RV64I-NEXT:    lbu a0, 0(a0)
 ; RV64I-NEXT:    andi s0, a1, 255
 ; RV64I-NEXT:    addi s3, sp, 7
+; RV64I-NEXT:    j .LBB49_2
 ; RV64I-NEXT:  .LBB49_1: # %atomicrmw.start
-; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT:    andi a1, a0, 255
-; RV64I-NEXT:    mv a2, a0
-; RV64I-NEXT:    bltu s0, a1, .LBB49_3
-; RV64I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB49_1 Depth=1
-; RV64I-NEXT:    mv a2, s2
-; RV64I-NEXT:  .LBB49_3: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB49_1 Depth=1
+; RV64I-NEXT:    # in Loop: Header=BB49_2 Depth=1
 ; RV64I-NEXT:    sb a0, 7(sp)
 ; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    mv a1, s3
@@ -4210,8 +4261,17 @@ define i8 @atomicrmw_umax_i8_seq_cst(i8
 ; RV64I-NEXT:    call __atomic_compare_exchange_1
 ; RV64I-NEXT:    mv a1, a0
 ; RV64I-NEXT:    lb a0, 7(sp)
-; RV64I-NEXT:    beqz a1, .LBB49_1
-; RV64I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV64I-NEXT:    bnez a1, .LBB49_4
+; RV64I-NEXT:  .LBB49_2: # %atomicrmw.start
+; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV64I-NEXT:    andi a1, a0, 255
+; RV64I-NEXT:    mv a2, a0
+; RV64I-NEXT:    bltu s0, a1, .LBB49_1
+; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
+; RV64I-NEXT:    # in Loop: Header=BB49_2 Depth=1
+; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    j .LBB49_1
+; RV64I-NEXT:  .LBB49_4: # %atomicrmw.end
 ; RV64I-NEXT:    ld s3, 8(sp)
 ; RV64I-NEXT:    ld s2, 16(sp)
 ; RV64I-NEXT:    ld s1, 24(sp)
@@ -4262,16 +4322,9 @@ define i8 @atomicrmw_umin_i8_monotonic(i
 ; RV32I-NEXT:    lbu a0, 0(a0)
 ; RV32I-NEXT:    andi s0, a1, 255
 ; RV32I-NEXT:    addi s3, sp, 11
+; RV32I-NEXT:    j .LBB50_2
 ; RV32I-NEXT:  .LBB50_1: # %atomicrmw.start
-; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT:    andi a1, a0, 255
-; RV32I-NEXT:    mv a2, a0
-; RV32I-NEXT:    bgeu s0, a1, .LBB50_3
-; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB50_1 Depth=1
-; RV32I-NEXT:    mv a2, s2
-; RV32I-NEXT:  .LBB50_3: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB50_1 Depth=1
+; RV32I-NEXT:    # in Loop: Header=BB50_2 Depth=1
 ; RV32I-NEXT:    sb a0, 11(sp)
 ; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    mv a1, s3
@@ -4280,8 +4333,17 @@ define i8 @atomicrmw_umin_i8_monotonic(i
 ; RV32I-NEXT:    call __atomic_compare_exchange_1
 ; RV32I-NEXT:    mv a1, a0
 ; RV32I-NEXT:    lb a0, 11(sp)
-; RV32I-NEXT:    beqz a1, .LBB50_1
-; RV32I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV32I-NEXT:    bnez a1, .LBB50_4
+; RV32I-NEXT:  .LBB50_2: # %atomicrmw.start
+; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32I-NEXT:    andi a1, a0, 255
+; RV32I-NEXT:    mv a2, a0
+; RV32I-NEXT:    bgeu s0, a1, .LBB50_1
+; RV32I-NEXT:  # %bb.3: # %atomicrmw.start
+; RV32I-NEXT:    # in Loop: Header=BB50_2 Depth=1
+; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    j .LBB50_1
+; RV32I-NEXT:  .LBB50_4: # %atomicrmw.end
 ; RV32I-NEXT:    lw s3, 12(sp)
 ; RV32I-NEXT:    lw s2, 16(sp)
 ; RV32I-NEXT:    lw s1, 20(sp)
@@ -4328,16 +4390,9 @@ define i8 @atomicrmw_umin_i8_monotonic(i
 ; RV64I-NEXT:    lbu a0, 0(a0)
 ; RV64I-NEXT:    andi s0, a1, 255
 ; RV64I-NEXT:    addi s3, sp, 7
+; RV64I-NEXT:    j .LBB50_2
 ; RV64I-NEXT:  .LBB50_1: # %atomicrmw.start
-; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT:    andi a1, a0, 255
-; RV64I-NEXT:    mv a2, a0
-; RV64I-NEXT:    bgeu s0, a1, .LBB50_3
-; RV64I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB50_1 Depth=1
-; RV64I-NEXT:    mv a2, s2
-; RV64I-NEXT:  .LBB50_3: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB50_1 Depth=1
+; RV64I-NEXT:    # in Loop: Header=BB50_2 Depth=1
 ; RV64I-NEXT:    sb a0, 7(sp)
 ; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    mv a1, s3
@@ -4346,8 +4401,17 @@ define i8 @atomicrmw_umin_i8_monotonic(i
 ; RV64I-NEXT:    call __atomic_compare_exchange_1
 ; RV64I-NEXT:    mv a1, a0
 ; RV64I-NEXT:    lb a0, 7(sp)
-; RV64I-NEXT:    beqz a1, .LBB50_1
-; RV64I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV64I-NEXT:    bnez a1, .LBB50_4
+; RV64I-NEXT:  .LBB50_2: # %atomicrmw.start
+; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV64I-NEXT:    andi a1, a0, 255
+; RV64I-NEXT:    mv a2, a0
+; RV64I-NEXT:    bgeu s0, a1, .LBB50_1
+; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
+; RV64I-NEXT:    # in Loop: Header=BB50_2 Depth=1
+; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    j .LBB50_1
+; RV64I-NEXT:  .LBB50_4: # %atomicrmw.end
 ; RV64I-NEXT:    ld s3, 8(sp)
 ; RV64I-NEXT:    ld s2, 16(sp)
 ; RV64I-NEXT:    ld s1, 24(sp)
@@ -4398,16 +4462,9 @@ define i8 @atomicrmw_umin_i8_acquire(i8
 ; RV32I-NEXT:    lbu a0, 0(a0)
 ; RV32I-NEXT:    andi s0, a1, 255
 ; RV32I-NEXT:    addi s3, sp, 11
+; RV32I-NEXT:    j .LBB51_2
 ; RV32I-NEXT:  .LBB51_1: # %atomicrmw.start
-; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT:    andi a1, a0, 255
-; RV32I-NEXT:    mv a2, a0
-; RV32I-NEXT:    bgeu s0, a1, .LBB51_3
-; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB51_1 Depth=1
-; RV32I-NEXT:    mv a2, s2
-; RV32I-NEXT:  .LBB51_3: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB51_1 Depth=1
+; RV32I-NEXT:    # in Loop: Header=BB51_2 Depth=1
 ; RV32I-NEXT:    sb a0, 11(sp)
 ; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    mv a1, s3
@@ -4416,8 +4473,17 @@ define i8 @atomicrmw_umin_i8_acquire(i8
 ; RV32I-NEXT:    call __atomic_compare_exchange_1
 ; RV32I-NEXT:    mv a1, a0
 ; RV32I-NEXT:    lb a0, 11(sp)
-; RV32I-NEXT:    beqz a1, .LBB51_1
-; RV32I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV32I-NEXT:    bnez a1, .LBB51_4
+; RV32I-NEXT:  .LBB51_2: # %atomicrmw.start
+; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32I-NEXT:    andi a1, a0, 255
+; RV32I-NEXT:    mv a2, a0
+; RV32I-NEXT:    bgeu s0, a1, .LBB51_1
+; RV32I-NEXT:  # %bb.3: # %atomicrmw.start
+; RV32I-NEXT:    # in Loop: Header=BB51_2 Depth=1
+; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    j .LBB51_1
+; RV32I-NEXT:  .LBB51_4: # %atomicrmw.end
 ; RV32I-NEXT:    lw s3, 12(sp)
 ; RV32I-NEXT:    lw s2, 16(sp)
 ; RV32I-NEXT:    lw s1, 20(sp)
@@ -4464,16 +4530,9 @@ define i8 @atomicrmw_umin_i8_acquire(i8
 ; RV64I-NEXT:    lbu a0, 0(a0)
 ; RV64I-NEXT:    andi s0, a1, 255
 ; RV64I-NEXT:    addi s3, sp, 7
+; RV64I-NEXT:    j .LBB51_2
 ; RV64I-NEXT:  .LBB51_1: # %atomicrmw.start
-; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT:    andi a1, a0, 255
-; RV64I-NEXT:    mv a2, a0
-; RV64I-NEXT:    bgeu s0, a1, .LBB51_3
-; RV64I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB51_1 Depth=1
-; RV64I-NEXT:    mv a2, s2
-; RV64I-NEXT:  .LBB51_3: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB51_1 Depth=1
+; RV64I-NEXT:    # in Loop: Header=BB51_2 Depth=1
 ; RV64I-NEXT:    sb a0, 7(sp)
 ; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    mv a1, s3
@@ -4482,8 +4541,17 @@ define i8 @atomicrmw_umin_i8_acquire(i8
 ; RV64I-NEXT:    call __atomic_compare_exchange_1
 ; RV64I-NEXT:    mv a1, a0
 ; RV64I-NEXT:    lb a0, 7(sp)
-; RV64I-NEXT:    beqz a1, .LBB51_1
-; RV64I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV64I-NEXT:    bnez a1, .LBB51_4
+; RV64I-NEXT:  .LBB51_2: # %atomicrmw.start
+; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV64I-NEXT:    andi a1, a0, 255
+; RV64I-NEXT:    mv a2, a0
+; RV64I-NEXT:    bgeu s0, a1, .LBB51_1
+; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
+; RV64I-NEXT:    # in Loop: Header=BB51_2 Depth=1
+; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    j .LBB51_1
+; RV64I-NEXT:  .LBB51_4: # %atomicrmw.end
 ; RV64I-NEXT:    ld s3, 8(sp)
 ; RV64I-NEXT:    ld s2, 16(sp)
 ; RV64I-NEXT:    ld s1, 24(sp)
@@ -4534,16 +4602,9 @@ define i8 @atomicrmw_umin_i8_release(i8
 ; RV32I-NEXT:    lbu a0, 0(a0)
 ; RV32I-NEXT:    andi s0, a1, 255
 ; RV32I-NEXT:    addi s3, sp, 11
+; RV32I-NEXT:    j .LBB52_2
 ; RV32I-NEXT:  .LBB52_1: # %atomicrmw.start
-; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT:    andi a1, a0, 255
-; RV32I-NEXT:    mv a2, a0
-; RV32I-NEXT:    bgeu s0, a1, .LBB52_3
-; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB52_1 Depth=1
-; RV32I-NEXT:    mv a2, s2
-; RV32I-NEXT:  .LBB52_3: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB52_1 Depth=1
+; RV32I-NEXT:    # in Loop: Header=BB52_2 Depth=1
 ; RV32I-NEXT:    sb a0, 11(sp)
 ; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    mv a1, s3
@@ -4552,8 +4613,17 @@ define i8 @atomicrmw_umin_i8_release(i8
 ; RV32I-NEXT:    call __atomic_compare_exchange_1
 ; RV32I-NEXT:    mv a1, a0
 ; RV32I-NEXT:    lb a0, 11(sp)
-; RV32I-NEXT:    beqz a1, .LBB52_1
-; RV32I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV32I-NEXT:    bnez a1, .LBB52_4
+; RV32I-NEXT:  .LBB52_2: # %atomicrmw.start
+; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32I-NEXT:    andi a1, a0, 255
+; RV32I-NEXT:    mv a2, a0
+; RV32I-NEXT:    bgeu s0, a1, .LBB52_1
+; RV32I-NEXT:  # %bb.3: # %atomicrmw.start
+; RV32I-NEXT:    # in Loop: Header=BB52_2 Depth=1
+; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    j .LBB52_1
+; RV32I-NEXT:  .LBB52_4: # %atomicrmw.end
 ; RV32I-NEXT:    lw s3, 12(sp)
 ; RV32I-NEXT:    lw s2, 16(sp)
 ; RV32I-NEXT:    lw s1, 20(sp)
@@ -4600,16 +4670,9 @@ define i8 @atomicrmw_umin_i8_release(i8
 ; RV64I-NEXT:    lbu a0, 0(a0)
 ; RV64I-NEXT:    andi s0, a1, 255
 ; RV64I-NEXT:    addi s3, sp, 7
+; RV64I-NEXT:    j .LBB52_2
 ; RV64I-NEXT:  .LBB52_1: # %atomicrmw.start
-; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT:    andi a1, a0, 255
-; RV64I-NEXT:    mv a2, a0
-; RV64I-NEXT:    bgeu s0, a1, .LBB52_3
-; RV64I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB52_1 Depth=1
-; RV64I-NEXT:    mv a2, s2
-; RV64I-NEXT:  .LBB52_3: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB52_1 Depth=1
+; RV64I-NEXT:    # in Loop: Header=BB52_2 Depth=1
 ; RV64I-NEXT:    sb a0, 7(sp)
 ; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    mv a1, s3
@@ -4618,8 +4681,17 @@ define i8 @atomicrmw_umin_i8_release(i8
 ; RV64I-NEXT:    call __atomic_compare_exchange_1
 ; RV64I-NEXT:    mv a1, a0
 ; RV64I-NEXT:    lb a0, 7(sp)
-; RV64I-NEXT:    beqz a1, .LBB52_1
-; RV64I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV64I-NEXT:    bnez a1, .LBB52_4
+; RV64I-NEXT:  .LBB52_2: # %atomicrmw.start
+; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV64I-NEXT:    andi a1, a0, 255
+; RV64I-NEXT:    mv a2, a0
+; RV64I-NEXT:    bgeu s0, a1, .LBB52_1
+; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
+; RV64I-NEXT:    # in Loop: Header=BB52_2 Depth=1
+; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    j .LBB52_1
+; RV64I-NEXT:  .LBB52_4: # %atomicrmw.end
 ; RV64I-NEXT:    ld s3, 8(sp)
 ; RV64I-NEXT:    ld s2, 16(sp)
 ; RV64I-NEXT:    ld s1, 24(sp)
@@ -4670,16 +4742,9 @@ define i8 @atomicrmw_umin_i8_acq_rel(i8
 ; RV32I-NEXT:    lbu a0, 0(a0)
 ; RV32I-NEXT:    andi s0, a1, 255
 ; RV32I-NEXT:    addi s3, sp, 11
+; RV32I-NEXT:    j .LBB53_2
 ; RV32I-NEXT:  .LBB53_1: # %atomicrmw.start
-; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT:    andi a1, a0, 255
-; RV32I-NEXT:    mv a2, a0
-; RV32I-NEXT:    bgeu s0, a1, .LBB53_3
-; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB53_1 Depth=1
-; RV32I-NEXT:    mv a2, s2
-; RV32I-NEXT:  .LBB53_3: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB53_1 Depth=1
+; RV32I-NEXT:    # in Loop: Header=BB53_2 Depth=1
 ; RV32I-NEXT:    sb a0, 11(sp)
 ; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    mv a1, s3
@@ -4688,8 +4753,17 @@ define i8 @atomicrmw_umin_i8_acq_rel(i8
 ; RV32I-NEXT:    call __atomic_compare_exchange_1
 ; RV32I-NEXT:    mv a1, a0
 ; RV32I-NEXT:    lb a0, 11(sp)
-; RV32I-NEXT:    beqz a1, .LBB53_1
-; RV32I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV32I-NEXT:    bnez a1, .LBB53_4
+; RV32I-NEXT:  .LBB53_2: # %atomicrmw.start
+; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32I-NEXT:    andi a1, a0, 255
+; RV32I-NEXT:    mv a2, a0
+; RV32I-NEXT:    bgeu s0, a1, .LBB53_1
+; RV32I-NEXT:  # %bb.3: # %atomicrmw.start
+; RV32I-NEXT:    # in Loop: Header=BB53_2 Depth=1
+; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    j .LBB53_1
+; RV32I-NEXT:  .LBB53_4: # %atomicrmw.end
 ; RV32I-NEXT:    lw s3, 12(sp)
 ; RV32I-NEXT:    lw s2, 16(sp)
 ; RV32I-NEXT:    lw s1, 20(sp)
@@ -4736,16 +4810,9 @@ define i8 @atomicrmw_umin_i8_acq_rel(i8
 ; RV64I-NEXT:    lbu a0, 0(a0)
 ; RV64I-NEXT:    andi s0, a1, 255
 ; RV64I-NEXT:    addi s3, sp, 7
+; RV64I-NEXT:    j .LBB53_2
 ; RV64I-NEXT:  .LBB53_1: # %atomicrmw.start
-; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT:    andi a1, a0, 255
-; RV64I-NEXT:    mv a2, a0
-; RV64I-NEXT:    bgeu s0, a1, .LBB53_3
-; RV64I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB53_1 Depth=1
-; RV64I-NEXT:    mv a2, s2
-; RV64I-NEXT:  .LBB53_3: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB53_1 Depth=1
+; RV64I-NEXT:    # in Loop: Header=BB53_2 Depth=1
 ; RV64I-NEXT:    sb a0, 7(sp)
 ; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    mv a1, s3
@@ -4754,8 +4821,17 @@ define i8 @atomicrmw_umin_i8_acq_rel(i8
 ; RV64I-NEXT:    call __atomic_compare_exchange_1
 ; RV64I-NEXT:    mv a1, a0
 ; RV64I-NEXT:    lb a0, 7(sp)
-; RV64I-NEXT:    beqz a1, .LBB53_1
-; RV64I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV64I-NEXT:    bnez a1, .LBB53_4
+; RV64I-NEXT:  .LBB53_2: # %atomicrmw.start
+; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV64I-NEXT:    andi a1, a0, 255
+; RV64I-NEXT:    mv a2, a0
+; RV64I-NEXT:    bgeu s0, a1, .LBB53_1
+; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
+; RV64I-NEXT:    # in Loop: Header=BB53_2 Depth=1
+; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    j .LBB53_1
+; RV64I-NEXT:  .LBB53_4: # %atomicrmw.end
 ; RV64I-NEXT:    ld s3, 8(sp)
 ; RV64I-NEXT:    ld s2, 16(sp)
 ; RV64I-NEXT:    ld s1, 24(sp)
@@ -4806,16 +4882,9 @@ define i8 @atomicrmw_umin_i8_seq_cst(i8
 ; RV32I-NEXT:    lbu a0, 0(a0)
 ; RV32I-NEXT:    andi s0, a1, 255
 ; RV32I-NEXT:    addi s3, sp, 11
+; RV32I-NEXT:    j .LBB54_2
 ; RV32I-NEXT:  .LBB54_1: # %atomicrmw.start
-; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT:    andi a1, a0, 255
-; RV32I-NEXT:    mv a2, a0
-; RV32I-NEXT:    bgeu s0, a1, .LBB54_3
-; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB54_1 Depth=1
-; RV32I-NEXT:    mv a2, s2
-; RV32I-NEXT:  .LBB54_3: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB54_1 Depth=1
+; RV32I-NEXT:    # in Loop: Header=BB54_2 Depth=1
 ; RV32I-NEXT:    sb a0, 11(sp)
 ; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    mv a1, s3
@@ -4824,8 +4893,17 @@ define i8 @atomicrmw_umin_i8_seq_cst(i8
 ; RV32I-NEXT:    call __atomic_compare_exchange_1
 ; RV32I-NEXT:    mv a1, a0
 ; RV32I-NEXT:    lb a0, 11(sp)
-; RV32I-NEXT:    beqz a1, .LBB54_1
-; RV32I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV32I-NEXT:    bnez a1, .LBB54_4
+; RV32I-NEXT:  .LBB54_2: # %atomicrmw.start
+; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32I-NEXT:    andi a1, a0, 255
+; RV32I-NEXT:    mv a2, a0
+; RV32I-NEXT:    bgeu s0, a1, .LBB54_1
+; RV32I-NEXT:  # %bb.3: # %atomicrmw.start
+; RV32I-NEXT:    # in Loop: Header=BB54_2 Depth=1
+; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    j .LBB54_1
+; RV32I-NEXT:  .LBB54_4: # %atomicrmw.end
 ; RV32I-NEXT:    lw s3, 12(sp)
 ; RV32I-NEXT:    lw s2, 16(sp)
 ; RV32I-NEXT:    lw s1, 20(sp)
@@ -4872,16 +4950,9 @@ define i8 @atomicrmw_umin_i8_seq_cst(i8
 ; RV64I-NEXT:    lbu a0, 0(a0)
 ; RV64I-NEXT:    andi s0, a1, 255
 ; RV64I-NEXT:    addi s3, sp, 7
+; RV64I-NEXT:    j .LBB54_2
 ; RV64I-NEXT:  .LBB54_1: # %atomicrmw.start
-; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT:    andi a1, a0, 255
-; RV64I-NEXT:    mv a2, a0
-; RV64I-NEXT:    bgeu s0, a1, .LBB54_3
-; RV64I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB54_1 Depth=1
-; RV64I-NEXT:    mv a2, s2
-; RV64I-NEXT:  .LBB54_3: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB54_1 Depth=1
+; RV64I-NEXT:    # in Loop: Header=BB54_2 Depth=1
 ; RV64I-NEXT:    sb a0, 7(sp)
 ; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    mv a1, s3
@@ -4890,8 +4961,17 @@ define i8 @atomicrmw_umin_i8_seq_cst(i8
 ; RV64I-NEXT:    call __atomic_compare_exchange_1
 ; RV64I-NEXT:    mv a1, a0
 ; RV64I-NEXT:    lb a0, 7(sp)
-; RV64I-NEXT:    beqz a1, .LBB54_1
-; RV64I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV64I-NEXT:    bnez a1, .LBB54_4
+; RV64I-NEXT:  .LBB54_2: # %atomicrmw.start
+; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV64I-NEXT:    andi a1, a0, 255
+; RV64I-NEXT:    mv a2, a0
+; RV64I-NEXT:    bgeu s0, a1, .LBB54_1
+; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
+; RV64I-NEXT:    # in Loop: Header=BB54_2 Depth=1
+; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    j .LBB54_1
+; RV64I-NEXT:  .LBB54_4: # %atomicrmw.end
 ; RV64I-NEXT:    ld s3, 8(sp)
 ; RV64I-NEXT:    ld s2, 16(sp)
 ; RV64I-NEXT:    ld s1, 24(sp)
@@ -7093,17 +7173,9 @@ define i16 @atomicrmw_max_i16_monotonic(
 ; RV32I-NEXT:    slli a1, a1, 16
 ; RV32I-NEXT:    srai s0, a1, 16
 ; RV32I-NEXT:    addi s3, sp, 10
+; RV32I-NEXT:    j .LBB90_2
 ; RV32I-NEXT:  .LBB90_1: # %atomicrmw.start
-; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT:    slli a1, a0, 16
-; RV32I-NEXT:    srai a1, a1, 16
-; RV32I-NEXT:    mv a2, a0
-; RV32I-NEXT:    blt s0, a1, .LBB90_3
-; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB90_1 Depth=1
-; RV32I-NEXT:    mv a2, s2
-; RV32I-NEXT:  .LBB90_3: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB90_1 Depth=1
+; RV32I-NEXT:    # in Loop: Header=BB90_2 Depth=1
 ; RV32I-NEXT:    sh a0, 10(sp)
 ; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    mv a1, s3
@@ -7112,8 +7184,18 @@ define i16 @atomicrmw_max_i16_monotonic(
 ; RV32I-NEXT:    call __atomic_compare_exchange_2
 ; RV32I-NEXT:    mv a1, a0
 ; RV32I-NEXT:    lh a0, 10(sp)
-; RV32I-NEXT:    beqz a1, .LBB90_1
-; RV32I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV32I-NEXT:    bnez a1, .LBB90_4
+; RV32I-NEXT:  .LBB90_2: # %atomicrmw.start
+; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32I-NEXT:    slli a1, a0, 16
+; RV32I-NEXT:    srai a1, a1, 16
+; RV32I-NEXT:    mv a2, a0
+; RV32I-NEXT:    blt s0, a1, .LBB90_1
+; RV32I-NEXT:  # %bb.3: # %atomicrmw.start
+; RV32I-NEXT:    # in Loop: Header=BB90_2 Depth=1
+; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    j .LBB90_1
+; RV32I-NEXT:  .LBB90_4: # %atomicrmw.end
 ; RV32I-NEXT:    lw s3, 12(sp)
 ; RV32I-NEXT:    lw s2, 16(sp)
 ; RV32I-NEXT:    lw s1, 20(sp)
@@ -7167,17 +7249,9 @@ define i16 @atomicrmw_max_i16_monotonic(
 ; RV64I-NEXT:    slli a1, a1, 48
 ; RV64I-NEXT:    srai s0, a1, 48
 ; RV64I-NEXT:    addi s3, sp, 6
+; RV64I-NEXT:    j .LBB90_2
 ; RV64I-NEXT:  .LBB90_1: # %atomicrmw.start
-; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT:    slli a1, a0, 48
-; RV64I-NEXT:    srai a1, a1, 48
-; RV64I-NEXT:    mv a2, a0
-; RV64I-NEXT:    blt s0, a1, .LBB90_3
-; RV64I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB90_1 Depth=1
-; RV64I-NEXT:    mv a2, s2
-; RV64I-NEXT:  .LBB90_3: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB90_1 Depth=1
+; RV64I-NEXT:    # in Loop: Header=BB90_2 Depth=1
 ; RV64I-NEXT:    sh a0, 6(sp)
 ; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    mv a1, s3
@@ -7186,8 +7260,18 @@ define i16 @atomicrmw_max_i16_monotonic(
 ; RV64I-NEXT:    call __atomic_compare_exchange_2
 ; RV64I-NEXT:    mv a1, a0
 ; RV64I-NEXT:    lh a0, 6(sp)
-; RV64I-NEXT:    beqz a1, .LBB90_1
-; RV64I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV64I-NEXT:    bnez a1, .LBB90_4
+; RV64I-NEXT:  .LBB90_2: # %atomicrmw.start
+; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV64I-NEXT:    slli a1, a0, 48
+; RV64I-NEXT:    srai a1, a1, 48
+; RV64I-NEXT:    mv a2, a0
+; RV64I-NEXT:    blt s0, a1, .LBB90_1
+; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
+; RV64I-NEXT:    # in Loop: Header=BB90_2 Depth=1
+; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    j .LBB90_1
+; RV64I-NEXT:  .LBB90_4: # %atomicrmw.end
 ; RV64I-NEXT:    ld s3, 8(sp)
 ; RV64I-NEXT:    ld s2, 16(sp)
 ; RV64I-NEXT:    ld s1, 24(sp)
@@ -7245,17 +7329,9 @@ define i16 @atomicrmw_max_i16_acquire(i1
 ; RV32I-NEXT:    slli a1, a1, 16
 ; RV32I-NEXT:    srai s0, a1, 16
 ; RV32I-NEXT:    addi s3, sp, 10
+; RV32I-NEXT:    j .LBB91_2
 ; RV32I-NEXT:  .LBB91_1: # %atomicrmw.start
-; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT:    slli a1, a0, 16
-; RV32I-NEXT:    srai a1, a1, 16
-; RV32I-NEXT:    mv a2, a0
-; RV32I-NEXT:    blt s0, a1, .LBB91_3
-; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB91_1 Depth=1
-; RV32I-NEXT:    mv a2, s2
-; RV32I-NEXT:  .LBB91_3: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB91_1 Depth=1
+; RV32I-NEXT:    # in Loop: Header=BB91_2 Depth=1
 ; RV32I-NEXT:    sh a0, 10(sp)
 ; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    mv a1, s3
@@ -7264,8 +7340,18 @@ define i16 @atomicrmw_max_i16_acquire(i1
 ; RV32I-NEXT:    call __atomic_compare_exchange_2
 ; RV32I-NEXT:    mv a1, a0
 ; RV32I-NEXT:    lh a0, 10(sp)
-; RV32I-NEXT:    beqz a1, .LBB91_1
-; RV32I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV32I-NEXT:    bnez a1, .LBB91_4
+; RV32I-NEXT:  .LBB91_2: # %atomicrmw.start
+; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32I-NEXT:    slli a1, a0, 16
+; RV32I-NEXT:    srai a1, a1, 16
+; RV32I-NEXT:    mv a2, a0
+; RV32I-NEXT:    blt s0, a1, .LBB91_1
+; RV32I-NEXT:  # %bb.3: # %atomicrmw.start
+; RV32I-NEXT:    # in Loop: Header=BB91_2 Depth=1
+; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    j .LBB91_1
+; RV32I-NEXT:  .LBB91_4: # %atomicrmw.end
 ; RV32I-NEXT:    lw s3, 12(sp)
 ; RV32I-NEXT:    lw s2, 16(sp)
 ; RV32I-NEXT:    lw s1, 20(sp)
@@ -7319,17 +7405,9 @@ define i16 @atomicrmw_max_i16_acquire(i1
 ; RV64I-NEXT:    slli a1, a1, 48
 ; RV64I-NEXT:    srai s0, a1, 48
 ; RV64I-NEXT:    addi s3, sp, 6
+; RV64I-NEXT:    j .LBB91_2
 ; RV64I-NEXT:  .LBB91_1: # %atomicrmw.start
-; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT:    slli a1, a0, 48
-; RV64I-NEXT:    srai a1, a1, 48
-; RV64I-NEXT:    mv a2, a0
-; RV64I-NEXT:    blt s0, a1, .LBB91_3
-; RV64I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB91_1 Depth=1
-; RV64I-NEXT:    mv a2, s2
-; RV64I-NEXT:  .LBB91_3: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB91_1 Depth=1
+; RV64I-NEXT:    # in Loop: Header=BB91_2 Depth=1
 ; RV64I-NEXT:    sh a0, 6(sp)
 ; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    mv a1, s3
@@ -7338,8 +7416,18 @@ define i16 @atomicrmw_max_i16_acquire(i1
 ; RV64I-NEXT:    call __atomic_compare_exchange_2
 ; RV64I-NEXT:    mv a1, a0
 ; RV64I-NEXT:    lh a0, 6(sp)
-; RV64I-NEXT:    beqz a1, .LBB91_1
-; RV64I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV64I-NEXT:    bnez a1, .LBB91_4
+; RV64I-NEXT:  .LBB91_2: # %atomicrmw.start
+; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV64I-NEXT:    slli a1, a0, 48
+; RV64I-NEXT:    srai a1, a1, 48
+; RV64I-NEXT:    mv a2, a0
+; RV64I-NEXT:    blt s0, a1, .LBB91_1
+; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
+; RV64I-NEXT:    # in Loop: Header=BB91_2 Depth=1
+; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    j .LBB91_1
+; RV64I-NEXT:  .LBB91_4: # %atomicrmw.end
 ; RV64I-NEXT:    ld s3, 8(sp)
 ; RV64I-NEXT:    ld s2, 16(sp)
 ; RV64I-NEXT:    ld s1, 24(sp)
@@ -7397,17 +7485,9 @@ define i16 @atomicrmw_max_i16_release(i1
 ; RV32I-NEXT:    slli a1, a1, 16
 ; RV32I-NEXT:    srai s0, a1, 16
 ; RV32I-NEXT:    addi s3, sp, 10
+; RV32I-NEXT:    j .LBB92_2
 ; RV32I-NEXT:  .LBB92_1: # %atomicrmw.start
-; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT:    slli a1, a0, 16
-; RV32I-NEXT:    srai a1, a1, 16
-; RV32I-NEXT:    mv a2, a0
-; RV32I-NEXT:    blt s0, a1, .LBB92_3
-; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB92_1 Depth=1
-; RV32I-NEXT:    mv a2, s2
-; RV32I-NEXT:  .LBB92_3: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB92_1 Depth=1
+; RV32I-NEXT:    # in Loop: Header=BB92_2 Depth=1
 ; RV32I-NEXT:    sh a0, 10(sp)
 ; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    mv a1, s3
@@ -7416,8 +7496,18 @@ define i16 @atomicrmw_max_i16_release(i1
 ; RV32I-NEXT:    call __atomic_compare_exchange_2
 ; RV32I-NEXT:    mv a1, a0
 ; RV32I-NEXT:    lh a0, 10(sp)
-; RV32I-NEXT:    beqz a1, .LBB92_1
-; RV32I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV32I-NEXT:    bnez a1, .LBB92_4
+; RV32I-NEXT:  .LBB92_2: # %atomicrmw.start
+; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32I-NEXT:    slli a1, a0, 16
+; RV32I-NEXT:    srai a1, a1, 16
+; RV32I-NEXT:    mv a2, a0
+; RV32I-NEXT:    blt s0, a1, .LBB92_1
+; RV32I-NEXT:  # %bb.3: # %atomicrmw.start
+; RV32I-NEXT:    # in Loop: Header=BB92_2 Depth=1
+; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    j .LBB92_1
+; RV32I-NEXT:  .LBB92_4: # %atomicrmw.end
 ; RV32I-NEXT:    lw s3, 12(sp)
 ; RV32I-NEXT:    lw s2, 16(sp)
 ; RV32I-NEXT:    lw s1, 20(sp)
@@ -7471,17 +7561,9 @@ define i16 @atomicrmw_max_i16_release(i1
 ; RV64I-NEXT:    slli a1, a1, 48
 ; RV64I-NEXT:    srai s0, a1, 48
 ; RV64I-NEXT:    addi s3, sp, 6
+; RV64I-NEXT:    j .LBB92_2
 ; RV64I-NEXT:  .LBB92_1: # %atomicrmw.start
-; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT:    slli a1, a0, 48
-; RV64I-NEXT:    srai a1, a1, 48
-; RV64I-NEXT:    mv a2, a0
-; RV64I-NEXT:    blt s0, a1, .LBB92_3
-; RV64I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB92_1 Depth=1
-; RV64I-NEXT:    mv a2, s2
-; RV64I-NEXT:  .LBB92_3: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB92_1 Depth=1
+; RV64I-NEXT:    # in Loop: Header=BB92_2 Depth=1
 ; RV64I-NEXT:    sh a0, 6(sp)
 ; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    mv a1, s3
@@ -7490,8 +7572,18 @@ define i16 @atomicrmw_max_i16_release(i1
 ; RV64I-NEXT:    call __atomic_compare_exchange_2
 ; RV64I-NEXT:    mv a1, a0
 ; RV64I-NEXT:    lh a0, 6(sp)
-; RV64I-NEXT:    beqz a1, .LBB92_1
-; RV64I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV64I-NEXT:    bnez a1, .LBB92_4
+; RV64I-NEXT:  .LBB92_2: # %atomicrmw.start
+; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV64I-NEXT:    slli a1, a0, 48
+; RV64I-NEXT:    srai a1, a1, 48
+; RV64I-NEXT:    mv a2, a0
+; RV64I-NEXT:    blt s0, a1, .LBB92_1
+; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
+; RV64I-NEXT:    # in Loop: Header=BB92_2 Depth=1
+; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    j .LBB92_1
+; RV64I-NEXT:  .LBB92_4: # %atomicrmw.end
 ; RV64I-NEXT:    ld s3, 8(sp)
 ; RV64I-NEXT:    ld s2, 16(sp)
 ; RV64I-NEXT:    ld s1, 24(sp)
@@ -7549,17 +7641,9 @@ define i16 @atomicrmw_max_i16_acq_rel(i1
 ; RV32I-NEXT:    slli a1, a1, 16
 ; RV32I-NEXT:    srai s0, a1, 16
 ; RV32I-NEXT:    addi s3, sp, 10
+; RV32I-NEXT:    j .LBB93_2
 ; RV32I-NEXT:  .LBB93_1: # %atomicrmw.start
-; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT:    slli a1, a0, 16
-; RV32I-NEXT:    srai a1, a1, 16
-; RV32I-NEXT:    mv a2, a0
-; RV32I-NEXT:    blt s0, a1, .LBB93_3
-; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB93_1 Depth=1
-; RV32I-NEXT:    mv a2, s2
-; RV32I-NEXT:  .LBB93_3: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB93_1 Depth=1
+; RV32I-NEXT:    # in Loop: Header=BB93_2 Depth=1
 ; RV32I-NEXT:    sh a0, 10(sp)
 ; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    mv a1, s3
@@ -7568,8 +7652,18 @@ define i16 @atomicrmw_max_i16_acq_rel(i1
 ; RV32I-NEXT:    call __atomic_compare_exchange_2
 ; RV32I-NEXT:    mv a1, a0
 ; RV32I-NEXT:    lh a0, 10(sp)
-; RV32I-NEXT:    beqz a1, .LBB93_1
-; RV32I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV32I-NEXT:    bnez a1, .LBB93_4
+; RV32I-NEXT:  .LBB93_2: # %atomicrmw.start
+; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32I-NEXT:    slli a1, a0, 16
+; RV32I-NEXT:    srai a1, a1, 16
+; RV32I-NEXT:    mv a2, a0
+; RV32I-NEXT:    blt s0, a1, .LBB93_1
+; RV32I-NEXT:  # %bb.3: # %atomicrmw.start
+; RV32I-NEXT:    # in Loop: Header=BB93_2 Depth=1
+; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    j .LBB93_1
+; RV32I-NEXT:  .LBB93_4: # %atomicrmw.end
 ; RV32I-NEXT:    lw s3, 12(sp)
 ; RV32I-NEXT:    lw s2, 16(sp)
 ; RV32I-NEXT:    lw s1, 20(sp)
@@ -7623,17 +7717,9 @@ define i16 @atomicrmw_max_i16_acq_rel(i1
 ; RV64I-NEXT:    slli a1, a1, 48
 ; RV64I-NEXT:    srai s0, a1, 48
 ; RV64I-NEXT:    addi s3, sp, 6
+; RV64I-NEXT:    j .LBB93_2
 ; RV64I-NEXT:  .LBB93_1: # %atomicrmw.start
-; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT:    slli a1, a0, 48
-; RV64I-NEXT:    srai a1, a1, 48
-; RV64I-NEXT:    mv a2, a0
-; RV64I-NEXT:    blt s0, a1, .LBB93_3
-; RV64I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB93_1 Depth=1
-; RV64I-NEXT:    mv a2, s2
-; RV64I-NEXT:  .LBB93_3: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB93_1 Depth=1
+; RV64I-NEXT:    # in Loop: Header=BB93_2 Depth=1
 ; RV64I-NEXT:    sh a0, 6(sp)
 ; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    mv a1, s3
@@ -7642,8 +7728,18 @@ define i16 @atomicrmw_max_i16_acq_rel(i1
 ; RV64I-NEXT:    call __atomic_compare_exchange_2
 ; RV64I-NEXT:    mv a1, a0
 ; RV64I-NEXT:    lh a0, 6(sp)
-; RV64I-NEXT:    beqz a1, .LBB93_1
-; RV64I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV64I-NEXT:    bnez a1, .LBB93_4
+; RV64I-NEXT:  .LBB93_2: # %atomicrmw.start
+; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV64I-NEXT:    slli a1, a0, 48
+; RV64I-NEXT:    srai a1, a1, 48
+; RV64I-NEXT:    mv a2, a0
+; RV64I-NEXT:    blt s0, a1, .LBB93_1
+; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
+; RV64I-NEXT:    # in Loop: Header=BB93_2 Depth=1
+; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    j .LBB93_1
+; RV64I-NEXT:  .LBB93_4: # %atomicrmw.end
 ; RV64I-NEXT:    ld s3, 8(sp)
 ; RV64I-NEXT:    ld s2, 16(sp)
 ; RV64I-NEXT:    ld s1, 24(sp)
@@ -7701,17 +7797,9 @@ define i16 @atomicrmw_max_i16_seq_cst(i1
 ; RV32I-NEXT:    slli a1, a1, 16
 ; RV32I-NEXT:    srai s0, a1, 16
 ; RV32I-NEXT:    addi s3, sp, 10
+; RV32I-NEXT:    j .LBB94_2
 ; RV32I-NEXT:  .LBB94_1: # %atomicrmw.start
-; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT:    slli a1, a0, 16
-; RV32I-NEXT:    srai a1, a1, 16
-; RV32I-NEXT:    mv a2, a0
-; RV32I-NEXT:    blt s0, a1, .LBB94_3
-; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB94_1 Depth=1
-; RV32I-NEXT:    mv a2, s2
-; RV32I-NEXT:  .LBB94_3: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB94_1 Depth=1
+; RV32I-NEXT:    # in Loop: Header=BB94_2 Depth=1
 ; RV32I-NEXT:    sh a0, 10(sp)
 ; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    mv a1, s3
@@ -7720,8 +7808,18 @@ define i16 @atomicrmw_max_i16_seq_cst(i1
 ; RV32I-NEXT:    call __atomic_compare_exchange_2
 ; RV32I-NEXT:    mv a1, a0
 ; RV32I-NEXT:    lh a0, 10(sp)
-; RV32I-NEXT:    beqz a1, .LBB94_1
-; RV32I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV32I-NEXT:    bnez a1, .LBB94_4
+; RV32I-NEXT:  .LBB94_2: # %atomicrmw.start
+; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32I-NEXT:    slli a1, a0, 16
+; RV32I-NEXT:    srai a1, a1, 16
+; RV32I-NEXT:    mv a2, a0
+; RV32I-NEXT:    blt s0, a1, .LBB94_1
+; RV32I-NEXT:  # %bb.3: # %atomicrmw.start
+; RV32I-NEXT:    # in Loop: Header=BB94_2 Depth=1
+; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    j .LBB94_1
+; RV32I-NEXT:  .LBB94_4: # %atomicrmw.end
 ; RV32I-NEXT:    lw s3, 12(sp)
 ; RV32I-NEXT:    lw s2, 16(sp)
 ; RV32I-NEXT:    lw s1, 20(sp)
@@ -7775,17 +7873,9 @@ define i16 @atomicrmw_max_i16_seq_cst(i1
 ; RV64I-NEXT:    slli a1, a1, 48
 ; RV64I-NEXT:    srai s0, a1, 48
 ; RV64I-NEXT:    addi s3, sp, 6
+; RV64I-NEXT:    j .LBB94_2
 ; RV64I-NEXT:  .LBB94_1: # %atomicrmw.start
-; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT:    slli a1, a0, 48
-; RV64I-NEXT:    srai a1, a1, 48
-; RV64I-NEXT:    mv a2, a0
-; RV64I-NEXT:    blt s0, a1, .LBB94_3
-; RV64I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB94_1 Depth=1
-; RV64I-NEXT:    mv a2, s2
-; RV64I-NEXT:  .LBB94_3: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB94_1 Depth=1
+; RV64I-NEXT:    # in Loop: Header=BB94_2 Depth=1
 ; RV64I-NEXT:    sh a0, 6(sp)
 ; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    mv a1, s3
@@ -7794,8 +7884,18 @@ define i16 @atomicrmw_max_i16_seq_cst(i1
 ; RV64I-NEXT:    call __atomic_compare_exchange_2
 ; RV64I-NEXT:    mv a1, a0
 ; RV64I-NEXT:    lh a0, 6(sp)
-; RV64I-NEXT:    beqz a1, .LBB94_1
-; RV64I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV64I-NEXT:    bnez a1, .LBB94_4
+; RV64I-NEXT:  .LBB94_2: # %atomicrmw.start
+; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV64I-NEXT:    slli a1, a0, 48
+; RV64I-NEXT:    srai a1, a1, 48
+; RV64I-NEXT:    mv a2, a0
+; RV64I-NEXT:    blt s0, a1, .LBB94_1
+; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
+; RV64I-NEXT:    # in Loop: Header=BB94_2 Depth=1
+; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    j .LBB94_1
+; RV64I-NEXT:  .LBB94_4: # %atomicrmw.end
 ; RV64I-NEXT:    ld s3, 8(sp)
 ; RV64I-NEXT:    ld s2, 16(sp)
 ; RV64I-NEXT:    ld s1, 24(sp)
@@ -7853,17 +7953,9 @@ define i16 @atomicrmw_min_i16_monotonic(
 ; RV32I-NEXT:    slli a1, a1, 16
 ; RV32I-NEXT:    srai s0, a1, 16
 ; RV32I-NEXT:    addi s3, sp, 10
+; RV32I-NEXT:    j .LBB95_2
 ; RV32I-NEXT:  .LBB95_1: # %atomicrmw.start
-; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT:    slli a1, a0, 16
-; RV32I-NEXT:    srai a1, a1, 16
-; RV32I-NEXT:    mv a2, a0
-; RV32I-NEXT:    bge s0, a1, .LBB95_3
-; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB95_1 Depth=1
-; RV32I-NEXT:    mv a2, s2
-; RV32I-NEXT:  .LBB95_3: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB95_1 Depth=1
+; RV32I-NEXT:    # in Loop: Header=BB95_2 Depth=1
 ; RV32I-NEXT:    sh a0, 10(sp)
 ; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    mv a1, s3
@@ -7872,8 +7964,18 @@ define i16 @atomicrmw_min_i16_monotonic(
 ; RV32I-NEXT:    call __atomic_compare_exchange_2
 ; RV32I-NEXT:    mv a1, a0
 ; RV32I-NEXT:    lh a0, 10(sp)
-; RV32I-NEXT:    beqz a1, .LBB95_1
-; RV32I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV32I-NEXT:    bnez a1, .LBB95_4
+; RV32I-NEXT:  .LBB95_2: # %atomicrmw.start
+; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32I-NEXT:    slli a1, a0, 16
+; RV32I-NEXT:    srai a1, a1, 16
+; RV32I-NEXT:    mv a2, a0
+; RV32I-NEXT:    bge s0, a1, .LBB95_1
+; RV32I-NEXT:  # %bb.3: # %atomicrmw.start
+; RV32I-NEXT:    # in Loop: Header=BB95_2 Depth=1
+; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    j .LBB95_1
+; RV32I-NEXT:  .LBB95_4: # %atomicrmw.end
 ; RV32I-NEXT:    lw s3, 12(sp)
 ; RV32I-NEXT:    lw s2, 16(sp)
 ; RV32I-NEXT:    lw s1, 20(sp)
@@ -7927,17 +8029,9 @@ define i16 @atomicrmw_min_i16_monotonic(
 ; RV64I-NEXT:    slli a1, a1, 48
 ; RV64I-NEXT:    srai s0, a1, 48
 ; RV64I-NEXT:    addi s3, sp, 6
+; RV64I-NEXT:    j .LBB95_2
 ; RV64I-NEXT:  .LBB95_1: # %atomicrmw.start
-; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT:    slli a1, a0, 48
-; RV64I-NEXT:    srai a1, a1, 48
-; RV64I-NEXT:    mv a2, a0
-; RV64I-NEXT:    bge s0, a1, .LBB95_3
-; RV64I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB95_1 Depth=1
-; RV64I-NEXT:    mv a2, s2
-; RV64I-NEXT:  .LBB95_3: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB95_1 Depth=1
+; RV64I-NEXT:    # in Loop: Header=BB95_2 Depth=1
 ; RV64I-NEXT:    sh a0, 6(sp)
 ; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    mv a1, s3
@@ -7946,8 +8040,18 @@ define i16 @atomicrmw_min_i16_monotonic(
 ; RV64I-NEXT:    call __atomic_compare_exchange_2
 ; RV64I-NEXT:    mv a1, a0
 ; RV64I-NEXT:    lh a0, 6(sp)
-; RV64I-NEXT:    beqz a1, .LBB95_1
-; RV64I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV64I-NEXT:    bnez a1, .LBB95_4
+; RV64I-NEXT:  .LBB95_2: # %atomicrmw.start
+; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV64I-NEXT:    slli a1, a0, 48
+; RV64I-NEXT:    srai a1, a1, 48
+; RV64I-NEXT:    mv a2, a0
+; RV64I-NEXT:    bge s0, a1, .LBB95_1
+; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
+; RV64I-NEXT:    # in Loop: Header=BB95_2 Depth=1
+; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    j .LBB95_1
+; RV64I-NEXT:  .LBB95_4: # %atomicrmw.end
 ; RV64I-NEXT:    ld s3, 8(sp)
 ; RV64I-NEXT:    ld s2, 16(sp)
 ; RV64I-NEXT:    ld s1, 24(sp)
@@ -8005,17 +8109,9 @@ define i16 @atomicrmw_min_i16_acquire(i1
 ; RV32I-NEXT:    slli a1, a1, 16
 ; RV32I-NEXT:    srai s0, a1, 16
 ; RV32I-NEXT:    addi s3, sp, 10
+; RV32I-NEXT:    j .LBB96_2
 ; RV32I-NEXT:  .LBB96_1: # %atomicrmw.start
-; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT:    slli a1, a0, 16
-; RV32I-NEXT:    srai a1, a1, 16
-; RV32I-NEXT:    mv a2, a0
-; RV32I-NEXT:    bge s0, a1, .LBB96_3
-; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB96_1 Depth=1
-; RV32I-NEXT:    mv a2, s2
-; RV32I-NEXT:  .LBB96_3: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB96_1 Depth=1
+; RV32I-NEXT:    # in Loop: Header=BB96_2 Depth=1
 ; RV32I-NEXT:    sh a0, 10(sp)
 ; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    mv a1, s3
@@ -8024,8 +8120,18 @@ define i16 @atomicrmw_min_i16_acquire(i1
 ; RV32I-NEXT:    call __atomic_compare_exchange_2
 ; RV32I-NEXT:    mv a1, a0
 ; RV32I-NEXT:    lh a0, 10(sp)
-; RV32I-NEXT:    beqz a1, .LBB96_1
-; RV32I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV32I-NEXT:    bnez a1, .LBB96_4
+; RV32I-NEXT:  .LBB96_2: # %atomicrmw.start
+; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32I-NEXT:    slli a1, a0, 16
+; RV32I-NEXT:    srai a1, a1, 16
+; RV32I-NEXT:    mv a2, a0
+; RV32I-NEXT:    bge s0, a1, .LBB96_1
+; RV32I-NEXT:  # %bb.3: # %atomicrmw.start
+; RV32I-NEXT:    # in Loop: Header=BB96_2 Depth=1
+; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    j .LBB96_1
+; RV32I-NEXT:  .LBB96_4: # %atomicrmw.end
 ; RV32I-NEXT:    lw s3, 12(sp)
 ; RV32I-NEXT:    lw s2, 16(sp)
 ; RV32I-NEXT:    lw s1, 20(sp)
@@ -8079,17 +8185,9 @@ define i16 @atomicrmw_min_i16_acquire(i1
 ; RV64I-NEXT:    slli a1, a1, 48
 ; RV64I-NEXT:    srai s0, a1, 48
 ; RV64I-NEXT:    addi s3, sp, 6
+; RV64I-NEXT:    j .LBB96_2
 ; RV64I-NEXT:  .LBB96_1: # %atomicrmw.start
-; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT:    slli a1, a0, 48
-; RV64I-NEXT:    srai a1, a1, 48
-; RV64I-NEXT:    mv a2, a0
-; RV64I-NEXT:    bge s0, a1, .LBB96_3
-; RV64I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB96_1 Depth=1
-; RV64I-NEXT:    mv a2, s2
-; RV64I-NEXT:  .LBB96_3: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB96_1 Depth=1
+; RV64I-NEXT:    # in Loop: Header=BB96_2 Depth=1
 ; RV64I-NEXT:    sh a0, 6(sp)
 ; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    mv a1, s3
@@ -8098,8 +8196,18 @@ define i16 @atomicrmw_min_i16_acquire(i1
 ; RV64I-NEXT:    call __atomic_compare_exchange_2
 ; RV64I-NEXT:    mv a1, a0
 ; RV64I-NEXT:    lh a0, 6(sp)
-; RV64I-NEXT:    beqz a1, .LBB96_1
-; RV64I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV64I-NEXT:    bnez a1, .LBB96_4
+; RV64I-NEXT:  .LBB96_2: # %atomicrmw.start
+; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV64I-NEXT:    slli a1, a0, 48
+; RV64I-NEXT:    srai a1, a1, 48
+; RV64I-NEXT:    mv a2, a0
+; RV64I-NEXT:    bge s0, a1, .LBB96_1
+; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
+; RV64I-NEXT:    # in Loop: Header=BB96_2 Depth=1
+; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    j .LBB96_1
+; RV64I-NEXT:  .LBB96_4: # %atomicrmw.end
 ; RV64I-NEXT:    ld s3, 8(sp)
 ; RV64I-NEXT:    ld s2, 16(sp)
 ; RV64I-NEXT:    ld s1, 24(sp)
@@ -8157,17 +8265,9 @@ define i16 @atomicrmw_min_i16_release(i1
 ; RV32I-NEXT:    slli a1, a1, 16
 ; RV32I-NEXT:    srai s0, a1, 16
 ; RV32I-NEXT:    addi s3, sp, 10
+; RV32I-NEXT:    j .LBB97_2
 ; RV32I-NEXT:  .LBB97_1: # %atomicrmw.start
-; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT:    slli a1, a0, 16
-; RV32I-NEXT:    srai a1, a1, 16
-; RV32I-NEXT:    mv a2, a0
-; RV32I-NEXT:    bge s0, a1, .LBB97_3
-; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB97_1 Depth=1
-; RV32I-NEXT:    mv a2, s2
-; RV32I-NEXT:  .LBB97_3: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB97_1 Depth=1
+; RV32I-NEXT:    # in Loop: Header=BB97_2 Depth=1
 ; RV32I-NEXT:    sh a0, 10(sp)
 ; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    mv a1, s3
@@ -8176,8 +8276,18 @@ define i16 @atomicrmw_min_i16_release(i1
 ; RV32I-NEXT:    call __atomic_compare_exchange_2
 ; RV32I-NEXT:    mv a1, a0
 ; RV32I-NEXT:    lh a0, 10(sp)
-; RV32I-NEXT:    beqz a1, .LBB97_1
-; RV32I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV32I-NEXT:    bnez a1, .LBB97_4
+; RV32I-NEXT:  .LBB97_2: # %atomicrmw.start
+; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32I-NEXT:    slli a1, a0, 16
+; RV32I-NEXT:    srai a1, a1, 16
+; RV32I-NEXT:    mv a2, a0
+; RV32I-NEXT:    bge s0, a1, .LBB97_1
+; RV32I-NEXT:  # %bb.3: # %atomicrmw.start
+; RV32I-NEXT:    # in Loop: Header=BB97_2 Depth=1
+; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    j .LBB97_1
+; RV32I-NEXT:  .LBB97_4: # %atomicrmw.end
 ; RV32I-NEXT:    lw s3, 12(sp)
 ; RV32I-NEXT:    lw s2, 16(sp)
 ; RV32I-NEXT:    lw s1, 20(sp)
@@ -8231,17 +8341,9 @@ define i16 @atomicrmw_min_i16_release(i1
 ; RV64I-NEXT:    slli a1, a1, 48
 ; RV64I-NEXT:    srai s0, a1, 48
 ; RV64I-NEXT:    addi s3, sp, 6
+; RV64I-NEXT:    j .LBB97_2
 ; RV64I-NEXT:  .LBB97_1: # %atomicrmw.start
-; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT:    slli a1, a0, 48
-; RV64I-NEXT:    srai a1, a1, 48
-; RV64I-NEXT:    mv a2, a0
-; RV64I-NEXT:    bge s0, a1, .LBB97_3
-; RV64I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB97_1 Depth=1
-; RV64I-NEXT:    mv a2, s2
-; RV64I-NEXT:  .LBB97_3: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB97_1 Depth=1
+; RV64I-NEXT:    # in Loop: Header=BB97_2 Depth=1
 ; RV64I-NEXT:    sh a0, 6(sp)
 ; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    mv a1, s3
@@ -8250,8 +8352,18 @@ define i16 @atomicrmw_min_i16_release(i1
 ; RV64I-NEXT:    call __atomic_compare_exchange_2
 ; RV64I-NEXT:    mv a1, a0
 ; RV64I-NEXT:    lh a0, 6(sp)
-; RV64I-NEXT:    beqz a1, .LBB97_1
-; RV64I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV64I-NEXT:    bnez a1, .LBB97_4
+; RV64I-NEXT:  .LBB97_2: # %atomicrmw.start
+; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV64I-NEXT:    slli a1, a0, 48
+; RV64I-NEXT:    srai a1, a1, 48
+; RV64I-NEXT:    mv a2, a0
+; RV64I-NEXT:    bge s0, a1, .LBB97_1
+; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
+; RV64I-NEXT:    # in Loop: Header=BB97_2 Depth=1
+; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    j .LBB97_1
+; RV64I-NEXT:  .LBB97_4: # %atomicrmw.end
 ; RV64I-NEXT:    ld s3, 8(sp)
 ; RV64I-NEXT:    ld s2, 16(sp)
 ; RV64I-NEXT:    ld s1, 24(sp)
@@ -8309,17 +8421,9 @@ define i16 @atomicrmw_min_i16_acq_rel(i1
 ; RV32I-NEXT:    slli a1, a1, 16
 ; RV32I-NEXT:    srai s0, a1, 16
 ; RV32I-NEXT:    addi s3, sp, 10
+; RV32I-NEXT:    j .LBB98_2
 ; RV32I-NEXT:  .LBB98_1: # %atomicrmw.start
-; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT:    slli a1, a0, 16
-; RV32I-NEXT:    srai a1, a1, 16
-; RV32I-NEXT:    mv a2, a0
-; RV32I-NEXT:    bge s0, a1, .LBB98_3
-; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB98_1 Depth=1
-; RV32I-NEXT:    mv a2, s2
-; RV32I-NEXT:  .LBB98_3: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB98_1 Depth=1
+; RV32I-NEXT:    # in Loop: Header=BB98_2 Depth=1
 ; RV32I-NEXT:    sh a0, 10(sp)
 ; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    mv a1, s3
@@ -8328,8 +8432,18 @@ define i16 @atomicrmw_min_i16_acq_rel(i1
 ; RV32I-NEXT:    call __atomic_compare_exchange_2
 ; RV32I-NEXT:    mv a1, a0
 ; RV32I-NEXT:    lh a0, 10(sp)
-; RV32I-NEXT:    beqz a1, .LBB98_1
-; RV32I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV32I-NEXT:    bnez a1, .LBB98_4
+; RV32I-NEXT:  .LBB98_2: # %atomicrmw.start
+; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32I-NEXT:    slli a1, a0, 16
+; RV32I-NEXT:    srai a1, a1, 16
+; RV32I-NEXT:    mv a2, a0
+; RV32I-NEXT:    bge s0, a1, .LBB98_1
+; RV32I-NEXT:  # %bb.3: # %atomicrmw.start
+; RV32I-NEXT:    # in Loop: Header=BB98_2 Depth=1
+; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    j .LBB98_1
+; RV32I-NEXT:  .LBB98_4: # %atomicrmw.end
 ; RV32I-NEXT:    lw s3, 12(sp)
 ; RV32I-NEXT:    lw s2, 16(sp)
 ; RV32I-NEXT:    lw s1, 20(sp)
@@ -8383,17 +8497,9 @@ define i16 @atomicrmw_min_i16_acq_rel(i1
 ; RV64I-NEXT:    slli a1, a1, 48
 ; RV64I-NEXT:    srai s0, a1, 48
 ; RV64I-NEXT:    addi s3, sp, 6
+; RV64I-NEXT:    j .LBB98_2
 ; RV64I-NEXT:  .LBB98_1: # %atomicrmw.start
-; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT:    slli a1, a0, 48
-; RV64I-NEXT:    srai a1, a1, 48
-; RV64I-NEXT:    mv a2, a0
-; RV64I-NEXT:    bge s0, a1, .LBB98_3
-; RV64I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB98_1 Depth=1
-; RV64I-NEXT:    mv a2, s2
-; RV64I-NEXT:  .LBB98_3: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB98_1 Depth=1
+; RV64I-NEXT:    # in Loop: Header=BB98_2 Depth=1
 ; RV64I-NEXT:    sh a0, 6(sp)
 ; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    mv a1, s3
@@ -8402,8 +8508,18 @@ define i16 @atomicrmw_min_i16_acq_rel(i1
 ; RV64I-NEXT:    call __atomic_compare_exchange_2
 ; RV64I-NEXT:    mv a1, a0
 ; RV64I-NEXT:    lh a0, 6(sp)
-; RV64I-NEXT:    beqz a1, .LBB98_1
-; RV64I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV64I-NEXT:    bnez a1, .LBB98_4
+; RV64I-NEXT:  .LBB98_2: # %atomicrmw.start
+; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV64I-NEXT:    slli a1, a0, 48
+; RV64I-NEXT:    srai a1, a1, 48
+; RV64I-NEXT:    mv a2, a0
+; RV64I-NEXT:    bge s0, a1, .LBB98_1
+; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
+; RV64I-NEXT:    # in Loop: Header=BB98_2 Depth=1
+; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    j .LBB98_1
+; RV64I-NEXT:  .LBB98_4: # %atomicrmw.end
 ; RV64I-NEXT:    ld s3, 8(sp)
 ; RV64I-NEXT:    ld s2, 16(sp)
 ; RV64I-NEXT:    ld s1, 24(sp)
@@ -8461,17 +8577,9 @@ define i16 @atomicrmw_min_i16_seq_cst(i1
 ; RV32I-NEXT:    slli a1, a1, 16
 ; RV32I-NEXT:    srai s0, a1, 16
 ; RV32I-NEXT:    addi s3, sp, 10
+; RV32I-NEXT:    j .LBB99_2
 ; RV32I-NEXT:  .LBB99_1: # %atomicrmw.start
-; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT:    slli a1, a0, 16
-; RV32I-NEXT:    srai a1, a1, 16
-; RV32I-NEXT:    mv a2, a0
-; RV32I-NEXT:    bge s0, a1, .LBB99_3
-; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB99_1 Depth=1
-; RV32I-NEXT:    mv a2, s2
-; RV32I-NEXT:  .LBB99_3: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB99_1 Depth=1
+; RV32I-NEXT:    # in Loop: Header=BB99_2 Depth=1
 ; RV32I-NEXT:    sh a0, 10(sp)
 ; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    mv a1, s3
@@ -8480,8 +8588,18 @@ define i16 @atomicrmw_min_i16_seq_cst(i1
 ; RV32I-NEXT:    call __atomic_compare_exchange_2
 ; RV32I-NEXT:    mv a1, a0
 ; RV32I-NEXT:    lh a0, 10(sp)
-; RV32I-NEXT:    beqz a1, .LBB99_1
-; RV32I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV32I-NEXT:    bnez a1, .LBB99_4
+; RV32I-NEXT:  .LBB99_2: # %atomicrmw.start
+; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32I-NEXT:    slli a1, a0, 16
+; RV32I-NEXT:    srai a1, a1, 16
+; RV32I-NEXT:    mv a2, a0
+; RV32I-NEXT:    bge s0, a1, .LBB99_1
+; RV32I-NEXT:  # %bb.3: # %atomicrmw.start
+; RV32I-NEXT:    # in Loop: Header=BB99_2 Depth=1
+; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    j .LBB99_1
+; RV32I-NEXT:  .LBB99_4: # %atomicrmw.end
 ; RV32I-NEXT:    lw s3, 12(sp)
 ; RV32I-NEXT:    lw s2, 16(sp)
 ; RV32I-NEXT:    lw s1, 20(sp)
@@ -8535,17 +8653,9 @@ define i16 @atomicrmw_min_i16_seq_cst(i1
 ; RV64I-NEXT:    slli a1, a1, 48
 ; RV64I-NEXT:    srai s0, a1, 48
 ; RV64I-NEXT:    addi s3, sp, 6
+; RV64I-NEXT:    j .LBB99_2
 ; RV64I-NEXT:  .LBB99_1: # %atomicrmw.start
-; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT:    slli a1, a0, 48
-; RV64I-NEXT:    srai a1, a1, 48
-; RV64I-NEXT:    mv a2, a0
-; RV64I-NEXT:    bge s0, a1, .LBB99_3
-; RV64I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB99_1 Depth=1
-; RV64I-NEXT:    mv a2, s2
-; RV64I-NEXT:  .LBB99_3: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB99_1 Depth=1
+; RV64I-NEXT:    # in Loop: Header=BB99_2 Depth=1
 ; RV64I-NEXT:    sh a0, 6(sp)
 ; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    mv a1, s3
@@ -8554,8 +8664,18 @@ define i16 @atomicrmw_min_i16_seq_cst(i1
 ; RV64I-NEXT:    call __atomic_compare_exchange_2
 ; RV64I-NEXT:    mv a1, a0
 ; RV64I-NEXT:    lh a0, 6(sp)
-; RV64I-NEXT:    beqz a1, .LBB99_1
-; RV64I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV64I-NEXT:    bnez a1, .LBB99_4
+; RV64I-NEXT:  .LBB99_2: # %atomicrmw.start
+; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV64I-NEXT:    slli a1, a0, 48
+; RV64I-NEXT:    srai a1, a1, 48
+; RV64I-NEXT:    mv a2, a0
+; RV64I-NEXT:    bge s0, a1, .LBB99_1
+; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
+; RV64I-NEXT:    # in Loop: Header=BB99_2 Depth=1
+; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    j .LBB99_1
+; RV64I-NEXT:  .LBB99_4: # %atomicrmw.end
 ; RV64I-NEXT:    ld s3, 8(sp)
 ; RV64I-NEXT:    ld s2, 16(sp)
 ; RV64I-NEXT:    ld s1, 24(sp)
@@ -8615,16 +8735,9 @@ define i16 @atomicrmw_umax_i16_monotonic
 ; RV32I-NEXT:    addi s0, a1, -1
 ; RV32I-NEXT:    and s1, s2, s0
 ; RV32I-NEXT:    addi s3, sp, 6
+; RV32I-NEXT:    j .LBB100_2
 ; RV32I-NEXT:  .LBB100_1: # %atomicrmw.start
-; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT:    and a1, a0, s0
-; RV32I-NEXT:    mv a2, a0
-; RV32I-NEXT:    bltu s1, a1, .LBB100_3
-; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB100_1 Depth=1
-; RV32I-NEXT:    mv a2, s2
-; RV32I-NEXT:  .LBB100_3: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB100_1 Depth=1
+; RV32I-NEXT:    # in Loop: Header=BB100_2 Depth=1
 ; RV32I-NEXT:    sh a0, 6(sp)
 ; RV32I-NEXT:    mv a0, s4
 ; RV32I-NEXT:    mv a1, s3
@@ -8633,8 +8746,17 @@ define i16 @atomicrmw_umax_i16_monotonic
 ; RV32I-NEXT:    call __atomic_compare_exchange_2
 ; RV32I-NEXT:    mv a1, a0
 ; RV32I-NEXT:    lh a0, 6(sp)
-; RV32I-NEXT:    beqz a1, .LBB100_1
-; RV32I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV32I-NEXT:    bnez a1, .LBB100_4
+; RV32I-NEXT:  .LBB100_2: # %atomicrmw.start
+; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32I-NEXT:    and a1, a0, s0
+; RV32I-NEXT:    mv a2, a0
+; RV32I-NEXT:    bltu s1, a1, .LBB100_1
+; RV32I-NEXT:  # %bb.3: # %atomicrmw.start
+; RV32I-NEXT:    # in Loop: Header=BB100_2 Depth=1
+; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    j .LBB100_1
+; RV32I-NEXT:  .LBB100_4: # %atomicrmw.end
 ; RV32I-NEXT:    lw s4, 8(sp)
 ; RV32I-NEXT:    lw s3, 12(sp)
 ; RV32I-NEXT:    lw s2, 16(sp)
@@ -8686,16 +8808,9 @@ define i16 @atomicrmw_umax_i16_monotonic
 ; RV64I-NEXT:    addiw s0, a1, -1
 ; RV64I-NEXT:    and s1, s2, s0
 ; RV64I-NEXT:    addi s3, sp, 14
+; RV64I-NEXT:    j .LBB100_2
 ; RV64I-NEXT:  .LBB100_1: # %atomicrmw.start
-; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT:    and a1, a0, s0
-; RV64I-NEXT:    mv a2, a0
-; RV64I-NEXT:    bltu s1, a1, .LBB100_3
-; RV64I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB100_1 Depth=1
-; RV64I-NEXT:    mv a2, s2
-; RV64I-NEXT:  .LBB100_3: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB100_1 Depth=1
+; RV64I-NEXT:    # in Loop: Header=BB100_2 Depth=1
 ; RV64I-NEXT:    sh a0, 14(sp)
 ; RV64I-NEXT:    mv a0, s4
 ; RV64I-NEXT:    mv a1, s3
@@ -8704,8 +8819,17 @@ define i16 @atomicrmw_umax_i16_monotonic
 ; RV64I-NEXT:    call __atomic_compare_exchange_2
 ; RV64I-NEXT:    mv a1, a0
 ; RV64I-NEXT:    lh a0, 14(sp)
-; RV64I-NEXT:    beqz a1, .LBB100_1
-; RV64I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV64I-NEXT:    bnez a1, .LBB100_4
+; RV64I-NEXT:  .LBB100_2: # %atomicrmw.start
+; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV64I-NEXT:    and a1, a0, s0
+; RV64I-NEXT:    mv a2, a0
+; RV64I-NEXT:    bltu s1, a1, .LBB100_1
+; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
+; RV64I-NEXT:    # in Loop: Header=BB100_2 Depth=1
+; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    j .LBB100_1
+; RV64I-NEXT:  .LBB100_4: # %atomicrmw.end
 ; RV64I-NEXT:    ld s4, 16(sp)
 ; RV64I-NEXT:    ld s3, 24(sp)
 ; RV64I-NEXT:    ld s2, 32(sp)
@@ -8761,16 +8885,9 @@ define i16 @atomicrmw_umax_i16_acquire(i
 ; RV32I-NEXT:    addi s0, a1, -1
 ; RV32I-NEXT:    and s1, s2, s0
 ; RV32I-NEXT:    addi s3, sp, 6
+; RV32I-NEXT:    j .LBB101_2
 ; RV32I-NEXT:  .LBB101_1: # %atomicrmw.start
-; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT:    and a1, a0, s0
-; RV32I-NEXT:    mv a2, a0
-; RV32I-NEXT:    bltu s1, a1, .LBB101_3
-; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB101_1 Depth=1
-; RV32I-NEXT:    mv a2, s2
-; RV32I-NEXT:  .LBB101_3: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB101_1 Depth=1
+; RV32I-NEXT:    # in Loop: Header=BB101_2 Depth=1
 ; RV32I-NEXT:    sh a0, 6(sp)
 ; RV32I-NEXT:    mv a0, s4
 ; RV32I-NEXT:    mv a1, s3
@@ -8779,8 +8896,17 @@ define i16 @atomicrmw_umax_i16_acquire(i
 ; RV32I-NEXT:    call __atomic_compare_exchange_2
 ; RV32I-NEXT:    mv a1, a0
 ; RV32I-NEXT:    lh a0, 6(sp)
-; RV32I-NEXT:    beqz a1, .LBB101_1
-; RV32I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV32I-NEXT:    bnez a1, .LBB101_4
+; RV32I-NEXT:  .LBB101_2: # %atomicrmw.start
+; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32I-NEXT:    and a1, a0, s0
+; RV32I-NEXT:    mv a2, a0
+; RV32I-NEXT:    bltu s1, a1, .LBB101_1
+; RV32I-NEXT:  # %bb.3: # %atomicrmw.start
+; RV32I-NEXT:    # in Loop: Header=BB101_2 Depth=1
+; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    j .LBB101_1
+; RV32I-NEXT:  .LBB101_4: # %atomicrmw.end
 ; RV32I-NEXT:    lw s4, 8(sp)
 ; RV32I-NEXT:    lw s3, 12(sp)
 ; RV32I-NEXT:    lw s2, 16(sp)
@@ -8832,16 +8958,9 @@ define i16 @atomicrmw_umax_i16_acquire(i
 ; RV64I-NEXT:    addiw s0, a1, -1
 ; RV64I-NEXT:    and s1, s2, s0
 ; RV64I-NEXT:    addi s3, sp, 14
+; RV64I-NEXT:    j .LBB101_2
 ; RV64I-NEXT:  .LBB101_1: # %atomicrmw.start
-; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT:    and a1, a0, s0
-; RV64I-NEXT:    mv a2, a0
-; RV64I-NEXT:    bltu s1, a1, .LBB101_3
-; RV64I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB101_1 Depth=1
-; RV64I-NEXT:    mv a2, s2
-; RV64I-NEXT:  .LBB101_3: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB101_1 Depth=1
+; RV64I-NEXT:    # in Loop: Header=BB101_2 Depth=1
 ; RV64I-NEXT:    sh a0, 14(sp)
 ; RV64I-NEXT:    mv a0, s4
 ; RV64I-NEXT:    mv a1, s3
@@ -8850,8 +8969,17 @@ define i16 @atomicrmw_umax_i16_acquire(i
 ; RV64I-NEXT:    call __atomic_compare_exchange_2
 ; RV64I-NEXT:    mv a1, a0
 ; RV64I-NEXT:    lh a0, 14(sp)
-; RV64I-NEXT:    beqz a1, .LBB101_1
-; RV64I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV64I-NEXT:    bnez a1, .LBB101_4
+; RV64I-NEXT:  .LBB101_2: # %atomicrmw.start
+; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV64I-NEXT:    and a1, a0, s0
+; RV64I-NEXT:    mv a2, a0
+; RV64I-NEXT:    bltu s1, a1, .LBB101_1
+; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
+; RV64I-NEXT:    # in Loop: Header=BB101_2 Depth=1
+; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    j .LBB101_1
+; RV64I-NEXT:  .LBB101_4: # %atomicrmw.end
 ; RV64I-NEXT:    ld s4, 16(sp)
 ; RV64I-NEXT:    ld s3, 24(sp)
 ; RV64I-NEXT:    ld s2, 32(sp)
@@ -8907,16 +9035,9 @@ define i16 @atomicrmw_umax_i16_release(i
 ; RV32I-NEXT:    addi s0, a1, -1
 ; RV32I-NEXT:    and s1, s2, s0
 ; RV32I-NEXT:    addi s3, sp, 6
+; RV32I-NEXT:    j .LBB102_2
 ; RV32I-NEXT:  .LBB102_1: # %atomicrmw.start
-; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT:    and a1, a0, s0
-; RV32I-NEXT:    mv a2, a0
-; RV32I-NEXT:    bltu s1, a1, .LBB102_3
-; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB102_1 Depth=1
-; RV32I-NEXT:    mv a2, s2
-; RV32I-NEXT:  .LBB102_3: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB102_1 Depth=1
+; RV32I-NEXT:    # in Loop: Header=BB102_2 Depth=1
 ; RV32I-NEXT:    sh a0, 6(sp)
 ; RV32I-NEXT:    mv a0, s4
 ; RV32I-NEXT:    mv a1, s3
@@ -8925,8 +9046,17 @@ define i16 @atomicrmw_umax_i16_release(i
 ; RV32I-NEXT:    call __atomic_compare_exchange_2
 ; RV32I-NEXT:    mv a1, a0
 ; RV32I-NEXT:    lh a0, 6(sp)
-; RV32I-NEXT:    beqz a1, .LBB102_1
-; RV32I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV32I-NEXT:    bnez a1, .LBB102_4
+; RV32I-NEXT:  .LBB102_2: # %atomicrmw.start
+; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32I-NEXT:    and a1, a0, s0
+; RV32I-NEXT:    mv a2, a0
+; RV32I-NEXT:    bltu s1, a1, .LBB102_1
+; RV32I-NEXT:  # %bb.3: # %atomicrmw.start
+; RV32I-NEXT:    # in Loop: Header=BB102_2 Depth=1
+; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    j .LBB102_1
+; RV32I-NEXT:  .LBB102_4: # %atomicrmw.end
 ; RV32I-NEXT:    lw s4, 8(sp)
 ; RV32I-NEXT:    lw s3, 12(sp)
 ; RV32I-NEXT:    lw s2, 16(sp)
@@ -8978,16 +9108,9 @@ define i16 @atomicrmw_umax_i16_release(i
 ; RV64I-NEXT:    addiw s0, a1, -1
 ; RV64I-NEXT:    and s1, s2, s0
 ; RV64I-NEXT:    addi s3, sp, 14
+; RV64I-NEXT:    j .LBB102_2
 ; RV64I-NEXT:  .LBB102_1: # %atomicrmw.start
-; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT:    and a1, a0, s0
-; RV64I-NEXT:    mv a2, a0
-; RV64I-NEXT:    bltu s1, a1, .LBB102_3
-; RV64I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB102_1 Depth=1
-; RV64I-NEXT:    mv a2, s2
-; RV64I-NEXT:  .LBB102_3: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB102_1 Depth=1
+; RV64I-NEXT:    # in Loop: Header=BB102_2 Depth=1
 ; RV64I-NEXT:    sh a0, 14(sp)
 ; RV64I-NEXT:    mv a0, s4
 ; RV64I-NEXT:    mv a1, s3
@@ -8996,8 +9119,17 @@ define i16 @atomicrmw_umax_i16_release(i
 ; RV64I-NEXT:    call __atomic_compare_exchange_2
 ; RV64I-NEXT:    mv a1, a0
 ; RV64I-NEXT:    lh a0, 14(sp)
-; RV64I-NEXT:    beqz a1, .LBB102_1
-; RV64I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV64I-NEXT:    bnez a1, .LBB102_4
+; RV64I-NEXT:  .LBB102_2: # %atomicrmw.start
+; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV64I-NEXT:    and a1, a0, s0
+; RV64I-NEXT:    mv a2, a0
+; RV64I-NEXT:    bltu s1, a1, .LBB102_1
+; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
+; RV64I-NEXT:    # in Loop: Header=BB102_2 Depth=1
+; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    j .LBB102_1
+; RV64I-NEXT:  .LBB102_4: # %atomicrmw.end
 ; RV64I-NEXT:    ld s4, 16(sp)
 ; RV64I-NEXT:    ld s3, 24(sp)
 ; RV64I-NEXT:    ld s2, 32(sp)
@@ -9053,16 +9185,9 @@ define i16 @atomicrmw_umax_i16_acq_rel(i
 ; RV32I-NEXT:    addi s0, a1, -1
 ; RV32I-NEXT:    and s1, s2, s0
 ; RV32I-NEXT:    addi s3, sp, 6
+; RV32I-NEXT:    j .LBB103_2
 ; RV32I-NEXT:  .LBB103_1: # %atomicrmw.start
-; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT:    and a1, a0, s0
-; RV32I-NEXT:    mv a2, a0
-; RV32I-NEXT:    bltu s1, a1, .LBB103_3
-; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB103_1 Depth=1
-; RV32I-NEXT:    mv a2, s2
-; RV32I-NEXT:  .LBB103_3: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB103_1 Depth=1
+; RV32I-NEXT:    # in Loop: Header=BB103_2 Depth=1
 ; RV32I-NEXT:    sh a0, 6(sp)
 ; RV32I-NEXT:    mv a0, s4
 ; RV32I-NEXT:    mv a1, s3
@@ -9071,8 +9196,17 @@ define i16 @atomicrmw_umax_i16_acq_rel(i
 ; RV32I-NEXT:    call __atomic_compare_exchange_2
 ; RV32I-NEXT:    mv a1, a0
 ; RV32I-NEXT:    lh a0, 6(sp)
-; RV32I-NEXT:    beqz a1, .LBB103_1
-; RV32I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV32I-NEXT:    bnez a1, .LBB103_4
+; RV32I-NEXT:  .LBB103_2: # %atomicrmw.start
+; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32I-NEXT:    and a1, a0, s0
+; RV32I-NEXT:    mv a2, a0
+; RV32I-NEXT:    bltu s1, a1, .LBB103_1
+; RV32I-NEXT:  # %bb.3: # %atomicrmw.start
+; RV32I-NEXT:    # in Loop: Header=BB103_2 Depth=1
+; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    j .LBB103_1
+; RV32I-NEXT:  .LBB103_4: # %atomicrmw.end
 ; RV32I-NEXT:    lw s4, 8(sp)
 ; RV32I-NEXT:    lw s3, 12(sp)
 ; RV32I-NEXT:    lw s2, 16(sp)
@@ -9124,16 +9258,9 @@ define i16 @atomicrmw_umax_i16_acq_rel(i
 ; RV64I-NEXT:    addiw s0, a1, -1
 ; RV64I-NEXT:    and s1, s2, s0
 ; RV64I-NEXT:    addi s3, sp, 14
+; RV64I-NEXT:    j .LBB103_2
 ; RV64I-NEXT:  .LBB103_1: # %atomicrmw.start
-; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT:    and a1, a0, s0
-; RV64I-NEXT:    mv a2, a0
-; RV64I-NEXT:    bltu s1, a1, .LBB103_3
-; RV64I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB103_1 Depth=1
-; RV64I-NEXT:    mv a2, s2
-; RV64I-NEXT:  .LBB103_3: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB103_1 Depth=1
+; RV64I-NEXT:    # in Loop: Header=BB103_2 Depth=1
 ; RV64I-NEXT:    sh a0, 14(sp)
 ; RV64I-NEXT:    mv a0, s4
 ; RV64I-NEXT:    mv a1, s3
@@ -9142,8 +9269,17 @@ define i16 @atomicrmw_umax_i16_acq_rel(i
 ; RV64I-NEXT:    call __atomic_compare_exchange_2
 ; RV64I-NEXT:    mv a1, a0
 ; RV64I-NEXT:    lh a0, 14(sp)
-; RV64I-NEXT:    beqz a1, .LBB103_1
-; RV64I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV64I-NEXT:    bnez a1, .LBB103_4
+; RV64I-NEXT:  .LBB103_2: # %atomicrmw.start
+; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV64I-NEXT:    and a1, a0, s0
+; RV64I-NEXT:    mv a2, a0
+; RV64I-NEXT:    bltu s1, a1, .LBB103_1
+; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
+; RV64I-NEXT:    # in Loop: Header=BB103_2 Depth=1
+; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    j .LBB103_1
+; RV64I-NEXT:  .LBB103_4: # %atomicrmw.end
 ; RV64I-NEXT:    ld s4, 16(sp)
 ; RV64I-NEXT:    ld s3, 24(sp)
 ; RV64I-NEXT:    ld s2, 32(sp)
@@ -9199,16 +9335,9 @@ define i16 @atomicrmw_umax_i16_seq_cst(i
 ; RV32I-NEXT:    addi s0, a1, -1
 ; RV32I-NEXT:    and s1, s2, s0
 ; RV32I-NEXT:    addi s3, sp, 6
+; RV32I-NEXT:    j .LBB104_2
 ; RV32I-NEXT:  .LBB104_1: # %atomicrmw.start
-; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT:    and a1, a0, s0
-; RV32I-NEXT:    mv a2, a0
-; RV32I-NEXT:    bltu s1, a1, .LBB104_3
-; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB104_1 Depth=1
-; RV32I-NEXT:    mv a2, s2
-; RV32I-NEXT:  .LBB104_3: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB104_1 Depth=1
+; RV32I-NEXT:    # in Loop: Header=BB104_2 Depth=1
 ; RV32I-NEXT:    sh a0, 6(sp)
 ; RV32I-NEXT:    mv a0, s4
 ; RV32I-NEXT:    mv a1, s3
@@ -9217,8 +9346,17 @@ define i16 @atomicrmw_umax_i16_seq_cst(i
 ; RV32I-NEXT:    call __atomic_compare_exchange_2
 ; RV32I-NEXT:    mv a1, a0
 ; RV32I-NEXT:    lh a0, 6(sp)
-; RV32I-NEXT:    beqz a1, .LBB104_1
-; RV32I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV32I-NEXT:    bnez a1, .LBB104_4
+; RV32I-NEXT:  .LBB104_2: # %atomicrmw.start
+; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32I-NEXT:    and a1, a0, s0
+; RV32I-NEXT:    mv a2, a0
+; RV32I-NEXT:    bltu s1, a1, .LBB104_1
+; RV32I-NEXT:  # %bb.3: # %atomicrmw.start
+; RV32I-NEXT:    # in Loop: Header=BB104_2 Depth=1
+; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    j .LBB104_1
+; RV32I-NEXT:  .LBB104_4: # %atomicrmw.end
 ; RV32I-NEXT:    lw s4, 8(sp)
 ; RV32I-NEXT:    lw s3, 12(sp)
 ; RV32I-NEXT:    lw s2, 16(sp)
@@ -9270,16 +9408,9 @@ define i16 @atomicrmw_umax_i16_seq_cst(i
 ; RV64I-NEXT:    addiw s0, a1, -1
 ; RV64I-NEXT:    and s1, s2, s0
 ; RV64I-NEXT:    addi s3, sp, 14
+; RV64I-NEXT:    j .LBB104_2
 ; RV64I-NEXT:  .LBB104_1: # %atomicrmw.start
-; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT:    and a1, a0, s0
-; RV64I-NEXT:    mv a2, a0
-; RV64I-NEXT:    bltu s1, a1, .LBB104_3
-; RV64I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB104_1 Depth=1
-; RV64I-NEXT:    mv a2, s2
-; RV64I-NEXT:  .LBB104_3: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB104_1 Depth=1
+; RV64I-NEXT:    # in Loop: Header=BB104_2 Depth=1
 ; RV64I-NEXT:    sh a0, 14(sp)
 ; RV64I-NEXT:    mv a0, s4
 ; RV64I-NEXT:    mv a1, s3
@@ -9288,8 +9419,17 @@ define i16 @atomicrmw_umax_i16_seq_cst(i
 ; RV64I-NEXT:    call __atomic_compare_exchange_2
 ; RV64I-NEXT:    mv a1, a0
 ; RV64I-NEXT:    lh a0, 14(sp)
-; RV64I-NEXT:    beqz a1, .LBB104_1
-; RV64I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV64I-NEXT:    bnez a1, .LBB104_4
+; RV64I-NEXT:  .LBB104_2: # %atomicrmw.start
+; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV64I-NEXT:    and a1, a0, s0
+; RV64I-NEXT:    mv a2, a0
+; RV64I-NEXT:    bltu s1, a1, .LBB104_1
+; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
+; RV64I-NEXT:    # in Loop: Header=BB104_2 Depth=1
+; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    j .LBB104_1
+; RV64I-NEXT:  .LBB104_4: # %atomicrmw.end
 ; RV64I-NEXT:    ld s4, 16(sp)
 ; RV64I-NEXT:    ld s3, 24(sp)
 ; RV64I-NEXT:    ld s2, 32(sp)
@@ -9345,16 +9485,9 @@ define i16 @atomicrmw_umin_i16_monotonic
 ; RV32I-NEXT:    addi s0, a1, -1
 ; RV32I-NEXT:    and s1, s2, s0
 ; RV32I-NEXT:    addi s3, sp, 6
+; RV32I-NEXT:    j .LBB105_2
 ; RV32I-NEXT:  .LBB105_1: # %atomicrmw.start
-; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT:    and a1, a0, s0
-; RV32I-NEXT:    mv a2, a0
-; RV32I-NEXT:    bgeu s1, a1, .LBB105_3
-; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB105_1 Depth=1
-; RV32I-NEXT:    mv a2, s2
-; RV32I-NEXT:  .LBB105_3: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB105_1 Depth=1
+; RV32I-NEXT:    # in Loop: Header=BB105_2 Depth=1
 ; RV32I-NEXT:    sh a0, 6(sp)
 ; RV32I-NEXT:    mv a0, s4
 ; RV32I-NEXT:    mv a1, s3
@@ -9363,8 +9496,17 @@ define i16 @atomicrmw_umin_i16_monotonic
 ; RV32I-NEXT:    call __atomic_compare_exchange_2
 ; RV32I-NEXT:    mv a1, a0
 ; RV32I-NEXT:    lh a0, 6(sp)
-; RV32I-NEXT:    beqz a1, .LBB105_1
-; RV32I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV32I-NEXT:    bnez a1, .LBB105_4
+; RV32I-NEXT:  .LBB105_2: # %atomicrmw.start
+; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32I-NEXT:    and a1, a0, s0
+; RV32I-NEXT:    mv a2, a0
+; RV32I-NEXT:    bgeu s1, a1, .LBB105_1
+; RV32I-NEXT:  # %bb.3: # %atomicrmw.start
+; RV32I-NEXT:    # in Loop: Header=BB105_2 Depth=1
+; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    j .LBB105_1
+; RV32I-NEXT:  .LBB105_4: # %atomicrmw.end
 ; RV32I-NEXT:    lw s4, 8(sp)
 ; RV32I-NEXT:    lw s3, 12(sp)
 ; RV32I-NEXT:    lw s2, 16(sp)
@@ -9416,16 +9558,9 @@ define i16 @atomicrmw_umin_i16_monotonic
 ; RV64I-NEXT:    addiw s0, a1, -1
 ; RV64I-NEXT:    and s1, s2, s0
 ; RV64I-NEXT:    addi s3, sp, 14
+; RV64I-NEXT:    j .LBB105_2
 ; RV64I-NEXT:  .LBB105_1: # %atomicrmw.start
-; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT:    and a1, a0, s0
-; RV64I-NEXT:    mv a2, a0
-; RV64I-NEXT:    bgeu s1, a1, .LBB105_3
-; RV64I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB105_1 Depth=1
-; RV64I-NEXT:    mv a2, s2
-; RV64I-NEXT:  .LBB105_3: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB105_1 Depth=1
+; RV64I-NEXT:    # in Loop: Header=BB105_2 Depth=1
 ; RV64I-NEXT:    sh a0, 14(sp)
 ; RV64I-NEXT:    mv a0, s4
 ; RV64I-NEXT:    mv a1, s3
@@ -9434,8 +9569,17 @@ define i16 @atomicrmw_umin_i16_monotonic
 ; RV64I-NEXT:    call __atomic_compare_exchange_2
 ; RV64I-NEXT:    mv a1, a0
 ; RV64I-NEXT:    lh a0, 14(sp)
-; RV64I-NEXT:    beqz a1, .LBB105_1
-; RV64I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV64I-NEXT:    bnez a1, .LBB105_4
+; RV64I-NEXT:  .LBB105_2: # %atomicrmw.start
+; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV64I-NEXT:    and a1, a0, s0
+; RV64I-NEXT:    mv a2, a0
+; RV64I-NEXT:    bgeu s1, a1, .LBB105_1
+; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
+; RV64I-NEXT:    # in Loop: Header=BB105_2 Depth=1
+; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    j .LBB105_1
+; RV64I-NEXT:  .LBB105_4: # %atomicrmw.end
 ; RV64I-NEXT:    ld s4, 16(sp)
 ; RV64I-NEXT:    ld s3, 24(sp)
 ; RV64I-NEXT:    ld s2, 32(sp)
@@ -9491,16 +9635,9 @@ define i16 @atomicrmw_umin_i16_acquire(i
 ; RV32I-NEXT:    addi s0, a1, -1
 ; RV32I-NEXT:    and s1, s2, s0
 ; RV32I-NEXT:    addi s3, sp, 6
+; RV32I-NEXT:    j .LBB106_2
 ; RV32I-NEXT:  .LBB106_1: # %atomicrmw.start
-; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT:    and a1, a0, s0
-; RV32I-NEXT:    mv a2, a0
-; RV32I-NEXT:    bgeu s1, a1, .LBB106_3
-; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB106_1 Depth=1
-; RV32I-NEXT:    mv a2, s2
-; RV32I-NEXT:  .LBB106_3: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB106_1 Depth=1
+; RV32I-NEXT:    # in Loop: Header=BB106_2 Depth=1
 ; RV32I-NEXT:    sh a0, 6(sp)
 ; RV32I-NEXT:    mv a0, s4
 ; RV32I-NEXT:    mv a1, s3
@@ -9509,8 +9646,17 @@ define i16 @atomicrmw_umin_i16_acquire(i
 ; RV32I-NEXT:    call __atomic_compare_exchange_2
 ; RV32I-NEXT:    mv a1, a0
 ; RV32I-NEXT:    lh a0, 6(sp)
-; RV32I-NEXT:    beqz a1, .LBB106_1
-; RV32I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV32I-NEXT:    bnez a1, .LBB106_4
+; RV32I-NEXT:  .LBB106_2: # %atomicrmw.start
+; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32I-NEXT:    and a1, a0, s0
+; RV32I-NEXT:    mv a2, a0
+; RV32I-NEXT:    bgeu s1, a1, .LBB106_1
+; RV32I-NEXT:  # %bb.3: # %atomicrmw.start
+; RV32I-NEXT:    # in Loop: Header=BB106_2 Depth=1
+; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    j .LBB106_1
+; RV32I-NEXT:  .LBB106_4: # %atomicrmw.end
 ; RV32I-NEXT:    lw s4, 8(sp)
 ; RV32I-NEXT:    lw s3, 12(sp)
 ; RV32I-NEXT:    lw s2, 16(sp)
@@ -9562,16 +9708,9 @@ define i16 @atomicrmw_umin_i16_acquire(i
 ; RV64I-NEXT:    addiw s0, a1, -1
 ; RV64I-NEXT:    and s1, s2, s0
 ; RV64I-NEXT:    addi s3, sp, 14
+; RV64I-NEXT:    j .LBB106_2
 ; RV64I-NEXT:  .LBB106_1: # %atomicrmw.start
-; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT:    and a1, a0, s0
-; RV64I-NEXT:    mv a2, a0
-; RV64I-NEXT:    bgeu s1, a1, .LBB106_3
-; RV64I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB106_1 Depth=1
-; RV64I-NEXT:    mv a2, s2
-; RV64I-NEXT:  .LBB106_3: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB106_1 Depth=1
+; RV64I-NEXT:    # in Loop: Header=BB106_2 Depth=1
 ; RV64I-NEXT:    sh a0, 14(sp)
 ; RV64I-NEXT:    mv a0, s4
 ; RV64I-NEXT:    mv a1, s3
@@ -9580,8 +9719,17 @@ define i16 @atomicrmw_umin_i16_acquire(i
 ; RV64I-NEXT:    call __atomic_compare_exchange_2
 ; RV64I-NEXT:    mv a1, a0
 ; RV64I-NEXT:    lh a0, 14(sp)
-; RV64I-NEXT:    beqz a1, .LBB106_1
-; RV64I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV64I-NEXT:    bnez a1, .LBB106_4
+; RV64I-NEXT:  .LBB106_2: # %atomicrmw.start
+; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV64I-NEXT:    and a1, a0, s0
+; RV64I-NEXT:    mv a2, a0
+; RV64I-NEXT:    bgeu s1, a1, .LBB106_1
+; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
+; RV64I-NEXT:    # in Loop: Header=BB106_2 Depth=1
+; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    j .LBB106_1
+; RV64I-NEXT:  .LBB106_4: # %atomicrmw.end
 ; RV64I-NEXT:    ld s4, 16(sp)
 ; RV64I-NEXT:    ld s3, 24(sp)
 ; RV64I-NEXT:    ld s2, 32(sp)
@@ -9637,16 +9785,9 @@ define i16 @atomicrmw_umin_i16_release(i
 ; RV32I-NEXT:    addi s0, a1, -1
 ; RV32I-NEXT:    and s1, s2, s0
 ; RV32I-NEXT:    addi s3, sp, 6
+; RV32I-NEXT:    j .LBB107_2
 ; RV32I-NEXT:  .LBB107_1: # %atomicrmw.start
-; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT:    and a1, a0, s0
-; RV32I-NEXT:    mv a2, a0
-; RV32I-NEXT:    bgeu s1, a1, .LBB107_3
-; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB107_1 Depth=1
-; RV32I-NEXT:    mv a2, s2
-; RV32I-NEXT:  .LBB107_3: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB107_1 Depth=1
+; RV32I-NEXT:    # in Loop: Header=BB107_2 Depth=1
 ; RV32I-NEXT:    sh a0, 6(sp)
 ; RV32I-NEXT:    mv a0, s4
 ; RV32I-NEXT:    mv a1, s3
@@ -9655,8 +9796,17 @@ define i16 @atomicrmw_umin_i16_release(i
 ; RV32I-NEXT:    call __atomic_compare_exchange_2
 ; RV32I-NEXT:    mv a1, a0
 ; RV32I-NEXT:    lh a0, 6(sp)
-; RV32I-NEXT:    beqz a1, .LBB107_1
-; RV32I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV32I-NEXT:    bnez a1, .LBB107_4
+; RV32I-NEXT:  .LBB107_2: # %atomicrmw.start
+; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32I-NEXT:    and a1, a0, s0
+; RV32I-NEXT:    mv a2, a0
+; RV32I-NEXT:    bgeu s1, a1, .LBB107_1
+; RV32I-NEXT:  # %bb.3: # %atomicrmw.start
+; RV32I-NEXT:    # in Loop: Header=BB107_2 Depth=1
+; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    j .LBB107_1
+; RV32I-NEXT:  .LBB107_4: # %atomicrmw.end
 ; RV32I-NEXT:    lw s4, 8(sp)
 ; RV32I-NEXT:    lw s3, 12(sp)
 ; RV32I-NEXT:    lw s2, 16(sp)
@@ -9708,16 +9858,9 @@ define i16 @atomicrmw_umin_i16_release(i
 ; RV64I-NEXT:    addiw s0, a1, -1
 ; RV64I-NEXT:    and s1, s2, s0
 ; RV64I-NEXT:    addi s3, sp, 14
+; RV64I-NEXT:    j .LBB107_2
 ; RV64I-NEXT:  .LBB107_1: # %atomicrmw.start
-; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT:    and a1, a0, s0
-; RV64I-NEXT:    mv a2, a0
-; RV64I-NEXT:    bgeu s1, a1, .LBB107_3
-; RV64I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB107_1 Depth=1
-; RV64I-NEXT:    mv a2, s2
-; RV64I-NEXT:  .LBB107_3: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB107_1 Depth=1
+; RV64I-NEXT:    # in Loop: Header=BB107_2 Depth=1
 ; RV64I-NEXT:    sh a0, 14(sp)
 ; RV64I-NEXT:    mv a0, s4
 ; RV64I-NEXT:    mv a1, s3
@@ -9726,8 +9869,17 @@ define i16 @atomicrmw_umin_i16_release(i
 ; RV64I-NEXT:    call __atomic_compare_exchange_2
 ; RV64I-NEXT:    mv a1, a0
 ; RV64I-NEXT:    lh a0, 14(sp)
-; RV64I-NEXT:    beqz a1, .LBB107_1
-; RV64I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV64I-NEXT:    bnez a1, .LBB107_4
+; RV64I-NEXT:  .LBB107_2: # %atomicrmw.start
+; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV64I-NEXT:    and a1, a0, s0
+; RV64I-NEXT:    mv a2, a0
+; RV64I-NEXT:    bgeu s1, a1, .LBB107_1
+; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
+; RV64I-NEXT:    # in Loop: Header=BB107_2 Depth=1
+; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    j .LBB107_1
+; RV64I-NEXT:  .LBB107_4: # %atomicrmw.end
 ; RV64I-NEXT:    ld s4, 16(sp)
 ; RV64I-NEXT:    ld s3, 24(sp)
 ; RV64I-NEXT:    ld s2, 32(sp)
@@ -9783,16 +9935,9 @@ define i16 @atomicrmw_umin_i16_acq_rel(i
 ; RV32I-NEXT:    addi s0, a1, -1
 ; RV32I-NEXT:    and s1, s2, s0
 ; RV32I-NEXT:    addi s3, sp, 6
+; RV32I-NEXT:    j .LBB108_2
 ; RV32I-NEXT:  .LBB108_1: # %atomicrmw.start
-; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT:    and a1, a0, s0
-; RV32I-NEXT:    mv a2, a0
-; RV32I-NEXT:    bgeu s1, a1, .LBB108_3
-; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB108_1 Depth=1
-; RV32I-NEXT:    mv a2, s2
-; RV32I-NEXT:  .LBB108_3: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB108_1 Depth=1
+; RV32I-NEXT:    # in Loop: Header=BB108_2 Depth=1
 ; RV32I-NEXT:    sh a0, 6(sp)
 ; RV32I-NEXT:    mv a0, s4
 ; RV32I-NEXT:    mv a1, s3
@@ -9801,8 +9946,17 @@ define i16 @atomicrmw_umin_i16_acq_rel(i
 ; RV32I-NEXT:    call __atomic_compare_exchange_2
 ; RV32I-NEXT:    mv a1, a0
 ; RV32I-NEXT:    lh a0, 6(sp)
-; RV32I-NEXT:    beqz a1, .LBB108_1
-; RV32I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV32I-NEXT:    bnez a1, .LBB108_4
+; RV32I-NEXT:  .LBB108_2: # %atomicrmw.start
+; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32I-NEXT:    and a1, a0, s0
+; RV32I-NEXT:    mv a2, a0
+; RV32I-NEXT:    bgeu s1, a1, .LBB108_1
+; RV32I-NEXT:  # %bb.3: # %atomicrmw.start
+; RV32I-NEXT:    # in Loop: Header=BB108_2 Depth=1
+; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    j .LBB108_1
+; RV32I-NEXT:  .LBB108_4: # %atomicrmw.end
 ; RV32I-NEXT:    lw s4, 8(sp)
 ; RV32I-NEXT:    lw s3, 12(sp)
 ; RV32I-NEXT:    lw s2, 16(sp)
@@ -9854,16 +10008,9 @@ define i16 @atomicrmw_umin_i16_acq_rel(i
 ; RV64I-NEXT:    addiw s0, a1, -1
 ; RV64I-NEXT:    and s1, s2, s0
 ; RV64I-NEXT:    addi s3, sp, 14
+; RV64I-NEXT:    j .LBB108_2
 ; RV64I-NEXT:  .LBB108_1: # %atomicrmw.start
-; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT:    and a1, a0, s0
-; RV64I-NEXT:    mv a2, a0
-; RV64I-NEXT:    bgeu s1, a1, .LBB108_3
-; RV64I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB108_1 Depth=1
-; RV64I-NEXT:    mv a2, s2
-; RV64I-NEXT:  .LBB108_3: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB108_1 Depth=1
+; RV64I-NEXT:    # in Loop: Header=BB108_2 Depth=1
 ; RV64I-NEXT:    sh a0, 14(sp)
 ; RV64I-NEXT:    mv a0, s4
 ; RV64I-NEXT:    mv a1, s3
@@ -9872,8 +10019,17 @@ define i16 @atomicrmw_umin_i16_acq_rel(i
 ; RV64I-NEXT:    call __atomic_compare_exchange_2
 ; RV64I-NEXT:    mv a1, a0
 ; RV64I-NEXT:    lh a0, 14(sp)
-; RV64I-NEXT:    beqz a1, .LBB108_1
-; RV64I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV64I-NEXT:    bnez a1, .LBB108_4
+; RV64I-NEXT:  .LBB108_2: # %atomicrmw.start
+; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV64I-NEXT:    and a1, a0, s0
+; RV64I-NEXT:    mv a2, a0
+; RV64I-NEXT:    bgeu s1, a1, .LBB108_1
+; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
+; RV64I-NEXT:    # in Loop: Header=BB108_2 Depth=1
+; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    j .LBB108_1
+; RV64I-NEXT:  .LBB108_4: # %atomicrmw.end
 ; RV64I-NEXT:    ld s4, 16(sp)
 ; RV64I-NEXT:    ld s3, 24(sp)
 ; RV64I-NEXT:    ld s2, 32(sp)
@@ -9929,16 +10085,9 @@ define i16 @atomicrmw_umin_i16_seq_cst(i
 ; RV32I-NEXT:    addi s0, a1, -1
 ; RV32I-NEXT:    and s1, s2, s0
 ; RV32I-NEXT:    addi s3, sp, 6
+; RV32I-NEXT:    j .LBB109_2
 ; RV32I-NEXT:  .LBB109_1: # %atomicrmw.start
-; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT:    and a1, a0, s0
-; RV32I-NEXT:    mv a2, a0
-; RV32I-NEXT:    bgeu s1, a1, .LBB109_3
-; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB109_1 Depth=1
-; RV32I-NEXT:    mv a2, s2
-; RV32I-NEXT:  .LBB109_3: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB109_1 Depth=1
+; RV32I-NEXT:    # in Loop: Header=BB109_2 Depth=1
 ; RV32I-NEXT:    sh a0, 6(sp)
 ; RV32I-NEXT:    mv a0, s4
 ; RV32I-NEXT:    mv a1, s3
@@ -9947,8 +10096,17 @@ define i16 @atomicrmw_umin_i16_seq_cst(i
 ; RV32I-NEXT:    call __atomic_compare_exchange_2
 ; RV32I-NEXT:    mv a1, a0
 ; RV32I-NEXT:    lh a0, 6(sp)
-; RV32I-NEXT:    beqz a1, .LBB109_1
-; RV32I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV32I-NEXT:    bnez a1, .LBB109_4
+; RV32I-NEXT:  .LBB109_2: # %atomicrmw.start
+; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32I-NEXT:    and a1, a0, s0
+; RV32I-NEXT:    mv a2, a0
+; RV32I-NEXT:    bgeu s1, a1, .LBB109_1
+; RV32I-NEXT:  # %bb.3: # %atomicrmw.start
+; RV32I-NEXT:    # in Loop: Header=BB109_2 Depth=1
+; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    j .LBB109_1
+; RV32I-NEXT:  .LBB109_4: # %atomicrmw.end
 ; RV32I-NEXT:    lw s4, 8(sp)
 ; RV32I-NEXT:    lw s3, 12(sp)
 ; RV32I-NEXT:    lw s2, 16(sp)
@@ -10000,16 +10158,9 @@ define i16 @atomicrmw_umin_i16_seq_cst(i
 ; RV64I-NEXT:    addiw s0, a1, -1
 ; RV64I-NEXT:    and s1, s2, s0
 ; RV64I-NEXT:    addi s3, sp, 14
+; RV64I-NEXT:    j .LBB109_2
 ; RV64I-NEXT:  .LBB109_1: # %atomicrmw.start
-; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT:    and a1, a0, s0
-; RV64I-NEXT:    mv a2, a0
-; RV64I-NEXT:    bgeu s1, a1, .LBB109_3
-; RV64I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB109_1 Depth=1
-; RV64I-NEXT:    mv a2, s2
-; RV64I-NEXT:  .LBB109_3: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB109_1 Depth=1
+; RV64I-NEXT:    # in Loop: Header=BB109_2 Depth=1
 ; RV64I-NEXT:    sh a0, 14(sp)
 ; RV64I-NEXT:    mv a0, s4
 ; RV64I-NEXT:    mv a1, s3
@@ -10018,8 +10169,17 @@ define i16 @atomicrmw_umin_i16_seq_cst(i
 ; RV64I-NEXT:    call __atomic_compare_exchange_2
 ; RV64I-NEXT:    mv a1, a0
 ; RV64I-NEXT:    lh a0, 14(sp)
-; RV64I-NEXT:    beqz a1, .LBB109_1
-; RV64I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV64I-NEXT:    bnez a1, .LBB109_4
+; RV64I-NEXT:  .LBB109_2: # %atomicrmw.start
+; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV64I-NEXT:    and a1, a0, s0
+; RV64I-NEXT:    mv a2, a0
+; RV64I-NEXT:    bgeu s1, a1, .LBB109_1
+; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
+; RV64I-NEXT:    # in Loop: Header=BB109_2 Depth=1
+; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    j .LBB109_1
+; RV64I-NEXT:  .LBB109_4: # %atomicrmw.end
 ; RV64I-NEXT:    ld s4, 16(sp)
 ; RV64I-NEXT:    ld s3, 24(sp)
 ; RV64I-NEXT:    ld s2, 32(sp)
@@ -11340,23 +11500,25 @@ define i32 @atomicrmw_max_i32_monotonic(
 ; RV32I-NEXT:    mv s1, a0
 ; RV32I-NEXT:    lw a2, 0(a0)
 ; RV32I-NEXT:    addi s2, sp, 12
+; RV32I-NEXT:    sw a2, 12(sp)
+; RV32I-NEXT:    bge s0, a2, .LBB145_3
 ; RV32I-NEXT:  .LBB145_1: # %atomicrmw.start
 ; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT:    sw a2, 12(sp)
-; RV32I-NEXT:    blt s0, a2, .LBB145_3
-; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB145_1 Depth=1
-; RV32I-NEXT:    mv a2, s0
-; RV32I-NEXT:  .LBB145_3: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB145_1 Depth=1
 ; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    mv a1, s2
 ; RV32I-NEXT:    mv a3, zero
 ; RV32I-NEXT:    mv a4, zero
 ; RV32I-NEXT:    call __atomic_compare_exchange_4
 ; RV32I-NEXT:    lw a2, 12(sp)
-; RV32I-NEXT:    beqz a0, .LBB145_1
-; RV32I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV32I-NEXT:    bnez a0, .LBB145_4
+; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
+; RV32I-NEXT:    # in Loop: Header=BB145_1 Depth=1
+; RV32I-NEXT:    sw a2, 12(sp)
+; RV32I-NEXT:    blt s0, a2, .LBB145_1
+; RV32I-NEXT:  .LBB145_3: # %atomicrmw.start
+; RV32I-NEXT:    mv a2, s0
+; RV32I-NEXT:    j .LBB145_1
+; RV32I-NEXT:  .LBB145_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a2
 ; RV32I-NEXT:    lw s2, 16(sp)
 ; RV32I-NEXT:    lw s1, 20(sp)
@@ -11383,16 +11545,9 @@ define i32 @atomicrmw_max_i32_monotonic(
 ; RV64I-NEXT:    lwu a0, 0(a0)
 ; RV64I-NEXT:    sext.w s0, a1
 ; RV64I-NEXT:    addi s3, sp, 4
+; RV64I-NEXT:    j .LBB145_2
 ; RV64I-NEXT:  .LBB145_1: # %atomicrmw.start
-; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT:    sext.w a1, a0
-; RV64I-NEXT:    mv a2, a0
-; RV64I-NEXT:    blt s0, a1, .LBB145_3
-; RV64I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB145_1 Depth=1
-; RV64I-NEXT:    mv a2, s2
-; RV64I-NEXT:  .LBB145_3: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB145_1 Depth=1
+; RV64I-NEXT:    # in Loop: Header=BB145_2 Depth=1
 ; RV64I-NEXT:    sw a0, 4(sp)
 ; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    mv a1, s3
@@ -11401,8 +11556,17 @@ define i32 @atomicrmw_max_i32_monotonic(
 ; RV64I-NEXT:    call __atomic_compare_exchange_4
 ; RV64I-NEXT:    mv a1, a0
 ; RV64I-NEXT:    lw a0, 4(sp)
-; RV64I-NEXT:    beqz a1, .LBB145_1
-; RV64I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV64I-NEXT:    bnez a1, .LBB145_4
+; RV64I-NEXT:  .LBB145_2: # %atomicrmw.start
+; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV64I-NEXT:    sext.w a1, a0
+; RV64I-NEXT:    mv a2, a0
+; RV64I-NEXT:    blt s0, a1, .LBB145_1
+; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
+; RV64I-NEXT:    # in Loop: Header=BB145_2 Depth=1
+; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    j .LBB145_1
+; RV64I-NEXT:  .LBB145_4: # %atomicrmw.end
 ; RV64I-NEXT:    ld s3, 8(sp)
 ; RV64I-NEXT:    ld s2, 16(sp)
 ; RV64I-NEXT:    ld s1, 24(sp)
@@ -11431,23 +11595,25 @@ define i32 @atomicrmw_max_i32_acquire(i3
 ; RV32I-NEXT:    mv s1, a0
 ; RV32I-NEXT:    lw a2, 0(a0)
 ; RV32I-NEXT:    addi s2, sp, 12
+; RV32I-NEXT:    sw a2, 12(sp)
+; RV32I-NEXT:    bge s0, a2, .LBB146_3
 ; RV32I-NEXT:  .LBB146_1: # %atomicrmw.start
 ; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT:    sw a2, 12(sp)
-; RV32I-NEXT:    blt s0, a2, .LBB146_3
-; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB146_1 Depth=1
-; RV32I-NEXT:    mv a2, s0
-; RV32I-NEXT:  .LBB146_3: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB146_1 Depth=1
 ; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    mv a1, s2
 ; RV32I-NEXT:    addi a3, zero, 2
 ; RV32I-NEXT:    addi a4, zero, 2
 ; RV32I-NEXT:    call __atomic_compare_exchange_4
 ; RV32I-NEXT:    lw a2, 12(sp)
-; RV32I-NEXT:    beqz a0, .LBB146_1
-; RV32I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV32I-NEXT:    bnez a0, .LBB146_4
+; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
+; RV32I-NEXT:    # in Loop: Header=BB146_1 Depth=1
+; RV32I-NEXT:    sw a2, 12(sp)
+; RV32I-NEXT:    blt s0, a2, .LBB146_1
+; RV32I-NEXT:  .LBB146_3: # %atomicrmw.start
+; RV32I-NEXT:    mv a2, s0
+; RV32I-NEXT:    j .LBB146_1
+; RV32I-NEXT:  .LBB146_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a2
 ; RV32I-NEXT:    lw s2, 16(sp)
 ; RV32I-NEXT:    lw s1, 20(sp)
@@ -11474,16 +11640,9 @@ define i32 @atomicrmw_max_i32_acquire(i3
 ; RV64I-NEXT:    lwu a0, 0(a0)
 ; RV64I-NEXT:    sext.w s0, a1
 ; RV64I-NEXT:    addi s3, sp, 4
+; RV64I-NEXT:    j .LBB146_2
 ; RV64I-NEXT:  .LBB146_1: # %atomicrmw.start
-; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT:    sext.w a1, a0
-; RV64I-NEXT:    mv a2, a0
-; RV64I-NEXT:    blt s0, a1, .LBB146_3
-; RV64I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB146_1 Depth=1
-; RV64I-NEXT:    mv a2, s2
-; RV64I-NEXT:  .LBB146_3: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB146_1 Depth=1
+; RV64I-NEXT:    # in Loop: Header=BB146_2 Depth=1
 ; RV64I-NEXT:    sw a0, 4(sp)
 ; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    mv a1, s3
@@ -11492,8 +11651,17 @@ define i32 @atomicrmw_max_i32_acquire(i3
 ; RV64I-NEXT:    call __atomic_compare_exchange_4
 ; RV64I-NEXT:    mv a1, a0
 ; RV64I-NEXT:    lw a0, 4(sp)
-; RV64I-NEXT:    beqz a1, .LBB146_1
-; RV64I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV64I-NEXT:    bnez a1, .LBB146_4
+; RV64I-NEXT:  .LBB146_2: # %atomicrmw.start
+; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV64I-NEXT:    sext.w a1, a0
+; RV64I-NEXT:    mv a2, a0
+; RV64I-NEXT:    blt s0, a1, .LBB146_1
+; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
+; RV64I-NEXT:    # in Loop: Header=BB146_2 Depth=1
+; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    j .LBB146_1
+; RV64I-NEXT:  .LBB146_4: # %atomicrmw.end
 ; RV64I-NEXT:    ld s3, 8(sp)
 ; RV64I-NEXT:    ld s2, 16(sp)
 ; RV64I-NEXT:    ld s1, 24(sp)
@@ -11522,23 +11690,25 @@ define i32 @atomicrmw_max_i32_release(i3
 ; RV32I-NEXT:    mv s1, a0
 ; RV32I-NEXT:    lw a2, 0(a0)
 ; RV32I-NEXT:    addi s2, sp, 12
+; RV32I-NEXT:    sw a2, 12(sp)
+; RV32I-NEXT:    bge s0, a2, .LBB147_3
 ; RV32I-NEXT:  .LBB147_1: # %atomicrmw.start
 ; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT:    sw a2, 12(sp)
-; RV32I-NEXT:    blt s0, a2, .LBB147_3
-; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB147_1 Depth=1
-; RV32I-NEXT:    mv a2, s0
-; RV32I-NEXT:  .LBB147_3: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB147_1 Depth=1
 ; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    mv a1, s2
 ; RV32I-NEXT:    addi a3, zero, 3
 ; RV32I-NEXT:    mv a4, zero
 ; RV32I-NEXT:    call __atomic_compare_exchange_4
 ; RV32I-NEXT:    lw a2, 12(sp)
-; RV32I-NEXT:    beqz a0, .LBB147_1
-; RV32I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV32I-NEXT:    bnez a0, .LBB147_4
+; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
+; RV32I-NEXT:    # in Loop: Header=BB147_1 Depth=1
+; RV32I-NEXT:    sw a2, 12(sp)
+; RV32I-NEXT:    blt s0, a2, .LBB147_1
+; RV32I-NEXT:  .LBB147_3: # %atomicrmw.start
+; RV32I-NEXT:    mv a2, s0
+; RV32I-NEXT:    j .LBB147_1
+; RV32I-NEXT:  .LBB147_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a2
 ; RV32I-NEXT:    lw s2, 16(sp)
 ; RV32I-NEXT:    lw s1, 20(sp)
@@ -11565,16 +11735,9 @@ define i32 @atomicrmw_max_i32_release(i3
 ; RV64I-NEXT:    lwu a0, 0(a0)
 ; RV64I-NEXT:    sext.w s0, a1
 ; RV64I-NEXT:    addi s3, sp, 4
+; RV64I-NEXT:    j .LBB147_2
 ; RV64I-NEXT:  .LBB147_1: # %atomicrmw.start
-; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT:    sext.w a1, a0
-; RV64I-NEXT:    mv a2, a0
-; RV64I-NEXT:    blt s0, a1, .LBB147_3
-; RV64I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB147_1 Depth=1
-; RV64I-NEXT:    mv a2, s2
-; RV64I-NEXT:  .LBB147_3: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB147_1 Depth=1
+; RV64I-NEXT:    # in Loop: Header=BB147_2 Depth=1
 ; RV64I-NEXT:    sw a0, 4(sp)
 ; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    mv a1, s3
@@ -11583,8 +11746,17 @@ define i32 @atomicrmw_max_i32_release(i3
 ; RV64I-NEXT:    call __atomic_compare_exchange_4
 ; RV64I-NEXT:    mv a1, a0
 ; RV64I-NEXT:    lw a0, 4(sp)
-; RV64I-NEXT:    beqz a1, .LBB147_1
-; RV64I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV64I-NEXT:    bnez a1, .LBB147_4
+; RV64I-NEXT:  .LBB147_2: # %atomicrmw.start
+; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV64I-NEXT:    sext.w a1, a0
+; RV64I-NEXT:    mv a2, a0
+; RV64I-NEXT:    blt s0, a1, .LBB147_1
+; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
+; RV64I-NEXT:    # in Loop: Header=BB147_2 Depth=1
+; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    j .LBB147_1
+; RV64I-NEXT:  .LBB147_4: # %atomicrmw.end
 ; RV64I-NEXT:    ld s3, 8(sp)
 ; RV64I-NEXT:    ld s2, 16(sp)
 ; RV64I-NEXT:    ld s1, 24(sp)
@@ -11613,23 +11785,25 @@ define i32 @atomicrmw_max_i32_acq_rel(i3
 ; RV32I-NEXT:    mv s1, a0
 ; RV32I-NEXT:    lw a2, 0(a0)
 ; RV32I-NEXT:    addi s2, sp, 12
+; RV32I-NEXT:    sw a2, 12(sp)
+; RV32I-NEXT:    bge s0, a2, .LBB148_3
 ; RV32I-NEXT:  .LBB148_1: # %atomicrmw.start
 ; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT:    sw a2, 12(sp)
-; RV32I-NEXT:    blt s0, a2, .LBB148_3
-; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB148_1 Depth=1
-; RV32I-NEXT:    mv a2, s0
-; RV32I-NEXT:  .LBB148_3: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB148_1 Depth=1
 ; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    mv a1, s2
 ; RV32I-NEXT:    addi a3, zero, 4
 ; RV32I-NEXT:    addi a4, zero, 2
 ; RV32I-NEXT:    call __atomic_compare_exchange_4
 ; RV32I-NEXT:    lw a2, 12(sp)
-; RV32I-NEXT:    beqz a0, .LBB148_1
-; RV32I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV32I-NEXT:    bnez a0, .LBB148_4
+; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
+; RV32I-NEXT:    # in Loop: Header=BB148_1 Depth=1
+; RV32I-NEXT:    sw a2, 12(sp)
+; RV32I-NEXT:    blt s0, a2, .LBB148_1
+; RV32I-NEXT:  .LBB148_3: # %atomicrmw.start
+; RV32I-NEXT:    mv a2, s0
+; RV32I-NEXT:    j .LBB148_1
+; RV32I-NEXT:  .LBB148_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a2
 ; RV32I-NEXT:    lw s2, 16(sp)
 ; RV32I-NEXT:    lw s1, 20(sp)
@@ -11656,16 +11830,9 @@ define i32 @atomicrmw_max_i32_acq_rel(i3
 ; RV64I-NEXT:    lwu a0, 0(a0)
 ; RV64I-NEXT:    sext.w s0, a1
 ; RV64I-NEXT:    addi s3, sp, 4
+; RV64I-NEXT:    j .LBB148_2
 ; RV64I-NEXT:  .LBB148_1: # %atomicrmw.start
-; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT:    sext.w a1, a0
-; RV64I-NEXT:    mv a2, a0
-; RV64I-NEXT:    blt s0, a1, .LBB148_3
-; RV64I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB148_1 Depth=1
-; RV64I-NEXT:    mv a2, s2
-; RV64I-NEXT:  .LBB148_3: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB148_1 Depth=1
+; RV64I-NEXT:    # in Loop: Header=BB148_2 Depth=1
 ; RV64I-NEXT:    sw a0, 4(sp)
 ; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    mv a1, s3
@@ -11674,8 +11841,17 @@ define i32 @atomicrmw_max_i32_acq_rel(i3
 ; RV64I-NEXT:    call __atomic_compare_exchange_4
 ; RV64I-NEXT:    mv a1, a0
 ; RV64I-NEXT:    lw a0, 4(sp)
-; RV64I-NEXT:    beqz a1, .LBB148_1
-; RV64I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV64I-NEXT:    bnez a1, .LBB148_4
+; RV64I-NEXT:  .LBB148_2: # %atomicrmw.start
+; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV64I-NEXT:    sext.w a1, a0
+; RV64I-NEXT:    mv a2, a0
+; RV64I-NEXT:    blt s0, a1, .LBB148_1
+; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
+; RV64I-NEXT:    # in Loop: Header=BB148_2 Depth=1
+; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    j .LBB148_1
+; RV64I-NEXT:  .LBB148_4: # %atomicrmw.end
 ; RV64I-NEXT:    ld s3, 8(sp)
 ; RV64I-NEXT:    ld s2, 16(sp)
 ; RV64I-NEXT:    ld s1, 24(sp)
@@ -11704,23 +11880,25 @@ define i32 @atomicrmw_max_i32_seq_cst(i3
 ; RV32I-NEXT:    mv s1, a0
 ; RV32I-NEXT:    lw a2, 0(a0)
 ; RV32I-NEXT:    addi s2, sp, 12
+; RV32I-NEXT:    sw a2, 12(sp)
+; RV32I-NEXT:    bge s0, a2, .LBB149_3
 ; RV32I-NEXT:  .LBB149_1: # %atomicrmw.start
 ; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT:    sw a2, 12(sp)
-; RV32I-NEXT:    blt s0, a2, .LBB149_3
-; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB149_1 Depth=1
-; RV32I-NEXT:    mv a2, s0
-; RV32I-NEXT:  .LBB149_3: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB149_1 Depth=1
 ; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    mv a1, s2
 ; RV32I-NEXT:    addi a3, zero, 5
 ; RV32I-NEXT:    addi a4, zero, 5
 ; RV32I-NEXT:    call __atomic_compare_exchange_4
 ; RV32I-NEXT:    lw a2, 12(sp)
-; RV32I-NEXT:    beqz a0, .LBB149_1
-; RV32I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV32I-NEXT:    bnez a0, .LBB149_4
+; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
+; RV32I-NEXT:    # in Loop: Header=BB149_1 Depth=1
+; RV32I-NEXT:    sw a2, 12(sp)
+; RV32I-NEXT:    blt s0, a2, .LBB149_1
+; RV32I-NEXT:  .LBB149_3: # %atomicrmw.start
+; RV32I-NEXT:    mv a2, s0
+; RV32I-NEXT:    j .LBB149_1
+; RV32I-NEXT:  .LBB149_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a2
 ; RV32I-NEXT:    lw s2, 16(sp)
 ; RV32I-NEXT:    lw s1, 20(sp)
@@ -11747,16 +11925,9 @@ define i32 @atomicrmw_max_i32_seq_cst(i3
 ; RV64I-NEXT:    lwu a0, 0(a0)
 ; RV64I-NEXT:    sext.w s0, a1
 ; RV64I-NEXT:    addi s3, sp, 4
+; RV64I-NEXT:    j .LBB149_2
 ; RV64I-NEXT:  .LBB149_1: # %atomicrmw.start
-; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT:    sext.w a1, a0
-; RV64I-NEXT:    mv a2, a0
-; RV64I-NEXT:    blt s0, a1, .LBB149_3
-; RV64I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB149_1 Depth=1
-; RV64I-NEXT:    mv a2, s2
-; RV64I-NEXT:  .LBB149_3: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB149_1 Depth=1
+; RV64I-NEXT:    # in Loop: Header=BB149_2 Depth=1
 ; RV64I-NEXT:    sw a0, 4(sp)
 ; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    mv a1, s3
@@ -11765,8 +11936,17 @@ define i32 @atomicrmw_max_i32_seq_cst(i3
 ; RV64I-NEXT:    call __atomic_compare_exchange_4
 ; RV64I-NEXT:    mv a1, a0
 ; RV64I-NEXT:    lw a0, 4(sp)
-; RV64I-NEXT:    beqz a1, .LBB149_1
-; RV64I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV64I-NEXT:    bnez a1, .LBB149_4
+; RV64I-NEXT:  .LBB149_2: # %atomicrmw.start
+; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV64I-NEXT:    sext.w a1, a0
+; RV64I-NEXT:    mv a2, a0
+; RV64I-NEXT:    blt s0, a1, .LBB149_1
+; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
+; RV64I-NEXT:    # in Loop: Header=BB149_2 Depth=1
+; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    j .LBB149_1
+; RV64I-NEXT:  .LBB149_4: # %atomicrmw.end
 ; RV64I-NEXT:    ld s3, 8(sp)
 ; RV64I-NEXT:    ld s2, 16(sp)
 ; RV64I-NEXT:    ld s1, 24(sp)
@@ -11795,23 +11975,25 @@ define i32 @atomicrmw_min_i32_monotonic(
 ; RV32I-NEXT:    mv s1, a0
 ; RV32I-NEXT:    lw a2, 0(a0)
 ; RV32I-NEXT:    addi s2, sp, 12
+; RV32I-NEXT:    sw a2, 12(sp)
+; RV32I-NEXT:    blt s0, a2, .LBB150_3
 ; RV32I-NEXT:  .LBB150_1: # %atomicrmw.start
 ; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT:    sw a2, 12(sp)
-; RV32I-NEXT:    bge s0, a2, .LBB150_3
-; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB150_1 Depth=1
-; RV32I-NEXT:    mv a2, s0
-; RV32I-NEXT:  .LBB150_3: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB150_1 Depth=1
 ; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    mv a1, s2
 ; RV32I-NEXT:    mv a3, zero
 ; RV32I-NEXT:    mv a4, zero
 ; RV32I-NEXT:    call __atomic_compare_exchange_4
 ; RV32I-NEXT:    lw a2, 12(sp)
-; RV32I-NEXT:    beqz a0, .LBB150_1
-; RV32I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV32I-NEXT:    bnez a0, .LBB150_4
+; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
+; RV32I-NEXT:    # in Loop: Header=BB150_1 Depth=1
+; RV32I-NEXT:    sw a2, 12(sp)
+; RV32I-NEXT:    bge s0, a2, .LBB150_1
+; RV32I-NEXT:  .LBB150_3: # %atomicrmw.start
+; RV32I-NEXT:    mv a2, s0
+; RV32I-NEXT:    j .LBB150_1
+; RV32I-NEXT:  .LBB150_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a2
 ; RV32I-NEXT:    lw s2, 16(sp)
 ; RV32I-NEXT:    lw s1, 20(sp)
@@ -11838,16 +12020,9 @@ define i32 @atomicrmw_min_i32_monotonic(
 ; RV64I-NEXT:    lwu a0, 0(a0)
 ; RV64I-NEXT:    sext.w s0, a1
 ; RV64I-NEXT:    addi s3, sp, 4
+; RV64I-NEXT:    j .LBB150_2
 ; RV64I-NEXT:  .LBB150_1: # %atomicrmw.start
-; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT:    sext.w a1, a0
-; RV64I-NEXT:    mv a2, a0
-; RV64I-NEXT:    bge s0, a1, .LBB150_3
-; RV64I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB150_1 Depth=1
-; RV64I-NEXT:    mv a2, s2
-; RV64I-NEXT:  .LBB150_3: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB150_1 Depth=1
+; RV64I-NEXT:    # in Loop: Header=BB150_2 Depth=1
 ; RV64I-NEXT:    sw a0, 4(sp)
 ; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    mv a1, s3
@@ -11856,8 +12031,17 @@ define i32 @atomicrmw_min_i32_monotonic(
 ; RV64I-NEXT:    call __atomic_compare_exchange_4
 ; RV64I-NEXT:    mv a1, a0
 ; RV64I-NEXT:    lw a0, 4(sp)
-; RV64I-NEXT:    beqz a1, .LBB150_1
-; RV64I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV64I-NEXT:    bnez a1, .LBB150_4
+; RV64I-NEXT:  .LBB150_2: # %atomicrmw.start
+; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV64I-NEXT:    sext.w a1, a0
+; RV64I-NEXT:    mv a2, a0
+; RV64I-NEXT:    bge s0, a1, .LBB150_1
+; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
+; RV64I-NEXT:    # in Loop: Header=BB150_2 Depth=1
+; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    j .LBB150_1
+; RV64I-NEXT:  .LBB150_4: # %atomicrmw.end
 ; RV64I-NEXT:    ld s3, 8(sp)
 ; RV64I-NEXT:    ld s2, 16(sp)
 ; RV64I-NEXT:    ld s1, 24(sp)
@@ -11886,23 +12070,25 @@ define i32 @atomicrmw_min_i32_acquire(i3
 ; RV32I-NEXT:    mv s1, a0
 ; RV32I-NEXT:    lw a2, 0(a0)
 ; RV32I-NEXT:    addi s2, sp, 12
+; RV32I-NEXT:    sw a2, 12(sp)
+; RV32I-NEXT:    blt s0, a2, .LBB151_3
 ; RV32I-NEXT:  .LBB151_1: # %atomicrmw.start
 ; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT:    sw a2, 12(sp)
-; RV32I-NEXT:    bge s0, a2, .LBB151_3
-; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB151_1 Depth=1
-; RV32I-NEXT:    mv a2, s0
-; RV32I-NEXT:  .LBB151_3: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB151_1 Depth=1
 ; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    mv a1, s2
 ; RV32I-NEXT:    addi a3, zero, 2
 ; RV32I-NEXT:    addi a4, zero, 2
 ; RV32I-NEXT:    call __atomic_compare_exchange_4
 ; RV32I-NEXT:    lw a2, 12(sp)
-; RV32I-NEXT:    beqz a0, .LBB151_1
-; RV32I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV32I-NEXT:    bnez a0, .LBB151_4
+; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
+; RV32I-NEXT:    # in Loop: Header=BB151_1 Depth=1
+; RV32I-NEXT:    sw a2, 12(sp)
+; RV32I-NEXT:    bge s0, a2, .LBB151_1
+; RV32I-NEXT:  .LBB151_3: # %atomicrmw.start
+; RV32I-NEXT:    mv a2, s0
+; RV32I-NEXT:    j .LBB151_1
+; RV32I-NEXT:  .LBB151_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a2
 ; RV32I-NEXT:    lw s2, 16(sp)
 ; RV32I-NEXT:    lw s1, 20(sp)
@@ -11929,16 +12115,9 @@ define i32 @atomicrmw_min_i32_acquire(i3
 ; RV64I-NEXT:    lwu a0, 0(a0)
 ; RV64I-NEXT:    sext.w s0, a1
 ; RV64I-NEXT:    addi s3, sp, 4
+; RV64I-NEXT:    j .LBB151_2
 ; RV64I-NEXT:  .LBB151_1: # %atomicrmw.start
-; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT:    sext.w a1, a0
-; RV64I-NEXT:    mv a2, a0
-; RV64I-NEXT:    bge s0, a1, .LBB151_3
-; RV64I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB151_1 Depth=1
-; RV64I-NEXT:    mv a2, s2
-; RV64I-NEXT:  .LBB151_3: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB151_1 Depth=1
+; RV64I-NEXT:    # in Loop: Header=BB151_2 Depth=1
 ; RV64I-NEXT:    sw a0, 4(sp)
 ; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    mv a1, s3
@@ -11947,8 +12126,17 @@ define i32 @atomicrmw_min_i32_acquire(i3
 ; RV64I-NEXT:    call __atomic_compare_exchange_4
 ; RV64I-NEXT:    mv a1, a0
 ; RV64I-NEXT:    lw a0, 4(sp)
-; RV64I-NEXT:    beqz a1, .LBB151_1
-; RV64I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV64I-NEXT:    bnez a1, .LBB151_4
+; RV64I-NEXT:  .LBB151_2: # %atomicrmw.start
+; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV64I-NEXT:    sext.w a1, a0
+; RV64I-NEXT:    mv a2, a0
+; RV64I-NEXT:    bge s0, a1, .LBB151_1
+; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
+; RV64I-NEXT:    # in Loop: Header=BB151_2 Depth=1
+; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    j .LBB151_1
+; RV64I-NEXT:  .LBB151_4: # %atomicrmw.end
 ; RV64I-NEXT:    ld s3, 8(sp)
 ; RV64I-NEXT:    ld s2, 16(sp)
 ; RV64I-NEXT:    ld s1, 24(sp)
@@ -11977,23 +12165,25 @@ define i32 @atomicrmw_min_i32_release(i3
 ; RV32I-NEXT:    mv s1, a0
 ; RV32I-NEXT:    lw a2, 0(a0)
 ; RV32I-NEXT:    addi s2, sp, 12
+; RV32I-NEXT:    sw a2, 12(sp)
+; RV32I-NEXT:    blt s0, a2, .LBB152_3
 ; RV32I-NEXT:  .LBB152_1: # %atomicrmw.start
 ; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT:    sw a2, 12(sp)
-; RV32I-NEXT:    bge s0, a2, .LBB152_3
-; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB152_1 Depth=1
-; RV32I-NEXT:    mv a2, s0
-; RV32I-NEXT:  .LBB152_3: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB152_1 Depth=1
 ; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    mv a1, s2
 ; RV32I-NEXT:    addi a3, zero, 3
 ; RV32I-NEXT:    mv a4, zero
 ; RV32I-NEXT:    call __atomic_compare_exchange_4
 ; RV32I-NEXT:    lw a2, 12(sp)
-; RV32I-NEXT:    beqz a0, .LBB152_1
-; RV32I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV32I-NEXT:    bnez a0, .LBB152_4
+; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
+; RV32I-NEXT:    # in Loop: Header=BB152_1 Depth=1
+; RV32I-NEXT:    sw a2, 12(sp)
+; RV32I-NEXT:    bge s0, a2, .LBB152_1
+; RV32I-NEXT:  .LBB152_3: # %atomicrmw.start
+; RV32I-NEXT:    mv a2, s0
+; RV32I-NEXT:    j .LBB152_1
+; RV32I-NEXT:  .LBB152_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a2
 ; RV32I-NEXT:    lw s2, 16(sp)
 ; RV32I-NEXT:    lw s1, 20(sp)
@@ -12020,16 +12210,9 @@ define i32 @atomicrmw_min_i32_release(i3
 ; RV64I-NEXT:    lwu a0, 0(a0)
 ; RV64I-NEXT:    sext.w s0, a1
 ; RV64I-NEXT:    addi s3, sp, 4
+; RV64I-NEXT:    j .LBB152_2
 ; RV64I-NEXT:  .LBB152_1: # %atomicrmw.start
-; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT:    sext.w a1, a0
-; RV64I-NEXT:    mv a2, a0
-; RV64I-NEXT:    bge s0, a1, .LBB152_3
-; RV64I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB152_1 Depth=1
-; RV64I-NEXT:    mv a2, s2
-; RV64I-NEXT:  .LBB152_3: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB152_1 Depth=1
+; RV64I-NEXT:    # in Loop: Header=BB152_2 Depth=1
 ; RV64I-NEXT:    sw a0, 4(sp)
 ; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    mv a1, s3
@@ -12038,8 +12221,17 @@ define i32 @atomicrmw_min_i32_release(i3
 ; RV64I-NEXT:    call __atomic_compare_exchange_4
 ; RV64I-NEXT:    mv a1, a0
 ; RV64I-NEXT:    lw a0, 4(sp)
-; RV64I-NEXT:    beqz a1, .LBB152_1
-; RV64I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV64I-NEXT:    bnez a1, .LBB152_4
+; RV64I-NEXT:  .LBB152_2: # %atomicrmw.start
+; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV64I-NEXT:    sext.w a1, a0
+; RV64I-NEXT:    mv a2, a0
+; RV64I-NEXT:    bge s0, a1, .LBB152_1
+; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
+; RV64I-NEXT:    # in Loop: Header=BB152_2 Depth=1
+; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    j .LBB152_1
+; RV64I-NEXT:  .LBB152_4: # %atomicrmw.end
 ; RV64I-NEXT:    ld s3, 8(sp)
 ; RV64I-NEXT:    ld s2, 16(sp)
 ; RV64I-NEXT:    ld s1, 24(sp)
@@ -12068,23 +12260,25 @@ define i32 @atomicrmw_min_i32_acq_rel(i3
 ; RV32I-NEXT:    mv s1, a0
 ; RV32I-NEXT:    lw a2, 0(a0)
 ; RV32I-NEXT:    addi s2, sp, 12
+; RV32I-NEXT:    sw a2, 12(sp)
+; RV32I-NEXT:    blt s0, a2, .LBB153_3
 ; RV32I-NEXT:  .LBB153_1: # %atomicrmw.start
 ; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT:    sw a2, 12(sp)
-; RV32I-NEXT:    bge s0, a2, .LBB153_3
-; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB153_1 Depth=1
-; RV32I-NEXT:    mv a2, s0
-; RV32I-NEXT:  .LBB153_3: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB153_1 Depth=1
 ; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    mv a1, s2
 ; RV32I-NEXT:    addi a3, zero, 4
 ; RV32I-NEXT:    addi a4, zero, 2
 ; RV32I-NEXT:    call __atomic_compare_exchange_4
 ; RV32I-NEXT:    lw a2, 12(sp)
-; RV32I-NEXT:    beqz a0, .LBB153_1
-; RV32I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV32I-NEXT:    bnez a0, .LBB153_4
+; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
+; RV32I-NEXT:    # in Loop: Header=BB153_1 Depth=1
+; RV32I-NEXT:    sw a2, 12(sp)
+; RV32I-NEXT:    bge s0, a2, .LBB153_1
+; RV32I-NEXT:  .LBB153_3: # %atomicrmw.start
+; RV32I-NEXT:    mv a2, s0
+; RV32I-NEXT:    j .LBB153_1
+; RV32I-NEXT:  .LBB153_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a2
 ; RV32I-NEXT:    lw s2, 16(sp)
 ; RV32I-NEXT:    lw s1, 20(sp)
@@ -12111,16 +12305,9 @@ define i32 @atomicrmw_min_i32_acq_rel(i3
 ; RV64I-NEXT:    lwu a0, 0(a0)
 ; RV64I-NEXT:    sext.w s0, a1
 ; RV64I-NEXT:    addi s3, sp, 4
+; RV64I-NEXT:    j .LBB153_2
 ; RV64I-NEXT:  .LBB153_1: # %atomicrmw.start
-; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT:    sext.w a1, a0
-; RV64I-NEXT:    mv a2, a0
-; RV64I-NEXT:    bge s0, a1, .LBB153_3
-; RV64I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB153_1 Depth=1
-; RV64I-NEXT:    mv a2, s2
-; RV64I-NEXT:  .LBB153_3: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB153_1 Depth=1
+; RV64I-NEXT:    # in Loop: Header=BB153_2 Depth=1
 ; RV64I-NEXT:    sw a0, 4(sp)
 ; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    mv a1, s3
@@ -12129,8 +12316,17 @@ define i32 @atomicrmw_min_i32_acq_rel(i3
 ; RV64I-NEXT:    call __atomic_compare_exchange_4
 ; RV64I-NEXT:    mv a1, a0
 ; RV64I-NEXT:    lw a0, 4(sp)
-; RV64I-NEXT:    beqz a1, .LBB153_1
-; RV64I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV64I-NEXT:    bnez a1, .LBB153_4
+; RV64I-NEXT:  .LBB153_2: # %atomicrmw.start
+; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV64I-NEXT:    sext.w a1, a0
+; RV64I-NEXT:    mv a2, a0
+; RV64I-NEXT:    bge s0, a1, .LBB153_1
+; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
+; RV64I-NEXT:    # in Loop: Header=BB153_2 Depth=1
+; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    j .LBB153_1
+; RV64I-NEXT:  .LBB153_4: # %atomicrmw.end
 ; RV64I-NEXT:    ld s3, 8(sp)
 ; RV64I-NEXT:    ld s2, 16(sp)
 ; RV64I-NEXT:    ld s1, 24(sp)
@@ -12159,23 +12355,25 @@ define i32 @atomicrmw_min_i32_seq_cst(i3
 ; RV32I-NEXT:    mv s1, a0
 ; RV32I-NEXT:    lw a2, 0(a0)
 ; RV32I-NEXT:    addi s2, sp, 12
+; RV32I-NEXT:    sw a2, 12(sp)
+; RV32I-NEXT:    blt s0, a2, .LBB154_3
 ; RV32I-NEXT:  .LBB154_1: # %atomicrmw.start
 ; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT:    sw a2, 12(sp)
-; RV32I-NEXT:    bge s0, a2, .LBB154_3
-; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB154_1 Depth=1
-; RV32I-NEXT:    mv a2, s0
-; RV32I-NEXT:  .LBB154_3: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB154_1 Depth=1
 ; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    mv a1, s2
 ; RV32I-NEXT:    addi a3, zero, 5
 ; RV32I-NEXT:    addi a4, zero, 5
 ; RV32I-NEXT:    call __atomic_compare_exchange_4
 ; RV32I-NEXT:    lw a2, 12(sp)
-; RV32I-NEXT:    beqz a0, .LBB154_1
-; RV32I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV32I-NEXT:    bnez a0, .LBB154_4
+; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
+; RV32I-NEXT:    # in Loop: Header=BB154_1 Depth=1
+; RV32I-NEXT:    sw a2, 12(sp)
+; RV32I-NEXT:    bge s0, a2, .LBB154_1
+; RV32I-NEXT:  .LBB154_3: # %atomicrmw.start
+; RV32I-NEXT:    mv a2, s0
+; RV32I-NEXT:    j .LBB154_1
+; RV32I-NEXT:  .LBB154_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a2
 ; RV32I-NEXT:    lw s2, 16(sp)
 ; RV32I-NEXT:    lw s1, 20(sp)
@@ -12202,16 +12400,9 @@ define i32 @atomicrmw_min_i32_seq_cst(i3
 ; RV64I-NEXT:    lwu a0, 0(a0)
 ; RV64I-NEXT:    sext.w s0, a1
 ; RV64I-NEXT:    addi s3, sp, 4
+; RV64I-NEXT:    j .LBB154_2
 ; RV64I-NEXT:  .LBB154_1: # %atomicrmw.start
-; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT:    sext.w a1, a0
-; RV64I-NEXT:    mv a2, a0
-; RV64I-NEXT:    bge s0, a1, .LBB154_3
-; RV64I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB154_1 Depth=1
-; RV64I-NEXT:    mv a2, s2
-; RV64I-NEXT:  .LBB154_3: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB154_1 Depth=1
+; RV64I-NEXT:    # in Loop: Header=BB154_2 Depth=1
 ; RV64I-NEXT:    sw a0, 4(sp)
 ; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    mv a1, s3
@@ -12220,8 +12411,17 @@ define i32 @atomicrmw_min_i32_seq_cst(i3
 ; RV64I-NEXT:    call __atomic_compare_exchange_4
 ; RV64I-NEXT:    mv a1, a0
 ; RV64I-NEXT:    lw a0, 4(sp)
-; RV64I-NEXT:    beqz a1, .LBB154_1
-; RV64I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV64I-NEXT:    bnez a1, .LBB154_4
+; RV64I-NEXT:  .LBB154_2: # %atomicrmw.start
+; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV64I-NEXT:    sext.w a1, a0
+; RV64I-NEXT:    mv a2, a0
+; RV64I-NEXT:    bge s0, a1, .LBB154_1
+; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
+; RV64I-NEXT:    # in Loop: Header=BB154_2 Depth=1
+; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    j .LBB154_1
+; RV64I-NEXT:  .LBB154_4: # %atomicrmw.end
 ; RV64I-NEXT:    ld s3, 8(sp)
 ; RV64I-NEXT:    ld s2, 16(sp)
 ; RV64I-NEXT:    ld s1, 24(sp)
@@ -12250,23 +12450,25 @@ define i32 @atomicrmw_umax_i32_monotonic
 ; RV32I-NEXT:    mv s1, a0
 ; RV32I-NEXT:    lw a2, 0(a0)
 ; RV32I-NEXT:    addi s2, sp, 12
+; RV32I-NEXT:    sw a2, 12(sp)
+; RV32I-NEXT:    bgeu s0, a2, .LBB155_3
 ; RV32I-NEXT:  .LBB155_1: # %atomicrmw.start
 ; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT:    sw a2, 12(sp)
-; RV32I-NEXT:    bltu s0, a2, .LBB155_3
-; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB155_1 Depth=1
-; RV32I-NEXT:    mv a2, s0
-; RV32I-NEXT:  .LBB155_3: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB155_1 Depth=1
 ; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    mv a1, s2
 ; RV32I-NEXT:    mv a3, zero
 ; RV32I-NEXT:    mv a4, zero
 ; RV32I-NEXT:    call __atomic_compare_exchange_4
 ; RV32I-NEXT:    lw a2, 12(sp)
-; RV32I-NEXT:    beqz a0, .LBB155_1
-; RV32I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV32I-NEXT:    bnez a0, .LBB155_4
+; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
+; RV32I-NEXT:    # in Loop: Header=BB155_1 Depth=1
+; RV32I-NEXT:    sw a2, 12(sp)
+; RV32I-NEXT:    bltu s0, a2, .LBB155_1
+; RV32I-NEXT:  .LBB155_3: # %atomicrmw.start
+; RV32I-NEXT:    mv a2, s0
+; RV32I-NEXT:    j .LBB155_1
+; RV32I-NEXT:  .LBB155_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a2
 ; RV32I-NEXT:    lw s2, 16(sp)
 ; RV32I-NEXT:    lw s1, 20(sp)
@@ -12293,16 +12495,9 @@ define i32 @atomicrmw_umax_i32_monotonic
 ; RV64I-NEXT:    lwu a0, 0(a0)
 ; RV64I-NEXT:    sext.w s0, a1
 ; RV64I-NEXT:    addi s3, sp, 4
+; RV64I-NEXT:    j .LBB155_2
 ; RV64I-NEXT:  .LBB155_1: # %atomicrmw.start
-; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT:    sext.w a1, a0
-; RV64I-NEXT:    mv a2, a0
-; RV64I-NEXT:    bltu s0, a1, .LBB155_3
-; RV64I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB155_1 Depth=1
-; RV64I-NEXT:    mv a2, s2
-; RV64I-NEXT:  .LBB155_3: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB155_1 Depth=1
+; RV64I-NEXT:    # in Loop: Header=BB155_2 Depth=1
 ; RV64I-NEXT:    sw a0, 4(sp)
 ; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    mv a1, s3
@@ -12311,8 +12506,17 @@ define i32 @atomicrmw_umax_i32_monotonic
 ; RV64I-NEXT:    call __atomic_compare_exchange_4
 ; RV64I-NEXT:    mv a1, a0
 ; RV64I-NEXT:    lw a0, 4(sp)
-; RV64I-NEXT:    beqz a1, .LBB155_1
-; RV64I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV64I-NEXT:    bnez a1, .LBB155_4
+; RV64I-NEXT:  .LBB155_2: # %atomicrmw.start
+; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV64I-NEXT:    sext.w a1, a0
+; RV64I-NEXT:    mv a2, a0
+; RV64I-NEXT:    bltu s0, a1, .LBB155_1
+; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
+; RV64I-NEXT:    # in Loop: Header=BB155_2 Depth=1
+; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    j .LBB155_1
+; RV64I-NEXT:  .LBB155_4: # %atomicrmw.end
 ; RV64I-NEXT:    ld s3, 8(sp)
 ; RV64I-NEXT:    ld s2, 16(sp)
 ; RV64I-NEXT:    ld s1, 24(sp)
@@ -12341,23 +12545,25 @@ define i32 @atomicrmw_umax_i32_acquire(i
 ; RV32I-NEXT:    mv s1, a0
 ; RV32I-NEXT:    lw a2, 0(a0)
 ; RV32I-NEXT:    addi s2, sp, 12
+; RV32I-NEXT:    sw a2, 12(sp)
+; RV32I-NEXT:    bgeu s0, a2, .LBB156_3
 ; RV32I-NEXT:  .LBB156_1: # %atomicrmw.start
 ; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT:    sw a2, 12(sp)
-; RV32I-NEXT:    bltu s0, a2, .LBB156_3
-; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB156_1 Depth=1
-; RV32I-NEXT:    mv a2, s0
-; RV32I-NEXT:  .LBB156_3: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB156_1 Depth=1
 ; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    mv a1, s2
 ; RV32I-NEXT:    addi a3, zero, 2
 ; RV32I-NEXT:    addi a4, zero, 2
 ; RV32I-NEXT:    call __atomic_compare_exchange_4
 ; RV32I-NEXT:    lw a2, 12(sp)
-; RV32I-NEXT:    beqz a0, .LBB156_1
-; RV32I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV32I-NEXT:    bnez a0, .LBB156_4
+; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
+; RV32I-NEXT:    # in Loop: Header=BB156_1 Depth=1
+; RV32I-NEXT:    sw a2, 12(sp)
+; RV32I-NEXT:    bltu s0, a2, .LBB156_1
+; RV32I-NEXT:  .LBB156_3: # %atomicrmw.start
+; RV32I-NEXT:    mv a2, s0
+; RV32I-NEXT:    j .LBB156_1
+; RV32I-NEXT:  .LBB156_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a2
 ; RV32I-NEXT:    lw s2, 16(sp)
 ; RV32I-NEXT:    lw s1, 20(sp)
@@ -12384,16 +12590,9 @@ define i32 @atomicrmw_umax_i32_acquire(i
 ; RV64I-NEXT:    lwu a0, 0(a0)
 ; RV64I-NEXT:    sext.w s0, a1
 ; RV64I-NEXT:    addi s3, sp, 4
+; RV64I-NEXT:    j .LBB156_2
 ; RV64I-NEXT:  .LBB156_1: # %atomicrmw.start
-; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT:    sext.w a1, a0
-; RV64I-NEXT:    mv a2, a0
-; RV64I-NEXT:    bltu s0, a1, .LBB156_3
-; RV64I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB156_1 Depth=1
-; RV64I-NEXT:    mv a2, s2
-; RV64I-NEXT:  .LBB156_3: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB156_1 Depth=1
+; RV64I-NEXT:    # in Loop: Header=BB156_2 Depth=1
 ; RV64I-NEXT:    sw a0, 4(sp)
 ; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    mv a1, s3
@@ -12402,8 +12601,17 @@ define i32 @atomicrmw_umax_i32_acquire(i
 ; RV64I-NEXT:    call __atomic_compare_exchange_4
 ; RV64I-NEXT:    mv a1, a0
 ; RV64I-NEXT:    lw a0, 4(sp)
-; RV64I-NEXT:    beqz a1, .LBB156_1
-; RV64I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV64I-NEXT:    bnez a1, .LBB156_4
+; RV64I-NEXT:  .LBB156_2: # %atomicrmw.start
+; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV64I-NEXT:    sext.w a1, a0
+; RV64I-NEXT:    mv a2, a0
+; RV64I-NEXT:    bltu s0, a1, .LBB156_1
+; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
+; RV64I-NEXT:    # in Loop: Header=BB156_2 Depth=1
+; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    j .LBB156_1
+; RV64I-NEXT:  .LBB156_4: # %atomicrmw.end
 ; RV64I-NEXT:    ld s3, 8(sp)
 ; RV64I-NEXT:    ld s2, 16(sp)
 ; RV64I-NEXT:    ld s1, 24(sp)
@@ -12432,23 +12640,25 @@ define i32 @atomicrmw_umax_i32_release(i
 ; RV32I-NEXT:    mv s1, a0
 ; RV32I-NEXT:    lw a2, 0(a0)
 ; RV32I-NEXT:    addi s2, sp, 12
+; RV32I-NEXT:    sw a2, 12(sp)
+; RV32I-NEXT:    bgeu s0, a2, .LBB157_3
 ; RV32I-NEXT:  .LBB157_1: # %atomicrmw.start
 ; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT:    sw a2, 12(sp)
-; RV32I-NEXT:    bltu s0, a2, .LBB157_3
-; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB157_1 Depth=1
-; RV32I-NEXT:    mv a2, s0
-; RV32I-NEXT:  .LBB157_3: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB157_1 Depth=1
 ; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    mv a1, s2
 ; RV32I-NEXT:    addi a3, zero, 3
 ; RV32I-NEXT:    mv a4, zero
 ; RV32I-NEXT:    call __atomic_compare_exchange_4
 ; RV32I-NEXT:    lw a2, 12(sp)
-; RV32I-NEXT:    beqz a0, .LBB157_1
-; RV32I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV32I-NEXT:    bnez a0, .LBB157_4
+; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
+; RV32I-NEXT:    # in Loop: Header=BB157_1 Depth=1
+; RV32I-NEXT:    sw a2, 12(sp)
+; RV32I-NEXT:    bltu s0, a2, .LBB157_1
+; RV32I-NEXT:  .LBB157_3: # %atomicrmw.start
+; RV32I-NEXT:    mv a2, s0
+; RV32I-NEXT:    j .LBB157_1
+; RV32I-NEXT:  .LBB157_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a2
 ; RV32I-NEXT:    lw s2, 16(sp)
 ; RV32I-NEXT:    lw s1, 20(sp)
@@ -12475,16 +12685,9 @@ define i32 @atomicrmw_umax_i32_release(i
 ; RV64I-NEXT:    lwu a0, 0(a0)
 ; RV64I-NEXT:    sext.w s0, a1
 ; RV64I-NEXT:    addi s3, sp, 4
+; RV64I-NEXT:    j .LBB157_2
 ; RV64I-NEXT:  .LBB157_1: # %atomicrmw.start
-; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT:    sext.w a1, a0
-; RV64I-NEXT:    mv a2, a0
-; RV64I-NEXT:    bltu s0, a1, .LBB157_3
-; RV64I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB157_1 Depth=1
-; RV64I-NEXT:    mv a2, s2
-; RV64I-NEXT:  .LBB157_3: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB157_1 Depth=1
+; RV64I-NEXT:    # in Loop: Header=BB157_2 Depth=1
 ; RV64I-NEXT:    sw a0, 4(sp)
 ; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    mv a1, s3
@@ -12493,8 +12696,17 @@ define i32 @atomicrmw_umax_i32_release(i
 ; RV64I-NEXT:    call __atomic_compare_exchange_4
 ; RV64I-NEXT:    mv a1, a0
 ; RV64I-NEXT:    lw a0, 4(sp)
-; RV64I-NEXT:    beqz a1, .LBB157_1
-; RV64I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV64I-NEXT:    bnez a1, .LBB157_4
+; RV64I-NEXT:  .LBB157_2: # %atomicrmw.start
+; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV64I-NEXT:    sext.w a1, a0
+; RV64I-NEXT:    mv a2, a0
+; RV64I-NEXT:    bltu s0, a1, .LBB157_1
+; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
+; RV64I-NEXT:    # in Loop: Header=BB157_2 Depth=1
+; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    j .LBB157_1
+; RV64I-NEXT:  .LBB157_4: # %atomicrmw.end
 ; RV64I-NEXT:    ld s3, 8(sp)
 ; RV64I-NEXT:    ld s2, 16(sp)
 ; RV64I-NEXT:    ld s1, 24(sp)
@@ -12523,23 +12735,25 @@ define i32 @atomicrmw_umax_i32_acq_rel(i
 ; RV32I-NEXT:    mv s1, a0
 ; RV32I-NEXT:    lw a2, 0(a0)
 ; RV32I-NEXT:    addi s2, sp, 12
+; RV32I-NEXT:    sw a2, 12(sp)
+; RV32I-NEXT:    bgeu s0, a2, .LBB158_3
 ; RV32I-NEXT:  .LBB158_1: # %atomicrmw.start
 ; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT:    sw a2, 12(sp)
-; RV32I-NEXT:    bltu s0, a2, .LBB158_3
-; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB158_1 Depth=1
-; RV32I-NEXT:    mv a2, s0
-; RV32I-NEXT:  .LBB158_3: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB158_1 Depth=1
 ; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    mv a1, s2
 ; RV32I-NEXT:    addi a3, zero, 4
 ; RV32I-NEXT:    addi a4, zero, 2
 ; RV32I-NEXT:    call __atomic_compare_exchange_4
 ; RV32I-NEXT:    lw a2, 12(sp)
-; RV32I-NEXT:    beqz a0, .LBB158_1
-; RV32I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV32I-NEXT:    bnez a0, .LBB158_4
+; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
+; RV32I-NEXT:    # in Loop: Header=BB158_1 Depth=1
+; RV32I-NEXT:    sw a2, 12(sp)
+; RV32I-NEXT:    bltu s0, a2, .LBB158_1
+; RV32I-NEXT:  .LBB158_3: # %atomicrmw.start
+; RV32I-NEXT:    mv a2, s0
+; RV32I-NEXT:    j .LBB158_1
+; RV32I-NEXT:  .LBB158_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a2
 ; RV32I-NEXT:    lw s2, 16(sp)
 ; RV32I-NEXT:    lw s1, 20(sp)
@@ -12566,16 +12780,9 @@ define i32 @atomicrmw_umax_i32_acq_rel(i
 ; RV64I-NEXT:    lwu a0, 0(a0)
 ; RV64I-NEXT:    sext.w s0, a1
 ; RV64I-NEXT:    addi s3, sp, 4
+; RV64I-NEXT:    j .LBB158_2
 ; RV64I-NEXT:  .LBB158_1: # %atomicrmw.start
-; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT:    sext.w a1, a0
-; RV64I-NEXT:    mv a2, a0
-; RV64I-NEXT:    bltu s0, a1, .LBB158_3
-; RV64I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB158_1 Depth=1
-; RV64I-NEXT:    mv a2, s2
-; RV64I-NEXT:  .LBB158_3: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB158_1 Depth=1
+; RV64I-NEXT:    # in Loop: Header=BB158_2 Depth=1
 ; RV64I-NEXT:    sw a0, 4(sp)
 ; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    mv a1, s3
@@ -12584,8 +12791,17 @@ define i32 @atomicrmw_umax_i32_acq_rel(i
 ; RV64I-NEXT:    call __atomic_compare_exchange_4
 ; RV64I-NEXT:    mv a1, a0
 ; RV64I-NEXT:    lw a0, 4(sp)
-; RV64I-NEXT:    beqz a1, .LBB158_1
-; RV64I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV64I-NEXT:    bnez a1, .LBB158_4
+; RV64I-NEXT:  .LBB158_2: # %atomicrmw.start
+; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV64I-NEXT:    sext.w a1, a0
+; RV64I-NEXT:    mv a2, a0
+; RV64I-NEXT:    bltu s0, a1, .LBB158_1
+; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
+; RV64I-NEXT:    # in Loop: Header=BB158_2 Depth=1
+; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    j .LBB158_1
+; RV64I-NEXT:  .LBB158_4: # %atomicrmw.end
 ; RV64I-NEXT:    ld s3, 8(sp)
 ; RV64I-NEXT:    ld s2, 16(sp)
 ; RV64I-NEXT:    ld s1, 24(sp)
@@ -12614,23 +12830,25 @@ define i32 @atomicrmw_umax_i32_seq_cst(i
 ; RV32I-NEXT:    mv s1, a0
 ; RV32I-NEXT:    lw a2, 0(a0)
 ; RV32I-NEXT:    addi s2, sp, 12
+; RV32I-NEXT:    sw a2, 12(sp)
+; RV32I-NEXT:    bgeu s0, a2, .LBB159_3
 ; RV32I-NEXT:  .LBB159_1: # %atomicrmw.start
 ; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT:    sw a2, 12(sp)
-; RV32I-NEXT:    bltu s0, a2, .LBB159_3
-; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB159_1 Depth=1
-; RV32I-NEXT:    mv a2, s0
-; RV32I-NEXT:  .LBB159_3: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB159_1 Depth=1
 ; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    mv a1, s2
 ; RV32I-NEXT:    addi a3, zero, 5
 ; RV32I-NEXT:    addi a4, zero, 5
 ; RV32I-NEXT:    call __atomic_compare_exchange_4
 ; RV32I-NEXT:    lw a2, 12(sp)
-; RV32I-NEXT:    beqz a0, .LBB159_1
-; RV32I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV32I-NEXT:    bnez a0, .LBB159_4
+; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
+; RV32I-NEXT:    # in Loop: Header=BB159_1 Depth=1
+; RV32I-NEXT:    sw a2, 12(sp)
+; RV32I-NEXT:    bltu s0, a2, .LBB159_1
+; RV32I-NEXT:  .LBB159_3: # %atomicrmw.start
+; RV32I-NEXT:    mv a2, s0
+; RV32I-NEXT:    j .LBB159_1
+; RV32I-NEXT:  .LBB159_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a2
 ; RV32I-NEXT:    lw s2, 16(sp)
 ; RV32I-NEXT:    lw s1, 20(sp)
@@ -12657,16 +12875,9 @@ define i32 @atomicrmw_umax_i32_seq_cst(i
 ; RV64I-NEXT:    lwu a0, 0(a0)
 ; RV64I-NEXT:    sext.w s0, a1
 ; RV64I-NEXT:    addi s3, sp, 4
+; RV64I-NEXT:    j .LBB159_2
 ; RV64I-NEXT:  .LBB159_1: # %atomicrmw.start
-; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT:    sext.w a1, a0
-; RV64I-NEXT:    mv a2, a0
-; RV64I-NEXT:    bltu s0, a1, .LBB159_3
-; RV64I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB159_1 Depth=1
-; RV64I-NEXT:    mv a2, s2
-; RV64I-NEXT:  .LBB159_3: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB159_1 Depth=1
+; RV64I-NEXT:    # in Loop: Header=BB159_2 Depth=1
 ; RV64I-NEXT:    sw a0, 4(sp)
 ; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    mv a1, s3
@@ -12675,8 +12886,17 @@ define i32 @atomicrmw_umax_i32_seq_cst(i
 ; RV64I-NEXT:    call __atomic_compare_exchange_4
 ; RV64I-NEXT:    mv a1, a0
 ; RV64I-NEXT:    lw a0, 4(sp)
-; RV64I-NEXT:    beqz a1, .LBB159_1
-; RV64I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV64I-NEXT:    bnez a1, .LBB159_4
+; RV64I-NEXT:  .LBB159_2: # %atomicrmw.start
+; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV64I-NEXT:    sext.w a1, a0
+; RV64I-NEXT:    mv a2, a0
+; RV64I-NEXT:    bltu s0, a1, .LBB159_1
+; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
+; RV64I-NEXT:    # in Loop: Header=BB159_2 Depth=1
+; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    j .LBB159_1
+; RV64I-NEXT:  .LBB159_4: # %atomicrmw.end
 ; RV64I-NEXT:    ld s3, 8(sp)
 ; RV64I-NEXT:    ld s2, 16(sp)
 ; RV64I-NEXT:    ld s1, 24(sp)
@@ -12705,23 +12925,25 @@ define i32 @atomicrmw_umin_i32_monotonic
 ; RV32I-NEXT:    mv s1, a0
 ; RV32I-NEXT:    lw a2, 0(a0)
 ; RV32I-NEXT:    addi s2, sp, 12
+; RV32I-NEXT:    sw a2, 12(sp)
+; RV32I-NEXT:    bltu s0, a2, .LBB160_3
 ; RV32I-NEXT:  .LBB160_1: # %atomicrmw.start
 ; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT:    sw a2, 12(sp)
-; RV32I-NEXT:    bgeu s0, a2, .LBB160_3
-; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB160_1 Depth=1
-; RV32I-NEXT:    mv a2, s0
-; RV32I-NEXT:  .LBB160_3: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB160_1 Depth=1
 ; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    mv a1, s2
 ; RV32I-NEXT:    mv a3, zero
 ; RV32I-NEXT:    mv a4, zero
 ; RV32I-NEXT:    call __atomic_compare_exchange_4
 ; RV32I-NEXT:    lw a2, 12(sp)
-; RV32I-NEXT:    beqz a0, .LBB160_1
-; RV32I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV32I-NEXT:    bnez a0, .LBB160_4
+; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
+; RV32I-NEXT:    # in Loop: Header=BB160_1 Depth=1
+; RV32I-NEXT:    sw a2, 12(sp)
+; RV32I-NEXT:    bgeu s0, a2, .LBB160_1
+; RV32I-NEXT:  .LBB160_3: # %atomicrmw.start
+; RV32I-NEXT:    mv a2, s0
+; RV32I-NEXT:    j .LBB160_1
+; RV32I-NEXT:  .LBB160_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a2
 ; RV32I-NEXT:    lw s2, 16(sp)
 ; RV32I-NEXT:    lw s1, 20(sp)
@@ -12748,16 +12970,9 @@ define i32 @atomicrmw_umin_i32_monotonic
 ; RV64I-NEXT:    lwu a0, 0(a0)
 ; RV64I-NEXT:    sext.w s0, a1
 ; RV64I-NEXT:    addi s3, sp, 4
+; RV64I-NEXT:    j .LBB160_2
 ; RV64I-NEXT:  .LBB160_1: # %atomicrmw.start
-; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT:    sext.w a1, a0
-; RV64I-NEXT:    mv a2, a0
-; RV64I-NEXT:    bgeu s0, a1, .LBB160_3
-; RV64I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB160_1 Depth=1
-; RV64I-NEXT:    mv a2, s2
-; RV64I-NEXT:  .LBB160_3: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB160_1 Depth=1
+; RV64I-NEXT:    # in Loop: Header=BB160_2 Depth=1
 ; RV64I-NEXT:    sw a0, 4(sp)
 ; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    mv a1, s3
@@ -12766,8 +12981,17 @@ define i32 @atomicrmw_umin_i32_monotonic
 ; RV64I-NEXT:    call __atomic_compare_exchange_4
 ; RV64I-NEXT:    mv a1, a0
 ; RV64I-NEXT:    lw a0, 4(sp)
-; RV64I-NEXT:    beqz a1, .LBB160_1
-; RV64I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV64I-NEXT:    bnez a1, .LBB160_4
+; RV64I-NEXT:  .LBB160_2: # %atomicrmw.start
+; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV64I-NEXT:    sext.w a1, a0
+; RV64I-NEXT:    mv a2, a0
+; RV64I-NEXT:    bgeu s0, a1, .LBB160_1
+; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
+; RV64I-NEXT:    # in Loop: Header=BB160_2 Depth=1
+; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    j .LBB160_1
+; RV64I-NEXT:  .LBB160_4: # %atomicrmw.end
 ; RV64I-NEXT:    ld s3, 8(sp)
 ; RV64I-NEXT:    ld s2, 16(sp)
 ; RV64I-NEXT:    ld s1, 24(sp)
@@ -12796,23 +13020,25 @@ define i32 @atomicrmw_umin_i32_acquire(i
 ; RV32I-NEXT:    mv s1, a0
 ; RV32I-NEXT:    lw a2, 0(a0)
 ; RV32I-NEXT:    addi s2, sp, 12
+; RV32I-NEXT:    sw a2, 12(sp)
+; RV32I-NEXT:    bltu s0, a2, .LBB161_3
 ; RV32I-NEXT:  .LBB161_1: # %atomicrmw.start
 ; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT:    sw a2, 12(sp)
-; RV32I-NEXT:    bgeu s0, a2, .LBB161_3
-; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB161_1 Depth=1
-; RV32I-NEXT:    mv a2, s0
-; RV32I-NEXT:  .LBB161_3: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB161_1 Depth=1
 ; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    mv a1, s2
 ; RV32I-NEXT:    addi a3, zero, 2
 ; RV32I-NEXT:    addi a4, zero, 2
 ; RV32I-NEXT:    call __atomic_compare_exchange_4
 ; RV32I-NEXT:    lw a2, 12(sp)
-; RV32I-NEXT:    beqz a0, .LBB161_1
-; RV32I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV32I-NEXT:    bnez a0, .LBB161_4
+; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
+; RV32I-NEXT:    # in Loop: Header=BB161_1 Depth=1
+; RV32I-NEXT:    sw a2, 12(sp)
+; RV32I-NEXT:    bgeu s0, a2, .LBB161_1
+; RV32I-NEXT:  .LBB161_3: # %atomicrmw.start
+; RV32I-NEXT:    mv a2, s0
+; RV32I-NEXT:    j .LBB161_1
+; RV32I-NEXT:  .LBB161_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a2
 ; RV32I-NEXT:    lw s2, 16(sp)
 ; RV32I-NEXT:    lw s1, 20(sp)
@@ -12839,16 +13065,9 @@ define i32 @atomicrmw_umin_i32_acquire(i
 ; RV64I-NEXT:    lwu a0, 0(a0)
 ; RV64I-NEXT:    sext.w s0, a1
 ; RV64I-NEXT:    addi s3, sp, 4
+; RV64I-NEXT:    j .LBB161_2
 ; RV64I-NEXT:  .LBB161_1: # %atomicrmw.start
-; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT:    sext.w a1, a0
-; RV64I-NEXT:    mv a2, a0
-; RV64I-NEXT:    bgeu s0, a1, .LBB161_3
-; RV64I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB161_1 Depth=1
-; RV64I-NEXT:    mv a2, s2
-; RV64I-NEXT:  .LBB161_3: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB161_1 Depth=1
+; RV64I-NEXT:    # in Loop: Header=BB161_2 Depth=1
 ; RV64I-NEXT:    sw a0, 4(sp)
 ; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    mv a1, s3
@@ -12857,8 +13076,17 @@ define i32 @atomicrmw_umin_i32_acquire(i
 ; RV64I-NEXT:    call __atomic_compare_exchange_4
 ; RV64I-NEXT:    mv a1, a0
 ; RV64I-NEXT:    lw a0, 4(sp)
-; RV64I-NEXT:    beqz a1, .LBB161_1
-; RV64I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV64I-NEXT:    bnez a1, .LBB161_4
+; RV64I-NEXT:  .LBB161_2: # %atomicrmw.start
+; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV64I-NEXT:    sext.w a1, a0
+; RV64I-NEXT:    mv a2, a0
+; RV64I-NEXT:    bgeu s0, a1, .LBB161_1
+; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
+; RV64I-NEXT:    # in Loop: Header=BB161_2 Depth=1
+; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    j .LBB161_1
+; RV64I-NEXT:  .LBB161_4: # %atomicrmw.end
 ; RV64I-NEXT:    ld s3, 8(sp)
 ; RV64I-NEXT:    ld s2, 16(sp)
 ; RV64I-NEXT:    ld s1, 24(sp)
@@ -12887,23 +13115,25 @@ define i32 @atomicrmw_umin_i32_release(i
 ; RV32I-NEXT:    mv s1, a0
 ; RV32I-NEXT:    lw a2, 0(a0)
 ; RV32I-NEXT:    addi s2, sp, 12
+; RV32I-NEXT:    sw a2, 12(sp)
+; RV32I-NEXT:    bltu s0, a2, .LBB162_3
 ; RV32I-NEXT:  .LBB162_1: # %atomicrmw.start
 ; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT:    sw a2, 12(sp)
-; RV32I-NEXT:    bgeu s0, a2, .LBB162_3
-; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB162_1 Depth=1
-; RV32I-NEXT:    mv a2, s0
-; RV32I-NEXT:  .LBB162_3: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB162_1 Depth=1
 ; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    mv a1, s2
 ; RV32I-NEXT:    addi a3, zero, 3
 ; RV32I-NEXT:    mv a4, zero
 ; RV32I-NEXT:    call __atomic_compare_exchange_4
 ; RV32I-NEXT:    lw a2, 12(sp)
-; RV32I-NEXT:    beqz a0, .LBB162_1
-; RV32I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV32I-NEXT:    bnez a0, .LBB162_4
+; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
+; RV32I-NEXT:    # in Loop: Header=BB162_1 Depth=1
+; RV32I-NEXT:    sw a2, 12(sp)
+; RV32I-NEXT:    bgeu s0, a2, .LBB162_1
+; RV32I-NEXT:  .LBB162_3: # %atomicrmw.start
+; RV32I-NEXT:    mv a2, s0
+; RV32I-NEXT:    j .LBB162_1
+; RV32I-NEXT:  .LBB162_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a2
 ; RV32I-NEXT:    lw s2, 16(sp)
 ; RV32I-NEXT:    lw s1, 20(sp)
@@ -12930,16 +13160,9 @@ define i32 @atomicrmw_umin_i32_release(i
 ; RV64I-NEXT:    lwu a0, 0(a0)
 ; RV64I-NEXT:    sext.w s0, a1
 ; RV64I-NEXT:    addi s3, sp, 4
+; RV64I-NEXT:    j .LBB162_2
 ; RV64I-NEXT:  .LBB162_1: # %atomicrmw.start
-; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT:    sext.w a1, a0
-; RV64I-NEXT:    mv a2, a0
-; RV64I-NEXT:    bgeu s0, a1, .LBB162_3
-; RV64I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB162_1 Depth=1
-; RV64I-NEXT:    mv a2, s2
-; RV64I-NEXT:  .LBB162_3: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB162_1 Depth=1
+; RV64I-NEXT:    # in Loop: Header=BB162_2 Depth=1
 ; RV64I-NEXT:    sw a0, 4(sp)
 ; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    mv a1, s3
@@ -12948,8 +13171,17 @@ define i32 @atomicrmw_umin_i32_release(i
 ; RV64I-NEXT:    call __atomic_compare_exchange_4
 ; RV64I-NEXT:    mv a1, a0
 ; RV64I-NEXT:    lw a0, 4(sp)
-; RV64I-NEXT:    beqz a1, .LBB162_1
-; RV64I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV64I-NEXT:    bnez a1, .LBB162_4
+; RV64I-NEXT:  .LBB162_2: # %atomicrmw.start
+; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV64I-NEXT:    sext.w a1, a0
+; RV64I-NEXT:    mv a2, a0
+; RV64I-NEXT:    bgeu s0, a1, .LBB162_1
+; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
+; RV64I-NEXT:    # in Loop: Header=BB162_2 Depth=1
+; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    j .LBB162_1
+; RV64I-NEXT:  .LBB162_4: # %atomicrmw.end
 ; RV64I-NEXT:    ld s3, 8(sp)
 ; RV64I-NEXT:    ld s2, 16(sp)
 ; RV64I-NEXT:    ld s1, 24(sp)
@@ -12978,23 +13210,25 @@ define i32 @atomicrmw_umin_i32_acq_rel(i
 ; RV32I-NEXT:    mv s1, a0
 ; RV32I-NEXT:    lw a2, 0(a0)
 ; RV32I-NEXT:    addi s2, sp, 12
+; RV32I-NEXT:    sw a2, 12(sp)
+; RV32I-NEXT:    bltu s0, a2, .LBB163_3
 ; RV32I-NEXT:  .LBB163_1: # %atomicrmw.start
 ; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT:    sw a2, 12(sp)
-; RV32I-NEXT:    bgeu s0, a2, .LBB163_3
-; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB163_1 Depth=1
-; RV32I-NEXT:    mv a2, s0
-; RV32I-NEXT:  .LBB163_3: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB163_1 Depth=1
 ; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    mv a1, s2
 ; RV32I-NEXT:    addi a3, zero, 4
 ; RV32I-NEXT:    addi a4, zero, 2
 ; RV32I-NEXT:    call __atomic_compare_exchange_4
 ; RV32I-NEXT:    lw a2, 12(sp)
-; RV32I-NEXT:    beqz a0, .LBB163_1
-; RV32I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV32I-NEXT:    bnez a0, .LBB163_4
+; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
+; RV32I-NEXT:    # in Loop: Header=BB163_1 Depth=1
+; RV32I-NEXT:    sw a2, 12(sp)
+; RV32I-NEXT:    bgeu s0, a2, .LBB163_1
+; RV32I-NEXT:  .LBB163_3: # %atomicrmw.start
+; RV32I-NEXT:    mv a2, s0
+; RV32I-NEXT:    j .LBB163_1
+; RV32I-NEXT:  .LBB163_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a2
 ; RV32I-NEXT:    lw s2, 16(sp)
 ; RV32I-NEXT:    lw s1, 20(sp)
@@ -13021,16 +13255,9 @@ define i32 @atomicrmw_umin_i32_acq_rel(i
 ; RV64I-NEXT:    lwu a0, 0(a0)
 ; RV64I-NEXT:    sext.w s0, a1
 ; RV64I-NEXT:    addi s3, sp, 4
+; RV64I-NEXT:    j .LBB163_2
 ; RV64I-NEXT:  .LBB163_1: # %atomicrmw.start
-; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT:    sext.w a1, a0
-; RV64I-NEXT:    mv a2, a0
-; RV64I-NEXT:    bgeu s0, a1, .LBB163_3
-; RV64I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB163_1 Depth=1
-; RV64I-NEXT:    mv a2, s2
-; RV64I-NEXT:  .LBB163_3: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB163_1 Depth=1
+; RV64I-NEXT:    # in Loop: Header=BB163_2 Depth=1
 ; RV64I-NEXT:    sw a0, 4(sp)
 ; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    mv a1, s3
@@ -13039,8 +13266,17 @@ define i32 @atomicrmw_umin_i32_acq_rel(i
 ; RV64I-NEXT:    call __atomic_compare_exchange_4
 ; RV64I-NEXT:    mv a1, a0
 ; RV64I-NEXT:    lw a0, 4(sp)
-; RV64I-NEXT:    beqz a1, .LBB163_1
-; RV64I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV64I-NEXT:    bnez a1, .LBB163_4
+; RV64I-NEXT:  .LBB163_2: # %atomicrmw.start
+; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV64I-NEXT:    sext.w a1, a0
+; RV64I-NEXT:    mv a2, a0
+; RV64I-NEXT:    bgeu s0, a1, .LBB163_1
+; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
+; RV64I-NEXT:    # in Loop: Header=BB163_2 Depth=1
+; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    j .LBB163_1
+; RV64I-NEXT:  .LBB163_4: # %atomicrmw.end
 ; RV64I-NEXT:    ld s3, 8(sp)
 ; RV64I-NEXT:    ld s2, 16(sp)
 ; RV64I-NEXT:    ld s1, 24(sp)
@@ -13069,23 +13305,25 @@ define i32 @atomicrmw_umin_i32_seq_cst(i
 ; RV32I-NEXT:    mv s1, a0
 ; RV32I-NEXT:    lw a2, 0(a0)
 ; RV32I-NEXT:    addi s2, sp, 12
+; RV32I-NEXT:    sw a2, 12(sp)
+; RV32I-NEXT:    bltu s0, a2, .LBB164_3
 ; RV32I-NEXT:  .LBB164_1: # %atomicrmw.start
 ; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT:    sw a2, 12(sp)
-; RV32I-NEXT:    bgeu s0, a2, .LBB164_3
-; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB164_1 Depth=1
-; RV32I-NEXT:    mv a2, s0
-; RV32I-NEXT:  .LBB164_3: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB164_1 Depth=1
 ; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    mv a1, s2
 ; RV32I-NEXT:    addi a3, zero, 5
 ; RV32I-NEXT:    addi a4, zero, 5
 ; RV32I-NEXT:    call __atomic_compare_exchange_4
 ; RV32I-NEXT:    lw a2, 12(sp)
-; RV32I-NEXT:    beqz a0, .LBB164_1
-; RV32I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV32I-NEXT:    bnez a0, .LBB164_4
+; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
+; RV32I-NEXT:    # in Loop: Header=BB164_1 Depth=1
+; RV32I-NEXT:    sw a2, 12(sp)
+; RV32I-NEXT:    bgeu s0, a2, .LBB164_1
+; RV32I-NEXT:  .LBB164_3: # %atomicrmw.start
+; RV32I-NEXT:    mv a2, s0
+; RV32I-NEXT:    j .LBB164_1
+; RV32I-NEXT:  .LBB164_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a2
 ; RV32I-NEXT:    lw s2, 16(sp)
 ; RV32I-NEXT:    lw s1, 20(sp)
@@ -13112,16 +13350,9 @@ define i32 @atomicrmw_umin_i32_seq_cst(i
 ; RV64I-NEXT:    lwu a0, 0(a0)
 ; RV64I-NEXT:    sext.w s0, a1
 ; RV64I-NEXT:    addi s3, sp, 4
+; RV64I-NEXT:    j .LBB164_2
 ; RV64I-NEXT:  .LBB164_1: # %atomicrmw.start
-; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT:    sext.w a1, a0
-; RV64I-NEXT:    mv a2, a0
-; RV64I-NEXT:    bgeu s0, a1, .LBB164_3
-; RV64I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB164_1 Depth=1
-; RV64I-NEXT:    mv a2, s2
-; RV64I-NEXT:  .LBB164_3: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB164_1 Depth=1
+; RV64I-NEXT:    # in Loop: Header=BB164_2 Depth=1
 ; RV64I-NEXT:    sw a0, 4(sp)
 ; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    mv a1, s3
@@ -13130,8 +13361,17 @@ define i32 @atomicrmw_umin_i32_seq_cst(i
 ; RV64I-NEXT:    call __atomic_compare_exchange_4
 ; RV64I-NEXT:    mv a1, a0
 ; RV64I-NEXT:    lw a0, 4(sp)
-; RV64I-NEXT:    beqz a1, .LBB164_1
-; RV64I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV64I-NEXT:    bnez a1, .LBB164_4
+; RV64I-NEXT:  .LBB164_2: # %atomicrmw.start
+; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV64I-NEXT:    sext.w a1, a0
+; RV64I-NEXT:    mv a2, a0
+; RV64I-NEXT:    bgeu s0, a1, .LBB164_1
+; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
+; RV64I-NEXT:    # in Loop: Header=BB164_2 Depth=1
+; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    j .LBB164_1
+; RV64I-NEXT:  .LBB164_4: # %atomicrmw.end
 ; RV64I-NEXT:    ld s3, 8(sp)
 ; RV64I-NEXT:    ld s2, 16(sp)
 ; RV64I-NEXT:    ld s1, 24(sp)
@@ -14568,26 +14808,9 @@ define i64 @atomicrmw_max_i64_monotonic(
 ; RV32I-NEXT:    lw a1, 4(a0)
 ; RV32I-NEXT:    lw a2, 0(a0)
 ; RV32I-NEXT:    mv s3, sp
-; RV32I-NEXT:  .LBB200_1: # %atomicrmw.start
-; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT:    beq a1, s0, .LBB200_3
-; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB200_1 Depth=1
-; RV32I-NEXT:    slt a0, s0, a1
+; RV32I-NEXT:    bne a1, s0, .LBB200_3
 ; RV32I-NEXT:    j .LBB200_4
-; RV32I-NEXT:  .LBB200_3: # in Loop: Header=BB200_1 Depth=1
-; RV32I-NEXT:    sltu a0, s2, a2
-; RV32I-NEXT:  .LBB200_4: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB200_1 Depth=1
-; RV32I-NEXT:    sw a2, 0(sp)
-; RV32I-NEXT:    mv a3, a1
-; RV32I-NEXT:    bnez a0, .LBB200_6
-; RV32I-NEXT:  # %bb.5: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB200_1 Depth=1
-; RV32I-NEXT:    mv a2, s2
-; RV32I-NEXT:    mv a3, s0
-; RV32I-NEXT:  .LBB200_6: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB200_1 Depth=1
+; RV32I-NEXT:  .LBB200_1: # %atomicrmw.start
 ; RV32I-NEXT:    sw a1, 4(sp)
 ; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    mv a1, s3
@@ -14596,8 +14819,23 @@ define i64 @atomicrmw_max_i64_monotonic(
 ; RV32I-NEXT:    call __atomic_compare_exchange_8
 ; RV32I-NEXT:    lw a1, 4(sp)
 ; RV32I-NEXT:    lw a2, 0(sp)
-; RV32I-NEXT:    beqz a0, .LBB200_1
-; RV32I-NEXT:  # %bb.7: # %atomicrmw.end
+; RV32I-NEXT:    bnez a0, .LBB200_7
+; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
+; RV32I-NEXT:    beq a1, s0, .LBB200_4
+; RV32I-NEXT:  .LBB200_3: # %atomicrmw.start
+; RV32I-NEXT:    slt a0, s0, a1
+; RV32I-NEXT:    j .LBB200_5
+; RV32I-NEXT:  .LBB200_4:
+; RV32I-NEXT:    sltu a0, s2, a2
+; RV32I-NEXT:  .LBB200_5: # %atomicrmw.start
+; RV32I-NEXT:    sw a2, 0(sp)
+; RV32I-NEXT:    mv a3, a1
+; RV32I-NEXT:    bnez a0, .LBB200_1
+; RV32I-NEXT:  # %bb.6: # %atomicrmw.start
+; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    mv a3, s0
+; RV32I-NEXT:    j .LBB200_1
+; RV32I-NEXT:  .LBB200_7: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a2
 ; RV32I-NEXT:    lw s3, 12(sp)
 ; RV32I-NEXT:    lw s2, 16(sp)
@@ -14621,26 +14859,9 @@ define i64 @atomicrmw_max_i64_monotonic(
 ; RV32IA-NEXT:    lw a1, 4(a0)
 ; RV32IA-NEXT:    lw a2, 0(a0)
 ; RV32IA-NEXT:    mv s3, sp
-; RV32IA-NEXT:  .LBB200_1: # %atomicrmw.start
-; RV32IA-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    beq a1, s0, .LBB200_3
-; RV32IA-NEXT:  # %bb.2: # %atomicrmw.start
-; RV32IA-NEXT:    # in Loop: Header=BB200_1 Depth=1
-; RV32IA-NEXT:    slt a0, s0, a1
+; RV32IA-NEXT:    bne a1, s0, .LBB200_3
 ; RV32IA-NEXT:    j .LBB200_4
-; RV32IA-NEXT:  .LBB200_3: # in Loop: Header=BB200_1 Depth=1
-; RV32IA-NEXT:    sltu a0, s2, a2
-; RV32IA-NEXT:  .LBB200_4: # %atomicrmw.start
-; RV32IA-NEXT:    # in Loop: Header=BB200_1 Depth=1
-; RV32IA-NEXT:    sw a2, 0(sp)
-; RV32IA-NEXT:    mv a3, a1
-; RV32IA-NEXT:    bnez a0, .LBB200_6
-; RV32IA-NEXT:  # %bb.5: # %atomicrmw.start
-; RV32IA-NEXT:    # in Loop: Header=BB200_1 Depth=1
-; RV32IA-NEXT:    mv a2, s2
-; RV32IA-NEXT:    mv a3, s0
-; RV32IA-NEXT:  .LBB200_6: # %atomicrmw.start
-; RV32IA-NEXT:    # in Loop: Header=BB200_1 Depth=1
+; RV32IA-NEXT:  .LBB200_1: # %atomicrmw.start
 ; RV32IA-NEXT:    sw a1, 4(sp)
 ; RV32IA-NEXT:    mv a0, s1
 ; RV32IA-NEXT:    mv a1, s3
@@ -14649,8 +14870,23 @@ define i64 @atomicrmw_max_i64_monotonic(
 ; RV32IA-NEXT:    call __atomic_compare_exchange_8
 ; RV32IA-NEXT:    lw a1, 4(sp)
 ; RV32IA-NEXT:    lw a2, 0(sp)
-; RV32IA-NEXT:    beqz a0, .LBB200_1
-; RV32IA-NEXT:  # %bb.7: # %atomicrmw.end
+; RV32IA-NEXT:    bnez a0, .LBB200_7
+; RV32IA-NEXT:  # %bb.2: # %atomicrmw.start
+; RV32IA-NEXT:    beq a1, s0, .LBB200_4
+; RV32IA-NEXT:  .LBB200_3: # %atomicrmw.start
+; RV32IA-NEXT:    slt a0, s0, a1
+; RV32IA-NEXT:    j .LBB200_5
+; RV32IA-NEXT:  .LBB200_4:
+; RV32IA-NEXT:    sltu a0, s2, a2
+; RV32IA-NEXT:  .LBB200_5: # %atomicrmw.start
+; RV32IA-NEXT:    sw a2, 0(sp)
+; RV32IA-NEXT:    mv a3, a1
+; RV32IA-NEXT:    bnez a0, .LBB200_1
+; RV32IA-NEXT:  # %bb.6: # %atomicrmw.start
+; RV32IA-NEXT:    mv a2, s2
+; RV32IA-NEXT:    mv a3, s0
+; RV32IA-NEXT:    j .LBB200_1
+; RV32IA-NEXT:  .LBB200_7: # %atomicrmw.end
 ; RV32IA-NEXT:    mv a0, a2
 ; RV32IA-NEXT:    lw s3, 12(sp)
 ; RV32IA-NEXT:    lw s2, 16(sp)
@@ -14671,23 +14907,25 @@ define i64 @atomicrmw_max_i64_monotonic(
 ; RV64I-NEXT:    mv s1, a0
 ; RV64I-NEXT:    ld a2, 0(a0)
 ; RV64I-NEXT:    addi s2, sp, 8
+; RV64I-NEXT:    sd a2, 8(sp)
+; RV64I-NEXT:    bge s0, a2, .LBB200_3
 ; RV64I-NEXT:  .LBB200_1: # %atomicrmw.start
 ; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT:    sd a2, 8(sp)
-; RV64I-NEXT:    blt s0, a2, .LBB200_3
-; RV64I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB200_1 Depth=1
-; RV64I-NEXT:    mv a2, s0
-; RV64I-NEXT:  .LBB200_3: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB200_1 Depth=1
 ; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    mv a1, s2
 ; RV64I-NEXT:    mv a3, zero
 ; RV64I-NEXT:    mv a4, zero
 ; RV64I-NEXT:    call __atomic_compare_exchange_8
 ; RV64I-NEXT:    ld a2, 8(sp)
-; RV64I-NEXT:    beqz a0, .LBB200_1
-; RV64I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV64I-NEXT:    bnez a0, .LBB200_4
+; RV64I-NEXT:  # %bb.2: # %atomicrmw.start
+; RV64I-NEXT:    # in Loop: Header=BB200_1 Depth=1
+; RV64I-NEXT:    sd a2, 8(sp)
+; RV64I-NEXT:    blt s0, a2, .LBB200_1
+; RV64I-NEXT:  .LBB200_3: # %atomicrmw.start
+; RV64I-NEXT:    mv a2, s0
+; RV64I-NEXT:    j .LBB200_1
+; RV64I-NEXT:  .LBB200_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a2
 ; RV64I-NEXT:    ld s2, 16(sp)
 ; RV64I-NEXT:    ld s1, 24(sp)
@@ -14719,26 +14957,9 @@ define i64 @atomicrmw_max_i64_acquire(i6
 ; RV32I-NEXT:    lw a1, 4(a0)
 ; RV32I-NEXT:    lw a2, 0(a0)
 ; RV32I-NEXT:    mv s3, sp
-; RV32I-NEXT:  .LBB201_1: # %atomicrmw.start
-; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT:    beq a1, s0, .LBB201_3
-; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB201_1 Depth=1
-; RV32I-NEXT:    slt a0, s0, a1
+; RV32I-NEXT:    bne a1, s0, .LBB201_3
 ; RV32I-NEXT:    j .LBB201_4
-; RV32I-NEXT:  .LBB201_3: # in Loop: Header=BB201_1 Depth=1
-; RV32I-NEXT:    sltu a0, s2, a2
-; RV32I-NEXT:  .LBB201_4: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB201_1 Depth=1
-; RV32I-NEXT:    sw a2, 0(sp)
-; RV32I-NEXT:    mv a3, a1
-; RV32I-NEXT:    bnez a0, .LBB201_6
-; RV32I-NEXT:  # %bb.5: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB201_1 Depth=1
-; RV32I-NEXT:    mv a2, s2
-; RV32I-NEXT:    mv a3, s0
-; RV32I-NEXT:  .LBB201_6: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB201_1 Depth=1
+; RV32I-NEXT:  .LBB201_1: # %atomicrmw.start
 ; RV32I-NEXT:    sw a1, 4(sp)
 ; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    mv a1, s3
@@ -14747,13 +14968,28 @@ define i64 @atomicrmw_max_i64_acquire(i6
 ; RV32I-NEXT:    call __atomic_compare_exchange_8
 ; RV32I-NEXT:    lw a1, 4(sp)
 ; RV32I-NEXT:    lw a2, 0(sp)
-; RV32I-NEXT:    beqz a0, .LBB201_1
-; RV32I-NEXT:  # %bb.7: # %atomicrmw.end
-; RV32I-NEXT:    mv a0, a2
-; RV32I-NEXT:    lw s3, 12(sp)
-; RV32I-NEXT:    lw s2, 16(sp)
-; RV32I-NEXT:    lw s1, 20(sp)
-; RV32I-NEXT:    lw s0, 24(sp)
+; RV32I-NEXT:    bnez a0, .LBB201_7
+; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
+; RV32I-NEXT:    beq a1, s0, .LBB201_4
+; RV32I-NEXT:  .LBB201_3: # %atomicrmw.start
+; RV32I-NEXT:    slt a0, s0, a1
+; RV32I-NEXT:    j .LBB201_5
+; RV32I-NEXT:  .LBB201_4:
+; RV32I-NEXT:    sltu a0, s2, a2
+; RV32I-NEXT:  .LBB201_5: # %atomicrmw.start
+; RV32I-NEXT:    sw a2, 0(sp)
+; RV32I-NEXT:    mv a3, a1
+; RV32I-NEXT:    bnez a0, .LBB201_1
+; RV32I-NEXT:  # %bb.6: # %atomicrmw.start
+; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    mv a3, s0
+; RV32I-NEXT:    j .LBB201_1
+; RV32I-NEXT:  .LBB201_7: # %atomicrmw.end
+; RV32I-NEXT:    mv a0, a2
+; RV32I-NEXT:    lw s3, 12(sp)
+; RV32I-NEXT:    lw s2, 16(sp)
+; RV32I-NEXT:    lw s1, 20(sp)
+; RV32I-NEXT:    lw s0, 24(sp)
 ; RV32I-NEXT:    lw ra, 28(sp)
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
@@ -14772,26 +15008,9 @@ define i64 @atomicrmw_max_i64_acquire(i6
 ; RV32IA-NEXT:    lw a1, 4(a0)
 ; RV32IA-NEXT:    lw a2, 0(a0)
 ; RV32IA-NEXT:    mv s3, sp
-; RV32IA-NEXT:  .LBB201_1: # %atomicrmw.start
-; RV32IA-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    beq a1, s0, .LBB201_3
-; RV32IA-NEXT:  # %bb.2: # %atomicrmw.start
-; RV32IA-NEXT:    # in Loop: Header=BB201_1 Depth=1
-; RV32IA-NEXT:    slt a0, s0, a1
+; RV32IA-NEXT:    bne a1, s0, .LBB201_3
 ; RV32IA-NEXT:    j .LBB201_4
-; RV32IA-NEXT:  .LBB201_3: # in Loop: Header=BB201_1 Depth=1
-; RV32IA-NEXT:    sltu a0, s2, a2
-; RV32IA-NEXT:  .LBB201_4: # %atomicrmw.start
-; RV32IA-NEXT:    # in Loop: Header=BB201_1 Depth=1
-; RV32IA-NEXT:    sw a2, 0(sp)
-; RV32IA-NEXT:    mv a3, a1
-; RV32IA-NEXT:    bnez a0, .LBB201_6
-; RV32IA-NEXT:  # %bb.5: # %atomicrmw.start
-; RV32IA-NEXT:    # in Loop: Header=BB201_1 Depth=1
-; RV32IA-NEXT:    mv a2, s2
-; RV32IA-NEXT:    mv a3, s0
-; RV32IA-NEXT:  .LBB201_6: # %atomicrmw.start
-; RV32IA-NEXT:    # in Loop: Header=BB201_1 Depth=1
+; RV32IA-NEXT:  .LBB201_1: # %atomicrmw.start
 ; RV32IA-NEXT:    sw a1, 4(sp)
 ; RV32IA-NEXT:    mv a0, s1
 ; RV32IA-NEXT:    mv a1, s3
@@ -14800,8 +15019,23 @@ define i64 @atomicrmw_max_i64_acquire(i6
 ; RV32IA-NEXT:    call __atomic_compare_exchange_8
 ; RV32IA-NEXT:    lw a1, 4(sp)
 ; RV32IA-NEXT:    lw a2, 0(sp)
-; RV32IA-NEXT:    beqz a0, .LBB201_1
-; RV32IA-NEXT:  # %bb.7: # %atomicrmw.end
+; RV32IA-NEXT:    bnez a0, .LBB201_7
+; RV32IA-NEXT:  # %bb.2: # %atomicrmw.start
+; RV32IA-NEXT:    beq a1, s0, .LBB201_4
+; RV32IA-NEXT:  .LBB201_3: # %atomicrmw.start
+; RV32IA-NEXT:    slt a0, s0, a1
+; RV32IA-NEXT:    j .LBB201_5
+; RV32IA-NEXT:  .LBB201_4:
+; RV32IA-NEXT:    sltu a0, s2, a2
+; RV32IA-NEXT:  .LBB201_5: # %atomicrmw.start
+; RV32IA-NEXT:    sw a2, 0(sp)
+; RV32IA-NEXT:    mv a3, a1
+; RV32IA-NEXT:    bnez a0, .LBB201_1
+; RV32IA-NEXT:  # %bb.6: # %atomicrmw.start
+; RV32IA-NEXT:    mv a2, s2
+; RV32IA-NEXT:    mv a3, s0
+; RV32IA-NEXT:    j .LBB201_1
+; RV32IA-NEXT:  .LBB201_7: # %atomicrmw.end
 ; RV32IA-NEXT:    mv a0, a2
 ; RV32IA-NEXT:    lw s3, 12(sp)
 ; RV32IA-NEXT:    lw s2, 16(sp)
@@ -14822,23 +15056,25 @@ define i64 @atomicrmw_max_i64_acquire(i6
 ; RV64I-NEXT:    mv s1, a0
 ; RV64I-NEXT:    ld a2, 0(a0)
 ; RV64I-NEXT:    addi s2, sp, 8
+; RV64I-NEXT:    sd a2, 8(sp)
+; RV64I-NEXT:    bge s0, a2, .LBB201_3
 ; RV64I-NEXT:  .LBB201_1: # %atomicrmw.start
 ; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT:    sd a2, 8(sp)
-; RV64I-NEXT:    blt s0, a2, .LBB201_3
-; RV64I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB201_1 Depth=1
-; RV64I-NEXT:    mv a2, s0
-; RV64I-NEXT:  .LBB201_3: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB201_1 Depth=1
 ; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    mv a1, s2
 ; RV64I-NEXT:    addi a3, zero, 2
 ; RV64I-NEXT:    addi a4, zero, 2
 ; RV64I-NEXT:    call __atomic_compare_exchange_8
 ; RV64I-NEXT:    ld a2, 8(sp)
-; RV64I-NEXT:    beqz a0, .LBB201_1
-; RV64I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV64I-NEXT:    bnez a0, .LBB201_4
+; RV64I-NEXT:  # %bb.2: # %atomicrmw.start
+; RV64I-NEXT:    # in Loop: Header=BB201_1 Depth=1
+; RV64I-NEXT:    sd a2, 8(sp)
+; RV64I-NEXT:    blt s0, a2, .LBB201_1
+; RV64I-NEXT:  .LBB201_3: # %atomicrmw.start
+; RV64I-NEXT:    mv a2, s0
+; RV64I-NEXT:    j .LBB201_1
+; RV64I-NEXT:  .LBB201_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a2
 ; RV64I-NEXT:    ld s2, 16(sp)
 ; RV64I-NEXT:    ld s1, 24(sp)
@@ -14870,26 +15106,9 @@ define i64 @atomicrmw_max_i64_release(i6
 ; RV32I-NEXT:    lw a1, 4(a0)
 ; RV32I-NEXT:    lw a2, 0(a0)
 ; RV32I-NEXT:    mv s3, sp
-; RV32I-NEXT:  .LBB202_1: # %atomicrmw.start
-; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT:    beq a1, s0, .LBB202_3
-; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB202_1 Depth=1
-; RV32I-NEXT:    slt a0, s0, a1
+; RV32I-NEXT:    bne a1, s0, .LBB202_3
 ; RV32I-NEXT:    j .LBB202_4
-; RV32I-NEXT:  .LBB202_3: # in Loop: Header=BB202_1 Depth=1
-; RV32I-NEXT:    sltu a0, s2, a2
-; RV32I-NEXT:  .LBB202_4: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB202_1 Depth=1
-; RV32I-NEXT:    sw a2, 0(sp)
-; RV32I-NEXT:    mv a3, a1
-; RV32I-NEXT:    bnez a0, .LBB202_6
-; RV32I-NEXT:  # %bb.5: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB202_1 Depth=1
-; RV32I-NEXT:    mv a2, s2
-; RV32I-NEXT:    mv a3, s0
-; RV32I-NEXT:  .LBB202_6: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB202_1 Depth=1
+; RV32I-NEXT:  .LBB202_1: # %atomicrmw.start
 ; RV32I-NEXT:    sw a1, 4(sp)
 ; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    mv a1, s3
@@ -14898,8 +15117,23 @@ define i64 @atomicrmw_max_i64_release(i6
 ; RV32I-NEXT:    call __atomic_compare_exchange_8
 ; RV32I-NEXT:    lw a1, 4(sp)
 ; RV32I-NEXT:    lw a2, 0(sp)
-; RV32I-NEXT:    beqz a0, .LBB202_1
-; RV32I-NEXT:  # %bb.7: # %atomicrmw.end
+; RV32I-NEXT:    bnez a0, .LBB202_7
+; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
+; RV32I-NEXT:    beq a1, s0, .LBB202_4
+; RV32I-NEXT:  .LBB202_3: # %atomicrmw.start
+; RV32I-NEXT:    slt a0, s0, a1
+; RV32I-NEXT:    j .LBB202_5
+; RV32I-NEXT:  .LBB202_4:
+; RV32I-NEXT:    sltu a0, s2, a2
+; RV32I-NEXT:  .LBB202_5: # %atomicrmw.start
+; RV32I-NEXT:    sw a2, 0(sp)
+; RV32I-NEXT:    mv a3, a1
+; RV32I-NEXT:    bnez a0, .LBB202_1
+; RV32I-NEXT:  # %bb.6: # %atomicrmw.start
+; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    mv a3, s0
+; RV32I-NEXT:    j .LBB202_1
+; RV32I-NEXT:  .LBB202_7: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a2
 ; RV32I-NEXT:    lw s3, 12(sp)
 ; RV32I-NEXT:    lw s2, 16(sp)
@@ -14923,26 +15157,9 @@ define i64 @atomicrmw_max_i64_release(i6
 ; RV32IA-NEXT:    lw a1, 4(a0)
 ; RV32IA-NEXT:    lw a2, 0(a0)
 ; RV32IA-NEXT:    mv s3, sp
-; RV32IA-NEXT:  .LBB202_1: # %atomicrmw.start
-; RV32IA-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    beq a1, s0, .LBB202_3
-; RV32IA-NEXT:  # %bb.2: # %atomicrmw.start
-; RV32IA-NEXT:    # in Loop: Header=BB202_1 Depth=1
-; RV32IA-NEXT:    slt a0, s0, a1
+; RV32IA-NEXT:    bne a1, s0, .LBB202_3
 ; RV32IA-NEXT:    j .LBB202_4
-; RV32IA-NEXT:  .LBB202_3: # in Loop: Header=BB202_1 Depth=1
-; RV32IA-NEXT:    sltu a0, s2, a2
-; RV32IA-NEXT:  .LBB202_4: # %atomicrmw.start
-; RV32IA-NEXT:    # in Loop: Header=BB202_1 Depth=1
-; RV32IA-NEXT:    sw a2, 0(sp)
-; RV32IA-NEXT:    mv a3, a1
-; RV32IA-NEXT:    bnez a0, .LBB202_6
-; RV32IA-NEXT:  # %bb.5: # %atomicrmw.start
-; RV32IA-NEXT:    # in Loop: Header=BB202_1 Depth=1
-; RV32IA-NEXT:    mv a2, s2
-; RV32IA-NEXT:    mv a3, s0
-; RV32IA-NEXT:  .LBB202_6: # %atomicrmw.start
-; RV32IA-NEXT:    # in Loop: Header=BB202_1 Depth=1
+; RV32IA-NEXT:  .LBB202_1: # %atomicrmw.start
 ; RV32IA-NEXT:    sw a1, 4(sp)
 ; RV32IA-NEXT:    mv a0, s1
 ; RV32IA-NEXT:    mv a1, s3
@@ -14951,8 +15168,23 @@ define i64 @atomicrmw_max_i64_release(i6
 ; RV32IA-NEXT:    call __atomic_compare_exchange_8
 ; RV32IA-NEXT:    lw a1, 4(sp)
 ; RV32IA-NEXT:    lw a2, 0(sp)
-; RV32IA-NEXT:    beqz a0, .LBB202_1
-; RV32IA-NEXT:  # %bb.7: # %atomicrmw.end
+; RV32IA-NEXT:    bnez a0, .LBB202_7
+; RV32IA-NEXT:  # %bb.2: # %atomicrmw.start
+; RV32IA-NEXT:    beq a1, s0, .LBB202_4
+; RV32IA-NEXT:  .LBB202_3: # %atomicrmw.start
+; RV32IA-NEXT:    slt a0, s0, a1
+; RV32IA-NEXT:    j .LBB202_5
+; RV32IA-NEXT:  .LBB202_4:
+; RV32IA-NEXT:    sltu a0, s2, a2
+; RV32IA-NEXT:  .LBB202_5: # %atomicrmw.start
+; RV32IA-NEXT:    sw a2, 0(sp)
+; RV32IA-NEXT:    mv a3, a1
+; RV32IA-NEXT:    bnez a0, .LBB202_1
+; RV32IA-NEXT:  # %bb.6: # %atomicrmw.start
+; RV32IA-NEXT:    mv a2, s2
+; RV32IA-NEXT:    mv a3, s0
+; RV32IA-NEXT:    j .LBB202_1
+; RV32IA-NEXT:  .LBB202_7: # %atomicrmw.end
 ; RV32IA-NEXT:    mv a0, a2
 ; RV32IA-NEXT:    lw s3, 12(sp)
 ; RV32IA-NEXT:    lw s2, 16(sp)
@@ -14973,23 +15205,25 @@ define i64 @atomicrmw_max_i64_release(i6
 ; RV64I-NEXT:    mv s1, a0
 ; RV64I-NEXT:    ld a2, 0(a0)
 ; RV64I-NEXT:    addi s2, sp, 8
+; RV64I-NEXT:    sd a2, 8(sp)
+; RV64I-NEXT:    bge s0, a2, .LBB202_3
 ; RV64I-NEXT:  .LBB202_1: # %atomicrmw.start
 ; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT:    sd a2, 8(sp)
-; RV64I-NEXT:    blt s0, a2, .LBB202_3
-; RV64I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB202_1 Depth=1
-; RV64I-NEXT:    mv a2, s0
-; RV64I-NEXT:  .LBB202_3: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB202_1 Depth=1
 ; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    mv a1, s2
 ; RV64I-NEXT:    addi a3, zero, 3
 ; RV64I-NEXT:    mv a4, zero
 ; RV64I-NEXT:    call __atomic_compare_exchange_8
 ; RV64I-NEXT:    ld a2, 8(sp)
-; RV64I-NEXT:    beqz a0, .LBB202_1
-; RV64I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV64I-NEXT:    bnez a0, .LBB202_4
+; RV64I-NEXT:  # %bb.2: # %atomicrmw.start
+; RV64I-NEXT:    # in Loop: Header=BB202_1 Depth=1
+; RV64I-NEXT:    sd a2, 8(sp)
+; RV64I-NEXT:    blt s0, a2, .LBB202_1
+; RV64I-NEXT:  .LBB202_3: # %atomicrmw.start
+; RV64I-NEXT:    mv a2, s0
+; RV64I-NEXT:    j .LBB202_1
+; RV64I-NEXT:  .LBB202_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a2
 ; RV64I-NEXT:    ld s2, 16(sp)
 ; RV64I-NEXT:    ld s1, 24(sp)
@@ -15021,26 +15255,9 @@ define i64 @atomicrmw_max_i64_acq_rel(i6
 ; RV32I-NEXT:    lw a1, 4(a0)
 ; RV32I-NEXT:    lw a2, 0(a0)
 ; RV32I-NEXT:    mv s3, sp
-; RV32I-NEXT:  .LBB203_1: # %atomicrmw.start
-; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT:    beq a1, s0, .LBB203_3
-; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB203_1 Depth=1
-; RV32I-NEXT:    slt a0, s0, a1
+; RV32I-NEXT:    bne a1, s0, .LBB203_3
 ; RV32I-NEXT:    j .LBB203_4
-; RV32I-NEXT:  .LBB203_3: # in Loop: Header=BB203_1 Depth=1
-; RV32I-NEXT:    sltu a0, s2, a2
-; RV32I-NEXT:  .LBB203_4: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB203_1 Depth=1
-; RV32I-NEXT:    sw a2, 0(sp)
-; RV32I-NEXT:    mv a3, a1
-; RV32I-NEXT:    bnez a0, .LBB203_6
-; RV32I-NEXT:  # %bb.5: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB203_1 Depth=1
-; RV32I-NEXT:    mv a2, s2
-; RV32I-NEXT:    mv a3, s0
-; RV32I-NEXT:  .LBB203_6: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB203_1 Depth=1
+; RV32I-NEXT:  .LBB203_1: # %atomicrmw.start
 ; RV32I-NEXT:    sw a1, 4(sp)
 ; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    mv a1, s3
@@ -15049,8 +15266,23 @@ define i64 @atomicrmw_max_i64_acq_rel(i6
 ; RV32I-NEXT:    call __atomic_compare_exchange_8
 ; RV32I-NEXT:    lw a1, 4(sp)
 ; RV32I-NEXT:    lw a2, 0(sp)
-; RV32I-NEXT:    beqz a0, .LBB203_1
-; RV32I-NEXT:  # %bb.7: # %atomicrmw.end
+; RV32I-NEXT:    bnez a0, .LBB203_7
+; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
+; RV32I-NEXT:    beq a1, s0, .LBB203_4
+; RV32I-NEXT:  .LBB203_3: # %atomicrmw.start
+; RV32I-NEXT:    slt a0, s0, a1
+; RV32I-NEXT:    j .LBB203_5
+; RV32I-NEXT:  .LBB203_4:
+; RV32I-NEXT:    sltu a0, s2, a2
+; RV32I-NEXT:  .LBB203_5: # %atomicrmw.start
+; RV32I-NEXT:    sw a2, 0(sp)
+; RV32I-NEXT:    mv a3, a1
+; RV32I-NEXT:    bnez a0, .LBB203_1
+; RV32I-NEXT:  # %bb.6: # %atomicrmw.start
+; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    mv a3, s0
+; RV32I-NEXT:    j .LBB203_1
+; RV32I-NEXT:  .LBB203_7: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a2
 ; RV32I-NEXT:    lw s3, 12(sp)
 ; RV32I-NEXT:    lw s2, 16(sp)
@@ -15074,26 +15306,9 @@ define i64 @atomicrmw_max_i64_acq_rel(i6
 ; RV32IA-NEXT:    lw a1, 4(a0)
 ; RV32IA-NEXT:    lw a2, 0(a0)
 ; RV32IA-NEXT:    mv s3, sp
-; RV32IA-NEXT:  .LBB203_1: # %atomicrmw.start
-; RV32IA-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    beq a1, s0, .LBB203_3
-; RV32IA-NEXT:  # %bb.2: # %atomicrmw.start
-; RV32IA-NEXT:    # in Loop: Header=BB203_1 Depth=1
-; RV32IA-NEXT:    slt a0, s0, a1
+; RV32IA-NEXT:    bne a1, s0, .LBB203_3
 ; RV32IA-NEXT:    j .LBB203_4
-; RV32IA-NEXT:  .LBB203_3: # in Loop: Header=BB203_1 Depth=1
-; RV32IA-NEXT:    sltu a0, s2, a2
-; RV32IA-NEXT:  .LBB203_4: # %atomicrmw.start
-; RV32IA-NEXT:    # in Loop: Header=BB203_1 Depth=1
-; RV32IA-NEXT:    sw a2, 0(sp)
-; RV32IA-NEXT:    mv a3, a1
-; RV32IA-NEXT:    bnez a0, .LBB203_6
-; RV32IA-NEXT:  # %bb.5: # %atomicrmw.start
-; RV32IA-NEXT:    # in Loop: Header=BB203_1 Depth=1
-; RV32IA-NEXT:    mv a2, s2
-; RV32IA-NEXT:    mv a3, s0
-; RV32IA-NEXT:  .LBB203_6: # %atomicrmw.start
-; RV32IA-NEXT:    # in Loop: Header=BB203_1 Depth=1
+; RV32IA-NEXT:  .LBB203_1: # %atomicrmw.start
 ; RV32IA-NEXT:    sw a1, 4(sp)
 ; RV32IA-NEXT:    mv a0, s1
 ; RV32IA-NEXT:    mv a1, s3
@@ -15102,8 +15317,23 @@ define i64 @atomicrmw_max_i64_acq_rel(i6
 ; RV32IA-NEXT:    call __atomic_compare_exchange_8
 ; RV32IA-NEXT:    lw a1, 4(sp)
 ; RV32IA-NEXT:    lw a2, 0(sp)
-; RV32IA-NEXT:    beqz a0, .LBB203_1
-; RV32IA-NEXT:  # %bb.7: # %atomicrmw.end
+; RV32IA-NEXT:    bnez a0, .LBB203_7
+; RV32IA-NEXT:  # %bb.2: # %atomicrmw.start
+; RV32IA-NEXT:    beq a1, s0, .LBB203_4
+; RV32IA-NEXT:  .LBB203_3: # %atomicrmw.start
+; RV32IA-NEXT:    slt a0, s0, a1
+; RV32IA-NEXT:    j .LBB203_5
+; RV32IA-NEXT:  .LBB203_4:
+; RV32IA-NEXT:    sltu a0, s2, a2
+; RV32IA-NEXT:  .LBB203_5: # %atomicrmw.start
+; RV32IA-NEXT:    sw a2, 0(sp)
+; RV32IA-NEXT:    mv a3, a1
+; RV32IA-NEXT:    bnez a0, .LBB203_1
+; RV32IA-NEXT:  # %bb.6: # %atomicrmw.start
+; RV32IA-NEXT:    mv a2, s2
+; RV32IA-NEXT:    mv a3, s0
+; RV32IA-NEXT:    j .LBB203_1
+; RV32IA-NEXT:  .LBB203_7: # %atomicrmw.end
 ; RV32IA-NEXT:    mv a0, a2
 ; RV32IA-NEXT:    lw s3, 12(sp)
 ; RV32IA-NEXT:    lw s2, 16(sp)
@@ -15124,23 +15354,25 @@ define i64 @atomicrmw_max_i64_acq_rel(i6
 ; RV64I-NEXT:    mv s1, a0
 ; RV64I-NEXT:    ld a2, 0(a0)
 ; RV64I-NEXT:    addi s2, sp, 8
+; RV64I-NEXT:    sd a2, 8(sp)
+; RV64I-NEXT:    bge s0, a2, .LBB203_3
 ; RV64I-NEXT:  .LBB203_1: # %atomicrmw.start
 ; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT:    sd a2, 8(sp)
-; RV64I-NEXT:    blt s0, a2, .LBB203_3
-; RV64I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB203_1 Depth=1
-; RV64I-NEXT:    mv a2, s0
-; RV64I-NEXT:  .LBB203_3: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB203_1 Depth=1
 ; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    mv a1, s2
 ; RV64I-NEXT:    addi a3, zero, 4
 ; RV64I-NEXT:    addi a4, zero, 2
 ; RV64I-NEXT:    call __atomic_compare_exchange_8
 ; RV64I-NEXT:    ld a2, 8(sp)
-; RV64I-NEXT:    beqz a0, .LBB203_1
-; RV64I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV64I-NEXT:    bnez a0, .LBB203_4
+; RV64I-NEXT:  # %bb.2: # %atomicrmw.start
+; RV64I-NEXT:    # in Loop: Header=BB203_1 Depth=1
+; RV64I-NEXT:    sd a2, 8(sp)
+; RV64I-NEXT:    blt s0, a2, .LBB203_1
+; RV64I-NEXT:  .LBB203_3: # %atomicrmw.start
+; RV64I-NEXT:    mv a2, s0
+; RV64I-NEXT:    j .LBB203_1
+; RV64I-NEXT:  .LBB203_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a2
 ; RV64I-NEXT:    ld s2, 16(sp)
 ; RV64I-NEXT:    ld s1, 24(sp)
@@ -15172,26 +15404,9 @@ define i64 @atomicrmw_max_i64_seq_cst(i6
 ; RV32I-NEXT:    lw a1, 4(a0)
 ; RV32I-NEXT:    lw a2, 0(a0)
 ; RV32I-NEXT:    mv s3, sp
-; RV32I-NEXT:  .LBB204_1: # %atomicrmw.start
-; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT:    beq a1, s0, .LBB204_3
-; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB204_1 Depth=1
-; RV32I-NEXT:    slt a0, s0, a1
+; RV32I-NEXT:    bne a1, s0, .LBB204_3
 ; RV32I-NEXT:    j .LBB204_4
-; RV32I-NEXT:  .LBB204_3: # in Loop: Header=BB204_1 Depth=1
-; RV32I-NEXT:    sltu a0, s2, a2
-; RV32I-NEXT:  .LBB204_4: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB204_1 Depth=1
-; RV32I-NEXT:    sw a2, 0(sp)
-; RV32I-NEXT:    mv a3, a1
-; RV32I-NEXT:    bnez a0, .LBB204_6
-; RV32I-NEXT:  # %bb.5: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB204_1 Depth=1
-; RV32I-NEXT:    mv a2, s2
-; RV32I-NEXT:    mv a3, s0
-; RV32I-NEXT:  .LBB204_6: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB204_1 Depth=1
+; RV32I-NEXT:  .LBB204_1: # %atomicrmw.start
 ; RV32I-NEXT:    sw a1, 4(sp)
 ; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    mv a1, s3
@@ -15200,8 +15415,23 @@ define i64 @atomicrmw_max_i64_seq_cst(i6
 ; RV32I-NEXT:    call __atomic_compare_exchange_8
 ; RV32I-NEXT:    lw a1, 4(sp)
 ; RV32I-NEXT:    lw a2, 0(sp)
-; RV32I-NEXT:    beqz a0, .LBB204_1
-; RV32I-NEXT:  # %bb.7: # %atomicrmw.end
+; RV32I-NEXT:    bnez a0, .LBB204_7
+; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
+; RV32I-NEXT:    beq a1, s0, .LBB204_4
+; RV32I-NEXT:  .LBB204_3: # %atomicrmw.start
+; RV32I-NEXT:    slt a0, s0, a1
+; RV32I-NEXT:    j .LBB204_5
+; RV32I-NEXT:  .LBB204_4:
+; RV32I-NEXT:    sltu a0, s2, a2
+; RV32I-NEXT:  .LBB204_5: # %atomicrmw.start
+; RV32I-NEXT:    sw a2, 0(sp)
+; RV32I-NEXT:    mv a3, a1
+; RV32I-NEXT:    bnez a0, .LBB204_1
+; RV32I-NEXT:  # %bb.6: # %atomicrmw.start
+; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    mv a3, s0
+; RV32I-NEXT:    j .LBB204_1
+; RV32I-NEXT:  .LBB204_7: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a2
 ; RV32I-NEXT:    lw s3, 12(sp)
 ; RV32I-NEXT:    lw s2, 16(sp)
@@ -15225,26 +15455,9 @@ define i64 @atomicrmw_max_i64_seq_cst(i6
 ; RV32IA-NEXT:    lw a1, 4(a0)
 ; RV32IA-NEXT:    lw a2, 0(a0)
 ; RV32IA-NEXT:    mv s3, sp
-; RV32IA-NEXT:  .LBB204_1: # %atomicrmw.start
-; RV32IA-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    beq a1, s0, .LBB204_3
-; RV32IA-NEXT:  # %bb.2: # %atomicrmw.start
-; RV32IA-NEXT:    # in Loop: Header=BB204_1 Depth=1
-; RV32IA-NEXT:    slt a0, s0, a1
+; RV32IA-NEXT:    bne a1, s0, .LBB204_3
 ; RV32IA-NEXT:    j .LBB204_4
-; RV32IA-NEXT:  .LBB204_3: # in Loop: Header=BB204_1 Depth=1
-; RV32IA-NEXT:    sltu a0, s2, a2
-; RV32IA-NEXT:  .LBB204_4: # %atomicrmw.start
-; RV32IA-NEXT:    # in Loop: Header=BB204_1 Depth=1
-; RV32IA-NEXT:    sw a2, 0(sp)
-; RV32IA-NEXT:    mv a3, a1
-; RV32IA-NEXT:    bnez a0, .LBB204_6
-; RV32IA-NEXT:  # %bb.5: # %atomicrmw.start
-; RV32IA-NEXT:    # in Loop: Header=BB204_1 Depth=1
-; RV32IA-NEXT:    mv a2, s2
-; RV32IA-NEXT:    mv a3, s0
-; RV32IA-NEXT:  .LBB204_6: # %atomicrmw.start
-; RV32IA-NEXT:    # in Loop: Header=BB204_1 Depth=1
+; RV32IA-NEXT:  .LBB204_1: # %atomicrmw.start
 ; RV32IA-NEXT:    sw a1, 4(sp)
 ; RV32IA-NEXT:    mv a0, s1
 ; RV32IA-NEXT:    mv a1, s3
@@ -15253,8 +15466,23 @@ define i64 @atomicrmw_max_i64_seq_cst(i6
 ; RV32IA-NEXT:    call __atomic_compare_exchange_8
 ; RV32IA-NEXT:    lw a1, 4(sp)
 ; RV32IA-NEXT:    lw a2, 0(sp)
-; RV32IA-NEXT:    beqz a0, .LBB204_1
-; RV32IA-NEXT:  # %bb.7: # %atomicrmw.end
+; RV32IA-NEXT:    bnez a0, .LBB204_7
+; RV32IA-NEXT:  # %bb.2: # %atomicrmw.start
+; RV32IA-NEXT:    beq a1, s0, .LBB204_4
+; RV32IA-NEXT:  .LBB204_3: # %atomicrmw.start
+; RV32IA-NEXT:    slt a0, s0, a1
+; RV32IA-NEXT:    j .LBB204_5
+; RV32IA-NEXT:  .LBB204_4:
+; RV32IA-NEXT:    sltu a0, s2, a2
+; RV32IA-NEXT:  .LBB204_5: # %atomicrmw.start
+; RV32IA-NEXT:    sw a2, 0(sp)
+; RV32IA-NEXT:    mv a3, a1
+; RV32IA-NEXT:    bnez a0, .LBB204_1
+; RV32IA-NEXT:  # %bb.6: # %atomicrmw.start
+; RV32IA-NEXT:    mv a2, s2
+; RV32IA-NEXT:    mv a3, s0
+; RV32IA-NEXT:    j .LBB204_1
+; RV32IA-NEXT:  .LBB204_7: # %atomicrmw.end
 ; RV32IA-NEXT:    mv a0, a2
 ; RV32IA-NEXT:    lw s3, 12(sp)
 ; RV32IA-NEXT:    lw s2, 16(sp)
@@ -15275,23 +15503,25 @@ define i64 @atomicrmw_max_i64_seq_cst(i6
 ; RV64I-NEXT:    mv s1, a0
 ; RV64I-NEXT:    ld a2, 0(a0)
 ; RV64I-NEXT:    addi s2, sp, 8
+; RV64I-NEXT:    sd a2, 8(sp)
+; RV64I-NEXT:    bge s0, a2, .LBB204_3
 ; RV64I-NEXT:  .LBB204_1: # %atomicrmw.start
 ; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT:    sd a2, 8(sp)
-; RV64I-NEXT:    blt s0, a2, .LBB204_3
-; RV64I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB204_1 Depth=1
-; RV64I-NEXT:    mv a2, s0
-; RV64I-NEXT:  .LBB204_3: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB204_1 Depth=1
 ; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    mv a1, s2
 ; RV64I-NEXT:    addi a3, zero, 5
 ; RV64I-NEXT:    addi a4, zero, 5
 ; RV64I-NEXT:    call __atomic_compare_exchange_8
 ; RV64I-NEXT:    ld a2, 8(sp)
-; RV64I-NEXT:    beqz a0, .LBB204_1
-; RV64I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV64I-NEXT:    bnez a0, .LBB204_4
+; RV64I-NEXT:  # %bb.2: # %atomicrmw.start
+; RV64I-NEXT:    # in Loop: Header=BB204_1 Depth=1
+; RV64I-NEXT:    sd a2, 8(sp)
+; RV64I-NEXT:    blt s0, a2, .LBB204_1
+; RV64I-NEXT:  .LBB204_3: # %atomicrmw.start
+; RV64I-NEXT:    mv a2, s0
+; RV64I-NEXT:    j .LBB204_1
+; RV64I-NEXT:  .LBB204_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a2
 ; RV64I-NEXT:    ld s2, 16(sp)
 ; RV64I-NEXT:    ld s1, 24(sp)
@@ -15323,27 +15553,9 @@ define i64 @atomicrmw_min_i64_monotonic(
 ; RV32I-NEXT:    lw a1, 4(a0)
 ; RV32I-NEXT:    lw a2, 0(a0)
 ; RV32I-NEXT:    mv s3, sp
-; RV32I-NEXT:  .LBB205_1: # %atomicrmw.start
-; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT:    beq a1, s0, .LBB205_3
-; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB205_1 Depth=1
-; RV32I-NEXT:    slt a0, s0, a1
+; RV32I-NEXT:    bne a1, s0, .LBB205_3
 ; RV32I-NEXT:    j .LBB205_4
-; RV32I-NEXT:  .LBB205_3: # in Loop: Header=BB205_1 Depth=1
-; RV32I-NEXT:    sltu a0, s2, a2
-; RV32I-NEXT:  .LBB205_4: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB205_1 Depth=1
-; RV32I-NEXT:    xori a0, a0, 1
-; RV32I-NEXT:    sw a2, 0(sp)
-; RV32I-NEXT:    mv a3, a1
-; RV32I-NEXT:    bnez a0, .LBB205_6
-; RV32I-NEXT:  # %bb.5: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB205_1 Depth=1
-; RV32I-NEXT:    mv a2, s2
-; RV32I-NEXT:    mv a3, s0
-; RV32I-NEXT:  .LBB205_6: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB205_1 Depth=1
+; RV32I-NEXT:  .LBB205_1: # %atomicrmw.start
 ; RV32I-NEXT:    sw a1, 4(sp)
 ; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    mv a1, s3
@@ -15352,8 +15564,24 @@ define i64 @atomicrmw_min_i64_monotonic(
 ; RV32I-NEXT:    call __atomic_compare_exchange_8
 ; RV32I-NEXT:    lw a1, 4(sp)
 ; RV32I-NEXT:    lw a2, 0(sp)
-; RV32I-NEXT:    beqz a0, .LBB205_1
-; RV32I-NEXT:  # %bb.7: # %atomicrmw.end
+; RV32I-NEXT:    bnez a0, .LBB205_7
+; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
+; RV32I-NEXT:    beq a1, s0, .LBB205_4
+; RV32I-NEXT:  .LBB205_3: # %atomicrmw.start
+; RV32I-NEXT:    slt a0, s0, a1
+; RV32I-NEXT:    j .LBB205_5
+; RV32I-NEXT:  .LBB205_4:
+; RV32I-NEXT:    sltu a0, s2, a2
+; RV32I-NEXT:  .LBB205_5: # %atomicrmw.start
+; RV32I-NEXT:    xori a0, a0, 1
+; RV32I-NEXT:    sw a2, 0(sp)
+; RV32I-NEXT:    mv a3, a1
+; RV32I-NEXT:    bnez a0, .LBB205_1
+; RV32I-NEXT:  # %bb.6: # %atomicrmw.start
+; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    mv a3, s0
+; RV32I-NEXT:    j .LBB205_1
+; RV32I-NEXT:  .LBB205_7: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a2
 ; RV32I-NEXT:    lw s3, 12(sp)
 ; RV32I-NEXT:    lw s2, 16(sp)
@@ -15377,27 +15605,9 @@ define i64 @atomicrmw_min_i64_monotonic(
 ; RV32IA-NEXT:    lw a1, 4(a0)
 ; RV32IA-NEXT:    lw a2, 0(a0)
 ; RV32IA-NEXT:    mv s3, sp
-; RV32IA-NEXT:  .LBB205_1: # %atomicrmw.start
-; RV32IA-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    beq a1, s0, .LBB205_3
-; RV32IA-NEXT:  # %bb.2: # %atomicrmw.start
-; RV32IA-NEXT:    # in Loop: Header=BB205_1 Depth=1
-; RV32IA-NEXT:    slt a0, s0, a1
+; RV32IA-NEXT:    bne a1, s0, .LBB205_3
 ; RV32IA-NEXT:    j .LBB205_4
-; RV32IA-NEXT:  .LBB205_3: # in Loop: Header=BB205_1 Depth=1
-; RV32IA-NEXT:    sltu a0, s2, a2
-; RV32IA-NEXT:  .LBB205_4: # %atomicrmw.start
-; RV32IA-NEXT:    # in Loop: Header=BB205_1 Depth=1
-; RV32IA-NEXT:    xori a0, a0, 1
-; RV32IA-NEXT:    sw a2, 0(sp)
-; RV32IA-NEXT:    mv a3, a1
-; RV32IA-NEXT:    bnez a0, .LBB205_6
-; RV32IA-NEXT:  # %bb.5: # %atomicrmw.start
-; RV32IA-NEXT:    # in Loop: Header=BB205_1 Depth=1
-; RV32IA-NEXT:    mv a2, s2
-; RV32IA-NEXT:    mv a3, s0
-; RV32IA-NEXT:  .LBB205_6: # %atomicrmw.start
-; RV32IA-NEXT:    # in Loop: Header=BB205_1 Depth=1
+; RV32IA-NEXT:  .LBB205_1: # %atomicrmw.start
 ; RV32IA-NEXT:    sw a1, 4(sp)
 ; RV32IA-NEXT:    mv a0, s1
 ; RV32IA-NEXT:    mv a1, s3
@@ -15406,8 +15616,24 @@ define i64 @atomicrmw_min_i64_monotonic(
 ; RV32IA-NEXT:    call __atomic_compare_exchange_8
 ; RV32IA-NEXT:    lw a1, 4(sp)
 ; RV32IA-NEXT:    lw a2, 0(sp)
-; RV32IA-NEXT:    beqz a0, .LBB205_1
-; RV32IA-NEXT:  # %bb.7: # %atomicrmw.end
+; RV32IA-NEXT:    bnez a0, .LBB205_7
+; RV32IA-NEXT:  # %bb.2: # %atomicrmw.start
+; RV32IA-NEXT:    beq a1, s0, .LBB205_4
+; RV32IA-NEXT:  .LBB205_3: # %atomicrmw.start
+; RV32IA-NEXT:    slt a0, s0, a1
+; RV32IA-NEXT:    j .LBB205_5
+; RV32IA-NEXT:  .LBB205_4:
+; RV32IA-NEXT:    sltu a0, s2, a2
+; RV32IA-NEXT:  .LBB205_5: # %atomicrmw.start
+; RV32IA-NEXT:    xori a0, a0, 1
+; RV32IA-NEXT:    sw a2, 0(sp)
+; RV32IA-NEXT:    mv a3, a1
+; RV32IA-NEXT:    bnez a0, .LBB205_1
+; RV32IA-NEXT:  # %bb.6: # %atomicrmw.start
+; RV32IA-NEXT:    mv a2, s2
+; RV32IA-NEXT:    mv a3, s0
+; RV32IA-NEXT:    j .LBB205_1
+; RV32IA-NEXT:  .LBB205_7: # %atomicrmw.end
 ; RV32IA-NEXT:    mv a0, a2
 ; RV32IA-NEXT:    lw s3, 12(sp)
 ; RV32IA-NEXT:    lw s2, 16(sp)
@@ -15428,23 +15654,25 @@ define i64 @atomicrmw_min_i64_monotonic(
 ; RV64I-NEXT:    mv s1, a0
 ; RV64I-NEXT:    ld a2, 0(a0)
 ; RV64I-NEXT:    addi s2, sp, 8
+; RV64I-NEXT:    sd a2, 8(sp)
+; RV64I-NEXT:    blt s0, a2, .LBB205_3
 ; RV64I-NEXT:  .LBB205_1: # %atomicrmw.start
 ; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT:    sd a2, 8(sp)
-; RV64I-NEXT:    bge s0, a2, .LBB205_3
-; RV64I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB205_1 Depth=1
-; RV64I-NEXT:    mv a2, s0
-; RV64I-NEXT:  .LBB205_3: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB205_1 Depth=1
 ; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    mv a1, s2
 ; RV64I-NEXT:    mv a3, zero
 ; RV64I-NEXT:    mv a4, zero
 ; RV64I-NEXT:    call __atomic_compare_exchange_8
 ; RV64I-NEXT:    ld a2, 8(sp)
-; RV64I-NEXT:    beqz a0, .LBB205_1
-; RV64I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV64I-NEXT:    bnez a0, .LBB205_4
+; RV64I-NEXT:  # %bb.2: # %atomicrmw.start
+; RV64I-NEXT:    # in Loop: Header=BB205_1 Depth=1
+; RV64I-NEXT:    sd a2, 8(sp)
+; RV64I-NEXT:    bge s0, a2, .LBB205_1
+; RV64I-NEXT:  .LBB205_3: # %atomicrmw.start
+; RV64I-NEXT:    mv a2, s0
+; RV64I-NEXT:    j .LBB205_1
+; RV64I-NEXT:  .LBB205_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a2
 ; RV64I-NEXT:    ld s2, 16(sp)
 ; RV64I-NEXT:    ld s1, 24(sp)
@@ -15476,27 +15704,9 @@ define i64 @atomicrmw_min_i64_acquire(i6
 ; RV32I-NEXT:    lw a1, 4(a0)
 ; RV32I-NEXT:    lw a2, 0(a0)
 ; RV32I-NEXT:    mv s3, sp
-; RV32I-NEXT:  .LBB206_1: # %atomicrmw.start
-; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT:    beq a1, s0, .LBB206_3
-; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB206_1 Depth=1
-; RV32I-NEXT:    slt a0, s0, a1
+; RV32I-NEXT:    bne a1, s0, .LBB206_3
 ; RV32I-NEXT:    j .LBB206_4
-; RV32I-NEXT:  .LBB206_3: # in Loop: Header=BB206_1 Depth=1
-; RV32I-NEXT:    sltu a0, s2, a2
-; RV32I-NEXT:  .LBB206_4: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB206_1 Depth=1
-; RV32I-NEXT:    xori a0, a0, 1
-; RV32I-NEXT:    sw a2, 0(sp)
-; RV32I-NEXT:    mv a3, a1
-; RV32I-NEXT:    bnez a0, .LBB206_6
-; RV32I-NEXT:  # %bb.5: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB206_1 Depth=1
-; RV32I-NEXT:    mv a2, s2
-; RV32I-NEXT:    mv a3, s0
-; RV32I-NEXT:  .LBB206_6: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB206_1 Depth=1
+; RV32I-NEXT:  .LBB206_1: # %atomicrmw.start
 ; RV32I-NEXT:    sw a1, 4(sp)
 ; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    mv a1, s3
@@ -15505,8 +15715,24 @@ define i64 @atomicrmw_min_i64_acquire(i6
 ; RV32I-NEXT:    call __atomic_compare_exchange_8
 ; RV32I-NEXT:    lw a1, 4(sp)
 ; RV32I-NEXT:    lw a2, 0(sp)
-; RV32I-NEXT:    beqz a0, .LBB206_1
-; RV32I-NEXT:  # %bb.7: # %atomicrmw.end
+; RV32I-NEXT:    bnez a0, .LBB206_7
+; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
+; RV32I-NEXT:    beq a1, s0, .LBB206_4
+; RV32I-NEXT:  .LBB206_3: # %atomicrmw.start
+; RV32I-NEXT:    slt a0, s0, a1
+; RV32I-NEXT:    j .LBB206_5
+; RV32I-NEXT:  .LBB206_4:
+; RV32I-NEXT:    sltu a0, s2, a2
+; RV32I-NEXT:  .LBB206_5: # %atomicrmw.start
+; RV32I-NEXT:    xori a0, a0, 1
+; RV32I-NEXT:    sw a2, 0(sp)
+; RV32I-NEXT:    mv a3, a1
+; RV32I-NEXT:    bnez a0, .LBB206_1
+; RV32I-NEXT:  # %bb.6: # %atomicrmw.start
+; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    mv a3, s0
+; RV32I-NEXT:    j .LBB206_1
+; RV32I-NEXT:  .LBB206_7: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a2
 ; RV32I-NEXT:    lw s3, 12(sp)
 ; RV32I-NEXT:    lw s2, 16(sp)
@@ -15530,27 +15756,9 @@ define i64 @atomicrmw_min_i64_acquire(i6
 ; RV32IA-NEXT:    lw a1, 4(a0)
 ; RV32IA-NEXT:    lw a2, 0(a0)
 ; RV32IA-NEXT:    mv s3, sp
-; RV32IA-NEXT:  .LBB206_1: # %atomicrmw.start
-; RV32IA-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    beq a1, s0, .LBB206_3
-; RV32IA-NEXT:  # %bb.2: # %atomicrmw.start
-; RV32IA-NEXT:    # in Loop: Header=BB206_1 Depth=1
-; RV32IA-NEXT:    slt a0, s0, a1
+; RV32IA-NEXT:    bne a1, s0, .LBB206_3
 ; RV32IA-NEXT:    j .LBB206_4
-; RV32IA-NEXT:  .LBB206_3: # in Loop: Header=BB206_1 Depth=1
-; RV32IA-NEXT:    sltu a0, s2, a2
-; RV32IA-NEXT:  .LBB206_4: # %atomicrmw.start
-; RV32IA-NEXT:    # in Loop: Header=BB206_1 Depth=1
-; RV32IA-NEXT:    xori a0, a0, 1
-; RV32IA-NEXT:    sw a2, 0(sp)
-; RV32IA-NEXT:    mv a3, a1
-; RV32IA-NEXT:    bnez a0, .LBB206_6
-; RV32IA-NEXT:  # %bb.5: # %atomicrmw.start
-; RV32IA-NEXT:    # in Loop: Header=BB206_1 Depth=1
-; RV32IA-NEXT:    mv a2, s2
-; RV32IA-NEXT:    mv a3, s0
-; RV32IA-NEXT:  .LBB206_6: # %atomicrmw.start
-; RV32IA-NEXT:    # in Loop: Header=BB206_1 Depth=1
+; RV32IA-NEXT:  .LBB206_1: # %atomicrmw.start
 ; RV32IA-NEXT:    sw a1, 4(sp)
 ; RV32IA-NEXT:    mv a0, s1
 ; RV32IA-NEXT:    mv a1, s3
@@ -15559,11 +15767,27 @@ define i64 @atomicrmw_min_i64_acquire(i6
 ; RV32IA-NEXT:    call __atomic_compare_exchange_8
 ; RV32IA-NEXT:    lw a1, 4(sp)
 ; RV32IA-NEXT:    lw a2, 0(sp)
-; RV32IA-NEXT:    beqz a0, .LBB206_1
-; RV32IA-NEXT:  # %bb.7: # %atomicrmw.end
-; RV32IA-NEXT:    mv a0, a2
-; RV32IA-NEXT:    lw s3, 12(sp)
-; RV32IA-NEXT:    lw s2, 16(sp)
+; RV32IA-NEXT:    bnez a0, .LBB206_7
+; RV32IA-NEXT:  # %bb.2: # %atomicrmw.start
+; RV32IA-NEXT:    beq a1, s0, .LBB206_4
+; RV32IA-NEXT:  .LBB206_3: # %atomicrmw.start
+; RV32IA-NEXT:    slt a0, s0, a1
+; RV32IA-NEXT:    j .LBB206_5
+; RV32IA-NEXT:  .LBB206_4:
+; RV32IA-NEXT:    sltu a0, s2, a2
+; RV32IA-NEXT:  .LBB206_5: # %atomicrmw.start
+; RV32IA-NEXT:    xori a0, a0, 1
+; RV32IA-NEXT:    sw a2, 0(sp)
+; RV32IA-NEXT:    mv a3, a1
+; RV32IA-NEXT:    bnez a0, .LBB206_1
+; RV32IA-NEXT:  # %bb.6: # %atomicrmw.start
+; RV32IA-NEXT:    mv a2, s2
+; RV32IA-NEXT:    mv a3, s0
+; RV32IA-NEXT:    j .LBB206_1
+; RV32IA-NEXT:  .LBB206_7: # %atomicrmw.end
+; RV32IA-NEXT:    mv a0, a2
+; RV32IA-NEXT:    lw s3, 12(sp)
+; RV32IA-NEXT:    lw s2, 16(sp)
 ; RV32IA-NEXT:    lw s1, 20(sp)
 ; RV32IA-NEXT:    lw s0, 24(sp)
 ; RV32IA-NEXT:    lw ra, 28(sp)
@@ -15581,23 +15805,25 @@ define i64 @atomicrmw_min_i64_acquire(i6
 ; RV64I-NEXT:    mv s1, a0
 ; RV64I-NEXT:    ld a2, 0(a0)
 ; RV64I-NEXT:    addi s2, sp, 8
+; RV64I-NEXT:    sd a2, 8(sp)
+; RV64I-NEXT:    blt s0, a2, .LBB206_3
 ; RV64I-NEXT:  .LBB206_1: # %atomicrmw.start
 ; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT:    sd a2, 8(sp)
-; RV64I-NEXT:    bge s0, a2, .LBB206_3
-; RV64I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB206_1 Depth=1
-; RV64I-NEXT:    mv a2, s0
-; RV64I-NEXT:  .LBB206_3: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB206_1 Depth=1
 ; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    mv a1, s2
 ; RV64I-NEXT:    addi a3, zero, 2
 ; RV64I-NEXT:    addi a4, zero, 2
 ; RV64I-NEXT:    call __atomic_compare_exchange_8
 ; RV64I-NEXT:    ld a2, 8(sp)
-; RV64I-NEXT:    beqz a0, .LBB206_1
-; RV64I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV64I-NEXT:    bnez a0, .LBB206_4
+; RV64I-NEXT:  # %bb.2: # %atomicrmw.start
+; RV64I-NEXT:    # in Loop: Header=BB206_1 Depth=1
+; RV64I-NEXT:    sd a2, 8(sp)
+; RV64I-NEXT:    bge s0, a2, .LBB206_1
+; RV64I-NEXT:  .LBB206_3: # %atomicrmw.start
+; RV64I-NEXT:    mv a2, s0
+; RV64I-NEXT:    j .LBB206_1
+; RV64I-NEXT:  .LBB206_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a2
 ; RV64I-NEXT:    ld s2, 16(sp)
 ; RV64I-NEXT:    ld s1, 24(sp)
@@ -15629,27 +15855,9 @@ define i64 @atomicrmw_min_i64_release(i6
 ; RV32I-NEXT:    lw a1, 4(a0)
 ; RV32I-NEXT:    lw a2, 0(a0)
 ; RV32I-NEXT:    mv s3, sp
-; RV32I-NEXT:  .LBB207_1: # %atomicrmw.start
-; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT:    beq a1, s0, .LBB207_3
-; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB207_1 Depth=1
-; RV32I-NEXT:    slt a0, s0, a1
+; RV32I-NEXT:    bne a1, s0, .LBB207_3
 ; RV32I-NEXT:    j .LBB207_4
-; RV32I-NEXT:  .LBB207_3: # in Loop: Header=BB207_1 Depth=1
-; RV32I-NEXT:    sltu a0, s2, a2
-; RV32I-NEXT:  .LBB207_4: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB207_1 Depth=1
-; RV32I-NEXT:    xori a0, a0, 1
-; RV32I-NEXT:    sw a2, 0(sp)
-; RV32I-NEXT:    mv a3, a1
-; RV32I-NEXT:    bnez a0, .LBB207_6
-; RV32I-NEXT:  # %bb.5: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB207_1 Depth=1
-; RV32I-NEXT:    mv a2, s2
-; RV32I-NEXT:    mv a3, s0
-; RV32I-NEXT:  .LBB207_6: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB207_1 Depth=1
+; RV32I-NEXT:  .LBB207_1: # %atomicrmw.start
 ; RV32I-NEXT:    sw a1, 4(sp)
 ; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    mv a1, s3
@@ -15658,8 +15866,24 @@ define i64 @atomicrmw_min_i64_release(i6
 ; RV32I-NEXT:    call __atomic_compare_exchange_8
 ; RV32I-NEXT:    lw a1, 4(sp)
 ; RV32I-NEXT:    lw a2, 0(sp)
-; RV32I-NEXT:    beqz a0, .LBB207_1
-; RV32I-NEXT:  # %bb.7: # %atomicrmw.end
+; RV32I-NEXT:    bnez a0, .LBB207_7
+; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
+; RV32I-NEXT:    beq a1, s0, .LBB207_4
+; RV32I-NEXT:  .LBB207_3: # %atomicrmw.start
+; RV32I-NEXT:    slt a0, s0, a1
+; RV32I-NEXT:    j .LBB207_5
+; RV32I-NEXT:  .LBB207_4:
+; RV32I-NEXT:    sltu a0, s2, a2
+; RV32I-NEXT:  .LBB207_5: # %atomicrmw.start
+; RV32I-NEXT:    xori a0, a0, 1
+; RV32I-NEXT:    sw a2, 0(sp)
+; RV32I-NEXT:    mv a3, a1
+; RV32I-NEXT:    bnez a0, .LBB207_1
+; RV32I-NEXT:  # %bb.6: # %atomicrmw.start
+; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    mv a3, s0
+; RV32I-NEXT:    j .LBB207_1
+; RV32I-NEXT:  .LBB207_7: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a2
 ; RV32I-NEXT:    lw s3, 12(sp)
 ; RV32I-NEXT:    lw s2, 16(sp)
@@ -15683,27 +15907,9 @@ define i64 @atomicrmw_min_i64_release(i6
 ; RV32IA-NEXT:    lw a1, 4(a0)
 ; RV32IA-NEXT:    lw a2, 0(a0)
 ; RV32IA-NEXT:    mv s3, sp
-; RV32IA-NEXT:  .LBB207_1: # %atomicrmw.start
-; RV32IA-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    beq a1, s0, .LBB207_3
-; RV32IA-NEXT:  # %bb.2: # %atomicrmw.start
-; RV32IA-NEXT:    # in Loop: Header=BB207_1 Depth=1
-; RV32IA-NEXT:    slt a0, s0, a1
+; RV32IA-NEXT:    bne a1, s0, .LBB207_3
 ; RV32IA-NEXT:    j .LBB207_4
-; RV32IA-NEXT:  .LBB207_3: # in Loop: Header=BB207_1 Depth=1
-; RV32IA-NEXT:    sltu a0, s2, a2
-; RV32IA-NEXT:  .LBB207_4: # %atomicrmw.start
-; RV32IA-NEXT:    # in Loop: Header=BB207_1 Depth=1
-; RV32IA-NEXT:    xori a0, a0, 1
-; RV32IA-NEXT:    sw a2, 0(sp)
-; RV32IA-NEXT:    mv a3, a1
-; RV32IA-NEXT:    bnez a0, .LBB207_6
-; RV32IA-NEXT:  # %bb.5: # %atomicrmw.start
-; RV32IA-NEXT:    # in Loop: Header=BB207_1 Depth=1
-; RV32IA-NEXT:    mv a2, s2
-; RV32IA-NEXT:    mv a3, s0
-; RV32IA-NEXT:  .LBB207_6: # %atomicrmw.start
-; RV32IA-NEXT:    # in Loop: Header=BB207_1 Depth=1
+; RV32IA-NEXT:  .LBB207_1: # %atomicrmw.start
 ; RV32IA-NEXT:    sw a1, 4(sp)
 ; RV32IA-NEXT:    mv a0, s1
 ; RV32IA-NEXT:    mv a1, s3
@@ -15712,8 +15918,24 @@ define i64 @atomicrmw_min_i64_release(i6
 ; RV32IA-NEXT:    call __atomic_compare_exchange_8
 ; RV32IA-NEXT:    lw a1, 4(sp)
 ; RV32IA-NEXT:    lw a2, 0(sp)
-; RV32IA-NEXT:    beqz a0, .LBB207_1
-; RV32IA-NEXT:  # %bb.7: # %atomicrmw.end
+; RV32IA-NEXT:    bnez a0, .LBB207_7
+; RV32IA-NEXT:  # %bb.2: # %atomicrmw.start
+; RV32IA-NEXT:    beq a1, s0, .LBB207_4
+; RV32IA-NEXT:  .LBB207_3: # %atomicrmw.start
+; RV32IA-NEXT:    slt a0, s0, a1
+; RV32IA-NEXT:    j .LBB207_5
+; RV32IA-NEXT:  .LBB207_4:
+; RV32IA-NEXT:    sltu a0, s2, a2
+; RV32IA-NEXT:  .LBB207_5: # %atomicrmw.start
+; RV32IA-NEXT:    xori a0, a0, 1
+; RV32IA-NEXT:    sw a2, 0(sp)
+; RV32IA-NEXT:    mv a3, a1
+; RV32IA-NEXT:    bnez a0, .LBB207_1
+; RV32IA-NEXT:  # %bb.6: # %atomicrmw.start
+; RV32IA-NEXT:    mv a2, s2
+; RV32IA-NEXT:    mv a3, s0
+; RV32IA-NEXT:    j .LBB207_1
+; RV32IA-NEXT:  .LBB207_7: # %atomicrmw.end
 ; RV32IA-NEXT:    mv a0, a2
 ; RV32IA-NEXT:    lw s3, 12(sp)
 ; RV32IA-NEXT:    lw s2, 16(sp)
@@ -15734,23 +15956,25 @@ define i64 @atomicrmw_min_i64_release(i6
 ; RV64I-NEXT:    mv s1, a0
 ; RV64I-NEXT:    ld a2, 0(a0)
 ; RV64I-NEXT:    addi s2, sp, 8
+; RV64I-NEXT:    sd a2, 8(sp)
+; RV64I-NEXT:    blt s0, a2, .LBB207_3
 ; RV64I-NEXT:  .LBB207_1: # %atomicrmw.start
 ; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT:    sd a2, 8(sp)
-; RV64I-NEXT:    bge s0, a2, .LBB207_3
-; RV64I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB207_1 Depth=1
-; RV64I-NEXT:    mv a2, s0
-; RV64I-NEXT:  .LBB207_3: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB207_1 Depth=1
 ; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    mv a1, s2
 ; RV64I-NEXT:    addi a3, zero, 3
 ; RV64I-NEXT:    mv a4, zero
 ; RV64I-NEXT:    call __atomic_compare_exchange_8
 ; RV64I-NEXT:    ld a2, 8(sp)
-; RV64I-NEXT:    beqz a0, .LBB207_1
-; RV64I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV64I-NEXT:    bnez a0, .LBB207_4
+; RV64I-NEXT:  # %bb.2: # %atomicrmw.start
+; RV64I-NEXT:    # in Loop: Header=BB207_1 Depth=1
+; RV64I-NEXT:    sd a2, 8(sp)
+; RV64I-NEXT:    bge s0, a2, .LBB207_1
+; RV64I-NEXT:  .LBB207_3: # %atomicrmw.start
+; RV64I-NEXT:    mv a2, s0
+; RV64I-NEXT:    j .LBB207_1
+; RV64I-NEXT:  .LBB207_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a2
 ; RV64I-NEXT:    ld s2, 16(sp)
 ; RV64I-NEXT:    ld s1, 24(sp)
@@ -15782,27 +16006,9 @@ define i64 @atomicrmw_min_i64_acq_rel(i6
 ; RV32I-NEXT:    lw a1, 4(a0)
 ; RV32I-NEXT:    lw a2, 0(a0)
 ; RV32I-NEXT:    mv s3, sp
-; RV32I-NEXT:  .LBB208_1: # %atomicrmw.start
-; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT:    beq a1, s0, .LBB208_3
-; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB208_1 Depth=1
-; RV32I-NEXT:    slt a0, s0, a1
+; RV32I-NEXT:    bne a1, s0, .LBB208_3
 ; RV32I-NEXT:    j .LBB208_4
-; RV32I-NEXT:  .LBB208_3: # in Loop: Header=BB208_1 Depth=1
-; RV32I-NEXT:    sltu a0, s2, a2
-; RV32I-NEXT:  .LBB208_4: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB208_1 Depth=1
-; RV32I-NEXT:    xori a0, a0, 1
-; RV32I-NEXT:    sw a2, 0(sp)
-; RV32I-NEXT:    mv a3, a1
-; RV32I-NEXT:    bnez a0, .LBB208_6
-; RV32I-NEXT:  # %bb.5: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB208_1 Depth=1
-; RV32I-NEXT:    mv a2, s2
-; RV32I-NEXT:    mv a3, s0
-; RV32I-NEXT:  .LBB208_6: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB208_1 Depth=1
+; RV32I-NEXT:  .LBB208_1: # %atomicrmw.start
 ; RV32I-NEXT:    sw a1, 4(sp)
 ; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    mv a1, s3
@@ -15811,8 +16017,24 @@ define i64 @atomicrmw_min_i64_acq_rel(i6
 ; RV32I-NEXT:    call __atomic_compare_exchange_8
 ; RV32I-NEXT:    lw a1, 4(sp)
 ; RV32I-NEXT:    lw a2, 0(sp)
-; RV32I-NEXT:    beqz a0, .LBB208_1
-; RV32I-NEXT:  # %bb.7: # %atomicrmw.end
+; RV32I-NEXT:    bnez a0, .LBB208_7
+; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
+; RV32I-NEXT:    beq a1, s0, .LBB208_4
+; RV32I-NEXT:  .LBB208_3: # %atomicrmw.start
+; RV32I-NEXT:    slt a0, s0, a1
+; RV32I-NEXT:    j .LBB208_5
+; RV32I-NEXT:  .LBB208_4:
+; RV32I-NEXT:    sltu a0, s2, a2
+; RV32I-NEXT:  .LBB208_5: # %atomicrmw.start
+; RV32I-NEXT:    xori a0, a0, 1
+; RV32I-NEXT:    sw a2, 0(sp)
+; RV32I-NEXT:    mv a3, a1
+; RV32I-NEXT:    bnez a0, .LBB208_1
+; RV32I-NEXT:  # %bb.6: # %atomicrmw.start
+; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    mv a3, s0
+; RV32I-NEXT:    j .LBB208_1
+; RV32I-NEXT:  .LBB208_7: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a2
 ; RV32I-NEXT:    lw s3, 12(sp)
 ; RV32I-NEXT:    lw s2, 16(sp)
@@ -15836,27 +16058,9 @@ define i64 @atomicrmw_min_i64_acq_rel(i6
 ; RV32IA-NEXT:    lw a1, 4(a0)
 ; RV32IA-NEXT:    lw a2, 0(a0)
 ; RV32IA-NEXT:    mv s3, sp
-; RV32IA-NEXT:  .LBB208_1: # %atomicrmw.start
-; RV32IA-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    beq a1, s0, .LBB208_3
-; RV32IA-NEXT:  # %bb.2: # %atomicrmw.start
-; RV32IA-NEXT:    # in Loop: Header=BB208_1 Depth=1
-; RV32IA-NEXT:    slt a0, s0, a1
+; RV32IA-NEXT:    bne a1, s0, .LBB208_3
 ; RV32IA-NEXT:    j .LBB208_4
-; RV32IA-NEXT:  .LBB208_3: # in Loop: Header=BB208_1 Depth=1
-; RV32IA-NEXT:    sltu a0, s2, a2
-; RV32IA-NEXT:  .LBB208_4: # %atomicrmw.start
-; RV32IA-NEXT:    # in Loop: Header=BB208_1 Depth=1
-; RV32IA-NEXT:    xori a0, a0, 1
-; RV32IA-NEXT:    sw a2, 0(sp)
-; RV32IA-NEXT:    mv a3, a1
-; RV32IA-NEXT:    bnez a0, .LBB208_6
-; RV32IA-NEXT:  # %bb.5: # %atomicrmw.start
-; RV32IA-NEXT:    # in Loop: Header=BB208_1 Depth=1
-; RV32IA-NEXT:    mv a2, s2
-; RV32IA-NEXT:    mv a3, s0
-; RV32IA-NEXT:  .LBB208_6: # %atomicrmw.start
-; RV32IA-NEXT:    # in Loop: Header=BB208_1 Depth=1
+; RV32IA-NEXT:  .LBB208_1: # %atomicrmw.start
 ; RV32IA-NEXT:    sw a1, 4(sp)
 ; RV32IA-NEXT:    mv a0, s1
 ; RV32IA-NEXT:    mv a1, s3
@@ -15865,8 +16069,24 @@ define i64 @atomicrmw_min_i64_acq_rel(i6
 ; RV32IA-NEXT:    call __atomic_compare_exchange_8
 ; RV32IA-NEXT:    lw a1, 4(sp)
 ; RV32IA-NEXT:    lw a2, 0(sp)
-; RV32IA-NEXT:    beqz a0, .LBB208_1
-; RV32IA-NEXT:  # %bb.7: # %atomicrmw.end
+; RV32IA-NEXT:    bnez a0, .LBB208_7
+; RV32IA-NEXT:  # %bb.2: # %atomicrmw.start
+; RV32IA-NEXT:    beq a1, s0, .LBB208_4
+; RV32IA-NEXT:  .LBB208_3: # %atomicrmw.start
+; RV32IA-NEXT:    slt a0, s0, a1
+; RV32IA-NEXT:    j .LBB208_5
+; RV32IA-NEXT:  .LBB208_4:
+; RV32IA-NEXT:    sltu a0, s2, a2
+; RV32IA-NEXT:  .LBB208_5: # %atomicrmw.start
+; RV32IA-NEXT:    xori a0, a0, 1
+; RV32IA-NEXT:    sw a2, 0(sp)
+; RV32IA-NEXT:    mv a3, a1
+; RV32IA-NEXT:    bnez a0, .LBB208_1
+; RV32IA-NEXT:  # %bb.6: # %atomicrmw.start
+; RV32IA-NEXT:    mv a2, s2
+; RV32IA-NEXT:    mv a3, s0
+; RV32IA-NEXT:    j .LBB208_1
+; RV32IA-NEXT:  .LBB208_7: # %atomicrmw.end
 ; RV32IA-NEXT:    mv a0, a2
 ; RV32IA-NEXT:    lw s3, 12(sp)
 ; RV32IA-NEXT:    lw s2, 16(sp)
@@ -15887,23 +16107,25 @@ define i64 @atomicrmw_min_i64_acq_rel(i6
 ; RV64I-NEXT:    mv s1, a0
 ; RV64I-NEXT:    ld a2, 0(a0)
 ; RV64I-NEXT:    addi s2, sp, 8
+; RV64I-NEXT:    sd a2, 8(sp)
+; RV64I-NEXT:    blt s0, a2, .LBB208_3
 ; RV64I-NEXT:  .LBB208_1: # %atomicrmw.start
 ; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT:    sd a2, 8(sp)
-; RV64I-NEXT:    bge s0, a2, .LBB208_3
-; RV64I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB208_1 Depth=1
-; RV64I-NEXT:    mv a2, s0
-; RV64I-NEXT:  .LBB208_3: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB208_1 Depth=1
 ; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    mv a1, s2
 ; RV64I-NEXT:    addi a3, zero, 4
 ; RV64I-NEXT:    addi a4, zero, 2
 ; RV64I-NEXT:    call __atomic_compare_exchange_8
 ; RV64I-NEXT:    ld a2, 8(sp)
-; RV64I-NEXT:    beqz a0, .LBB208_1
-; RV64I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV64I-NEXT:    bnez a0, .LBB208_4
+; RV64I-NEXT:  # %bb.2: # %atomicrmw.start
+; RV64I-NEXT:    # in Loop: Header=BB208_1 Depth=1
+; RV64I-NEXT:    sd a2, 8(sp)
+; RV64I-NEXT:    bge s0, a2, .LBB208_1
+; RV64I-NEXT:  .LBB208_3: # %atomicrmw.start
+; RV64I-NEXT:    mv a2, s0
+; RV64I-NEXT:    j .LBB208_1
+; RV64I-NEXT:  .LBB208_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a2
 ; RV64I-NEXT:    ld s2, 16(sp)
 ; RV64I-NEXT:    ld s1, 24(sp)
@@ -15935,27 +16157,9 @@ define i64 @atomicrmw_min_i64_seq_cst(i6
 ; RV32I-NEXT:    lw a1, 4(a0)
 ; RV32I-NEXT:    lw a2, 0(a0)
 ; RV32I-NEXT:    mv s3, sp
-; RV32I-NEXT:  .LBB209_1: # %atomicrmw.start
-; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT:    beq a1, s0, .LBB209_3
-; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB209_1 Depth=1
-; RV32I-NEXT:    slt a0, s0, a1
+; RV32I-NEXT:    bne a1, s0, .LBB209_3
 ; RV32I-NEXT:    j .LBB209_4
-; RV32I-NEXT:  .LBB209_3: # in Loop: Header=BB209_1 Depth=1
-; RV32I-NEXT:    sltu a0, s2, a2
-; RV32I-NEXT:  .LBB209_4: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB209_1 Depth=1
-; RV32I-NEXT:    xori a0, a0, 1
-; RV32I-NEXT:    sw a2, 0(sp)
-; RV32I-NEXT:    mv a3, a1
-; RV32I-NEXT:    bnez a0, .LBB209_6
-; RV32I-NEXT:  # %bb.5: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB209_1 Depth=1
-; RV32I-NEXT:    mv a2, s2
-; RV32I-NEXT:    mv a3, s0
-; RV32I-NEXT:  .LBB209_6: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB209_1 Depth=1
+; RV32I-NEXT:  .LBB209_1: # %atomicrmw.start
 ; RV32I-NEXT:    sw a1, 4(sp)
 ; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    mv a1, s3
@@ -15964,8 +16168,24 @@ define i64 @atomicrmw_min_i64_seq_cst(i6
 ; RV32I-NEXT:    call __atomic_compare_exchange_8
 ; RV32I-NEXT:    lw a1, 4(sp)
 ; RV32I-NEXT:    lw a2, 0(sp)
-; RV32I-NEXT:    beqz a0, .LBB209_1
-; RV32I-NEXT:  # %bb.7: # %atomicrmw.end
+; RV32I-NEXT:    bnez a0, .LBB209_7
+; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
+; RV32I-NEXT:    beq a1, s0, .LBB209_4
+; RV32I-NEXT:  .LBB209_3: # %atomicrmw.start
+; RV32I-NEXT:    slt a0, s0, a1
+; RV32I-NEXT:    j .LBB209_5
+; RV32I-NEXT:  .LBB209_4:
+; RV32I-NEXT:    sltu a0, s2, a2
+; RV32I-NEXT:  .LBB209_5: # %atomicrmw.start
+; RV32I-NEXT:    xori a0, a0, 1
+; RV32I-NEXT:    sw a2, 0(sp)
+; RV32I-NEXT:    mv a3, a1
+; RV32I-NEXT:    bnez a0, .LBB209_1
+; RV32I-NEXT:  # %bb.6: # %atomicrmw.start
+; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    mv a3, s0
+; RV32I-NEXT:    j .LBB209_1
+; RV32I-NEXT:  .LBB209_7: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a2
 ; RV32I-NEXT:    lw s3, 12(sp)
 ; RV32I-NEXT:    lw s2, 16(sp)
@@ -15989,27 +16209,9 @@ define i64 @atomicrmw_min_i64_seq_cst(i6
 ; RV32IA-NEXT:    lw a1, 4(a0)
 ; RV32IA-NEXT:    lw a2, 0(a0)
 ; RV32IA-NEXT:    mv s3, sp
-; RV32IA-NEXT:  .LBB209_1: # %atomicrmw.start
-; RV32IA-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    beq a1, s0, .LBB209_3
-; RV32IA-NEXT:  # %bb.2: # %atomicrmw.start
-; RV32IA-NEXT:    # in Loop: Header=BB209_1 Depth=1
-; RV32IA-NEXT:    slt a0, s0, a1
+; RV32IA-NEXT:    bne a1, s0, .LBB209_3
 ; RV32IA-NEXT:    j .LBB209_4
-; RV32IA-NEXT:  .LBB209_3: # in Loop: Header=BB209_1 Depth=1
-; RV32IA-NEXT:    sltu a0, s2, a2
-; RV32IA-NEXT:  .LBB209_4: # %atomicrmw.start
-; RV32IA-NEXT:    # in Loop: Header=BB209_1 Depth=1
-; RV32IA-NEXT:    xori a0, a0, 1
-; RV32IA-NEXT:    sw a2, 0(sp)
-; RV32IA-NEXT:    mv a3, a1
-; RV32IA-NEXT:    bnez a0, .LBB209_6
-; RV32IA-NEXT:  # %bb.5: # %atomicrmw.start
-; RV32IA-NEXT:    # in Loop: Header=BB209_1 Depth=1
-; RV32IA-NEXT:    mv a2, s2
-; RV32IA-NEXT:    mv a3, s0
-; RV32IA-NEXT:  .LBB209_6: # %atomicrmw.start
-; RV32IA-NEXT:    # in Loop: Header=BB209_1 Depth=1
+; RV32IA-NEXT:  .LBB209_1: # %atomicrmw.start
 ; RV32IA-NEXT:    sw a1, 4(sp)
 ; RV32IA-NEXT:    mv a0, s1
 ; RV32IA-NEXT:    mv a1, s3
@@ -16018,8 +16220,24 @@ define i64 @atomicrmw_min_i64_seq_cst(i6
 ; RV32IA-NEXT:    call __atomic_compare_exchange_8
 ; RV32IA-NEXT:    lw a1, 4(sp)
 ; RV32IA-NEXT:    lw a2, 0(sp)
-; RV32IA-NEXT:    beqz a0, .LBB209_1
-; RV32IA-NEXT:  # %bb.7: # %atomicrmw.end
+; RV32IA-NEXT:    bnez a0, .LBB209_7
+; RV32IA-NEXT:  # %bb.2: # %atomicrmw.start
+; RV32IA-NEXT:    beq a1, s0, .LBB209_4
+; RV32IA-NEXT:  .LBB209_3: # %atomicrmw.start
+; RV32IA-NEXT:    slt a0, s0, a1
+; RV32IA-NEXT:    j .LBB209_5
+; RV32IA-NEXT:  .LBB209_4:
+; RV32IA-NEXT:    sltu a0, s2, a2
+; RV32IA-NEXT:  .LBB209_5: # %atomicrmw.start
+; RV32IA-NEXT:    xori a0, a0, 1
+; RV32IA-NEXT:    sw a2, 0(sp)
+; RV32IA-NEXT:    mv a3, a1
+; RV32IA-NEXT:    bnez a0, .LBB209_1
+; RV32IA-NEXT:  # %bb.6: # %atomicrmw.start
+; RV32IA-NEXT:    mv a2, s2
+; RV32IA-NEXT:    mv a3, s0
+; RV32IA-NEXT:    j .LBB209_1
+; RV32IA-NEXT:  .LBB209_7: # %atomicrmw.end
 ; RV32IA-NEXT:    mv a0, a2
 ; RV32IA-NEXT:    lw s3, 12(sp)
 ; RV32IA-NEXT:    lw s2, 16(sp)
@@ -16040,23 +16258,25 @@ define i64 @atomicrmw_min_i64_seq_cst(i6
 ; RV64I-NEXT:    mv s1, a0
 ; RV64I-NEXT:    ld a2, 0(a0)
 ; RV64I-NEXT:    addi s2, sp, 8
+; RV64I-NEXT:    sd a2, 8(sp)
+; RV64I-NEXT:    blt s0, a2, .LBB209_3
 ; RV64I-NEXT:  .LBB209_1: # %atomicrmw.start
 ; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT:    sd a2, 8(sp)
-; RV64I-NEXT:    bge s0, a2, .LBB209_3
-; RV64I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB209_1 Depth=1
-; RV64I-NEXT:    mv a2, s0
-; RV64I-NEXT:  .LBB209_3: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB209_1 Depth=1
 ; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    mv a1, s2
 ; RV64I-NEXT:    addi a3, zero, 5
 ; RV64I-NEXT:    addi a4, zero, 5
 ; RV64I-NEXT:    call __atomic_compare_exchange_8
 ; RV64I-NEXT:    ld a2, 8(sp)
-; RV64I-NEXT:    beqz a0, .LBB209_1
-; RV64I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV64I-NEXT:    bnez a0, .LBB209_4
+; RV64I-NEXT:  # %bb.2: # %atomicrmw.start
+; RV64I-NEXT:    # in Loop: Header=BB209_1 Depth=1
+; RV64I-NEXT:    sd a2, 8(sp)
+; RV64I-NEXT:    bge s0, a2, .LBB209_1
+; RV64I-NEXT:  .LBB209_3: # %atomicrmw.start
+; RV64I-NEXT:    mv a2, s0
+; RV64I-NEXT:    j .LBB209_1
+; RV64I-NEXT:  .LBB209_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a2
 ; RV64I-NEXT:    ld s2, 16(sp)
 ; RV64I-NEXT:    ld s1, 24(sp)
@@ -16088,26 +16308,9 @@ define i64 @atomicrmw_umax_i64_monotonic
 ; RV32I-NEXT:    lw a1, 4(a0)
 ; RV32I-NEXT:    lw a2, 0(a0)
 ; RV32I-NEXT:    mv s3, sp
-; RV32I-NEXT:  .LBB210_1: # %atomicrmw.start
-; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT:    beq a1, s0, .LBB210_3
-; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB210_1 Depth=1
-; RV32I-NEXT:    sltu a0, s0, a1
+; RV32I-NEXT:    bne a1, s0, .LBB210_3
 ; RV32I-NEXT:    j .LBB210_4
-; RV32I-NEXT:  .LBB210_3: # in Loop: Header=BB210_1 Depth=1
-; RV32I-NEXT:    sltu a0, s2, a2
-; RV32I-NEXT:  .LBB210_4: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB210_1 Depth=1
-; RV32I-NEXT:    sw a2, 0(sp)
-; RV32I-NEXT:    mv a3, a1
-; RV32I-NEXT:    bnez a0, .LBB210_6
-; RV32I-NEXT:  # %bb.5: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB210_1 Depth=1
-; RV32I-NEXT:    mv a2, s2
-; RV32I-NEXT:    mv a3, s0
-; RV32I-NEXT:  .LBB210_6: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB210_1 Depth=1
+; RV32I-NEXT:  .LBB210_1: # %atomicrmw.start
 ; RV32I-NEXT:    sw a1, 4(sp)
 ; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    mv a1, s3
@@ -16116,8 +16319,23 @@ define i64 @atomicrmw_umax_i64_monotonic
 ; RV32I-NEXT:    call __atomic_compare_exchange_8
 ; RV32I-NEXT:    lw a1, 4(sp)
 ; RV32I-NEXT:    lw a2, 0(sp)
-; RV32I-NEXT:    beqz a0, .LBB210_1
-; RV32I-NEXT:  # %bb.7: # %atomicrmw.end
+; RV32I-NEXT:    bnez a0, .LBB210_7
+; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
+; RV32I-NEXT:    beq a1, s0, .LBB210_4
+; RV32I-NEXT:  .LBB210_3: # %atomicrmw.start
+; RV32I-NEXT:    sltu a0, s0, a1
+; RV32I-NEXT:    j .LBB210_5
+; RV32I-NEXT:  .LBB210_4:
+; RV32I-NEXT:    sltu a0, s2, a2
+; RV32I-NEXT:  .LBB210_5: # %atomicrmw.start
+; RV32I-NEXT:    sw a2, 0(sp)
+; RV32I-NEXT:    mv a3, a1
+; RV32I-NEXT:    bnez a0, .LBB210_1
+; RV32I-NEXT:  # %bb.6: # %atomicrmw.start
+; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    mv a3, s0
+; RV32I-NEXT:    j .LBB210_1
+; RV32I-NEXT:  .LBB210_7: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a2
 ; RV32I-NEXT:    lw s3, 12(sp)
 ; RV32I-NEXT:    lw s2, 16(sp)
@@ -16141,26 +16359,9 @@ define i64 @atomicrmw_umax_i64_monotonic
 ; RV32IA-NEXT:    lw a1, 4(a0)
 ; RV32IA-NEXT:    lw a2, 0(a0)
 ; RV32IA-NEXT:    mv s3, sp
-; RV32IA-NEXT:  .LBB210_1: # %atomicrmw.start
-; RV32IA-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    beq a1, s0, .LBB210_3
-; RV32IA-NEXT:  # %bb.2: # %atomicrmw.start
-; RV32IA-NEXT:    # in Loop: Header=BB210_1 Depth=1
-; RV32IA-NEXT:    sltu a0, s0, a1
+; RV32IA-NEXT:    bne a1, s0, .LBB210_3
 ; RV32IA-NEXT:    j .LBB210_4
-; RV32IA-NEXT:  .LBB210_3: # in Loop: Header=BB210_1 Depth=1
-; RV32IA-NEXT:    sltu a0, s2, a2
-; RV32IA-NEXT:  .LBB210_4: # %atomicrmw.start
-; RV32IA-NEXT:    # in Loop: Header=BB210_1 Depth=1
-; RV32IA-NEXT:    sw a2, 0(sp)
-; RV32IA-NEXT:    mv a3, a1
-; RV32IA-NEXT:    bnez a0, .LBB210_6
-; RV32IA-NEXT:  # %bb.5: # %atomicrmw.start
-; RV32IA-NEXT:    # in Loop: Header=BB210_1 Depth=1
-; RV32IA-NEXT:    mv a2, s2
-; RV32IA-NEXT:    mv a3, s0
-; RV32IA-NEXT:  .LBB210_6: # %atomicrmw.start
-; RV32IA-NEXT:    # in Loop: Header=BB210_1 Depth=1
+; RV32IA-NEXT:  .LBB210_1: # %atomicrmw.start
 ; RV32IA-NEXT:    sw a1, 4(sp)
 ; RV32IA-NEXT:    mv a0, s1
 ; RV32IA-NEXT:    mv a1, s3
@@ -16169,8 +16370,23 @@ define i64 @atomicrmw_umax_i64_monotonic
 ; RV32IA-NEXT:    call __atomic_compare_exchange_8
 ; RV32IA-NEXT:    lw a1, 4(sp)
 ; RV32IA-NEXT:    lw a2, 0(sp)
-; RV32IA-NEXT:    beqz a0, .LBB210_1
-; RV32IA-NEXT:  # %bb.7: # %atomicrmw.end
+; RV32IA-NEXT:    bnez a0, .LBB210_7
+; RV32IA-NEXT:  # %bb.2: # %atomicrmw.start
+; RV32IA-NEXT:    beq a1, s0, .LBB210_4
+; RV32IA-NEXT:  .LBB210_3: # %atomicrmw.start
+; RV32IA-NEXT:    sltu a0, s0, a1
+; RV32IA-NEXT:    j .LBB210_5
+; RV32IA-NEXT:  .LBB210_4:
+; RV32IA-NEXT:    sltu a0, s2, a2
+; RV32IA-NEXT:  .LBB210_5: # %atomicrmw.start
+; RV32IA-NEXT:    sw a2, 0(sp)
+; RV32IA-NEXT:    mv a3, a1
+; RV32IA-NEXT:    bnez a0, .LBB210_1
+; RV32IA-NEXT:  # %bb.6: # %atomicrmw.start
+; RV32IA-NEXT:    mv a2, s2
+; RV32IA-NEXT:    mv a3, s0
+; RV32IA-NEXT:    j .LBB210_1
+; RV32IA-NEXT:  .LBB210_7: # %atomicrmw.end
 ; RV32IA-NEXT:    mv a0, a2
 ; RV32IA-NEXT:    lw s3, 12(sp)
 ; RV32IA-NEXT:    lw s2, 16(sp)
@@ -16191,23 +16407,25 @@ define i64 @atomicrmw_umax_i64_monotonic
 ; RV64I-NEXT:    mv s1, a0
 ; RV64I-NEXT:    ld a2, 0(a0)
 ; RV64I-NEXT:    addi s2, sp, 8
+; RV64I-NEXT:    sd a2, 8(sp)
+; RV64I-NEXT:    bgeu s0, a2, .LBB210_3
 ; RV64I-NEXT:  .LBB210_1: # %atomicrmw.start
 ; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT:    sd a2, 8(sp)
-; RV64I-NEXT:    bltu s0, a2, .LBB210_3
-; RV64I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB210_1 Depth=1
-; RV64I-NEXT:    mv a2, s0
-; RV64I-NEXT:  .LBB210_3: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB210_1 Depth=1
 ; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    mv a1, s2
 ; RV64I-NEXT:    mv a3, zero
 ; RV64I-NEXT:    mv a4, zero
 ; RV64I-NEXT:    call __atomic_compare_exchange_8
 ; RV64I-NEXT:    ld a2, 8(sp)
-; RV64I-NEXT:    beqz a0, .LBB210_1
-; RV64I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV64I-NEXT:    bnez a0, .LBB210_4
+; RV64I-NEXT:  # %bb.2: # %atomicrmw.start
+; RV64I-NEXT:    # in Loop: Header=BB210_1 Depth=1
+; RV64I-NEXT:    sd a2, 8(sp)
+; RV64I-NEXT:    bltu s0, a2, .LBB210_1
+; RV64I-NEXT:  .LBB210_3: # %atomicrmw.start
+; RV64I-NEXT:    mv a2, s0
+; RV64I-NEXT:    j .LBB210_1
+; RV64I-NEXT:  .LBB210_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a2
 ; RV64I-NEXT:    ld s2, 16(sp)
 ; RV64I-NEXT:    ld s1, 24(sp)
@@ -16239,26 +16457,9 @@ define i64 @atomicrmw_umax_i64_acquire(i
 ; RV32I-NEXT:    lw a1, 4(a0)
 ; RV32I-NEXT:    lw a2, 0(a0)
 ; RV32I-NEXT:    mv s3, sp
-; RV32I-NEXT:  .LBB211_1: # %atomicrmw.start
-; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT:    beq a1, s0, .LBB211_3
-; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB211_1 Depth=1
-; RV32I-NEXT:    sltu a0, s0, a1
+; RV32I-NEXT:    bne a1, s0, .LBB211_3
 ; RV32I-NEXT:    j .LBB211_4
-; RV32I-NEXT:  .LBB211_3: # in Loop: Header=BB211_1 Depth=1
-; RV32I-NEXT:    sltu a0, s2, a2
-; RV32I-NEXT:  .LBB211_4: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB211_1 Depth=1
-; RV32I-NEXT:    sw a2, 0(sp)
-; RV32I-NEXT:    mv a3, a1
-; RV32I-NEXT:    bnez a0, .LBB211_6
-; RV32I-NEXT:  # %bb.5: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB211_1 Depth=1
-; RV32I-NEXT:    mv a2, s2
-; RV32I-NEXT:    mv a3, s0
-; RV32I-NEXT:  .LBB211_6: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB211_1 Depth=1
+; RV32I-NEXT:  .LBB211_1: # %atomicrmw.start
 ; RV32I-NEXT:    sw a1, 4(sp)
 ; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    mv a1, s3
@@ -16267,8 +16468,23 @@ define i64 @atomicrmw_umax_i64_acquire(i
 ; RV32I-NEXT:    call __atomic_compare_exchange_8
 ; RV32I-NEXT:    lw a1, 4(sp)
 ; RV32I-NEXT:    lw a2, 0(sp)
-; RV32I-NEXT:    beqz a0, .LBB211_1
-; RV32I-NEXT:  # %bb.7: # %atomicrmw.end
+; RV32I-NEXT:    bnez a0, .LBB211_7
+; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
+; RV32I-NEXT:    beq a1, s0, .LBB211_4
+; RV32I-NEXT:  .LBB211_3: # %atomicrmw.start
+; RV32I-NEXT:    sltu a0, s0, a1
+; RV32I-NEXT:    j .LBB211_5
+; RV32I-NEXT:  .LBB211_4:
+; RV32I-NEXT:    sltu a0, s2, a2
+; RV32I-NEXT:  .LBB211_5: # %atomicrmw.start
+; RV32I-NEXT:    sw a2, 0(sp)
+; RV32I-NEXT:    mv a3, a1
+; RV32I-NEXT:    bnez a0, .LBB211_1
+; RV32I-NEXT:  # %bb.6: # %atomicrmw.start
+; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    mv a3, s0
+; RV32I-NEXT:    j .LBB211_1
+; RV32I-NEXT:  .LBB211_7: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a2
 ; RV32I-NEXT:    lw s3, 12(sp)
 ; RV32I-NEXT:    lw s2, 16(sp)
@@ -16292,26 +16508,9 @@ define i64 @atomicrmw_umax_i64_acquire(i
 ; RV32IA-NEXT:    lw a1, 4(a0)
 ; RV32IA-NEXT:    lw a2, 0(a0)
 ; RV32IA-NEXT:    mv s3, sp
-; RV32IA-NEXT:  .LBB211_1: # %atomicrmw.start
-; RV32IA-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    beq a1, s0, .LBB211_3
-; RV32IA-NEXT:  # %bb.2: # %atomicrmw.start
-; RV32IA-NEXT:    # in Loop: Header=BB211_1 Depth=1
-; RV32IA-NEXT:    sltu a0, s0, a1
+; RV32IA-NEXT:    bne a1, s0, .LBB211_3
 ; RV32IA-NEXT:    j .LBB211_4
-; RV32IA-NEXT:  .LBB211_3: # in Loop: Header=BB211_1 Depth=1
-; RV32IA-NEXT:    sltu a0, s2, a2
-; RV32IA-NEXT:  .LBB211_4: # %atomicrmw.start
-; RV32IA-NEXT:    # in Loop: Header=BB211_1 Depth=1
-; RV32IA-NEXT:    sw a2, 0(sp)
-; RV32IA-NEXT:    mv a3, a1
-; RV32IA-NEXT:    bnez a0, .LBB211_6
-; RV32IA-NEXT:  # %bb.5: # %atomicrmw.start
-; RV32IA-NEXT:    # in Loop: Header=BB211_1 Depth=1
-; RV32IA-NEXT:    mv a2, s2
-; RV32IA-NEXT:    mv a3, s0
-; RV32IA-NEXT:  .LBB211_6: # %atomicrmw.start
-; RV32IA-NEXT:    # in Loop: Header=BB211_1 Depth=1
+; RV32IA-NEXT:  .LBB211_1: # %atomicrmw.start
 ; RV32IA-NEXT:    sw a1, 4(sp)
 ; RV32IA-NEXT:    mv a0, s1
 ; RV32IA-NEXT:    mv a1, s3
@@ -16320,8 +16519,23 @@ define i64 @atomicrmw_umax_i64_acquire(i
 ; RV32IA-NEXT:    call __atomic_compare_exchange_8
 ; RV32IA-NEXT:    lw a1, 4(sp)
 ; RV32IA-NEXT:    lw a2, 0(sp)
-; RV32IA-NEXT:    beqz a0, .LBB211_1
-; RV32IA-NEXT:  # %bb.7: # %atomicrmw.end
+; RV32IA-NEXT:    bnez a0, .LBB211_7
+; RV32IA-NEXT:  # %bb.2: # %atomicrmw.start
+; RV32IA-NEXT:    beq a1, s0, .LBB211_4
+; RV32IA-NEXT:  .LBB211_3: # %atomicrmw.start
+; RV32IA-NEXT:    sltu a0, s0, a1
+; RV32IA-NEXT:    j .LBB211_5
+; RV32IA-NEXT:  .LBB211_4:
+; RV32IA-NEXT:    sltu a0, s2, a2
+; RV32IA-NEXT:  .LBB211_5: # %atomicrmw.start
+; RV32IA-NEXT:    sw a2, 0(sp)
+; RV32IA-NEXT:    mv a3, a1
+; RV32IA-NEXT:    bnez a0, .LBB211_1
+; RV32IA-NEXT:  # %bb.6: # %atomicrmw.start
+; RV32IA-NEXT:    mv a2, s2
+; RV32IA-NEXT:    mv a3, s0
+; RV32IA-NEXT:    j .LBB211_1
+; RV32IA-NEXT:  .LBB211_7: # %atomicrmw.end
 ; RV32IA-NEXT:    mv a0, a2
 ; RV32IA-NEXT:    lw s3, 12(sp)
 ; RV32IA-NEXT:    lw s2, 16(sp)
@@ -16342,24 +16556,26 @@ define i64 @atomicrmw_umax_i64_acquire(i
 ; RV64I-NEXT:    mv s1, a0
 ; RV64I-NEXT:    ld a2, 0(a0)
 ; RV64I-NEXT:    addi s2, sp, 8
+; RV64I-NEXT:    sd a2, 8(sp)
+; RV64I-NEXT:    bgeu s0, a2, .LBB211_3
 ; RV64I-NEXT:  .LBB211_1: # %atomicrmw.start
 ; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT:    sd a2, 8(sp)
-; RV64I-NEXT:    bltu s0, a2, .LBB211_3
-; RV64I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB211_1 Depth=1
-; RV64I-NEXT:    mv a2, s0
-; RV64I-NEXT:  .LBB211_3: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB211_1 Depth=1
 ; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    mv a1, s2
 ; RV64I-NEXT:    addi a3, zero, 2
 ; RV64I-NEXT:    addi a4, zero, 2
 ; RV64I-NEXT:    call __atomic_compare_exchange_8
 ; RV64I-NEXT:    ld a2, 8(sp)
-; RV64I-NEXT:    beqz a0, .LBB211_1
-; RV64I-NEXT:  # %bb.4: # %atomicrmw.end
-; RV64I-NEXT:    mv a0, a2
+; RV64I-NEXT:    bnez a0, .LBB211_4
+; RV64I-NEXT:  # %bb.2: # %atomicrmw.start
+; RV64I-NEXT:    # in Loop: Header=BB211_1 Depth=1
+; RV64I-NEXT:    sd a2, 8(sp)
+; RV64I-NEXT:    bltu s0, a2, .LBB211_1
+; RV64I-NEXT:  .LBB211_3: # %atomicrmw.start
+; RV64I-NEXT:    mv a2, s0
+; RV64I-NEXT:    j .LBB211_1
+; RV64I-NEXT:  .LBB211_4: # %atomicrmw.end
+; RV64I-NEXT:    mv a0, a2
 ; RV64I-NEXT:    ld s2, 16(sp)
 ; RV64I-NEXT:    ld s1, 24(sp)
 ; RV64I-NEXT:    ld s0, 32(sp)
@@ -16390,26 +16606,9 @@ define i64 @atomicrmw_umax_i64_release(i
 ; RV32I-NEXT:    lw a1, 4(a0)
 ; RV32I-NEXT:    lw a2, 0(a0)
 ; RV32I-NEXT:    mv s3, sp
-; RV32I-NEXT:  .LBB212_1: # %atomicrmw.start
-; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT:    beq a1, s0, .LBB212_3
-; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB212_1 Depth=1
-; RV32I-NEXT:    sltu a0, s0, a1
+; RV32I-NEXT:    bne a1, s0, .LBB212_3
 ; RV32I-NEXT:    j .LBB212_4
-; RV32I-NEXT:  .LBB212_3: # in Loop: Header=BB212_1 Depth=1
-; RV32I-NEXT:    sltu a0, s2, a2
-; RV32I-NEXT:  .LBB212_4: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB212_1 Depth=1
-; RV32I-NEXT:    sw a2, 0(sp)
-; RV32I-NEXT:    mv a3, a1
-; RV32I-NEXT:    bnez a0, .LBB212_6
-; RV32I-NEXT:  # %bb.5: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB212_1 Depth=1
-; RV32I-NEXT:    mv a2, s2
-; RV32I-NEXT:    mv a3, s0
-; RV32I-NEXT:  .LBB212_6: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB212_1 Depth=1
+; RV32I-NEXT:  .LBB212_1: # %atomicrmw.start
 ; RV32I-NEXT:    sw a1, 4(sp)
 ; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    mv a1, s3
@@ -16418,8 +16617,23 @@ define i64 @atomicrmw_umax_i64_release(i
 ; RV32I-NEXT:    call __atomic_compare_exchange_8
 ; RV32I-NEXT:    lw a1, 4(sp)
 ; RV32I-NEXT:    lw a2, 0(sp)
-; RV32I-NEXT:    beqz a0, .LBB212_1
-; RV32I-NEXT:  # %bb.7: # %atomicrmw.end
+; RV32I-NEXT:    bnez a0, .LBB212_7
+; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
+; RV32I-NEXT:    beq a1, s0, .LBB212_4
+; RV32I-NEXT:  .LBB212_3: # %atomicrmw.start
+; RV32I-NEXT:    sltu a0, s0, a1
+; RV32I-NEXT:    j .LBB212_5
+; RV32I-NEXT:  .LBB212_4:
+; RV32I-NEXT:    sltu a0, s2, a2
+; RV32I-NEXT:  .LBB212_5: # %atomicrmw.start
+; RV32I-NEXT:    sw a2, 0(sp)
+; RV32I-NEXT:    mv a3, a1
+; RV32I-NEXT:    bnez a0, .LBB212_1
+; RV32I-NEXT:  # %bb.6: # %atomicrmw.start
+; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    mv a3, s0
+; RV32I-NEXT:    j .LBB212_1
+; RV32I-NEXT:  .LBB212_7: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a2
 ; RV32I-NEXT:    lw s3, 12(sp)
 ; RV32I-NEXT:    lw s2, 16(sp)
@@ -16443,26 +16657,9 @@ define i64 @atomicrmw_umax_i64_release(i
 ; RV32IA-NEXT:    lw a1, 4(a0)
 ; RV32IA-NEXT:    lw a2, 0(a0)
 ; RV32IA-NEXT:    mv s3, sp
-; RV32IA-NEXT:  .LBB212_1: # %atomicrmw.start
-; RV32IA-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    beq a1, s0, .LBB212_3
-; RV32IA-NEXT:  # %bb.2: # %atomicrmw.start
-; RV32IA-NEXT:    # in Loop: Header=BB212_1 Depth=1
-; RV32IA-NEXT:    sltu a0, s0, a1
+; RV32IA-NEXT:    bne a1, s0, .LBB212_3
 ; RV32IA-NEXT:    j .LBB212_4
-; RV32IA-NEXT:  .LBB212_3: # in Loop: Header=BB212_1 Depth=1
-; RV32IA-NEXT:    sltu a0, s2, a2
-; RV32IA-NEXT:  .LBB212_4: # %atomicrmw.start
-; RV32IA-NEXT:    # in Loop: Header=BB212_1 Depth=1
-; RV32IA-NEXT:    sw a2, 0(sp)
-; RV32IA-NEXT:    mv a3, a1
-; RV32IA-NEXT:    bnez a0, .LBB212_6
-; RV32IA-NEXT:  # %bb.5: # %atomicrmw.start
-; RV32IA-NEXT:    # in Loop: Header=BB212_1 Depth=1
-; RV32IA-NEXT:    mv a2, s2
-; RV32IA-NEXT:    mv a3, s0
-; RV32IA-NEXT:  .LBB212_6: # %atomicrmw.start
-; RV32IA-NEXT:    # in Loop: Header=BB212_1 Depth=1
+; RV32IA-NEXT:  .LBB212_1: # %atomicrmw.start
 ; RV32IA-NEXT:    sw a1, 4(sp)
 ; RV32IA-NEXT:    mv a0, s1
 ; RV32IA-NEXT:    mv a1, s3
@@ -16471,8 +16668,23 @@ define i64 @atomicrmw_umax_i64_release(i
 ; RV32IA-NEXT:    call __atomic_compare_exchange_8
 ; RV32IA-NEXT:    lw a1, 4(sp)
 ; RV32IA-NEXT:    lw a2, 0(sp)
-; RV32IA-NEXT:    beqz a0, .LBB212_1
-; RV32IA-NEXT:  # %bb.7: # %atomicrmw.end
+; RV32IA-NEXT:    bnez a0, .LBB212_7
+; RV32IA-NEXT:  # %bb.2: # %atomicrmw.start
+; RV32IA-NEXT:    beq a1, s0, .LBB212_4
+; RV32IA-NEXT:  .LBB212_3: # %atomicrmw.start
+; RV32IA-NEXT:    sltu a0, s0, a1
+; RV32IA-NEXT:    j .LBB212_5
+; RV32IA-NEXT:  .LBB212_4:
+; RV32IA-NEXT:    sltu a0, s2, a2
+; RV32IA-NEXT:  .LBB212_5: # %atomicrmw.start
+; RV32IA-NEXT:    sw a2, 0(sp)
+; RV32IA-NEXT:    mv a3, a1
+; RV32IA-NEXT:    bnez a0, .LBB212_1
+; RV32IA-NEXT:  # %bb.6: # %atomicrmw.start
+; RV32IA-NEXT:    mv a2, s2
+; RV32IA-NEXT:    mv a3, s0
+; RV32IA-NEXT:    j .LBB212_1
+; RV32IA-NEXT:  .LBB212_7: # %atomicrmw.end
 ; RV32IA-NEXT:    mv a0, a2
 ; RV32IA-NEXT:    lw s3, 12(sp)
 ; RV32IA-NEXT:    lw s2, 16(sp)
@@ -16493,23 +16705,25 @@ define i64 @atomicrmw_umax_i64_release(i
 ; RV64I-NEXT:    mv s1, a0
 ; RV64I-NEXT:    ld a2, 0(a0)
 ; RV64I-NEXT:    addi s2, sp, 8
+; RV64I-NEXT:    sd a2, 8(sp)
+; RV64I-NEXT:    bgeu s0, a2, .LBB212_3
 ; RV64I-NEXT:  .LBB212_1: # %atomicrmw.start
 ; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT:    sd a2, 8(sp)
-; RV64I-NEXT:    bltu s0, a2, .LBB212_3
-; RV64I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB212_1 Depth=1
-; RV64I-NEXT:    mv a2, s0
-; RV64I-NEXT:  .LBB212_3: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB212_1 Depth=1
 ; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    mv a1, s2
 ; RV64I-NEXT:    addi a3, zero, 3
 ; RV64I-NEXT:    mv a4, zero
 ; RV64I-NEXT:    call __atomic_compare_exchange_8
 ; RV64I-NEXT:    ld a2, 8(sp)
-; RV64I-NEXT:    beqz a0, .LBB212_1
-; RV64I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV64I-NEXT:    bnez a0, .LBB212_4
+; RV64I-NEXT:  # %bb.2: # %atomicrmw.start
+; RV64I-NEXT:    # in Loop: Header=BB212_1 Depth=1
+; RV64I-NEXT:    sd a2, 8(sp)
+; RV64I-NEXT:    bltu s0, a2, .LBB212_1
+; RV64I-NEXT:  .LBB212_3: # %atomicrmw.start
+; RV64I-NEXT:    mv a2, s0
+; RV64I-NEXT:    j .LBB212_1
+; RV64I-NEXT:  .LBB212_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a2
 ; RV64I-NEXT:    ld s2, 16(sp)
 ; RV64I-NEXT:    ld s1, 24(sp)
@@ -16541,26 +16755,9 @@ define i64 @atomicrmw_umax_i64_acq_rel(i
 ; RV32I-NEXT:    lw a1, 4(a0)
 ; RV32I-NEXT:    lw a2, 0(a0)
 ; RV32I-NEXT:    mv s3, sp
-; RV32I-NEXT:  .LBB213_1: # %atomicrmw.start
-; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT:    beq a1, s0, .LBB213_3
-; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB213_1 Depth=1
-; RV32I-NEXT:    sltu a0, s0, a1
+; RV32I-NEXT:    bne a1, s0, .LBB213_3
 ; RV32I-NEXT:    j .LBB213_4
-; RV32I-NEXT:  .LBB213_3: # in Loop: Header=BB213_1 Depth=1
-; RV32I-NEXT:    sltu a0, s2, a2
-; RV32I-NEXT:  .LBB213_4: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB213_1 Depth=1
-; RV32I-NEXT:    sw a2, 0(sp)
-; RV32I-NEXT:    mv a3, a1
-; RV32I-NEXT:    bnez a0, .LBB213_6
-; RV32I-NEXT:  # %bb.5: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB213_1 Depth=1
-; RV32I-NEXT:    mv a2, s2
-; RV32I-NEXT:    mv a3, s0
-; RV32I-NEXT:  .LBB213_6: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB213_1 Depth=1
+; RV32I-NEXT:  .LBB213_1: # %atomicrmw.start
 ; RV32I-NEXT:    sw a1, 4(sp)
 ; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    mv a1, s3
@@ -16569,8 +16766,23 @@ define i64 @atomicrmw_umax_i64_acq_rel(i
 ; RV32I-NEXT:    call __atomic_compare_exchange_8
 ; RV32I-NEXT:    lw a1, 4(sp)
 ; RV32I-NEXT:    lw a2, 0(sp)
-; RV32I-NEXT:    beqz a0, .LBB213_1
-; RV32I-NEXT:  # %bb.7: # %atomicrmw.end
+; RV32I-NEXT:    bnez a0, .LBB213_7
+; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
+; RV32I-NEXT:    beq a1, s0, .LBB213_4
+; RV32I-NEXT:  .LBB213_3: # %atomicrmw.start
+; RV32I-NEXT:    sltu a0, s0, a1
+; RV32I-NEXT:    j .LBB213_5
+; RV32I-NEXT:  .LBB213_4:
+; RV32I-NEXT:    sltu a0, s2, a2
+; RV32I-NEXT:  .LBB213_5: # %atomicrmw.start
+; RV32I-NEXT:    sw a2, 0(sp)
+; RV32I-NEXT:    mv a3, a1
+; RV32I-NEXT:    bnez a0, .LBB213_1
+; RV32I-NEXT:  # %bb.6: # %atomicrmw.start
+; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    mv a3, s0
+; RV32I-NEXT:    j .LBB213_1
+; RV32I-NEXT:  .LBB213_7: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a2
 ; RV32I-NEXT:    lw s3, 12(sp)
 ; RV32I-NEXT:    lw s2, 16(sp)
@@ -16594,26 +16806,9 @@ define i64 @atomicrmw_umax_i64_acq_rel(i
 ; RV32IA-NEXT:    lw a1, 4(a0)
 ; RV32IA-NEXT:    lw a2, 0(a0)
 ; RV32IA-NEXT:    mv s3, sp
-; RV32IA-NEXT:  .LBB213_1: # %atomicrmw.start
-; RV32IA-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    beq a1, s0, .LBB213_3
-; RV32IA-NEXT:  # %bb.2: # %atomicrmw.start
-; RV32IA-NEXT:    # in Loop: Header=BB213_1 Depth=1
-; RV32IA-NEXT:    sltu a0, s0, a1
+; RV32IA-NEXT:    bne a1, s0, .LBB213_3
 ; RV32IA-NEXT:    j .LBB213_4
-; RV32IA-NEXT:  .LBB213_3: # in Loop: Header=BB213_1 Depth=1
-; RV32IA-NEXT:    sltu a0, s2, a2
-; RV32IA-NEXT:  .LBB213_4: # %atomicrmw.start
-; RV32IA-NEXT:    # in Loop: Header=BB213_1 Depth=1
-; RV32IA-NEXT:    sw a2, 0(sp)
-; RV32IA-NEXT:    mv a3, a1
-; RV32IA-NEXT:    bnez a0, .LBB213_6
-; RV32IA-NEXT:  # %bb.5: # %atomicrmw.start
-; RV32IA-NEXT:    # in Loop: Header=BB213_1 Depth=1
-; RV32IA-NEXT:    mv a2, s2
-; RV32IA-NEXT:    mv a3, s0
-; RV32IA-NEXT:  .LBB213_6: # %atomicrmw.start
-; RV32IA-NEXT:    # in Loop: Header=BB213_1 Depth=1
+; RV32IA-NEXT:  .LBB213_1: # %atomicrmw.start
 ; RV32IA-NEXT:    sw a1, 4(sp)
 ; RV32IA-NEXT:    mv a0, s1
 ; RV32IA-NEXT:    mv a1, s3
@@ -16622,8 +16817,23 @@ define i64 @atomicrmw_umax_i64_acq_rel(i
 ; RV32IA-NEXT:    call __atomic_compare_exchange_8
 ; RV32IA-NEXT:    lw a1, 4(sp)
 ; RV32IA-NEXT:    lw a2, 0(sp)
-; RV32IA-NEXT:    beqz a0, .LBB213_1
-; RV32IA-NEXT:  # %bb.7: # %atomicrmw.end
+; RV32IA-NEXT:    bnez a0, .LBB213_7
+; RV32IA-NEXT:  # %bb.2: # %atomicrmw.start
+; RV32IA-NEXT:    beq a1, s0, .LBB213_4
+; RV32IA-NEXT:  .LBB213_3: # %atomicrmw.start
+; RV32IA-NEXT:    sltu a0, s0, a1
+; RV32IA-NEXT:    j .LBB213_5
+; RV32IA-NEXT:  .LBB213_4:
+; RV32IA-NEXT:    sltu a0, s2, a2
+; RV32IA-NEXT:  .LBB213_5: # %atomicrmw.start
+; RV32IA-NEXT:    sw a2, 0(sp)
+; RV32IA-NEXT:    mv a3, a1
+; RV32IA-NEXT:    bnez a0, .LBB213_1
+; RV32IA-NEXT:  # %bb.6: # %atomicrmw.start
+; RV32IA-NEXT:    mv a2, s2
+; RV32IA-NEXT:    mv a3, s0
+; RV32IA-NEXT:    j .LBB213_1
+; RV32IA-NEXT:  .LBB213_7: # %atomicrmw.end
 ; RV32IA-NEXT:    mv a0, a2
 ; RV32IA-NEXT:    lw s3, 12(sp)
 ; RV32IA-NEXT:    lw s2, 16(sp)
@@ -16644,23 +16854,25 @@ define i64 @atomicrmw_umax_i64_acq_rel(i
 ; RV64I-NEXT:    mv s1, a0
 ; RV64I-NEXT:    ld a2, 0(a0)
 ; RV64I-NEXT:    addi s2, sp, 8
+; RV64I-NEXT:    sd a2, 8(sp)
+; RV64I-NEXT:    bgeu s0, a2, .LBB213_3
 ; RV64I-NEXT:  .LBB213_1: # %atomicrmw.start
 ; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT:    sd a2, 8(sp)
-; RV64I-NEXT:    bltu s0, a2, .LBB213_3
-; RV64I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB213_1 Depth=1
-; RV64I-NEXT:    mv a2, s0
-; RV64I-NEXT:  .LBB213_3: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB213_1 Depth=1
 ; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    mv a1, s2
 ; RV64I-NEXT:    addi a3, zero, 4
 ; RV64I-NEXT:    addi a4, zero, 2
 ; RV64I-NEXT:    call __atomic_compare_exchange_8
 ; RV64I-NEXT:    ld a2, 8(sp)
-; RV64I-NEXT:    beqz a0, .LBB213_1
-; RV64I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV64I-NEXT:    bnez a0, .LBB213_4
+; RV64I-NEXT:  # %bb.2: # %atomicrmw.start
+; RV64I-NEXT:    # in Loop: Header=BB213_1 Depth=1
+; RV64I-NEXT:    sd a2, 8(sp)
+; RV64I-NEXT:    bltu s0, a2, .LBB213_1
+; RV64I-NEXT:  .LBB213_3: # %atomicrmw.start
+; RV64I-NEXT:    mv a2, s0
+; RV64I-NEXT:    j .LBB213_1
+; RV64I-NEXT:  .LBB213_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a2
 ; RV64I-NEXT:    ld s2, 16(sp)
 ; RV64I-NEXT:    ld s1, 24(sp)
@@ -16692,26 +16904,9 @@ define i64 @atomicrmw_umax_i64_seq_cst(i
 ; RV32I-NEXT:    lw a1, 4(a0)
 ; RV32I-NEXT:    lw a2, 0(a0)
 ; RV32I-NEXT:    mv s3, sp
-; RV32I-NEXT:  .LBB214_1: # %atomicrmw.start
-; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT:    beq a1, s0, .LBB214_3
-; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB214_1 Depth=1
-; RV32I-NEXT:    sltu a0, s0, a1
+; RV32I-NEXT:    bne a1, s0, .LBB214_3
 ; RV32I-NEXT:    j .LBB214_4
-; RV32I-NEXT:  .LBB214_3: # in Loop: Header=BB214_1 Depth=1
-; RV32I-NEXT:    sltu a0, s2, a2
-; RV32I-NEXT:  .LBB214_4: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB214_1 Depth=1
-; RV32I-NEXT:    sw a2, 0(sp)
-; RV32I-NEXT:    mv a3, a1
-; RV32I-NEXT:    bnez a0, .LBB214_6
-; RV32I-NEXT:  # %bb.5: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB214_1 Depth=1
-; RV32I-NEXT:    mv a2, s2
-; RV32I-NEXT:    mv a3, s0
-; RV32I-NEXT:  .LBB214_6: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB214_1 Depth=1
+; RV32I-NEXT:  .LBB214_1: # %atomicrmw.start
 ; RV32I-NEXT:    sw a1, 4(sp)
 ; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    mv a1, s3
@@ -16720,8 +16915,23 @@ define i64 @atomicrmw_umax_i64_seq_cst(i
 ; RV32I-NEXT:    call __atomic_compare_exchange_8
 ; RV32I-NEXT:    lw a1, 4(sp)
 ; RV32I-NEXT:    lw a2, 0(sp)
-; RV32I-NEXT:    beqz a0, .LBB214_1
-; RV32I-NEXT:  # %bb.7: # %atomicrmw.end
+; RV32I-NEXT:    bnez a0, .LBB214_7
+; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
+; RV32I-NEXT:    beq a1, s0, .LBB214_4
+; RV32I-NEXT:  .LBB214_3: # %atomicrmw.start
+; RV32I-NEXT:    sltu a0, s0, a1
+; RV32I-NEXT:    j .LBB214_5
+; RV32I-NEXT:  .LBB214_4:
+; RV32I-NEXT:    sltu a0, s2, a2
+; RV32I-NEXT:  .LBB214_5: # %atomicrmw.start
+; RV32I-NEXT:    sw a2, 0(sp)
+; RV32I-NEXT:    mv a3, a1
+; RV32I-NEXT:    bnez a0, .LBB214_1
+; RV32I-NEXT:  # %bb.6: # %atomicrmw.start
+; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    mv a3, s0
+; RV32I-NEXT:    j .LBB214_1
+; RV32I-NEXT:  .LBB214_7: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a2
 ; RV32I-NEXT:    lw s3, 12(sp)
 ; RV32I-NEXT:    lw s2, 16(sp)
@@ -16745,26 +16955,9 @@ define i64 @atomicrmw_umax_i64_seq_cst(i
 ; RV32IA-NEXT:    lw a1, 4(a0)
 ; RV32IA-NEXT:    lw a2, 0(a0)
 ; RV32IA-NEXT:    mv s3, sp
-; RV32IA-NEXT:  .LBB214_1: # %atomicrmw.start
-; RV32IA-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    beq a1, s0, .LBB214_3
-; RV32IA-NEXT:  # %bb.2: # %atomicrmw.start
-; RV32IA-NEXT:    # in Loop: Header=BB214_1 Depth=1
-; RV32IA-NEXT:    sltu a0, s0, a1
+; RV32IA-NEXT:    bne a1, s0, .LBB214_3
 ; RV32IA-NEXT:    j .LBB214_4
-; RV32IA-NEXT:  .LBB214_3: # in Loop: Header=BB214_1 Depth=1
-; RV32IA-NEXT:    sltu a0, s2, a2
-; RV32IA-NEXT:  .LBB214_4: # %atomicrmw.start
-; RV32IA-NEXT:    # in Loop: Header=BB214_1 Depth=1
-; RV32IA-NEXT:    sw a2, 0(sp)
-; RV32IA-NEXT:    mv a3, a1
-; RV32IA-NEXT:    bnez a0, .LBB214_6
-; RV32IA-NEXT:  # %bb.5: # %atomicrmw.start
-; RV32IA-NEXT:    # in Loop: Header=BB214_1 Depth=1
-; RV32IA-NEXT:    mv a2, s2
-; RV32IA-NEXT:    mv a3, s0
-; RV32IA-NEXT:  .LBB214_6: # %atomicrmw.start
-; RV32IA-NEXT:    # in Loop: Header=BB214_1 Depth=1
+; RV32IA-NEXT:  .LBB214_1: # %atomicrmw.start
 ; RV32IA-NEXT:    sw a1, 4(sp)
 ; RV32IA-NEXT:    mv a0, s1
 ; RV32IA-NEXT:    mv a1, s3
@@ -16773,8 +16966,23 @@ define i64 @atomicrmw_umax_i64_seq_cst(i
 ; RV32IA-NEXT:    call __atomic_compare_exchange_8
 ; RV32IA-NEXT:    lw a1, 4(sp)
 ; RV32IA-NEXT:    lw a2, 0(sp)
-; RV32IA-NEXT:    beqz a0, .LBB214_1
-; RV32IA-NEXT:  # %bb.7: # %atomicrmw.end
+; RV32IA-NEXT:    bnez a0, .LBB214_7
+; RV32IA-NEXT:  # %bb.2: # %atomicrmw.start
+; RV32IA-NEXT:    beq a1, s0, .LBB214_4
+; RV32IA-NEXT:  .LBB214_3: # %atomicrmw.start
+; RV32IA-NEXT:    sltu a0, s0, a1
+; RV32IA-NEXT:    j .LBB214_5
+; RV32IA-NEXT:  .LBB214_4:
+; RV32IA-NEXT:    sltu a0, s2, a2
+; RV32IA-NEXT:  .LBB214_5: # %atomicrmw.start
+; RV32IA-NEXT:    sw a2, 0(sp)
+; RV32IA-NEXT:    mv a3, a1
+; RV32IA-NEXT:    bnez a0, .LBB214_1
+; RV32IA-NEXT:  # %bb.6: # %atomicrmw.start
+; RV32IA-NEXT:    mv a2, s2
+; RV32IA-NEXT:    mv a3, s0
+; RV32IA-NEXT:    j .LBB214_1
+; RV32IA-NEXT:  .LBB214_7: # %atomicrmw.end
 ; RV32IA-NEXT:    mv a0, a2
 ; RV32IA-NEXT:    lw s3, 12(sp)
 ; RV32IA-NEXT:    lw s2, 16(sp)
@@ -16795,23 +17003,25 @@ define i64 @atomicrmw_umax_i64_seq_cst(i
 ; RV64I-NEXT:    mv s1, a0
 ; RV64I-NEXT:    ld a2, 0(a0)
 ; RV64I-NEXT:    addi s2, sp, 8
+; RV64I-NEXT:    sd a2, 8(sp)
+; RV64I-NEXT:    bgeu s0, a2, .LBB214_3
 ; RV64I-NEXT:  .LBB214_1: # %atomicrmw.start
 ; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT:    sd a2, 8(sp)
-; RV64I-NEXT:    bltu s0, a2, .LBB214_3
-; RV64I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB214_1 Depth=1
-; RV64I-NEXT:    mv a2, s0
-; RV64I-NEXT:  .LBB214_3: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB214_1 Depth=1
 ; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    mv a1, s2
 ; RV64I-NEXT:    addi a3, zero, 5
 ; RV64I-NEXT:    addi a4, zero, 5
 ; RV64I-NEXT:    call __atomic_compare_exchange_8
 ; RV64I-NEXT:    ld a2, 8(sp)
-; RV64I-NEXT:    beqz a0, .LBB214_1
-; RV64I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV64I-NEXT:    bnez a0, .LBB214_4
+; RV64I-NEXT:  # %bb.2: # %atomicrmw.start
+; RV64I-NEXT:    # in Loop: Header=BB214_1 Depth=1
+; RV64I-NEXT:    sd a2, 8(sp)
+; RV64I-NEXT:    bltu s0, a2, .LBB214_1
+; RV64I-NEXT:  .LBB214_3: # %atomicrmw.start
+; RV64I-NEXT:    mv a2, s0
+; RV64I-NEXT:    j .LBB214_1
+; RV64I-NEXT:  .LBB214_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a2
 ; RV64I-NEXT:    ld s2, 16(sp)
 ; RV64I-NEXT:    ld s1, 24(sp)
@@ -16843,27 +17053,9 @@ define i64 @atomicrmw_umin_i64_monotonic
 ; RV32I-NEXT:    lw a1, 4(a0)
 ; RV32I-NEXT:    lw a2, 0(a0)
 ; RV32I-NEXT:    mv s3, sp
-; RV32I-NEXT:  .LBB215_1: # %atomicrmw.start
-; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT:    beq a1, s0, .LBB215_3
-; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB215_1 Depth=1
-; RV32I-NEXT:    sltu a0, s0, a1
+; RV32I-NEXT:    bne a1, s0, .LBB215_3
 ; RV32I-NEXT:    j .LBB215_4
-; RV32I-NEXT:  .LBB215_3: # in Loop: Header=BB215_1 Depth=1
-; RV32I-NEXT:    sltu a0, s2, a2
-; RV32I-NEXT:  .LBB215_4: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB215_1 Depth=1
-; RV32I-NEXT:    xori a0, a0, 1
-; RV32I-NEXT:    sw a2, 0(sp)
-; RV32I-NEXT:    mv a3, a1
-; RV32I-NEXT:    bnez a0, .LBB215_6
-; RV32I-NEXT:  # %bb.5: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB215_1 Depth=1
-; RV32I-NEXT:    mv a2, s2
-; RV32I-NEXT:    mv a3, s0
-; RV32I-NEXT:  .LBB215_6: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB215_1 Depth=1
+; RV32I-NEXT:  .LBB215_1: # %atomicrmw.start
 ; RV32I-NEXT:    sw a1, 4(sp)
 ; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    mv a1, s3
@@ -16872,8 +17064,24 @@ define i64 @atomicrmw_umin_i64_monotonic
 ; RV32I-NEXT:    call __atomic_compare_exchange_8
 ; RV32I-NEXT:    lw a1, 4(sp)
 ; RV32I-NEXT:    lw a2, 0(sp)
-; RV32I-NEXT:    beqz a0, .LBB215_1
-; RV32I-NEXT:  # %bb.7: # %atomicrmw.end
+; RV32I-NEXT:    bnez a0, .LBB215_7
+; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
+; RV32I-NEXT:    beq a1, s0, .LBB215_4
+; RV32I-NEXT:  .LBB215_3: # %atomicrmw.start
+; RV32I-NEXT:    sltu a0, s0, a1
+; RV32I-NEXT:    j .LBB215_5
+; RV32I-NEXT:  .LBB215_4:
+; RV32I-NEXT:    sltu a0, s2, a2
+; RV32I-NEXT:  .LBB215_5: # %atomicrmw.start
+; RV32I-NEXT:    xori a0, a0, 1
+; RV32I-NEXT:    sw a2, 0(sp)
+; RV32I-NEXT:    mv a3, a1
+; RV32I-NEXT:    bnez a0, .LBB215_1
+; RV32I-NEXT:  # %bb.6: # %atomicrmw.start
+; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    mv a3, s0
+; RV32I-NEXT:    j .LBB215_1
+; RV32I-NEXT:  .LBB215_7: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a2
 ; RV32I-NEXT:    lw s3, 12(sp)
 ; RV32I-NEXT:    lw s2, 16(sp)
@@ -16897,27 +17105,9 @@ define i64 @atomicrmw_umin_i64_monotonic
 ; RV32IA-NEXT:    lw a1, 4(a0)
 ; RV32IA-NEXT:    lw a2, 0(a0)
 ; RV32IA-NEXT:    mv s3, sp
-; RV32IA-NEXT:  .LBB215_1: # %atomicrmw.start
-; RV32IA-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    beq a1, s0, .LBB215_3
-; RV32IA-NEXT:  # %bb.2: # %atomicrmw.start
-; RV32IA-NEXT:    # in Loop: Header=BB215_1 Depth=1
-; RV32IA-NEXT:    sltu a0, s0, a1
+; RV32IA-NEXT:    bne a1, s0, .LBB215_3
 ; RV32IA-NEXT:    j .LBB215_4
-; RV32IA-NEXT:  .LBB215_3: # in Loop: Header=BB215_1 Depth=1
-; RV32IA-NEXT:    sltu a0, s2, a2
-; RV32IA-NEXT:  .LBB215_4: # %atomicrmw.start
-; RV32IA-NEXT:    # in Loop: Header=BB215_1 Depth=1
-; RV32IA-NEXT:    xori a0, a0, 1
-; RV32IA-NEXT:    sw a2, 0(sp)
-; RV32IA-NEXT:    mv a3, a1
-; RV32IA-NEXT:    bnez a0, .LBB215_6
-; RV32IA-NEXT:  # %bb.5: # %atomicrmw.start
-; RV32IA-NEXT:    # in Loop: Header=BB215_1 Depth=1
-; RV32IA-NEXT:    mv a2, s2
-; RV32IA-NEXT:    mv a3, s0
-; RV32IA-NEXT:  .LBB215_6: # %atomicrmw.start
-; RV32IA-NEXT:    # in Loop: Header=BB215_1 Depth=1
+; RV32IA-NEXT:  .LBB215_1: # %atomicrmw.start
 ; RV32IA-NEXT:    sw a1, 4(sp)
 ; RV32IA-NEXT:    mv a0, s1
 ; RV32IA-NEXT:    mv a1, s3
@@ -16926,8 +17116,24 @@ define i64 @atomicrmw_umin_i64_monotonic
 ; RV32IA-NEXT:    call __atomic_compare_exchange_8
 ; RV32IA-NEXT:    lw a1, 4(sp)
 ; RV32IA-NEXT:    lw a2, 0(sp)
-; RV32IA-NEXT:    beqz a0, .LBB215_1
-; RV32IA-NEXT:  # %bb.7: # %atomicrmw.end
+; RV32IA-NEXT:    bnez a0, .LBB215_7
+; RV32IA-NEXT:  # %bb.2: # %atomicrmw.start
+; RV32IA-NEXT:    beq a1, s0, .LBB215_4
+; RV32IA-NEXT:  .LBB215_3: # %atomicrmw.start
+; RV32IA-NEXT:    sltu a0, s0, a1
+; RV32IA-NEXT:    j .LBB215_5
+; RV32IA-NEXT:  .LBB215_4:
+; RV32IA-NEXT:    sltu a0, s2, a2
+; RV32IA-NEXT:  .LBB215_5: # %atomicrmw.start
+; RV32IA-NEXT:    xori a0, a0, 1
+; RV32IA-NEXT:    sw a2, 0(sp)
+; RV32IA-NEXT:    mv a3, a1
+; RV32IA-NEXT:    bnez a0, .LBB215_1
+; RV32IA-NEXT:  # %bb.6: # %atomicrmw.start
+; RV32IA-NEXT:    mv a2, s2
+; RV32IA-NEXT:    mv a3, s0
+; RV32IA-NEXT:    j .LBB215_1
+; RV32IA-NEXT:  .LBB215_7: # %atomicrmw.end
 ; RV32IA-NEXT:    mv a0, a2
 ; RV32IA-NEXT:    lw s3, 12(sp)
 ; RV32IA-NEXT:    lw s2, 16(sp)
@@ -16948,23 +17154,25 @@ define i64 @atomicrmw_umin_i64_monotonic
 ; RV64I-NEXT:    mv s1, a0
 ; RV64I-NEXT:    ld a2, 0(a0)
 ; RV64I-NEXT:    addi s2, sp, 8
+; RV64I-NEXT:    sd a2, 8(sp)
+; RV64I-NEXT:    bltu s0, a2, .LBB215_3
 ; RV64I-NEXT:  .LBB215_1: # %atomicrmw.start
 ; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT:    sd a2, 8(sp)
-; RV64I-NEXT:    bgeu s0, a2, .LBB215_3
-; RV64I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB215_1 Depth=1
-; RV64I-NEXT:    mv a2, s0
-; RV64I-NEXT:  .LBB215_3: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB215_1 Depth=1
 ; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    mv a1, s2
 ; RV64I-NEXT:    mv a3, zero
 ; RV64I-NEXT:    mv a4, zero
 ; RV64I-NEXT:    call __atomic_compare_exchange_8
 ; RV64I-NEXT:    ld a2, 8(sp)
-; RV64I-NEXT:    beqz a0, .LBB215_1
-; RV64I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV64I-NEXT:    bnez a0, .LBB215_4
+; RV64I-NEXT:  # %bb.2: # %atomicrmw.start
+; RV64I-NEXT:    # in Loop: Header=BB215_1 Depth=1
+; RV64I-NEXT:    sd a2, 8(sp)
+; RV64I-NEXT:    bgeu s0, a2, .LBB215_1
+; RV64I-NEXT:  .LBB215_3: # %atomicrmw.start
+; RV64I-NEXT:    mv a2, s0
+; RV64I-NEXT:    j .LBB215_1
+; RV64I-NEXT:  .LBB215_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a2
 ; RV64I-NEXT:    ld s2, 16(sp)
 ; RV64I-NEXT:    ld s1, 24(sp)
@@ -16996,27 +17204,9 @@ define i64 @atomicrmw_umin_i64_acquire(i
 ; RV32I-NEXT:    lw a1, 4(a0)
 ; RV32I-NEXT:    lw a2, 0(a0)
 ; RV32I-NEXT:    mv s3, sp
-; RV32I-NEXT:  .LBB216_1: # %atomicrmw.start
-; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT:    beq a1, s0, .LBB216_3
-; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB216_1 Depth=1
-; RV32I-NEXT:    sltu a0, s0, a1
+; RV32I-NEXT:    bne a1, s0, .LBB216_3
 ; RV32I-NEXT:    j .LBB216_4
-; RV32I-NEXT:  .LBB216_3: # in Loop: Header=BB216_1 Depth=1
-; RV32I-NEXT:    sltu a0, s2, a2
-; RV32I-NEXT:  .LBB216_4: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB216_1 Depth=1
-; RV32I-NEXT:    xori a0, a0, 1
-; RV32I-NEXT:    sw a2, 0(sp)
-; RV32I-NEXT:    mv a3, a1
-; RV32I-NEXT:    bnez a0, .LBB216_6
-; RV32I-NEXT:  # %bb.5: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB216_1 Depth=1
-; RV32I-NEXT:    mv a2, s2
-; RV32I-NEXT:    mv a3, s0
-; RV32I-NEXT:  .LBB216_6: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB216_1 Depth=1
+; RV32I-NEXT:  .LBB216_1: # %atomicrmw.start
 ; RV32I-NEXT:    sw a1, 4(sp)
 ; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    mv a1, s3
@@ -17025,8 +17215,24 @@ define i64 @atomicrmw_umin_i64_acquire(i
 ; RV32I-NEXT:    call __atomic_compare_exchange_8
 ; RV32I-NEXT:    lw a1, 4(sp)
 ; RV32I-NEXT:    lw a2, 0(sp)
-; RV32I-NEXT:    beqz a0, .LBB216_1
-; RV32I-NEXT:  # %bb.7: # %atomicrmw.end
+; RV32I-NEXT:    bnez a0, .LBB216_7
+; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
+; RV32I-NEXT:    beq a1, s0, .LBB216_4
+; RV32I-NEXT:  .LBB216_3: # %atomicrmw.start
+; RV32I-NEXT:    sltu a0, s0, a1
+; RV32I-NEXT:    j .LBB216_5
+; RV32I-NEXT:  .LBB216_4:
+; RV32I-NEXT:    sltu a0, s2, a2
+; RV32I-NEXT:  .LBB216_5: # %atomicrmw.start
+; RV32I-NEXT:    xori a0, a0, 1
+; RV32I-NEXT:    sw a2, 0(sp)
+; RV32I-NEXT:    mv a3, a1
+; RV32I-NEXT:    bnez a0, .LBB216_1
+; RV32I-NEXT:  # %bb.6: # %atomicrmw.start
+; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    mv a3, s0
+; RV32I-NEXT:    j .LBB216_1
+; RV32I-NEXT:  .LBB216_7: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a2
 ; RV32I-NEXT:    lw s3, 12(sp)
 ; RV32I-NEXT:    lw s2, 16(sp)
@@ -17050,27 +17256,9 @@ define i64 @atomicrmw_umin_i64_acquire(i
 ; RV32IA-NEXT:    lw a1, 4(a0)
 ; RV32IA-NEXT:    lw a2, 0(a0)
 ; RV32IA-NEXT:    mv s3, sp
-; RV32IA-NEXT:  .LBB216_1: # %atomicrmw.start
-; RV32IA-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    beq a1, s0, .LBB216_3
-; RV32IA-NEXT:  # %bb.2: # %atomicrmw.start
-; RV32IA-NEXT:    # in Loop: Header=BB216_1 Depth=1
-; RV32IA-NEXT:    sltu a0, s0, a1
+; RV32IA-NEXT:    bne a1, s0, .LBB216_3
 ; RV32IA-NEXT:    j .LBB216_4
-; RV32IA-NEXT:  .LBB216_3: # in Loop: Header=BB216_1 Depth=1
-; RV32IA-NEXT:    sltu a0, s2, a2
-; RV32IA-NEXT:  .LBB216_4: # %atomicrmw.start
-; RV32IA-NEXT:    # in Loop: Header=BB216_1 Depth=1
-; RV32IA-NEXT:    xori a0, a0, 1
-; RV32IA-NEXT:    sw a2, 0(sp)
-; RV32IA-NEXT:    mv a3, a1
-; RV32IA-NEXT:    bnez a0, .LBB216_6
-; RV32IA-NEXT:  # %bb.5: # %atomicrmw.start
-; RV32IA-NEXT:    # in Loop: Header=BB216_1 Depth=1
-; RV32IA-NEXT:    mv a2, s2
-; RV32IA-NEXT:    mv a3, s0
-; RV32IA-NEXT:  .LBB216_6: # %atomicrmw.start
-; RV32IA-NEXT:    # in Loop: Header=BB216_1 Depth=1
+; RV32IA-NEXT:  .LBB216_1: # %atomicrmw.start
 ; RV32IA-NEXT:    sw a1, 4(sp)
 ; RV32IA-NEXT:    mv a0, s1
 ; RV32IA-NEXT:    mv a1, s3
@@ -17079,8 +17267,24 @@ define i64 @atomicrmw_umin_i64_acquire(i
 ; RV32IA-NEXT:    call __atomic_compare_exchange_8
 ; RV32IA-NEXT:    lw a1, 4(sp)
 ; RV32IA-NEXT:    lw a2, 0(sp)
-; RV32IA-NEXT:    beqz a0, .LBB216_1
-; RV32IA-NEXT:  # %bb.7: # %atomicrmw.end
+; RV32IA-NEXT:    bnez a0, .LBB216_7
+; RV32IA-NEXT:  # %bb.2: # %atomicrmw.start
+; RV32IA-NEXT:    beq a1, s0, .LBB216_4
+; RV32IA-NEXT:  .LBB216_3: # %atomicrmw.start
+; RV32IA-NEXT:    sltu a0, s0, a1
+; RV32IA-NEXT:    j .LBB216_5
+; RV32IA-NEXT:  .LBB216_4:
+; RV32IA-NEXT:    sltu a0, s2, a2
+; RV32IA-NEXT:  .LBB216_5: # %atomicrmw.start
+; RV32IA-NEXT:    xori a0, a0, 1
+; RV32IA-NEXT:    sw a2, 0(sp)
+; RV32IA-NEXT:    mv a3, a1
+; RV32IA-NEXT:    bnez a0, .LBB216_1
+; RV32IA-NEXT:  # %bb.6: # %atomicrmw.start
+; RV32IA-NEXT:    mv a2, s2
+; RV32IA-NEXT:    mv a3, s0
+; RV32IA-NEXT:    j .LBB216_1
+; RV32IA-NEXT:  .LBB216_7: # %atomicrmw.end
 ; RV32IA-NEXT:    mv a0, a2
 ; RV32IA-NEXT:    lw s3, 12(sp)
 ; RV32IA-NEXT:    lw s2, 16(sp)
@@ -17101,23 +17305,25 @@ define i64 @atomicrmw_umin_i64_acquire(i
 ; RV64I-NEXT:    mv s1, a0
 ; RV64I-NEXT:    ld a2, 0(a0)
 ; RV64I-NEXT:    addi s2, sp, 8
+; RV64I-NEXT:    sd a2, 8(sp)
+; RV64I-NEXT:    bltu s0, a2, .LBB216_3
 ; RV64I-NEXT:  .LBB216_1: # %atomicrmw.start
 ; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT:    sd a2, 8(sp)
-; RV64I-NEXT:    bgeu s0, a2, .LBB216_3
-; RV64I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB216_1 Depth=1
-; RV64I-NEXT:    mv a2, s0
-; RV64I-NEXT:  .LBB216_3: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB216_1 Depth=1
 ; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    mv a1, s2
 ; RV64I-NEXT:    addi a3, zero, 2
 ; RV64I-NEXT:    addi a4, zero, 2
 ; RV64I-NEXT:    call __atomic_compare_exchange_8
 ; RV64I-NEXT:    ld a2, 8(sp)
-; RV64I-NEXT:    beqz a0, .LBB216_1
-; RV64I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV64I-NEXT:    bnez a0, .LBB216_4
+; RV64I-NEXT:  # %bb.2: # %atomicrmw.start
+; RV64I-NEXT:    # in Loop: Header=BB216_1 Depth=1
+; RV64I-NEXT:    sd a2, 8(sp)
+; RV64I-NEXT:    bgeu s0, a2, .LBB216_1
+; RV64I-NEXT:  .LBB216_3: # %atomicrmw.start
+; RV64I-NEXT:    mv a2, s0
+; RV64I-NEXT:    j .LBB216_1
+; RV64I-NEXT:  .LBB216_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a2
 ; RV64I-NEXT:    ld s2, 16(sp)
 ; RV64I-NEXT:    ld s1, 24(sp)
@@ -17149,27 +17355,9 @@ define i64 @atomicrmw_umin_i64_release(i
 ; RV32I-NEXT:    lw a1, 4(a0)
 ; RV32I-NEXT:    lw a2, 0(a0)
 ; RV32I-NEXT:    mv s3, sp
-; RV32I-NEXT:  .LBB217_1: # %atomicrmw.start
-; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT:    beq a1, s0, .LBB217_3
-; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB217_1 Depth=1
-; RV32I-NEXT:    sltu a0, s0, a1
+; RV32I-NEXT:    bne a1, s0, .LBB217_3
 ; RV32I-NEXT:    j .LBB217_4
-; RV32I-NEXT:  .LBB217_3: # in Loop: Header=BB217_1 Depth=1
-; RV32I-NEXT:    sltu a0, s2, a2
-; RV32I-NEXT:  .LBB217_4: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB217_1 Depth=1
-; RV32I-NEXT:    xori a0, a0, 1
-; RV32I-NEXT:    sw a2, 0(sp)
-; RV32I-NEXT:    mv a3, a1
-; RV32I-NEXT:    bnez a0, .LBB217_6
-; RV32I-NEXT:  # %bb.5: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB217_1 Depth=1
-; RV32I-NEXT:    mv a2, s2
-; RV32I-NEXT:    mv a3, s0
-; RV32I-NEXT:  .LBB217_6: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB217_1 Depth=1
+; RV32I-NEXT:  .LBB217_1: # %atomicrmw.start
 ; RV32I-NEXT:    sw a1, 4(sp)
 ; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    mv a1, s3
@@ -17178,8 +17366,24 @@ define i64 @atomicrmw_umin_i64_release(i
 ; RV32I-NEXT:    call __atomic_compare_exchange_8
 ; RV32I-NEXT:    lw a1, 4(sp)
 ; RV32I-NEXT:    lw a2, 0(sp)
-; RV32I-NEXT:    beqz a0, .LBB217_1
-; RV32I-NEXT:  # %bb.7: # %atomicrmw.end
+; RV32I-NEXT:    bnez a0, .LBB217_7
+; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
+; RV32I-NEXT:    beq a1, s0, .LBB217_4
+; RV32I-NEXT:  .LBB217_3: # %atomicrmw.start
+; RV32I-NEXT:    sltu a0, s0, a1
+; RV32I-NEXT:    j .LBB217_5
+; RV32I-NEXT:  .LBB217_4:
+; RV32I-NEXT:    sltu a0, s2, a2
+; RV32I-NEXT:  .LBB217_5: # %atomicrmw.start
+; RV32I-NEXT:    xori a0, a0, 1
+; RV32I-NEXT:    sw a2, 0(sp)
+; RV32I-NEXT:    mv a3, a1
+; RV32I-NEXT:    bnez a0, .LBB217_1
+; RV32I-NEXT:  # %bb.6: # %atomicrmw.start
+; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    mv a3, s0
+; RV32I-NEXT:    j .LBB217_1
+; RV32I-NEXT:  .LBB217_7: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a2
 ; RV32I-NEXT:    lw s3, 12(sp)
 ; RV32I-NEXT:    lw s2, 16(sp)
@@ -17203,27 +17407,9 @@ define i64 @atomicrmw_umin_i64_release(i
 ; RV32IA-NEXT:    lw a1, 4(a0)
 ; RV32IA-NEXT:    lw a2, 0(a0)
 ; RV32IA-NEXT:    mv s3, sp
-; RV32IA-NEXT:  .LBB217_1: # %atomicrmw.start
-; RV32IA-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    beq a1, s0, .LBB217_3
-; RV32IA-NEXT:  # %bb.2: # %atomicrmw.start
-; RV32IA-NEXT:    # in Loop: Header=BB217_1 Depth=1
-; RV32IA-NEXT:    sltu a0, s0, a1
+; RV32IA-NEXT:    bne a1, s0, .LBB217_3
 ; RV32IA-NEXT:    j .LBB217_4
-; RV32IA-NEXT:  .LBB217_3: # in Loop: Header=BB217_1 Depth=1
-; RV32IA-NEXT:    sltu a0, s2, a2
-; RV32IA-NEXT:  .LBB217_4: # %atomicrmw.start
-; RV32IA-NEXT:    # in Loop: Header=BB217_1 Depth=1
-; RV32IA-NEXT:    xori a0, a0, 1
-; RV32IA-NEXT:    sw a2, 0(sp)
-; RV32IA-NEXT:    mv a3, a1
-; RV32IA-NEXT:    bnez a0, .LBB217_6
-; RV32IA-NEXT:  # %bb.5: # %atomicrmw.start
-; RV32IA-NEXT:    # in Loop: Header=BB217_1 Depth=1
-; RV32IA-NEXT:    mv a2, s2
-; RV32IA-NEXT:    mv a3, s0
-; RV32IA-NEXT:  .LBB217_6: # %atomicrmw.start
-; RV32IA-NEXT:    # in Loop: Header=BB217_1 Depth=1
+; RV32IA-NEXT:  .LBB217_1: # %atomicrmw.start
 ; RV32IA-NEXT:    sw a1, 4(sp)
 ; RV32IA-NEXT:    mv a0, s1
 ; RV32IA-NEXT:    mv a1, s3
@@ -17232,8 +17418,24 @@ define i64 @atomicrmw_umin_i64_release(i
 ; RV32IA-NEXT:    call __atomic_compare_exchange_8
 ; RV32IA-NEXT:    lw a1, 4(sp)
 ; RV32IA-NEXT:    lw a2, 0(sp)
-; RV32IA-NEXT:    beqz a0, .LBB217_1
-; RV32IA-NEXT:  # %bb.7: # %atomicrmw.end
+; RV32IA-NEXT:    bnez a0, .LBB217_7
+; RV32IA-NEXT:  # %bb.2: # %atomicrmw.start
+; RV32IA-NEXT:    beq a1, s0, .LBB217_4
+; RV32IA-NEXT:  .LBB217_3: # %atomicrmw.start
+; RV32IA-NEXT:    sltu a0, s0, a1
+; RV32IA-NEXT:    j .LBB217_5
+; RV32IA-NEXT:  .LBB217_4:
+; RV32IA-NEXT:    sltu a0, s2, a2
+; RV32IA-NEXT:  .LBB217_5: # %atomicrmw.start
+; RV32IA-NEXT:    xori a0, a0, 1
+; RV32IA-NEXT:    sw a2, 0(sp)
+; RV32IA-NEXT:    mv a3, a1
+; RV32IA-NEXT:    bnez a0, .LBB217_1
+; RV32IA-NEXT:  # %bb.6: # %atomicrmw.start
+; RV32IA-NEXT:    mv a2, s2
+; RV32IA-NEXT:    mv a3, s0
+; RV32IA-NEXT:    j .LBB217_1
+; RV32IA-NEXT:  .LBB217_7: # %atomicrmw.end
 ; RV32IA-NEXT:    mv a0, a2
 ; RV32IA-NEXT:    lw s3, 12(sp)
 ; RV32IA-NEXT:    lw s2, 16(sp)
@@ -17254,23 +17456,25 @@ define i64 @atomicrmw_umin_i64_release(i
 ; RV64I-NEXT:    mv s1, a0
 ; RV64I-NEXT:    ld a2, 0(a0)
 ; RV64I-NEXT:    addi s2, sp, 8
+; RV64I-NEXT:    sd a2, 8(sp)
+; RV64I-NEXT:    bltu s0, a2, .LBB217_3
 ; RV64I-NEXT:  .LBB217_1: # %atomicrmw.start
 ; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT:    sd a2, 8(sp)
-; RV64I-NEXT:    bgeu s0, a2, .LBB217_3
-; RV64I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB217_1 Depth=1
-; RV64I-NEXT:    mv a2, s0
-; RV64I-NEXT:  .LBB217_3: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB217_1 Depth=1
 ; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    mv a1, s2
 ; RV64I-NEXT:    addi a3, zero, 3
 ; RV64I-NEXT:    mv a4, zero
 ; RV64I-NEXT:    call __atomic_compare_exchange_8
 ; RV64I-NEXT:    ld a2, 8(sp)
-; RV64I-NEXT:    beqz a0, .LBB217_1
-; RV64I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV64I-NEXT:    bnez a0, .LBB217_4
+; RV64I-NEXT:  # %bb.2: # %atomicrmw.start
+; RV64I-NEXT:    # in Loop: Header=BB217_1 Depth=1
+; RV64I-NEXT:    sd a2, 8(sp)
+; RV64I-NEXT:    bgeu s0, a2, .LBB217_1
+; RV64I-NEXT:  .LBB217_3: # %atomicrmw.start
+; RV64I-NEXT:    mv a2, s0
+; RV64I-NEXT:    j .LBB217_1
+; RV64I-NEXT:  .LBB217_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a2
 ; RV64I-NEXT:    ld s2, 16(sp)
 ; RV64I-NEXT:    ld s1, 24(sp)
@@ -17302,27 +17506,9 @@ define i64 @atomicrmw_umin_i64_acq_rel(i
 ; RV32I-NEXT:    lw a1, 4(a0)
 ; RV32I-NEXT:    lw a2, 0(a0)
 ; RV32I-NEXT:    mv s3, sp
-; RV32I-NEXT:  .LBB218_1: # %atomicrmw.start
-; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT:    beq a1, s0, .LBB218_3
-; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB218_1 Depth=1
-; RV32I-NEXT:    sltu a0, s0, a1
+; RV32I-NEXT:    bne a1, s0, .LBB218_3
 ; RV32I-NEXT:    j .LBB218_4
-; RV32I-NEXT:  .LBB218_3: # in Loop: Header=BB218_1 Depth=1
-; RV32I-NEXT:    sltu a0, s2, a2
-; RV32I-NEXT:  .LBB218_4: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB218_1 Depth=1
-; RV32I-NEXT:    xori a0, a0, 1
-; RV32I-NEXT:    sw a2, 0(sp)
-; RV32I-NEXT:    mv a3, a1
-; RV32I-NEXT:    bnez a0, .LBB218_6
-; RV32I-NEXT:  # %bb.5: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB218_1 Depth=1
-; RV32I-NEXT:    mv a2, s2
-; RV32I-NEXT:    mv a3, s0
-; RV32I-NEXT:  .LBB218_6: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB218_1 Depth=1
+; RV32I-NEXT:  .LBB218_1: # %atomicrmw.start
 ; RV32I-NEXT:    sw a1, 4(sp)
 ; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    mv a1, s3
@@ -17331,8 +17517,24 @@ define i64 @atomicrmw_umin_i64_acq_rel(i
 ; RV32I-NEXT:    call __atomic_compare_exchange_8
 ; RV32I-NEXT:    lw a1, 4(sp)
 ; RV32I-NEXT:    lw a2, 0(sp)
-; RV32I-NEXT:    beqz a0, .LBB218_1
-; RV32I-NEXT:  # %bb.7: # %atomicrmw.end
+; RV32I-NEXT:    bnez a0, .LBB218_7
+; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
+; RV32I-NEXT:    beq a1, s0, .LBB218_4
+; RV32I-NEXT:  .LBB218_3: # %atomicrmw.start
+; RV32I-NEXT:    sltu a0, s0, a1
+; RV32I-NEXT:    j .LBB218_5
+; RV32I-NEXT:  .LBB218_4:
+; RV32I-NEXT:    sltu a0, s2, a2
+; RV32I-NEXT:  .LBB218_5: # %atomicrmw.start
+; RV32I-NEXT:    xori a0, a0, 1
+; RV32I-NEXT:    sw a2, 0(sp)
+; RV32I-NEXT:    mv a3, a1
+; RV32I-NEXT:    bnez a0, .LBB218_1
+; RV32I-NEXT:  # %bb.6: # %atomicrmw.start
+; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    mv a3, s0
+; RV32I-NEXT:    j .LBB218_1
+; RV32I-NEXT:  .LBB218_7: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a2
 ; RV32I-NEXT:    lw s3, 12(sp)
 ; RV32I-NEXT:    lw s2, 16(sp)
@@ -17356,27 +17558,9 @@ define i64 @atomicrmw_umin_i64_acq_rel(i
 ; RV32IA-NEXT:    lw a1, 4(a0)
 ; RV32IA-NEXT:    lw a2, 0(a0)
 ; RV32IA-NEXT:    mv s3, sp
-; RV32IA-NEXT:  .LBB218_1: # %atomicrmw.start
-; RV32IA-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    beq a1, s0, .LBB218_3
-; RV32IA-NEXT:  # %bb.2: # %atomicrmw.start
-; RV32IA-NEXT:    # in Loop: Header=BB218_1 Depth=1
-; RV32IA-NEXT:    sltu a0, s0, a1
+; RV32IA-NEXT:    bne a1, s0, .LBB218_3
 ; RV32IA-NEXT:    j .LBB218_4
-; RV32IA-NEXT:  .LBB218_3: # in Loop: Header=BB218_1 Depth=1
-; RV32IA-NEXT:    sltu a0, s2, a2
-; RV32IA-NEXT:  .LBB218_4: # %atomicrmw.start
-; RV32IA-NEXT:    # in Loop: Header=BB218_1 Depth=1
-; RV32IA-NEXT:    xori a0, a0, 1
-; RV32IA-NEXT:    sw a2, 0(sp)
-; RV32IA-NEXT:    mv a3, a1
-; RV32IA-NEXT:    bnez a0, .LBB218_6
-; RV32IA-NEXT:  # %bb.5: # %atomicrmw.start
-; RV32IA-NEXT:    # in Loop: Header=BB218_1 Depth=1
-; RV32IA-NEXT:    mv a2, s2
-; RV32IA-NEXT:    mv a3, s0
-; RV32IA-NEXT:  .LBB218_6: # %atomicrmw.start
-; RV32IA-NEXT:    # in Loop: Header=BB218_1 Depth=1
+; RV32IA-NEXT:  .LBB218_1: # %atomicrmw.start
 ; RV32IA-NEXT:    sw a1, 4(sp)
 ; RV32IA-NEXT:    mv a0, s1
 ; RV32IA-NEXT:    mv a1, s3
@@ -17385,8 +17569,24 @@ define i64 @atomicrmw_umin_i64_acq_rel(i
 ; RV32IA-NEXT:    call __atomic_compare_exchange_8
 ; RV32IA-NEXT:    lw a1, 4(sp)
 ; RV32IA-NEXT:    lw a2, 0(sp)
-; RV32IA-NEXT:    beqz a0, .LBB218_1
-; RV32IA-NEXT:  # %bb.7: # %atomicrmw.end
+; RV32IA-NEXT:    bnez a0, .LBB218_7
+; RV32IA-NEXT:  # %bb.2: # %atomicrmw.start
+; RV32IA-NEXT:    beq a1, s0, .LBB218_4
+; RV32IA-NEXT:  .LBB218_3: # %atomicrmw.start
+; RV32IA-NEXT:    sltu a0, s0, a1
+; RV32IA-NEXT:    j .LBB218_5
+; RV32IA-NEXT:  .LBB218_4:
+; RV32IA-NEXT:    sltu a0, s2, a2
+; RV32IA-NEXT:  .LBB218_5: # %atomicrmw.start
+; RV32IA-NEXT:    xori a0, a0, 1
+; RV32IA-NEXT:    sw a2, 0(sp)
+; RV32IA-NEXT:    mv a3, a1
+; RV32IA-NEXT:    bnez a0, .LBB218_1
+; RV32IA-NEXT:  # %bb.6: # %atomicrmw.start
+; RV32IA-NEXT:    mv a2, s2
+; RV32IA-NEXT:    mv a3, s0
+; RV32IA-NEXT:    j .LBB218_1
+; RV32IA-NEXT:  .LBB218_7: # %atomicrmw.end
 ; RV32IA-NEXT:    mv a0, a2
 ; RV32IA-NEXT:    lw s3, 12(sp)
 ; RV32IA-NEXT:    lw s2, 16(sp)
@@ -17407,23 +17607,25 @@ define i64 @atomicrmw_umin_i64_acq_rel(i
 ; RV64I-NEXT:    mv s1, a0
 ; RV64I-NEXT:    ld a2, 0(a0)
 ; RV64I-NEXT:    addi s2, sp, 8
+; RV64I-NEXT:    sd a2, 8(sp)
+; RV64I-NEXT:    bltu s0, a2, .LBB218_3
 ; RV64I-NEXT:  .LBB218_1: # %atomicrmw.start
 ; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT:    sd a2, 8(sp)
-; RV64I-NEXT:    bgeu s0, a2, .LBB218_3
-; RV64I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB218_1 Depth=1
-; RV64I-NEXT:    mv a2, s0
-; RV64I-NEXT:  .LBB218_3: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB218_1 Depth=1
 ; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    mv a1, s2
 ; RV64I-NEXT:    addi a3, zero, 4
 ; RV64I-NEXT:    addi a4, zero, 2
 ; RV64I-NEXT:    call __atomic_compare_exchange_8
 ; RV64I-NEXT:    ld a2, 8(sp)
-; RV64I-NEXT:    beqz a0, .LBB218_1
-; RV64I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV64I-NEXT:    bnez a0, .LBB218_4
+; RV64I-NEXT:  # %bb.2: # %atomicrmw.start
+; RV64I-NEXT:    # in Loop: Header=BB218_1 Depth=1
+; RV64I-NEXT:    sd a2, 8(sp)
+; RV64I-NEXT:    bgeu s0, a2, .LBB218_1
+; RV64I-NEXT:  .LBB218_3: # %atomicrmw.start
+; RV64I-NEXT:    mv a2, s0
+; RV64I-NEXT:    j .LBB218_1
+; RV64I-NEXT:  .LBB218_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a2
 ; RV64I-NEXT:    ld s2, 16(sp)
 ; RV64I-NEXT:    ld s1, 24(sp)
@@ -17455,27 +17657,9 @@ define i64 @atomicrmw_umin_i64_seq_cst(i
 ; RV32I-NEXT:    lw a1, 4(a0)
 ; RV32I-NEXT:    lw a2, 0(a0)
 ; RV32I-NEXT:    mv s3, sp
-; RV32I-NEXT:  .LBB219_1: # %atomicrmw.start
-; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT:    beq a1, s0, .LBB219_3
-; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB219_1 Depth=1
-; RV32I-NEXT:    sltu a0, s0, a1
+; RV32I-NEXT:    bne a1, s0, .LBB219_3
 ; RV32I-NEXT:    j .LBB219_4
-; RV32I-NEXT:  .LBB219_3: # in Loop: Header=BB219_1 Depth=1
-; RV32I-NEXT:    sltu a0, s2, a2
-; RV32I-NEXT:  .LBB219_4: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB219_1 Depth=1
-; RV32I-NEXT:    xori a0, a0, 1
-; RV32I-NEXT:    sw a2, 0(sp)
-; RV32I-NEXT:    mv a3, a1
-; RV32I-NEXT:    bnez a0, .LBB219_6
-; RV32I-NEXT:  # %bb.5: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB219_1 Depth=1
-; RV32I-NEXT:    mv a2, s2
-; RV32I-NEXT:    mv a3, s0
-; RV32I-NEXT:  .LBB219_6: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB219_1 Depth=1
+; RV32I-NEXT:  .LBB219_1: # %atomicrmw.start
 ; RV32I-NEXT:    sw a1, 4(sp)
 ; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    mv a1, s3
@@ -17484,8 +17668,24 @@ define i64 @atomicrmw_umin_i64_seq_cst(i
 ; RV32I-NEXT:    call __atomic_compare_exchange_8
 ; RV32I-NEXT:    lw a1, 4(sp)
 ; RV32I-NEXT:    lw a2, 0(sp)
-; RV32I-NEXT:    beqz a0, .LBB219_1
-; RV32I-NEXT:  # %bb.7: # %atomicrmw.end
+; RV32I-NEXT:    bnez a0, .LBB219_7
+; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
+; RV32I-NEXT:    beq a1, s0, .LBB219_4
+; RV32I-NEXT:  .LBB219_3: # %atomicrmw.start
+; RV32I-NEXT:    sltu a0, s0, a1
+; RV32I-NEXT:    j .LBB219_5
+; RV32I-NEXT:  .LBB219_4:
+; RV32I-NEXT:    sltu a0, s2, a2
+; RV32I-NEXT:  .LBB219_5: # %atomicrmw.start
+; RV32I-NEXT:    xori a0, a0, 1
+; RV32I-NEXT:    sw a2, 0(sp)
+; RV32I-NEXT:    mv a3, a1
+; RV32I-NEXT:    bnez a0, .LBB219_1
+; RV32I-NEXT:  # %bb.6: # %atomicrmw.start
+; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    mv a3, s0
+; RV32I-NEXT:    j .LBB219_1
+; RV32I-NEXT:  .LBB219_7: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a2
 ; RV32I-NEXT:    lw s3, 12(sp)
 ; RV32I-NEXT:    lw s2, 16(sp)
@@ -17509,27 +17709,9 @@ define i64 @atomicrmw_umin_i64_seq_cst(i
 ; RV32IA-NEXT:    lw a1, 4(a0)
 ; RV32IA-NEXT:    lw a2, 0(a0)
 ; RV32IA-NEXT:    mv s3, sp
-; RV32IA-NEXT:  .LBB219_1: # %atomicrmw.start
-; RV32IA-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    beq a1, s0, .LBB219_3
-; RV32IA-NEXT:  # %bb.2: # %atomicrmw.start
-; RV32IA-NEXT:    # in Loop: Header=BB219_1 Depth=1
-; RV32IA-NEXT:    sltu a0, s0, a1
+; RV32IA-NEXT:    bne a1, s0, .LBB219_3
 ; RV32IA-NEXT:    j .LBB219_4
-; RV32IA-NEXT:  .LBB219_3: # in Loop: Header=BB219_1 Depth=1
-; RV32IA-NEXT:    sltu a0, s2, a2
-; RV32IA-NEXT:  .LBB219_4: # %atomicrmw.start
-; RV32IA-NEXT:    # in Loop: Header=BB219_1 Depth=1
-; RV32IA-NEXT:    xori a0, a0, 1
-; RV32IA-NEXT:    sw a2, 0(sp)
-; RV32IA-NEXT:    mv a3, a1
-; RV32IA-NEXT:    bnez a0, .LBB219_6
-; RV32IA-NEXT:  # %bb.5: # %atomicrmw.start
-; RV32IA-NEXT:    # in Loop: Header=BB219_1 Depth=1
-; RV32IA-NEXT:    mv a2, s2
-; RV32IA-NEXT:    mv a3, s0
-; RV32IA-NEXT:  .LBB219_6: # %atomicrmw.start
-; RV32IA-NEXT:    # in Loop: Header=BB219_1 Depth=1
+; RV32IA-NEXT:  .LBB219_1: # %atomicrmw.start
 ; RV32IA-NEXT:    sw a1, 4(sp)
 ; RV32IA-NEXT:    mv a0, s1
 ; RV32IA-NEXT:    mv a1, s3
@@ -17538,8 +17720,24 @@ define i64 @atomicrmw_umin_i64_seq_cst(i
 ; RV32IA-NEXT:    call __atomic_compare_exchange_8
 ; RV32IA-NEXT:    lw a1, 4(sp)
 ; RV32IA-NEXT:    lw a2, 0(sp)
-; RV32IA-NEXT:    beqz a0, .LBB219_1
-; RV32IA-NEXT:  # %bb.7: # %atomicrmw.end
+; RV32IA-NEXT:    bnez a0, .LBB219_7
+; RV32IA-NEXT:  # %bb.2: # %atomicrmw.start
+; RV32IA-NEXT:    beq a1, s0, .LBB219_4
+; RV32IA-NEXT:  .LBB219_3: # %atomicrmw.start
+; RV32IA-NEXT:    sltu a0, s0, a1
+; RV32IA-NEXT:    j .LBB219_5
+; RV32IA-NEXT:  .LBB219_4:
+; RV32IA-NEXT:    sltu a0, s2, a2
+; RV32IA-NEXT:  .LBB219_5: # %atomicrmw.start
+; RV32IA-NEXT:    xori a0, a0, 1
+; RV32IA-NEXT:    sw a2, 0(sp)
+; RV32IA-NEXT:    mv a3, a1
+; RV32IA-NEXT:    bnez a0, .LBB219_1
+; RV32IA-NEXT:  # %bb.6: # %atomicrmw.start
+; RV32IA-NEXT:    mv a2, s2
+; RV32IA-NEXT:    mv a3, s0
+; RV32IA-NEXT:    j .LBB219_1
+; RV32IA-NEXT:  .LBB219_7: # %atomicrmw.end
 ; RV32IA-NEXT:    mv a0, a2
 ; RV32IA-NEXT:    lw s3, 12(sp)
 ; RV32IA-NEXT:    lw s2, 16(sp)
@@ -17560,23 +17758,25 @@ define i64 @atomicrmw_umin_i64_seq_cst(i
 ; RV64I-NEXT:    mv s1, a0
 ; RV64I-NEXT:    ld a2, 0(a0)
 ; RV64I-NEXT:    addi s2, sp, 8
+; RV64I-NEXT:    sd a2, 8(sp)
+; RV64I-NEXT:    bltu s0, a2, .LBB219_3
 ; RV64I-NEXT:  .LBB219_1: # %atomicrmw.start
 ; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT:    sd a2, 8(sp)
-; RV64I-NEXT:    bgeu s0, a2, .LBB219_3
-; RV64I-NEXT:  # %bb.2: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB219_1 Depth=1
-; RV64I-NEXT:    mv a2, s0
-; RV64I-NEXT:  .LBB219_3: # %atomicrmw.start
-; RV64I-NEXT:    # in Loop: Header=BB219_1 Depth=1
 ; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    mv a1, s2
 ; RV64I-NEXT:    addi a3, zero, 5
 ; RV64I-NEXT:    addi a4, zero, 5
 ; RV64I-NEXT:    call __atomic_compare_exchange_8
 ; RV64I-NEXT:    ld a2, 8(sp)
-; RV64I-NEXT:    beqz a0, .LBB219_1
-; RV64I-NEXT:  # %bb.4: # %atomicrmw.end
+; RV64I-NEXT:    bnez a0, .LBB219_4
+; RV64I-NEXT:  # %bb.2: # %atomicrmw.start
+; RV64I-NEXT:    # in Loop: Header=BB219_1 Depth=1
+; RV64I-NEXT:    sd a2, 8(sp)
+; RV64I-NEXT:    bgeu s0, a2, .LBB219_1
+; RV64I-NEXT:  .LBB219_3: # %atomicrmw.start
+; RV64I-NEXT:    mv a2, s0
+; RV64I-NEXT:    j .LBB219_1
+; RV64I-NEXT:  .LBB219_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a2
 ; RV64I-NEXT:    ld s2, 16(sp)
 ; RV64I-NEXT:    ld s1, 24(sp)

Modified: llvm/trunk/test/CodeGen/RISCV/remat.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/RISCV/remat.ll?rev=363487&r1=363486&r2=363487&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/RISCV/remat.ll (original)
+++ llvm/trunk/test/CodeGen/RISCV/remat.ll Sat Jun 15 00:49:14 2019
@@ -52,24 +52,32 @@ define i32 @test() nounwind {
 ; RV32I-NEXT:    lui s0, %hi(d)
 ; RV32I-NEXT:    lui s10, %hi(c)
 ; RV32I-NEXT:    lui s11, %hi(b)
-; RV32I-NEXT:  .LBB0_2: # %for.body
-; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
 ; RV32I-NEXT:    lw a1, %lo(l)(s2)
-; RV32I-NEXT:    beqz a1, .LBB0_4
-; RV32I-NEXT:  # %bb.3: # %if.then
-; RV32I-NEXT:    # in Loop: Header=BB0_2 Depth=1
+; RV32I-NEXT:    bnez a1, .LBB0_4
+; RV32I-NEXT:    j .LBB0_5
+; RV32I-NEXT:  .LBB0_2: # %for.inc
+; RV32I-NEXT:    # in Loop: Header=BB0_5 Depth=1
+; RV32I-NEXT:    lw a0, %lo(a)(s9)
+; RV32I-NEXT:    addi a0, a0, -1
+; RV32I-NEXT:    sw a0, %lo(a)(s9)
+; RV32I-NEXT:    beqz a0, .LBB0_11
+; RV32I-NEXT:  # %bb.3: # %for.body
+; RV32I-NEXT:    # in Loop: Header=BB0_5 Depth=1
+; RV32I-NEXT:    lw a1, %lo(l)(s2)
+; RV32I-NEXT:    beqz a1, .LBB0_5
+; RV32I-NEXT:  .LBB0_4: # %if.then
 ; RV32I-NEXT:    lw a4, %lo(e)(s1)
 ; RV32I-NEXT:    lw a3, %lo(d)(s0)
 ; RV32I-NEXT:    lw a2, %lo(c)(s10)
 ; RV32I-NEXT:    lw a1, %lo(b)(s11)
 ; RV32I-NEXT:    addi a5, zero, 32
 ; RV32I-NEXT:    call foo
-; RV32I-NEXT:  .LBB0_4: # %if.end
-; RV32I-NEXT:    # in Loop: Header=BB0_2 Depth=1
+; RV32I-NEXT:  .LBB0_5: # %if.end
+; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
 ; RV32I-NEXT:    lw a0, %lo(k)(s3)
-; RV32I-NEXT:    beqz a0, .LBB0_6
-; RV32I-NEXT:  # %bb.5: # %if.then3
-; RV32I-NEXT:    # in Loop: Header=BB0_2 Depth=1
+; RV32I-NEXT:    beqz a0, .LBB0_7
+; RV32I-NEXT:  # %bb.6: # %if.then3
+; RV32I-NEXT:    # in Loop: Header=BB0_5 Depth=1
 ; RV32I-NEXT:    lw a4, %lo(f)(s8)
 ; RV32I-NEXT:    lw a3, %lo(e)(s1)
 ; RV32I-NEXT:    lw a2, %lo(d)(s0)
@@ -77,12 +85,12 @@ define i32 @test() nounwind {
 ; RV32I-NEXT:    lw a0, %lo(b)(s11)
 ; RV32I-NEXT:    addi a5, zero, 64
 ; RV32I-NEXT:    call foo
-; RV32I-NEXT:  .LBB0_6: # %if.end5
-; RV32I-NEXT:    # in Loop: Header=BB0_2 Depth=1
+; RV32I-NEXT:  .LBB0_7: # %if.end5
+; RV32I-NEXT:    # in Loop: Header=BB0_5 Depth=1
 ; RV32I-NEXT:    lw a0, %lo(j)(s4)
-; RV32I-NEXT:    beqz a0, .LBB0_8
-; RV32I-NEXT:  # %bb.7: # %if.then7
-; RV32I-NEXT:    # in Loop: Header=BB0_2 Depth=1
+; RV32I-NEXT:    beqz a0, .LBB0_9
+; RV32I-NEXT:  # %bb.8: # %if.then7
+; RV32I-NEXT:    # in Loop: Header=BB0_5 Depth=1
 ; RV32I-NEXT:    lw a4, %lo(g)(s7)
 ; RV32I-NEXT:    lw a3, %lo(f)(s8)
 ; RV32I-NEXT:    lw a2, %lo(e)(s1)
@@ -90,12 +98,12 @@ define i32 @test() nounwind {
 ; RV32I-NEXT:    lw a0, %lo(c)(s10)
 ; RV32I-NEXT:    addi a5, zero, 32
 ; RV32I-NEXT:    call foo
-; RV32I-NEXT:  .LBB0_8: # %if.end9
-; RV32I-NEXT:    # in Loop: Header=BB0_2 Depth=1
+; RV32I-NEXT:  .LBB0_9: # %if.end9
+; RV32I-NEXT:    # in Loop: Header=BB0_5 Depth=1
 ; RV32I-NEXT:    lw a0, %lo(i)(s6)
-; RV32I-NEXT:    beqz a0, .LBB0_10
-; RV32I-NEXT:  # %bb.9: # %if.then11
-; RV32I-NEXT:    # in Loop: Header=BB0_2 Depth=1
+; RV32I-NEXT:    beqz a0, .LBB0_2
+; RV32I-NEXT:  # %bb.10: # %if.then11
+; RV32I-NEXT:    # in Loop: Header=BB0_5 Depth=1
 ; RV32I-NEXT:    lw a4, %lo(h)(s5)
 ; RV32I-NEXT:    lw a3, %lo(g)(s7)
 ; RV32I-NEXT:    lw a2, %lo(f)(s8)
@@ -103,12 +111,7 @@ define i32 @test() nounwind {
 ; RV32I-NEXT:    lw a0, %lo(d)(s0)
 ; RV32I-NEXT:    addi a5, zero, 32
 ; RV32I-NEXT:    call foo
-; RV32I-NEXT:  .LBB0_10: # %for.inc
-; RV32I-NEXT:    # in Loop: Header=BB0_2 Depth=1
-; RV32I-NEXT:    lw a0, %lo(a)(s9)
-; RV32I-NEXT:    addi a0, a0, -1
-; RV32I-NEXT:    sw a0, %lo(a)(s9)
-; RV32I-NEXT:    bnez a0, .LBB0_2
+; RV32I-NEXT:    j .LBB0_2
 ; RV32I-NEXT:  .LBB0_11: # %for.end
 ; RV32I-NEXT:    addi a0, zero, 1
 ; RV32I-NEXT:    lw s11, 12(sp)




More information about the llvm-commits mailing list