[llvm] 9b25ad8 - [CodeGenPrepare][X86] Add tests for fixing `urem` transform; NFC

Noah Goldstein via llvm-commits llvm-commits at lists.llvm.org
Tue Aug 20 09:17:58 PDT 2024


Author: Noah Goldstein
Date: 2024-08-20T09:17:49-07:00
New Revision: 9b25ad818c0b82fe4db8b43e9c9700805a2c7322

URL: https://github.com/llvm/llvm-project/commit/9b25ad818c0b82fe4db8b43e9c9700805a2c7322
DIFF: https://github.com/llvm/llvm-project/commit/9b25ad818c0b82fe4db8b43e9c9700805a2c7322.diff

LOG: [CodeGenPrepare][X86] Add tests for fixing `urem` transform; NFC

Added: 
    

Modified: 
    llvm/test/CodeGen/X86/fold-loop-of-urem.ll
    llvm/test/Transforms/CodeGenPrepare/X86/fold-loop-of-urem.ll

Removed: 
    


################################################################################
diff  --git a/llvm/test/CodeGen/X86/fold-loop-of-urem.ll b/llvm/test/CodeGen/X86/fold-loop-of-urem.ll
index aad2e0dd7bd248..815833cc137a57 100644
--- a/llvm/test/CodeGen/X86/fold-loop-of-urem.ll
+++ b/llvm/test/CodeGen/X86/fold-loop-of-urem.ll
@@ -53,11 +53,217 @@ for.body:
   br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
 }
 
+define void @simple_urem_to_sel_fail_not_in_loop(i32 %N, i32 %rem_amt) nounwind {
+; CHECK-LABEL: simple_urem_to_sel_fail_not_in_loop:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    pushq %rbp
+; CHECK-NEXT:    pushq %r14
+; CHECK-NEXT:    pushq %rbx
+; CHECK-NEXT:    movl %esi, %ebx
+; CHECK-NEXT:    testl %edi, %edi
+; CHECK-NEXT:    je .LBB1_1
+; CHECK-NEXT:  # %bb.3: # %for.body.preheader
+; CHECK-NEXT:    movl %edi, %r14d
+; CHECK-NEXT:    xorl %ebp, %ebp
+; CHECK-NEXT:    .p2align 4, 0x90
+; CHECK-NEXT:  .LBB1_4: # %for.body
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    movl %ebp, %edi
+; CHECK-NEXT:    callq use.i32@PLT
+; CHECK-NEXT:    incl %ebp
+; CHECK-NEXT:    cmpl %ebp, %r14d
+; CHECK-NEXT:    jne .LBB1_4
+; CHECK-NEXT:    jmp .LBB1_2
+; CHECK-NEXT:  .LBB1_1:
+; CHECK-NEXT:    xorl %ebp, %ebp
+; CHECK-NEXT:  .LBB1_2: # %for.cond.cleanup
+; CHECK-NEXT:    movl %ebp, %eax
+; CHECK-NEXT:    xorl %edx, %edx
+; CHECK-NEXT:    divl %ebx
+; CHECK-NEXT:    movl %edx, %edi
+; CHECK-NEXT:    popq %rbx
+; CHECK-NEXT:    popq %r14
+; CHECK-NEXT:    popq %rbp
+; CHECK-NEXT:    jmp use.i32@PLT # TAILCALL
+entry:
+  %cmp3.not = icmp eq i32 %N, 0
+  br i1 %cmp3.not, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:
+  %i.05 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+  %rem = urem i32 %i.05, %rem_amt
+  tail call void @use.i32(i32 %rem)
+  ret void
+
+for.body:
+  %i.04 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+  tail call void @use.i32(i32 %i.04)
+  %inc = add nuw i32 %i.04, 1
+  %exitcond.not = icmp eq i32 %inc, %N
+  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
+
+define void @simple_urem_to_sel_inner_loop(i32 %N, i32 %M) nounwind {
+; CHECK-LABEL: simple_urem_to_sel_inner_loop:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    pushq %rbp
+; CHECK-NEXT:    pushq %r15
+; CHECK-NEXT:    pushq %r14
+; CHECK-NEXT:    pushq %r12
+; CHECK-NEXT:    pushq %rbx
+; CHECK-NEXT:    movl %esi, %ebx
+; CHECK-NEXT:    movl %edi, %ebp
+; CHECK-NEXT:    callq get.i32@PLT
+; CHECK-NEXT:    testl %ebp, %ebp
+; CHECK-NEXT:    je .LBB2_6
+; CHECK-NEXT:  # %bb.1: # %for.body.preheader
+; CHECK-NEXT:    movl %eax, %r14d
+; CHECK-NEXT:    xorl %r15d, %r15d
+; CHECK-NEXT:    jmp .LBB2_2
+; CHECK-NEXT:    .p2align 4, 0x90
+; CHECK-NEXT:  .LBB2_5: # %for.inner.cond.cleanup
+; CHECK-NEXT:    # in Loop: Header=BB2_2 Depth=1
+; CHECK-NEXT:    incl %r15d
+; CHECK-NEXT:    cmpl %ebp, %r15d
+; CHECK-NEXT:    je .LBB2_6
+; CHECK-NEXT:  .LBB2_2: # %for.body
+; CHECK-NEXT:    # =>This Loop Header: Depth=1
+; CHECK-NEXT:    # Child Loop BB2_4 Depth 2
+; CHECK-NEXT:    testl %ebx, %ebx
+; CHECK-NEXT:    je .LBB2_5
+; CHECK-NEXT:  # %bb.3: # %for.inner.body.preheader
+; CHECK-NEXT:    # in Loop: Header=BB2_2 Depth=1
+; CHECK-NEXT:    movl %ebx, %r12d
+; CHECK-NEXT:    .p2align 4, 0x90
+; CHECK-NEXT:  .LBB2_4: # %for.inner.body
+; CHECK-NEXT:    # Parent Loop BB2_2 Depth=1
+; CHECK-NEXT:    # => This Inner Loop Header: Depth=2
+; CHECK-NEXT:    movl %r15d, %eax
+; CHECK-NEXT:    xorl %edx, %edx
+; CHECK-NEXT:    divl %r14d
+; CHECK-NEXT:    movl %edx, %edi
+; CHECK-NEXT:    callq use.i32@PLT
+; CHECK-NEXT:    decl %r12d
+; CHECK-NEXT:    jne .LBB2_4
+; CHECK-NEXT:    jmp .LBB2_5
+; CHECK-NEXT:  .LBB2_6: # %for.cond.cleanup
+; CHECK-NEXT:    popq %rbx
+; CHECK-NEXT:    popq %r12
+; CHECK-NEXT:    popq %r14
+; CHECK-NEXT:    popq %r15
+; CHECK-NEXT:    popq %rbp
+; CHECK-NEXT:    retq
+entry:
+  %rem_amt = call i32 @get.i32()
+  %cmp3.not = icmp eq i32 %N, 0
+  br i1 %cmp3.not, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:
+  ret void
+
+for.body:
+  %i.04 = phi i32 [ %inc, %for.inner.cond.cleanup ], [ 0, %entry ]
+
+  %cmp_inner = icmp eq i32 %M, 0
+  br i1 %cmp_inner, label %for.inner.cond.cleanup, label %for.inner.body
+
+for.inner.body:
+  %j = phi i32 [ %inc_inner, %for.inner.body ], [ 0, %for.body ]
+  %rem = urem i32 %i.04, %rem_amt
+  tail call void @use.i32(i32 %rem)
+  %inc_inner = add nuw i32 %j, 1
+  %exitcond_inner = icmp eq i32 %inc_inner, %M
+  br i1 %exitcond_inner, label %for.inner.cond.cleanup, label %for.inner.body
+
+for.inner.cond.cleanup:
+  %inc = add nuw i32 %i.04, 1
+  %exitcond.not = icmp eq i32 %inc, %N
+  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
+
+define void @simple_urem_to_sel_inner_loop_fail_not_invariant(i32 %N, i32 %M) nounwind {
+; CHECK-LABEL: simple_urem_to_sel_inner_loop_fail_not_invariant:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    testl %edi, %edi
+; CHECK-NEXT:    je .LBB3_7
+; CHECK-NEXT:  # %bb.1: # %for.body.preheader
+; CHECK-NEXT:    pushq %rbp
+; CHECK-NEXT:    pushq %r15
+; CHECK-NEXT:    pushq %r14
+; CHECK-NEXT:    pushq %r12
+; CHECK-NEXT:    pushq %rbx
+; CHECK-NEXT:    movl %esi, %ebx
+; CHECK-NEXT:    movl %edi, %ebp
+; CHECK-NEXT:    xorl %r14d, %r14d
+; CHECK-NEXT:    jmp .LBB3_2
+; CHECK-NEXT:    .p2align 4, 0x90
+; CHECK-NEXT:  .LBB3_5: # %for.inner.cond.cleanup
+; CHECK-NEXT:    # in Loop: Header=BB3_2 Depth=1
+; CHECK-NEXT:    incl %r14d
+; CHECK-NEXT:    cmpl %ebp, %r14d
+; CHECK-NEXT:    je .LBB3_6
+; CHECK-NEXT:  .LBB3_2: # %for.body
+; CHECK-NEXT:    # =>This Loop Header: Depth=1
+; CHECK-NEXT:    # Child Loop BB3_4 Depth 2
+; CHECK-NEXT:    callq get.i32@PLT
+; CHECK-NEXT:    testl %ebx, %ebx
+; CHECK-NEXT:    je .LBB3_5
+; CHECK-NEXT:  # %bb.3: # %for.inner.body.preheader
+; CHECK-NEXT:    # in Loop: Header=BB3_2 Depth=1
+; CHECK-NEXT:    movl %eax, %r15d
+; CHECK-NEXT:    movl %ebx, %r12d
+; CHECK-NEXT:    .p2align 4, 0x90
+; CHECK-NEXT:  .LBB3_4: # %for.inner.body
+; CHECK-NEXT:    # Parent Loop BB3_2 Depth=1
+; CHECK-NEXT:    # => This Inner Loop Header: Depth=2
+; CHECK-NEXT:    movl %r14d, %eax
+; CHECK-NEXT:    xorl %edx, %edx
+; CHECK-NEXT:    divl %r15d
+; CHECK-NEXT:    movl %edx, %edi
+; CHECK-NEXT:    callq use.i32@PLT
+; CHECK-NEXT:    decl %r12d
+; CHECK-NEXT:    jne .LBB3_4
+; CHECK-NEXT:    jmp .LBB3_5
+; CHECK-NEXT:  .LBB3_6:
+; CHECK-NEXT:    popq %rbx
+; CHECK-NEXT:    popq %r12
+; CHECK-NEXT:    popq %r14
+; CHECK-NEXT:    popq %r15
+; CHECK-NEXT:    popq %rbp
+; CHECK-NEXT:  .LBB3_7: # %for.cond.cleanup
+; CHECK-NEXT:    retq
+entry:
+  %cmp3.not = icmp eq i32 %N, 0
+  br i1 %cmp3.not, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:
+  ret void
+
+for.body:
+  %i.04 = phi i32 [ %inc, %for.inner.cond.cleanup ], [ 0, %entry ]
+  %rem_amt = call i32 @get.i32()
+  %cmp_inner = icmp eq i32 %M, 0
+  br i1 %cmp_inner, label %for.inner.cond.cleanup, label %for.inner.body
+
+for.inner.body:
+  %j = phi i32 [ %inc_inner, %for.inner.body ], [ 0, %for.body ]
+  %rem = urem i32 %i.04, %rem_amt
+  tail call void @use.i32(i32 %rem)
+  %inc_inner = add nuw i32 %j, 1
+  %exitcond_inner = icmp eq i32 %inc_inner, %M
+  br i1 %exitcond_inner, label %for.inner.cond.cleanup, label %for.inner.body
+
+for.inner.cond.cleanup:
+  %inc = add nuw i32 %i.04, 1
+  %exitcond.not = icmp eq i32 %inc, %N
+  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
+
 define void @simple_urem_to_sel_nested2(i32 %N, i32 %rem_amt) nounwind {
 ; CHECK-LABEL: simple_urem_to_sel_nested2:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    testl %edi, %edi
-; CHECK-NEXT:    je .LBB1_8
+; CHECK-NEXT:    je .LBB4_8
 ; CHECK-NEXT:  # %bb.1: # %for.body.preheader
 ; CHECK-NEXT:    pushq %rbp
 ; CHECK-NEXT:    pushq %r14
@@ -65,41 +271,41 @@ define void @simple_urem_to_sel_nested2(i32 %N, i32 %rem_amt) nounwind {
 ; CHECK-NEXT:    movl %esi, %ebx
 ; CHECK-NEXT:    movl %edi, %ebp
 ; CHECK-NEXT:    xorl %r14d, %r14d
-; CHECK-NEXT:    jmp .LBB1_2
+; CHECK-NEXT:    jmp .LBB4_2
 ; CHECK-NEXT:    .p2align 4, 0x90
-; CHECK-NEXT:  .LBB1_5: # %for.body1
-; CHECK-NEXT:    # in Loop: Header=BB1_2 Depth=1
+; CHECK-NEXT:  .LBB4_5: # %for.body1
+; CHECK-NEXT:    # in Loop: Header=BB4_2 Depth=1
 ; CHECK-NEXT:    movl %r14d, %eax
 ; CHECK-NEXT:    xorl %edx, %edx
 ; CHECK-NEXT:    divl %ebx
 ; CHECK-NEXT:    movl %edx, %edi
 ; CHECK-NEXT:    callq use.i32@PLT
-; CHECK-NEXT:  .LBB1_6: # %for.body.tail
-; CHECK-NEXT:    # in Loop: Header=BB1_2 Depth=1
+; CHECK-NEXT:  .LBB4_6: # %for.body.tail
+; CHECK-NEXT:    # in Loop: Header=BB4_2 Depth=1
 ; CHECK-NEXT:    incl %r14d
 ; CHECK-NEXT:    cmpl %r14d, %ebp
-; CHECK-NEXT:    je .LBB1_7
-; CHECK-NEXT:  .LBB1_2: # %for.body
+; CHECK-NEXT:    je .LBB4_7
+; CHECK-NEXT:  .LBB4_2: # %for.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    callq get.i1@PLT
 ; CHECK-NEXT:    testb $1, %al
-; CHECK-NEXT:    je .LBB1_6
+; CHECK-NEXT:    je .LBB4_6
 ; CHECK-NEXT:  # %bb.3: # %for.body0
-; CHECK-NEXT:    # in Loop: Header=BB1_2 Depth=1
+; CHECK-NEXT:    # in Loop: Header=BB4_2 Depth=1
 ; CHECK-NEXT:    callq get.i1@PLT
 ; CHECK-NEXT:    testb $1, %al
-; CHECK-NEXT:    jne .LBB1_5
+; CHECK-NEXT:    jne .LBB4_5
 ; CHECK-NEXT:  # %bb.4: # %for.body2
-; CHECK-NEXT:    # in Loop: Header=BB1_2 Depth=1
+; CHECK-NEXT:    # in Loop: Header=BB4_2 Depth=1
 ; CHECK-NEXT:    callq get.i1@PLT
 ; CHECK-NEXT:    testb $1, %al
-; CHECK-NEXT:    jne .LBB1_5
-; CHECK-NEXT:    jmp .LBB1_6
-; CHECK-NEXT:  .LBB1_7:
+; CHECK-NEXT:    jne .LBB4_5
+; CHECK-NEXT:    jmp .LBB4_6
+; CHECK-NEXT:  .LBB4_7:
 ; CHECK-NEXT:    popq %rbx
 ; CHECK-NEXT:    popq %r14
 ; CHECK-NEXT:    popq %rbp
-; CHECK-NEXT:  .LBB1_8: # %for.cond.cleanup
+; CHECK-NEXT:  .LBB4_8: # %for.cond.cleanup
 ; CHECK-NEXT:    retq
 entry:
   %cmp3.not = icmp eq i32 %N, 0
@@ -132,55 +338,55 @@ define void @simple_urem_fail_bad_incr3(i32 %N, i32 %rem_amt) nounwind {
 ; CHECK-LABEL: simple_urem_fail_bad_incr3:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    testl %edi, %edi
-; CHECK-NEXT:    je .LBB2_9
+; CHECK-NEXT:    je .LBB5_9
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    pushq %rbp
 ; CHECK-NEXT:    pushq %r14
 ; CHECK-NEXT:    pushq %rbx
 ; CHECK-NEXT:    movl %esi, %ebx
-; CHECK-NEXT:    jmp .LBB2_2
+; CHECK-NEXT:    jmp .LBB5_2
 ; CHECK-NEXT:    .p2align 4, 0x90
-; CHECK-NEXT:  .LBB2_6: # %for.body1
-; CHECK-NEXT:    # in Loop: Header=BB2_2 Depth=1
+; CHECK-NEXT:  .LBB5_6: # %for.body1
+; CHECK-NEXT:    # in Loop: Header=BB5_2 Depth=1
 ; CHECK-NEXT:    movl %ebp, %eax
 ; CHECK-NEXT:    xorl %edx, %edx
 ; CHECK-NEXT:    divl %ebx
 ; CHECK-NEXT:    movl %edx, %edi
 ; CHECK-NEXT:    callq use.i32@PLT
-; CHECK-NEXT:  .LBB2_7: # %for.body.tail
-; CHECK-NEXT:    # in Loop: Header=BB2_2 Depth=1
+; CHECK-NEXT:  .LBB5_7: # %for.body.tail
+; CHECK-NEXT:    # in Loop: Header=BB5_2 Depth=1
 ; CHECK-NEXT:    callq get.i1@PLT
 ; CHECK-NEXT:    testb $1, %al
-; CHECK-NEXT:    jne .LBB2_8
-; CHECK-NEXT:  .LBB2_2: # %for.body
+; CHECK-NEXT:    jne .LBB5_8
+; CHECK-NEXT:  .LBB5_2: # %for.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    callq get.i1@PLT
 ; CHECK-NEXT:    testb $1, %al
-; CHECK-NEXT:    je .LBB2_5
+; CHECK-NEXT:    je .LBB5_5
 ; CHECK-NEXT:  # %bb.3: # %for.body0
-; CHECK-NEXT:    # in Loop: Header=BB2_2 Depth=1
+; CHECK-NEXT:    # in Loop: Header=BB5_2 Depth=1
 ; CHECK-NEXT:    callq get.i1@PLT
 ; CHECK-NEXT:    movl %eax, %r14d
 ; CHECK-NEXT:    callq get.i32@PLT
 ; CHECK-NEXT:    testb $1, %r14b
-; CHECK-NEXT:    je .LBB2_7
-; CHECK-NEXT:  # %bb.4: # in Loop: Header=BB2_2 Depth=1
+; CHECK-NEXT:    je .LBB5_7
+; CHECK-NEXT:  # %bb.4: # in Loop: Header=BB5_2 Depth=1
 ; CHECK-NEXT:    movl %eax, %ebp
 ; CHECK-NEXT:    incl %ebp
-; CHECK-NEXT:    jmp .LBB2_6
+; CHECK-NEXT:    jmp .LBB5_6
 ; CHECK-NEXT:    .p2align 4, 0x90
-; CHECK-NEXT:  .LBB2_5: # %for.body2
-; CHECK-NEXT:    # in Loop: Header=BB2_2 Depth=1
+; CHECK-NEXT:  .LBB5_5: # %for.body2
+; CHECK-NEXT:    # in Loop: Header=BB5_2 Depth=1
 ; CHECK-NEXT:    xorl %ebp, %ebp
 ; CHECK-NEXT:    callq get.i1@PLT
 ; CHECK-NEXT:    testb $1, %al
-; CHECK-NEXT:    jne .LBB2_6
-; CHECK-NEXT:    jmp .LBB2_7
-; CHECK-NEXT:  .LBB2_8:
+; CHECK-NEXT:    jne .LBB5_6
+; CHECK-NEXT:    jmp .LBB5_7
+; CHECK-NEXT:  .LBB5_8:
 ; CHECK-NEXT:    popq %rbx
 ; CHECK-NEXT:    popq %r14
 ; CHECK-NEXT:    popq %rbp
-; CHECK-NEXT:  .LBB2_9: # %for.cond.cleanup
+; CHECK-NEXT:  .LBB5_9: # %for.cond.cleanup
 ; CHECK-NEXT:    retq
 entry:
   %cmp3.not = icmp eq i32 %N, 0
@@ -222,7 +428,7 @@ define void @simple_urem_to_sel_vec(<2 x i64> %rem_amt) nounwind {
 ; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
 ; CHECK-NEXT:    movq %xmm0, %r14
 ; CHECK-NEXT:    .p2align 4, 0x90
-; CHECK-NEXT:  .LBB3_1: # %for.body
+; CHECK-NEXT:  .LBB6_1: # %for.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    movdqa (%rsp), %xmm1 # 16-byte Reload
 ; CHECK-NEXT:    movdqa %xmm1, (%rsp) # 16-byte Spill
@@ -242,7 +448,7 @@ define void @simple_urem_to_sel_vec(<2 x i64> %rem_amt) nounwind {
 ; CHECK-NEXT:    movdqa %xmm0, (%rsp) # 16-byte Spill
 ; CHECK-NEXT:    callq get.i1@PLT
 ; CHECK-NEXT:    testb $1, %al
-; CHECK-NEXT:    je .LBB3_1
+; CHECK-NEXT:    je .LBB6_1
 ; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
 ; CHECK-NEXT:    addq $24, %rsp
 ; CHECK-NEXT:    popq %rbx
@@ -267,7 +473,7 @@ define void @simple_urem_fail_bad_incr(i32 %N, i32 %rem_amt) nounwind {
 ; CHECK-LABEL: simple_urem_fail_bad_incr:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    testl %edi, %edi
-; CHECK-NEXT:    je .LBB4_6
+; CHECK-NEXT:    je .LBB7_6
 ; CHECK-NEXT:  # %bb.1: # %for.body.preheader
 ; CHECK-NEXT:    pushq %rbp
 ; CHECK-NEXT:    pushq %r14
@@ -275,10 +481,10 @@ define void @simple_urem_fail_bad_incr(i32 %N, i32 %rem_amt) nounwind {
 ; CHECK-NEXT:    movl %esi, %ebx
 ; CHECK-NEXT:    movl %edi, %ebp
 ; CHECK-NEXT:    xorl %r14d, %r14d
-; CHECK-NEXT:    jmp .LBB4_2
+; CHECK-NEXT:    jmp .LBB7_2
 ; CHECK-NEXT:    .p2align 4, 0x90
-; CHECK-NEXT:  .LBB4_4: # %for.body.tail
-; CHECK-NEXT:    # in Loop: Header=BB4_2 Depth=1
+; CHECK-NEXT:  .LBB7_4: # %for.body.tail
+; CHECK-NEXT:    # in Loop: Header=BB7_2 Depth=1
 ; CHECK-NEXT:    movl %r14d, %eax
 ; CHECK-NEXT:    xorl %edx, %edx
 ; CHECK-NEXT:    divl %ebx
@@ -286,22 +492,22 @@ define void @simple_urem_fail_bad_incr(i32 %N, i32 %rem_amt) nounwind {
 ; CHECK-NEXT:    callq use.i32@PLT
 ; CHECK-NEXT:    incl %r14d
 ; CHECK-NEXT:    cmpl %ebp, %r14d
-; CHECK-NEXT:    je .LBB4_5
-; CHECK-NEXT:  .LBB4_2: # %for.body
+; CHECK-NEXT:    je .LBB7_5
+; CHECK-NEXT:  .LBB7_2: # %for.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    callq get.i1@PLT
 ; CHECK-NEXT:    testb $1, %al
-; CHECK-NEXT:    je .LBB4_4
+; CHECK-NEXT:    je .LBB7_4
 ; CHECK-NEXT:  # %bb.3: # %for.body0
-; CHECK-NEXT:    # in Loop: Header=BB4_2 Depth=1
+; CHECK-NEXT:    # in Loop: Header=BB7_2 Depth=1
 ; CHECK-NEXT:    callq get.i32@PLT
 ; CHECK-NEXT:    movl %eax, %r14d
-; CHECK-NEXT:    jmp .LBB4_4
-; CHECK-NEXT:  .LBB4_5:
+; CHECK-NEXT:    jmp .LBB7_4
+; CHECK-NEXT:  .LBB7_5:
 ; CHECK-NEXT:    popq %rbx
 ; CHECK-NEXT:    popq %r14
 ; CHECK-NEXT:    popq %rbp
-; CHECK-NEXT:  .LBB4_6: # %for.cond.cleanup
+; CHECK-NEXT:  .LBB7_6: # %for.cond.cleanup
 ; CHECK-NEXT:    retq
 entry:
   %cmp3.not = icmp eq i32 %N, 0
@@ -331,7 +537,7 @@ define void @simple_urem_to_sel_second_acc(i32 %N, i32 %rem_amt) nounwind {
 ; CHECK-LABEL: simple_urem_to_sel_second_acc:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    cmpl $2, %edi
-; CHECK-NEXT:    jb .LBB5_4
+; CHECK-NEXT:    jb .LBB8_4
 ; CHECK-NEXT:  # %bb.1: # %for.body.preheader
 ; CHECK-NEXT:    pushq %rbp
 ; CHECK-NEXT:    pushq %r15
@@ -343,7 +549,7 @@ define void @simple_urem_to_sel_second_acc(i32 %N, i32 %rem_amt) nounwind {
 ; CHECK-NEXT:    movl $1, %r15d
 ; CHECK-NEXT:    xorl %r14d, %r14d
 ; CHECK-NEXT:    .p2align 4, 0x90
-; CHECK-NEXT:  .LBB5_2: # %for.body
+; CHECK-NEXT:  .LBB8_2: # %for.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    movl %r14d, %eax
 ; CHECK-NEXT:    xorl %edx, %edx
@@ -353,14 +559,14 @@ define void @simple_urem_to_sel_second_acc(i32 %N, i32 %rem_amt) nounwind {
 ; CHECK-NEXT:    incl %r14d
 ; CHECK-NEXT:    addl $2, %r15d
 ; CHECK-NEXT:    cmpl %ebp, %r15d
-; CHECK-NEXT:    jbe .LBB5_2
+; CHECK-NEXT:    jbe .LBB8_2
 ; CHECK-NEXT:  # %bb.3:
 ; CHECK-NEXT:    addq $8, %rsp
 ; CHECK-NEXT:    popq %rbx
 ; CHECK-NEXT:    popq %r14
 ; CHECK-NEXT:    popq %r15
 ; CHECK-NEXT:    popq %rbp
-; CHECK-NEXT:  .LBB5_4: # %for.cond.cleanup
+; CHECK-NEXT:  .LBB8_4: # %for.cond.cleanup
 ; CHECK-NEXT:    retq
 entry:
   %cmp3.not = icmp ult i32 %N, 2
@@ -384,7 +590,7 @@ define void @simple_urem_fail_srem(i32 %N, i32 %rem_amt) nounwind {
 ; CHECK-LABEL: simple_urem_fail_srem:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    testl %edi, %edi
-; CHECK-NEXT:    je .LBB6_4
+; CHECK-NEXT:    je .LBB9_4
 ; CHECK-NEXT:  # %bb.1: # %for.body.preheader
 ; CHECK-NEXT:    pushq %rbp
 ; CHECK-NEXT:    pushq %r14
@@ -393,7 +599,7 @@ define void @simple_urem_fail_srem(i32 %N, i32 %rem_amt) nounwind {
 ; CHECK-NEXT:    movl %edi, %ebp
 ; CHECK-NEXT:    xorl %r14d, %r14d
 ; CHECK-NEXT:    .p2align 4, 0x90
-; CHECK-NEXT:  .LBB6_2: # %for.body
+; CHECK-NEXT:  .LBB9_2: # %for.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    movl %r14d, %eax
 ; CHECK-NEXT:    cltd
@@ -402,12 +608,12 @@ define void @simple_urem_fail_srem(i32 %N, i32 %rem_amt) nounwind {
 ; CHECK-NEXT:    callq use.i32@PLT
 ; CHECK-NEXT:    incl %r14d
 ; CHECK-NEXT:    cmpl %r14d, %ebp
-; CHECK-NEXT:    jne .LBB6_2
+; CHECK-NEXT:    jne .LBB9_2
 ; CHECK-NEXT:  # %bb.3:
 ; CHECK-NEXT:    popq %rbx
 ; CHECK-NEXT:    popq %r14
 ; CHECK-NEXT:    popq %rbp
-; CHECK-NEXT:  .LBB6_4: # %for.cond.cleanup
+; CHECK-NEXT:  .LBB9_4: # %for.cond.cleanup
 ; CHECK-NEXT:    retq
 entry:
   %cmp3.not = icmp eq i32 %N, 0
@@ -429,7 +635,7 @@ define void @simple_urem_fail_missing_nuw(i32 %N, i32 %rem_amt) nounwind {
 ; CHECK-LABEL: simple_urem_fail_missing_nuw:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    testl %edi, %edi
-; CHECK-NEXT:    je .LBB7_4
+; CHECK-NEXT:    je .LBB10_4
 ; CHECK-NEXT:  # %bb.1: # %for.body.preheader
 ; CHECK-NEXT:    pushq %rbp
 ; CHECK-NEXT:    pushq %r14
@@ -438,7 +644,7 @@ define void @simple_urem_fail_missing_nuw(i32 %N, i32 %rem_amt) nounwind {
 ; CHECK-NEXT:    movl %edi, %ebp
 ; CHECK-NEXT:    xorl %r14d, %r14d
 ; CHECK-NEXT:    .p2align 4, 0x90
-; CHECK-NEXT:  .LBB7_2: # %for.body
+; CHECK-NEXT:  .LBB10_2: # %for.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    movl %r14d, %eax
 ; CHECK-NEXT:    xorl %edx, %edx
@@ -447,12 +653,12 @@ define void @simple_urem_fail_missing_nuw(i32 %N, i32 %rem_amt) nounwind {
 ; CHECK-NEXT:    callq use.i32@PLT
 ; CHECK-NEXT:    incl %r14d
 ; CHECK-NEXT:    cmpl %r14d, %ebp
-; CHECK-NEXT:    jne .LBB7_2
+; CHECK-NEXT:    jne .LBB10_2
 ; CHECK-NEXT:  # %bb.3:
 ; CHECK-NEXT:    popq %rbx
 ; CHECK-NEXT:    popq %r14
 ; CHECK-NEXT:    popq %rbp
-; CHECK-NEXT:  .LBB7_4: # %for.cond.cleanup
+; CHECK-NEXT:  .LBB10_4: # %for.cond.cleanup
 ; CHECK-NEXT:    retq
 entry:
   %cmp3.not = icmp eq i32 %N, 0
@@ -474,7 +680,7 @@ define void @simple_urem_fail_bad_incr2(i32 %N, i32 %rem_amt) nounwind {
 ; CHECK-LABEL: simple_urem_fail_bad_incr2:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    testl %edi, %edi
-; CHECK-NEXT:    je .LBB8_4
+; CHECK-NEXT:    je .LBB11_4
 ; CHECK-NEXT:  # %bb.1: # %for.body.preheader
 ; CHECK-NEXT:    pushq %rbp
 ; CHECK-NEXT:    pushq %r14
@@ -483,7 +689,7 @@ define void @simple_urem_fail_bad_incr2(i32 %N, i32 %rem_amt) nounwind {
 ; CHECK-NEXT:    movl %edi, %ebp
 ; CHECK-NEXT:    xorl %r14d, %r14d
 ; CHECK-NEXT:    .p2align 4, 0x90
-; CHECK-NEXT:  .LBB8_2: # %for.body
+; CHECK-NEXT:  .LBB11_2: # %for.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    movl %r14d, %eax
 ; CHECK-NEXT:    xorl %edx, %edx
@@ -492,12 +698,12 @@ define void @simple_urem_fail_bad_incr2(i32 %N, i32 %rem_amt) nounwind {
 ; CHECK-NEXT:    callq use.i32@PLT
 ; CHECK-NEXT:    addl $2, %r14d
 ; CHECK-NEXT:    cmpl %r14d, %ebp
-; CHECK-NEXT:    jne .LBB8_2
+; CHECK-NEXT:    jne .LBB11_2
 ; CHECK-NEXT:  # %bb.3:
 ; CHECK-NEXT:    popq %rbx
 ; CHECK-NEXT:    popq %r14
 ; CHECK-NEXT:    popq %rbp
-; CHECK-NEXT:  .LBB8_4: # %for.cond.cleanup
+; CHECK-NEXT:  .LBB11_4: # %for.cond.cleanup
 ; CHECK-NEXT:    retq
 entry:
   %cmp3.not = icmp eq i32 %N, 0
@@ -519,7 +725,7 @@ define void @simple_urem_non_zero_entry4(i32 %N, i32 %rem_amt) nounwind {
 ; CHECK-LABEL: simple_urem_non_zero_entry4:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    testl %edi, %edi
-; CHECK-NEXT:    je .LBB9_4
+; CHECK-NEXT:    je .LBB12_4
 ; CHECK-NEXT:  # %bb.1: # %for.body.preheader
 ; CHECK-NEXT:    pushq %rbp
 ; CHECK-NEXT:    pushq %r14
@@ -528,7 +734,7 @@ define void @simple_urem_non_zero_entry4(i32 %N, i32 %rem_amt) nounwind {
 ; CHECK-NEXT:    movl %edi, %ebp
 ; CHECK-NEXT:    movl $4, %r14d
 ; CHECK-NEXT:    .p2align 4, 0x90
-; CHECK-NEXT:  .LBB9_2: # %for.body
+; CHECK-NEXT:  .LBB12_2: # %for.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    movl %r14d, %eax
 ; CHECK-NEXT:    xorl %edx, %edx
@@ -537,12 +743,12 @@ define void @simple_urem_non_zero_entry4(i32 %N, i32 %rem_amt) nounwind {
 ; CHECK-NEXT:    callq use.i32@PLT
 ; CHECK-NEXT:    incl %r14d
 ; CHECK-NEXT:    cmpl %r14d, %ebp
-; CHECK-NEXT:    jne .LBB9_2
+; CHECK-NEXT:    jne .LBB12_2
 ; CHECK-NEXT:  # %bb.3:
 ; CHECK-NEXT:    popq %rbx
 ; CHECK-NEXT:    popq %r14
 ; CHECK-NEXT:    popq %rbp
-; CHECK-NEXT:  .LBB9_4: # %for.cond.cleanup
+; CHECK-NEXT:  .LBB12_4: # %for.cond.cleanup
 ; CHECK-NEXT:    retq
 entry:
   %cmp3.not = icmp eq i32 %N, 0
@@ -564,7 +770,7 @@ define void @simple_urem_skip_const_rem_amt(i32 %N) nounwind {
 ; CHECK-LABEL: simple_urem_skip_const_rem_amt:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    testl %edi, %edi
-; CHECK-NEXT:    je .LBB10_4
+; CHECK-NEXT:    je .LBB13_4
 ; CHECK-NEXT:  # %bb.1: # %for.body.preheader
 ; CHECK-NEXT:    pushq %rbp
 ; CHECK-NEXT:    pushq %r14
@@ -574,7 +780,7 @@ define void @simple_urem_skip_const_rem_amt(i32 %N) nounwind {
 ; CHECK-NEXT:    movl $4, %ebp
 ; CHECK-NEXT:    movl $2938661835, %r14d # imm = 0xAF286BCB
 ; CHECK-NEXT:    .p2align 4, 0x90
-; CHECK-NEXT:  .LBB10_2: # %for.body
+; CHECK-NEXT:  .LBB13_2: # %for.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    movl %ebp, %eax
 ; CHECK-NEXT:    imulq %r14, %rax
@@ -591,12 +797,12 @@ define void @simple_urem_skip_const_rem_amt(i32 %N) nounwind {
 ; CHECK-NEXT:    callq use.i32@PLT
 ; CHECK-NEXT:    incl %ebp
 ; CHECK-NEXT:    decl %ebx
-; CHECK-NEXT:    jne .LBB10_2
+; CHECK-NEXT:    jne .LBB13_2
 ; CHECK-NEXT:  # %bb.3:
 ; CHECK-NEXT:    popq %rbx
 ; CHECK-NEXT:    popq %r14
 ; CHECK-NEXT:    popq %rbp
-; CHECK-NEXT:  .LBB10_4: # %for.cond.cleanup
+; CHECK-NEXT:  .LBB13_4: # %for.cond.cleanup
 ; CHECK-NEXT:    retq
 entry:
   %cmp3.not = icmp eq i32 %N, 0
@@ -623,14 +829,14 @@ define void @simple_urem_fail_no_preheader_non_canonical(i32 %N, i32 %rem_amt) n
 ; CHECK-NEXT:    movl %esi, %ebx
 ; CHECK-NEXT:    movl %edi, %ebp
 ; CHECK-NEXT:    testl %edi, %edi
-; CHECK-NEXT:    je .LBB11_1
+; CHECK-NEXT:    je .LBB14_1
 ; CHECK-NEXT:  # %bb.2: # %for.body1
 ; CHECK-NEXT:    movl $1, %r14d
-; CHECK-NEXT:    jmp .LBB11_3
-; CHECK-NEXT:  .LBB11_1:
+; CHECK-NEXT:    jmp .LBB14_3
+; CHECK-NEXT:  .LBB14_1:
 ; CHECK-NEXT:    xorl %r14d, %r14d
 ; CHECK-NEXT:    .p2align 4, 0x90
-; CHECK-NEXT:  .LBB11_3: # %for.body
+; CHECK-NEXT:  .LBB14_3: # %for.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    movl %r14d, %eax
 ; CHECK-NEXT:    xorl %edx, %edx
@@ -639,7 +845,7 @@ define void @simple_urem_fail_no_preheader_non_canonical(i32 %N, i32 %rem_amt) n
 ; CHECK-NEXT:    callq use.i32@PLT
 ; CHECK-NEXT:    incl %r14d
 ; CHECK-NEXT:    cmpl %r14d, %ebp
-; CHECK-NEXT:    jne .LBB11_3
+; CHECK-NEXT:    jne .LBB14_3
 ; CHECK-NEXT:  # %bb.4: # %for.cond.cleanup
 ; CHECK-NEXT:    popq %rbx
 ; CHECK-NEXT:    popq %r14
@@ -671,7 +877,7 @@ define void @simple_urem_multi_latch_non_canonical(i32 %N, i32 %rem_amt) nounwin
 ; CHECK-LABEL: simple_urem_multi_latch_non_canonical:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    testl %edi, %edi
-; CHECK-NEXT:    je .LBB12_6
+; CHECK-NEXT:    je .LBB15_6
 ; CHECK-NEXT:  # %bb.1: # %for.body.preheader
 ; CHECK-NEXT:    pushq %rbp
 ; CHECK-NEXT:    pushq %r15
@@ -682,12 +888,12 @@ define void @simple_urem_multi_latch_non_canonical(i32 %N, i32 %rem_amt) nounwin
 ; CHECK-NEXT:    movl %edi, %ebp
 ; CHECK-NEXT:    decl %ebp
 ; CHECK-NEXT:    xorl %r14d, %r14d
-; CHECK-NEXT:    jmp .LBB12_2
+; CHECK-NEXT:    jmp .LBB15_2
 ; CHECK-NEXT:    .p2align 4, 0x90
-; CHECK-NEXT:  .LBB12_3: # %for.body.backedge
-; CHECK-NEXT:    # in Loop: Header=BB12_2 Depth=1
+; CHECK-NEXT:  .LBB15_3: # %for.body.backedge
+; CHECK-NEXT:    # in Loop: Header=BB15_2 Depth=1
 ; CHECK-NEXT:    incl %r14d
-; CHECK-NEXT:  .LBB12_2: # %for.body
+; CHECK-NEXT:  .LBB15_2: # %for.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    movl %r14d, %eax
 ; CHECK-NEXT:    xorl %edx, %edx
@@ -698,19 +904,19 @@ define void @simple_urem_multi_latch_non_canonical(i32 %N, i32 %rem_amt) nounwin
 ; CHECK-NEXT:    movl %eax, %r15d
 ; CHECK-NEXT:    callq do_stuff0@PLT
 ; CHECK-NEXT:    testb $1, %r15b
-; CHECK-NEXT:    je .LBB12_3
+; CHECK-NEXT:    je .LBB15_3
 ; CHECK-NEXT:  # %bb.4: # %for.body0
-; CHECK-NEXT:    # in Loop: Header=BB12_2 Depth=1
+; CHECK-NEXT:    # in Loop: Header=BB15_2 Depth=1
 ; CHECK-NEXT:    callq do_stuff1@PLT
 ; CHECK-NEXT:    cmpl %r14d, %ebp
-; CHECK-NEXT:    jne .LBB12_3
+; CHECK-NEXT:    jne .LBB15_3
 ; CHECK-NEXT:  # %bb.5:
 ; CHECK-NEXT:    addq $8, %rsp
 ; CHECK-NEXT:    popq %rbx
 ; CHECK-NEXT:    popq %r14
 ; CHECK-NEXT:    popq %r15
 ; CHECK-NEXT:    popq %rbp
-; CHECK-NEXT:  .LBB12_6: # %for.cond.cleanup
+; CHECK-NEXT:  .LBB15_6: # %for.cond.cleanup
 ; CHECK-NEXT:    retq
 entry:
   %cmp3.not = icmp eq i32 %N, 0
@@ -744,25 +950,25 @@ define void @simple_urem_fail_bad_loop(i32 %N, i32 %rem_amt) nounwind {
 ; CHECK-NEXT:    callq get.i32@PLT
 ; CHECK-NEXT:    testl %eax, %eax
 ; CHECK-NEXT:    # implicit-def: $r14d
-; CHECK-NEXT:    jne .LBB13_4
+; CHECK-NEXT:    jne .LBB16_4
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    xorl %r14d, %r14d
-; CHECK-NEXT:  .LBB13_2: # %for.cond
+; CHECK-NEXT:  .LBB16_2: # %for.cond
 ; CHECK-NEXT:    cmpl %ebp, %r14d
-; CHECK-NEXT:    jae .LBB13_5
+; CHECK-NEXT:    jae .LBB16_5
 ; CHECK-NEXT:  # %bb.3: # %for.body
 ; CHECK-NEXT:    movl %r14d, %edi
 ; CHECK-NEXT:    xorl $1, %edi
 ; CHECK-NEXT:    callq use.i32@PLT
-; CHECK-NEXT:  .LBB13_4: # %halfway
+; CHECK-NEXT:  .LBB16_4: # %halfway
 ; CHECK-NEXT:    movl %r14d, %eax
 ; CHECK-NEXT:    xorl %edx, %edx
 ; CHECK-NEXT:    divl %ebx
 ; CHECK-NEXT:    movl %edx, %edi
 ; CHECK-NEXT:    callq use.i32@PLT
 ; CHECK-NEXT:    incl %r14d
-; CHECK-NEXT:    jmp .LBB13_2
-; CHECK-NEXT:  .LBB13_5: # %for.end
+; CHECK-NEXT:    jmp .LBB16_2
+; CHECK-NEXT:  .LBB16_5: # %for.end
 ; CHECK-NEXT:    popq %rbx
 ; CHECK-NEXT:    popq %r14
 ; CHECK-NEXT:    popq %rbp
@@ -797,7 +1003,7 @@ define void @simple_urem_fail_intermediate_inc(i32 %N, i32 %rem_amt) nounwind {
 ; CHECK-LABEL: simple_urem_fail_intermediate_inc:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    testl %edi, %edi
-; CHECK-NEXT:    je .LBB14_4
+; CHECK-NEXT:    je .LBB17_4
 ; CHECK-NEXT:  # %bb.1: # %for.body.preheader
 ; CHECK-NEXT:    pushq %r15
 ; CHECK-NEXT:    pushq %r14
@@ -807,7 +1013,7 @@ define void @simple_urem_fail_intermediate_inc(i32 %N, i32 %rem_amt) nounwind {
 ; CHECK-NEXT:    negl %r14d
 ; CHECK-NEXT:    movl $1, %r15d
 ; CHECK-NEXT:    .p2align 4, 0x90
-; CHECK-NEXT:  .LBB14_2: # %for.body
+; CHECK-NEXT:  .LBB17_2: # %for.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    movl %r15d, %eax
 ; CHECK-NEXT:    xorl %edx, %edx
@@ -819,12 +1025,12 @@ define void @simple_urem_fail_intermediate_inc(i32 %N, i32 %rem_amt) nounwind {
 ; CHECK-NEXT:    incl %ecx
 ; CHECK-NEXT:    cmpl $1, %eax
 ; CHECK-NEXT:    movl %ecx, %r15d
-; CHECK-NEXT:    jne .LBB14_2
+; CHECK-NEXT:    jne .LBB17_2
 ; CHECK-NEXT:  # %bb.3:
 ; CHECK-NEXT:    popq %rbx
 ; CHECK-NEXT:    popq %r14
 ; CHECK-NEXT:    popq %r15
-; CHECK-NEXT:  .LBB14_4: # %for.cond.cleanup
+; CHECK-NEXT:  .LBB17_4: # %for.cond.cleanup
 ; CHECK-NEXT:    retq
 entry:
   %cmp3.not = icmp eq i32 %N, 0
@@ -847,9 +1053,9 @@ define void @weird_loop(i64 %sub.ptr.div.i56) personality ptr null {
 ; CHECK-LABEL: weird_loop:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    .p2align 4, 0x90
-; CHECK-NEXT:  .LBB15_1: # %for.body
+; CHECK-NEXT:  .LBB18_1: # %for.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    jmp .LBB15_1
+; CHECK-NEXT:    jmp .LBB18_1
 entry:
   br label %for.preheader
 
@@ -863,11 +1069,11 @@ for.body:
   br i1 false, label %for.preheader, label %for.body
 }
 
-define void @simple_urem_to_sel_non_zero_start(i32 %N, i32 %rem_amt) nounwind {
-; CHECK-LABEL: simple_urem_to_sel_non_zero_start:
+define void @simple_urem_to_sel_non_zero_start_fail(i32 %N, i32 %rem_amt) nounwind {
+; CHECK-LABEL: simple_urem_to_sel_non_zero_start_fail:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    cmpl $3, %edi
-; CHECK-NEXT:    jb .LBB16_4
+; CHECK-NEXT:    jb .LBB19_4
 ; CHECK-NEXT:  # %bb.1: # %for.body.preheader
 ; CHECK-NEXT:    pushq %rbp
 ; CHECK-NEXT:    pushq %r14
@@ -876,7 +1082,7 @@ define void @simple_urem_to_sel_non_zero_start(i32 %N, i32 %rem_amt) nounwind {
 ; CHECK-NEXT:    movl %edi, %ebp
 ; CHECK-NEXT:    movl $2, %r14d
 ; CHECK-NEXT:    .p2align 4, 0x90
-; CHECK-NEXT:  .LBB16_2: # %for.body
+; CHECK-NEXT:  .LBB19_2: # %for.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    movl %r14d, %eax
 ; CHECK-NEXT:    xorl %edx, %edx
@@ -885,12 +1091,12 @@ define void @simple_urem_to_sel_non_zero_start(i32 %N, i32 %rem_amt) nounwind {
 ; CHECK-NEXT:    callq use.i32@PLT
 ; CHECK-NEXT:    incl %r14d
 ; CHECK-NEXT:    cmpl %r14d, %ebp
-; CHECK-NEXT:    jne .LBB16_2
+; CHECK-NEXT:    jne .LBB19_2
 ; CHECK-NEXT:  # %bb.3:
 ; CHECK-NEXT:    popq %rbx
 ; CHECK-NEXT:    popq %r14
 ; CHECK-NEXT:    popq %rbp
-; CHECK-NEXT:  .LBB16_4: # %for.cond.cleanup
+; CHECK-NEXT:  .LBB19_4: # %for.cond.cleanup
 ; CHECK-NEXT:    retq
 entry:
   %cmp3.not = icmp ult i32 %N, 3
@@ -908,11 +1114,58 @@ for.body:
   br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
 }
 
+define void @simple_urem_to_sel_non_zero_start_okay(i32 %N, i32 %rem_amt_in) nounwind {
+; CHECK-LABEL: simple_urem_to_sel_non_zero_start_okay:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    cmpl $3, %edi
+; CHECK-NEXT:    jb .LBB20_4
+; CHECK-NEXT:  # %bb.1: # %for.body.preheader
+; CHECK-NEXT:    pushq %rbp
+; CHECK-NEXT:    pushq %r14
+; CHECK-NEXT:    pushq %rbx
+; CHECK-NEXT:    movl %esi, %ebx
+; CHECK-NEXT:    movl %edi, %ebp
+; CHECK-NEXT:    orl $16, %ebx
+; CHECK-NEXT:    movl $2, %r14d
+; CHECK-NEXT:    .p2align 4, 0x90
+; CHECK-NEXT:  .LBB20_2: # %for.body
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    movl %r14d, %eax
+; CHECK-NEXT:    xorl %edx, %edx
+; CHECK-NEXT:    divl %ebx
+; CHECK-NEXT:    movl %edx, %edi
+; CHECK-NEXT:    callq use.i32@PLT
+; CHECK-NEXT:    incl %r14d
+; CHECK-NEXT:    cmpl %r14d, %ebp
+; CHECK-NEXT:    jne .LBB20_2
+; CHECK-NEXT:  # %bb.3:
+; CHECK-NEXT:    popq %rbx
+; CHECK-NEXT:    popq %r14
+; CHECK-NEXT:    popq %rbp
+; CHECK-NEXT:  .LBB20_4: # %for.cond.cleanup
+; CHECK-NEXT:    retq
+entry:
+  %rem_amt = or i32 %rem_amt_in, 16
+  %cmp3.not = icmp ult i32 %N, 3
+  br i1 %cmp3.not, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:
+  ret void
+
+for.body:
+  %i.04 = phi i32 [ %inc, %for.body ], [ 2, %entry ]
+  %rem = urem i32 %i.04, %rem_amt
+  tail call void @use.i32(i32 %rem)
+  %inc = add nuw i32 %i.04, 1
+  %exitcond.not = icmp eq i32 %inc, %N
+  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
+
 define void @simple_urem_to_sel_non_zero_start_through_add(i32 %N, i32 %rem_amt_in) nounwind {
 ; CHECK-LABEL: simple_urem_to_sel_non_zero_start_through_add:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    cmpl $3, %edi
-; CHECK-NEXT:    jb .LBB17_4
+; CHECK-NEXT:    jb .LBB21_4
 ; CHECK-NEXT:  # %bb.1: # %for.body.preheader
 ; CHECK-NEXT:    pushq %r15
 ; CHECK-NEXT:    pushq %r14
@@ -923,7 +1176,7 @@ define void @simple_urem_to_sel_non_zero_start_through_add(i32 %N, i32 %rem_amt_
 ; CHECK-NEXT:    negl %r14d
 ; CHECK-NEXT:    movl $7, %r15d
 ; CHECK-NEXT:    .p2align 4, 0x90
-; CHECK-NEXT:  .LBB17_2: # %for.body
+; CHECK-NEXT:  .LBB21_2: # %for.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    movl %r15d, %eax
 ; CHECK-NEXT:    xorl %edx, %edx
@@ -935,12 +1188,12 @@ define void @simple_urem_to_sel_non_zero_start_through_add(i32 %N, i32 %rem_amt_
 ; CHECK-NEXT:    incl %ecx
 ; CHECK-NEXT:    cmpl $5, %eax
 ; CHECK-NEXT:    movl %ecx, %r15d
-; CHECK-NEXT:    jne .LBB17_2
+; CHECK-NEXT:    jne .LBB21_2
 ; CHECK-NEXT:  # %bb.3:
 ; CHECK-NEXT:    popq %rbx
 ; CHECK-NEXT:    popq %r14
 ; CHECK-NEXT:    popq %r15
-; CHECK-NEXT:  .LBB17_4: # %for.cond.cleanup
+; CHECK-NEXT:  .LBB21_4: # %for.cond.cleanup
 ; CHECK-NEXT:    retq
 entry:
   %rem_amt = or i32 %rem_amt_in, 16
@@ -964,7 +1217,7 @@ define void @simple_urem_to_sel_non_zero_start_through_add_fail_missing_nuw(i32
 ; CHECK-LABEL: simple_urem_to_sel_non_zero_start_through_add_fail_missing_nuw:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    cmpl $3, %edi
-; CHECK-NEXT:    jb .LBB18_4
+; CHECK-NEXT:    jb .LBB22_4
 ; CHECK-NEXT:  # %bb.1: # %for.body.preheader
 ; CHECK-NEXT:    pushq %r15
 ; CHECK-NEXT:    pushq %r14
@@ -975,7 +1228,7 @@ define void @simple_urem_to_sel_non_zero_start_through_add_fail_missing_nuw(i32
 ; CHECK-NEXT:    negl %r14d
 ; CHECK-NEXT:    movl $7, %r15d
 ; CHECK-NEXT:    .p2align 4, 0x90
-; CHECK-NEXT:  .LBB18_2: # %for.body
+; CHECK-NEXT:  .LBB22_2: # %for.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    movl %r15d, %eax
 ; CHECK-NEXT:    xorl %edx, %edx
@@ -987,12 +1240,12 @@ define void @simple_urem_to_sel_non_zero_start_through_add_fail_missing_nuw(i32
 ; CHECK-NEXT:    incl %ecx
 ; CHECK-NEXT:    cmpl $5, %eax
 ; CHECK-NEXT:    movl %ecx, %r15d
-; CHECK-NEXT:    jne .LBB18_2
+; CHECK-NEXT:    jne .LBB22_2
 ; CHECK-NEXT:  # %bb.3:
 ; CHECK-NEXT:    popq %rbx
 ; CHECK-NEXT:    popq %r14
 ; CHECK-NEXT:    popq %r15
-; CHECK-NEXT:  .LBB18_4: # %for.cond.cleanup
+; CHECK-NEXT:  .LBB22_4: # %for.cond.cleanup
 ; CHECK-NEXT:    retq
 entry:
   %rem_amt = or i32 %rem_amt_in, 16
@@ -1016,7 +1269,7 @@ define void @simple_urem_to_sel_non_zero_start_through_add_fail_no_simplify_rem(
 ; CHECK-LABEL: simple_urem_to_sel_non_zero_start_through_add_fail_no_simplify_rem:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    cmpl $3, %edi
-; CHECK-NEXT:    jb .LBB19_4
+; CHECK-NEXT:    jb .LBB23_4
 ; CHECK-NEXT:  # %bb.1: # %for.body.preheader
 ; CHECK-NEXT:    pushq %r15
 ; CHECK-NEXT:    pushq %r14
@@ -1026,7 +1279,7 @@ define void @simple_urem_to_sel_non_zero_start_through_add_fail_no_simplify_rem(
 ; CHECK-NEXT:    negl %r14d
 ; CHECK-NEXT:    movl $7, %r15d
 ; CHECK-NEXT:    .p2align 4, 0x90
-; CHECK-NEXT:  .LBB19_2: # %for.body
+; CHECK-NEXT:  .LBB23_2: # %for.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    movl %r15d, %eax
 ; CHECK-NEXT:    xorl %edx, %edx
@@ -1038,12 +1291,12 @@ define void @simple_urem_to_sel_non_zero_start_through_add_fail_no_simplify_rem(
 ; CHECK-NEXT:    incl %ecx
 ; CHECK-NEXT:    cmpl $5, %eax
 ; CHECK-NEXT:    movl %ecx, %r15d
-; CHECK-NEXT:    jne .LBB19_2
+; CHECK-NEXT:    jne .LBB23_2
 ; CHECK-NEXT:  # %bb.3:
 ; CHECK-NEXT:    popq %rbx
 ; CHECK-NEXT:    popq %r14
 ; CHECK-NEXT:    popq %r15
-; CHECK-NEXT:  .LBB19_4: # %for.cond.cleanup
+; CHECK-NEXT:  .LBB23_4: # %for.cond.cleanup
 ; CHECK-NEXT:    retq
 entry:
   %cmp3.not = icmp ult i32 %N, 3
@@ -1070,12 +1323,12 @@ define void @simple_urem_to_sel_non_zero_start_through_sub(i32 %N, i32 %rem_amt,
 ; CHECK-NEXT:    pushq %rbx
 ; CHECK-NEXT:    movl %edi, %ebp
 ; CHECK-NEXT:    subl %edx, %ebp
-; CHECK-NEXT:    jbe .LBB20_3
+; CHECK-NEXT:    jbe .LBB24_3
 ; CHECK-NEXT:  # %bb.1: # %for.body.preheader
 ; CHECK-NEXT:    movl %esi, %ebx
 ; CHECK-NEXT:    xorl %r14d, %r14d
 ; CHECK-NEXT:    .p2align 4, 0x90
-; CHECK-NEXT:  .LBB20_2: # %for.body
+; CHECK-NEXT:  .LBB24_2: # %for.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    movl %r14d, %eax
 ; CHECK-NEXT:    xorl %edx, %edx
@@ -1084,8 +1337,8 @@ define void @simple_urem_to_sel_non_zero_start_through_sub(i32 %N, i32 %rem_amt,
 ; CHECK-NEXT:    callq use.i32@PLT
 ; CHECK-NEXT:    incl %r14d
 ; CHECK-NEXT:    cmpl %r14d, %ebp
-; CHECK-NEXT:    jne .LBB20_2
-; CHECK-NEXT:  .LBB20_3: # %for.cond.cleanup
+; CHECK-NEXT:    jne .LBB24_2
+; CHECK-NEXT:  .LBB24_3: # %for.cond.cleanup
 ; CHECK-NEXT:    popq %rbx
 ; CHECK-NEXT:    popq %r14
 ; CHECK-NEXT:    popq %rbp
@@ -1111,7 +1364,7 @@ define void @simple_urem_to_sel_non_zero_start_through_sub_no_simplfy(i32 %N, i3
 ; CHECK-LABEL: simple_urem_to_sel_non_zero_start_through_sub_no_simplfy:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    cmpl %edx, %edi
-; CHECK-NEXT:    jbe .LBB21_4
+; CHECK-NEXT:    jbe .LBB25_4
 ; CHECK-NEXT:  # %bb.1: # %for.body.preheader
 ; CHECK-NEXT:    pushq %r15
 ; CHECK-NEXT:    pushq %r14
@@ -1122,7 +1375,7 @@ define void @simple_urem_to_sel_non_zero_start_through_sub_no_simplfy(i32 %N, i3
 ; CHECK-NEXT:    negl %r14d
 ; CHECK-NEXT:    addl $-2, %r15d
 ; CHECK-NEXT:    .p2align 4, 0x90
-; CHECK-NEXT:  .LBB21_2: # %for.body
+; CHECK-NEXT:  .LBB25_2: # %for.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    movl %r15d, %eax
 ; CHECK-NEXT:    xorl %edx, %edx
@@ -1134,12 +1387,12 @@ define void @simple_urem_to_sel_non_zero_start_through_sub_no_simplfy(i32 %N, i3
 ; CHECK-NEXT:    incl %ecx
 ; CHECK-NEXT:    cmpl $-2, %eax
 ; CHECK-NEXT:    movl %ecx, %r15d
-; CHECK-NEXT:    jne .LBB21_2
+; CHECK-NEXT:    jne .LBB25_2
 ; CHECK-NEXT:  # %bb.3:
 ; CHECK-NEXT:    popq %rbx
 ; CHECK-NEXT:    popq %r14
 ; CHECK-NEXT:    popq %r15
-; CHECK-NEXT:  .LBB21_4: # %for.cond.cleanup
+; CHECK-NEXT:  .LBB25_4: # %for.cond.cleanup
 ; CHECK-NEXT:    retq
 entry:
   %cmp3.not = icmp ule i32 %N, %start

diff  --git a/llvm/test/Transforms/CodeGenPrepare/X86/fold-loop-of-urem.ll b/llvm/test/Transforms/CodeGenPrepare/X86/fold-loop-of-urem.ll
index a019679e65905d..a58438d03b7157 100644
--- a/llvm/test/Transforms/CodeGenPrepare/X86/fold-loop-of-urem.ll
+++ b/llvm/test/Transforms/CodeGenPrepare/X86/fold-loop-of-urem.ll
@@ -42,6 +42,157 @@ for.body:
   br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
 }
 
+define void @simple_urem_to_sel_fail_not_in_loop(i32 %N, i32 %rem_amt) nounwind {
+; CHECK-LABEL: define void @simple_urem_to_sel_fail_not_in_loop(
+; CHECK-SAME: i32 [[N:%.*]], i32 [[REM_AMT:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    [[CMP3_NOT:%.*]] = icmp eq i32 [[N]], 0
+; CHECK-NEXT:    br i1 [[CMP3_NOT]], label %[[FOR_COND_CLEANUP:.*]], label %[[FOR_BODY_PREHEADER:.*]]
+; CHECK:       [[FOR_BODY_PREHEADER]]:
+; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
+; CHECK:       [[FOR_COND_CLEANUP]]:
+; CHECK-NEXT:    [[I_05:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT:    [[REM:%.*]] = urem i32 [[I_05]], [[REM_AMT]]
+; CHECK-NEXT:    tail call void @use.i32(i32 [[REM]])
+; CHECK-NEXT:    ret void
+; CHECK:       [[FOR_BODY]]:
+; CHECK-NEXT:    [[I_04:%.*]] = phi i32 [ [[INC]], %[[FOR_BODY]] ], [ 0, %[[FOR_BODY_PREHEADER]] ]
+; CHECK-NEXT:    tail call void @use.i32(i32 [[I_04]])
+; CHECK-NEXT:    [[INC]] = add nuw i32 [[I_04]], 1
+; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], [[N]]
+; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]]
+;
+entry:
+  %cmp3.not = icmp eq i32 %N, 0
+  br i1 %cmp3.not, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:
+  %i.05 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+  %rem = urem i32 %i.05, %rem_amt
+  tail call void @use.i32(i32 %rem)
+  ret void
+
+for.body:
+  %i.04 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+  tail call void @use.i32(i32 %i.04)
+  %inc = add nuw i32 %i.04, 1
+  %exitcond.not = icmp eq i32 %inc, %N
+  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
+
+define void @simple_urem_to_sel_inner_loop(i32 %N, i32 %M) nounwind {
+; CHECK-LABEL: define void @simple_urem_to_sel_inner_loop(
+; CHECK-SAME: i32 [[N:%.*]], i32 [[M:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[REM_AMT:%.*]] = call i32 @get.i32()
+; CHECK-NEXT:    [[CMP3_NOT:%.*]] = icmp eq i32 [[N]], 0
+; CHECK-NEXT:    br i1 [[CMP3_NOT]], label %[[FOR_COND_CLEANUP:.*]], label %[[FOR_BODY_PREHEADER:.*]]
+; CHECK:       [[FOR_BODY_PREHEADER]]:
+; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
+; CHECK:       [[FOR_COND_CLEANUP]]:
+; CHECK-NEXT:    ret void
+; CHECK:       [[FOR_BODY]]:
+; CHECK-NEXT:    [[I_04:%.*]] = phi i32 [ [[INC:%.*]], %[[FOR_INNER_COND_CLEANUP:.*]] ], [ 0, %[[FOR_BODY_PREHEADER]] ]
+; CHECK-NEXT:    [[CMP_INNER:%.*]] = icmp eq i32 [[M]], 0
+; CHECK-NEXT:    br i1 [[CMP_INNER]], label %[[FOR_INNER_COND_CLEANUP]], label %[[FOR_INNER_BODY_PREHEADER:.*]]
+; CHECK:       [[FOR_INNER_BODY_PREHEADER]]:
+; CHECK-NEXT:    br label %[[FOR_INNER_BODY:.*]]
+; CHECK:       [[FOR_INNER_BODY]]:
+; CHECK-NEXT:    [[J:%.*]] = phi i32 [ [[INC_INNER:%.*]], %[[FOR_INNER_BODY]] ], [ 0, %[[FOR_INNER_BODY_PREHEADER]] ]
+; CHECK-NEXT:    [[REM:%.*]] = urem i32 [[I_04]], [[REM_AMT]]
+; CHECK-NEXT:    tail call void @use.i32(i32 [[REM]])
+; CHECK-NEXT:    [[INC_INNER]] = add nuw i32 [[J]], 1
+; CHECK-NEXT:    [[EXITCOND_INNER:%.*]] = icmp eq i32 [[INC_INNER]], [[M]]
+; CHECK-NEXT:    br i1 [[EXITCOND_INNER]], label %[[FOR_INNER_COND_CLEANUP]], label %[[FOR_INNER_BODY]]
+; CHECK:       [[FOR_INNER_COND_CLEANUP]]:
+; CHECK-NEXT:    [[INC]] = add nuw i32 [[I_04]], 1
+; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], [[N]]
+; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]]
+;
+entry:
+  %rem_amt = call i32 @get.i32()
+  %cmp3.not = icmp eq i32 %N, 0
+  br i1 %cmp3.not, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:
+  ret void
+
+for.body:
+  %i.04 = phi i32 [ %inc, %for.inner.cond.cleanup ], [ 0, %entry ]
+
+  %cmp_inner = icmp eq i32 %M, 0
+  br i1 %cmp_inner, label %for.inner.cond.cleanup, label %for.inner.body
+
+for.inner.body:
+  %j = phi i32 [ %inc_inner, %for.inner.body ], [ 0, %for.body ]
+  %rem = urem i32 %i.04, %rem_amt
+  tail call void @use.i32(i32 %rem)
+  %inc_inner = add nuw i32 %j, 1
+  %exitcond_inner = icmp eq i32 %inc_inner, %M
+  br i1 %exitcond_inner, label %for.inner.cond.cleanup, label %for.inner.body
+
+for.inner.cond.cleanup:
+  %inc = add nuw i32 %i.04, 1
+  %exitcond.not = icmp eq i32 %inc, %N
+  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
+
+define void @simple_urem_to_sel_inner_loop_fail_not_invariant(i32 %N, i32 %M) nounwind {
+; CHECK-LABEL: define void @simple_urem_to_sel_inner_loop_fail_not_invariant(
+; CHECK-SAME: i32 [[N:%.*]], i32 [[M:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[CMP3_NOT:%.*]] = icmp eq i32 [[N]], 0
+; CHECK-NEXT:    br i1 [[CMP3_NOT]], label %[[FOR_COND_CLEANUP:.*]], label %[[FOR_BODY_PREHEADER:.*]]
+; CHECK:       [[FOR_BODY_PREHEADER]]:
+; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
+; CHECK:       [[FOR_COND_CLEANUP]]:
+; CHECK-NEXT:    ret void
+; CHECK:       [[FOR_BODY]]:
+; CHECK-NEXT:    [[I_04:%.*]] = phi i32 [ [[INC:%.*]], %[[FOR_INNER_COND_CLEANUP:.*]] ], [ 0, %[[FOR_BODY_PREHEADER]] ]
+; CHECK-NEXT:    [[REM_AMT:%.*]] = call i32 @get.i32()
+; CHECK-NEXT:    [[CMP_INNER:%.*]] = icmp eq i32 [[M]], 0
+; CHECK-NEXT:    br i1 [[CMP_INNER]], label %[[FOR_INNER_COND_CLEANUP]], label %[[FOR_INNER_BODY_PREHEADER:.*]]
+; CHECK:       [[FOR_INNER_BODY_PREHEADER]]:
+; CHECK-NEXT:    br label %[[FOR_INNER_BODY:.*]]
+; CHECK:       [[FOR_INNER_BODY]]:
+; CHECK-NEXT:    [[J:%.*]] = phi i32 [ [[INC_INNER:%.*]], %[[FOR_INNER_BODY]] ], [ 0, %[[FOR_INNER_BODY_PREHEADER]] ]
+; CHECK-NEXT:    [[REM:%.*]] = urem i32 [[I_04]], [[REM_AMT]]
+; CHECK-NEXT:    tail call void @use.i32(i32 [[REM]])
+; CHECK-NEXT:    [[INC_INNER]] = add nuw i32 [[J]], 1
+; CHECK-NEXT:    [[EXITCOND_INNER:%.*]] = icmp eq i32 [[INC_INNER]], [[M]]
+; CHECK-NEXT:    br i1 [[EXITCOND_INNER]], label %[[FOR_INNER_COND_CLEANUP]], label %[[FOR_INNER_BODY]]
+; CHECK:       [[FOR_INNER_COND_CLEANUP]]:
+; CHECK-NEXT:    [[INC]] = add nuw i32 [[I_04]], 1
+; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], [[N]]
+; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]]
+;
+entry:
+  %cmp3.not = icmp eq i32 %N, 0
+  br i1 %cmp3.not, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:
+  ret void
+
+for.body:
+  %i.04 = phi i32 [ %inc, %for.inner.cond.cleanup ], [ 0, %entry ]
+  %rem_amt = call i32 @get.i32()
+  %cmp_inner = icmp eq i32 %M, 0
+  br i1 %cmp_inner, label %for.inner.cond.cleanup, label %for.inner.body
+
+for.inner.body:
+  %j = phi i32 [ %inc_inner, %for.inner.body ], [ 0, %for.body ]
+  %rem = urem i32 %i.04, %rem_amt
+  tail call void @use.i32(i32 %rem)
+  %inc_inner = add nuw i32 %j, 1
+  %exitcond_inner = icmp eq i32 %inc_inner, %M
+  br i1 %exitcond_inner, label %for.inner.cond.cleanup, label %for.inner.body
+
+for.inner.cond.cleanup:
+  %inc = add nuw i32 %i.04, 1
+  %exitcond.not = icmp eq i32 %inc, %N
+  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
+
 define void @simple_urem_to_sel_nested2(i32 %N, i32 %rem_amt) nounwind {
 ; CHECK-LABEL: define void @simple_urem_to_sel_nested2(
 ; CHECK-SAME: i32 [[N:%.*]], i32 [[REM_AMT:%.*]]) #[[ATTR0]] {
@@ -639,8 +790,8 @@ for.body:
   br i1 false, label %for.preheader, label %for.body
 }
 
-define void @simple_urem_to_sel_non_zero_start(i32 %N, i32 %rem_amt) nounwind {
-; CHECK-LABEL: define void @simple_urem_to_sel_non_zero_start(
+define void @simple_urem_to_sel_non_zero_start_fail(i32 %N, i32 %rem_amt) nounwind {
+; CHECK-LABEL: define void @simple_urem_to_sel_non_zero_start_fail(
 ; CHECK-SAME: i32 [[N:%.*]], i32 [[REM_AMT:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
 ; CHECK-NEXT:    [[CMP3_NOT:%.*]] = icmp ult i32 [[N]], 3
@@ -673,6 +824,42 @@ for.body:
   br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
 }
 
+define void @simple_urem_to_sel_non_zero_start_okay(i32 %N, i32 %rem_amt_in) nounwind {
+; CHECK-LABEL: define void @simple_urem_to_sel_non_zero_start_okay(
+; CHECK-SAME: i32 [[N:%.*]], i32 [[REM_AMT_IN:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[REM_AMT:%.*]] = or i32 [[REM_AMT_IN]], 16
+; CHECK-NEXT:    [[CMP3_NOT:%.*]] = icmp ult i32 [[N]], 3
+; CHECK-NEXT:    br i1 [[CMP3_NOT]], label %[[FOR_COND_CLEANUP:.*]], label %[[FOR_BODY_PREHEADER:.*]]
+; CHECK:       [[FOR_BODY_PREHEADER]]:
+; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
+; CHECK:       [[FOR_COND_CLEANUP]]:
+; CHECK-NEXT:    ret void
+; CHECK:       [[FOR_BODY]]:
+; CHECK-NEXT:    [[I_04:%.*]] = phi i32 [ [[INC:%.*]], %[[FOR_BODY]] ], [ 2, %[[FOR_BODY_PREHEADER]] ]
+; CHECK-NEXT:    [[REM:%.*]] = urem i32 [[I_04]], [[REM_AMT]]
+; CHECK-NEXT:    tail call void @use.i32(i32 [[REM]])
+; CHECK-NEXT:    [[INC]] = add nuw i32 [[I_04]], 1
+; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], [[N]]
+; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]]
+;
+entry:
+  %rem_amt = or i32 %rem_amt_in, 16
+  %cmp3.not = icmp ult i32 %N, 3
+  br i1 %cmp3.not, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:
+  ret void
+
+for.body:
+  %i.04 = phi i32 [ %inc, %for.body ], [ 2, %entry ]
+  %rem = urem i32 %i.04, %rem_amt
+  tail call void @use.i32(i32 %rem)
+  %inc = add nuw i32 %i.04, 1
+  %exitcond.not = icmp eq i32 %inc, %N
+  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
+
 define void @simple_urem_to_sel_non_zero_start_through_add(i32 %N, i32 %rem_amt_in) nounwind {
 ; CHECK-LABEL: define void @simple_urem_to_sel_non_zero_start_through_add(
 ; CHECK-SAME: i32 [[N:%.*]], i32 [[REM_AMT_IN:%.*]]) #[[ATTR0]] {


        


More information about the llvm-commits mailing list