[llvm] [IndVarsSimplify] sinkUnusedInvariants is skipping instructions while sinking. (PR #135205)

Sirish Pande via llvm-commits llvm-commits at lists.llvm.org
Thu Apr 17 11:23:52 PDT 2025


https://github.com/srpande updated https://github.com/llvm/llvm-project/pull/135205

>From fd4defeaa685cc860adcac470e3f6964195c41d1 Mon Sep 17 00:00:00 2001
From: Sirish Pande <Sirish.Pande at amd.com>
Date: Mon, 7 Apr 2025 16:49:35 -0500
Subject: [PATCH] [IndVarsSimplify] sinkUnusedInvariants is skipping
 instructions while sinking.

While sinking instructions (that are loop invariant)  from preheader
to the exit block, we are skipping instructions due to decrementing
instruction iterator twice.
---
 llvm/lib/Transforms/Scalar/IndVarSimplify.cpp |  43 +-
 .../IndVarSimplify/ARM/code-size.ll           | 605 +++++++++---------
 .../ARM/indvar-unroll-imm-cost.ll             |   2 +-
 .../IndVarSimplify/exit-count-select.ll       |  16 +-
 .../IndVarSimplify/exit_value_test3.ll        |   2 +-
 .../IndVarSimplify/finite-exit-comparisons.ll |  10 +-
 .../Transforms/IndVarSimplify/pr116483.ll     |   8 +-
 .../test/Transforms/IndVarSimplify/pr63763.ll |   4 +-
 .../IndVarSimplify/replace-loop-exit-folds.ll |  21 +-
 .../Transforms/IndVarSimplify/sentinel.ll     |   8 +-
 .../Transforms/LoopUnroll/unroll-cleanup.ll   |  44 +-
 .../PhaseOrdering/ARM/arm_mean_q7.ll          |  57 +-
 12 files changed, 415 insertions(+), 405 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
index 9619dfdbf4123..3f04f7f57c54f 100644
--- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -1096,10 +1096,12 @@ bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
   if (!Preheader) return false;
 
   bool MadeAnyChanges = false;
-  BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt();
-  BasicBlock::iterator I(Preheader->getTerminator());
-  while (I != Preheader->begin()) {
-    --I;
+  for (Instruction &I : llvm::make_early_inc_range(llvm::reverse(*Preheader))) {
+
+    // Skip BB Terminator.
+    if (Preheader->getTerminator() == &I)
+      continue;
+
     // New instructions were inserted at the end of the preheader.
     if (isa<PHINode>(I))
       break;
@@ -1110,28 +1112,28 @@ bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
     // memory. Note that it's okay if the instruction might have undefined
     // behavior: LoopSimplify guarantees that the preheader dominates the exit
     // block.
-    if (I->mayHaveSideEffects() || I->mayReadFromMemory())
+    if (I.mayHaveSideEffects() || I.mayReadFromMemory())
       continue;
 
-    // Skip debug info intrinsics.
-    if (isa<DbgInfoIntrinsic>(I))
+    // Skip debug or pseudo instructions.
+    if (I.isDebugOrPseudoInst())
       continue;
 
     // Skip eh pad instructions.
-    if (I->isEHPad())
+    if (I.isEHPad())
       continue;
 
     // Don't sink alloca: we never want to sink static alloca's out of the
     // entry block, and correctly sinking dynamic alloca's requires
     // checks for stacksave/stackrestore intrinsics.
     // FIXME: Refactor this check somehow?
-    if (isa<AllocaInst>(I))
+    if (isa<AllocaInst>(&I))
       continue;
 
     // Determine if there is a use in or before the loop (direct or
     // otherwise).
     bool UsedInLoop = false;
-    for (Use &U : I->uses()) {
+    for (Use &U : I.uses()) {
       Instruction *User = cast<Instruction>(U.getUser());
       BasicBlock *UseBB = User->getParent();
       if (PHINode *P = dyn_cast<PHINode>(User)) {
@@ -1150,26 +1152,9 @@ bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
       continue;
 
     // Otherwise, sink it to the exit block.
-    Instruction *ToMove = &*I;
-    bool Done = false;
-
-    if (I != Preheader->begin()) {
-      // Skip debug info intrinsics.
-      do {
-        --I;
-      } while (I->isDebugOrPseudoInst() && I != Preheader->begin());
-
-      if (I->isDebugOrPseudoInst() && I == Preheader->begin())
-        Done = true;
-    } else {
-      Done = true;
-    }
-
+    I.moveBefore(ExitBlock->getFirstInsertionPt());
+    SE->forgetValue(&I);
     MadeAnyChanges = true;
-    ToMove->moveBefore(*ExitBlock, InsertPt);
-    SE->forgetValue(ToMove);
-    if (Done) break;
-    InsertPt = ToMove->getIterator();
   }
 
   return MadeAnyChanges;
diff --git a/llvm/test/Transforms/IndVarSimplify/ARM/code-size.ll b/llvm/test/Transforms/IndVarSimplify/ARM/code-size.ll
index c7231392229c9..51296fb86a7c3 100644
--- a/llvm/test/Transforms/IndVarSimplify/ARM/code-size.ll
+++ b/llvm/test/Transforms/IndVarSimplify/ARM/code-size.ll
@@ -1,33 +1,35 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
 ; RUN: opt -mtriple=thumbv8m.main -passes=indvars -S < %s | FileCheck %s --check-prefix=CHECK-V8M
 ; RUN: opt -mtriple=thumbv8a -passes=indvars -S < %s | FileCheck %s --check-prefix=CHECK-V8A
 
 define i32 @remove_loop(i32 %size) #0 {
-; CHECK-V8M-LABEL: @remove_loop(
-; CHECK-V8M-NEXT:  entry:
-; CHECK-V8M-NEXT:    [[TMP0:%.*]] = add i32 [[SIZE:%.*]], 31
+; CHECK-V8M-LABEL: define i32 @remove_loop(
+; CHECK-V8M-SAME: i32 [[SIZE:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-V8M-NEXT:  [[ENTRY:.*:]]
+; CHECK-V8M-NEXT:    br label %[[WHILE_COND:.*]]
+; CHECK-V8M:       [[WHILE_COND]]:
+; CHECK-V8M-NEXT:    br i1 false, label %[[WHILE_COND]], label %[[WHILE_END:.*]]
+; CHECK-V8M:       [[WHILE_END]]:
+; CHECK-V8M-NEXT:    [[TMP0:%.*]] = add i32 [[SIZE]], 31
 ; CHECK-V8M-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SIZE]], i32 31)
 ; CHECK-V8M-NEXT:    [[TMP1:%.*]] = sub i32 [[TMP0]], [[UMIN]]
 ; CHECK-V8M-NEXT:    [[TMP2:%.*]] = lshr i32 [[TMP1]], 5
 ; CHECK-V8M-NEXT:    [[TMP3:%.*]] = shl nuw i32 [[TMP2]], 5
-; CHECK-V8M-NEXT:    br label [[WHILE_COND:%.*]]
-; CHECK-V8M:       while.cond:
-; CHECK-V8M-NEXT:    br i1 false, label [[WHILE_COND]], label [[WHILE_END:%.*]]
-; CHECK-V8M:       while.end:
 ; CHECK-V8M-NEXT:    [[TMP4:%.*]] = sub i32 [[SIZE]], [[TMP3]]
 ; CHECK-V8M-NEXT:    ret i32 [[TMP4]]
 ;
-; CHECK-V8A-LABEL: @remove_loop(
-; CHECK-V8A-NEXT:  entry:
-; CHECK-V8A-NEXT:    [[TMP0:%.*]] = add i32 [[SIZE:%.*]], 31
+; CHECK-V8A-LABEL: define i32 @remove_loop(
+; CHECK-V8A-SAME: i32 [[SIZE:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-V8A-NEXT:  [[ENTRY:.*:]]
+; CHECK-V8A-NEXT:    br label %[[WHILE_COND:.*]]
+; CHECK-V8A:       [[WHILE_COND]]:
+; CHECK-V8A-NEXT:    br i1 false, label %[[WHILE_COND]], label %[[WHILE_END:.*]]
+; CHECK-V8A:       [[WHILE_END]]:
+; CHECK-V8A-NEXT:    [[TMP0:%.*]] = add i32 [[SIZE]], 31
 ; CHECK-V8A-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SIZE]], i32 31)
 ; CHECK-V8A-NEXT:    [[TMP1:%.*]] = sub i32 [[TMP0]], [[UMIN]]
 ; CHECK-V8A-NEXT:    [[TMP2:%.*]] = lshr i32 [[TMP1]], 5
 ; CHECK-V8A-NEXT:    [[TMP3:%.*]] = shl nuw i32 [[TMP2]], 5
-; CHECK-V8A-NEXT:    br label [[WHILE_COND:%.*]]
-; CHECK-V8A:       while.cond:
-; CHECK-V8A-NEXT:    br i1 false, label [[WHILE_COND]], label [[WHILE_END:%.*]]
-; CHECK-V8A:       while.end:
 ; CHECK-V8A-NEXT:    [[TMP4:%.*]] = sub i32 [[SIZE]], [[TMP3]]
 ; CHECK-V8A-NEXT:    ret i32 [[TMP4]]
 ;
@@ -46,58 +48,60 @@ while.end:                                        ; preds = %while.cond
 }
 
 define void @expandOuterRecurrence(i32 %arg) nounwind #0 {
-; CHECK-V8M-LABEL: @expandOuterRecurrence(
-; CHECK-V8M-NEXT:  entry:
-; CHECK-V8M-NEXT:    [[SUB1:%.*]] = sub nsw i32 [[ARG:%.*]], 1
+; CHECK-V8M-LABEL: define void @expandOuterRecurrence(
+; CHECK-V8M-SAME: i32 [[ARG:%.*]]) #[[ATTR1:[0-9]+]] {
+; CHECK-V8M-NEXT:  [[ENTRY:.*:]]
+; CHECK-V8M-NEXT:    [[SUB1:%.*]] = sub nsw i32 [[ARG]], 1
 ; CHECK-V8M-NEXT:    [[CMP1:%.*]] = icmp slt i32 0, [[SUB1]]
-; CHECK-V8M-NEXT:    br i1 [[CMP1]], label [[OUTER_PREHEADER:%.*]], label [[EXIT:%.*]]
-; CHECK-V8M:       outer.preheader:
-; CHECK-V8M-NEXT:    br label [[OUTER:%.*]]
-; CHECK-V8M:       outer:
-; CHECK-V8M-NEXT:    [[I:%.*]] = phi i32 [ [[I_INC:%.*]], [[OUTER_INC:%.*]] ], [ 0, [[OUTER_PREHEADER]] ]
+; CHECK-V8M-NEXT:    br i1 [[CMP1]], label %[[OUTER_PREHEADER:.*]], label %[[EXIT:.*]]
+; CHECK-V8M:       [[OUTER_PREHEADER]]:
+; CHECK-V8M-NEXT:    br label %[[OUTER:.*]]
+; CHECK-V8M:       [[OUTER]]:
+; CHECK-V8M-NEXT:    [[I:%.*]] = phi i32 [ [[I_INC:%.*]], %[[OUTER_INC:.*]] ], [ 0, %[[OUTER_PREHEADER]] ]
 ; CHECK-V8M-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[ARG]], [[I]]
 ; CHECK-V8M-NEXT:    [[SUB3:%.*]] = sub nsw i32 [[SUB2]], 1
 ; CHECK-V8M-NEXT:    [[CMP2:%.*]] = icmp slt i32 0, [[SUB3]]
-; CHECK-V8M-NEXT:    br i1 [[CMP2]], label [[INNER_PH:%.*]], label [[OUTER_INC]]
-; CHECK-V8M:       inner.ph:
-; CHECK-V8M-NEXT:    br label [[INNER:%.*]]
-; CHECK-V8M:       inner:
-; CHECK-V8M-NEXT:    br i1 false, label [[INNER]], label [[OUTER_INC_LOOPEXIT:%.*]]
-; CHECK-V8M:       outer.inc.loopexit:
-; CHECK-V8M-NEXT:    br label [[OUTER_INC]]
-; CHECK-V8M:       outer.inc:
+; CHECK-V8M-NEXT:    br i1 [[CMP2]], label %[[INNER_PH:.*]], label %[[OUTER_INC]]
+; CHECK-V8M:       [[INNER_PH]]:
+; CHECK-V8M-NEXT:    br label %[[INNER:.*]]
+; CHECK-V8M:       [[INNER]]:
+; CHECK-V8M-NEXT:    br i1 false, label %[[INNER]], label %[[OUTER_INC_LOOPEXIT:.*]]
+; CHECK-V8M:       [[OUTER_INC_LOOPEXIT]]:
+; CHECK-V8M-NEXT:    br label %[[OUTER_INC]]
+; CHECK-V8M:       [[OUTER_INC]]:
 ; CHECK-V8M-NEXT:    [[I_INC]] = add nuw nsw i32 [[I]], 1
-; CHECK-V8M-NEXT:    br i1 false, label [[OUTER]], label [[EXIT_LOOPEXIT:%.*]]
-; CHECK-V8M:       exit.loopexit:
-; CHECK-V8M-NEXT:    br label [[EXIT]]
-; CHECK-V8M:       exit:
+; CHECK-V8M-NEXT:    br i1 false, label %[[OUTER]], label %[[EXIT_LOOPEXIT:.*]]
+; CHECK-V8M:       [[EXIT_LOOPEXIT]]:
+; CHECK-V8M-NEXT:    br label %[[EXIT]]
+; CHECK-V8M:       [[EXIT]]:
 ; CHECK-V8M-NEXT:    ret void
 ;
-; CHECK-V8A-LABEL: @expandOuterRecurrence(
-; CHECK-V8A-NEXT:  entry:
-; CHECK-V8A-NEXT:    [[SUB1:%.*]] = sub nsw i32 [[ARG:%.*]], 1
+; CHECK-V8A-LABEL: define void @expandOuterRecurrence(
+; CHECK-V8A-SAME: i32 [[ARG:%.*]]) #[[ATTR1:[0-9]+]] {
+; CHECK-V8A-NEXT:  [[ENTRY:.*:]]
+; CHECK-V8A-NEXT:    [[SUB1:%.*]] = sub nsw i32 [[ARG]], 1
 ; CHECK-V8A-NEXT:    [[CMP1:%.*]] = icmp slt i32 0, [[SUB1]]
-; CHECK-V8A-NEXT:    br i1 [[CMP1]], label [[OUTER_PREHEADER:%.*]], label [[EXIT:%.*]]
-; CHECK-V8A:       outer.preheader:
-; CHECK-V8A-NEXT:    br label [[OUTER:%.*]]
-; CHECK-V8A:       outer:
-; CHECK-V8A-NEXT:    [[I:%.*]] = phi i32 [ [[I_INC:%.*]], [[OUTER_INC:%.*]] ], [ 0, [[OUTER_PREHEADER]] ]
+; CHECK-V8A-NEXT:    br i1 [[CMP1]], label %[[OUTER_PREHEADER:.*]], label %[[EXIT:.*]]
+; CHECK-V8A:       [[OUTER_PREHEADER]]:
+; CHECK-V8A-NEXT:    br label %[[OUTER:.*]]
+; CHECK-V8A:       [[OUTER]]:
+; CHECK-V8A-NEXT:    [[I:%.*]] = phi i32 [ [[I_INC:%.*]], %[[OUTER_INC:.*]] ], [ 0, %[[OUTER_PREHEADER]] ]
 ; CHECK-V8A-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[ARG]], [[I]]
 ; CHECK-V8A-NEXT:    [[SUB3:%.*]] = sub nsw i32 [[SUB2]], 1
 ; CHECK-V8A-NEXT:    [[CMP2:%.*]] = icmp slt i32 0, [[SUB3]]
-; CHECK-V8A-NEXT:    br i1 [[CMP2]], label [[INNER_PH:%.*]], label [[OUTER_INC]]
-; CHECK-V8A:       inner.ph:
-; CHECK-V8A-NEXT:    br label [[INNER:%.*]]
-; CHECK-V8A:       inner:
-; CHECK-V8A-NEXT:    br i1 false, label [[INNER]], label [[OUTER_INC_LOOPEXIT:%.*]]
-; CHECK-V8A:       outer.inc.loopexit:
-; CHECK-V8A-NEXT:    br label [[OUTER_INC]]
-; CHECK-V8A:       outer.inc:
+; CHECK-V8A-NEXT:    br i1 [[CMP2]], label %[[INNER_PH:.*]], label %[[OUTER_INC]]
+; CHECK-V8A:       [[INNER_PH]]:
+; CHECK-V8A-NEXT:    br label %[[INNER:.*]]
+; CHECK-V8A:       [[INNER]]:
+; CHECK-V8A-NEXT:    br i1 false, label %[[INNER]], label %[[OUTER_INC_LOOPEXIT:.*]]
+; CHECK-V8A:       [[OUTER_INC_LOOPEXIT]]:
+; CHECK-V8A-NEXT:    br label %[[OUTER_INC]]
+; CHECK-V8A:       [[OUTER_INC]]:
 ; CHECK-V8A-NEXT:    [[I_INC]] = add nuw nsw i32 [[I]], 1
-; CHECK-V8A-NEXT:    br i1 false, label [[OUTER]], label [[EXIT_LOOPEXIT:%.*]]
-; CHECK-V8A:       exit.loopexit:
-; CHECK-V8A-NEXT:    br label [[EXIT]]
-; CHECK-V8A:       exit:
+; CHECK-V8A-NEXT:    br i1 false, label %[[OUTER]], label %[[EXIT_LOOPEXIT:.*]]
+; CHECK-V8A:       [[EXIT_LOOPEXIT]]:
+; CHECK-V8A-NEXT:    br label %[[EXIT]]
+; CHECK-V8A:       [[EXIT]]:
 ; CHECK-V8A-NEXT:    ret void
 ;
 entry:
@@ -131,58 +135,60 @@ exit:
 }
 
 define i32 @test1(ptr %array, i32 %length, i32 %n) #0 {
-; CHECK-V8M-LABEL: @test1(
-; CHECK-V8M-NEXT:  loop.preheader:
-; CHECK-V8M-NEXT:    [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N:%.*]], i32 1)
+; CHECK-V8M-LABEL: define i32 @test1(
+; CHECK-V8M-SAME: ptr [[ARRAY:%.*]], i32 [[LENGTH:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
+; CHECK-V8M-NEXT:  [[LOOP_PREHEADER:.*]]:
+; CHECK-V8M-NEXT:    [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N]], i32 1)
 ; CHECK-V8M-NEXT:    [[TMP0:%.*]] = add i32 [[UMAX]], -1
 ; CHECK-V8M-NEXT:    [[TMP1:%.*]] = freeze i32 [[TMP0]]
-; CHECK-V8M-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP1]], i32 [[LENGTH:%.*]])
+; CHECK-V8M-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP1]], i32 [[LENGTH]])
 ; CHECK-V8M-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[LENGTH]], [[UMIN]]
-; CHECK-V8M-NEXT:    br label [[LOOP:%.*]]
-; CHECK-V8M:       loop:
-; CHECK-V8M-NEXT:    [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ]
-; CHECK-V8M-NEXT:    [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ]
-; CHECK-V8M-NEXT:    br i1 [[TMP2]], label [[GUARDED]], label [[DEOPT:%.*]], !prof [[PROF0:![0-9]+]]
-; CHECK-V8M:       deopt:
+; CHECK-V8M-NEXT:    br label %[[LOOP:.*]]
+; CHECK-V8M:       [[LOOP]]:
+; CHECK-V8M-NEXT:    [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], %[[GUARDED:.*]] ], [ 0, %[[LOOP_PREHEADER]] ]
+; CHECK-V8M-NEXT:    [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], %[[GUARDED]] ], [ 0, %[[LOOP_PREHEADER]] ]
+; CHECK-V8M-NEXT:    br i1 [[TMP2]], label %[[GUARDED]], label %[[DEOPT:.*]], !prof [[PROF0:![0-9]+]]
+; CHECK-V8M:       [[DEOPT]]:
 ; CHECK-V8M-NEXT:    call void @prevent_merging()
 ; CHECK-V8M-NEXT:    ret i32 -1
-; CHECK-V8M:       guarded:
+; CHECK-V8M:       [[GUARDED]]:
 ; CHECK-V8M-NEXT:    [[I_I64:%.*]] = zext i32 [[I]] to i64
-; CHECK-V8M-NEXT:    [[ARRAY_I_PTR:%.*]] = getelementptr inbounds i32, ptr [[ARRAY:%.*]], i64 [[I_I64]]
+; CHECK-V8M-NEXT:    [[ARRAY_I_PTR:%.*]] = getelementptr inbounds i32, ptr [[ARRAY]], i64 [[I_I64]]
 ; CHECK-V8M-NEXT:    [[ARRAY_I:%.*]] = load i32, ptr [[ARRAY_I_PTR]], align 4
 ; CHECK-V8M-NEXT:    [[LOOP_ACC_NEXT]] = add i32 [[LOOP_ACC]], [[ARRAY_I]]
 ; CHECK-V8M-NEXT:    [[I_NEXT]] = add nuw i32 [[I]], 1
 ; CHECK-V8M-NEXT:    [[CONTINUE:%.*]] = icmp ult i32 [[I_NEXT]], [[N]]
-; CHECK-V8M-NEXT:    br i1 [[CONTINUE]], label [[LOOP]], label [[EXIT:%.*]]
-; CHECK-V8M:       exit:
-; CHECK-V8M-NEXT:    [[RESULT:%.*]] = phi i32 [ [[LOOP_ACC_NEXT]], [[GUARDED]] ]
+; CHECK-V8M-NEXT:    br i1 [[CONTINUE]], label %[[LOOP]], label %[[EXIT:.*]]
+; CHECK-V8M:       [[EXIT]]:
+; CHECK-V8M-NEXT:    [[RESULT:%.*]] = phi i32 [ [[LOOP_ACC_NEXT]], %[[GUARDED]] ]
 ; CHECK-V8M-NEXT:    ret i32 [[RESULT]]
 ;
-; CHECK-V8A-LABEL: @test1(
-; CHECK-V8A-NEXT:  loop.preheader:
-; CHECK-V8A-NEXT:    [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N:%.*]], i32 1)
+; CHECK-V8A-LABEL: define i32 @test1(
+; CHECK-V8A-SAME: ptr [[ARRAY:%.*]], i32 [[LENGTH:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
+; CHECK-V8A-NEXT:  [[LOOP_PREHEADER:.*]]:
+; CHECK-V8A-NEXT:    [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N]], i32 1)
 ; CHECK-V8A-NEXT:    [[TMP0:%.*]] = add i32 [[UMAX]], -1
 ; CHECK-V8A-NEXT:    [[TMP1:%.*]] = freeze i32 [[TMP0]]
-; CHECK-V8A-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP1]], i32 [[LENGTH:%.*]])
+; CHECK-V8A-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP1]], i32 [[LENGTH]])
 ; CHECK-V8A-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[LENGTH]], [[UMIN]]
-; CHECK-V8A-NEXT:    br label [[LOOP:%.*]]
-; CHECK-V8A:       loop:
-; CHECK-V8A-NEXT:    [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ]
-; CHECK-V8A-NEXT:    [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ]
-; CHECK-V8A-NEXT:    br i1 [[TMP2]], label [[GUARDED]], label [[DEOPT:%.*]], !prof [[PROF0:![0-9]+]]
-; CHECK-V8A:       deopt:
+; CHECK-V8A-NEXT:    br label %[[LOOP:.*]]
+; CHECK-V8A:       [[LOOP]]:
+; CHECK-V8A-NEXT:    [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], %[[GUARDED:.*]] ], [ 0, %[[LOOP_PREHEADER]] ]
+; CHECK-V8A-NEXT:    [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], %[[GUARDED]] ], [ 0, %[[LOOP_PREHEADER]] ]
+; CHECK-V8A-NEXT:    br i1 [[TMP2]], label %[[GUARDED]], label %[[DEOPT:.*]], !prof [[PROF0:![0-9]+]]
+; CHECK-V8A:       [[DEOPT]]:
 ; CHECK-V8A-NEXT:    call void @prevent_merging()
 ; CHECK-V8A-NEXT:    ret i32 -1
-; CHECK-V8A:       guarded:
+; CHECK-V8A:       [[GUARDED]]:
 ; CHECK-V8A-NEXT:    [[I_I64:%.*]] = zext i32 [[I]] to i64
-; CHECK-V8A-NEXT:    [[ARRAY_I_PTR:%.*]] = getelementptr inbounds i32, ptr [[ARRAY:%.*]], i64 [[I_I64]]
+; CHECK-V8A-NEXT:    [[ARRAY_I_PTR:%.*]] = getelementptr inbounds i32, ptr [[ARRAY]], i64 [[I_I64]]
 ; CHECK-V8A-NEXT:    [[ARRAY_I:%.*]] = load i32, ptr [[ARRAY_I_PTR]], align 4
 ; CHECK-V8A-NEXT:    [[LOOP_ACC_NEXT]] = add i32 [[LOOP_ACC]], [[ARRAY_I]]
 ; CHECK-V8A-NEXT:    [[I_NEXT]] = add nuw i32 [[I]], 1
 ; CHECK-V8A-NEXT:    [[CONTINUE:%.*]] = icmp ult i32 [[I_NEXT]], [[N]]
-; CHECK-V8A-NEXT:    br i1 [[CONTINUE]], label [[LOOP]], label [[EXIT:%.*]]
-; CHECK-V8A:       exit:
-; CHECK-V8A-NEXT:    [[RESULT:%.*]] = phi i32 [ [[LOOP_ACC_NEXT]], [[GUARDED]] ]
+; CHECK-V8A-NEXT:    br i1 [[CONTINUE]], label %[[LOOP]], label %[[EXIT:.*]]
+; CHECK-V8A:       [[EXIT]]:
+; CHECK-V8A-NEXT:    [[RESULT:%.*]] = phi i32 [ [[LOOP_ACC_NEXT]], %[[GUARDED]] ]
 ; CHECK-V8A-NEXT:    ret i32 [[RESULT]]
 ;
 loop.preheader:                                   ; preds = %entry
@@ -215,56 +221,58 @@ exit:                                             ; preds = %guarded, %entry
 declare void @maythrow()
 
 define i32 @test2(ptr %array, i32 %length, i32 %n) #0 {
-; CHECK-V8M-LABEL: @test2(
-; CHECK-V8M-NEXT:  loop.preheader:
-; CHECK-V8M-NEXT:    [[TMP0:%.*]] = add i32 [[N:%.*]], -1
+; CHECK-V8M-LABEL: define i32 @test2(
+; CHECK-V8M-SAME: ptr [[ARRAY:%.*]], i32 [[LENGTH:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
+; CHECK-V8M-NEXT:  [[LOOP_PREHEADER:.*]]:
+; CHECK-V8M-NEXT:    [[TMP0:%.*]] = add i32 [[N]], -1
 ; CHECK-V8M-NEXT:    [[TMP1:%.*]] = freeze i32 [[TMP0]]
-; CHECK-V8M-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP1]], i32 [[LENGTH:%.*]])
+; CHECK-V8M-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP1]], i32 [[LENGTH]])
 ; CHECK-V8M-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[LENGTH]], [[UMIN]]
-; CHECK-V8M-NEXT:    br label [[LOOP:%.*]]
-; CHECK-V8M:       loop:
-; CHECK-V8M-NEXT:    [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ]
-; CHECK-V8M-NEXT:    [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ]
-; CHECK-V8M-NEXT:    br i1 [[TMP2]], label [[GUARDED]], label [[DEOPT:%.*]], !prof [[PROF0]]
-; CHECK-V8M:       deopt:
+; CHECK-V8M-NEXT:    br label %[[LOOP:.*]]
+; CHECK-V8M:       [[LOOP]]:
+; CHECK-V8M-NEXT:    [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], %[[GUARDED:.*]] ], [ 0, %[[LOOP_PREHEADER]] ]
+; CHECK-V8M-NEXT:    [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], %[[GUARDED]] ], [ 0, %[[LOOP_PREHEADER]] ]
+; CHECK-V8M-NEXT:    br i1 [[TMP2]], label %[[GUARDED]], label %[[DEOPT:.*]], !prof [[PROF0]]
+; CHECK-V8M:       [[DEOPT]]:
 ; CHECK-V8M-NEXT:    call void @prevent_merging()
 ; CHECK-V8M-NEXT:    ret i32 -1
-; CHECK-V8M:       guarded:
+; CHECK-V8M:       [[GUARDED]]:
 ; CHECK-V8M-NEXT:    [[I_I64:%.*]] = zext i32 [[I]] to i64
-; CHECK-V8M-NEXT:    [[ARRAY_I_PTR:%.*]] = getelementptr inbounds i32, ptr [[ARRAY:%.*]], i64 [[I_I64]]
+; CHECK-V8M-NEXT:    [[ARRAY_I_PTR:%.*]] = getelementptr inbounds i32, ptr [[ARRAY]], i64 [[I_I64]]
 ; CHECK-V8M-NEXT:    [[ARRAY_I:%.*]] = load i32, ptr [[ARRAY_I_PTR]], align 4
 ; CHECK-V8M-NEXT:    [[LOOP_ACC_NEXT]] = add i32 [[LOOP_ACC]], [[ARRAY_I]]
 ; CHECK-V8M-NEXT:    [[I_NEXT]] = add nuw i32 [[I]], 1
 ; CHECK-V8M-NEXT:    [[CONTINUE:%.*]] = icmp ne i32 [[I_NEXT]], [[N]]
-; CHECK-V8M-NEXT:    br i1 [[CONTINUE]], label [[LOOP]], label [[EXIT:%.*]]
-; CHECK-V8M:       exit:
-; CHECK-V8M-NEXT:    [[RESULT:%.*]] = phi i32 [ [[LOOP_ACC_NEXT]], [[GUARDED]] ]
+; CHECK-V8M-NEXT:    br i1 [[CONTINUE]], label %[[LOOP]], label %[[EXIT:.*]]
+; CHECK-V8M:       [[EXIT]]:
+; CHECK-V8M-NEXT:    [[RESULT:%.*]] = phi i32 [ [[LOOP_ACC_NEXT]], %[[GUARDED]] ]
 ; CHECK-V8M-NEXT:    ret i32 [[RESULT]]
 ;
-; CHECK-V8A-LABEL: @test2(
-; CHECK-V8A-NEXT:  loop.preheader:
-; CHECK-V8A-NEXT:    [[TMP0:%.*]] = add i32 [[N:%.*]], -1
+; CHECK-V8A-LABEL: define i32 @test2(
+; CHECK-V8A-SAME: ptr [[ARRAY:%.*]], i32 [[LENGTH:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
+; CHECK-V8A-NEXT:  [[LOOP_PREHEADER:.*]]:
+; CHECK-V8A-NEXT:    [[TMP0:%.*]] = add i32 [[N]], -1
 ; CHECK-V8A-NEXT:    [[TMP1:%.*]] = freeze i32 [[TMP0]]
-; CHECK-V8A-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP1]], i32 [[LENGTH:%.*]])
+; CHECK-V8A-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP1]], i32 [[LENGTH]])
 ; CHECK-V8A-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[LENGTH]], [[UMIN]]
-; CHECK-V8A-NEXT:    br label [[LOOP:%.*]]
-; CHECK-V8A:       loop:
-; CHECK-V8A-NEXT:    [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ]
-; CHECK-V8A-NEXT:    [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ]
-; CHECK-V8A-NEXT:    br i1 [[TMP2]], label [[GUARDED]], label [[DEOPT:%.*]], !prof [[PROF0]]
-; CHECK-V8A:       deopt:
+; CHECK-V8A-NEXT:    br label %[[LOOP:.*]]
+; CHECK-V8A:       [[LOOP]]:
+; CHECK-V8A-NEXT:    [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], %[[GUARDED:.*]] ], [ 0, %[[LOOP_PREHEADER]] ]
+; CHECK-V8A-NEXT:    [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], %[[GUARDED]] ], [ 0, %[[LOOP_PREHEADER]] ]
+; CHECK-V8A-NEXT:    br i1 [[TMP2]], label %[[GUARDED]], label %[[DEOPT:.*]], !prof [[PROF0]]
+; CHECK-V8A:       [[DEOPT]]:
 ; CHECK-V8A-NEXT:    call void @prevent_merging()
 ; CHECK-V8A-NEXT:    ret i32 -1
-; CHECK-V8A:       guarded:
+; CHECK-V8A:       [[GUARDED]]:
 ; CHECK-V8A-NEXT:    [[I_I64:%.*]] = zext i32 [[I]] to i64
-; CHECK-V8A-NEXT:    [[ARRAY_I_PTR:%.*]] = getelementptr inbounds i32, ptr [[ARRAY:%.*]], i64 [[I_I64]]
+; CHECK-V8A-NEXT:    [[ARRAY_I_PTR:%.*]] = getelementptr inbounds i32, ptr [[ARRAY]], i64 [[I_I64]]
 ; CHECK-V8A-NEXT:    [[ARRAY_I:%.*]] = load i32, ptr [[ARRAY_I_PTR]], align 4
 ; CHECK-V8A-NEXT:    [[LOOP_ACC_NEXT]] = add i32 [[LOOP_ACC]], [[ARRAY_I]]
 ; CHECK-V8A-NEXT:    [[I_NEXT]] = add nuw i32 [[I]], 1
 ; CHECK-V8A-NEXT:    [[CONTINUE:%.*]] = icmp ne i32 [[I_NEXT]], [[N]]
-; CHECK-V8A-NEXT:    br i1 [[CONTINUE]], label [[LOOP]], label [[EXIT:%.*]]
-; CHECK-V8A:       exit:
-; CHECK-V8A-NEXT:    [[RESULT:%.*]] = phi i32 [ [[LOOP_ACC_NEXT]], [[GUARDED]] ]
+; CHECK-V8A-NEXT:    br i1 [[CONTINUE]], label %[[LOOP]], label %[[EXIT:.*]]
+; CHECK-V8A:       [[EXIT]]:
+; CHECK-V8A-NEXT:    [[RESULT:%.*]] = phi i32 [ [[LOOP_ACC_NEXT]], %[[GUARDED]] ]
 ; CHECK-V8A-NEXT:    ret i32 [[RESULT]]
 ;
 loop.preheader:                                   ; preds = %entry
@@ -295,66 +303,68 @@ exit:                                             ; preds = %guarded, %entry
 }
 
 define i32 @two_range_checks(ptr %array.1, i32 %length.1, ptr %array.2, i32 %length.2, i32 %n) #0 {
-; CHECK-V8M-LABEL: @two_range_checks(
-; CHECK-V8M-NEXT:  loop.preheader:
-; CHECK-V8M-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[LENGTH_2:%.*]], i32 [[LENGTH_1:%.*]])
-; CHECK-V8M-NEXT:    [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N:%.*]], i32 1)
+; CHECK-V8M-LABEL: define i32 @two_range_checks(
+; CHECK-V8M-SAME: ptr [[ARRAY_1:%.*]], i32 [[LENGTH_1:%.*]], ptr [[ARRAY_2:%.*]], i32 [[LENGTH_2:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
+; CHECK-V8M-NEXT:  [[LOOP_PREHEADER:.*]]:
+; CHECK-V8M-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[LENGTH_2]], i32 [[LENGTH_1]])
+; CHECK-V8M-NEXT:    [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N]], i32 1)
 ; CHECK-V8M-NEXT:    [[TMP0:%.*]] = add i32 [[UMAX]], -1
 ; CHECK-V8M-NEXT:    [[TMP1:%.*]] = freeze i32 [[TMP0]]
 ; CHECK-V8M-NEXT:    [[UMIN1:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP1]], i32 [[UMIN]])
 ; CHECK-V8M-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[UMIN]], [[UMIN1]]
-; CHECK-V8M-NEXT:    br label [[LOOP:%.*]]
-; CHECK-V8M:       loop:
-; CHECK-V8M-NEXT:    [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ]
-; CHECK-V8M-NEXT:    [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ]
-; CHECK-V8M-NEXT:    br i1 [[TMP2]], label [[GUARDED]], label [[DEOPT:%.*]], !prof [[PROF0]]
-; CHECK-V8M:       deopt:
+; CHECK-V8M-NEXT:    br label %[[LOOP:.*]]
+; CHECK-V8M:       [[LOOP]]:
+; CHECK-V8M-NEXT:    [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], %[[GUARDED:.*]] ], [ 0, %[[LOOP_PREHEADER]] ]
+; CHECK-V8M-NEXT:    [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], %[[GUARDED]] ], [ 0, %[[LOOP_PREHEADER]] ]
+; CHECK-V8M-NEXT:    br i1 [[TMP2]], label %[[GUARDED]], label %[[DEOPT:.*]], !prof [[PROF0]]
+; CHECK-V8M:       [[DEOPT]]:
 ; CHECK-V8M-NEXT:    call void @prevent_merging()
 ; CHECK-V8M-NEXT:    ret i32 -1
-; CHECK-V8M:       guarded:
+; CHECK-V8M:       [[GUARDED]]:
 ; CHECK-V8M-NEXT:    [[I_I64:%.*]] = zext i32 [[I]] to i64
-; CHECK-V8M-NEXT:    [[ARRAY_1_I_PTR:%.*]] = getelementptr inbounds i32, ptr [[ARRAY_1:%.*]], i64 [[I_I64]]
+; CHECK-V8M-NEXT:    [[ARRAY_1_I_PTR:%.*]] = getelementptr inbounds i32, ptr [[ARRAY_1]], i64 [[I_I64]]
 ; CHECK-V8M-NEXT:    [[ARRAY_1_I:%.*]] = load i32, ptr [[ARRAY_1_I_PTR]], align 4
 ; CHECK-V8M-NEXT:    [[LOOP_ACC_1:%.*]] = add i32 [[LOOP_ACC]], [[ARRAY_1_I]]
-; CHECK-V8M-NEXT:    [[ARRAY_2_I_PTR:%.*]] = getelementptr inbounds i32, ptr [[ARRAY_2:%.*]], i64 [[I_I64]]
+; CHECK-V8M-NEXT:    [[ARRAY_2_I_PTR:%.*]] = getelementptr inbounds i32, ptr [[ARRAY_2]], i64 [[I_I64]]
 ; CHECK-V8M-NEXT:    [[ARRAY_2_I:%.*]] = load i32, ptr [[ARRAY_2_I_PTR]], align 4
 ; CHECK-V8M-NEXT:    [[LOOP_ACC_NEXT]] = add i32 [[LOOP_ACC_1]], [[ARRAY_2_I]]
 ; CHECK-V8M-NEXT:    [[I_NEXT]] = add nuw i32 [[I]], 1
 ; CHECK-V8M-NEXT:    [[CONTINUE:%.*]] = icmp ult i32 [[I_NEXT]], [[N]]
-; CHECK-V8M-NEXT:    br i1 [[CONTINUE]], label [[LOOP]], label [[EXIT:%.*]]
-; CHECK-V8M:       exit:
-; CHECK-V8M-NEXT:    [[RESULT:%.*]] = phi i32 [ [[LOOP_ACC_NEXT]], [[GUARDED]] ]
+; CHECK-V8M-NEXT:    br i1 [[CONTINUE]], label %[[LOOP]], label %[[EXIT:.*]]
+; CHECK-V8M:       [[EXIT]]:
+; CHECK-V8M-NEXT:    [[RESULT:%.*]] = phi i32 [ [[LOOP_ACC_NEXT]], %[[GUARDED]] ]
 ; CHECK-V8M-NEXT:    ret i32 [[RESULT]]
 ;
-; CHECK-V8A-LABEL: @two_range_checks(
-; CHECK-V8A-NEXT:  loop.preheader:
-; CHECK-V8A-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[LENGTH_2:%.*]], i32 [[LENGTH_1:%.*]])
-; CHECK-V8A-NEXT:    [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N:%.*]], i32 1)
+; CHECK-V8A-LABEL: define i32 @two_range_checks(
+; CHECK-V8A-SAME: ptr [[ARRAY_1:%.*]], i32 [[LENGTH_1:%.*]], ptr [[ARRAY_2:%.*]], i32 [[LENGTH_2:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
+; CHECK-V8A-NEXT:  [[LOOP_PREHEADER:.*]]:
+; CHECK-V8A-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[LENGTH_2]], i32 [[LENGTH_1]])
+; CHECK-V8A-NEXT:    [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N]], i32 1)
 ; CHECK-V8A-NEXT:    [[TMP0:%.*]] = add i32 [[UMAX]], -1
 ; CHECK-V8A-NEXT:    [[TMP1:%.*]] = freeze i32 [[TMP0]]
 ; CHECK-V8A-NEXT:    [[UMIN1:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP1]], i32 [[UMIN]])
 ; CHECK-V8A-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[UMIN]], [[UMIN1]]
-; CHECK-V8A-NEXT:    br label [[LOOP:%.*]]
-; CHECK-V8A:       loop:
-; CHECK-V8A-NEXT:    [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ]
-; CHECK-V8A-NEXT:    [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ]
-; CHECK-V8A-NEXT:    br i1 [[TMP2]], label [[GUARDED]], label [[DEOPT:%.*]], !prof [[PROF0]]
-; CHECK-V8A:       deopt:
+; CHECK-V8A-NEXT:    br label %[[LOOP:.*]]
+; CHECK-V8A:       [[LOOP]]:
+; CHECK-V8A-NEXT:    [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], %[[GUARDED:.*]] ], [ 0, %[[LOOP_PREHEADER]] ]
+; CHECK-V8A-NEXT:    [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], %[[GUARDED]] ], [ 0, %[[LOOP_PREHEADER]] ]
+; CHECK-V8A-NEXT:    br i1 [[TMP2]], label %[[GUARDED]], label %[[DEOPT:.*]], !prof [[PROF0]]
+; CHECK-V8A:       [[DEOPT]]:
 ; CHECK-V8A-NEXT:    call void @prevent_merging()
 ; CHECK-V8A-NEXT:    ret i32 -1
-; CHECK-V8A:       guarded:
+; CHECK-V8A:       [[GUARDED]]:
 ; CHECK-V8A-NEXT:    [[I_I64:%.*]] = zext i32 [[I]] to i64
-; CHECK-V8A-NEXT:    [[ARRAY_1_I_PTR:%.*]] = getelementptr inbounds i32, ptr [[ARRAY_1:%.*]], i64 [[I_I64]]
+; CHECK-V8A-NEXT:    [[ARRAY_1_I_PTR:%.*]] = getelementptr inbounds i32, ptr [[ARRAY_1]], i64 [[I_I64]]
 ; CHECK-V8A-NEXT:    [[ARRAY_1_I:%.*]] = load i32, ptr [[ARRAY_1_I_PTR]], align 4
 ; CHECK-V8A-NEXT:    [[LOOP_ACC_1:%.*]] = add i32 [[LOOP_ACC]], [[ARRAY_1_I]]
-; CHECK-V8A-NEXT:    [[ARRAY_2_I_PTR:%.*]] = getelementptr inbounds i32, ptr [[ARRAY_2:%.*]], i64 [[I_I64]]
+; CHECK-V8A-NEXT:    [[ARRAY_2_I_PTR:%.*]] = getelementptr inbounds i32, ptr [[ARRAY_2]], i64 [[I_I64]]
 ; CHECK-V8A-NEXT:    [[ARRAY_2_I:%.*]] = load i32, ptr [[ARRAY_2_I_PTR]], align 4
 ; CHECK-V8A-NEXT:    [[LOOP_ACC_NEXT]] = add i32 [[LOOP_ACC_1]], [[ARRAY_2_I]]
 ; CHECK-V8A-NEXT:    [[I_NEXT]] = add nuw i32 [[I]], 1
 ; CHECK-V8A-NEXT:    [[CONTINUE:%.*]] = icmp ult i32 [[I_NEXT]], [[N]]
-; CHECK-V8A-NEXT:    br i1 [[CONTINUE]], label [[LOOP]], label [[EXIT:%.*]]
-; CHECK-V8A:       exit:
-; CHECK-V8A-NEXT:    [[RESULT:%.*]] = phi i32 [ [[LOOP_ACC_NEXT]], [[GUARDED]] ]
+; CHECK-V8A-NEXT:    br i1 [[CONTINUE]], label %[[LOOP]], label %[[EXIT:.*]]
+; CHECK-V8A:       [[EXIT]]:
+; CHECK-V8A-NEXT:    [[RESULT:%.*]] = phi i32 [ [[LOOP_ACC_NEXT]], %[[GUARDED]] ]
 ; CHECK-V8A-NEXT:    ret i32 [[RESULT]]
 ;
 loop.preheader:                                   ; preds = %entry
@@ -390,74 +400,76 @@ exit:                                             ; preds = %guarded, %entry
 }
 
 define i32 @three_range_checks(ptr %array.1, i32 %length.1, ptr %array.2, i32 %length.2, ptr %array.3, i32 %length.3, i32 %n) #0 {
-; CHECK-V8M-LABEL: @three_range_checks(
-; CHECK-V8M-NEXT:  loop.preheader:
-; CHECK-V8M-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[LENGTH_3:%.*]], i32 [[LENGTH_2:%.*]])
-; CHECK-V8M-NEXT:    [[UMIN1:%.*]] = call i32 @llvm.umin.i32(i32 [[UMIN]], i32 [[LENGTH_1:%.*]])
-; CHECK-V8M-NEXT:    [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N:%.*]], i32 1)
+; CHECK-V8M-LABEL: define i32 @three_range_checks(
+; CHECK-V8M-SAME: ptr [[ARRAY_1:%.*]], i32 [[LENGTH_1:%.*]], ptr [[ARRAY_2:%.*]], i32 [[LENGTH_2:%.*]], ptr [[ARRAY_3:%.*]], i32 [[LENGTH_3:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
+; CHECK-V8M-NEXT:  [[LOOP_PREHEADER:.*]]:
+; CHECK-V8M-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[LENGTH_3]], i32 [[LENGTH_2]])
+; CHECK-V8M-NEXT:    [[UMIN1:%.*]] = call i32 @llvm.umin.i32(i32 [[UMIN]], i32 [[LENGTH_1]])
+; CHECK-V8M-NEXT:    [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N]], i32 1)
 ; CHECK-V8M-NEXT:    [[TMP0:%.*]] = add i32 [[UMAX]], -1
 ; CHECK-V8M-NEXT:    [[TMP1:%.*]] = freeze i32 [[TMP0]]
 ; CHECK-V8M-NEXT:    [[UMIN2:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP1]], i32 [[UMIN1]])
 ; CHECK-V8M-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[UMIN1]], [[UMIN2]]
-; CHECK-V8M-NEXT:    br label [[LOOP:%.*]]
-; CHECK-V8M:       loop:
-; CHECK-V8M-NEXT:    [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ]
-; CHECK-V8M-NEXT:    [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ]
-; CHECK-V8M-NEXT:    br i1 [[TMP2]], label [[GUARDED]], label [[DEOPT:%.*]], !prof [[PROF0]]
-; CHECK-V8M:       deopt:
+; CHECK-V8M-NEXT:    br label %[[LOOP:.*]]
+; CHECK-V8M:       [[LOOP]]:
+; CHECK-V8M-NEXT:    [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], %[[GUARDED:.*]] ], [ 0, %[[LOOP_PREHEADER]] ]
+; CHECK-V8M-NEXT:    [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], %[[GUARDED]] ], [ 0, %[[LOOP_PREHEADER]] ]
+; CHECK-V8M-NEXT:    br i1 [[TMP2]], label %[[GUARDED]], label %[[DEOPT:.*]], !prof [[PROF0]]
+; CHECK-V8M:       [[DEOPT]]:
 ; CHECK-V8M-NEXT:    call void @prevent_merging()
 ; CHECK-V8M-NEXT:    ret i32 -1
-; CHECK-V8M:       guarded:
+; CHECK-V8M:       [[GUARDED]]:
 ; CHECK-V8M-NEXT:    [[I_I64:%.*]] = zext i32 [[I]] to i64
-; CHECK-V8M-NEXT:    [[ARRAY_1_I_PTR:%.*]] = getelementptr inbounds i32, ptr [[ARRAY_1:%.*]], i64 [[I_I64]]
+; CHECK-V8M-NEXT:    [[ARRAY_1_I_PTR:%.*]] = getelementptr inbounds i32, ptr [[ARRAY_1]], i64 [[I_I64]]
 ; CHECK-V8M-NEXT:    [[ARRAY_1_I:%.*]] = load i32, ptr [[ARRAY_1_I_PTR]], align 4
 ; CHECK-V8M-NEXT:    [[LOOP_ACC_1:%.*]] = add i32 [[LOOP_ACC]], [[ARRAY_1_I]]
-; CHECK-V8M-NEXT:    [[ARRAY_2_I_PTR:%.*]] = getelementptr inbounds i32, ptr [[ARRAY_2:%.*]], i64 [[I_I64]]
+; CHECK-V8M-NEXT:    [[ARRAY_2_I_PTR:%.*]] = getelementptr inbounds i32, ptr [[ARRAY_2]], i64 [[I_I64]]
 ; CHECK-V8M-NEXT:    [[ARRAY_2_I:%.*]] = load i32, ptr [[ARRAY_2_I_PTR]], align 4
 ; CHECK-V8M-NEXT:    [[LOOP_ACC_2:%.*]] = add i32 [[LOOP_ACC_1]], [[ARRAY_2_I]]
-; CHECK-V8M-NEXT:    [[ARRAY_3_I_PTR:%.*]] = getelementptr inbounds i32, ptr [[ARRAY_3:%.*]], i64 [[I_I64]]
+; CHECK-V8M-NEXT:    [[ARRAY_3_I_PTR:%.*]] = getelementptr inbounds i32, ptr [[ARRAY_3]], i64 [[I_I64]]
 ; CHECK-V8M-NEXT:    [[ARRAY_3_I:%.*]] = load i32, ptr [[ARRAY_3_I_PTR]], align 4
 ; CHECK-V8M-NEXT:    [[LOOP_ACC_NEXT]] = add i32 [[LOOP_ACC_2]], [[ARRAY_3_I]]
 ; CHECK-V8M-NEXT:    [[I_NEXT]] = add nuw i32 [[I]], 1
 ; CHECK-V8M-NEXT:    [[CONTINUE:%.*]] = icmp ult i32 [[I_NEXT]], [[N]]
-; CHECK-V8M-NEXT:    br i1 [[CONTINUE]], label [[LOOP]], label [[EXIT:%.*]]
-; CHECK-V8M:       exit:
-; CHECK-V8M-NEXT:    [[RESULT:%.*]] = phi i32 [ [[LOOP_ACC_NEXT]], [[GUARDED]] ]
+; CHECK-V8M-NEXT:    br i1 [[CONTINUE]], label %[[LOOP]], label %[[EXIT:.*]]
+; CHECK-V8M:       [[EXIT]]:
+; CHECK-V8M-NEXT:    [[RESULT:%.*]] = phi i32 [ [[LOOP_ACC_NEXT]], %[[GUARDED]] ]
 ; CHECK-V8M-NEXT:    ret i32 [[RESULT]]
 ;
-; CHECK-V8A-LABEL: @three_range_checks(
-; CHECK-V8A-NEXT:  loop.preheader:
-; CHECK-V8A-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[LENGTH_3:%.*]], i32 [[LENGTH_2:%.*]])
-; CHECK-V8A-NEXT:    [[UMIN1:%.*]] = call i32 @llvm.umin.i32(i32 [[UMIN]], i32 [[LENGTH_1:%.*]])
-; CHECK-V8A-NEXT:    [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N:%.*]], i32 1)
+; CHECK-V8A-LABEL: define i32 @three_range_checks(
+; CHECK-V8A-SAME: ptr [[ARRAY_1:%.*]], i32 [[LENGTH_1:%.*]], ptr [[ARRAY_2:%.*]], i32 [[LENGTH_2:%.*]], ptr [[ARRAY_3:%.*]], i32 [[LENGTH_3:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
+; CHECK-V8A-NEXT:  [[LOOP_PREHEADER:.*]]:
+; CHECK-V8A-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[LENGTH_3]], i32 [[LENGTH_2]])
+; CHECK-V8A-NEXT:    [[UMIN1:%.*]] = call i32 @llvm.umin.i32(i32 [[UMIN]], i32 [[LENGTH_1]])
+; CHECK-V8A-NEXT:    [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N]], i32 1)
 ; CHECK-V8A-NEXT:    [[TMP0:%.*]] = add i32 [[UMAX]], -1
 ; CHECK-V8A-NEXT:    [[TMP1:%.*]] = freeze i32 [[TMP0]]
 ; CHECK-V8A-NEXT:    [[UMIN2:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP1]], i32 [[UMIN1]])
 ; CHECK-V8A-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[UMIN1]], [[UMIN2]]
-; CHECK-V8A-NEXT:    br label [[LOOP:%.*]]
-; CHECK-V8A:       loop:
-; CHECK-V8A-NEXT:    [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ]
-; CHECK-V8A-NEXT:    [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ]
-; CHECK-V8A-NEXT:    br i1 [[TMP2]], label [[GUARDED]], label [[DEOPT:%.*]], !prof [[PROF0]]
-; CHECK-V8A:       deopt:
+; CHECK-V8A-NEXT:    br label %[[LOOP:.*]]
+; CHECK-V8A:       [[LOOP]]:
+; CHECK-V8A-NEXT:    [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], %[[GUARDED:.*]] ], [ 0, %[[LOOP_PREHEADER]] ]
+; CHECK-V8A-NEXT:    [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], %[[GUARDED]] ], [ 0, %[[LOOP_PREHEADER]] ]
+; CHECK-V8A-NEXT:    br i1 [[TMP2]], label %[[GUARDED]], label %[[DEOPT:.*]], !prof [[PROF0]]
+; CHECK-V8A:       [[DEOPT]]:
 ; CHECK-V8A-NEXT:    call void @prevent_merging()
 ; CHECK-V8A-NEXT:    ret i32 -1
-; CHECK-V8A:       guarded:
+; CHECK-V8A:       [[GUARDED]]:
 ; CHECK-V8A-NEXT:    [[I_I64:%.*]] = zext i32 [[I]] to i64
-; CHECK-V8A-NEXT:    [[ARRAY_1_I_PTR:%.*]] = getelementptr inbounds i32, ptr [[ARRAY_1:%.*]], i64 [[I_I64]]
+; CHECK-V8A-NEXT:    [[ARRAY_1_I_PTR:%.*]] = getelementptr inbounds i32, ptr [[ARRAY_1]], i64 [[I_I64]]
 ; CHECK-V8A-NEXT:    [[ARRAY_1_I:%.*]] = load i32, ptr [[ARRAY_1_I_PTR]], align 4
 ; CHECK-V8A-NEXT:    [[LOOP_ACC_1:%.*]] = add i32 [[LOOP_ACC]], [[ARRAY_1_I]]
-; CHECK-V8A-NEXT:    [[ARRAY_2_I_PTR:%.*]] = getelementptr inbounds i32, ptr [[ARRAY_2:%.*]], i64 [[I_I64]]
+; CHECK-V8A-NEXT:    [[ARRAY_2_I_PTR:%.*]] = getelementptr inbounds i32, ptr [[ARRAY_2]], i64 [[I_I64]]
 ; CHECK-V8A-NEXT:    [[ARRAY_2_I:%.*]] = load i32, ptr [[ARRAY_2_I_PTR]], align 4
 ; CHECK-V8A-NEXT:    [[LOOP_ACC_2:%.*]] = add i32 [[LOOP_ACC_1]], [[ARRAY_2_I]]
-; CHECK-V8A-NEXT:    [[ARRAY_3_I_PTR:%.*]] = getelementptr inbounds i32, ptr [[ARRAY_3:%.*]], i64 [[I_I64]]
+; CHECK-V8A-NEXT:    [[ARRAY_3_I_PTR:%.*]] = getelementptr inbounds i32, ptr [[ARRAY_3]], i64 [[I_I64]]
 ; CHECK-V8A-NEXT:    [[ARRAY_3_I:%.*]] = load i32, ptr [[ARRAY_3_I_PTR]], align 4
 ; CHECK-V8A-NEXT:    [[LOOP_ACC_NEXT]] = add i32 [[LOOP_ACC_2]], [[ARRAY_3_I]]
 ; CHECK-V8A-NEXT:    [[I_NEXT]] = add nuw i32 [[I]], 1
 ; CHECK-V8A-NEXT:    [[CONTINUE:%.*]] = icmp ult i32 [[I_NEXT]], [[N]]
-; CHECK-V8A-NEXT:    br i1 [[CONTINUE]], label [[LOOP]], label [[EXIT:%.*]]
-; CHECK-V8A:       exit:
-; CHECK-V8A-NEXT:    [[RESULT:%.*]] = phi i32 [ [[LOOP_ACC_NEXT]], [[GUARDED]] ]
+; CHECK-V8A-NEXT:    br i1 [[CONTINUE]], label %[[LOOP]], label %[[EXIT:.*]]
+; CHECK-V8A:       [[EXIT]]:
+; CHECK-V8A-NEXT:    [[RESULT:%.*]] = phi i32 [ [[LOOP_ACC_NEXT]], %[[GUARDED]] ]
 ; CHECK-V8A-NEXT:    ret i32 [[RESULT]]
 ;
 loop.preheader:                                   ; preds = %entry
@@ -499,80 +511,82 @@ exit:                                             ; preds = %guarded, %entry
 
 ; Analogous to the above, but with two distinct branches (on different conditions)
 define i32 @distinct_checks(ptr %array.1, i32 %length.1, ptr %array.2, i32 %length.2, ptr %array.3, i32 %length.3, i32 %n) #0 {
-; CHECK-V8M-LABEL: @distinct_checks(
-; CHECK-V8M-NEXT:  loop.preheader:
-; CHECK-V8M-NEXT:    [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N:%.*]], i32 1)
+; CHECK-V8M-LABEL: define i32 @distinct_checks(
+; CHECK-V8M-SAME: ptr [[ARRAY_1:%.*]], i32 [[LENGTH_1:%.*]], ptr [[ARRAY_2:%.*]], i32 [[LENGTH_2:%.*]], ptr [[ARRAY_3:%.*]], i32 [[LENGTH_3:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
+; CHECK-V8M-NEXT:  [[LOOP_PREHEADER:.*]]:
+; CHECK-V8M-NEXT:    [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N]], i32 1)
 ; CHECK-V8M-NEXT:    [[TMP0:%.*]] = add i32 [[UMAX]], -1
 ; CHECK-V8M-NEXT:    [[TMP1:%.*]] = freeze i32 [[TMP0]]
-; CHECK-V8M-NEXT:    [[TMP2:%.*]] = freeze i32 [[LENGTH_2:%.*]]
+; CHECK-V8M-NEXT:    [[TMP2:%.*]] = freeze i32 [[LENGTH_2]]
 ; CHECK-V8M-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP1]], i32 [[TMP2]])
-; CHECK-V8M-NEXT:    [[UMIN1:%.*]] = call i32 @llvm.umin.i32(i32 [[UMIN]], i32 [[LENGTH_1:%.*]])
+; CHECK-V8M-NEXT:    [[UMIN1:%.*]] = call i32 @llvm.umin.i32(i32 [[UMIN]], i32 [[LENGTH_1]])
 ; CHECK-V8M-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[LENGTH_1]], [[UMIN1]]
 ; CHECK-V8M-NEXT:    [[TMP4:%.*]] = icmp ne i32 [[LENGTH_2]], [[UMIN1]]
-; CHECK-V8M-NEXT:    br label [[LOOP:%.*]]
-; CHECK-V8M:       loop:
-; CHECK-V8M-NEXT:    [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED1:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ]
-; CHECK-V8M-NEXT:    [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED1]] ], [ 0, [[LOOP_PREHEADER]] ]
-; CHECK-V8M-NEXT:    br i1 [[TMP3]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof [[PROF0]]
-; CHECK-V8M:       deopt:
+; CHECK-V8M-NEXT:    br label %[[LOOP:.*]]
+; CHECK-V8M:       [[LOOP]]:
+; CHECK-V8M-NEXT:    [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], %[[GUARDED1:.*]] ], [ 0, %[[LOOP_PREHEADER]] ]
+; CHECK-V8M-NEXT:    [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], %[[GUARDED1]] ], [ 0, %[[LOOP_PREHEADER]] ]
+; CHECK-V8M-NEXT:    br i1 [[TMP3]], label %[[GUARDED:.*]], label %[[DEOPT:.*]], !prof [[PROF0]]
+; CHECK-V8M:       [[DEOPT]]:
 ; CHECK-V8M-NEXT:    call void @prevent_merging()
 ; CHECK-V8M-NEXT:    ret i32 -1
-; CHECK-V8M:       guarded:
+; CHECK-V8M:       [[GUARDED]]:
 ; CHECK-V8M-NEXT:    [[I_I64:%.*]] = zext i32 [[I]] to i64
-; CHECK-V8M-NEXT:    [[ARRAY_1_I_PTR:%.*]] = getelementptr inbounds i32, ptr [[ARRAY_1:%.*]], i64 [[I_I64]]
+; CHECK-V8M-NEXT:    [[ARRAY_1_I_PTR:%.*]] = getelementptr inbounds i32, ptr [[ARRAY_1]], i64 [[I_I64]]
 ; CHECK-V8M-NEXT:    [[ARRAY_1_I:%.*]] = load i32, ptr [[ARRAY_1_I_PTR]], align 4
 ; CHECK-V8M-NEXT:    [[LOOP_ACC_1:%.*]] = add i32 [[LOOP_ACC]], [[ARRAY_1_I]]
-; CHECK-V8M-NEXT:    br i1 [[TMP4]], label [[GUARDED1]], label [[DEOPT2:%.*]], !prof [[PROF0]]
-; CHECK-V8M:       deopt2:
+; CHECK-V8M-NEXT:    br i1 [[TMP4]], label %[[GUARDED1]], label %[[DEOPT2:.*]], !prof [[PROF0]]
+; CHECK-V8M:       [[DEOPT2]]:
 ; CHECK-V8M-NEXT:    call void @prevent_merging()
 ; CHECK-V8M-NEXT:    ret i32 -1
-; CHECK-V8M:       guarded1:
-; CHECK-V8M-NEXT:    [[ARRAY_3_I_PTR:%.*]] = getelementptr inbounds i32, ptr [[ARRAY_3:%.*]], i64 [[I_I64]]
+; CHECK-V8M:       [[GUARDED1]]:
+; CHECK-V8M-NEXT:    [[ARRAY_3_I_PTR:%.*]] = getelementptr inbounds i32, ptr [[ARRAY_3]], i64 [[I_I64]]
 ; CHECK-V8M-NEXT:    [[ARRAY_3_I:%.*]] = load i32, ptr [[ARRAY_3_I_PTR]], align 4
 ; CHECK-V8M-NEXT:    [[LOOP_ACC_NEXT]] = add i32 [[LOOP_ACC_1]], [[ARRAY_3_I]]
 ; CHECK-V8M-NEXT:    [[I_NEXT]] = add nuw i32 [[I]], 1
 ; CHECK-V8M-NEXT:    [[CONTINUE:%.*]] = icmp ult i32 [[I_NEXT]], [[N]]
-; CHECK-V8M-NEXT:    br i1 [[CONTINUE]], label [[LOOP]], label [[EXIT:%.*]]
-; CHECK-V8M:       exit:
-; CHECK-V8M-NEXT:    [[RESULT:%.*]] = phi i32 [ [[LOOP_ACC_NEXT]], [[GUARDED1]] ]
+; CHECK-V8M-NEXT:    br i1 [[CONTINUE]], label %[[LOOP]], label %[[EXIT:.*]]
+; CHECK-V8M:       [[EXIT]]:
+; CHECK-V8M-NEXT:    [[RESULT:%.*]] = phi i32 [ [[LOOP_ACC_NEXT]], %[[GUARDED1]] ]
 ; CHECK-V8M-NEXT:    ret i32 [[RESULT]]
 ;
-; CHECK-V8A-LABEL: @distinct_checks(
-; CHECK-V8A-NEXT:  loop.preheader:
-; CHECK-V8A-NEXT:    [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N:%.*]], i32 1)
+; CHECK-V8A-LABEL: define i32 @distinct_checks(
+; CHECK-V8A-SAME: ptr [[ARRAY_1:%.*]], i32 [[LENGTH_1:%.*]], ptr [[ARRAY_2:%.*]], i32 [[LENGTH_2:%.*]], ptr [[ARRAY_3:%.*]], i32 [[LENGTH_3:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
+; CHECK-V8A-NEXT:  [[LOOP_PREHEADER:.*]]:
+; CHECK-V8A-NEXT:    [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N]], i32 1)
 ; CHECK-V8A-NEXT:    [[TMP0:%.*]] = add i32 [[UMAX]], -1
 ; CHECK-V8A-NEXT:    [[TMP1:%.*]] = freeze i32 [[TMP0]]
-; CHECK-V8A-NEXT:    [[TMP2:%.*]] = freeze i32 [[LENGTH_2:%.*]]
+; CHECK-V8A-NEXT:    [[TMP2:%.*]] = freeze i32 [[LENGTH_2]]
 ; CHECK-V8A-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP1]], i32 [[TMP2]])
-; CHECK-V8A-NEXT:    [[UMIN1:%.*]] = call i32 @llvm.umin.i32(i32 [[UMIN]], i32 [[LENGTH_1:%.*]])
+; CHECK-V8A-NEXT:    [[UMIN1:%.*]] = call i32 @llvm.umin.i32(i32 [[UMIN]], i32 [[LENGTH_1]])
 ; CHECK-V8A-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[LENGTH_1]], [[UMIN1]]
 ; CHECK-V8A-NEXT:    [[TMP4:%.*]] = icmp ne i32 [[LENGTH_2]], [[UMIN1]]
-; CHECK-V8A-NEXT:    br label [[LOOP:%.*]]
-; CHECK-V8A:       loop:
-; CHECK-V8A-NEXT:    [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED1:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ]
-; CHECK-V8A-NEXT:    [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED1]] ], [ 0, [[LOOP_PREHEADER]] ]
-; CHECK-V8A-NEXT:    br i1 [[TMP3]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof [[PROF0]]
-; CHECK-V8A:       deopt:
+; CHECK-V8A-NEXT:    br label %[[LOOP:.*]]
+; CHECK-V8A:       [[LOOP]]:
+; CHECK-V8A-NEXT:    [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], %[[GUARDED1:.*]] ], [ 0, %[[LOOP_PREHEADER]] ]
+; CHECK-V8A-NEXT:    [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], %[[GUARDED1]] ], [ 0, %[[LOOP_PREHEADER]] ]
+; CHECK-V8A-NEXT:    br i1 [[TMP3]], label %[[GUARDED:.*]], label %[[DEOPT:.*]], !prof [[PROF0]]
+; CHECK-V8A:       [[DEOPT]]:
 ; CHECK-V8A-NEXT:    call void @prevent_merging()
 ; CHECK-V8A-NEXT:    ret i32 -1
-; CHECK-V8A:       guarded:
+; CHECK-V8A:       [[GUARDED]]:
 ; CHECK-V8A-NEXT:    [[I_I64:%.*]] = zext i32 [[I]] to i64
-; CHECK-V8A-NEXT:    [[ARRAY_1_I_PTR:%.*]] = getelementptr inbounds i32, ptr [[ARRAY_1:%.*]], i64 [[I_I64]]
+; CHECK-V8A-NEXT:    [[ARRAY_1_I_PTR:%.*]] = getelementptr inbounds i32, ptr [[ARRAY_1]], i64 [[I_I64]]
 ; CHECK-V8A-NEXT:    [[ARRAY_1_I:%.*]] = load i32, ptr [[ARRAY_1_I_PTR]], align 4
 ; CHECK-V8A-NEXT:    [[LOOP_ACC_1:%.*]] = add i32 [[LOOP_ACC]], [[ARRAY_1_I]]
-; CHECK-V8A-NEXT:    br i1 [[TMP4]], label [[GUARDED1]], label [[DEOPT2:%.*]], !prof [[PROF0]]
-; CHECK-V8A:       deopt2:
+; CHECK-V8A-NEXT:    br i1 [[TMP4]], label %[[GUARDED1]], label %[[DEOPT2:.*]], !prof [[PROF0]]
+; CHECK-V8A:       [[DEOPT2]]:
 ; CHECK-V8A-NEXT:    call void @prevent_merging()
 ; CHECK-V8A-NEXT:    ret i32 -1
-; CHECK-V8A:       guarded1:
-; CHECK-V8A-NEXT:    [[ARRAY_3_I_PTR:%.*]] = getelementptr inbounds i32, ptr [[ARRAY_3:%.*]], i64 [[I_I64]]
+; CHECK-V8A:       [[GUARDED1]]:
+; CHECK-V8A-NEXT:    [[ARRAY_3_I_PTR:%.*]] = getelementptr inbounds i32, ptr [[ARRAY_3]], i64 [[I_I64]]
 ; CHECK-V8A-NEXT:    [[ARRAY_3_I:%.*]] = load i32, ptr [[ARRAY_3_I_PTR]], align 4
 ; CHECK-V8A-NEXT:    [[LOOP_ACC_NEXT]] = add i32 [[LOOP_ACC_1]], [[ARRAY_3_I]]
 ; CHECK-V8A-NEXT:    [[I_NEXT]] = add nuw i32 [[I]], 1
 ; CHECK-V8A-NEXT:    [[CONTINUE:%.*]] = icmp ult i32 [[I_NEXT]], [[N]]
-; CHECK-V8A-NEXT:    br i1 [[CONTINUE]], label [[LOOP]], label [[EXIT:%.*]]
-; CHECK-V8A:       exit:
-; CHECK-V8A-NEXT:    [[RESULT:%.*]] = phi i32 [ [[LOOP_ACC_NEXT]], [[GUARDED1]] ]
+; CHECK-V8A-NEXT:    br i1 [[CONTINUE]], label %[[LOOP]], label %[[EXIT:.*]]
+; CHECK-V8A:       [[EXIT]]:
+; CHECK-V8A-NEXT:    [[RESULT:%.*]] = phi i32 [ [[LOOP_ACC_NEXT]], %[[GUARDED1]] ]
 ; CHECK-V8A-NEXT:    ret i32 [[RESULT]]
 ;
 loop.preheader:                                   ; preds = %entry
@@ -614,74 +628,76 @@ exit:
 }
 
 define i32 @duplicate_checks(ptr %array.1, ptr %array.2, ptr %array.3, i32 %length, i32 %n) #0 {
-; CHECK-V8M-LABEL: @duplicate_checks(
-; CHECK-V8M-NEXT:  loop.preheader:
-; CHECK-V8M-NEXT:    [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N:%.*]], i32 1)
+; CHECK-V8M-LABEL: define i32 @duplicate_checks(
+; CHECK-V8M-SAME: ptr [[ARRAY_1:%.*]], ptr [[ARRAY_2:%.*]], ptr [[ARRAY_3:%.*]], i32 [[LENGTH:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
+; CHECK-V8M-NEXT:  [[LOOP_PREHEADER:.*]]:
+; CHECK-V8M-NEXT:    [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N]], i32 1)
 ; CHECK-V8M-NEXT:    [[TMP0:%.*]] = add i32 [[UMAX]], -1
 ; CHECK-V8M-NEXT:    [[TMP1:%.*]] = freeze i32 [[TMP0]]
-; CHECK-V8M-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP1]], i32 [[LENGTH:%.*]])
+; CHECK-V8M-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP1]], i32 [[LENGTH]])
 ; CHECK-V8M-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[LENGTH]], [[UMIN]]
-; CHECK-V8M-NEXT:    br label [[LOOP:%.*]]
-; CHECK-V8M:       loop:
-; CHECK-V8M-NEXT:    [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED1:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ]
-; CHECK-V8M-NEXT:    [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED1]] ], [ 0, [[LOOP_PREHEADER]] ]
-; CHECK-V8M-NEXT:    br i1 [[TMP2]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof [[PROF0]]
-; CHECK-V8M:       deopt:
+; CHECK-V8M-NEXT:    br label %[[LOOP:.*]]
+; CHECK-V8M:       [[LOOP]]:
+; CHECK-V8M-NEXT:    [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], %[[GUARDED1:.*]] ], [ 0, %[[LOOP_PREHEADER]] ]
+; CHECK-V8M-NEXT:    [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], %[[GUARDED1]] ], [ 0, %[[LOOP_PREHEADER]] ]
+; CHECK-V8M-NEXT:    br i1 [[TMP2]], label %[[GUARDED:.*]], label %[[DEOPT:.*]], !prof [[PROF0]]
+; CHECK-V8M:       [[DEOPT]]:
 ; CHECK-V8M-NEXT:    call void @prevent_merging()
 ; CHECK-V8M-NEXT:    ret i32 -1
-; CHECK-V8M:       guarded:
+; CHECK-V8M:       [[GUARDED]]:
 ; CHECK-V8M-NEXT:    [[I_I64:%.*]] = zext i32 [[I]] to i64
-; CHECK-V8M-NEXT:    [[ARRAY_1_I_PTR:%.*]] = getelementptr inbounds i32, ptr [[ARRAY_1:%.*]], i64 [[I_I64]]
+; CHECK-V8M-NEXT:    [[ARRAY_1_I_PTR:%.*]] = getelementptr inbounds i32, ptr [[ARRAY_1]], i64 [[I_I64]]
 ; CHECK-V8M-NEXT:    [[ARRAY_1_I:%.*]] = load i32, ptr [[ARRAY_1_I_PTR]], align 4
 ; CHECK-V8M-NEXT:    [[LOOP_ACC_1:%.*]] = add i32 [[LOOP_ACC]], [[ARRAY_1_I]]
-; CHECK-V8M-NEXT:    br i1 true, label [[GUARDED1]], label [[DEOPT2:%.*]], !prof [[PROF0]]
-; CHECK-V8M:       deopt2:
+; CHECK-V8M-NEXT:    br i1 true, label %[[GUARDED1]], label %[[DEOPT2:.*]], !prof [[PROF0]]
+; CHECK-V8M:       [[DEOPT2]]:
 ; CHECK-V8M-NEXT:    call void @prevent_merging()
 ; CHECK-V8M-NEXT:    ret i32 -1
-; CHECK-V8M:       guarded1:
-; CHECK-V8M-NEXT:    [[ARRAY_3_I_PTR:%.*]] = getelementptr inbounds i32, ptr [[ARRAY_3:%.*]], i64 [[I_I64]]
+; CHECK-V8M:       [[GUARDED1]]:
+; CHECK-V8M-NEXT:    [[ARRAY_3_I_PTR:%.*]] = getelementptr inbounds i32, ptr [[ARRAY_3]], i64 [[I_I64]]
 ; CHECK-V8M-NEXT:    [[ARRAY_3_I:%.*]] = load i32, ptr [[ARRAY_3_I_PTR]], align 4
 ; CHECK-V8M-NEXT:    [[LOOP_ACC_NEXT]] = add i32 [[LOOP_ACC_1]], [[ARRAY_3_I]]
 ; CHECK-V8M-NEXT:    [[I_NEXT]] = add nuw i32 [[I]], 1
 ; CHECK-V8M-NEXT:    [[CONTINUE:%.*]] = icmp ult i32 [[I_NEXT]], [[N]]
-; CHECK-V8M-NEXT:    br i1 [[CONTINUE]], label [[LOOP]], label [[EXIT:%.*]]
-; CHECK-V8M:       exit:
-; CHECK-V8M-NEXT:    [[RESULT:%.*]] = phi i32 [ [[LOOP_ACC_NEXT]], [[GUARDED1]] ]
+; CHECK-V8M-NEXT:    br i1 [[CONTINUE]], label %[[LOOP]], label %[[EXIT:.*]]
+; CHECK-V8M:       [[EXIT]]:
+; CHECK-V8M-NEXT:    [[RESULT:%.*]] = phi i32 [ [[LOOP_ACC_NEXT]], %[[GUARDED1]] ]
 ; CHECK-V8M-NEXT:    ret i32 [[RESULT]]
 ;
-; CHECK-V8A-LABEL: @duplicate_checks(
-; CHECK-V8A-NEXT:  loop.preheader:
-; CHECK-V8A-NEXT:    [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N:%.*]], i32 1)
+; CHECK-V8A-LABEL: define i32 @duplicate_checks(
+; CHECK-V8A-SAME: ptr [[ARRAY_1:%.*]], ptr [[ARRAY_2:%.*]], ptr [[ARRAY_3:%.*]], i32 [[LENGTH:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
+; CHECK-V8A-NEXT:  [[LOOP_PREHEADER:.*]]:
+; CHECK-V8A-NEXT:    [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N]], i32 1)
 ; CHECK-V8A-NEXT:    [[TMP0:%.*]] = add i32 [[UMAX]], -1
 ; CHECK-V8A-NEXT:    [[TMP1:%.*]] = freeze i32 [[TMP0]]
-; CHECK-V8A-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP1]], i32 [[LENGTH:%.*]])
+; CHECK-V8A-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP1]], i32 [[LENGTH]])
 ; CHECK-V8A-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[LENGTH]], [[UMIN]]
-; CHECK-V8A-NEXT:    br label [[LOOP:%.*]]
-; CHECK-V8A:       loop:
-; CHECK-V8A-NEXT:    [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED1:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ]
-; CHECK-V8A-NEXT:    [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED1]] ], [ 0, [[LOOP_PREHEADER]] ]
-; CHECK-V8A-NEXT:    br i1 [[TMP2]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof [[PROF0]]
-; CHECK-V8A:       deopt:
+; CHECK-V8A-NEXT:    br label %[[LOOP:.*]]
+; CHECK-V8A:       [[LOOP]]:
+; CHECK-V8A-NEXT:    [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], %[[GUARDED1:.*]] ], [ 0, %[[LOOP_PREHEADER]] ]
+; CHECK-V8A-NEXT:    [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], %[[GUARDED1]] ], [ 0, %[[LOOP_PREHEADER]] ]
+; CHECK-V8A-NEXT:    br i1 [[TMP2]], label %[[GUARDED:.*]], label %[[DEOPT:.*]], !prof [[PROF0]]
+; CHECK-V8A:       [[DEOPT]]:
 ; CHECK-V8A-NEXT:    call void @prevent_merging()
 ; CHECK-V8A-NEXT:    ret i32 -1
-; CHECK-V8A:       guarded:
+; CHECK-V8A:       [[GUARDED]]:
 ; CHECK-V8A-NEXT:    [[I_I64:%.*]] = zext i32 [[I]] to i64
-; CHECK-V8A-NEXT:    [[ARRAY_1_I_PTR:%.*]] = getelementptr inbounds i32, ptr [[ARRAY_1:%.*]], i64 [[I_I64]]
+; CHECK-V8A-NEXT:    [[ARRAY_1_I_PTR:%.*]] = getelementptr inbounds i32, ptr [[ARRAY_1]], i64 [[I_I64]]
 ; CHECK-V8A-NEXT:    [[ARRAY_1_I:%.*]] = load i32, ptr [[ARRAY_1_I_PTR]], align 4
 ; CHECK-V8A-NEXT:    [[LOOP_ACC_1:%.*]] = add i32 [[LOOP_ACC]], [[ARRAY_1_I]]
-; CHECK-V8A-NEXT:    br i1 true, label [[GUARDED1]], label [[DEOPT2:%.*]], !prof [[PROF0]]
-; CHECK-V8A:       deopt2:
+; CHECK-V8A-NEXT:    br i1 true, label %[[GUARDED1]], label %[[DEOPT2:.*]], !prof [[PROF0]]
+; CHECK-V8A:       [[DEOPT2]]:
 ; CHECK-V8A-NEXT:    call void @prevent_merging()
 ; CHECK-V8A-NEXT:    ret i32 -1
-; CHECK-V8A:       guarded1:
-; CHECK-V8A-NEXT:    [[ARRAY_3_I_PTR:%.*]] = getelementptr inbounds i32, ptr [[ARRAY_3:%.*]], i64 [[I_I64]]
+; CHECK-V8A:       [[GUARDED1]]:
+; CHECK-V8A-NEXT:    [[ARRAY_3_I_PTR:%.*]] = getelementptr inbounds i32, ptr [[ARRAY_3]], i64 [[I_I64]]
 ; CHECK-V8A-NEXT:    [[ARRAY_3_I:%.*]] = load i32, ptr [[ARRAY_3_I_PTR]], align 4
 ; CHECK-V8A-NEXT:    [[LOOP_ACC_NEXT]] = add i32 [[LOOP_ACC_1]], [[ARRAY_3_I]]
 ; CHECK-V8A-NEXT:    [[I_NEXT]] = add nuw i32 [[I]], 1
 ; CHECK-V8A-NEXT:    [[CONTINUE:%.*]] = icmp ult i32 [[I_NEXT]], [[N]]
-; CHECK-V8A-NEXT:    br i1 [[CONTINUE]], label [[LOOP]], label [[EXIT:%.*]]
-; CHECK-V8A:       exit:
-; CHECK-V8A-NEXT:    [[RESULT:%.*]] = phi i32 [ [[LOOP_ACC_NEXT]], [[GUARDED1]] ]
+; CHECK-V8A-NEXT:    br i1 [[CONTINUE]], label %[[LOOP]], label %[[EXIT:.*]]
+; CHECK-V8A:       [[EXIT]]:
+; CHECK-V8A-NEXT:    [[RESULT:%.*]] = phi i32 [ [[LOOP_ACC_NEXT]], %[[GUARDED1]] ]
 ; CHECK-V8A-NEXT:    ret i32 [[RESULT]]
 ;
 loop.preheader:                                   ; preds = %entry
@@ -726,62 +742,64 @@ exit:
 ; This version uses a manually lftred exit condition to work around an issue described
 ; in detail on next test.
 define i32 @different_ivs(ptr %array, i32 %length, i32 %n) #0 {
-; CHECK-V8M-LABEL: @different_ivs(
-; CHECK-V8M-NEXT:  loop.preheader:
-; CHECK-V8M-NEXT:    [[N64:%.*]] = zext i32 [[N:%.*]] to i64
+; CHECK-V8M-LABEL: define i32 @different_ivs(
+; CHECK-V8M-SAME: ptr [[ARRAY:%.*]], i32 [[LENGTH:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
+; CHECK-V8M-NEXT:  [[LOOP_PREHEADER:.*]]:
+; CHECK-V8M-NEXT:    [[N64:%.*]] = zext i32 [[N]] to i64
 ; CHECK-V8M-NEXT:    [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[N64]], i64 1)
 ; CHECK-V8M-NEXT:    [[TMP0:%.*]] = add nsw i64 [[UMAX]], -1
 ; CHECK-V8M-NEXT:    [[TMP1:%.*]] = freeze i64 [[TMP0]]
-; CHECK-V8M-NEXT:    [[TMP2:%.*]] = zext i32 [[LENGTH:%.*]] to i64
+; CHECK-V8M-NEXT:    [[TMP2:%.*]] = zext i32 [[LENGTH]] to i64
 ; CHECK-V8M-NEXT:    [[UMIN:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP1]], i64 [[TMP2]])
 ; CHECK-V8M-NEXT:    [[TMP3:%.*]] = zext i32 [[LENGTH]] to i64
 ; CHECK-V8M-NEXT:    [[TMP4:%.*]] = icmp ne i64 [[TMP3]], [[UMIN]]
-; CHECK-V8M-NEXT:    br label [[LOOP:%.*]]
-; CHECK-V8M:       loop:
-; CHECK-V8M-NEXT:    [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ]
-; CHECK-V8M-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ]
-; CHECK-V8M-NEXT:    br i1 [[TMP4]], label [[GUARDED]], label [[DEOPT:%.*]], !prof [[PROF0]]
-; CHECK-V8M:       deopt:
+; CHECK-V8M-NEXT:    br label %[[LOOP:.*]]
+; CHECK-V8M:       [[LOOP]]:
+; CHECK-V8M-NEXT:    [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], %[[GUARDED:.*]] ], [ 0, %[[LOOP_PREHEADER]] ]
+; CHECK-V8M-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], %[[GUARDED]] ], [ 0, %[[LOOP_PREHEADER]] ]
+; CHECK-V8M-NEXT:    br i1 [[TMP4]], label %[[GUARDED]], label %[[DEOPT:.*]], !prof [[PROF0]]
+; CHECK-V8M:       [[DEOPT]]:
 ; CHECK-V8M-NEXT:    call void @prevent_merging()
 ; CHECK-V8M-NEXT:    ret i32 -1
-; CHECK-V8M:       guarded:
-; CHECK-V8M-NEXT:    [[ARRAY_I_PTR:%.*]] = getelementptr inbounds i32, ptr [[ARRAY:%.*]], i64 [[I]]
+; CHECK-V8M:       [[GUARDED]]:
+; CHECK-V8M-NEXT:    [[ARRAY_I_PTR:%.*]] = getelementptr inbounds i32, ptr [[ARRAY]], i64 [[I]]
 ; CHECK-V8M-NEXT:    [[ARRAY_I:%.*]] = load i32, ptr [[ARRAY_I_PTR]], align 4
 ; CHECK-V8M-NEXT:    [[LOOP_ACC_NEXT]] = add i32 [[LOOP_ACC]], [[ARRAY_I]]
 ; CHECK-V8M-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 1
 ; CHECK-V8M-NEXT:    [[CONTINUE:%.*]] = icmp ult i64 [[I_NEXT]], [[N64]]
-; CHECK-V8M-NEXT:    br i1 [[CONTINUE]], label [[LOOP]], label [[EXIT:%.*]]
-; CHECK-V8M:       exit:
-; CHECK-V8M-NEXT:    [[RESULT:%.*]] = phi i32 [ [[LOOP_ACC_NEXT]], [[GUARDED]] ]
+; CHECK-V8M-NEXT:    br i1 [[CONTINUE]], label %[[LOOP]], label %[[EXIT:.*]]
+; CHECK-V8M:       [[EXIT]]:
+; CHECK-V8M-NEXT:    [[RESULT:%.*]] = phi i32 [ [[LOOP_ACC_NEXT]], %[[GUARDED]] ]
 ; CHECK-V8M-NEXT:    ret i32 [[RESULT]]
 ;
-; CHECK-V8A-LABEL: @different_ivs(
-; CHECK-V8A-NEXT:  loop.preheader:
-; CHECK-V8A-NEXT:    [[N64:%.*]] = zext i32 [[N:%.*]] to i64
+; CHECK-V8A-LABEL: define i32 @different_ivs(
+; CHECK-V8A-SAME: ptr [[ARRAY:%.*]], i32 [[LENGTH:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
+; CHECK-V8A-NEXT:  [[LOOP_PREHEADER:.*]]:
+; CHECK-V8A-NEXT:    [[N64:%.*]] = zext i32 [[N]] to i64
 ; CHECK-V8A-NEXT:    [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[N64]], i64 1)
 ; CHECK-V8A-NEXT:    [[TMP0:%.*]] = add nsw i64 [[UMAX]], -1
 ; CHECK-V8A-NEXT:    [[TMP1:%.*]] = freeze i64 [[TMP0]]
-; CHECK-V8A-NEXT:    [[TMP2:%.*]] = zext i32 [[LENGTH:%.*]] to i64
+; CHECK-V8A-NEXT:    [[TMP2:%.*]] = zext i32 [[LENGTH]] to i64
 ; CHECK-V8A-NEXT:    [[UMIN:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP1]], i64 [[TMP2]])
 ; CHECK-V8A-NEXT:    [[TMP3:%.*]] = zext i32 [[LENGTH]] to i64
 ; CHECK-V8A-NEXT:    [[TMP4:%.*]] = icmp ne i64 [[TMP3]], [[UMIN]]
-; CHECK-V8A-NEXT:    br label [[LOOP:%.*]]
-; CHECK-V8A:       loop:
-; CHECK-V8A-NEXT:    [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ]
-; CHECK-V8A-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ]
-; CHECK-V8A-NEXT:    br i1 [[TMP4]], label [[GUARDED]], label [[DEOPT:%.*]], !prof [[PROF0]]
-; CHECK-V8A:       deopt:
+; CHECK-V8A-NEXT:    br label %[[LOOP:.*]]
+; CHECK-V8A:       [[LOOP]]:
+; CHECK-V8A-NEXT:    [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], %[[GUARDED:.*]] ], [ 0, %[[LOOP_PREHEADER]] ]
+; CHECK-V8A-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], %[[GUARDED]] ], [ 0, %[[LOOP_PREHEADER]] ]
+; CHECK-V8A-NEXT:    br i1 [[TMP4]], label %[[GUARDED]], label %[[DEOPT:.*]], !prof [[PROF0]]
+; CHECK-V8A:       [[DEOPT]]:
 ; CHECK-V8A-NEXT:    call void @prevent_merging()
 ; CHECK-V8A-NEXT:    ret i32 -1
-; CHECK-V8A:       guarded:
-; CHECK-V8A-NEXT:    [[ARRAY_I_PTR:%.*]] = getelementptr inbounds i32, ptr [[ARRAY:%.*]], i64 [[I]]
+; CHECK-V8A:       [[GUARDED]]:
+; CHECK-V8A-NEXT:    [[ARRAY_I_PTR:%.*]] = getelementptr inbounds i32, ptr [[ARRAY]], i64 [[I]]
 ; CHECK-V8A-NEXT:    [[ARRAY_I:%.*]] = load i32, ptr [[ARRAY_I_PTR]], align 4
 ; CHECK-V8A-NEXT:    [[LOOP_ACC_NEXT]] = add i32 [[LOOP_ACC]], [[ARRAY_I]]
 ; CHECK-V8A-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 1
 ; CHECK-V8A-NEXT:    [[CONTINUE:%.*]] = icmp ult i64 [[I_NEXT]], [[N64]]
-; CHECK-V8A-NEXT:    br i1 [[CONTINUE]], label [[LOOP]], label [[EXIT:%.*]]
-; CHECK-V8A:       exit:
-; CHECK-V8A-NEXT:    [[RESULT:%.*]] = phi i32 [ [[LOOP_ACC_NEXT]], [[GUARDED]] ]
+; CHECK-V8A-NEXT:    br i1 [[CONTINUE]], label %[[LOOP]], label %[[EXIT:.*]]
+; CHECK-V8A:       [[EXIT]]:
+; CHECK-V8A-NEXT:    [[RESULT:%.*]] = phi i32 [ [[LOOP_ACC_NEXT]], %[[GUARDED]] ]
 ; CHECK-V8A-NEXT:    ret i32 [[RESULT]]
 ;
 loop.preheader:
@@ -822,3 +840,8 @@ declare void @call()
 !2 = !{i32 0, i32 50}
 
 attributes #0 = { minsize optsize }
+;.
+; CHECK-V8M: [[PROF0]] = !{!"branch_weights", i32 1048576, i32 1}
+;.
+; CHECK-V8A: [[PROF0]] = !{!"branch_weights", i32 1048576, i32 1}
+;.
diff --git a/llvm/test/Transforms/IndVarSimplify/ARM/indvar-unroll-imm-cost.ll b/llvm/test/Transforms/IndVarSimplify/ARM/indvar-unroll-imm-cost.ll
index f907f23e0b520..2261423766792 100644
--- a/llvm/test/Transforms/IndVarSimplify/ARM/indvar-unroll-imm-cost.ll
+++ b/llvm/test/Transforms/IndVarSimplify/ARM/indvar-unroll-imm-cost.ll
@@ -77,7 +77,6 @@ define dso_local arm_aapcscc void @test(ptr nocapture %pDest, ptr nocapture read
 ; CHECK-NEXT:    [[CMP2780:%.*]] = icmp ugt i32 [[ADD25]], [[J_0_LCSSA]]
 ; CHECK-NEXT:    br i1 [[CMP2780]], label [[FOR_BODY29_PREHEADER:%.*]], label [[FOR_END40]]
 ; CHECK:       for.body29.preheader:
-; CHECK-NEXT:    [[TMP10:%.*]] = sub nsw i32 [[ADD25]], [[J_0_LCSSA]]
 ; CHECK-NEXT:    br label [[FOR_BODY29:%.*]]
 ; CHECK:       for.body29:
 ; CHECK-NEXT:    [[J_184:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY29]] ], [ [[J_0_LCSSA]], [[FOR_BODY29_PREHEADER]] ]
@@ -101,6 +100,7 @@ define dso_local arm_aapcscc void @test(ptr nocapture %pDest, ptr nocapture read
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[ADD25]]
 ; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END40_LOOPEXIT:%.*]], label [[FOR_BODY29]]
 ; CHECK:       for.end40.loopexit:
+; CHECK-NEXT:    [[TMP10:%.*]] = sub nsw i32 [[ADD25]], [[J_0_LCSSA]]
 ; CHECK-NEXT:    [[SCEVGEP93:%.*]] = getelementptr i16, ptr [[PSRCB_ADDR_1_LCSSA]], i32 [[TMP10]]
 ; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr i16, ptr [[PSRCA_ADDR_1_LCSSA]], i32 [[TMP10]]
 ; CHECK-NEXT:    [[SCEVGEP94:%.*]] = getelementptr i32, ptr [[PDEST_ADDR_1_LCSSA]], i32 [[TMP10]]
diff --git a/llvm/test/Transforms/IndVarSimplify/exit-count-select.ll b/llvm/test/Transforms/IndVarSimplify/exit-count-select.ll
index 21806c7f2cdc3..1592b84480e3f 100644
--- a/llvm/test/Transforms/IndVarSimplify/exit-count-select.ll
+++ b/llvm/test/Transforms/IndVarSimplify/exit-count-select.ll
@@ -4,11 +4,11 @@
 define i32 @logical_and_2ops(i32 %n, i32 %m) {
 ; CHECK-LABEL: @logical_and_2ops(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = freeze i32 [[M:%.*]]
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
 ; CHECK-NEXT:    br i1 false, label [[LOOP]], label [[EXIT:%.*]]
 ; CHECK:       exit:
+; CHECK-NEXT:    [[TMP0:%.*]] = freeze i32 [[M:%.*]]
 ; CHECK-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP0]], i32 [[N:%.*]])
 ; CHECK-NEXT:    ret i32 [[UMIN]]
 ;
@@ -28,11 +28,11 @@ exit:
 define i32 @logical_or_2ops(i32 %n, i32 %m) {
 ; CHECK-LABEL: @logical_or_2ops(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = freeze i32 [[M:%.*]]
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
 ; CHECK-NEXT:    br i1 true, label [[EXIT:%.*]], label [[LOOP]]
 ; CHECK:       exit:
+; CHECK-NEXT:    [[TMP0:%.*]] = freeze i32 [[M:%.*]]
 ; CHECK-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP0]], i32 [[N:%.*]])
 ; CHECK-NEXT:    ret i32 [[UMIN]]
 ;
@@ -52,13 +52,13 @@ exit:
 define i32 @logical_and_3ops(i32 %n, i32 %m, i32 %k) {
 ; CHECK-LABEL: @logical_and_3ops(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = freeze i32 [[K:%.*]]
-; CHECK-NEXT:    [[TMP1:%.*]] = freeze i32 [[M:%.*]]
-; CHECK-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP0]], i32 [[TMP1]])
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
 ; CHECK-NEXT:    br i1 false, label [[LOOP]], label [[EXIT:%.*]]
 ; CHECK:       exit:
+; CHECK-NEXT:    [[TMP0:%.*]] = freeze i32 [[K:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = freeze i32 [[M:%.*]]
+; CHECK-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP0]], i32 [[TMP1]])
 ; CHECK-NEXT:    [[UMIN1:%.*]] = call i32 @llvm.umin.i32(i32 [[UMIN]], i32 [[N:%.*]])
 ; CHECK-NEXT:    ret i32 [[UMIN1]]
 ;
@@ -80,13 +80,13 @@ exit:
 define i32 @logical_or_3ops(i32 %n, i32 %m, i32 %k) {
 ; CHECK-LABEL: @logical_or_3ops(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = freeze i32 [[K:%.*]]
-; CHECK-NEXT:    [[TMP1:%.*]] = freeze i32 [[M:%.*]]
-; CHECK-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP0]], i32 [[TMP1]])
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
 ; CHECK-NEXT:    br i1 true, label [[EXIT:%.*]], label [[LOOP]]
 ; CHECK:       exit:
+; CHECK-NEXT:    [[TMP0:%.*]] = freeze i32 [[K:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = freeze i32 [[M:%.*]]
+; CHECK-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP0]], i32 [[TMP1]])
 ; CHECK-NEXT:    [[UMIN1:%.*]] = call i32 @llvm.umin.i32(i32 [[UMIN]], i32 [[N:%.*]])
 ; CHECK-NEXT:    ret i32 [[UMIN1]]
 ;
diff --git a/llvm/test/Transforms/IndVarSimplify/exit_value_test3.ll b/llvm/test/Transforms/IndVarSimplify/exit_value_test3.ll
index aba7532f5ed92..c03cd95a8c861 100644
--- a/llvm/test/Transforms/IndVarSimplify/exit_value_test3.ll
+++ b/llvm/test/Transforms/IndVarSimplify/exit_value_test3.ll
@@ -4,9 +4,9 @@
 ; is high because the loop can be deleted after the exit value rewrite.
 ;
 ; CHECK-LABEL: @_Z3fooPKcjj(
-; CHECK: udiv
 ; CHECK: [[LABEL:^[a-zA-Z0-9_.]+]]:
 ; CHECK-NOT: br {{.*}} [[LABEL]]
+; CHECK: udiv
 
 define i32 @_Z3fooPKcjj(ptr nocapture readnone %s, i32 %len, i32 %c) #0 {
 entry:
diff --git a/llvm/test/Transforms/IndVarSimplify/finite-exit-comparisons.ll b/llvm/test/Transforms/IndVarSimplify/finite-exit-comparisons.ll
index 3c6b12dac2119..e006d9f6696ca 100644
--- a/llvm/test/Transforms/IndVarSimplify/finite-exit-comparisons.ll
+++ b/llvm/test/Transforms/IndVarSimplify/finite-exit-comparisons.ll
@@ -932,17 +932,17 @@ for.end:                                          ; preds = %for.body, %entry
 define i16 @ult_multiuse_profit(i16 %n.raw, i8 %start) mustprogress {
 ; CHECK-LABEL: @ult_multiuse_profit(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = add i8 [[START:%.*]], 1
-; CHECK-NEXT:    [[TMP1:%.*]] = zext i8 [[TMP0]] to i16
-; CHECK-NEXT:    [[TMP2:%.*]] = trunc i16 254 to i8
+; CHECK-NEXT:    [[TMP0:%.*]] = trunc i16 254 to i8
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[IV:%.*]] = phi i8 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[START]], [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    [[IV_NEXT]] = add i8 [[IV]], 1
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i8 [[IV_NEXT]], [[TMP2]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i8 [[IV_NEXT]], [[TMP0]]
 ; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]]
 ; CHECK:       for.end:
-; CHECK-NEXT:    [[UMAX:%.*]] = call i16 @llvm.umax.i16(i16 [[TMP1]], i16 254)
+; CHECK-NEXT:    [[TMP1:%.*]] = add i8 [[START:%.*]], 1
+; CHECK-NEXT:    [[TMP2:%.*]] = zext i8 [[TMP1]] to i16
+; CHECK-NEXT:    [[UMAX:%.*]] = call i16 @llvm.umax.i16(i16 [[TMP2]], i16 254)
 ; CHECK-NEXT:    ret i16 [[UMAX]]
 ;
 entry:
diff --git a/llvm/test/Transforms/IndVarSimplify/pr116483.ll b/llvm/test/Transforms/IndVarSimplify/pr116483.ll
index ae108a525223e..093e25a3caa81 100644
--- a/llvm/test/Transforms/IndVarSimplify/pr116483.ll
+++ b/llvm/test/Transforms/IndVarSimplify/pr116483.ll
@@ -4,15 +4,15 @@
 define i32 @test() {
 ; CHECK-LABEL: define i32 @test() {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br label %[[LOOP_BODY:.*]]
+; CHECK:       [[LOOP_BODY]]:
+; CHECK-NEXT:    br i1 true, label %[[EXIT:.*]], label %[[LOOP_BODY]]
+; CHECK:       [[EXIT]]:
 ; CHECK-NEXT:    [[XOR:%.*]] = xor i32 0, 3
 ; CHECK-NEXT:    [[MUL:%.*]] = mul i32 [[XOR]], 329
 ; CHECK-NEXT:    [[CONV:%.*]] = trunc i32 [[MUL]] to i16
 ; CHECK-NEXT:    [[SEXT:%.*]] = shl i16 [[CONV]], 8
 ; CHECK-NEXT:    [[CONV1:%.*]] = ashr i16 [[SEXT]], 8
-; CHECK-NEXT:    br label %[[LOOP_BODY:.*]]
-; CHECK:       [[LOOP_BODY]]:
-; CHECK-NEXT:    br i1 true, label %[[EXIT:.*]], label %[[LOOP_BODY]]
-; CHECK:       [[EXIT]]:
 ; CHECK-NEXT:    [[CONV3:%.*]] = zext i16 [[CONV1]] to i32
 ; CHECK-NEXT:    ret i32 [[CONV3]]
 ;
diff --git a/llvm/test/Transforms/IndVarSimplify/pr63763.ll b/llvm/test/Transforms/IndVarSimplify/pr63763.ll
index 4e62e92ca07ee..427db1e67410a 100644
--- a/llvm/test/Transforms/IndVarSimplify/pr63763.ll
+++ b/llvm/test/Transforms/IndVarSimplify/pr63763.ll
@@ -16,12 +16,12 @@ define i32 @test(i1 %c) {
 ; CHECK-NEXT:    [[CONV2:%.*]] = ashr exact i32 [[SEXT]], 24
 ; CHECK-NEXT:    [[INVARIANT_OP:%.*]] = sub nsw i32 7, [[CONV2]]
 ; CHECK-NEXT:    call void @use(i32 [[INVARIANT_OP]])
-; CHECK-NEXT:    [[SEXT_US:%.*]] = shl i32 [[SEL]], 24
-; CHECK-NEXT:    [[CONV2_US:%.*]] = ashr exact i32 [[SEXT_US]], 24
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
 ; CHECK-NEXT:    br i1 true, label [[EXIT:%.*]], label [[LOOP]]
 ; CHECK:       exit:
+; CHECK-NEXT:    [[SEXT_US:%.*]] = shl i32 [[SEL]], 24
+; CHECK-NEXT:    [[CONV2_US:%.*]] = ashr exact i32 [[SEXT_US]], 24
 ; CHECK-NEXT:    [[INVARIANT_OP_US:%.*]] = sub nsw i32 7, [[CONV2_US]]
 ; CHECK-NEXT:    ret i32 [[INVARIANT_OP_US]]
 ;
diff --git a/llvm/test/Transforms/IndVarSimplify/replace-loop-exit-folds.ll b/llvm/test/Transforms/IndVarSimplify/replace-loop-exit-folds.ll
index 4692a542053c9..b3162de0f2245 100644
--- a/llvm/test/Transforms/IndVarSimplify/replace-loop-exit-folds.ll
+++ b/llvm/test/Transforms/IndVarSimplify/replace-loop-exit-folds.ll
@@ -4,20 +4,21 @@
 target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
 
 define i32 @remove_loop(i32 %size) {
-; CHECK-LABEL: @remove_loop(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[SIZE:%.*]], 31
+; CHECK-LABEL: define i32 @remove_loop(
+; CHECK-SAME: i32 [[SIZE:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[WHILE_COND:.*]]
+; CHECK:       [[WHILE_COND]]:
+; CHECK-NEXT:    [[SIZE_ADDR_0:%.*]] = phi i32 [ [[SIZE]], %[[ENTRY]] ], [ [[SUB:%.*]], %[[WHILE_COND]] ]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i32 [[SIZE_ADDR_0]], 31
+; CHECK-NEXT:    [[SUB]] = add i32 [[SIZE_ADDR_0]], -32
+; CHECK-NEXT:    br i1 [[CMP]], label %[[WHILE_COND]], label %[[WHILE_END:.*]]
+; CHECK:       [[WHILE_END]]:
+; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[SIZE]], 31
 ; CHECK-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SIZE]], i32 31)
 ; CHECK-NEXT:    [[TMP1:%.*]] = sub i32 [[TMP0]], [[UMIN]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = lshr i32 [[TMP1]], 5
 ; CHECK-NEXT:    [[TMP3:%.*]] = shl nuw i32 [[TMP2]], 5
-; CHECK-NEXT:    br label [[WHILE_COND:%.*]]
-; CHECK:       while.cond:
-; CHECK-NEXT:    [[SIZE_ADDR_0:%.*]] = phi i32 [ [[SIZE]], [[ENTRY:%.*]] ], [ [[SUB:%.*]], [[WHILE_COND]] ]
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i32 [[SIZE_ADDR_0]], 31
-; CHECK-NEXT:    [[SUB]] = add i32 [[SIZE_ADDR_0]], -32
-; CHECK-NEXT:    br i1 [[CMP]], label [[WHILE_COND]], label [[WHILE_END:%.*]]
-; CHECK:       while.end:
 ; CHECK-NEXT:    [[TMP4:%.*]] = sub i32 [[SIZE]], [[TMP3]]
 ; CHECK-NEXT:    ret i32 [[TMP4]]
 ;
diff --git a/llvm/test/Transforms/IndVarSimplify/sentinel.ll b/llvm/test/Transforms/IndVarSimplify/sentinel.ll
index d1140affb5a4b..523414167956b 100644
--- a/llvm/test/Transforms/IndVarSimplify/sentinel.ll
+++ b/llvm/test/Transforms/IndVarSimplify/sentinel.ll
@@ -10,18 +10,18 @@ define void @test(i1 %arg) personality ptr @snork {
 ; CHECK-NEXT:    br label [[BB4:%.*]]
 ; CHECK:       bb1:
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT:%.*]] = add i32 [[INDVARS_IV:%.*]], 1
-; CHECK-NEXT:    [[TMP0:%.*]] = sub i32 [[TMP1:%.*]], [[SMAX:%.*]]
+; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[TMP6:%.*]], [[INDVARS_IV]]
+; CHECK-NEXT:    [[TMP1:%.*]] = sub i32 [[TMP0]], [[SMAX:%.*]]
 ; CHECK-NEXT:    br i1 [[ARG:%.*]], label [[BB2:%.*]], label [[BB4]]
 ; CHECK:       bb2:
-; CHECK-NEXT:    [[TMP3:%.*]] = phi i32 [ [[TMP0]], [[BB1:%.*]] ]
+; CHECK-NEXT:    [[TMP3:%.*]] = phi i32 [ [[TMP1]], [[BB1:%.*]] ]
 ; CHECK-NEXT:    ret void
 ; CHECK:       bb4:
 ; CHECK-NEXT:    [[INDVARS_IV]] = phi i32 [ [[INDVARS_IV_NEXT]], [[BB1]] ], [ undef, [[BB:%.*]] ]
 ; CHECK-NEXT:    [[SMAX]] = call i32 @llvm.smax.i32(i32 [[INDVARS_IV]], i32 36)
-; CHECK-NEXT:    [[TMP6:%.*]] = invoke i32 @quux() [ "deopt"(i32 0, i32 0, i32 0, i32 180, i32 0, i32 25, i32 0, i32 7, ptr null, i32 7, ptr null, i32 7, ptr null, i32 3, i32 [[INDVARS_IV]], i32 3, i32 undef, i32 7, ptr null, i32 3, i32 undef, i32 3, i32 undef, i32 3, i32 undef, i32 3, i32 undef, i32 4, double undef, i32 7, ptr null, i32 4, i64 undef, i32 7, ptr null, i32 0, ptr addrspace(1) undef, i32 3, i32 undef, i32 0, ptr addrspace(1) undef, i32 0, ptr addrspace(1) undef, i32 0, ptr addrspace(1) undef, i32 0, ptr addrspace(1) undef, i32 0, ptr addrspace(1) undef, i32 0, ptr addrspace(1) undef, i32 0, ptr addrspace(1) undef, i32 0, ptr addrspace(1) undef, i32 7, ptr null) ]
+; CHECK-NEXT:    [[TMP6]] = invoke i32 @quux() [ "deopt"(i32 0, i32 0, i32 0, i32 180, i32 0, i32 25, i32 0, i32 7, ptr null, i32 7, ptr null, i32 7, ptr null, i32 3, i32 [[INDVARS_IV]], i32 3, i32 undef, i32 7, ptr null, i32 3, i32 undef, i32 3, i32 undef, i32 3, i32 undef, i32 3, i32 undef, i32 4, double undef, i32 7, ptr null, i32 4, i64 undef, i32 7, ptr null, i32 0, ptr addrspace(1) undef, i32 3, i32 undef, i32 0, ptr addrspace(1) undef, i32 0, ptr addrspace(1) undef, i32 0, ptr addrspace(1) undef, i32 0, ptr addrspace(1) undef, i32 0, ptr addrspace(1) undef, i32 0, ptr addrspace(1) undef, i32 0, ptr addrspace(1) undef, i32 0, ptr addrspace(1) undef, i32 7, ptr null) ]
 ; CHECK-NEXT:            to label [[BB7:%.*]] unwind label [[BB15:%.*]]
 ; CHECK:       bb7:
-; CHECK-NEXT:    [[TMP1]] = add i32 [[TMP6]], [[INDVARS_IV]]
 ; CHECK-NEXT:    br label [[BB9:%.*]]
 ; CHECK:       bb9:
 ; CHECK-NEXT:    br i1 true, label [[BB1]], label [[BB9]]
diff --git a/llvm/test/Transforms/LoopUnroll/unroll-cleanup.ll b/llvm/test/Transforms/LoopUnroll/unroll-cleanup.ll
index 49d6bc46dab32..3353a59ea1d12 100644
--- a/llvm/test/Transforms/LoopUnroll/unroll-cleanup.ll
+++ b/llvm/test/Transforms/LoopUnroll/unroll-cleanup.ll
@@ -23,38 +23,38 @@ define void @_Z3fn1v(ptr %r, ptr %a) #0 {
 ; CHECK-LABEL: define void @_Z3fn1v(
 ; CHECK-SAME: ptr writeonly captures(none) [[R:%.*]], ptr readonly captures(none) [[A:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
 ; CHECK-NEXT:  [[ENTRY:.*]]:
-; CHECK-NEXT:    [[TMP:%.*]] = load i32, ptr @b, align 4
-; CHECK-NEXT:    [[TOBOOL20:%.*]] = icmp eq i32 [[TMP]], 0
+; CHECK-NEXT:    [[T:%.*]] = load i32, ptr @b, align 4
+; CHECK-NEXT:    [[TOBOOL20:%.*]] = icmp eq i32 [[T]], 0
 ; CHECK-NEXT:    br i1 [[TOBOOL20]], label %[[FOR_END6:.*]], label %[[FOR_BODY:.*]]
 ; CHECK:       [[FOR_COND_LOOPEXIT_LOOPEXIT:.*]]:
 ; CHECK-NEXT:    [[ADD_PTR_LCSSA:%.*]] = phi ptr [ [[ADD_PTR_LCSSA_UNR:%.*]], %[[FOR_BODY3_PROL_LOOPEXIT:.*]] ], [ [[ADD_PTR_1:%.*]], %[[FOR_INC_1:.*]] ]
 ; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A_021:%.*]], i64 1
-; CHECK-NEXT:    [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[SCEVGEP]], i64 [[TMP1:%.*]]
-; CHECK-NEXT:    [[TMP1_PRE:%.*]] = load i32, ptr @b, align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[T2:%.*]], -1
+; CHECK-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
+; CHECK-NEXT:    [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[SCEVGEP:%.*]], i64 [[TMP1]]
+; CHECK-NEXT:    [[T1_PRE:%.*]] = load i32, ptr @b, align 4
 ; CHECK-NEXT:    br label %[[FOR_COND_LOOPEXIT:.*]]
 ; CHECK:       [[FOR_COND_LOOPEXIT]]:
-; CHECK-NEXT:    [[T1:%.*]] = phi i32 [ [[T12:%.*]], %[[FOR_BODY]] ], [ [[TMP1_PRE]], %[[FOR_COND_LOOPEXIT_LOOPEXIT]] ]
+; CHECK-NEXT:    [[T1:%.*]] = phi i32 [ [[T12:%.*]], %[[FOR_BODY]] ], [ [[T1_PRE]], %[[FOR_COND_LOOPEXIT_LOOPEXIT]] ]
 ; CHECK-NEXT:    [[R_1_LCSSA:%.*]] = phi ptr [ [[R_022:%.*]], %[[FOR_BODY]] ], [ [[ADD_PTR_LCSSA]], %[[FOR_COND_LOOPEXIT_LOOPEXIT]] ]
-; CHECK-NEXT:    [[A_1_LCSSA:%.*]] = phi ptr [ [[A_021]], %[[FOR_BODY]] ], [ [[SCEVGEP1]], %[[FOR_COND_LOOPEXIT_LOOPEXIT]] ]
+; CHECK-NEXT:    [[A_1_LCSSA:%.*]] = phi ptr [ [[A_021:%.*]], %[[FOR_BODY]] ], [ [[SCEVGEP1]], %[[FOR_COND_LOOPEXIT_LOOPEXIT]] ]
 ; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[T1]], 0
 ; CHECK-NEXT:    br i1 [[TOBOOL]], label %[[FOR_END6]], label %[[FOR_BODY]]
 ; CHECK:       [[FOR_BODY]]:
-; CHECK-NEXT:    [[T12]] = phi i32 [ [[T1]], %[[FOR_COND_LOOPEXIT]] ], [ [[TMP]], %[[ENTRY]] ]
+; CHECK-NEXT:    [[T12]] = phi i32 [ [[T1]], %[[FOR_COND_LOOPEXIT]] ], [ [[T]], %[[ENTRY]] ]
 ; CHECK-NEXT:    [[R_022]] = phi ptr [ [[R_1_LCSSA]], %[[FOR_COND_LOOPEXIT]] ], [ [[R]], %[[ENTRY]] ]
 ; CHECK-NEXT:    [[A_021]] = phi ptr [ [[A_1_LCSSA]], %[[FOR_COND_LOOPEXIT]] ], [ [[A]], %[[ENTRY]] ]
-; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr @c, align 4
-; CHECK-NEXT:    [[TOBOOL215:%.*]] = icmp eq i32 [[TMP2]], 0
+; CHECK-NEXT:    [[T2]] = load i32, ptr @c, align 4
+; CHECK-NEXT:    [[TOBOOL215:%.*]] = icmp eq i32 [[T2]], 0
 ; CHECK-NEXT:    br i1 [[TOBOOL215]], label %[[FOR_COND_LOOPEXIT]], label %[[FOR_BODY3_PREHEADER:.*]]
 ; CHECK:       [[FOR_BODY3_PREHEADER]]:
-; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[TMP2]], -1
-; CHECK-NEXT:    [[TMP1]] = zext i32 [[TMP0]] to i64
-; CHECK-NEXT:    [[XTRAITER:%.*]] = and i32 [[TMP2]], 1
+; CHECK-NEXT:    [[XTRAITER:%.*]] = and i32 [[T2]], 1
 ; CHECK-NEXT:    [[LCMP_MOD_NOT:%.*]] = icmp eq i32 [[XTRAITER]], 0
 ; CHECK-NEXT:    br i1 [[LCMP_MOD_NOT]], label %[[FOR_BODY3_PROL_LOOPEXIT]], label %[[FOR_BODY3_PROL:.*]]
 ; CHECK:       [[FOR_BODY3_PROL]]:
-; CHECK-NEXT:    [[DEC18_PROL:%.*]] = add nsw i32 [[TMP2]], -1
-; CHECK-NEXT:    [[TMP3_PROL:%.*]] = load i8, ptr [[A_021]], align 1
-; CHECK-NEXT:    [[CMP_PROL:%.*]] = icmp eq i8 [[TMP3_PROL]], 0
+; CHECK-NEXT:    [[DEC18_PROL:%.*]] = add nsw i32 [[T2]], -1
+; CHECK-NEXT:    [[T3_PROL:%.*]] = load i8, ptr [[A_021]], align 1
+; CHECK-NEXT:    [[CMP_PROL:%.*]] = icmp eq i8 [[T3_PROL]], 0
 ; CHECK-NEXT:    br i1 [[CMP_PROL]], label %[[IF_THEN_PROL:.*]], label %[[FOR_INC_PROL:.*]]
 ; CHECK:       [[IF_THEN_PROL]]:
 ; CHECK-NEXT:    [[ARRAYIDX_PROL:%.*]] = getelementptr inbounds nuw i8, ptr [[R_022]], i64 2
@@ -69,17 +69,17 @@ define void @_Z3fn1v(ptr %r, ptr %a) #0 {
 ; CHECK-NEXT:    br label %[[FOR_BODY3_PROL_LOOPEXIT]]
 ; CHECK:       [[FOR_BODY3_PROL_LOOPEXIT]]:
 ; CHECK-NEXT:    [[ADD_PTR_LCSSA_UNR]] = phi ptr [ poison, %[[FOR_BODY3_PREHEADER]] ], [ [[ADD_PTR_PROL]], %[[FOR_INC_PROL]] ]
-; CHECK-NEXT:    [[DEC18_IN_UNR:%.*]] = phi i32 [ [[TMP2]], %[[FOR_BODY3_PREHEADER]] ], [ [[DEC18_PROL]], %[[FOR_INC_PROL]] ]
+; CHECK-NEXT:    [[DEC18_IN_UNR:%.*]] = phi i32 [ [[T2]], %[[FOR_BODY3_PREHEADER]] ], [ [[DEC18_PROL]], %[[FOR_INC_PROL]] ]
 ; CHECK-NEXT:    [[R_117_UNR:%.*]] = phi ptr [ [[R_022]], %[[FOR_BODY3_PREHEADER]] ], [ [[ADD_PTR_PROL]], %[[FOR_INC_PROL]] ]
 ; CHECK-NEXT:    [[A_116_UNR:%.*]] = phi ptr [ [[A_021]], %[[FOR_BODY3_PREHEADER]] ], [ [[INCDEC_PTR_PROL]], %[[FOR_INC_PROL]] ]
-; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq i32 [[TMP0]], 0
-; CHECK-NEXT:    br i1 [[TMP4]], label %[[FOR_COND_LOOPEXIT_LOOPEXIT]], label %[[FOR_BODY3:.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[T2]], 1
+; CHECK-NEXT:    br i1 [[TMP2]], label %[[FOR_COND_LOOPEXIT_LOOPEXIT]], label %[[FOR_BODY3:.*]]
 ; CHECK:       [[FOR_BODY3]]:
 ; CHECK-NEXT:    [[DEC18_IN:%.*]] = phi i32 [ [[DEC18_1:%.*]], %[[FOR_INC_1]] ], [ [[DEC18_IN_UNR]], %[[FOR_BODY3_PROL_LOOPEXIT]] ]
 ; CHECK-NEXT:    [[R_117:%.*]] = phi ptr [ [[ADD_PTR_1]], %[[FOR_INC_1]] ], [ [[R_117_UNR]], %[[FOR_BODY3_PROL_LOOPEXIT]] ]
 ; CHECK-NEXT:    [[A_116:%.*]] = phi ptr [ [[INCDEC_PTR_1:%.*]], %[[FOR_INC_1]] ], [ [[A_116_UNR]], %[[FOR_BODY3_PROL_LOOPEXIT]] ]
-; CHECK-NEXT:    [[TMP3:%.*]] = load i8, ptr [[A_116]], align 1
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i8 [[TMP3]], 0
+; CHECK-NEXT:    [[T3:%.*]] = load i8, ptr [[A_116]], align 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i8 [[T3]], 0
 ; CHECK-NEXT:    br i1 [[CMP]], label %[[IF_THEN:.*]], label %[[FOR_INC:.*]]
 ; CHECK:       [[IF_THEN]]:
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[R_117]], i64 2
@@ -91,8 +91,8 @@ define void @_Z3fn1v(ptr %r, ptr %a) #0 {
 ; CHECK:       [[FOR_INC]]:
 ; CHECK-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw i8, ptr [[A_116]], i64 1
 ; CHECK-NEXT:    [[DEC18_1]] = add nsw i32 [[DEC18_IN]], -2
-; CHECK-NEXT:    [[TMP3_1:%.*]] = load i8, ptr [[INCDEC_PTR]], align 1
-; CHECK-NEXT:    [[CMP_1:%.*]] = icmp eq i8 [[TMP3_1]], 0
+; CHECK-NEXT:    [[T3_1:%.*]] = load i8, ptr [[INCDEC_PTR]], align 1
+; CHECK-NEXT:    [[CMP_1:%.*]] = icmp eq i8 [[T3_1]], 0
 ; CHECK-NEXT:    br i1 [[CMP_1]], label %[[IF_THEN_1:.*]], label %[[FOR_INC_1]]
 ; CHECK:       [[IF_THEN_1]]:
 ; CHECK-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds nuw i8, ptr [[R_117]], i64 6
diff --git a/llvm/test/Transforms/PhaseOrdering/ARM/arm_mean_q7.ll b/llvm/test/Transforms/PhaseOrdering/ARM/arm_mean_q7.ll
index 778f25f5620f2..4f3f81250adc4 100644
--- a/llvm/test/Transforms/PhaseOrdering/ARM/arm_mean_q7.ll
+++ b/llvm/test/Transforms/PhaseOrdering/ARM/arm_mean_q7.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
 ; RUN: opt < %s -passes="default<O3>" -S | FileCheck %s
 
 target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
@@ -7,47 +7,48 @@ target triple = "thumbv6m-none-none-eabi"
 ; Make sure we don't make a mess of vectorization/unrolling of the remainder loop.
 
 define void @arm_mean_q7(ptr noundef %pSrc, i32 noundef %blockSize, ptr noundef %pResult) #0 {
-; CHECK-LABEL: @arm_mean_q7(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CMP_NOT10:%.*]] = icmp ult i32 [[BLOCKSIZE:%.*]], 16
-; CHECK-NEXT:    br i1 [[CMP_NOT10]], label [[WHILE_END:%.*]], label [[WHILE_BODY_PREHEADER:%.*]]
-; CHECK:       while.body.preheader:
+; CHECK-LABEL: define void @arm_mean_q7(
+; CHECK-SAME: ptr noundef readonly captures(none) [[PSRC:%.*]], i32 noundef [[BLOCKSIZE:%.*]], ptr noundef writeonly captures(none) [[PRESULT:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    [[CMP_NOT10:%.*]] = icmp ult i32 [[BLOCKSIZE]], 16
+; CHECK-NEXT:    br i1 [[CMP_NOT10]], label %[[WHILE_END:.*]], label %[[WHILE_BODY_PREHEADER:.*]]
+; CHECK:       [[WHILE_BODY_PREHEADER]]:
 ; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 [[BLOCKSIZE]], 4
-; CHECK-NEXT:    [[TMP0:%.*]] = and i32 [[BLOCKSIZE]], -16
-; CHECK-NEXT:    br label [[WHILE_BODY:%.*]]
-; CHECK:       while.body:
-; CHECK-NEXT:    [[SUM_013:%.*]] = phi i32 [ [[TMP3:%.*]], [[WHILE_BODY]] ], [ 0, [[WHILE_BODY_PREHEADER]] ]
-; CHECK-NEXT:    [[PSRC_ADDR_012:%.*]] = phi ptr [ [[ADD_PTR:%.*]], [[WHILE_BODY]] ], [ [[PSRC:%.*]], [[WHILE_BODY_PREHEADER]] ]
-; CHECK-NEXT:    [[BLKCNT_011:%.*]] = phi i32 [ [[DEC:%.*]], [[WHILE_BODY]] ], [ [[SHR]], [[WHILE_BODY_PREHEADER]] ]
-; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr [[PSRC_ADDR_012]], align 1
-; CHECK-NEXT:    [[TMP2:%.*]] = tail call i32 @llvm.arm.mve.addv.v16i8(<16 x i8> [[TMP1]], i32 0)
-; CHECK-NEXT:    [[TMP3]] = add i32 [[TMP2]], [[SUM_013]]
+; CHECK-NEXT:    br label %[[WHILE_BODY:.*]]
+; CHECK:       [[WHILE_BODY]]:
+; CHECK-NEXT:    [[SUM_013:%.*]] = phi i32 [ [[TMP2:%.*]], %[[WHILE_BODY]] ], [ 0, %[[WHILE_BODY_PREHEADER]] ]
+; CHECK-NEXT:    [[PSRC_ADDR_012:%.*]] = phi ptr [ [[ADD_PTR:%.*]], %[[WHILE_BODY]] ], [ [[PSRC]], %[[WHILE_BODY_PREHEADER]] ]
+; CHECK-NEXT:    [[BLKCNT_011:%.*]] = phi i32 [ [[DEC:%.*]], %[[WHILE_BODY]] ], [ [[SHR]], %[[WHILE_BODY_PREHEADER]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i8>, ptr [[PSRC_ADDR_012]], align 1
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.arm.mve.addv.v16i8(<16 x i8> [[TMP0]], i32 0)
+; CHECK-NEXT:    [[TMP2]] = add i32 [[TMP1]], [[SUM_013]]
 ; CHECK-NEXT:    [[DEC]] = add nsw i32 [[BLKCNT_011]], -1
 ; CHECK-NEXT:    [[ADD_PTR]] = getelementptr inbounds nuw i8, ptr [[PSRC_ADDR_012]], i32 16
 ; CHECK-NEXT:    [[CMP_NOT:%.*]] = icmp eq i32 [[DEC]], 0
-; CHECK-NEXT:    br i1 [[CMP_NOT]], label [[WHILE_END_LOOPEXIT:%.*]], label [[WHILE_BODY]]
-; CHECK:       while.end.loopexit:
-; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr i8, ptr [[PSRC]], i32 [[TMP0]]
-; CHECK-NEXT:    br label [[WHILE_END]]
-; CHECK:       while.end:
-; CHECK-NEXT:    [[PSRC_ADDR_0_LCSSA:%.*]] = phi ptr [ [[PSRC]], [[ENTRY:%.*]] ], [ [[SCEVGEP]], [[WHILE_END_LOOPEXIT]] ]
-; CHECK-NEXT:    [[SUM_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP3]], [[WHILE_END_LOOPEXIT]] ]
+; CHECK-NEXT:    br i1 [[CMP_NOT]], label %[[WHILE_END_LOOPEXIT:.*]], label %[[WHILE_BODY]]
+; CHECK:       [[WHILE_END_LOOPEXIT]]:
+; CHECK-NEXT:    [[TMP3:%.*]] = and i32 [[BLOCKSIZE]], -16
+; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr i8, ptr [[PSRC]], i32 [[TMP3]]
+; CHECK-NEXT:    br label %[[WHILE_END]]
+; CHECK:       [[WHILE_END]]:
+; CHECK-NEXT:    [[PSRC_ADDR_0_LCSSA:%.*]] = phi ptr [ [[PSRC]], %[[ENTRY]] ], [ [[SCEVGEP]], %[[WHILE_END_LOOPEXIT]] ]
+; CHECK-NEXT:    [[SUM_0_LCSSA:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[TMP2]], %[[WHILE_END_LOOPEXIT]] ]
 ; CHECK-NEXT:    [[AND:%.*]] = and i32 [[BLOCKSIZE]], 15
 ; CHECK-NEXT:    [[CMP2_NOT15:%.*]] = icmp eq i32 [[AND]], 0
-; CHECK-NEXT:    br i1 [[CMP2_NOT15]], label [[WHILE_END5:%.*]], label [[MIDDLE_BLOCK:%.*]]
-; CHECK:       middle.block:
+; CHECK-NEXT:    br i1 [[CMP2_NOT15]], label %[[WHILE_END5:.*]], label %[[MIDDLE_BLOCK:.*]]
+; CHECK:       [[MIDDLE_BLOCK]]:
 ; CHECK-NEXT:    [[ACTIVE_LANE_MASK:%.*]] = tail call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 0, i32 [[AND]])
 ; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = tail call <16 x i8> @llvm.masked.load.v16i8.p0(ptr [[PSRC_ADDR_0_LCSSA]], i32 1, <16 x i1> [[ACTIVE_LANE_MASK]], <16 x i8> poison)
 ; CHECK-NEXT:    [[TMP4:%.*]] = sext <16 x i8> [[WIDE_MASKED_LOAD]] to <16 x i32>
 ; CHECK-NEXT:    [[TMP5:%.*]] = select <16 x i1> [[ACTIVE_LANE_MASK]], <16 x i32> [[TMP4]], <16 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP6:%.*]] = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP5]])
 ; CHECK-NEXT:    [[TMP7:%.*]] = add i32 [[TMP6]], [[SUM_0_LCSSA]]
-; CHECK-NEXT:    br label [[WHILE_END5]]
-; CHECK:       while.end5:
-; CHECK-NEXT:    [[SUM_1_LCSSA:%.*]] = phi i32 [ [[SUM_0_LCSSA]], [[WHILE_END]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    br label %[[WHILE_END5]]
+; CHECK:       [[WHILE_END5]]:
+; CHECK-NEXT:    [[SUM_1_LCSSA:%.*]] = phi i32 [ [[SUM_0_LCSSA]], %[[WHILE_END]] ], [ [[TMP7]], %[[MIDDLE_BLOCK]] ]
 ; CHECK-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUM_1_LCSSA]], [[BLOCKSIZE]]
 ; CHECK-NEXT:    [[CONV6:%.*]] = trunc i32 [[DIV]] to i8
-; CHECK-NEXT:    store i8 [[CONV6]], ptr [[PRESULT:%.*]], align 1
+; CHECK-NEXT:    store i8 [[CONV6]], ptr [[PRESULT]], align 1
 ; CHECK-NEXT:    ret void
 ;
 entry:



More information about the llvm-commits mailing list