[llvm] [IndVarsSimplify] sinkUnusedInvariants is skipping instructions while sinking. (PR #135205)

Sirish Pande via llvm-commits llvm-commits at lists.llvm.org
Thu Apr 17 14:36:06 PDT 2025


https://github.com/srpande updated https://github.com/llvm/llvm-project/pull/135205

>From c90f7e68ecb114344c2e40e9b9a76cb8fa98028c Mon Sep 17 00:00:00 2001
From: Sirish Pande <Sirish.Pande at amd.com>
Date: Mon, 7 Apr 2025 16:49:35 -0500
Subject: [PATCH] [IndVarsSimplify] sinkUnusedInvariants is skipping
 instructions while sinking.

While sinking instructions (that are loop invariant)  from preheader
to the exit block, we are skipping instructions due to decrementing
instruction iterator twice.
---
 llvm/lib/Transforms/Scalar/IndVarSimplify.cpp | 43 ++++++------------
 .../IndVarSimplify/ARM/code-size.ll           | 22 +++++-----
 .../ARM/indvar-unroll-imm-cost.ll             |  2 +-
 .../IndVarSimplify/exit-count-select.ll       | 16 +++----
 .../IndVarSimplify/exit_value_test3.ll        |  2 +-
 .../IndVarSimplify/finite-exit-comparisons.ll | 10 ++---
 .../Transforms/IndVarSimplify/pr116483.ll     |  8 ++--
 .../test/Transforms/IndVarSimplify/pr63763.ll |  4 +-
 .../IndVarSimplify/replace-loop-exit-folds.ll | 21 ++++-----
 .../Transforms/IndVarSimplify/sentinel.ll     |  8 ++--
 .../Transforms/LoopUnroll/unroll-cleanup.ll   | 44 +++++++++----------
 .../PhaseOrdering/ARM/arm_mean_q7.ll          | 14 +++---
 12 files changed, 91 insertions(+), 103 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
index 9619dfdbf4123..3f04f7f57c54f 100644
--- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -1096,10 +1096,12 @@ bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
   if (!Preheader) return false;
 
   bool MadeAnyChanges = false;
-  BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt();
-  BasicBlock::iterator I(Preheader->getTerminator());
-  while (I != Preheader->begin()) {
-    --I;
+  for (Instruction &I : llvm::make_early_inc_range(llvm::reverse(*Preheader))) {
+
+    // Skip BB Terminator.
+    if (Preheader->getTerminator() == &I)
+      continue;
+
     // New instructions were inserted at the end of the preheader.
     if (isa<PHINode>(I))
       break;
@@ -1110,28 +1112,28 @@ bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
     // memory. Note that it's okay if the instruction might have undefined
     // behavior: LoopSimplify guarantees that the preheader dominates the exit
     // block.
-    if (I->mayHaveSideEffects() || I->mayReadFromMemory())
+    if (I.mayHaveSideEffects() || I.mayReadFromMemory())
       continue;
 
-    // Skip debug info intrinsics.
-    if (isa<DbgInfoIntrinsic>(I))
+    // Skip debug or pseudo instructions.
+    if (I.isDebugOrPseudoInst())
       continue;
 
     // Skip eh pad instructions.
-    if (I->isEHPad())
+    if (I.isEHPad())
       continue;
 
     // Don't sink alloca: we never want to sink static alloca's out of the
     // entry block, and correctly sinking dynamic alloca's requires
     // checks for stacksave/stackrestore intrinsics.
     // FIXME: Refactor this check somehow?
-    if (isa<AllocaInst>(I))
+    if (isa<AllocaInst>(&I))
       continue;
 
     // Determine if there is a use in or before the loop (direct or
     // otherwise).
     bool UsedInLoop = false;
-    for (Use &U : I->uses()) {
+    for (Use &U : I.uses()) {
       Instruction *User = cast<Instruction>(U.getUser());
       BasicBlock *UseBB = User->getParent();
       if (PHINode *P = dyn_cast<PHINode>(User)) {
@@ -1150,26 +1152,9 @@ bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
       continue;
 
     // Otherwise, sink it to the exit block.
-    Instruction *ToMove = &*I;
-    bool Done = false;
-
-    if (I != Preheader->begin()) {
-      // Skip debug info intrinsics.
-      do {
-        --I;
-      } while (I->isDebugOrPseudoInst() && I != Preheader->begin());
-
-      if (I->isDebugOrPseudoInst() && I == Preheader->begin())
-        Done = true;
-    } else {
-      Done = true;
-    }
-
+    I.moveBefore(ExitBlock->getFirstInsertionPt());
+    SE->forgetValue(&I);
     MadeAnyChanges = true;
-    ToMove->moveBefore(*ExitBlock, InsertPt);
-    SE->forgetValue(ToMove);
-    if (Done) break;
-    InsertPt = ToMove->getIterator();
   }
 
   return MadeAnyChanges;
diff --git a/llvm/test/Transforms/IndVarSimplify/ARM/code-size.ll b/llvm/test/Transforms/IndVarSimplify/ARM/code-size.ll
index c7231392229c9..2003b1a72206d 100644
--- a/llvm/test/Transforms/IndVarSimplify/ARM/code-size.ll
+++ b/llvm/test/Transforms/IndVarSimplify/ARM/code-size.ll
@@ -4,30 +4,32 @@
 
 define i32 @remove_loop(i32 %size) #0 {
 ; CHECK-V8M-LABEL: @remove_loop(
+; CHECK-V8M-SAME: i32 [[SIZE:%.*]]) #[[ATTR0:[0-9]+]] {
 ; CHECK-V8M-NEXT:  entry:
-; CHECK-V8M-NEXT:    [[TMP0:%.*]] = add i32 [[SIZE:%.*]], 31
+; CHECK-V8M-NEXT:    br label %[[WHILE_COND:.*]]
+; CHECK-V8M:       while.cond:
+; CHECK-V8M-NEXT:    br i1 false, label %[[WHILE_COND]], label %[[WHILE_END:.*]]
+; CHECK-V8M:       while.end:
+; CHECK-V8M-NEXT:    [[TMP0:%.*]] = add i32 [[SIZE]], 31
 ; CHECK-V8M-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SIZE]], i32 31)
 ; CHECK-V8M-NEXT:    [[TMP1:%.*]] = sub i32 [[TMP0]], [[UMIN]]
 ; CHECK-V8M-NEXT:    [[TMP2:%.*]] = lshr i32 [[TMP1]], 5
 ; CHECK-V8M-NEXT:    [[TMP3:%.*]] = shl nuw i32 [[TMP2]], 5
-; CHECK-V8M-NEXT:    br label [[WHILE_COND:%.*]]
-; CHECK-V8M:       while.cond:
-; CHECK-V8M-NEXT:    br i1 false, label [[WHILE_COND]], label [[WHILE_END:%.*]]
-; CHECK-V8M:       while.end:
 ; CHECK-V8M-NEXT:    [[TMP4:%.*]] = sub i32 [[SIZE]], [[TMP3]]
 ; CHECK-V8M-NEXT:    ret i32 [[TMP4]]
 ;
 ; CHECK-V8A-LABEL: @remove_loop(
+; CHECK-V8A-SAME: i32 [[SIZE:%.*]]) #[[ATTR0:[0-9]+]] {
 ; CHECK-V8A-NEXT:  entry:
-; CHECK-V8A-NEXT:    [[TMP0:%.*]] = add i32 [[SIZE:%.*]], 31
+; CHECK-V8A-NEXT:    br label %[[WHILE_COND:.*]]
+; CHECK-V8A:       while.cond:
+; CHECK-V8A-NEXT:    br i1 false, label %[[WHILE_COND]], label %[[WHILE_END:.*]]
+; CHECK-V8A:       while.end:
+; CHECK-V8A-NEXT:    [[TMP0:%.*]] = add i32 [[SIZE]], 31
 ; CHECK-V8A-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SIZE]], i32 31)
 ; CHECK-V8A-NEXT:    [[TMP1:%.*]] = sub i32 [[TMP0]], [[UMIN]]
 ; CHECK-V8A-NEXT:    [[TMP2:%.*]] = lshr i32 [[TMP1]], 5
 ; CHECK-V8A-NEXT:    [[TMP3:%.*]] = shl nuw i32 [[TMP2]], 5
-; CHECK-V8A-NEXT:    br label [[WHILE_COND:%.*]]
-; CHECK-V8A:       while.cond:
-; CHECK-V8A-NEXT:    br i1 false, label [[WHILE_COND]], label [[WHILE_END:%.*]]
-; CHECK-V8A:       while.end:
 ; CHECK-V8A-NEXT:    [[TMP4:%.*]] = sub i32 [[SIZE]], [[TMP3]]
 ; CHECK-V8A-NEXT:    ret i32 [[TMP4]]
 ;
diff --git a/llvm/test/Transforms/IndVarSimplify/ARM/indvar-unroll-imm-cost.ll b/llvm/test/Transforms/IndVarSimplify/ARM/indvar-unroll-imm-cost.ll
index f907f23e0b520..2261423766792 100644
--- a/llvm/test/Transforms/IndVarSimplify/ARM/indvar-unroll-imm-cost.ll
+++ b/llvm/test/Transforms/IndVarSimplify/ARM/indvar-unroll-imm-cost.ll
@@ -77,7 +77,6 @@ define dso_local arm_aapcscc void @test(ptr nocapture %pDest, ptr nocapture read
 ; CHECK-NEXT:    [[CMP2780:%.*]] = icmp ugt i32 [[ADD25]], [[J_0_LCSSA]]
 ; CHECK-NEXT:    br i1 [[CMP2780]], label [[FOR_BODY29_PREHEADER:%.*]], label [[FOR_END40]]
 ; CHECK:       for.body29.preheader:
-; CHECK-NEXT:    [[TMP10:%.*]] = sub nsw i32 [[ADD25]], [[J_0_LCSSA]]
 ; CHECK-NEXT:    br label [[FOR_BODY29:%.*]]
 ; CHECK:       for.body29:
 ; CHECK-NEXT:    [[J_184:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY29]] ], [ [[J_0_LCSSA]], [[FOR_BODY29_PREHEADER]] ]
@@ -101,6 +100,7 @@ define dso_local arm_aapcscc void @test(ptr nocapture %pDest, ptr nocapture read
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[ADD25]]
 ; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END40_LOOPEXIT:%.*]], label [[FOR_BODY29]]
 ; CHECK:       for.end40.loopexit:
+; CHECK-NEXT:    [[TMP10:%.*]] = sub nsw i32 [[ADD25]], [[J_0_LCSSA]]
 ; CHECK-NEXT:    [[SCEVGEP93:%.*]] = getelementptr i16, ptr [[PSRCB_ADDR_1_LCSSA]], i32 [[TMP10]]
 ; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr i16, ptr [[PSRCA_ADDR_1_LCSSA]], i32 [[TMP10]]
 ; CHECK-NEXT:    [[SCEVGEP94:%.*]] = getelementptr i32, ptr [[PDEST_ADDR_1_LCSSA]], i32 [[TMP10]]
diff --git a/llvm/test/Transforms/IndVarSimplify/exit-count-select.ll b/llvm/test/Transforms/IndVarSimplify/exit-count-select.ll
index 21806c7f2cdc3..1592b84480e3f 100644
--- a/llvm/test/Transforms/IndVarSimplify/exit-count-select.ll
+++ b/llvm/test/Transforms/IndVarSimplify/exit-count-select.ll
@@ -4,11 +4,11 @@
 define i32 @logical_and_2ops(i32 %n, i32 %m) {
 ; CHECK-LABEL: @logical_and_2ops(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = freeze i32 [[M:%.*]]
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
 ; CHECK-NEXT:    br i1 false, label [[LOOP]], label [[EXIT:%.*]]
 ; CHECK:       exit:
+; CHECK-NEXT:    [[TMP0:%.*]] = freeze i32 [[M:%.*]]
 ; CHECK-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP0]], i32 [[N:%.*]])
 ; CHECK-NEXT:    ret i32 [[UMIN]]
 ;
@@ -28,11 +28,11 @@ exit:
 define i32 @logical_or_2ops(i32 %n, i32 %m) {
 ; CHECK-LABEL: @logical_or_2ops(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = freeze i32 [[M:%.*]]
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
 ; CHECK-NEXT:    br i1 true, label [[EXIT:%.*]], label [[LOOP]]
 ; CHECK:       exit:
+; CHECK-NEXT:    [[TMP0:%.*]] = freeze i32 [[M:%.*]]
 ; CHECK-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP0]], i32 [[N:%.*]])
 ; CHECK-NEXT:    ret i32 [[UMIN]]
 ;
@@ -52,13 +52,13 @@ exit:
 define i32 @logical_and_3ops(i32 %n, i32 %m, i32 %k) {
 ; CHECK-LABEL: @logical_and_3ops(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = freeze i32 [[K:%.*]]
-; CHECK-NEXT:    [[TMP1:%.*]] = freeze i32 [[M:%.*]]
-; CHECK-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP0]], i32 [[TMP1]])
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
 ; CHECK-NEXT:    br i1 false, label [[LOOP]], label [[EXIT:%.*]]
 ; CHECK:       exit:
+; CHECK-NEXT:    [[TMP0:%.*]] = freeze i32 [[K:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = freeze i32 [[M:%.*]]
+; CHECK-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP0]], i32 [[TMP1]])
 ; CHECK-NEXT:    [[UMIN1:%.*]] = call i32 @llvm.umin.i32(i32 [[UMIN]], i32 [[N:%.*]])
 ; CHECK-NEXT:    ret i32 [[UMIN1]]
 ;
@@ -80,13 +80,13 @@ exit:
 define i32 @logical_or_3ops(i32 %n, i32 %m, i32 %k) {
 ; CHECK-LABEL: @logical_or_3ops(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = freeze i32 [[K:%.*]]
-; CHECK-NEXT:    [[TMP1:%.*]] = freeze i32 [[M:%.*]]
-; CHECK-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP0]], i32 [[TMP1]])
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
 ; CHECK-NEXT:    br i1 true, label [[EXIT:%.*]], label [[LOOP]]
 ; CHECK:       exit:
+; CHECK-NEXT:    [[TMP0:%.*]] = freeze i32 [[K:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = freeze i32 [[M:%.*]]
+; CHECK-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP0]], i32 [[TMP1]])
 ; CHECK-NEXT:    [[UMIN1:%.*]] = call i32 @llvm.umin.i32(i32 [[UMIN]], i32 [[N:%.*]])
 ; CHECK-NEXT:    ret i32 [[UMIN1]]
 ;
diff --git a/llvm/test/Transforms/IndVarSimplify/exit_value_test3.ll b/llvm/test/Transforms/IndVarSimplify/exit_value_test3.ll
index aba7532f5ed92..c03cd95a8c861 100644
--- a/llvm/test/Transforms/IndVarSimplify/exit_value_test3.ll
+++ b/llvm/test/Transforms/IndVarSimplify/exit_value_test3.ll
@@ -4,9 +4,9 @@
 ; is high because the loop can be deleted after the exit value rewrite.
 ;
 ; CHECK-LABEL: @_Z3fooPKcjj(
-; CHECK: udiv
 ; CHECK: [[LABEL:^[a-zA-Z0-9_.]+]]:
 ; CHECK-NOT: br {{.*}} [[LABEL]]
+; CHECK: udiv
 
 define i32 @_Z3fooPKcjj(ptr nocapture readnone %s, i32 %len, i32 %c) #0 {
 entry:
diff --git a/llvm/test/Transforms/IndVarSimplify/finite-exit-comparisons.ll b/llvm/test/Transforms/IndVarSimplify/finite-exit-comparisons.ll
index 3c6b12dac2119..e006d9f6696ca 100644
--- a/llvm/test/Transforms/IndVarSimplify/finite-exit-comparisons.ll
+++ b/llvm/test/Transforms/IndVarSimplify/finite-exit-comparisons.ll
@@ -932,17 +932,17 @@ for.end:                                          ; preds = %for.body, %entry
 define i16 @ult_multiuse_profit(i16 %n.raw, i8 %start) mustprogress {
 ; CHECK-LABEL: @ult_multiuse_profit(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = add i8 [[START:%.*]], 1
-; CHECK-NEXT:    [[TMP1:%.*]] = zext i8 [[TMP0]] to i16
-; CHECK-NEXT:    [[TMP2:%.*]] = trunc i16 254 to i8
+; CHECK-NEXT:    [[TMP0:%.*]] = trunc i16 254 to i8
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[IV:%.*]] = phi i8 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[START]], [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    [[IV_NEXT]] = add i8 [[IV]], 1
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i8 [[IV_NEXT]], [[TMP2]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i8 [[IV_NEXT]], [[TMP0]]
 ; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]]
 ; CHECK:       for.end:
-; CHECK-NEXT:    [[UMAX:%.*]] = call i16 @llvm.umax.i16(i16 [[TMP1]], i16 254)
+; CHECK-NEXT:    [[TMP1:%.*]] = add i8 [[START:%.*]], 1
+; CHECK-NEXT:    [[TMP2:%.*]] = zext i8 [[TMP1]] to i16
+; CHECK-NEXT:    [[UMAX:%.*]] = call i16 @llvm.umax.i16(i16 [[TMP2]], i16 254)
 ; CHECK-NEXT:    ret i16 [[UMAX]]
 ;
 entry:
diff --git a/llvm/test/Transforms/IndVarSimplify/pr116483.ll b/llvm/test/Transforms/IndVarSimplify/pr116483.ll
index ae108a525223e..093e25a3caa81 100644
--- a/llvm/test/Transforms/IndVarSimplify/pr116483.ll
+++ b/llvm/test/Transforms/IndVarSimplify/pr116483.ll
@@ -4,15 +4,15 @@
 define i32 @test() {
 ; CHECK-LABEL: define i32 @test() {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br label %[[LOOP_BODY:.*]]
+; CHECK:       [[LOOP_BODY]]:
+; CHECK-NEXT:    br i1 true, label %[[EXIT:.*]], label %[[LOOP_BODY]]
+; CHECK:       [[EXIT]]:
 ; CHECK-NEXT:    [[XOR:%.*]] = xor i32 0, 3
 ; CHECK-NEXT:    [[MUL:%.*]] = mul i32 [[XOR]], 329
 ; CHECK-NEXT:    [[CONV:%.*]] = trunc i32 [[MUL]] to i16
 ; CHECK-NEXT:    [[SEXT:%.*]] = shl i16 [[CONV]], 8
 ; CHECK-NEXT:    [[CONV1:%.*]] = ashr i16 [[SEXT]], 8
-; CHECK-NEXT:    br label %[[LOOP_BODY:.*]]
-; CHECK:       [[LOOP_BODY]]:
-; CHECK-NEXT:    br i1 true, label %[[EXIT:.*]], label %[[LOOP_BODY]]
-; CHECK:       [[EXIT]]:
 ; CHECK-NEXT:    [[CONV3:%.*]] = zext i16 [[CONV1]] to i32
 ; CHECK-NEXT:    ret i32 [[CONV3]]
 ;
diff --git a/llvm/test/Transforms/IndVarSimplify/pr63763.ll b/llvm/test/Transforms/IndVarSimplify/pr63763.ll
index 4e62e92ca07ee..427db1e67410a 100644
--- a/llvm/test/Transforms/IndVarSimplify/pr63763.ll
+++ b/llvm/test/Transforms/IndVarSimplify/pr63763.ll
@@ -16,12 +16,12 @@ define i32 @test(i1 %c) {
 ; CHECK-NEXT:    [[CONV2:%.*]] = ashr exact i32 [[SEXT]], 24
 ; CHECK-NEXT:    [[INVARIANT_OP:%.*]] = sub nsw i32 7, [[CONV2]]
 ; CHECK-NEXT:    call void @use(i32 [[INVARIANT_OP]])
-; CHECK-NEXT:    [[SEXT_US:%.*]] = shl i32 [[SEL]], 24
-; CHECK-NEXT:    [[CONV2_US:%.*]] = ashr exact i32 [[SEXT_US]], 24
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
 ; CHECK-NEXT:    br i1 true, label [[EXIT:%.*]], label [[LOOP]]
 ; CHECK:       exit:
+; CHECK-NEXT:    [[SEXT_US:%.*]] = shl i32 [[SEL]], 24
+; CHECK-NEXT:    [[CONV2_US:%.*]] = ashr exact i32 [[SEXT_US]], 24
 ; CHECK-NEXT:    [[INVARIANT_OP_US:%.*]] = sub nsw i32 7, [[CONV2_US]]
 ; CHECK-NEXT:    ret i32 [[INVARIANT_OP_US]]
 ;
diff --git a/llvm/test/Transforms/IndVarSimplify/replace-loop-exit-folds.ll b/llvm/test/Transforms/IndVarSimplify/replace-loop-exit-folds.ll
index 4692a542053c9..b3162de0f2245 100644
--- a/llvm/test/Transforms/IndVarSimplify/replace-loop-exit-folds.ll
+++ b/llvm/test/Transforms/IndVarSimplify/replace-loop-exit-folds.ll
@@ -4,20 +4,21 @@
 target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
 
 define i32 @remove_loop(i32 %size) {
-; CHECK-LABEL: @remove_loop(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[SIZE:%.*]], 31
+; CHECK-LABEL: define i32 @remove_loop(
+; CHECK-SAME: i32 [[SIZE:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[WHILE_COND:.*]]
+; CHECK:       [[WHILE_COND]]:
+; CHECK-NEXT:    [[SIZE_ADDR_0:%.*]] = phi i32 [ [[SIZE]], %[[ENTRY]] ], [ [[SUB:%.*]], %[[WHILE_COND]] ]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i32 [[SIZE_ADDR_0]], 31
+; CHECK-NEXT:    [[SUB]] = add i32 [[SIZE_ADDR_0]], -32
+; CHECK-NEXT:    br i1 [[CMP]], label %[[WHILE_COND]], label %[[WHILE_END:.*]]
+; CHECK:       [[WHILE_END]]:
+; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[SIZE]], 31
 ; CHECK-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SIZE]], i32 31)
 ; CHECK-NEXT:    [[TMP1:%.*]] = sub i32 [[TMP0]], [[UMIN]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = lshr i32 [[TMP1]], 5
 ; CHECK-NEXT:    [[TMP3:%.*]] = shl nuw i32 [[TMP2]], 5
-; CHECK-NEXT:    br label [[WHILE_COND:%.*]]
-; CHECK:       while.cond:
-; CHECK-NEXT:    [[SIZE_ADDR_0:%.*]] = phi i32 [ [[SIZE]], [[ENTRY:%.*]] ], [ [[SUB:%.*]], [[WHILE_COND]] ]
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i32 [[SIZE_ADDR_0]], 31
-; CHECK-NEXT:    [[SUB]] = add i32 [[SIZE_ADDR_0]], -32
-; CHECK-NEXT:    br i1 [[CMP]], label [[WHILE_COND]], label [[WHILE_END:%.*]]
-; CHECK:       while.end:
 ; CHECK-NEXT:    [[TMP4:%.*]] = sub i32 [[SIZE]], [[TMP3]]
 ; CHECK-NEXT:    ret i32 [[TMP4]]
 ;
diff --git a/llvm/test/Transforms/IndVarSimplify/sentinel.ll b/llvm/test/Transforms/IndVarSimplify/sentinel.ll
index d1140affb5a4b..523414167956b 100644
--- a/llvm/test/Transforms/IndVarSimplify/sentinel.ll
+++ b/llvm/test/Transforms/IndVarSimplify/sentinel.ll
@@ -10,18 +10,18 @@ define void @test(i1 %arg) personality ptr @snork {
 ; CHECK-NEXT:    br label [[BB4:%.*]]
 ; CHECK:       bb1:
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT:%.*]] = add i32 [[INDVARS_IV:%.*]], 1
-; CHECK-NEXT:    [[TMP0:%.*]] = sub i32 [[TMP1:%.*]], [[SMAX:%.*]]
+; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[TMP6:%.*]], [[INDVARS_IV]]
+; CHECK-NEXT:    [[TMP1:%.*]] = sub i32 [[TMP0]], [[SMAX:%.*]]
 ; CHECK-NEXT:    br i1 [[ARG:%.*]], label [[BB2:%.*]], label [[BB4]]
 ; CHECK:       bb2:
-; CHECK-NEXT:    [[TMP3:%.*]] = phi i32 [ [[TMP0]], [[BB1:%.*]] ]
+; CHECK-NEXT:    [[TMP3:%.*]] = phi i32 [ [[TMP1]], [[BB1:%.*]] ]
 ; CHECK-NEXT:    ret void
 ; CHECK:       bb4:
 ; CHECK-NEXT:    [[INDVARS_IV]] = phi i32 [ [[INDVARS_IV_NEXT]], [[BB1]] ], [ undef, [[BB:%.*]] ]
 ; CHECK-NEXT:    [[SMAX]] = call i32 @llvm.smax.i32(i32 [[INDVARS_IV]], i32 36)
-; CHECK-NEXT:    [[TMP6:%.*]] = invoke i32 @quux() [ "deopt"(i32 0, i32 0, i32 0, i32 180, i32 0, i32 25, i32 0, i32 7, ptr null, i32 7, ptr null, i32 7, ptr null, i32 3, i32 [[INDVARS_IV]], i32 3, i32 undef, i32 7, ptr null, i32 3, i32 undef, i32 3, i32 undef, i32 3, i32 undef, i32 3, i32 undef, i32 4, double undef, i32 7, ptr null, i32 4, i64 undef, i32 7, ptr null, i32 0, ptr addrspace(1) undef, i32 3, i32 undef, i32 0, ptr addrspace(1) undef, i32 0, ptr addrspace(1) undef, i32 0, ptr addrspace(1) undef, i32 0, ptr addrspace(1) undef, i32 0, ptr addrspace(1) undef, i32 0, ptr addrspace(1) undef, i32 0, ptr addrspace(1) undef, i32 0, ptr addrspace(1) undef, i32 7, ptr null) ]
+; CHECK-NEXT:    [[TMP6]] = invoke i32 @quux() [ "deopt"(i32 0, i32 0, i32 0, i32 180, i32 0, i32 25, i32 0, i32 7, ptr null, i32 7, ptr null, i32 7, ptr null, i32 3, i32 [[INDVARS_IV]], i32 3, i32 undef, i32 7, ptr null, i32 3, i32 undef, i32 3, i32 undef, i32 3, i32 undef, i32 3, i32 undef, i32 4, double undef, i32 7, ptr null, i32 4, i64 undef, i32 7, ptr null, i32 0, ptr addrspace(1) undef, i32 3, i32 undef, i32 0, ptr addrspace(1) undef, i32 0, ptr addrspace(1) undef, i32 0, ptr addrspace(1) undef, i32 0, ptr addrspace(1) undef, i32 0, ptr addrspace(1) undef, i32 0, ptr addrspace(1) undef, i32 0, ptr addrspace(1) undef, i32 0, ptr addrspace(1) undef, i32 7, ptr null) ]
 ; CHECK-NEXT:            to label [[BB7:%.*]] unwind label [[BB15:%.*]]
 ; CHECK:       bb7:
-; CHECK-NEXT:    [[TMP1]] = add i32 [[TMP6]], [[INDVARS_IV]]
 ; CHECK-NEXT:    br label [[BB9:%.*]]
 ; CHECK:       bb9:
 ; CHECK-NEXT:    br i1 true, label [[BB1]], label [[BB9]]
diff --git a/llvm/test/Transforms/LoopUnroll/unroll-cleanup.ll b/llvm/test/Transforms/LoopUnroll/unroll-cleanup.ll
index 49d6bc46dab32..3353a59ea1d12 100644
--- a/llvm/test/Transforms/LoopUnroll/unroll-cleanup.ll
+++ b/llvm/test/Transforms/LoopUnroll/unroll-cleanup.ll
@@ -23,38 +23,38 @@ define void @_Z3fn1v(ptr %r, ptr %a) #0 {
 ; CHECK-LABEL: define void @_Z3fn1v(
 ; CHECK-SAME: ptr writeonly captures(none) [[R:%.*]], ptr readonly captures(none) [[A:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
 ; CHECK-NEXT:  [[ENTRY:.*]]:
-; CHECK-NEXT:    [[TMP:%.*]] = load i32, ptr @b, align 4
-; CHECK-NEXT:    [[TOBOOL20:%.*]] = icmp eq i32 [[TMP]], 0
+; CHECK-NEXT:    [[T:%.*]] = load i32, ptr @b, align 4
+; CHECK-NEXT:    [[TOBOOL20:%.*]] = icmp eq i32 [[T]], 0
 ; CHECK-NEXT:    br i1 [[TOBOOL20]], label %[[FOR_END6:.*]], label %[[FOR_BODY:.*]]
 ; CHECK:       [[FOR_COND_LOOPEXIT_LOOPEXIT:.*]]:
 ; CHECK-NEXT:    [[ADD_PTR_LCSSA:%.*]] = phi ptr [ [[ADD_PTR_LCSSA_UNR:%.*]], %[[FOR_BODY3_PROL_LOOPEXIT:.*]] ], [ [[ADD_PTR_1:%.*]], %[[FOR_INC_1:.*]] ]
 ; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A_021:%.*]], i64 1
-; CHECK-NEXT:    [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[SCEVGEP]], i64 [[TMP1:%.*]]
-; CHECK-NEXT:    [[TMP1_PRE:%.*]] = load i32, ptr @b, align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[T2:%.*]], -1
+; CHECK-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
+; CHECK-NEXT:    [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[SCEVGEP:%.*]], i64 [[TMP1]]
+; CHECK-NEXT:    [[T1_PRE:%.*]] = load i32, ptr @b, align 4
 ; CHECK-NEXT:    br label %[[FOR_COND_LOOPEXIT:.*]]
 ; CHECK:       [[FOR_COND_LOOPEXIT]]:
-; CHECK-NEXT:    [[T1:%.*]] = phi i32 [ [[T12:%.*]], %[[FOR_BODY]] ], [ [[TMP1_PRE]], %[[FOR_COND_LOOPEXIT_LOOPEXIT]] ]
+; CHECK-NEXT:    [[T1:%.*]] = phi i32 [ [[T12:%.*]], %[[FOR_BODY]] ], [ [[T1_PRE]], %[[FOR_COND_LOOPEXIT_LOOPEXIT]] ]
 ; CHECK-NEXT:    [[R_1_LCSSA:%.*]] = phi ptr [ [[R_022:%.*]], %[[FOR_BODY]] ], [ [[ADD_PTR_LCSSA]], %[[FOR_COND_LOOPEXIT_LOOPEXIT]] ]
-; CHECK-NEXT:    [[A_1_LCSSA:%.*]] = phi ptr [ [[A_021]], %[[FOR_BODY]] ], [ [[SCEVGEP1]], %[[FOR_COND_LOOPEXIT_LOOPEXIT]] ]
+; CHECK-NEXT:    [[A_1_LCSSA:%.*]] = phi ptr [ [[A_021:%.*]], %[[FOR_BODY]] ], [ [[SCEVGEP1]], %[[FOR_COND_LOOPEXIT_LOOPEXIT]] ]
 ; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[T1]], 0
 ; CHECK-NEXT:    br i1 [[TOBOOL]], label %[[FOR_END6]], label %[[FOR_BODY]]
 ; CHECK:       [[FOR_BODY]]:
-; CHECK-NEXT:    [[T12]] = phi i32 [ [[T1]], %[[FOR_COND_LOOPEXIT]] ], [ [[TMP]], %[[ENTRY]] ]
+; CHECK-NEXT:    [[T12]] = phi i32 [ [[T1]], %[[FOR_COND_LOOPEXIT]] ], [ [[T]], %[[ENTRY]] ]
 ; CHECK-NEXT:    [[R_022]] = phi ptr [ [[R_1_LCSSA]], %[[FOR_COND_LOOPEXIT]] ], [ [[R]], %[[ENTRY]] ]
 ; CHECK-NEXT:    [[A_021]] = phi ptr [ [[A_1_LCSSA]], %[[FOR_COND_LOOPEXIT]] ], [ [[A]], %[[ENTRY]] ]
-; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr @c, align 4
-; CHECK-NEXT:    [[TOBOOL215:%.*]] = icmp eq i32 [[TMP2]], 0
+; CHECK-NEXT:    [[T2]] = load i32, ptr @c, align 4
+; CHECK-NEXT:    [[TOBOOL215:%.*]] = icmp eq i32 [[T2]], 0
 ; CHECK-NEXT:    br i1 [[TOBOOL215]], label %[[FOR_COND_LOOPEXIT]], label %[[FOR_BODY3_PREHEADER:.*]]
 ; CHECK:       [[FOR_BODY3_PREHEADER]]:
-; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[TMP2]], -1
-; CHECK-NEXT:    [[TMP1]] = zext i32 [[TMP0]] to i64
-; CHECK-NEXT:    [[XTRAITER:%.*]] = and i32 [[TMP2]], 1
+; CHECK-NEXT:    [[XTRAITER:%.*]] = and i32 [[T2]], 1
 ; CHECK-NEXT:    [[LCMP_MOD_NOT:%.*]] = icmp eq i32 [[XTRAITER]], 0
 ; CHECK-NEXT:    br i1 [[LCMP_MOD_NOT]], label %[[FOR_BODY3_PROL_LOOPEXIT]], label %[[FOR_BODY3_PROL:.*]]
 ; CHECK:       [[FOR_BODY3_PROL]]:
-; CHECK-NEXT:    [[DEC18_PROL:%.*]] = add nsw i32 [[TMP2]], -1
-; CHECK-NEXT:    [[TMP3_PROL:%.*]] = load i8, ptr [[A_021]], align 1
-; CHECK-NEXT:    [[CMP_PROL:%.*]] = icmp eq i8 [[TMP3_PROL]], 0
+; CHECK-NEXT:    [[DEC18_PROL:%.*]] = add nsw i32 [[T2]], -1
+; CHECK-NEXT:    [[T3_PROL:%.*]] = load i8, ptr [[A_021]], align 1
+; CHECK-NEXT:    [[CMP_PROL:%.*]] = icmp eq i8 [[T3_PROL]], 0
 ; CHECK-NEXT:    br i1 [[CMP_PROL]], label %[[IF_THEN_PROL:.*]], label %[[FOR_INC_PROL:.*]]
 ; CHECK:       [[IF_THEN_PROL]]:
 ; CHECK-NEXT:    [[ARRAYIDX_PROL:%.*]] = getelementptr inbounds nuw i8, ptr [[R_022]], i64 2
@@ -69,17 +69,17 @@ define void @_Z3fn1v(ptr %r, ptr %a) #0 {
 ; CHECK-NEXT:    br label %[[FOR_BODY3_PROL_LOOPEXIT]]
 ; CHECK:       [[FOR_BODY3_PROL_LOOPEXIT]]:
 ; CHECK-NEXT:    [[ADD_PTR_LCSSA_UNR]] = phi ptr [ poison, %[[FOR_BODY3_PREHEADER]] ], [ [[ADD_PTR_PROL]], %[[FOR_INC_PROL]] ]
-; CHECK-NEXT:    [[DEC18_IN_UNR:%.*]] = phi i32 [ [[TMP2]], %[[FOR_BODY3_PREHEADER]] ], [ [[DEC18_PROL]], %[[FOR_INC_PROL]] ]
+; CHECK-NEXT:    [[DEC18_IN_UNR:%.*]] = phi i32 [ [[T2]], %[[FOR_BODY3_PREHEADER]] ], [ [[DEC18_PROL]], %[[FOR_INC_PROL]] ]
 ; CHECK-NEXT:    [[R_117_UNR:%.*]] = phi ptr [ [[R_022]], %[[FOR_BODY3_PREHEADER]] ], [ [[ADD_PTR_PROL]], %[[FOR_INC_PROL]] ]
 ; CHECK-NEXT:    [[A_116_UNR:%.*]] = phi ptr [ [[A_021]], %[[FOR_BODY3_PREHEADER]] ], [ [[INCDEC_PTR_PROL]], %[[FOR_INC_PROL]] ]
-; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq i32 [[TMP0]], 0
-; CHECK-NEXT:    br i1 [[TMP4]], label %[[FOR_COND_LOOPEXIT_LOOPEXIT]], label %[[FOR_BODY3:.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[T2]], 1
+; CHECK-NEXT:    br i1 [[TMP2]], label %[[FOR_COND_LOOPEXIT_LOOPEXIT]], label %[[FOR_BODY3:.*]]
 ; CHECK:       [[FOR_BODY3]]:
 ; CHECK-NEXT:    [[DEC18_IN:%.*]] = phi i32 [ [[DEC18_1:%.*]], %[[FOR_INC_1]] ], [ [[DEC18_IN_UNR]], %[[FOR_BODY3_PROL_LOOPEXIT]] ]
 ; CHECK-NEXT:    [[R_117:%.*]] = phi ptr [ [[ADD_PTR_1]], %[[FOR_INC_1]] ], [ [[R_117_UNR]], %[[FOR_BODY3_PROL_LOOPEXIT]] ]
 ; CHECK-NEXT:    [[A_116:%.*]] = phi ptr [ [[INCDEC_PTR_1:%.*]], %[[FOR_INC_1]] ], [ [[A_116_UNR]], %[[FOR_BODY3_PROL_LOOPEXIT]] ]
-; CHECK-NEXT:    [[TMP3:%.*]] = load i8, ptr [[A_116]], align 1
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i8 [[TMP3]], 0
+; CHECK-NEXT:    [[T3:%.*]] = load i8, ptr [[A_116]], align 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i8 [[T3]], 0
 ; CHECK-NEXT:    br i1 [[CMP]], label %[[IF_THEN:.*]], label %[[FOR_INC:.*]]
 ; CHECK:       [[IF_THEN]]:
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[R_117]], i64 2
@@ -91,8 +91,8 @@ define void @_Z3fn1v(ptr %r, ptr %a) #0 {
 ; CHECK:       [[FOR_INC]]:
 ; CHECK-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw i8, ptr [[A_116]], i64 1
 ; CHECK-NEXT:    [[DEC18_1]] = add nsw i32 [[DEC18_IN]], -2
-; CHECK-NEXT:    [[TMP3_1:%.*]] = load i8, ptr [[INCDEC_PTR]], align 1
-; CHECK-NEXT:    [[CMP_1:%.*]] = icmp eq i8 [[TMP3_1]], 0
+; CHECK-NEXT:    [[T3_1:%.*]] = load i8, ptr [[INCDEC_PTR]], align 1
+; CHECK-NEXT:    [[CMP_1:%.*]] = icmp eq i8 [[T3_1]], 0
 ; CHECK-NEXT:    br i1 [[CMP_1]], label %[[IF_THEN_1:.*]], label %[[FOR_INC_1]]
 ; CHECK:       [[IF_THEN_1]]:
 ; CHECK-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds nuw i8, ptr [[R_117]], i64 6
diff --git a/llvm/test/Transforms/PhaseOrdering/ARM/arm_mean_q7.ll b/llvm/test/Transforms/PhaseOrdering/ARM/arm_mean_q7.ll
index 778f25f5620f2..a13c36f693bac 100644
--- a/llvm/test/Transforms/PhaseOrdering/ARM/arm_mean_q7.ll
+++ b/llvm/test/Transforms/PhaseOrdering/ARM/arm_mean_q7.ll
@@ -13,25 +13,25 @@ define void @arm_mean_q7(ptr noundef %pSrc, i32 noundef %blockSize, ptr noundef
 ; CHECK-NEXT:    br i1 [[CMP_NOT10]], label [[WHILE_END:%.*]], label [[WHILE_BODY_PREHEADER:%.*]]
 ; CHECK:       while.body.preheader:
 ; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 [[BLOCKSIZE]], 4
-; CHECK-NEXT:    [[TMP0:%.*]] = and i32 [[BLOCKSIZE]], -16
 ; CHECK-NEXT:    br label [[WHILE_BODY:%.*]]
 ; CHECK:       while.body:
-; CHECK-NEXT:    [[SUM_013:%.*]] = phi i32 [ [[TMP3:%.*]], [[WHILE_BODY]] ], [ 0, [[WHILE_BODY_PREHEADER]] ]
+; CHECK-NEXT:    [[SUM_013:%.*]] = phi i32 [ [[TMP2:%.*]], [[WHILE_BODY]] ], [ 0, [[WHILE_BODY_PREHEADER]] ]
 ; CHECK-NEXT:    [[PSRC_ADDR_012:%.*]] = phi ptr [ [[ADD_PTR:%.*]], [[WHILE_BODY]] ], [ [[PSRC:%.*]], [[WHILE_BODY_PREHEADER]] ]
 ; CHECK-NEXT:    [[BLKCNT_011:%.*]] = phi i32 [ [[DEC:%.*]], [[WHILE_BODY]] ], [ [[SHR]], [[WHILE_BODY_PREHEADER]] ]
-; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr [[PSRC_ADDR_012]], align 1
-; CHECK-NEXT:    [[TMP2:%.*]] = tail call i32 @llvm.arm.mve.addv.v16i8(<16 x i8> [[TMP1]], i32 0)
-; CHECK-NEXT:    [[TMP3]] = add i32 [[TMP2]], [[SUM_013]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i8>, ptr [[PSRC_ADDR_012]], align 1
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.arm.mve.addv.v16i8(<16 x i8> [[TMP0]], i32 0)
+; CHECK-NEXT:    [[TMP2]] = add i32 [[TMP1]], [[SUM_013]]
 ; CHECK-NEXT:    [[DEC]] = add nsw i32 [[BLKCNT_011]], -1
 ; CHECK-NEXT:    [[ADD_PTR]] = getelementptr inbounds nuw i8, ptr [[PSRC_ADDR_012]], i32 16
 ; CHECK-NEXT:    [[CMP_NOT:%.*]] = icmp eq i32 [[DEC]], 0
 ; CHECK-NEXT:    br i1 [[CMP_NOT]], label [[WHILE_END_LOOPEXIT:%.*]], label [[WHILE_BODY]]
 ; CHECK:       while.end.loopexit:
-; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr i8, ptr [[PSRC]], i32 [[TMP0]]
+; CHECK-NEXT:    [[TMP3:%.*]] = and i32 [[BLOCKSIZE]], -16
+; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr i8, ptr [[PSRC]], i32 [[TMP3]]
 ; CHECK-NEXT:    br label [[WHILE_END]]
 ; CHECK:       while.end:
 ; CHECK-NEXT:    [[PSRC_ADDR_0_LCSSA:%.*]] = phi ptr [ [[PSRC]], [[ENTRY:%.*]] ], [ [[SCEVGEP]], [[WHILE_END_LOOPEXIT]] ]
-; CHECK-NEXT:    [[SUM_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP3]], [[WHILE_END_LOOPEXIT]] ]
+; CHECK-NEXT:    [[SUM_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP2]], [[WHILE_END_LOOPEXIT]] ]
 ; CHECK-NEXT:    [[AND:%.*]] = and i32 [[BLOCKSIZE]], 15
 ; CHECK-NEXT:    [[CMP2_NOT15:%.*]] = icmp eq i32 [[AND]], 0
 ; CHECK-NEXT:    br i1 [[CMP2_NOT15]], label [[WHILE_END5:%.*]], label [[MIDDLE_BLOCK:%.*]]



More information about the llvm-commits mailing list