[llvm] [Transforms] Add LoopNoOpElimination pass (PR #163534)

via llvm-commits llvm-commits at lists.llvm.org
Wed Oct 15 02:30:30 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-llvm-transforms

Author: Nashe Mncube (nasherm)

<details>
<summary>Changes</summary>

 
This patch adds the LoopNoOpElimination pass, along with appropriate tests. While benchmarking the vectorizer and loop flattening, we found that we sometimes generate loops of the following basic form:
```
vector.scevcheck:
  %cmp = icmp ugt i64 %N, 4294967295
  br i1 %cmp, label %end, label %vector.body
vector.body:                                      ; preds = %vector.scevcheck, %vector.body
  %index = phi i64 [ 0, %vector.scevcheck ], [ %index.next, %vector.body ]
  %and = and i64 %index, 4294967295
  %index.next = add i64 %and, 1
  %exit.cond = icmp ugt i64 %index.next, %N
  br i1 %exit.cond, label %end, label %vector.body
end:
  ret i64 %N
```

In this loop, the 'and' mask is introduced as a bounds check, but it is a no-op: the runtime check in the 'vector.scevcheck' block already guarantees that the induction variable stays within the masked range. This patch adds a transformation that turns the above loop into

```
vector.scevcheck:
  %cmp = icmp ugt i64 %N, 4294967295
  br i1 %cmp, label %end, label %vector.body
vector.body:                                      ; preds = %vector.scevcheck, %vector.body
  %index = phi i64 [ 0, %vector.scevcheck ], [ %index.next, %vector.body ]
  %index.next = add i64 %index, 1
  %exit.cond = icmp ugt i64 %index.next, %N
  br i1 %exit.cond, label %end, label %vector.body
end:
  ret i64 %N
```
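
For illustration, here is a hypothetical source-level sketch (not taken from the patch; the function and parameter names are purely illustrative) of the kind of nested loop where loop flattening followed by vectorization can end up with a masked induction variable of this shape:

```cpp
// Hypothetical C++ sketch: a nested loop with 32-bit unsigned trip counts.
// Loop flattening combines i and j into a single 64-bit induction variable,
// and the vectorizer emits a runtime SCEV check that the flattened trip
// count fits in 32 bits. The 32-bit mask applied to the flattened index
// then becomes the no-op 'and' shown above.
void addOffset(int *A, unsigned M, unsigned N, int X) {
  for (unsigned i = 0; i < M; ++i)
    for (unsigned j = 0; j < N; ++j)
      A[i * N + j] += X; // flattened index i*N+j is masked to 32 bits
}
```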

With this patch we've seen performance improvements of up to 8% on internal
benchmarks run on AArch64 cores.


---

Patch is 25.58 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/163534.diff


7 Files Affected:

- (added) llvm/include/llvm/Transforms/Scalar/LoopNoOpElimination.h (+52) 
- (modified) llvm/lib/Passes/PassBuilder.cpp (+1) 
- (modified) llvm/lib/Passes/PassBuilderPipelines.cpp (+9) 
- (modified) llvm/lib/Passes/PassRegistry.def (+1) 
- (modified) llvm/lib/Transforms/Scalar/CMakeLists.txt (+1) 
- (added) llvm/lib/Transforms/Scalar/LoopNoOpElimination.cpp (+228) 
- (added) llvm/test/Transforms/LoopNoOpElimination/loop-no-op-and-elim.ll (+292) 
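
For reference, a minimal way to exercise the new pass, mirroring the RUN line in the added test and the pipeline flag added in PassBuilderPipelines.cpp (`input.ll` is a placeholder file name):

```
# Run the pass in isolation, as the added test does:
opt -passes=loop-noop-elim -S input.ll

# Opt in via the default pipeline (the pass is off by default):
opt -passes='default<O3>' -enable-loop-noop-elimination -S input.ll
```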


``````````diff
diff --git a/llvm/include/llvm/Transforms/Scalar/LoopNoOpElimination.h b/llvm/include/llvm/Transforms/Scalar/LoopNoOpElimination.h
new file mode 100644
index 0000000000000..38da5713766f1
--- /dev/null
+++ b/llvm/include/llvm/Transforms/Scalar/LoopNoOpElimination.h
@@ -0,0 +1,52 @@
+//===- LoopNoOpElimination.h - Loop No-Op Elimination pass ------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass eliminates no-op operations in loop bodies.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_SCALAR_LOOPNOOPELIMINATION_H
+#define LLVM_TRANSFORMS_SCALAR_LOOPNOOPELIMINATION_H
+
+#include "llvm/Analysis/LoopAnalysisManager.h"
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+class DominatorTree;
+class Function;
+class Instruction;
+class Loop;
+class LoopAccessInfoManager;
+class LoopInfo;
+class ScalarEvolution;
+class TargetLibraryInfo;
+class TargetTransformInfo;
+class OptimizationRemarkEmitter;
+class DataLayout;
+class SCEVExpander;
+
+/// Performs the Loop No-Op Elimination pass.
+class LoopNoOpEliminationPass : public PassInfoMixin<LoopNoOpEliminationPass> {
+public:
+  ScalarEvolution *SE;
+  LoopInfo *LI;
+  TargetTransformInfo *TTI;
+  DominatorTree *DT;
+  TargetLibraryInfo *TLI;
+  OptimizationRemarkEmitter *ORE;
+
+
+  PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+private:
+  bool runImpl(Function &F);
+};
+
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_SCALAR_LOOPNOOPELIMINATION_H
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index 53cf0046bd858..61c67cca17326 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -302,6 +302,7 @@
 #include "llvm/Transforms/Scalar/LoopInstSimplify.h"
 #include "llvm/Transforms/Scalar/LoopInterchange.h"
 #include "llvm/Transforms/Scalar/LoopLoadElimination.h"
+#include "llvm/Transforms/Scalar/LoopNoOpElimination.h"
 #include "llvm/Transforms/Scalar/LoopPassManager.h"
 #include "llvm/Transforms/Scalar/LoopPredication.h"
 #include "llvm/Transforms/Scalar/LoopRotation.h"
diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
index fea0d255cc91a..bca0ac1d3c58d 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -110,6 +110,7 @@
 #include "llvm/Transforms/Scalar/LoopInstSimplify.h"
 #include "llvm/Transforms/Scalar/LoopInterchange.h"
 #include "llvm/Transforms/Scalar/LoopLoadElimination.h"
+#include "llvm/Transforms/Scalar/LoopNoOpElimination.h"
 #include "llvm/Transforms/Scalar/LoopPassManager.h"
 #include "llvm/Transforms/Scalar/LoopRotation.h"
 #include "llvm/Transforms/Scalar/LoopSimplifyCFG.h"
@@ -216,6 +217,11 @@ static cl::opt<bool> EnableLoopFlatten("enable-loop-flatten", cl::init(false),
                                        cl::Hidden,
                                        cl::desc("Enable the LoopFlatten Pass"));
 
+static cl::opt<bool>
+    EnableLoopNoOpElimination("enable-loop-noop-elimination", cl::init(false),
+                              cl::Hidden,
+                              cl::desc("Enable Loop no-op elimination pass"));
+
 // Experimentally allow loop header duplication. This should allow for better
 // optimization at Oz, since loop-idiom recognition can then recognize things
 // like memcpy. If this ends up being useful for many targets, we should drop
@@ -1307,6 +1313,9 @@ void PassBuilder::addVectorPasses(OptimizationLevel Level,
   FPM.addPass(LoopVectorizePass(
       LoopVectorizeOptions(!PTO.LoopInterleaving, !PTO.LoopVectorization)));
 
+  if (EnableLoopNoOpElimination)
+    FPM.addPass(LoopNoOpEliminationPass());
+
   FPM.addPass(InferAlignmentPass());
   if (IsFullLTO) {
     // The vectorizer may have significantly shortened a loop body; unroll
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index 1b1652555cd28..a6b283256101b 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -566,6 +566,7 @@ FUNCTION_PASS("view-dom-only", DomOnlyViewer())
 FUNCTION_PASS("view-post-dom", PostDomViewer())
 FUNCTION_PASS("view-post-dom-only", PostDomOnlyViewer())
 FUNCTION_PASS("wasm-eh-prepare", WasmEHPreparePass())
+FUNCTION_PASS("loop-noop-elim", LoopNoOpEliminationPass())
 #undef FUNCTION_PASS
 
 #ifndef FUNCTION_PASS_WITH_PARAMS
diff --git a/llvm/lib/Transforms/Scalar/CMakeLists.txt b/llvm/lib/Transforms/Scalar/CMakeLists.txt
index 37dbb34605646..c37e2cc756b87 100644
--- a/llvm/lib/Transforms/Scalar/CMakeLists.txt
+++ b/llvm/lib/Transforms/Scalar/CMakeLists.txt
@@ -37,6 +37,7 @@ add_llvm_component_library(LLVMScalarOpts
   LoopFuse.cpp
   LoopIdiomRecognize.cpp
   LoopInstSimplify.cpp
+  LoopNoOpElimination.cpp
   LoopInterchange.cpp
   LoopFlatten.cpp
   LoopLoadElimination.cpp
diff --git a/llvm/lib/Transforms/Scalar/LoopNoOpElimination.cpp b/llvm/lib/Transforms/Scalar/LoopNoOpElimination.cpp
new file mode 100644
index 0000000000000..9bafd10a91ff4
--- /dev/null
+++ b/llvm/lib/Transforms/Scalar/LoopNoOpElimination.cpp
@@ -0,0 +1,228 @@
+//===- LoopNoOpElimination.cpp - Loop No-Op Elimination Pass --------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass attempts to spot and eliminate no-op operations in loop bodies.
+// For example, loop vectorization may create loops like the following:
+//
+// vector.scevcheck:
+//   %1 = add i64 %flatten.tripcount, -1
+//   %2 = icmp ugt i64 %1, 4294967295
+//   br i1 %2, label %scalar.ph, label %vector.ph
+// vector.ph:
+//    %iv = phi i64 [ 0, %vector.scevcheck], [ %iv.next, %vector.ph ]
+//    %m  = and i64 %iv, 4294967295 ; 0xffff_ffff, a no-op
+//    %p  = getelementptr inbounds <4 x i32>, ptr %A, i64 %m
+//    %load = load <4 x i32>, ptr %p, align 4
+//    %1 = add <4 x i32> %load,  %X
+//    store <4 x i32> %1, ptr %p, align 4
+//    %iv.next = add nuw i64 %iv, 4
+//    %c  = icmp ult i64 %iv.next, %N
+//    br i1 %c, label %vector.ph, label %exit
+//  exit:
+//    ret void
+//
+// The vectorizer creates the SCEV check block to perform
+// runtime IV checks. This block can be used to determine the true
+// range of the IV, as entry into the vector loop is only possible
+// for certain trip count values.
+//
+// Currently this pass only supports spotting no-op AND operations in loop
+// bodies.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Scalar/LoopNoOpElimination.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopIterator.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/MemorySSA.h"
+#include "llvm/Analysis/MemorySSAUpdater.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
+#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
+#include <iterator>
+#include <optional>
+#include <utility>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "loop-noop-elim"
+
+STATISTIC(NumEliminated, "Number of redundant instructions eliminated");
+
+static BasicBlock *getSCEVCheckBB(Function &F) {
+  for (BasicBlock &BB : F)
+    if (BB.getName() == "vector.scevcheck")
+      return &BB;
+
+  return nullptr;
+}
+
+// Use the vector.scevcheck block to determine whether we can eliminate a
+// bounds check on the IV, given that the vector loop can only be entered
+// when the trip count is within certain bounds.
+static bool tryElimAndMaskOnPHI(Loop *L, Instruction *AndInstr, PHINode *IndVar,
+                                ScalarEvolution *SE, Function &F) {
+  Value *Op0 = AndInstr->getOperand(0);
+  Value *Op1 = AndInstr->getOperand(1);
+
+  auto *Mask = dyn_cast<ConstantInt>(Op0 == IndVar ? Op1 : Op0);
+  if (!Mask)
+    return false;
+
+  auto CheckConditional = [](BranchInst *BranchI, CmpInst *CmpI,
+                             unsigned ExpectedPred, BasicBlock *Header,
+                             BasicBlock *PreHeader, Loop *L,
+                             Value *LatchCmpV) -> bool {
+    // Make sure that the conditional operator is what we
+    // expect
+    unsigned CmpIOpcode = CmpI->getPredicate();
+    if (CmpIOpcode != ExpectedPred)
+      return false;
+
+    // Check that in the case of a true result we actually
+    // branch to the loop
+    Value *TrueDest = BranchI->getOperand(1);
+    if (TrueDest != PreHeader && TrueDest != Header)
+      return false;
+
+    // Check that the conditional variable that is used for the
+    // SCEV check is actually used in the latch compare instruction
+    auto *LatchCmpInst = L->getLatchCmpInst();
+    if (!LatchCmpInst)
+      return false;
+
+    if (LatchCmpInst->getOperand(0) != LatchCmpV &&
+        LatchCmpInst->getOperand(1) != LatchCmpV) {
+      return false;
+    }
+
+    return true;
+  };
+
+  // Determine if there's a runtime SCEV check block and use it to
+  // decide whether we can eliminate the AND mask on the PHI node.
+  if (auto *SCEVCheckBB = getSCEVCheckBB(F)) {
+    // Determine if the SCEV check BB branches to the loop preheader
+    // or header
+    BasicBlock *PreHeader = L->getLoopPreheader();
+    BasicBlock *Header = L->getHeader();
+    if (PreHeader && PreHeader->getUniquePredecessor() != SCEVCheckBB &&
+        Header != SCEVCheckBB)
+      return false;
+
+    // We're interested in a SCEV check block with a branch instruction
+    // terminator
+    if (auto *BranchI = dyn_cast<BranchInst>(SCEVCheckBB->getTerminator())) {
+      if (!BranchI->isConditional())
+        return false;
+
+      Value *Condition = BranchI->getCondition();
+      if (auto *CmpI = dyn_cast<CmpInst>(Condition)) {
+        // Check if the condition of the terminating branch compares
+        // against a constant integer. If not, we can't eliminate the
+        // AND mask.
+        Value *CmpOp0 = CmpI->getOperand(0);
+        Value *CmpOp1 = CmpI->getOperand(1);
+        auto *CmpConstant = (dyn_cast<ConstantInt>(CmpOp0))
+                                ? dyn_cast<ConstantInt>(CmpOp0)
+                                : dyn_cast<ConstantInt>(CmpOp1);
+        if (!CmpConstant)
+          return false;
+
+        if ((CmpConstant == CmpOp1 &&
+             CheckConditional(BranchI, CmpI, CmpInst::ICMP_UGT, Header,
+                              PreHeader, L, CmpOp0)) ||
+            (CmpConstant == CmpOp0 &&
+             CheckConditional(BranchI, CmpI, CmpInst::ICMP_ULT, Header,
+                              PreHeader, L, CmpOp1))) {
+
+          // TODO: inverse operation needs to be checked
+          // We can eliminate the AND mask
+          if (CmpConstant->uge(Mask->getZExtValue())) {
+            AndInstr->replaceAllUsesWith(IndVar);
+            return true;
+          }
+        }
+      }
+    }
+  }
+
+  return false;
+}
+
+static bool tryElimPHINodeUsers(Loop *L, PHINode *PN, ScalarEvolution *SE,
+                                Function &F) {
+  bool Changed = false;
+  for (auto *U : PN->users()) {
+    auto *I = dyn_cast<Instruction>(U);
+    switch (I->getOpcode()) {
+    case Instruction::And:
+      if (tryElimAndMaskOnPHI(L, I, PN, SE, F)) {
+        Changed |= true;
+        NumEliminated++;
+      }
+      break;
+    default:
+      break;
+    }
+  }
+  return Changed;
+}
+
+bool LoopNoOpEliminationPass::runImpl(Function &F) {
+  bool Changed = false;
+  for (Loop *L : *LI) {
+    LoopBlocksRPO RPOT(L);
+    RPOT.perform(LI);
+
+    for (BasicBlock *BB : RPOT)
+      for (Instruction &I : *BB)
+        if (auto *PN = dyn_cast<PHINode>(&I))
+          Changed |= tryElimPHINodeUsers(L, PN, SE, F);
+  }
+
+  return Changed;
+}
+
+PreservedAnalyses LoopNoOpEliminationPass::run(Function &F,
+                                               FunctionAnalysisManager &AM) {
+  LI = &AM.getResult<LoopAnalysis>(F);
+  // There are no loops in the function. Return before computing other
+  // expensive analyses.
+  if (LI->empty())
+    return PreservedAnalyses::all();
+  SE = &AM.getResult<ScalarEvolutionAnalysis>(F);
+  DT = &AM.getResult<DominatorTreeAnalysis>(F);
+  TLI = &AM.getResult<TargetLibraryAnalysis>(F);
+
+  if (!runImpl(F))
+    return PreservedAnalyses::all();
+
+  PreservedAnalyses PA;
+  PA.preserve<LoopAnalysis>();
+  PA.preserve<DominatorTreeAnalysis>();
+  PA.preserve<ScalarEvolutionAnalysis>();
+  PA.preserve<LoopAccessAnalysis>();
+
+  return PA;
+}
diff --git a/llvm/test/Transforms/LoopNoOpElimination/loop-no-op-and-elim.ll b/llvm/test/Transforms/LoopNoOpElimination/loop-no-op-and-elim.ll
new file mode 100644
index 0000000000000..e0c658b99ee72
--- /dev/null
+++ b/llvm/test/Transforms/LoopNoOpElimination/loop-no-op-and-elim.ll
@@ -0,0 +1,292 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt < %s -passes=loop-noop-elim -S | FileCheck %s
+
+define i64 @elim_no_op_and(i64 %N) {
+; CHECK-LABEL: define i64 @elim_no_op_and(
+; CHECK-SAME: i64 [[N:%.*]]) {
+; CHECK-NEXT:  [[VECTOR_SCEVCHECK:.*]]:
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ugt i64 [[N]], 4294967295
+; CHECK-NEXT:    br i1 [[TMP0]], label %[[END:.*]], label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_SCEVCHECK]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = and i64 [[INDEX]], 4294967295
+; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 1
+; CHECK-NEXT:    [[EXIT_COND:%.*]] = icmp ugt i64 [[INDEX_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[EXIT_COND]], label %[[END]], label %[[VECTOR_BODY]]
+; CHECK:       [[END]]:
+; CHECK-NEXT:    ret i64 [[N]]
+;
+vector.scevcheck:
+  %cmp = icmp ugt i64 %N, 4294967295
+  br i1 %cmp, label %end, label %vector.body
+vector.body:
+  %index = phi i64 [ 0, %vector.scevcheck ], [ %index.next, %vector.body ]
+  %and = and i64 %index, 4294967295
+  %index.next = add i64 %and, 1
+  %exit.cond = icmp ugt i64 %index.next, %N
+  br i1 %exit.cond, label %end, label %vector.body
+end:
+  ret i64 %N
+}
+
+define i64 @elim_no_op_and_reverse_condition_1(i64 %N) {
+; CHECK-LABEL: define i64 @elim_no_op_and_reverse_condition_1(
+; CHECK-SAME: i64 [[N:%.*]]) {
+; CHECK-NEXT:  [[VECTOR_SCEVCHECK:.*]]:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 4294967295, [[N]]
+; CHECK-NEXT:    br i1 [[CMP]], label %[[END:.*]], label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_SCEVCHECK]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[AND:%.*]] = and i64 [[INDEX]], 4294967295
+; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 1
+; CHECK-NEXT:    [[EXIT_COND:%.*]] = icmp ugt i64 [[INDEX_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[EXIT_COND]], label %[[END]], label %[[VECTOR_BODY]]
+; CHECK:       [[END]]:
+; CHECK-NEXT:    ret i64 [[N]]
+;
+vector.scevcheck:
+  %cmp = icmp ult i64 4294967295, %N
+  br i1 %cmp, label %end, label %vector.body
+vector.body:
+  %index = phi i64 [ 0, %vector.scevcheck ], [ %index.next, %vector.body ]
+  %and = and i64 %index, 4294967295
+  %index.next = add i64 %and, 1
+  %exit.cond = icmp ugt i64 %index.next, %N
+  br i1 %exit.cond, label %end, label %vector.body
+end:
+  ret i64 %N
+}
+
+
+define i64 @elim_no_op_and_reverse_condition_2(i64 %N) {
+; CHECK-LABEL: define i64 @elim_no_op_and_reverse_condition_2(
+; CHECK-SAME: i64 [[N:%.*]]) {
+; CHECK-NEXT:  [[VECTOR_SCEVCHECK:.*]]:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 4294967295, [[N]]
+; CHECK-NEXT:    br i1 [[CMP]], label %[[END:.*]], label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_SCEVCHECK]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[AND:%.*]] = and i64 [[INDEX]], 4294967295
+; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 1
+; CHECK-NEXT:    [[EXIT_COND:%.*]] = icmp ult i64 [[N]], [[INDEX_NEXT]]
+; CHECK-NEXT:    br i1 [[EXIT_COND]], label %[[END]], label %[[VECTOR_BODY]]
+; CHECK:       [[END]]:
+; CHECK-NEXT:    ret i64 [[N]]
+;
+vector.scevcheck:
+  %cmp = icmp ult i64 4294967295, %N
+  br i1 %cmp, label %end, label %vector.body
+vector.body:
+  %index = phi i64 [ 0, %vector.scevcheck ], [ %index.next, %vector.body ]
+  %and = and i64 %index, 4294967295
+  %index.next = add i64 %and, 1
+  %exit.cond = icmp ult i64 %N, %index.next
+  br i1 %exit.cond, label %end, label %vector.body
+end:
+  ret i64 %N
+}
+
+
+define i64 @elim_no_op_and_reverse_condition_3(i64 %N) {
+; CHECK-LABEL: define i64 @elim_no_op_and_reverse_condition_3(
+; CHECK-SAME: i64 [[N:%.*]]) {
+; CHECK-NEXT:  [[VECTOR_SCEVCHECK:.*]]:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i64 [[N]], 4294967295
+; CHECK-NEXT:    br i1 [[CMP]], label %[[END:.*]], label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_SCEVCHECK]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[AND:%.*]] = and i64 [[INDEX]], 4294967295
+; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 1
+; CHECK-NEXT:    [[EXIT_COND:%.*]] = icmp ult i64 [[N]], [[INDEX_NEXT]]
+; CHECK-NEXT:    br i1 [[EXIT_COND]], label %[[END]], label %[[VECTOR_BODY]]
+; CHECK:       [[END]]:
+; CHECK-NEXT:    ret i64 [[N]]
+;
+vector.scevcheck:
+  %cmp = icmp ugt i64 %N, 4294967295
+  br i1 %cmp, label %end, label %vector.body
+vector.body:
+  %index = phi i64 [ 0, %vector.scevcheck ], [ %index.next, %vector.body ]
+  %and = and i64 %index, 4294967295
+  %index.next = add i64 %and, 1
+  %exit.cond = icmp ult i64 %N, %index.next
+  br i1 %exit.cond, label %end, label %vector.body
+end:
+  ret i64 %N
+}
+
+
+define i64 @elim_no_op_and_loop_with_preheader(i64 %N) {
+; CHECK-LABEL: define i64 @elim_no_op_and_loop_with_preheader(
+; CHECK-SAME: i64 [[N:%.*]]) {
+; CHECK-NEXT:  [[VECTOR_SCEVCHECK:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ugt i64 [[N]], 4294967295
+; CHECK-NEXT:    br i1 [[TMP0]], label %[[END:.*]], label %[[VECTOR_PREHEADER:.*]]
+; CHECK:       [[VECTOR_PREHEADER]]:
+; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PREHEADER]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = and i64 [[INDEX]], 4294967295
+; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 1
+; CHECK-NEXT:    [[EXIT_COND:%.*]] = icmp ugt i64 [[INDEX_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[EXIT_COND]], label %[[END]], label %[[VECTOR_BODY]]
+; CHECK:       [[END]]:
+; CHECK-NEXT:    ret i64 [[N]]
+;
+vector.scevcheck:
+  %cmp = icmp ugt i64 %N, 4294967295
+  br i1 %cmp, label %end, label %vector.preheader
+vector.preheader:
+  br label %vector.body
+vector.body:
+  %index = phi i64 [ 0, %vector.preheader ], [ %index.next, %vector.body ]
+  %and = and i64 %index, 4294967295
+  %index.next = add i64 %and, 1
+  %exit.cond = icmp ugt i64 %index.next, %N
+  br i1 %exit.cond, label %end, label %vector.body
+end:
+  ret i64 %N
+}
+
+define i64 @fail_elim_no_op_and_latch_not_using_constant(i64 %N, i64 %C) {
+; CHECK-LABEL: define i64 @fail_elim_no_op_and_latch_not_using_constant(
+; CHECK-SAME: i64 [[N:%.*]], i64 [[C:%.*]]) {
+; CHECK-NEXT:  [[VECTOR_SCEVCHECK:.*]]:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i64 [[N]], 4294967295
+; CHECK-NEXT:    br i1 [[CMP]], label %[[END:.*]], label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_SCEVCHECK]] ], [ [[INDEX_NEXT:%.*]], %[[...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/163534


More information about the llvm-commits mailing list