[llvm] [Transforms] Add LoopNoOpElimination pass (PR #163534)

Wed Oct 15 02:29:47 PDT 2025

https://github.com/nasherm created https://github.com/llvm/llvm-project/pull/163534

 
    This patch adds the LoopNoOpEliminatioin pass including
    appropriate tests. It's been found when benchmarking the vectorizer
    and loop flattening that we sometimes generate loops of the following
    basic form
    
    ```
    vector.scevcheck:                                 ; preds = %for.cond1.preheader.lr.ph
      %cmp = icmp ugt i64 %N, 4294967295
      br i1 %cmp, label %end, label %vector.body
    vector.body:                                      ; preds = %vector.body, %vector.ph
      %index = phi i64 [ 0, %vector.scevcheck ], [ %index.next, %vector.body ]
      %and = and i64 %index, 4294967295
      %index.next = add i64 %and, 1
      %exit.cond = icmp ugt i64 %index.next, %N
      br i1 %exit.cond, label %end, label %vector.body
    end:
      ret i64 %N
    ```
    
    In this loop the 'and' mask is introduced as a bounds check
    but is a no-op due to the fact the runtime bounds check block
    'vector.scevcheck' telegraphs information about the maximum tripcount
    of this loop. This patch allows for transformations that turn the above
    loop to
    
    ```
    vector.scevcheck:                                 ; preds = %for.cond1.preheader.lr.ph
      %cmp = icmp ugt i64 %N, 4294967295
      br i1 %cmp, label %end, label %vector.body
    vector.body:                                      ; preds = %vector.body, %vector.ph
      %index = phi i64 [ 0, %vector.scevcheck ], [ %index.next, %vector.body ]
      %index.next = add i64 %index, 1
      %exit.cond = icmp ugt i64 %index.next, %N
      br i1 %exit.cond, label %end, label %vector.body
    end:
      ret i64 %N
    ```
    
    With this patch we've seen performance improvements of up to 8% on internal
    benchmarks tested on AArch64 cores.


>From 3c1ff1f5704ac7a5deba3795ff9822bc73b87937 Mon Sep 17 00:00:00 2001
From: nasmnc01 <nashe.mncube at arm.com>
Date: Mon, 13 Oct 2025 16:21:47 +0100
Subject: [PATCH 1/2] [Transforms] Add LoopNoOpElimination pass

This patch adds the LoopNoOpEliminatioin pass including
appropriate tests. It's been found when benchmarking the vectorizer
and loop flattening that we sometimes generate loops of the following
basic form

```
vector.scevcheck:                                 ; preds = %for.cond1.preheader.lr.ph
  %cmp = icmp ugt i64 %N, 4294967295
  br i1 %cmp, label %end, label %vector.body
vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.scevcheck ], [ %index.next, %vector.body ]
  %and = and i64 %index, 4294967295
  %index.next = add i64 %and, 1
  %exit.cond = icmp ugt i64 %index.next, %N
  br i1 %exit.cond, label %end, label %vector.body
end:
  ret i64 %N
```

In this loop the 'and' mask is introduced as a bounds check
but is a no-op due to the fact the runtime bounds check block
'vector.scevcheck' telegraphs information about the maximum tripcount
of this loop. This patch allows for transformations that turn the above
loop to

```
vector.scevcheck:                                 ; preds = %for.cond1.preheader.lr.ph
  %cmp = icmp ugt i64 %N, 4294967295
  br i1 %cmp, label %end, label %vector.body
vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.scevcheck ], [ %index.next, %vector.body ]
  %index.next = add i64 %index, 1
  %exit.cond = icmp ugt i64 %index.next, %N
  br i1 %exit.cond, label %end, label %vector.body
end:
  ret i64 %N
```

With this patch we've seen performance improvements of up to 8% on internal
benchmarks.

Change-Id: I9bb50138bf92da41a94173a5c6da9131e839560b
---
 .../Transforms/Scalar/LoopNoOpElimination.h   |  52 ++++
 llvm/lib/Passes/PassBuilder.cpp               |   1 +
 llvm/lib/Passes/PassBuilderPipelines.cpp      |   9 +
 llvm/lib/Passes/PassRegistry.def              |   1 +
 llvm/lib/Transforms/Scalar/CMakeLists.txt     |   1 +
 .../Transforms/Scalar/LoopNoOpElimination.cpp | 228 ++++++++++++++
 .../loop-no-op-and-elim.ll                    | 292 ++++++++++++++++++
 7 files changed, 584 insertions(+)
 create mode 100644 llvm/include/llvm/Transforms/Scalar/LoopNoOpElimination.h
 create mode 100644 llvm/lib/Transforms/Scalar/LoopNoOpElimination.cpp
 create mode 100644 llvm/test/Transforms/LoopNoOpElimination/loop-no-op-and-elim.ll

diff --git a/llvm/include/llvm/Transforms/Scalar/LoopNoOpElimination.h b/llvm/include/llvm/Transforms/Scalar/LoopNoOpElimination.h
new file mode 100644
index 0000000000000..38da5713766f1
--- /dev/null
+++ b/llvm/include/llvm/Transforms/Scalar/LoopNoOpElimination.h
@@ -0,0 +1,52 @@
+//===- LoopNoOpElimination.h - Loop No-Op Elimination pass ------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass eliminates no-op operations in loop bodies
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_SCALAR_LOOPNOOPELIMINATION_H
+#define LLVM_TRANSFORMS_SCALAR_LOOPNOOPELIMINATION_H
+
+#include "llvm/Analysis/LoopAnalysisManager.h"
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+class DominatorTree;
+class Function;
+class Instruction;
+class Loop;
+class LoopAccessInfoManager;
+class LoopInfo;
+class ScalarEvolution;
+class TargetLibraryInfo;
+class TargetTransformInfo;
+class OptimizationRemarkEmitter;
+class DataLayout;
+class SCEVExpander;
+
+/// Performs Loop No-Op Elimination Pass.
+class LoopNoOpEliminationPass : public PassInfoMixin<LoopNoOpEliminationPass> {
+public:
+  ScalarEvolution *SE;
+  LoopInfo *LI;
+  TargetTransformInfo *TTI;
+  DominatorTree *DT;
+  TargetLibraryInfo *TLI;
+  OptimizationRemarkEmitter *ORE;
+
+
+  PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+private:
+  bool runImpl(Function &F);
+};
+
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_SCALAR_LOOPNOOPELIMINATION_H
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index 53cf0046bd858..61c67cca17326 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -302,6 +302,7 @@
 #include "llvm/Transforms/Scalar/LoopInstSimplify.h"
 #include "llvm/Transforms/Scalar/LoopInterchange.h"
 #include "llvm/Transforms/Scalar/LoopLoadElimination.h"
+#include "llvm/Transforms/Scalar/LoopNoOpElimination.h"
 #include "llvm/Transforms/Scalar/LoopPassManager.h"
 #include "llvm/Transforms/Scalar/LoopPredication.h"
 #include "llvm/Transforms/Scalar/LoopRotation.h"
diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
index fea0d255cc91a..bca0ac1d3c58d 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -110,6 +110,7 @@
 #include "llvm/Transforms/Scalar/LoopInstSimplify.h"
 #include "llvm/Transforms/Scalar/LoopInterchange.h"
 #include "llvm/Transforms/Scalar/LoopLoadElimination.h"
+#include "llvm/Transforms/Scalar/LoopNoOpElimination.h"
 #include "llvm/Transforms/Scalar/LoopPassManager.h"
 #include "llvm/Transforms/Scalar/LoopRotation.h"
 #include "llvm/Transforms/Scalar/LoopSimplifyCFG.h"
@@ -216,6 +217,11 @@ static cl::opt<bool> EnableLoopFlatten("enable-loop-flatten", cl::init(false),
                                        cl::Hidden,
                                        cl::desc("Enable the LoopFlatten Pass"));
 
+static cl::opt<bool>
+    EnableLoopNoOpElimination("enable-loop-noop-elimination", cl::init(false),
+                              cl::Hidden,
+                              cl::desc("Enable Loop no-op elimination pass"));
+
 // Experimentally allow loop header duplication. This should allow for better
 // optimization at Oz, since loop-idiom recognition can then recognize things
 // like memcpy. If this ends up being useful for many targets, we should drop
@@ -1307,6 +1313,9 @@ void PassBuilder::addVectorPasses(OptimizationLevel Level,
   FPM.addPass(LoopVectorizePass(
       LoopVectorizeOptions(!PTO.LoopInterleaving, !PTO.LoopVectorization)));
 
+  if (EnableLoopNoOpElimination)
+    FPM.addPass(LoopNoOpEliminationPass());
+
   FPM.addPass(InferAlignmentPass());
   if (IsFullLTO) {
     // The vectorizer may have significantly shortened a loop body; unroll
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index 1b1652555cd28..a6b283256101b 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -566,6 +566,7 @@ FUNCTION_PASS("view-dom-only", DomOnlyViewer())
 FUNCTION_PASS("view-post-dom", PostDomViewer())
 FUNCTION_PASS("view-post-dom-only", PostDomOnlyViewer())
 FUNCTION_PASS("wasm-eh-prepare", WasmEHPreparePass())
+FUNCTION_PASS("loop-noop-elim", LoopNoOpEliminationPass())
 #undef FUNCTION_PASS
 
 #ifndef FUNCTION_PASS_WITH_PARAMS
diff --git a/llvm/lib/Transforms/Scalar/CMakeLists.txt b/llvm/lib/Transforms/Scalar/CMakeLists.txt
index 37dbb34605646..c37e2cc756b87 100644
--- a/llvm/lib/Transforms/Scalar/CMakeLists.txt
+++ b/llvm/lib/Transforms/Scalar/CMakeLists.txt
@@ -37,6 +37,7 @@ add_llvm_component_library(LLVMScalarOpts
   LoopFuse.cpp
   LoopIdiomRecognize.cpp
   LoopInstSimplify.cpp
+  LoopNoOpElimination.cpp
   LoopInterchange.cpp
   LoopFlatten.cpp
   LoopLoadElimination.cpp
diff --git a/llvm/lib/Transforms/Scalar/LoopNoOpElimination.cpp b/llvm/lib/Transforms/Scalar/LoopNoOpElimination.cpp
new file mode 100644
index 0000000000000..9bafd10a91ff4
--- /dev/null
+++ b/llvm/lib/Transforms/Scalar/LoopNoOpElimination.cpp
@@ -0,0 +1,228 @@
+//===- LoopNoOpElimination.cpp - Loop No-Op Elimination Pass --------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass attempts to spot and eliminate no-op operations in loop bodies.
+// For example loop Vectorization may create loops like the following.
+//
+// vector.scevcheck:
+//   %1 = add i64 %flatten.tripcount, -1
+//   %2 = icmp ugt i64 %1, 4294967295
+//   br i1 %2, label %scalar.ph, label %vector.ph
+// vector.ph:
+//    %iv = phi i64 [ 0, %vector.scevcheck], [ %iv.next, %vector.ph ]
+//    %m  = and i64 %iv, 4294967295 ; 0xffff_fffe  no op
+//    %p  = getelementptr inbounds <4 x i32>, ptr %A, i64 %m
+//    %load = load <4 x i32>, ptr %p, align 4
+//    %1 = add <4 x i32> %load,  %X
+//    store <4 x i32> %1, ptr %p, align 4
+//    %iv.next = add nuw i64 %iv, 4
+//    %c  = icmp ult i64 %iv.next, %N
+//    br i1 %c, label %vector.ph, label %exit
+//  exit:
+//    ret void
+//
+// The vectorizer creates the SCEV check block to perform
+// runtime IV checks. This block can be used to determine true
+// range of the the IV as entry into the vector loop is only possible
+// for certain tripcount values.
+//
+// Currently this pass only supports spotting no-op AND operations in loop
+// bodies.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Scalar/LoopNoOpElimination.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopIterator.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/MemorySSA.h"
+#include "llvm/Analysis/MemorySSAUpdater.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
+#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
+#include <iterator>
+#include <optional>
+#include <utility>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "loop-noop-elim"
+
+STATISTIC(NumEliminated, "Number of redundant instructions eliminated");
+
+static BasicBlock *getSCEVCheckBB(Function &F) {
+  for (BasicBlock &BB : F)
+    if (BB.getName() == "vector.scevcheck")
+      return &BB;
+
+  return nullptr;
+}
+
+// Use vector.check block to determine if we can eliminate a bounds check on
+// the IV if we know that we can only enter the vector block if the tripcount
+// is within certain bounds.
+static bool tryElimAndMaskOnPHI(Loop *L, Instruction *AndInstr, PHINode *IndVar,
+                                ScalarEvolution *SE, Function &F) {
+  Value *Op0 = AndInstr->getOperand(0);
+  Value *Op1 = AndInstr->getOperand(1);
+
+  auto *Mask = dyn_cast<ConstantInt>(Op0 == IndVar ? Op1 : Op0);
+  if (!Mask)
+    return false;
+
+  auto CheckConditional = [](BranchInst *BranchI, CmpInst *CmpI,
+                             unsigned ExpectedPred, BasicBlock *Header,
+                             BasicBlock *PreHeader, Loop *L,
+                             Value *LatchCmpV) -> bool {
+    // Make sure that the conditional operator is what we
+    // expect
+    unsigned CmpIOpcode = CmpI->getPredicate();
+    if (CmpIOpcode != ExpectedPred)
+      return false;
+
+    // Check that in the case of a true result we actually
+    // branch to the loop
+    Value *TrueDest = BranchI->getOperand(1);
+    if (TrueDest != PreHeader && TrueDest != Header)
+      return false;
+
+    // Check that the conditional variable that is used for the
+    // SCEV check is actually used in the latch compare instruction
+    auto *LatchCmpInst = L->getLatchCmpInst();
+    if (!LatchCmpInst)
+      return false;
+
+    if (LatchCmpInst->getOperand(0) != LatchCmpV &&
+        LatchCmpInst->getOperand(1) != LatchCmpV) {
+      return false;
+    }
+
+    return true;
+  };
+
+  // Determine if there's a runtime SCEV check block
+  // and use that to determine if we can elim the phinode
+  if (auto *SCEVCheckBB = getSCEVCheckBB(F)) {
+    // Determine if the SCEV check BB branches to the loop preheader
+    // or header
+    BasicBlock *PreHeader = L->getLoopPreheader();
+    BasicBlock *Header = L->getHeader();
+    if (PreHeader && PreHeader->getUniquePredecessor() != SCEVCheckBB &&
+        Header != SCEVCheckBB)
+      return false;
+
+    // We're interested in a SCEV check block with a branch instruction
+    // terminator
+    if (auto *BranchI = dyn_cast<BranchInst>(SCEVCheckBB->getTerminator())) {
+      if (!BranchI->isConditional())
+        return false;
+
+      Value *Condition = BranchI->getCondition();
+      if (auto *CmpI = dyn_cast<CmpInst>(Condition)) {
+        // Check if the condition for the terminating instruction
+        // is doing some comparison with a constant integer. If not
+        // we can't elim our AND mask
+        Value *CmpOp0 = CmpI->getOperand(0);
+        Value *CmpOp1 = CmpI->getOperand(1);
+        auto *CmpConstant = (dyn_cast<ConstantInt>(CmpOp0))
+                                ? dyn_cast<ConstantInt>(CmpOp0)
+                                : dyn_cast<ConstantInt>(CmpOp1);
+        if (!CmpConstant)
+          return false;
+
+        if ((CmpConstant == CmpOp1 &&
+             CheckConditional(BranchI, CmpI, CmpInst::ICMP_UGT, Header,
+                              PreHeader, L, CmpOp0)) ||
+            (CmpConstant == CmpOp0 &&
+             CheckConditional(BranchI, CmpI, CmpInst::ICMP_ULT, Header,
+                              PreHeader, L, CmpOp1))) {
+
+          // TODO: inverse operation needs to be checked
+          // We can eliminate the AND mask
+          if (CmpConstant->uge(Mask->getZExtValue())) {
+            AndInstr->replaceAllUsesWith(IndVar);
+            return true;
+          }
+        }
+      }
+    }
+  }
+
+  return false;
+}
+
+static bool tryElimPHINodeUsers(Loop *L, PHINode *PN, ScalarEvolution *SE,
+                                Function &F) {
+  bool Changed = false;
+  for (auto *U : PN->users()) {
+    auto *I = dyn_cast<Instruction>(U);
+    switch (I->getOpcode()) {
+    case Instruction::And:
+      if (tryElimAndMaskOnPHI(L, I, PN, SE, F)) {
+        Changed |= true;
+        NumEliminated++;
+      }
+      break;
+    default:
+      break;
+    }
+  }
+  return Changed;
+}
+
+bool LoopNoOpEliminationPass::runImpl(Function &F) {
+  bool Changed = false;
+  for (Loop *L : *LI) {
+    LoopBlocksRPO RPOT(L);
+    RPOT.perform(LI);
+
+    for (BasicBlock *BB : RPOT)
+      for (Instruction &I : *BB)
+        if (auto *PN = dyn_cast<PHINode>(&I))
+          Changed |= tryElimPHINodeUsers(L, PN, SE, F);
+  }
+
+  return Changed;
+}
+
+PreservedAnalyses LoopNoOpEliminationPass::run(Function &F,
+                                               FunctionAnalysisManager &AM) {
+  LI = &AM.getResult<LoopAnalysis>(F);
+  // There are no loops in the function. Return before computing other
+  // expensive analyses.
+  if (LI->empty())
+    return PreservedAnalyses::all();
+  SE = &AM.getResult<ScalarEvolutionAnalysis>(F);
+  DT = &AM.getResult<DominatorTreeAnalysis>(F);
+  TLI = &AM.getResult<TargetLibraryAnalysis>(F);
+
+  if (runImpl(F))
+    return PreservedAnalyses::all();
+
+  PreservedAnalyses PA;
+  PA.preserve<LoopAnalysis>();
+  PA.preserve<DominatorTreeAnalysis>();
+  PA.preserve<ScalarEvolutionAnalysis>();
+  PA.preserve<LoopAccessAnalysis>();
+
+  return PA;
+}
diff --git a/llvm/test/Transforms/LoopNoOpElimination/loop-no-op-and-elim.ll b/llvm/test/Transforms/LoopNoOpElimination/loop-no-op-and-elim.ll
new file mode 100644
index 0000000000000..4441f6c12fcc0
--- /dev/null
+++ b/llvm/test/Transforms/LoopNoOpElimination/loop-no-op-and-elim.ll
@@ -0,0 +1,292 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt < %s -S | FileCheck %s
+
+define i64 @elim_no_op_and(i64 %N) {
+; CHECK-LABEL: define i64 @elim_no_op_and(
+; CHECK-SAME: i64 [[N:%.*]]) {
+; CHECK-NEXT:  [[VECTOR_SCEVCHECK:.*]]:
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ugt i64 [[N]], 4294967295
+; CHECK-NEXT:    br i1 [[TMP0]], label %[[END:.*]], label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_SCEVCHECK]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = and i64 [[INDEX]], 4294967295
+; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[TMP1]], 1
+; CHECK-NEXT:    [[EXIT_COND:%.*]] = icmp ugt i64 [[INDEX_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[EXIT_COND]], label %[[END]], label %[[VECTOR_BODY]]
+; CHECK:       [[END]]:
+; CHECK-NEXT:    ret i64 [[N]]
+;
+vector.scevcheck:
+  %cmp = icmp ugt i64 %N, 4294967295
+  br i1 %cmp, label %end, label %vector.body
+vector.body:
+  %index = phi i64 [ 0, %vector.scevcheck ], [ %index.next, %vector.body ]
+  %and = and i64 %index, 4294967295
+  %index.next = add i64 %and, 1
+  %exit.cond = icmp ugt i64 %index.next, %N
+  br i1 %exit.cond, label %end, label %vector.body
+end:
+  ret i64 %N
+}
+
+define i64 @elim_no_op_and_reverse_condition_1(i64 %N) {
+; CHECK-LABEL: define i64 @elim_no_op_and_reverse_condition_1(
+; CHECK-SAME: i64 [[N:%.*]]) {
+; CHECK-NEXT:  [[VECTOR_SCEVCHECK:.*]]:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 4294967295, [[N]]
+; CHECK-NEXT:    br i1 [[CMP]], label %[[END:.*]], label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_SCEVCHECK]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[AND:%.*]] = and i64 [[INDEX]], 4294967295
+; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[AND]], 1
+; CHECK-NEXT:    [[EXIT_COND:%.*]] = icmp ugt i64 [[INDEX_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[EXIT_COND]], label %[[END]], label %[[VECTOR_BODY]]
+; CHECK:       [[END]]:
+; CHECK-NEXT:    ret i64 [[N]]
+;
+vector.scevcheck:
+  %cmp = icmp ult i64 4294967295, %N
+  br i1 %cmp, label %end, label %vector.body
+vector.body:
+  %index = phi i64 [ 0, %vector.scevcheck ], [ %index.next, %vector.body ]
+  %and = and i64 %index, 4294967295
+  %index.next = add i64 %and, 1
+  %exit.cond = icmp ugt i64 %index.next, %N
+  br i1 %exit.cond, label %end, label %vector.body
+end:
+  ret i64 %N
+}
+
+
+define i64 @elim_no_op_and_reverse_condition_2(i64 %N) {
+; CHECK-LABEL: define i64 @elim_no_op_and_reverse_condition_2(
+; CHECK-SAME: i64 [[N:%.*]]) {
+; CHECK-NEXT:  [[VECTOR_SCEVCHECK:.*]]:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 4294967295, [[N]]
+; CHECK-NEXT:    br i1 [[CMP]], label %[[END:.*]], label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_SCEVCHECK]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[AND:%.*]] = and i64 [[INDEX]], 4294967295
+; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[AND]], 1
+; CHECK-NEXT:    [[EXIT_COND:%.*]] = icmp ult i64 [[N]], [[INDEX_NEXT]]
+; CHECK-NEXT:    br i1 [[EXIT_COND]], label %[[END]], label %[[VECTOR_BODY]]
+; CHECK:       [[END]]:
+; CHECK-NEXT:    ret i64 [[N]]
+;
+vector.scevcheck:
+  %cmp = icmp ult i64 4294967295, %N
+  br i1 %cmp, label %end, label %vector.body
+vector.body:
+  %index = phi i64 [ 0, %vector.scevcheck ], [ %index.next, %vector.body ]
+  %and = and i64 %index, 4294967295
+  %index.next = add i64 %and, 1
+  %exit.cond = icmp ult i64 %N, %index.next
+  br i1 %exit.cond, label %end, label %vector.body
+end:
+  ret i64 %N
+}
+
+
+define i64 @elim_no_op_and_reverse_condition_3(i64 %N) {
+; CHECK-LABEL: define i64 @elim_no_op_and_reverse_condition_3(
+; CHECK-SAME: i64 [[N:%.*]]) {
+; CHECK-NEXT:  [[VECTOR_SCEVCHECK:.*]]:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i64 [[N]], 4294967295
+; CHECK-NEXT:    br i1 [[CMP]], label %[[END:.*]], label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_SCEVCHECK]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[AND:%.*]] = and i64 [[INDEX]], 4294967295
+; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[AND]], 1
+; CHECK-NEXT:    [[EXIT_COND:%.*]] = icmp ult i64 [[N]], [[INDEX_NEXT]]
+; CHECK-NEXT:    br i1 [[EXIT_COND]], label %[[END]], label %[[VECTOR_BODY]]
+; CHECK:       [[END]]:
+; CHECK-NEXT:    ret i64 [[N]]
+;
+vector.scevcheck:
+  %cmp = icmp ugt i64 %N, 4294967295
+  br i1 %cmp, label %end, label %vector.body
+vector.body:
+  %index = phi i64 [ 0, %vector.scevcheck ], [ %index.next, %vector.body ]
+  %and = and i64 %index, 4294967295
+  %index.next = add i64 %and, 1
+  %exit.cond = icmp ult i64 %N, %index.next
+  br i1 %exit.cond, label %end, label %vector.body
+end:
+  ret i64 %N
+}
+
+
+define i64 @elim_no_op_and_loop_with_preheader(i64 %N) {
+; CHECK-LABEL: define i64 @elim_no_op_and_loop_with_preheader(
+; CHECK-SAME: i64 [[N:%.*]]) {
+; CHECK-NEXT:  [[VECTOR_SCEVCHECK:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ugt i64 [[N]], 4294967295
+; CHECK-NEXT:    br i1 [[TMP0]], label %[[END:.*]], label %[[VECTOR_PREHEADER:.*]]
+; CHECK:       [[VECTOR_PREHEADER]]:
+; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PREHEADER]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = and i64 [[INDEX]], 4294967295
+; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[TMP1]], 1
+; CHECK-NEXT:    [[EXIT_COND:%.*]] = icmp ugt i64 [[INDEX_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[EXIT_COND]], label %[[END]], label %[[VECTOR_BODY]]
+; CHECK:       [[END]]:
+; CHECK-NEXT:    ret i64 [[N]]
+;
+vector.scevcheck:
+  %cmp = icmp ugt i64 %N, 4294967295
+  br i1 %cmp, label %end, label %vector.preheader
+vector.preheader:
+  br label %vector.body
+vector.body:
+  %index = phi i64 [ 0, %vector.preheader ], [ %index.next, %vector.body ]
+  %and = and i64 %index, 4294967295
+  %index.next = add i64 %and, 1
+  %exit.cond = icmp ugt i64 %index.next, %N
+  br i1 %exit.cond, label %end, label %vector.body
+end:
+  ret i64 %N
+}
+
+define i64 @fail_elim_no_op_and_latch_not_using_constant(i64 %N, i64 %C) {
+; CHECK-LABEL: define i64 @fail_elim_no_op_and_latch_not_using_constant(
+; CHECK-SAME: i64 [[N:%.*]], i64 [[C:%.*]]) {
+; CHECK-NEXT:  [[VECTOR_SCEVCHECK:.*]]:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i64 [[N]], 4294967295
+; CHECK-NEXT:    br i1 [[CMP]], label %[[END:.*]], label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_SCEVCHECK]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[AND:%.*]] = and i64 [[INDEX]], 4294967295
+; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[AND]], 1
+; CHECK-NEXT:    [[EXIT_COND:%.*]] = icmp ugt i64 [[INDEX_NEXT]], [[C]]
+; CHECK-NEXT:    br i1 [[EXIT_COND]], label %[[END]], label %[[VECTOR_BODY]]
+; CHECK:       [[END]]:
+; CHECK-NEXT:    ret i64 [[N]]
+;
+vector.scevcheck:
+  %cmp = icmp ugt i64 %N, 4294967295
+  br i1 %cmp, label %end, label %vector.body
+vector.body:
+  %index = phi i64 [ 0, %vector.scevcheck ], [ %index.next, %vector.body ]
+  %and = and i64 %index, 4294967295
+  %index.next = add i64 %and, 1
+  %exit.cond = icmp ugt i64 %index.next, %C
+  br i1 %exit.cond, label %end, label %vector.body
+end:
+  ret i64 %N
+
+}
+
+define i64 @fail_elim_no_op_and_signed_conditional(i64 %N) {
+; CHECK-LABEL: define i64 @fail_elim_no_op_and_signed_conditional(
+; CHECK-SAME: i64 [[N:%.*]]) {
+; CHECK-NEXT:  [[VECTOR_SCEVCHECK:.*]]:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i64 [[N]], 4294967295
+; CHECK-NEXT:    br i1 [[CMP]], label %[[END:.*]], label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_SCEVCHECK]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[AND:%.*]] = and i64 [[INDEX]], 4294967295
+; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[AND]], 1
+; CHECK-NEXT:    [[EXIT_COND:%.*]] = icmp ugt i64 [[INDEX_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[EXIT_COND]], label %[[END]], label %[[VECTOR_BODY]]
+; CHECK:       [[END]]:
+; CHECK-NEXT:    ret i64 [[N]]
+;
+vector.scevcheck:                                 ; preds = %for.cond1.preheader.lr.ph
+  %cmp = icmp sgt i64 %N, 4294967295
+  br i1 %cmp, label %end, label %vector.body
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.scevcheck ], [ %index.next, %vector.body ]
+  %and = and i64 %index, 4294967295
+  %index.next = add i64 %and, 1
+  %exit.cond = icmp ugt i64 %index.next, %N
+  br i1 %exit.cond, label %end, label %vector.body
+end:
+  ret i64 %N
+}
+
+define i64 @fail_elim_no_op_and_branch_on_false(i64 %N) {
+; CHECK-LABEL: define i64 @fail_elim_no_op_and_branch_on_false(
+; CHECK-SAME: i64 [[N:%.*]]) {
+; CHECK-NEXT:  [[VECTOR_SCEVCHECK:.*]]:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i64 [[N]], 4294967295
+; CHECK-NEXT:    br i1 [[CMP]], label %[[VECTOR_BODY:.*]], label %[[END:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_SCEVCHECK]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[AND:%.*]] = and i64 [[INDEX]], 4294967295
+; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[AND]], 1
+; CHECK-NEXT:    [[EXIT_COND:%.*]] = icmp ugt i64 [[INDEX_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[EXIT_COND]], label %[[END]], label %[[VECTOR_BODY]]
+; CHECK:       [[END]]:
+; CHECK-NEXT:    ret i64 [[N]]
+;
+vector.scevcheck:
+  %cmp = icmp ugt i64 %N, 4294967295
+  br i1 %cmp, label %vector.body, label %end
+vector.body:
+  %index = phi i64 [ 0, %vector.scevcheck ], [ %index.next, %vector.body ]
+  %and = and i64 %index, 4294967295
+  %index.next = add i64 %and, 1
+  %exit.cond = icmp ugt i64 %index.next, %N
+  br i1 %exit.cond, label %end, label %vector.body
+end:
+  ret i64 %N
+}
+
+define i64 @fail_elim_no_op_and_large_mask(i64 %N) {
+; CHECK-LABEL: define i64 @fail_elim_no_op_and_large_mask(
+; CHECK-SAME: i64 [[N:%.*]]) {
+; CHECK-NEXT:  [[VECTOR_SCEVCHECK:.*]]:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i64 [[N]], 1
+; CHECK-NEXT:    br i1 [[CMP]], label %[[END:.*]], label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_SCEVCHECK]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[AND:%.*]] = and i64 [[INDEX]], 4294967295
+; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[AND]], 1
+; CHECK-NEXT:    [[EXIT_COND:%.*]] = icmp ugt i64 [[INDEX_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[EXIT_COND]], label %[[END]], label %[[VECTOR_BODY]]
+; CHECK:       [[END]]:
+; CHECK-NEXT:    ret i64 [[N]]
+;
+vector.scevcheck:
+  %cmp = icmp ugt i64 %N, 1
+  br i1 %cmp, label %end, label %vector.body
+vector.body:
+  %index = phi i64 [ 0, %vector.scevcheck ], [ %index.next, %vector.body ]
+  %and = and i64 %index, 4294967295
+  %index.next = add i64 %and, 1
+  %exit.cond = icmp ugt i64 %index.next, %N
+  br i1 %exit.cond, label %end, label %vector.body
+end:
+  ret i64 %N
+}
+
+define i64 @fail_elim_no_op_and_mismatched_types(i64 %N) {
+; CHECK-LABEL: define i64 @fail_elim_no_op_and_mismatched_types(
+; CHECK-SAME: i64 [[N:%.*]]) {
+; CHECK-NEXT:  [[VECTOR_SCEVCHECK:.*]]:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i64 [[N]], 4294967295
+; CHECK-NEXT:    br i1 [[CMP]], label %[[END:.*]], label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_SCEVCHECK]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[EXT:%.*]] = zext i32 [[INDEX]] to i64
+; CHECK-NEXT:    [[AND:%.*]] = and i64 [[EXT]], 4294967295
+; CHECK-NEXT:    [[EXIT_COND:%.*]] = icmp ugt i64 [[AND]], [[N]]
+; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 1
+; CHECK-NEXT:    br i1 [[EXIT_COND]], label %[[END]], label %[[VECTOR_BODY]]
+; CHECK:       [[END]]:
+; CHECK-NEXT:    ret i64 [[N]]
+;
+vector.scevcheck:
+  %cmp = icmp ugt i64 %N, 4294967295
+  br i1 %cmp, label %end, label %vector.body
+vector.body:
+  %index = phi i32 [ 0, %vector.scevcheck ], [ %index.next, %vector.body ]
+  %ext = zext i32 %index to i64
+  %and = and i64 %ext, 4294967295
+  %exit.cond = icmp ugt i64 %and, %N
+  %index.next = add i32 %index, 1
+  br i1 %exit.cond, label %end, label %vector.body
+end:
+  ret i64 %N
+}

>From 25f2443c7f32628aff878c0c0af4de054f31f7c8 Mon Sep 17 00:00:00 2001
From: nasmnc01 <nashe.mncube at arm.com>
Date: Wed, 15 Oct 2025 09:59:09 +0100
Subject: [PATCH 2/2] Enable loop-noop-elim in test

Enable loop-noop-elim in loop-no-op-and-elim.ll test
to highlight the function of the pass.

Change-Id: I2bd963f5390efc8c45e8caa5199729b2a547807a
---
 .../LoopNoOpElimination/loop-no-op-and-elim.ll       | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/llvm/test/Transforms/LoopNoOpElimination/loop-no-op-and-elim.ll b/llvm/test/Transforms/LoopNoOpElimination/loop-no-op-and-elim.ll
index 4441f6c12fcc0..e0c658b99ee72 100644
--- a/llvm/test/Transforms/LoopNoOpElimination/loop-no-op-and-elim.ll
+++ b/llvm/test/Transforms/LoopNoOpElimination/loop-no-op-and-elim.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
-; RUN: opt < %s -S | FileCheck %s
+; RUN: opt < %s -passes=loop-noop-elim -S | FileCheck %s
 
 define i64 @elim_no_op_and(i64 %N) {
 ; CHECK-LABEL: define i64 @elim_no_op_and(
@@ -10,7 +10,7 @@ define i64 @elim_no_op_and(i64 %N) {
 ; CHECK:       [[VECTOR_BODY]]:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_SCEVCHECK]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP1:%.*]] = and i64 [[INDEX]], 4294967295
-; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[TMP1]], 1
+; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 1
 ; CHECK-NEXT:    [[EXIT_COND:%.*]] = icmp ugt i64 [[INDEX_NEXT]], [[N]]
 ; CHECK-NEXT:    br i1 [[EXIT_COND]], label %[[END]], label %[[VECTOR_BODY]]
 ; CHECK:       [[END]]:
@@ -38,7 +38,7 @@ define i64 @elim_no_op_and_reverse_condition_1(i64 %N) {
 ; CHECK:       [[VECTOR_BODY]]:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_SCEVCHECK]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[AND:%.*]] = and i64 [[INDEX]], 4294967295
-; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[AND]], 1
+; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 1
 ; CHECK-NEXT:    [[EXIT_COND:%.*]] = icmp ugt i64 [[INDEX_NEXT]], [[N]]
 ; CHECK-NEXT:    br i1 [[EXIT_COND]], label %[[END]], label %[[VECTOR_BODY]]
 ; CHECK:       [[END]]:
@@ -67,7 +67,7 @@ define i64 @elim_no_op_and_reverse_condition_2(i64 %N) {
 ; CHECK:       [[VECTOR_BODY]]:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_SCEVCHECK]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[AND:%.*]] = and i64 [[INDEX]], 4294967295
-; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[AND]], 1
+; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 1
 ; CHECK-NEXT:    [[EXIT_COND:%.*]] = icmp ult i64 [[N]], [[INDEX_NEXT]]
 ; CHECK-NEXT:    br i1 [[EXIT_COND]], label %[[END]], label %[[VECTOR_BODY]]
 ; CHECK:       [[END]]:
@@ -96,7 +96,7 @@ define i64 @elim_no_op_and_reverse_condition_3(i64 %N) {
 ; CHECK:       [[VECTOR_BODY]]:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_SCEVCHECK]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[AND:%.*]] = and i64 [[INDEX]], 4294967295
-; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[AND]], 1
+; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 1
 ; CHECK-NEXT:    [[EXIT_COND:%.*]] = icmp ult i64 [[N]], [[INDEX_NEXT]]
 ; CHECK-NEXT:    br i1 [[EXIT_COND]], label %[[END]], label %[[VECTOR_BODY]]
 ; CHECK:       [[END]]:
@@ -127,7 +127,7 @@ define i64 @elim_no_op_and_loop_with_preheader(i64 %N) {
 ; CHECK:       [[VECTOR_BODY]]:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PREHEADER]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP1:%.*]] = and i64 [[INDEX]], 4294967295
-; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[TMP1]], 1
+; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 1
 ; CHECK-NEXT:    [[EXIT_COND:%.*]] = icmp ugt i64 [[INDEX_NEXT]], [[N]]
 ; CHECK-NEXT:    br i1 [[EXIT_COND]], label %[[END]], label %[[VECTOR_BODY]]
 ; CHECK:       [[END]]: