[llvm] [Transforms] Add LoopNoOpElimination pass (PR #163534)

Nashe Mncube via llvm-commits llvm-commits at lists.llvm.org
Fri Oct 24 08:11:58 PDT 2025


https://github.com/nasherm updated https://github.com/llvm/llvm-project/pull/163534

>From 3c1ff1f5704ac7a5deba3795ff9822bc73b87937 Mon Sep 17 00:00:00 2001
From: nasmnc01 <nashe.mncube at arm.com>
Date: Mon, 13 Oct 2025 16:21:47 +0100
Subject: [PATCH 1/6] [Transforms] Add LoopNoOpElimination pass

This patch adds the LoopNoOpElimination pass including
appropriate tests. It's been found when benchmarking the vectorizer
and loop flattening that we sometimes generate loops of the following
basic form

```
vector.scevcheck:                                 ; preds = %for.cond1.preheader.lr.ph
  %cmp = icmp ugt i64 %N, 4294967295
  br i1 %cmp, label %end, label %vector.body
vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.scevcheck ], [ %index.next, %vector.body ]
  %and = and i64 %index, 4294967295
  %index.next = add i64 %and, 1
  %exit.cond = icmp ugt i64 %index.next, %N
  br i1 %exit.cond, label %end, label %vector.body
end:
  ret i64 %N
```

In this loop the 'and' mask is introduced as a bounds check
but is a no-op due to the fact the runtime bounds check block
'vector.scevcheck' telegraphs information about the maximum tripcount
of this loop. This patch allows for transformations that turn the above
loop to

```
vector.scevcheck:                                 ; preds = %for.cond1.preheader.lr.ph
  %cmp = icmp ugt i64 %N, 4294967295
  br i1 %cmp, label %end, label %vector.body
vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.scevcheck ], [ %index.next, %vector.body ]
  %index.next = add i64 %index, 1
  %exit.cond = icmp ugt i64 %index.next, %N
  br i1 %exit.cond, label %end, label %vector.body
end:
  ret i64 %N
```

With this patch we've seen performance improvements of up to 8% on internal
benchmarks.

Change-Id: I9bb50138bf92da41a94173a5c6da9131e839560b
---
 .../Transforms/Scalar/LoopNoOpElimination.h   |  52 ++++
 llvm/lib/Passes/PassBuilder.cpp               |   1 +
 llvm/lib/Passes/PassBuilderPipelines.cpp      |   9 +
 llvm/lib/Passes/PassRegistry.def              |   1 +
 llvm/lib/Transforms/Scalar/CMakeLists.txt     |   1 +
 .../Transforms/Scalar/LoopNoOpElimination.cpp | 228 ++++++++++++++
 .../loop-no-op-and-elim.ll                    | 292 ++++++++++++++++++
 7 files changed, 584 insertions(+)
 create mode 100644 llvm/include/llvm/Transforms/Scalar/LoopNoOpElimination.h
 create mode 100644 llvm/lib/Transforms/Scalar/LoopNoOpElimination.cpp
 create mode 100644 llvm/test/Transforms/LoopNoOpElimination/loop-no-op-and-elim.ll

diff --git a/llvm/include/llvm/Transforms/Scalar/LoopNoOpElimination.h b/llvm/include/llvm/Transforms/Scalar/LoopNoOpElimination.h
new file mode 100644
index 0000000000000..38da5713766f1
--- /dev/null
+++ b/llvm/include/llvm/Transforms/Scalar/LoopNoOpElimination.h
@@ -0,0 +1,52 @@
+//===- LoopNoOpElimination.h - Loop No-Op Elimination pass ------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass eliminates no-op operations in loop bodies
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_SCALAR_LOOPNOOPELIMINATION_H
+#define LLVM_TRANSFORMS_SCALAR_LOOPNOOPELIMINATION_H
+
+#include "llvm/Analysis/LoopAnalysisManager.h"
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+class DominatorTree;
+class Function;
+class Instruction;
+class Loop;
+class LoopAccessInfoManager;
+class LoopInfo;
+class ScalarEvolution;
+class TargetLibraryInfo;
+class TargetTransformInfo;
+class OptimizationRemarkEmitter;
+class DataLayout;
+class SCEVExpander;
+
+/// Function pass that eliminates no-op operations in loop bodies.
+class LoopNoOpEliminationPass : public PassInfoMixin<LoopNoOpEliminationPass> {
+public:
+  ScalarEvolution *SE; // Populated by run() when the function has loops.
+  LoopInfo *LI;        // Populated by run().
+  TargetTransformInfo *TTI; // NOTE(review): not assigned by run(); confirm.
+  DominatorTree *DT;
+  TargetLibraryInfo *TLI;
+  OptimizationRemarkEmitter *ORE;
+
+
+  PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+private:
+  bool runImpl(Function &F);
+};
+
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_SCALAR_LOOPNOOPELIMINATION_H
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index 53cf0046bd858..61c67cca17326 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -302,6 +302,7 @@
 #include "llvm/Transforms/Scalar/LoopInstSimplify.h"
 #include "llvm/Transforms/Scalar/LoopInterchange.h"
 #include "llvm/Transforms/Scalar/LoopLoadElimination.h"
+#include "llvm/Transforms/Scalar/LoopNoOpElimination.h"
 #include "llvm/Transforms/Scalar/LoopPassManager.h"
 #include "llvm/Transforms/Scalar/LoopPredication.h"
 #include "llvm/Transforms/Scalar/LoopRotation.h"
diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
index fea0d255cc91a..bca0ac1d3c58d 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -110,6 +110,7 @@
 #include "llvm/Transforms/Scalar/LoopInstSimplify.h"
 #include "llvm/Transforms/Scalar/LoopInterchange.h"
 #include "llvm/Transforms/Scalar/LoopLoadElimination.h"
+#include "llvm/Transforms/Scalar/LoopNoOpElimination.h"
 #include "llvm/Transforms/Scalar/LoopPassManager.h"
 #include "llvm/Transforms/Scalar/LoopRotation.h"
 #include "llvm/Transforms/Scalar/LoopSimplifyCFG.h"
@@ -216,6 +217,11 @@ static cl::opt<bool> EnableLoopFlatten("enable-loop-flatten", cl::init(false),
                                        cl::Hidden,
                                        cl::desc("Enable the LoopFlatten Pass"));
 
+static cl::opt<bool>
+    EnableLoopNoOpElimination("enable-loop-noop-elimination", cl::init(false),
+                              cl::Hidden,
+                              cl::desc("Enable Loop no-op elimination pass"));
+
 // Experimentally allow loop header duplication. This should allow for better
 // optimization at Oz, since loop-idiom recognition can then recognize things
 // like memcpy. If this ends up being useful for many targets, we should drop
@@ -1307,6 +1313,9 @@ void PassBuilder::addVectorPasses(OptimizationLevel Level,
   FPM.addPass(LoopVectorizePass(
       LoopVectorizeOptions(!PTO.LoopInterleaving, !PTO.LoopVectorization)));
 
+  if (EnableLoopNoOpElimination)
+    FPM.addPass(LoopNoOpEliminationPass());
+
   FPM.addPass(InferAlignmentPass());
   if (IsFullLTO) {
     // The vectorizer may have significantly shortened a loop body; unroll
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index 1b1652555cd28..a6b283256101b 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -566,6 +566,7 @@ FUNCTION_PASS("view-dom-only", DomOnlyViewer())
 FUNCTION_PASS("view-post-dom", PostDomViewer())
 FUNCTION_PASS("view-post-dom-only", PostDomOnlyViewer())
 FUNCTION_PASS("wasm-eh-prepare", WasmEHPreparePass())
+FUNCTION_PASS("loop-noop-elim", LoopNoOpEliminationPass())
 #undef FUNCTION_PASS
 
 #ifndef FUNCTION_PASS_WITH_PARAMS
diff --git a/llvm/lib/Transforms/Scalar/CMakeLists.txt b/llvm/lib/Transforms/Scalar/CMakeLists.txt
index 37dbb34605646..c37e2cc756b87 100644
--- a/llvm/lib/Transforms/Scalar/CMakeLists.txt
+++ b/llvm/lib/Transforms/Scalar/CMakeLists.txt
@@ -37,6 +37,7 @@ add_llvm_component_library(LLVMScalarOpts
   LoopFuse.cpp
   LoopIdiomRecognize.cpp
   LoopInstSimplify.cpp
+  LoopNoOpElimination.cpp
   LoopInterchange.cpp
   LoopFlatten.cpp
   LoopLoadElimination.cpp
diff --git a/llvm/lib/Transforms/Scalar/LoopNoOpElimination.cpp b/llvm/lib/Transforms/Scalar/LoopNoOpElimination.cpp
new file mode 100644
index 0000000000000..9bafd10a91ff4
--- /dev/null
+++ b/llvm/lib/Transforms/Scalar/LoopNoOpElimination.cpp
@@ -0,0 +1,228 @@
+//===- LoopNoOpElimination.cpp - Loop No-Op Elimination Pass --------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass attempts to spot and eliminate no-op operations in loop bodies.
+// For example loop Vectorization may create loops like the following.
+//
+// vector.scevcheck:
+//   %1 = add i64 %flatten.tripcount, -1
+//   %2 = icmp ugt i64 %1, 4294967295
+//   br i1 %2, label %scalar.ph, label %vector.ph
+// vector.ph:
+//    %iv = phi i64 [ 0, %vector.scevcheck], [ %iv.next, %vector.ph ]
+//    %m  = and i64 %iv, 4294967295 ; 0xffff_ffff  no op
+//    %p  = getelementptr inbounds <4 x i32>, ptr %A, i64 %m
+//    %load = load <4 x i32>, ptr %p, align 4
+//    %1 = add <4 x i32> %load,  %X
+//    store <4 x i32> %1, ptr %p, align 4
+//    %iv.next = add nuw i64 %iv, 4
+//    %c  = icmp ult i64 %iv.next, %N
+//    br i1 %c, label %vector.ph, label %exit
+//  exit:
+//    ret void
+//
+// The vectorizer creates the SCEV check block to perform
+// runtime IV checks. This block can be used to determine the true
+// range of the IV, as entry into the vector loop is only possible
+// for certain tripcount values.
+//
+// Currently this pass only supports spotting no-op AND operations in loop
+// bodies.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Scalar/LoopNoOpElimination.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopIterator.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/MemorySSA.h"
+#include "llvm/Analysis/MemorySSAUpdater.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
+#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
+#include <iterator>
+#include <optional>
+#include <utility>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "loop-noop-elim"
+
+STATISTIC(NumEliminated, "Number of redundant instructions eliminated");
+
+static BasicBlock *getSCEVCheckBB(Function &F) {
+  // Linear scan of F; the first block named "vector.scevcheck" wins.
+  auto It = llvm::find_if(
+      F, [](BasicBlock &BB) { return BB.getName() == "vector.scevcheck"; });
+
+  return It == F.end() ? nullptr : &*It;
+}
+
+// Use the vector.scevcheck block to determine if we can eliminate an AND mask
+// on the IV: the loop is only entered when the checked value does not exceed
+// a constant, and the latch compare must use that same checked value.
+static bool tryElimAndMaskOnPHI(Loop *L, Instruction *AndInstr, PHINode *IndVar,
+                                ScalarEvolution *SE, Function &F) {
+  Value *Op0 = AndInstr->getOperand(0);
+  Value *Op1 = AndInstr->getOperand(1);
+
+  auto *Mask = dyn_cast<ConstantInt>(Op0 == IndVar ? Op1 : Op0);
+  if (!Mask)
+    return false;
+
+  auto CheckConditional = [](BranchInst *BranchI, CmpInst *CmpI,
+                             unsigned ExpectedPred, BasicBlock *Header,
+                             BasicBlock *PreHeader, Loop *L,
+                             Value *LatchCmpV) -> bool {
+    // Make sure that the conditional operator is what we
+    // expect
+    unsigned CmpIOpcode = CmpI->getPredicate();
+    if (CmpIOpcode != ExpectedPred)
+      return false;
+
+    // The compare is true when the bound is exceeded, so the loop has to be
+    // entered on the false edge, which is successor 1 of the branch
+    BasicBlock *LoopEntry = BranchI->getSuccessor(1);
+    if (LoopEntry != PreHeader && LoopEntry != Header)
+      return false;
+
+    // Check that the conditional variable that is used for the
+    // SCEV check is actually used in the latch compare instruction
+    auto *LatchCmpInst = L->getLatchCmpInst();
+    if (!LatchCmpInst)
+      return false;
+
+    return LatchCmpInst->getOperand(0) == LatchCmpV ||
+           LatchCmpInst->getOperand(1) == LatchCmpV;
+  };
+
+  // Determine if there's a runtime SCEV check block
+  // and use that to determine if we can elim the phinode
+  if (auto *SCEVCheckBB = getSCEVCheckBB(F)) {
+    // Determine if the SCEV check BB branches to the loop preheader
+    // or header
+    BasicBlock *PreHeader = L->getLoopPreheader();
+    BasicBlock *Header = L->getHeader();
+    if (PreHeader && PreHeader->getUniquePredecessor() != SCEVCheckBB &&
+        Header != SCEVCheckBB)
+      return false;
+
+    // We're interested in a SCEV check block with a branch instruction
+    // terminator
+    if (auto *BranchI = dyn_cast<BranchInst>(SCEVCheckBB->getTerminator())) {
+      if (!BranchI->isConditional())
+        return false;
+
+      Value *Condition = BranchI->getCondition();
+      if (auto *CmpI = dyn_cast<CmpInst>(Condition)) {
+        // Check if the condition for the terminating instruction
+        // is doing some comparison with a constant integer. If not
+        // we can't elim our AND mask
+        Value *CmpOp0 = CmpI->getOperand(0);
+        Value *CmpOp1 = CmpI->getOperand(1);
+        auto *CmpConstant = (dyn_cast<ConstantInt>(CmpOp0))
+                                ? dyn_cast<ConstantInt>(CmpOp0)
+                                : dyn_cast<ConstantInt>(CmpOp1);
+        if (!CmpConstant)
+          return false;
+
+        if ((CmpConstant == CmpOp1 &&
+             CheckConditional(BranchI, CmpI, CmpInst::ICMP_UGT, Header,
+                              PreHeader, L, CmpOp0)) ||
+            (CmpConstant == CmpOp0 &&
+             CheckConditional(BranchI, CmpI, CmpInst::ICMP_ULT, Header,
+                              PreHeader, L, CmpOp1))) {
+          // The matched pattern is assumed to bound the IV by CmpConstant
+          // (the IV's start and step are not verified here). The AND is then
+          // a no-op only if the mask is a contiguous run of low ones covering
+          // that bound; a bound above the mask would let the AND clear bits.
+          // TODO: a bound strictly below the mask is also safe, but accepting
+          // it changes the expected output of fail_elim_no_op_and_large_mask.
+          if (Mask->getValue().isMask() &&
+              APInt::isSameValue(CmpConstant->getValue(), Mask->getValue())) {
+            AndInstr->replaceAllUsesWith(IndVar);
+            return true;
+          }
+        }
+      }
+    }
+  }
+
+  return false;
+}
+
+static bool tryElimPHINodeUsers(Loop *L, PHINode *PN, ScalarEvolution *SE,
+                                Function &F) {
+  // Returns true if any user of PN was replaced; only AND users are handled.
+  // Snapshot the users first: a successful elimination RAUWs the AND with PN,
+  // which adds uses to PN's use list while it would otherwise be iterated.
+  SmallVector<User *, 8> Users(PN->users());
+  bool Changed = false;
+  for (User *U : Users) {
+    auto *I = dyn_cast<Instruction>(U); // Null for non-instruction users.
+    if (!I || I->getOpcode() != Instruction::And)
+      continue;
+    if (tryElimAndMaskOnPHI(L, I, PN, SE, F)) {
+      Changed = true;
+      NumEliminated++;
+    }
+  }
+  return Changed;
+}
+
+bool LoopNoOpEliminationPass::runImpl(Function &F) {
+  // Walk every loop in the function, nested ones included, so that each PHI
+  // is checked against the latch and preheader of the loop it belongs to and
+  // not those of an enclosing top-level loop. Only PHIs in a loop header can
+  // be induction variables of that loop, so PHIs in other blocks are skipped.
+  bool Changed = false;
+  for (Loop *L : LI->getLoopsInPreorder()) {
+    BasicBlock *Header = L->getHeader();
+    for (PHINode &PN : Header->phis())
+      Changed |= tryElimPHINodeUsers(L, &PN, SE, F);
+  }
+
+  return Changed;
+}
+
+PreservedAnalyses LoopNoOpEliminationPass::run(Function &F,
+                                               FunctionAnalysisManager &AM) {
+  LI = &AM.getResult<LoopAnalysis>(F);
+  // There are no loops in the function. Return before computing other
+  // expensive analyses.
+  if (LI->empty())
+    return PreservedAnalyses::all();
+  SE = &AM.getResult<ScalarEvolutionAnalysis>(F);
+  DT = &AM.getResult<DominatorTreeAnalysis>(F);
+  TLI = &AM.getResult<TargetLibraryAnalysis>(F);
+  // Nothing was rewritten, so every analysis is still valid.
+  if (!runImpl(F))
+    return PreservedAnalyses::all();
+  // The IR changed: only uses were rewritten (no blocks or edges), so keep
+  // the analyses listed below and let the manager invalidate the rest.
+  PreservedAnalyses PA;
+  PA.preserve<LoopAnalysis>();
+  PA.preserve<DominatorTreeAnalysis>();
+  PA.preserve<ScalarEvolutionAnalysis>();
+  PA.preserve<LoopAccessAnalysis>();
+
+  return PA;
+}
diff --git a/llvm/test/Transforms/LoopNoOpElimination/loop-no-op-and-elim.ll b/llvm/test/Transforms/LoopNoOpElimination/loop-no-op-and-elim.ll
new file mode 100644
index 0000000000000..4441f6c12fcc0
--- /dev/null
+++ b/llvm/test/Transforms/LoopNoOpElimination/loop-no-op-and-elim.ll
@@ -0,0 +1,292 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt < %s -S | FileCheck %s
+
+define i64 @elim_no_op_and(i64 %N) {
+; CHECK-LABEL: define i64 @elim_no_op_and(
+; CHECK-SAME: i64 [[N:%.*]]) {
+; CHECK-NEXT:  [[VECTOR_SCEVCHECK:.*]]:
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ugt i64 [[N]], 4294967295
+; CHECK-NEXT:    br i1 [[TMP0]], label %[[END:.*]], label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_SCEVCHECK]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = and i64 [[INDEX]], 4294967295
+; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[TMP1]], 1
+; CHECK-NEXT:    [[EXIT_COND:%.*]] = icmp ugt i64 [[INDEX_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[EXIT_COND]], label %[[END]], label %[[VECTOR_BODY]]
+; CHECK:       [[END]]:
+; CHECK-NEXT:    ret i64 [[N]]
+;
+vector.scevcheck:
+  %cmp = icmp ugt i64 %N, 4294967295
+  br i1 %cmp, label %end, label %vector.body
+vector.body:
+  %index = phi i64 [ 0, %vector.scevcheck ], [ %index.next, %vector.body ]
+  %and = and i64 %index, 4294967295
+  %index.next = add i64 %and, 1
+  %exit.cond = icmp ugt i64 %index.next, %N
+  br i1 %exit.cond, label %end, label %vector.body
+end:
+  ret i64 %N
+}
+
+define i64 @elim_no_op_and_reverse_condition_1(i64 %N) {
+; CHECK-LABEL: define i64 @elim_no_op_and_reverse_condition_1(
+; CHECK-SAME: i64 [[N:%.*]]) {
+; CHECK-NEXT:  [[VECTOR_SCEVCHECK:.*]]:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 4294967295, [[N]]
+; CHECK-NEXT:    br i1 [[CMP]], label %[[END:.*]], label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_SCEVCHECK]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[AND:%.*]] = and i64 [[INDEX]], 4294967295
+; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[AND]], 1
+; CHECK-NEXT:    [[EXIT_COND:%.*]] = icmp ugt i64 [[INDEX_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[EXIT_COND]], label %[[END]], label %[[VECTOR_BODY]]
+; CHECK:       [[END]]:
+; CHECK-NEXT:    ret i64 [[N]]
+;
+vector.scevcheck:
+  %cmp = icmp ult i64 4294967295, %N
+  br i1 %cmp, label %end, label %vector.body
+vector.body:
+  %index = phi i64 [ 0, %vector.scevcheck ], [ %index.next, %vector.body ]
+  %and = and i64 %index, 4294967295
+  %index.next = add i64 %and, 1
+  %exit.cond = icmp ugt i64 %index.next, %N
+  br i1 %exit.cond, label %end, label %vector.body
+end:
+  ret i64 %N
+}
+
+
+define i64 @elim_no_op_and_reverse_condition_2(i64 %N) {
+; CHECK-LABEL: define i64 @elim_no_op_and_reverse_condition_2(
+; CHECK-SAME: i64 [[N:%.*]]) {
+; CHECK-NEXT:  [[VECTOR_SCEVCHECK:.*]]:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 4294967295, [[N]]
+; CHECK-NEXT:    br i1 [[CMP]], label %[[END:.*]], label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_SCEVCHECK]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[AND:%.*]] = and i64 [[INDEX]], 4294967295
+; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[AND]], 1
+; CHECK-NEXT:    [[EXIT_COND:%.*]] = icmp ult i64 [[N]], [[INDEX_NEXT]]
+; CHECK-NEXT:    br i1 [[EXIT_COND]], label %[[END]], label %[[VECTOR_BODY]]
+; CHECK:       [[END]]:
+; CHECK-NEXT:    ret i64 [[N]]
+;
+vector.scevcheck:
+  %cmp = icmp ult i64 4294967295, %N
+  br i1 %cmp, label %end, label %vector.body
+vector.body:
+  %index = phi i64 [ 0, %vector.scevcheck ], [ %index.next, %vector.body ]
+  %and = and i64 %index, 4294967295
+  %index.next = add i64 %and, 1
+  %exit.cond = icmp ult i64 %N, %index.next
+  br i1 %exit.cond, label %end, label %vector.body
+end:
+  ret i64 %N
+}
+
+
+define i64 @elim_no_op_and_reverse_condition_3(i64 %N) {
+; CHECK-LABEL: define i64 @elim_no_op_and_reverse_condition_3(
+; CHECK-SAME: i64 [[N:%.*]]) {
+; CHECK-NEXT:  [[VECTOR_SCEVCHECK:.*]]:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i64 [[N]], 4294967295
+; CHECK-NEXT:    br i1 [[CMP]], label %[[END:.*]], label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_SCEVCHECK]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[AND:%.*]] = and i64 [[INDEX]], 4294967295
+; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[AND]], 1
+; CHECK-NEXT:    [[EXIT_COND:%.*]] = icmp ult i64 [[N]], [[INDEX_NEXT]]
+; CHECK-NEXT:    br i1 [[EXIT_COND]], label %[[END]], label %[[VECTOR_BODY]]
+; CHECK:       [[END]]:
+; CHECK-NEXT:    ret i64 [[N]]
+;
+vector.scevcheck:
+  %cmp = icmp ugt i64 %N, 4294967295
+  br i1 %cmp, label %end, label %vector.body
+vector.body:
+  %index = phi i64 [ 0, %vector.scevcheck ], [ %index.next, %vector.body ]
+  %and = and i64 %index, 4294967295
+  %index.next = add i64 %and, 1
+  %exit.cond = icmp ult i64 %N, %index.next
+  br i1 %exit.cond, label %end, label %vector.body
+end:
+  ret i64 %N
+}
+
+
+define i64 @elim_no_op_and_loop_with_preheader(i64 %N) {
+; CHECK-LABEL: define i64 @elim_no_op_and_loop_with_preheader(
+; CHECK-SAME: i64 [[N:%.*]]) {
+; CHECK-NEXT:  [[VECTOR_SCEVCHECK:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ugt i64 [[N]], 4294967295
+; CHECK-NEXT:    br i1 [[TMP0]], label %[[END:.*]], label %[[VECTOR_PREHEADER:.*]]
+; CHECK:       [[VECTOR_PREHEADER]]:
+; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PREHEADER]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = and i64 [[INDEX]], 4294967295
+; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[TMP1]], 1
+; CHECK-NEXT:    [[EXIT_COND:%.*]] = icmp ugt i64 [[INDEX_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[EXIT_COND]], label %[[END]], label %[[VECTOR_BODY]]
+; CHECK:       [[END]]:
+; CHECK-NEXT:    ret i64 [[N]]
+;
+vector.scevcheck:
+  %cmp = icmp ugt i64 %N, 4294967295
+  br i1 %cmp, label %end, label %vector.preheader
+vector.preheader:
+  br label %vector.body
+vector.body:
+  %index = phi i64 [ 0, %vector.preheader ], [ %index.next, %vector.body ]
+  %and = and i64 %index, 4294967295
+  %index.next = add i64 %and, 1
+  %exit.cond = icmp ugt i64 %index.next, %N
+  br i1 %exit.cond, label %end, label %vector.body
+end:
+  ret i64 %N
+}
+
+define i64 @fail_elim_no_op_and_latch_not_using_constant(i64 %N, i64 %C) {
+; CHECK-LABEL: define i64 @fail_elim_no_op_and_latch_not_using_constant(
+; CHECK-SAME: i64 [[N:%.*]], i64 [[C:%.*]]) {
+; CHECK-NEXT:  [[VECTOR_SCEVCHECK:.*]]:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i64 [[N]], 4294967295
+; CHECK-NEXT:    br i1 [[CMP]], label %[[END:.*]], label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_SCEVCHECK]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[AND:%.*]] = and i64 [[INDEX]], 4294967295
+; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[AND]], 1
+; CHECK-NEXT:    [[EXIT_COND:%.*]] = icmp ugt i64 [[INDEX_NEXT]], [[C]]
+; CHECK-NEXT:    br i1 [[EXIT_COND]], label %[[END]], label %[[VECTOR_BODY]]
+; CHECK:       [[END]]:
+; CHECK-NEXT:    ret i64 [[N]]
+;
+vector.scevcheck:
+  %cmp = icmp ugt i64 %N, 4294967295
+  br i1 %cmp, label %end, label %vector.body
+vector.body:
+  %index = phi i64 [ 0, %vector.scevcheck ], [ %index.next, %vector.body ]
+  %and = and i64 %index, 4294967295
+  %index.next = add i64 %and, 1
+  %exit.cond = icmp ugt i64 %index.next, %C
+  br i1 %exit.cond, label %end, label %vector.body
+end:
+  ret i64 %N
+
+}
+
+define i64 @fail_elim_no_op_and_signed_conditional(i64 %N) {
+; CHECK-LABEL: define i64 @fail_elim_no_op_and_signed_conditional(
+; CHECK-SAME: i64 [[N:%.*]]) {
+; CHECK-NEXT:  [[VECTOR_SCEVCHECK:.*]]:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i64 [[N]], 4294967295
+; CHECK-NEXT:    br i1 [[CMP]], label %[[END:.*]], label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_SCEVCHECK]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[AND:%.*]] = and i64 [[INDEX]], 4294967295
+; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[AND]], 1
+; CHECK-NEXT:    [[EXIT_COND:%.*]] = icmp ugt i64 [[INDEX_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[EXIT_COND]], label %[[END]], label %[[VECTOR_BODY]]
+; CHECK:       [[END]]:
+; CHECK-NEXT:    ret i64 [[N]]
+;
+vector.scevcheck:                                 ; preds = %for.cond1.preheader.lr.ph
+  %cmp = icmp sgt i64 %N, 4294967295
+  br i1 %cmp, label %end, label %vector.body
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.scevcheck ], [ %index.next, %vector.body ]
+  %and = and i64 %index, 4294967295
+  %index.next = add i64 %and, 1
+  %exit.cond = icmp ugt i64 %index.next, %N
+  br i1 %exit.cond, label %end, label %vector.body
+end:
+  ret i64 %N
+}
+
+define i64 @fail_elim_no_op_and_branch_on_false(i64 %N) {
+; CHECK-LABEL: define i64 @fail_elim_no_op_and_branch_on_false(
+; CHECK-SAME: i64 [[N:%.*]]) {
+; CHECK-NEXT:  [[VECTOR_SCEVCHECK:.*]]:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i64 [[N]], 4294967295
+; CHECK-NEXT:    br i1 [[CMP]], label %[[VECTOR_BODY:.*]], label %[[END:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_SCEVCHECK]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[AND:%.*]] = and i64 [[INDEX]], 4294967295
+; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[AND]], 1
+; CHECK-NEXT:    [[EXIT_COND:%.*]] = icmp ugt i64 [[INDEX_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[EXIT_COND]], label %[[END]], label %[[VECTOR_BODY]]
+; CHECK:       [[END]]:
+; CHECK-NEXT:    ret i64 [[N]]
+;
+vector.scevcheck:
+  %cmp = icmp ugt i64 %N, 4294967295
+  br i1 %cmp, label %vector.body, label %end
+vector.body:
+  %index = phi i64 [ 0, %vector.scevcheck ], [ %index.next, %vector.body ]
+  %and = and i64 %index, 4294967295
+  %index.next = add i64 %and, 1
+  %exit.cond = icmp ugt i64 %index.next, %N
+  br i1 %exit.cond, label %end, label %vector.body
+end:
+  ret i64 %N
+}
+
+define i64 @fail_elim_no_op_and_large_mask(i64 %N) {
+; CHECK-LABEL: define i64 @fail_elim_no_op_and_large_mask(
+; CHECK-SAME: i64 [[N:%.*]]) {
+; CHECK-NEXT:  [[VECTOR_SCEVCHECK:.*]]:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i64 [[N]], 1
+; CHECK-NEXT:    br i1 [[CMP]], label %[[END:.*]], label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_SCEVCHECK]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[AND:%.*]] = and i64 [[INDEX]], 4294967295
+; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[AND]], 1
+; CHECK-NEXT:    [[EXIT_COND:%.*]] = icmp ugt i64 [[INDEX_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[EXIT_COND]], label %[[END]], label %[[VECTOR_BODY]]
+; CHECK:       [[END]]:
+; CHECK-NEXT:    ret i64 [[N]]
+;
+vector.scevcheck:
+  %cmp = icmp ugt i64 %N, 1
+  br i1 %cmp, label %end, label %vector.body
+vector.body:
+  %index = phi i64 [ 0, %vector.scevcheck ], [ %index.next, %vector.body ]
+  %and = and i64 %index, 4294967295
+  %index.next = add i64 %and, 1
+  %exit.cond = icmp ugt i64 %index.next, %N
+  br i1 %exit.cond, label %end, label %vector.body
+end:
+  ret i64 %N
+}
+
+define i64 @fail_elim_no_op_and_mismatched_types(i64 %N) {
+; CHECK-LABEL: define i64 @fail_elim_no_op_and_mismatched_types(
+; CHECK-SAME: i64 [[N:%.*]]) {
+; CHECK-NEXT:  [[VECTOR_SCEVCHECK:.*]]:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i64 [[N]], 4294967295
+; CHECK-NEXT:    br i1 [[CMP]], label %[[END:.*]], label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_SCEVCHECK]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[EXT:%.*]] = zext i32 [[INDEX]] to i64
+; CHECK-NEXT:    [[AND:%.*]] = and i64 [[EXT]], 4294967295
+; CHECK-NEXT:    [[EXIT_COND:%.*]] = icmp ugt i64 [[AND]], [[N]]
+; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 1
+; CHECK-NEXT:    br i1 [[EXIT_COND]], label %[[END]], label %[[VECTOR_BODY]]
+; CHECK:       [[END]]:
+; CHECK-NEXT:    ret i64 [[N]]
+;
+vector.scevcheck:
+  %cmp = icmp ugt i64 %N, 4294967295
+  br i1 %cmp, label %end, label %vector.body
+vector.body:
+  %index = phi i32 [ 0, %vector.scevcheck ], [ %index.next, %vector.body ]
+  %ext = zext i32 %index to i64
+  %and = and i64 %ext, 4294967295
+  %exit.cond = icmp ugt i64 %and, %N
+  %index.next = add i32 %index, 1
+  br i1 %exit.cond, label %end, label %vector.body
+end:
+  ret i64 %N
+}

>From 25f2443c7f32628aff878c0c0af4de054f31f7c8 Mon Sep 17 00:00:00 2001
From: nasmnc01 <nashe.mncube at arm.com>
Date: Wed, 15 Oct 2025 09:59:09 +0100
Subject: [PATCH 2/6] Enable loop-noop-elim in test

Enable loop-noop-elim in loop-no-op-and-elim.ll test
to highlight the function of the pass.

Change-Id: I2bd963f5390efc8c45e8caa5199729b2a547807a
---
 .../LoopNoOpElimination/loop-no-op-and-elim.ll       | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/llvm/test/Transforms/LoopNoOpElimination/loop-no-op-and-elim.ll b/llvm/test/Transforms/LoopNoOpElimination/loop-no-op-and-elim.ll
index 4441f6c12fcc0..e0c658b99ee72 100644
--- a/llvm/test/Transforms/LoopNoOpElimination/loop-no-op-and-elim.ll
+++ b/llvm/test/Transforms/LoopNoOpElimination/loop-no-op-and-elim.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
-; RUN: opt < %s -S | FileCheck %s
+; RUN: opt < %s -passes=loop-noop-elim -S | FileCheck %s
 
 define i64 @elim_no_op_and(i64 %N) {
 ; CHECK-LABEL: define i64 @elim_no_op_and(
@@ -10,7 +10,7 @@ define i64 @elim_no_op_and(i64 %N) {
 ; CHECK:       [[VECTOR_BODY]]:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_SCEVCHECK]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP1:%.*]] = and i64 [[INDEX]], 4294967295
-; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[TMP1]], 1
+; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 1
 ; CHECK-NEXT:    [[EXIT_COND:%.*]] = icmp ugt i64 [[INDEX_NEXT]], [[N]]
 ; CHECK-NEXT:    br i1 [[EXIT_COND]], label %[[END]], label %[[VECTOR_BODY]]
 ; CHECK:       [[END]]:
@@ -38,7 +38,7 @@ define i64 @elim_no_op_and_reverse_condition_1(i64 %N) {
 ; CHECK:       [[VECTOR_BODY]]:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_SCEVCHECK]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[AND:%.*]] = and i64 [[INDEX]], 4294967295
-; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[AND]], 1
+; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 1
 ; CHECK-NEXT:    [[EXIT_COND:%.*]] = icmp ugt i64 [[INDEX_NEXT]], [[N]]
 ; CHECK-NEXT:    br i1 [[EXIT_COND]], label %[[END]], label %[[VECTOR_BODY]]
 ; CHECK:       [[END]]:
@@ -67,7 +67,7 @@ define i64 @elim_no_op_and_reverse_condition_2(i64 %N) {
 ; CHECK:       [[VECTOR_BODY]]:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_SCEVCHECK]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[AND:%.*]] = and i64 [[INDEX]], 4294967295
-; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[AND]], 1
+; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 1
 ; CHECK-NEXT:    [[EXIT_COND:%.*]] = icmp ult i64 [[N]], [[INDEX_NEXT]]
 ; CHECK-NEXT:    br i1 [[EXIT_COND]], label %[[END]], label %[[VECTOR_BODY]]
 ; CHECK:       [[END]]:
@@ -96,7 +96,7 @@ define i64 @elim_no_op_and_reverse_condition_3(i64 %N) {
 ; CHECK:       [[VECTOR_BODY]]:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_SCEVCHECK]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[AND:%.*]] = and i64 [[INDEX]], 4294967295
-; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[AND]], 1
+; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 1
 ; CHECK-NEXT:    [[EXIT_COND:%.*]] = icmp ult i64 [[N]], [[INDEX_NEXT]]
 ; CHECK-NEXT:    br i1 [[EXIT_COND]], label %[[END]], label %[[VECTOR_BODY]]
 ; CHECK:       [[END]]:
@@ -127,7 +127,7 @@ define i64 @elim_no_op_and_loop_with_preheader(i64 %N) {
 ; CHECK:       [[VECTOR_BODY]]:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PREHEADER]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP1:%.*]] = and i64 [[INDEX]], 4294967295
-; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[TMP1]], 1
+; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 1
 ; CHECK-NEXT:    [[EXIT_COND:%.*]] = icmp ugt i64 [[INDEX_NEXT]], [[N]]
 ; CHECK-NEXT:    br i1 [[EXIT_COND]], label %[[END]], label %[[VECTOR_BODY]]
 ; CHECK:       [[END]]:

>From f3ef02e53d4426eb8b2f13a6734d0fca41d55202 Mon Sep 17 00:00:00 2001
From: nasmnc01 <nashe.mncube at arm.com>
Date: Wed, 15 Oct 2025 11:35:34 +0100
Subject: [PATCH 3/6] Code formatting

Change-Id: I004c6b1c3a6889e20d0138c59bd570eadfd7896e
---
 llvm/include/llvm/Transforms/Scalar/LoopNoOpElimination.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/include/llvm/Transforms/Scalar/LoopNoOpElimination.h b/llvm/include/llvm/Transforms/Scalar/LoopNoOpElimination.h
index 38da5713766f1..3a92aeebd50ef 100644
--- a/llvm/include/llvm/Transforms/Scalar/LoopNoOpElimination.h
+++ b/llvm/include/llvm/Transforms/Scalar/LoopNoOpElimination.h
@@ -41,8 +41,8 @@ class LoopNoOpEliminationPass : public PassInfoMixin<LoopNoOpEliminationPass> {
   TargetLibraryInfo *TLI;
   OptimizationRemarkEmitter *ORE;
 
-
   PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+
 private:
   bool runImpl(Function &F);
 };

>From 3eed0a58fe7106db615d3e46f5eb1127e38e353a Mon Sep 17 00:00:00 2001
From: nasmnc01 <nashe.mncube at arm.com>
Date: Fri, 24 Oct 2025 15:42:14 +0100
Subject: [PATCH 4/6] Move LoopNoOpElim logic to VPlan

This patch responds to review comments by moving
LoopNoOpElimination logic to a VPlanTransform

Change-Id: I59651481c17595d9a1ec6c5e3b1bebef157378a2
---
 .../Transforms/Scalar/LoopNoOpElimination.h   |  52 ----
 llvm/lib/Passes/PassBuilder.cpp               |   1 -
 llvm/lib/Passes/PassBuilderPipelines.cpp      |   9 -
 llvm/lib/Passes/PassRegistry.def              |   1 -
 llvm/lib/Transforms/Scalar/CMakeLists.txt     |   1 -
 .../Transforms/Scalar/LoopNoOpElimination.cpp | 228 --------------
 .../Transforms/Vectorize/VPlanTransforms.cpp  | 140 +++++++++
 .../Transforms/Vectorize/VPlanTransforms.h    |   5 +
 .../loop-no-op-and-elim.ll                    | 292 ------------------
 ...vplan-transforms-remove-redundant-masks.ll |  88 ++++++
 10 files changed, 233 insertions(+), 584 deletions(-)
 delete mode 100644 llvm/include/llvm/Transforms/Scalar/LoopNoOpElimination.h
 delete mode 100644 llvm/lib/Transforms/Scalar/LoopNoOpElimination.cpp
 delete mode 100644 llvm/test/Transforms/LoopNoOpElimination/loop-no-op-and-elim.ll
 create mode 100644 llvm/test/Transforms/LoopVectorize/vplan-transforms-remove-redundant-masks.ll

diff --git a/llvm/include/llvm/Transforms/Scalar/LoopNoOpElimination.h b/llvm/include/llvm/Transforms/Scalar/LoopNoOpElimination.h
deleted file mode 100644
index 3a92aeebd50ef..0000000000000
--- a/llvm/include/llvm/Transforms/Scalar/LoopNoOpElimination.h
+++ /dev/null
@@ -1,52 +0,0 @@
-//===- LoopNoOpElimination.h - Loop No-Op Elimination pass ------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass eliminates no-op operations in loop bodies
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_TRANSFORMS_SCALAR_LOOPNOOPELIMINATION_H
-#define LLVM_TRANSFORMS_SCALAR_LOOPNOOPELIMINATION_H
-
-#include "llvm/Analysis/LoopAnalysisManager.h"
-#include "llvm/IR/PassManager.h"
-
-namespace llvm {
-
-class DominatorTree;
-class Function;
-class Instruction;
-class Loop;
-class LoopAccessInfoManager;
-class LoopInfo;
-class ScalarEvolution;
-class TargetLibraryInfo;
-class TargetTransformInfo;
-class OptimizationRemarkEmitter;
-class DataLayout;
-class SCEVExpander;
-
-/// Performs Loop No-Op Elimination Pass.
-class LoopNoOpEliminationPass : public PassInfoMixin<LoopNoOpEliminationPass> {
-public:
-  ScalarEvolution *SE;
-  LoopInfo *LI;
-  TargetTransformInfo *TTI;
-  DominatorTree *DT;
-  TargetLibraryInfo *TLI;
-  OptimizationRemarkEmitter *ORE;
-
-  PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
-
-private:
-  bool runImpl(Function &F);
-};
-
-} // end namespace llvm
-
-#endif // LLVM_TRANSFORMS_SCALAR_LOOPNOOPELIMINATION_H
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index 61c67cca17326..53cf0046bd858 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -302,7 +302,6 @@
 #include "llvm/Transforms/Scalar/LoopInstSimplify.h"
 #include "llvm/Transforms/Scalar/LoopInterchange.h"
 #include "llvm/Transforms/Scalar/LoopLoadElimination.h"
-#include "llvm/Transforms/Scalar/LoopNoOpElimination.h"
 #include "llvm/Transforms/Scalar/LoopPassManager.h"
 #include "llvm/Transforms/Scalar/LoopPredication.h"
 #include "llvm/Transforms/Scalar/LoopRotation.h"
diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
index bca0ac1d3c58d..fea0d255cc91a 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -110,7 +110,6 @@
 #include "llvm/Transforms/Scalar/LoopInstSimplify.h"
 #include "llvm/Transforms/Scalar/LoopInterchange.h"
 #include "llvm/Transforms/Scalar/LoopLoadElimination.h"
-#include "llvm/Transforms/Scalar/LoopNoOpElimination.h"
 #include "llvm/Transforms/Scalar/LoopPassManager.h"
 #include "llvm/Transforms/Scalar/LoopRotation.h"
 #include "llvm/Transforms/Scalar/LoopSimplifyCFG.h"
@@ -217,11 +216,6 @@ static cl::opt<bool> EnableLoopFlatten("enable-loop-flatten", cl::init(false),
                                        cl::Hidden,
                                        cl::desc("Enable the LoopFlatten Pass"));
 
-static cl::opt<bool>
-    EnableLoopNoOpElimination("enable-loop-noop-elimination", cl::init(false),
-                              cl::Hidden,
-                              cl::desc("Enable Loop no-op elimination pass"));
-
 // Experimentally allow loop header duplication. This should allow for better
 // optimization at Oz, since loop-idiom recognition can then recognize things
 // like memcpy. If this ends up being useful for many targets, we should drop
@@ -1313,9 +1307,6 @@ void PassBuilder::addVectorPasses(OptimizationLevel Level,
   FPM.addPass(LoopVectorizePass(
       LoopVectorizeOptions(!PTO.LoopInterleaving, !PTO.LoopVectorization)));
 
-  if (EnableLoopNoOpElimination)
-    FPM.addPass(LoopNoOpEliminationPass());
-
   FPM.addPass(InferAlignmentPass());
   if (IsFullLTO) {
     // The vectorizer may have significantly shortened a loop body; unroll
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index a6b283256101b..1b1652555cd28 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -566,7 +566,6 @@ FUNCTION_PASS("view-dom-only", DomOnlyViewer())
 FUNCTION_PASS("view-post-dom", PostDomViewer())
 FUNCTION_PASS("view-post-dom-only", PostDomOnlyViewer())
 FUNCTION_PASS("wasm-eh-prepare", WasmEHPreparePass())
-FUNCTION_PASS("loop-noop-elim", LoopNoOpEliminationPass())
 #undef FUNCTION_PASS
 
 #ifndef FUNCTION_PASS_WITH_PARAMS
diff --git a/llvm/lib/Transforms/Scalar/CMakeLists.txt b/llvm/lib/Transforms/Scalar/CMakeLists.txt
index c37e2cc756b87..37dbb34605646 100644
--- a/llvm/lib/Transforms/Scalar/CMakeLists.txt
+++ b/llvm/lib/Transforms/Scalar/CMakeLists.txt
@@ -37,7 +37,6 @@ add_llvm_component_library(LLVMScalarOpts
   LoopFuse.cpp
   LoopIdiomRecognize.cpp
   LoopInstSimplify.cpp
-  LoopNoOpElimination.cpp
   LoopInterchange.cpp
   LoopFlatten.cpp
   LoopLoadElimination.cpp
diff --git a/llvm/lib/Transforms/Scalar/LoopNoOpElimination.cpp b/llvm/lib/Transforms/Scalar/LoopNoOpElimination.cpp
deleted file mode 100644
index 9bafd10a91ff4..0000000000000
--- a/llvm/lib/Transforms/Scalar/LoopNoOpElimination.cpp
+++ /dev/null
@@ -1,228 +0,0 @@
-//===- LoopNoOpElimination.cpp - Loop No-Op Elimination Pass --------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass attempts to spot and eliminate no-op operations in loop bodies.
-// For example loop Vectorization may create loops like the following.
-//
-// vector.scevcheck:
-//   %1 = add i64 %flatten.tripcount, -1
-//   %2 = icmp ugt i64 %1, 4294967295
-//   br i1 %2, label %scalar.ph, label %vector.ph
-// vector.ph:
-//    %iv = phi i64 [ 0, %vector.scevcheck], [ %iv.next, %vector.ph ]
-//    %m  = and i64 %iv, 4294967295 ; 0xffff_fffe  no op
-//    %p  = getelementptr inbounds <4 x i32>, ptr %A, i64 %m
-//    %load = load <4 x i32>, ptr %p, align 4
-//    %1 = add <4 x i32> %load,  %X
-//    store <4 x i32> %1, ptr %p, align 4
-//    %iv.next = add nuw i64 %iv, 4
-//    %c  = icmp ult i64 %iv.next, %N
-//    br i1 %c, label %vector.ph, label %exit
-//  exit:
-//    ret void
-//
-// The vectorizer creates the SCEV check block to perform
-// runtime IV checks. This block can be used to determine true
-// range of the the IV as entry into the vector loop is only possible
-// for certain tripcount values.
-//
-// Currently this pass only supports spotting no-op AND operations in loop
-// bodies.
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Transforms/Scalar/LoopNoOpElimination.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/AssumptionCache.h"
-#include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/LoopIterator.h"
-#include "llvm/Analysis/LoopPass.h"
-#include "llvm/Analysis/MemorySSA.h"
-#include "llvm/Analysis/MemorySSAUpdater.h"
-#include "llvm/Analysis/ScalarEvolution.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/Dominators.h"
-#include "llvm/IR/Instruction.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/Support/Casting.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Transforms/Utils/LoopUtils.h"
-#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
-#include <iterator>
-#include <optional>
-#include <utility>
-
-using namespace llvm;
-
-#define DEBUG_TYPE "loop-noop-elim"
-
-STATISTIC(NumEliminated, "Number of redundant instructions eliminated");
-
-static BasicBlock *getSCEVCheckBB(Function &F) {
-  for (BasicBlock &BB : F)
-    if (BB.getName() == "vector.scevcheck")
-      return &BB;
-
-  return nullptr;
-}
-
-// Use vector.check block to determine if we can eliminate a bounds check on
-// the IV if we know that we can only enter the vector block if the tripcount
-// is within certain bounds.
-static bool tryElimAndMaskOnPHI(Loop *L, Instruction *AndInstr, PHINode *IndVar,
-                                ScalarEvolution *SE, Function &F) {
-  Value *Op0 = AndInstr->getOperand(0);
-  Value *Op1 = AndInstr->getOperand(1);
-
-  auto *Mask = dyn_cast<ConstantInt>(Op0 == IndVar ? Op1 : Op0);
-  if (!Mask)
-    return false;
-
-  auto CheckConditional = [](BranchInst *BranchI, CmpInst *CmpI,
-                             unsigned ExpectedPred, BasicBlock *Header,
-                             BasicBlock *PreHeader, Loop *L,
-                             Value *LatchCmpV) -> bool {
-    // Make sure that the conditional operator is what we
-    // expect
-    unsigned CmpIOpcode = CmpI->getPredicate();
-    if (CmpIOpcode != ExpectedPred)
-      return false;
-
-    // Check that in the case of a true result we actually
-    // branch to the loop
-    Value *TrueDest = BranchI->getOperand(1);
-    if (TrueDest != PreHeader && TrueDest != Header)
-      return false;
-
-    // Check that the conditional variable that is used for the
-    // SCEV check is actually used in the latch compare instruction
-    auto *LatchCmpInst = L->getLatchCmpInst();
-    if (!LatchCmpInst)
-      return false;
-
-    if (LatchCmpInst->getOperand(0) != LatchCmpV &&
-        LatchCmpInst->getOperand(1) != LatchCmpV) {
-      return false;
-    }
-
-    return true;
-  };
-
-  // Determine if there's a runtime SCEV check block
-  // and use that to determine if we can elim the phinode
-  if (auto *SCEVCheckBB = getSCEVCheckBB(F)) {
-    // Determine if the SCEV check BB branches to the loop preheader
-    // or header
-    BasicBlock *PreHeader = L->getLoopPreheader();
-    BasicBlock *Header = L->getHeader();
-    if (PreHeader && PreHeader->getUniquePredecessor() != SCEVCheckBB &&
-        Header != SCEVCheckBB)
-      return false;
-
-    // We're interested in a SCEV check block with a branch instruction
-    // terminator
-    if (auto *BranchI = dyn_cast<BranchInst>(SCEVCheckBB->getTerminator())) {
-      if (!BranchI->isConditional())
-        return false;
-
-      Value *Condition = BranchI->getCondition();
-      if (auto *CmpI = dyn_cast<CmpInst>(Condition)) {
-        // Check if the condition for the terminating instruction
-        // is doing some comparison with a constant integer. If not
-        // we can't elim our AND mask
-        Value *CmpOp0 = CmpI->getOperand(0);
-        Value *CmpOp1 = CmpI->getOperand(1);
-        auto *CmpConstant = (dyn_cast<ConstantInt>(CmpOp0))
-                                ? dyn_cast<ConstantInt>(CmpOp0)
-                                : dyn_cast<ConstantInt>(CmpOp1);
-        if (!CmpConstant)
-          return false;
-
-        if ((CmpConstant == CmpOp1 &&
-             CheckConditional(BranchI, CmpI, CmpInst::ICMP_UGT, Header,
-                              PreHeader, L, CmpOp0)) ||
-            (CmpConstant == CmpOp0 &&
-             CheckConditional(BranchI, CmpI, CmpInst::ICMP_ULT, Header,
-                              PreHeader, L, CmpOp1))) {
-
-          // TODO: inverse operation needs to be checked
-          // We can eliminate the AND mask
-          if (CmpConstant->uge(Mask->getZExtValue())) {
-            AndInstr->replaceAllUsesWith(IndVar);
-            return true;
-          }
-        }
-      }
-    }
-  }
-
-  return false;
-}
-
-static bool tryElimPHINodeUsers(Loop *L, PHINode *PN, ScalarEvolution *SE,
-                                Function &F) {
-  bool Changed = false;
-  for (auto *U : PN->users()) {
-    auto *I = dyn_cast<Instruction>(U);
-    switch (I->getOpcode()) {
-    case Instruction::And:
-      if (tryElimAndMaskOnPHI(L, I, PN, SE, F)) {
-        Changed |= true;
-        NumEliminated++;
-      }
-      break;
-    default:
-      break;
-    }
-  }
-  return Changed;
-}
-
-bool LoopNoOpEliminationPass::runImpl(Function &F) {
-  bool Changed = false;
-  for (Loop *L : *LI) {
-    LoopBlocksRPO RPOT(L);
-    RPOT.perform(LI);
-
-    for (BasicBlock *BB : RPOT)
-      for (Instruction &I : *BB)
-        if (auto *PN = dyn_cast<PHINode>(&I))
-          Changed |= tryElimPHINodeUsers(L, PN, SE, F);
-  }
-
-  return Changed;
-}
-
-PreservedAnalyses LoopNoOpEliminationPass::run(Function &F,
-                                               FunctionAnalysisManager &AM) {
-  LI = &AM.getResult<LoopAnalysis>(F);
-  // There are no loops in the function. Return before computing other
-  // expensive analyses.
-  if (LI->empty())
-    return PreservedAnalyses::all();
-  SE = &AM.getResult<ScalarEvolutionAnalysis>(F);
-  DT = &AM.getResult<DominatorTreeAnalysis>(F);
-  TLI = &AM.getResult<TargetLibraryAnalysis>(F);
-
-  if (runImpl(F))
-    return PreservedAnalyses::all();
-
-  PreservedAnalyses PA;
-  PA.preserve<LoopAnalysis>();
-  PA.preserve<DominatorTreeAnalysis>();
-  PA.preserve<ScalarEvolutionAnalysis>();
-  PA.preserve<LoopAccessAnalysis>();
-
-  return PA;
-}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 40b7e8df7aec9..ac356f397b5ca 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -4406,3 +4406,143 @@ void VPlanTransforms::addExitUsersForFirstOrderRecurrences(VPlan &Plan,
     }
   }
 }
+
+// Returns true when \p I is an `and` of a PHI with a constant low-bit mask and
+// \p SCEVCheckConditional is an unsigned compare of a value against a constant
+// that is no larger than that mask (`icmp ugt X, C` or `icmp ult C, X`).
+// NOTE(review): this assumes the vector loop is only entered when the compare
+// is false and that X bounds the PHI; neither is verified here - confirm.
+static bool canElimAndMaskOnPHI(Instruction *I, VPIRBasicBlock *SCEVCheckBB,
+                                Value *SCEVCheckConditional) {
+  if (I->getOpcode() != Instruction::And)
+    return false;
+
+  Value *Op0 = I->getOperand(0);
+  Value *Op1 = I->getOperand(1);
+  auto *Mask = dyn_cast<ConstantInt>(Op0);
+  auto *IndVar = dyn_cast<PHINode>(Op1);
+  if (!Mask) {
+    Mask = dyn_cast<ConstantInt>(Op1);
+    IndVar = dyn_cast<PHINode>(Op0);
+  }
+  // Only a contiguous low-bit mask (2^k - 1) preserves every value <= Mask.
+  if (!Mask || !IndVar || !Mask->getValue().isMask())
+    return false;
+
+  auto *CmpI = dyn_cast_or_null<CmpInst>(SCEVCheckConditional);
+  if (!CmpI)
+    return false;
+
+  // The runtime check must compare against a constant integer of the mask's
+  // type; otherwise no bound is known or the APInt widths cannot be compared.
+  Value *CmpOp0 = CmpI->getOperand(0);
+  Value *CmpOp1 = CmpI->getOperand(1);
+  auto *CmpConstant = dyn_cast<ConstantInt>(CmpOp0);
+  if (!CmpConstant)
+    CmpConstant = dyn_cast<ConstantInt>(CmpOp1);
+  if (!CmpConstant || CmpConstant->getType() != Mask->getType())
+    return false;
+
+  CmpInst::Predicate Pred = CmpI->getPredicate();
+  bool BoundsFromAbove =
+      (CmpConstant == CmpOp1 && Pred == CmpInst::ICMP_UGT) ||
+      (CmpConstant == CmpOp0 && Pred == CmpInst::ICMP_ULT);
+  // X <= C on the vector path, so the mask is a no-op only when C <= Mask.
+  return BoundsFromAbove && Mask->getValue().uge(CmpConstant->getValue());
+}
+
+// Check that \p Dest is reachable from \p Src; each block is expanded once.
+static bool CheckPathFromSrcBBToDestBB(VPBlockBase *Src, VPBlockBase *Dest) {
+  if (!Src || !Dest)
+    return false;
+  SmallPtrSet<VPBlockBase *, 8> Visited;
+  SmallVector<VPBlockBase *> Worklist = to_vector(Src->getSuccessors());
+  while (!Worklist.empty() && Worklist.back() != Dest) {
+    VPBlockBase *VPBB = Worklist.pop_back_val();
+    if (Visited.insert(VPBB).second)
+      append_range(Worklist, VPBB->getSuccessors());
+  }
+  // The loop stops early only when Dest is on top of the worklist.
+  return !Worklist.empty();
+}
+
+// Attempt to spot and eliminate no-op AND operations in loop bodies.
+// For example, loop vectorization may create loops like the following:
+//
+// vector.scevcheck:
+//   %1 = add i64 %flatten.tripcount, -1
+//   %2 = icmp ugt i64 %1, 4294967295
+//   br i1 %2, label %scalar.ph, label %vector.ph
+// vector.ph:
+//    %iv = phi i64 [ 0, %vector.scevcheck], [ %iv.next, %vector.ph ]
+//    %m  = and i64 %iv, 4294967295 ; 0xffff_ffff, no-op
+//    %p  = getelementptr inbounds <4 x i32>, ptr %A, i64 %m
+//    %load = load <4 x i32>, ptr %p, align 4
+//    %sum = add <4 x i32> %load,  %X
+//    store <4 x i32> %sum, ptr %p, align 4
+//    %iv.next = add nuw i64 %iv, 4
+//    %c  = icmp ult i64 %iv.next, %N
+//    br i1 %c, label %vector.ph, label %exit
+//  exit:
+//    ret void
+//
+// The vectorizer creates the SCEV check block to perform runtime IV checks.
+// Entry into the vector loop is only possible for certain trip counts, so
+// the check bounds the true range of the IV.
+void VPlanTransforms::removeRedundantAndMasks(VPlan &Plan) {
+  // Locate the IR block named "vector.scevcheck" anywhere in the plan.
+  auto FindSCEVCheckBlock = [&]() -> VPIRBasicBlock * {
+    for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
+             vp_depth_first_deep(Plan.getEntry()))) {
+      if (auto *IRBB = dyn_cast<VPIRBasicBlock>(VPBB))
+        if (IRBB->getIRBasicBlock()->getName() == "vector.scevcheck")
+          return IRBB;
+    }
+    return nullptr;
+  };
+
+  // Forward all users of the AND recipe to the header phi it operates on.
+  auto FindPHIRecipeToReplaceAnd = [&](VPBasicBlock *VPBB,
+                                       VPSingleDefRecipe *ToReplace) -> void {
+    for (auto &PHI : VPBB->phis()) {
+      auto *VPI = dyn_cast<VPSingleDefRecipe>(&PHI);
+      if (!VPI)
+        continue;
+      if (ToReplace->getOperand(0) == VPI || ToReplace->getOperand(1) == VPI) {
+        ToReplace->replaceAllUsesWith(VPI);
+        return;
+      }
+    }
+  };
+
+  VPIRBasicBlock *SCEVCheckBB = FindSCEVCheckBlock();
+  VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
+  if (!SCEVCheckBB || !LoopRegion)
+    return;
+
+  // The check only constrains the loop if it can reach the preheader or loop.
+  if (!CheckPathFromSrcBBToDestBB(SCEVCheckBB, Plan.getVectorPreheader()) &&
+      !CheckPathFromSrcBBToDestBB(SCEVCheckBB, LoopRegion))
+    return;
+
+  auto *Term = dyn_cast_or_null<VPInstruction>(SCEVCheckBB->getTerminator());
+  if (!Term || Term->getOpcode() != VPInstruction::BranchOnCond)
+    return;
+
+  // getLiveInIRValue() is only valid on live-ins; bail out on recipe results.
+  VPValue *Cond = Term->getOperand(0);
+  if (!Cond->isLiveIn())
+    return;
+  Value *CondV = Cond->getLiveInIRValue();
+
+  VPBasicBlock *Header = LoopRegion->getEntryBasicBlock();
+  for (VPRecipeBase &R : Header->getRecipeList()) {
+    auto *VPI = dyn_cast<VPSingleDefRecipe>(&R);
+    if (!VPI)
+      continue;
+    auto *I = dyn_cast_or_null<Instruction>(VPI->getUnderlyingValue());
+    // Only the first eligible mask is rewritten, then the transform stops.
+    if (I && canElimAndMaskOnPHI(I, SCEVCheckBB, CondV))
+      return FindPHIRecipeToReplaceAnd(Header, VPI);
+  }
+}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index 5a8a2bbc2975e..da19e03b4647d 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -376,6 +376,11 @@ struct VPlanTransforms {
   /// users in the original exit block using the VPIRInstruction wrapping to the
   /// LCSSA phi.
   static void addExitUsersForFirstOrderRecurrences(VPlan &Plan, VFRange &Range);
+
+  /// Make use of runtime SCEV check blocks generated by the LoopVectorizer
+  /// to see if we can eliminate bounds checking AND instructions in loop
+  /// blocks.
+  static void removeRedundantAndMasks(VPlan &Plan);
 };
 
 } // namespace llvm
diff --git a/llvm/test/Transforms/LoopNoOpElimination/loop-no-op-and-elim.ll b/llvm/test/Transforms/LoopNoOpElimination/loop-no-op-and-elim.ll
deleted file mode 100644
index e0c658b99ee72..0000000000000
--- a/llvm/test/Transforms/LoopNoOpElimination/loop-no-op-and-elim.ll
+++ /dev/null
@@ -1,292 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
-; RUN: opt < %s -passes=loop-noop-elim -S | FileCheck %s
-
-define i64 @elim_no_op_and(i64 %N) {
-; CHECK-LABEL: define i64 @elim_no_op_and(
-; CHECK-SAME: i64 [[N:%.*]]) {
-; CHECK-NEXT:  [[VECTOR_SCEVCHECK:.*]]:
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ugt i64 [[N]], 4294967295
-; CHECK-NEXT:    br i1 [[TMP0]], label %[[END:.*]], label %[[VECTOR_BODY:.*]]
-; CHECK:       [[VECTOR_BODY]]:
-; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_SCEVCHECK]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP1:%.*]] = and i64 [[INDEX]], 4294967295
-; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 1
-; CHECK-NEXT:    [[EXIT_COND:%.*]] = icmp ugt i64 [[INDEX_NEXT]], [[N]]
-; CHECK-NEXT:    br i1 [[EXIT_COND]], label %[[END]], label %[[VECTOR_BODY]]
-; CHECK:       [[END]]:
-; CHECK-NEXT:    ret i64 [[N]]
-;
-vector.scevcheck:
-  %cmp = icmp ugt i64 %N, 4294967295
-  br i1 %cmp, label %end, label %vector.body
-vector.body:
-  %index = phi i64 [ 0, %vector.scevcheck ], [ %index.next, %vector.body ]
-  %and = and i64 %index, 4294967295
-  %index.next = add i64 %and, 1
-  %exit.cond = icmp ugt i64 %index.next, %N
-  br i1 %exit.cond, label %end, label %vector.body
-end:
-  ret i64 %N
-}
-
-define i64 @elim_no_op_and_reverse_condition_1(i64 %N) {
-; CHECK-LABEL: define i64 @elim_no_op_and_reverse_condition_1(
-; CHECK-SAME: i64 [[N:%.*]]) {
-; CHECK-NEXT:  [[VECTOR_SCEVCHECK:.*]]:
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 4294967295, [[N]]
-; CHECK-NEXT:    br i1 [[CMP]], label %[[END:.*]], label %[[VECTOR_BODY:.*]]
-; CHECK:       [[VECTOR_BODY]]:
-; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_SCEVCHECK]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[AND:%.*]] = and i64 [[INDEX]], 4294967295
-; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 1
-; CHECK-NEXT:    [[EXIT_COND:%.*]] = icmp ugt i64 [[INDEX_NEXT]], [[N]]
-; CHECK-NEXT:    br i1 [[EXIT_COND]], label %[[END]], label %[[VECTOR_BODY]]
-; CHECK:       [[END]]:
-; CHECK-NEXT:    ret i64 [[N]]
-;
-vector.scevcheck:
-  %cmp = icmp ult i64 4294967295, %N
-  br i1 %cmp, label %end, label %vector.body
-vector.body:
-  %index = phi i64 [ 0, %vector.scevcheck ], [ %index.next, %vector.body ]
-  %and = and i64 %index, 4294967295
-  %index.next = add i64 %and, 1
-  %exit.cond = icmp ugt i64 %index.next, %N
-  br i1 %exit.cond, label %end, label %vector.body
-end:
-  ret i64 %N
-}
-
-
-define i64 @elim_no_op_and_reverse_condition_2(i64 %N) {
-; CHECK-LABEL: define i64 @elim_no_op_and_reverse_condition_2(
-; CHECK-SAME: i64 [[N:%.*]]) {
-; CHECK-NEXT:  [[VECTOR_SCEVCHECK:.*]]:
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 4294967295, [[N]]
-; CHECK-NEXT:    br i1 [[CMP]], label %[[END:.*]], label %[[VECTOR_BODY:.*]]
-; CHECK:       [[VECTOR_BODY]]:
-; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_SCEVCHECK]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[AND:%.*]] = and i64 [[INDEX]], 4294967295
-; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 1
-; CHECK-NEXT:    [[EXIT_COND:%.*]] = icmp ult i64 [[N]], [[INDEX_NEXT]]
-; CHECK-NEXT:    br i1 [[EXIT_COND]], label %[[END]], label %[[VECTOR_BODY]]
-; CHECK:       [[END]]:
-; CHECK-NEXT:    ret i64 [[N]]
-;
-vector.scevcheck:
-  %cmp = icmp ult i64 4294967295, %N
-  br i1 %cmp, label %end, label %vector.body
-vector.body:
-  %index = phi i64 [ 0, %vector.scevcheck ], [ %index.next, %vector.body ]
-  %and = and i64 %index, 4294967295
-  %index.next = add i64 %and, 1
-  %exit.cond = icmp ult i64 %N, %index.next
-  br i1 %exit.cond, label %end, label %vector.body
-end:
-  ret i64 %N
-}
-
-
-define i64 @elim_no_op_and_reverse_condition_3(i64 %N) {
-; CHECK-LABEL: define i64 @elim_no_op_and_reverse_condition_3(
-; CHECK-SAME: i64 [[N:%.*]]) {
-; CHECK-NEXT:  [[VECTOR_SCEVCHECK:.*]]:
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i64 [[N]], 4294967295
-; CHECK-NEXT:    br i1 [[CMP]], label %[[END:.*]], label %[[VECTOR_BODY:.*]]
-; CHECK:       [[VECTOR_BODY]]:
-; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_SCEVCHECK]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[AND:%.*]] = and i64 [[INDEX]], 4294967295
-; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 1
-; CHECK-NEXT:    [[EXIT_COND:%.*]] = icmp ult i64 [[N]], [[INDEX_NEXT]]
-; CHECK-NEXT:    br i1 [[EXIT_COND]], label %[[END]], label %[[VECTOR_BODY]]
-; CHECK:       [[END]]:
-; CHECK-NEXT:    ret i64 [[N]]
-;
-vector.scevcheck:
-  %cmp = icmp ugt i64 %N, 4294967295
-  br i1 %cmp, label %end, label %vector.body
-vector.body:
-  %index = phi i64 [ 0, %vector.scevcheck ], [ %index.next, %vector.body ]
-  %and = and i64 %index, 4294967295
-  %index.next = add i64 %and, 1
-  %exit.cond = icmp ult i64 %N, %index.next
-  br i1 %exit.cond, label %end, label %vector.body
-end:
-  ret i64 %N
-}
-
-
-define i64 @elim_no_op_and_loop_with_preheader(i64 %N) {
-; CHECK-LABEL: define i64 @elim_no_op_and_loop_with_preheader(
-; CHECK-SAME: i64 [[N:%.*]]) {
-; CHECK-NEXT:  [[VECTOR_SCEVCHECK:.*:]]
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ugt i64 [[N]], 4294967295
-; CHECK-NEXT:    br i1 [[TMP0]], label %[[END:.*]], label %[[VECTOR_PREHEADER:.*]]
-; CHECK:       [[VECTOR_PREHEADER]]:
-; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
-; CHECK:       [[VECTOR_BODY]]:
-; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PREHEADER]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP1:%.*]] = and i64 [[INDEX]], 4294967295
-; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 1
-; CHECK-NEXT:    [[EXIT_COND:%.*]] = icmp ugt i64 [[INDEX_NEXT]], [[N]]
-; CHECK-NEXT:    br i1 [[EXIT_COND]], label %[[END]], label %[[VECTOR_BODY]]
-; CHECK:       [[END]]:
-; CHECK-NEXT:    ret i64 [[N]]
-;
-vector.scevcheck:
-  %cmp = icmp ugt i64 %N, 4294967295
-  br i1 %cmp, label %end, label %vector.preheader
-vector.preheader:
-  br label %vector.body
-vector.body:
-  %index = phi i64 [ 0, %vector.preheader ], [ %index.next, %vector.body ]
-  %and = and i64 %index, 4294967295
-  %index.next = add i64 %and, 1
-  %exit.cond = icmp ugt i64 %index.next, %N
-  br i1 %exit.cond, label %end, label %vector.body
-end:
-  ret i64 %N
-}
-
-define i64 @fail_elim_no_op_and_latch_not_using_constant(i64 %N, i64 %C) {
-; CHECK-LABEL: define i64 @fail_elim_no_op_and_latch_not_using_constant(
-; CHECK-SAME: i64 [[N:%.*]], i64 [[C:%.*]]) {
-; CHECK-NEXT:  [[VECTOR_SCEVCHECK:.*]]:
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i64 [[N]], 4294967295
-; CHECK-NEXT:    br i1 [[CMP]], label %[[END:.*]], label %[[VECTOR_BODY:.*]]
-; CHECK:       [[VECTOR_BODY]]:
-; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_SCEVCHECK]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[AND:%.*]] = and i64 [[INDEX]], 4294967295
-; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[AND]], 1
-; CHECK-NEXT:    [[EXIT_COND:%.*]] = icmp ugt i64 [[INDEX_NEXT]], [[C]]
-; CHECK-NEXT:    br i1 [[EXIT_COND]], label %[[END]], label %[[VECTOR_BODY]]
-; CHECK:       [[END]]:
-; CHECK-NEXT:    ret i64 [[N]]
-;
-vector.scevcheck:
-  %cmp = icmp ugt i64 %N, 4294967295
-  br i1 %cmp, label %end, label %vector.body
-vector.body:
-  %index = phi i64 [ 0, %vector.scevcheck ], [ %index.next, %vector.body ]
-  %and = and i64 %index, 4294967295
-  %index.next = add i64 %and, 1
-  %exit.cond = icmp ugt i64 %index.next, %C
-  br i1 %exit.cond, label %end, label %vector.body
-end:
-  ret i64 %N
-
-}
-
-define i64 @fail_elim_no_op_and_signed_conditional(i64 %N) {
-; CHECK-LABEL: define i64 @fail_elim_no_op_and_signed_conditional(
-; CHECK-SAME: i64 [[N:%.*]]) {
-; CHECK-NEXT:  [[VECTOR_SCEVCHECK:.*]]:
-; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i64 [[N]], 4294967295
-; CHECK-NEXT:    br i1 [[CMP]], label %[[END:.*]], label %[[VECTOR_BODY:.*]]
-; CHECK:       [[VECTOR_BODY]]:
-; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_SCEVCHECK]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[AND:%.*]] = and i64 [[INDEX]], 4294967295
-; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[AND]], 1
-; CHECK-NEXT:    [[EXIT_COND:%.*]] = icmp ugt i64 [[INDEX_NEXT]], [[N]]
-; CHECK-NEXT:    br i1 [[EXIT_COND]], label %[[END]], label %[[VECTOR_BODY]]
-; CHECK:       [[END]]:
-; CHECK-NEXT:    ret i64 [[N]]
-;
-vector.scevcheck:                                 ; preds = %for.cond1.preheader.lr.ph
-  %cmp = icmp sgt i64 %N, 4294967295
-  br i1 %cmp, label %end, label %vector.body
-vector.body:                                      ; preds = %vector.body, %vector.ph
-  %index = phi i64 [ 0, %vector.scevcheck ], [ %index.next, %vector.body ]
-  %and = and i64 %index, 4294967295
-  %index.next = add i64 %and, 1
-  %exit.cond = icmp ugt i64 %index.next, %N
-  br i1 %exit.cond, label %end, label %vector.body
-end:
-  ret i64 %N
-}
-
-define i64 @fail_elim_no_op_and_branch_on_false(i64 %N) {
-; CHECK-LABEL: define i64 @fail_elim_no_op_and_branch_on_false(
-; CHECK-SAME: i64 [[N:%.*]]) {
-; CHECK-NEXT:  [[VECTOR_SCEVCHECK:.*]]:
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i64 [[N]], 4294967295
-; CHECK-NEXT:    br i1 [[CMP]], label %[[VECTOR_BODY:.*]], label %[[END:.*]]
-; CHECK:       [[VECTOR_BODY]]:
-; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_SCEVCHECK]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[AND:%.*]] = and i64 [[INDEX]], 4294967295
-; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[AND]], 1
-; CHECK-NEXT:    [[EXIT_COND:%.*]] = icmp ugt i64 [[INDEX_NEXT]], [[N]]
-; CHECK-NEXT:    br i1 [[EXIT_COND]], label %[[END]], label %[[VECTOR_BODY]]
-; CHECK:       [[END]]:
-; CHECK-NEXT:    ret i64 [[N]]
-;
-vector.scevcheck:
-  %cmp = icmp ugt i64 %N, 4294967295
-  br i1 %cmp, label %vector.body, label %end
-vector.body:
-  %index = phi i64 [ 0, %vector.scevcheck ], [ %index.next, %vector.body ]
-  %and = and i64 %index, 4294967295
-  %index.next = add i64 %and, 1
-  %exit.cond = icmp ugt i64 %index.next, %N
-  br i1 %exit.cond, label %end, label %vector.body
-end:
-  ret i64 %N
-}
-
-define i64 @fail_elim_no_op_and_large_mask(i64 %N) {
-; CHECK-LABEL: define i64 @fail_elim_no_op_and_large_mask(
-; CHECK-SAME: i64 [[N:%.*]]) {
-; CHECK-NEXT:  [[VECTOR_SCEVCHECK:.*]]:
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i64 [[N]], 1
-; CHECK-NEXT:    br i1 [[CMP]], label %[[END:.*]], label %[[VECTOR_BODY:.*]]
-; CHECK:       [[VECTOR_BODY]]:
-; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_SCEVCHECK]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[AND:%.*]] = and i64 [[INDEX]], 4294967295
-; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[AND]], 1
-; CHECK-NEXT:    [[EXIT_COND:%.*]] = icmp ugt i64 [[INDEX_NEXT]], [[N]]
-; CHECK-NEXT:    br i1 [[EXIT_COND]], label %[[END]], label %[[VECTOR_BODY]]
-; CHECK:       [[END]]:
-; CHECK-NEXT:    ret i64 [[N]]
-;
-vector.scevcheck:
-  %cmp = icmp ugt i64 %N, 1
-  br i1 %cmp, label %end, label %vector.body
-vector.body:
-  %index = phi i64 [ 0, %vector.scevcheck ], [ %index.next, %vector.body ]
-  %and = and i64 %index, 4294967295
-  %index.next = add i64 %and, 1
-  %exit.cond = icmp ugt i64 %index.next, %N
-  br i1 %exit.cond, label %end, label %vector.body
-end:
-  ret i64 %N
-}
-
-define i64 @fail_elim_no_op_and_mismatched_types(i64 %N) {
-; CHECK-LABEL: define i64 @fail_elim_no_op_and_mismatched_types(
-; CHECK-SAME: i64 [[N:%.*]]) {
-; CHECK-NEXT:  [[VECTOR_SCEVCHECK:.*]]:
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i64 [[N]], 4294967295
-; CHECK-NEXT:    br i1 [[CMP]], label %[[END:.*]], label %[[VECTOR_BODY:.*]]
-; CHECK:       [[VECTOR_BODY]]:
-; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_SCEVCHECK]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[EXT:%.*]] = zext i32 [[INDEX]] to i64
-; CHECK-NEXT:    [[AND:%.*]] = and i64 [[EXT]], 4294967295
-; CHECK-NEXT:    [[EXIT_COND:%.*]] = icmp ugt i64 [[AND]], [[N]]
-; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 1
-; CHECK-NEXT:    br i1 [[EXIT_COND]], label %[[END]], label %[[VECTOR_BODY]]
-; CHECK:       [[END]]:
-; CHECK-NEXT:    ret i64 [[N]]
-;
-vector.scevcheck:
-  %cmp = icmp ugt i64 %N, 4294967295
-  br i1 %cmp, label %end, label %vector.body
-vector.body:
-  %index = phi i32 [ 0, %vector.scevcheck ], [ %index.next, %vector.body ]
-  %ext = zext i32 %index to i64
-  %and = and i64 %ext, 4294967295
-  %exit.cond = icmp ugt i64 %and, %N
-  %index.next = add i32 %index, 1
-  br i1 %exit.cond, label %end, label %vector.body
-end:
-  ret i64 %N
-}
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-transforms-remove-redundant-masks.ll b/llvm/test/Transforms/LoopVectorize/vplan-transforms-remove-redundant-masks.ll
new file mode 100644
index 0000000000000..a2ec0a9f91cbf
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/vplan-transforms-remove-redundant-masks.ll
@@ -0,0 +1,88 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -S -passes=loop-vectorize -force-vector-width=2 < %s | FileCheck %s
+
+define void @elim_no_op_and(i32 %N, ptr  %A, i32 %val) {
+; CHECK-LABEL: define void @elim_no_op_and(
+; CHECK-SAME: i32 [[N:%.*]], ptr [[A:%.*]], i32 [[VAL:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[CMP16_NOT:%.*]] = icmp eq i32 [[N]], 0
+; CHECK-NEXT:    br i1 [[CMP16_NOT]], label %[[FOR_COND_CLEANUP:.*]], label %[[FOR_COND1_PREHEADER_LR_PH:.*]]
+; CHECK:       [[FOR_COND1_PREHEADER_LR_PH]]:
+; CHECK-NEXT:    [[TMP0:%.*]] = zext i32 [[N]] to i64
+; CHECK-NEXT:    [[FLATTEN_TRIPCOUNT:%.*]] = mul nuw i64 [[TMP0]], [[TMP0]]
+; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[FLATTEN_TRIPCOUNT]], 2
+; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
+; CHECK:       [[VECTOR_SCEVCHECK]]:
+; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[FLATTEN_TRIPCOUNT]], -1
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ugt i64 [[TMP1]], 4294967295
+; CHECK-NEXT:    br i1 [[TMP2]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
+; CHECK:       [[VECTOR_PH]]:
+; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[FLATTEN_TRIPCOUNT]], 2
+; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[FLATTEN_TRIPCOUNT]], [[N_MOD_VF]]
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[VAL]], i64 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP3:%.*]] = and i64 [[INDEX]], 4294967295
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[TMP3]]
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP4]], align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = add <2 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT:    store <2 x i32> [[TMP5]], ptr [[TMP4]], align 4
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
+; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK:       [[MIDDLE_BLOCK]]:
+; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[FLATTEN_TRIPCOUNT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[CMP_N]], label %[[FOR_COND_CLEANUP_LOOPEXIT:.*]], label %[[SCALAR_PH]]
+; CHECK:       [[SCALAR_PH]]:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[FOR_COND1_PREHEADER_LR_PH]] ], [ 0, %[[VECTOR_SCEVCHECK]] ]
+; CHECK-NEXT:    br label %[[FOR_COND1_PREHEADER:.*]]
+; CHECK:       [[FOR_COND1_PREHEADER]]:
+; CHECK-NEXT:    [[INDVAR18:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVAR_NEXT19:%.*]], %[[FOR_COND1_PREHEADER]] ]
+; CHECK-NEXT:    [[IDXPROM:%.*]] = and i64 [[INDVAR18]], 4294967295
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IDXPROM]]
+; CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[ADD5:%.*]] = add i32 [[TMP7]], [[VAL]]
+; CHECK-NEXT:    store i32 [[ADD5]], ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[INDVAR_NEXT19]] = add nuw i64 [[INDVAR18]], 1
+; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVAR_NEXT19]], [[FLATTEN_TRIPCOUNT]]
+; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP_LOOPEXIT]], label %[[FOR_COND1_PREHEADER]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK:       [[FOR_COND_CLEANUP_LOOPEXIT]]:
+; CHECK-NEXT:    br label %[[FOR_COND_CLEANUP]]
+; CHECK:       [[FOR_COND_CLEANUP]]:
+; CHECK-NEXT:    ret void
+;
+entry:
+  %cmp16.not = icmp eq i32 %N, 0
+  br i1 %cmp16.not, label %for.cond.cleanup, label %for.cond1.preheader.lr.ph
+
+for.cond1.preheader.lr.ph:                        ; preds = %entry
+  %0 = zext i32 %N to i64
+  %flatten.tripcount = mul nuw i64 %0, %0
+  br label %for.cond1.preheader
+
+for.cond1.preheader:                              ; preds = %for.cond1.preheader.lr.ph, %for.cond1.preheader
+  %indvar18 = phi i64 [ 0, %for.cond1.preheader.lr.ph ], [ %indvar.next19, %for.cond1.preheader ]
+  %idxprom = and i64 %indvar18, 4294967295
+  %arrayidx = getelementptr inbounds nuw i32, ptr %A, i64 %idxprom
+  %1 = load i32, ptr %arrayidx, align 4
+  %add5 = add i32 %1, %val
+  store i32 %add5, ptr %arrayidx, align 4
+  %indvar.next19 = add nuw i64 %indvar18, 1
+  %exitcond.not = icmp eq i64 %indvar.next19, %flatten.tripcount
+  br i1 %exitcond.not, label %for.cond.cleanup.loopexit, label %for.cond1.preheader
+
+for.cond.cleanup.loopexit:                        ; preds = %for.cond1.preheader
+  br label %for.cond.cleanup
+
+for.cond.cleanup:                                 ; preds = %for.cond.cleanup.loopexit, %entry
+  ret void
+}
+
+;.
+; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
+; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
+; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
+; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]]}
+;.

>From 9f49a142d65cceb98b2c18e5b09d21d77bfadce6 Mon Sep 17 00:00:00 2001
From: nasmnc01 <nashe.mncube at arm.com>
Date: Fri, 24 Oct 2025 15:46:27 +0100
Subject: [PATCH 5/6] Enable VPlanTransforms::removeRedundantAndMasks

Enable removeRedundantAndMasks in the LoopVectorize VPlan transforms
pipeline. This shows the optimization in action and how it affects the
relevant tests.

Change-Id: I8daa53646931be0d90bd93f1e2f937916e7332b1
---
 llvm/lib/Transforms/Vectorize/LoopVectorize.cpp              | 1 +
 .../LoopVectorize/vplan-transforms-remove-redundant-masks.ll | 5 ++---
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index b96d29e635465..04232849dced2 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7212,6 +7212,7 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
 
   VPlanTransforms::optimizeForVFAndUF(BestVPlan, BestVF, BestUF, PSE);
   VPlanTransforms::simplifyRecipes(BestVPlan);
+  VPlanTransforms::removeRedundantAndMasks(BestVPlan);
   VPlanTransforms::removeBranchOnConst(BestVPlan);
   if (BestVPlan.getEntry()->getSingleSuccessor() ==
       BestVPlan.getScalarPreheader()) {
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-transforms-remove-redundant-masks.ll b/llvm/test/Transforms/LoopVectorize/vplan-transforms-remove-redundant-masks.ll
index a2ec0a9f91cbf..18370ed23bb74 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-transforms-remove-redundant-masks.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-transforms-remove-redundant-masks.ll
@@ -23,13 +23,12 @@ define void @elim_no_op_and(i32 %N, ptr  %A, i32 %val) {
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
 ; CHECK:       [[VECTOR_BODY]]:
-; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP3:%.*]] = and i64 [[INDEX]], 4294967295
+; CHECK-NEXT:    [[TMP3:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[TMP3]]
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP4]], align 4
 ; CHECK-NEXT:    [[TMP5:%.*]] = add <2 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
 ; CHECK-NEXT:    store <2 x i32> [[TMP5]], ptr [[TMP4]], align 4
-; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[TMP3]], 2
 ; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       [[MIDDLE_BLOCK]]:

>From 484c684a8eaeeef0373974b31808a41fafb7589b Mon Sep 17 00:00:00 2001
From: nasmnc01 <nashe.mncube at arm.com>
Date: Fri, 24 Oct 2025 16:05:57 +0100
Subject: [PATCH 6/6] Fix test crash

Change-Id: If89ee3a4c67ab4589f15481bdf99818bce7e6795
---
 llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index ac356f397b5ca..3860e7cb6db3b 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -4492,6 +4492,9 @@ static bool CheckPathFromSrcBBToDestBB(VPBlockBase *Src, VPBlockBase *Dest) {
 // for certain tripcount values.
 //
 void VPlanTransforms::removeRedundantAndMasks(VPlan &Plan) {
+  if (!Plan.getVectorLoopRegion())
+    return;
+
   auto FindSCEVCheckBlock = [&]() -> VPIRBasicBlock * {
     for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
              vp_depth_first_deep(Plan.getEntry()))) {



More information about the llvm-commits mailing list