[Openmp-commits] [openmp] 2a58be4 - [HardwareLoops] NewPM support.

Samuel Parker via Openmp-commits openmp-commits at lists.llvm.org
Mon Feb 13 01:47:11 PST 2023


Author: Samuel Parker
Date: 2023-02-13T09:46:31Z
New Revision: 2a58be42396376e8d552158ff801d953c6c1bee3

URL: https://github.com/llvm/llvm-project/commit/2a58be42396376e8d552158ff801d953c6c1bee3
DIFF: https://github.com/llvm/llvm-project/commit/2a58be42396376e8d552158ff801d953c6c1bee3.diff

LOG: [HardwareLoops] NewPM support.

With the new pass manager (NPM), the pass now defaults to preserving LCSSA,
so the expected output of a couple of tests has changed slightly.

Differential Revision: https://reviews.llvm.org/D140982

Added: 
    llvm/include/llvm/CodeGen/HardwareLoops.h

Modified: 
    llvm/include/llvm/CodeGen/Passes.h
    llvm/include/llvm/InitializePasses.h
    llvm/include/llvm/LinkAllPasses.h
    llvm/lib/CodeGen/CodeGen.cpp
    llvm/lib/CodeGen/HardwareLoops.cpp
    llvm/lib/Passes/PassBuilder.cpp
    llvm/lib/Passes/PassRegistry.def
    llvm/lib/Target/ARM/ARMTargetMachine.cpp
    llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
    llvm/test/CodeGen/PowerPC/hardware-loops-crash.ll
    llvm/test/Transforms/HardwareLoops/ARM/calls.ll
    llvm/test/Transforms/HardwareLoops/ARM/counter.ll
    llvm/test/Transforms/HardwareLoops/ARM/do-rem.ll
    llvm/test/Transforms/HardwareLoops/ARM/fp-emulation.ll
    llvm/test/Transforms/HardwareLoops/ARM/simple-do.ll
    llvm/test/Transforms/HardwareLoops/ARM/structure.ll
    llvm/test/Transforms/HardwareLoops/loop-guards.ll
    llvm/test/Transforms/HardwareLoops/scalar-while.ll
    llvm/test/Transforms/HardwareLoops/sibling-loops.ll
    llvm/test/Transforms/HardwareLoops/unconditional-latch.ll
    llvm/test/Transforms/HardwareLoops/unscevable.ll
    llvm/tools/llc/llc.cpp
    llvm/tools/opt/opt.cpp
    openmp/libomptarget/plugins-nextgen/common/PluginInterface/JIT.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/CodeGen/HardwareLoops.h b/llvm/include/llvm/CodeGen/HardwareLoops.h
new file mode 100644
index 0000000000000..c7b6e0f5ae567
--- /dev/null
+++ b/llvm/include/llvm/CodeGen/HardwareLoops.h
@@ -0,0 +1,76 @@
+//===- HardwareLoops.h ------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// Defines an IR pass for the creation of hardware loops.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_HARDWARELOOPS_H
+#define LLVM_CODEGEN_HARDWARELOOPS_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+struct HardwareLoopOptions {
+  std::optional<unsigned> Decrement;
+  std::optional<unsigned> Bitwidth;
+  std::optional<bool> Force;
+  std::optional<bool> ForcePhi;
+  std::optional<bool> ForceNested;
+  std::optional<bool> ForceGuard;
+
+  HardwareLoopOptions &setDecrement(unsigned Count) {
+    Decrement = Count;
+    return *this;
+  }
+  HardwareLoopOptions &setCounterBitwidth(unsigned Width) {
+    Bitwidth = Width;
+    return *this;
+  }
+  HardwareLoopOptions &setForce(bool Force) {
+    this->Force = Force;
+    return *this;
+  }
+  HardwareLoopOptions &setForcePhi(bool Force) {
+    ForcePhi = Force;
+    return *this;
+  }
+  HardwareLoopOptions &setForceNested(bool Force) {
+    ForceNested = Force;
+    return *this;
+  }
+  HardwareLoopOptions &setForceGuard(bool Force) {
+    ForceGuard = Force;
+    return *this;
+  }
+  bool getForcePhi() const {
+    return ForcePhi.has_value() && ForcePhi.value();
+  }
+  bool getForceNested() const {
+    return ForceNested.has_value() && ForceNested.value();
+  }
+  bool getForceGuard() const {
+    return ForceGuard.has_value() && ForceGuard.value();
+  }
+};
+
+class HardwareLoopsPass : public PassInfoMixin<HardwareLoopsPass> {
+  HardwareLoopOptions Opts;
+
+public:
+  explicit HardwareLoopsPass(HardwareLoopOptions Opts = {})
+    : Opts(Opts) { }
+
+  PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+};
+
+} // end namespace llvm
+
+#endif // LLVM_CODEGEN_HARDWARELOOPS_H

diff  --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h
index 66d213c2877b7..530369fb073e5 100644
--- a/llvm/include/llvm/CodeGen/Passes.h
+++ b/llvm/include/llvm/CodeGen/Passes.h
@@ -542,7 +542,7 @@ namespace llvm {
   FunctionPass *createEHContGuardCatchretPass();
 
   /// Create Hardware Loop pass. \see HardwareLoops.cpp
-  FunctionPass *createHardwareLoopsPass();
+  FunctionPass *createHardwareLoopsLegacyPass();
 
   /// This pass inserts pseudo probe annotation for callsite profiling.
   FunctionPass *createPseudoProbeInserter();

diff  --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h
index a693bc2653a0f..8fa9e1e2948d9 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -153,7 +153,7 @@ void initializeGlobalOptLegacyPassPass(PassRegistry&);
 void initializeGlobalSplitPass(PassRegistry&);
 void initializeGlobalsAAWrapperPassPass(PassRegistry&);
 void initializeGuardWideningLegacyPassPass(PassRegistry&);
-void initializeHardwareLoopsPass(PassRegistry&);
+void initializeHardwareLoopsLegacyPass(PassRegistry&);
 void initializeMIRProfileLoaderPassPass(PassRegistry &);
 void initializeIPSCCPLegacyPassPass(PassRegistry&);
 void initializeIRCELegacyPassPass(PassRegistry&);

diff  --git a/llvm/include/llvm/LinkAllPasses.h b/llvm/include/llvm/LinkAllPasses.h
index 23596304595a7..0f591b0be0d2f 100644
--- a/llvm/include/llvm/LinkAllPasses.h
+++ b/llvm/include/llvm/LinkAllPasses.h
@@ -197,7 +197,7 @@ namespace {
       (void) llvm::createFloat2IntPass();
       (void) llvm::createEliminateAvailableExternallyPass();
       (void)llvm::createScalarizeMaskedMemIntrinLegacyPass();
-      (void) llvm::createHardwareLoopsPass();
+      (void) llvm::createHardwareLoopsLegacyPass();
       (void) llvm::createInjectTLIMappingsLegacyPass();
       (void) llvm::createUnifyLoopExitsPass();
       (void) llvm::createFixIrreduciblePass();

diff  --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp
index 398ff56f737c4..0803c2cf66b28 100644
--- a/llvm/lib/CodeGen/CodeGen.cpp
+++ b/llvm/lib/CodeGen/CodeGen.cpp
@@ -48,7 +48,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
   initializeFuncletLayoutPass(Registry);
   initializeGCMachineCodeAnalysisPass(Registry);
   initializeGCModuleInfoPass(Registry);
-  initializeHardwareLoopsPass(Registry);
+  initializeHardwareLoopsLegacyPass(Registry);
   initializeIfConverterPass(Registry);
   initializeImplicitNullChecksPass(Registry);
   initializeIndirectBrExpandPassPass(Registry);

diff  --git a/llvm/lib/CodeGen/HardwareLoops.cpp b/llvm/lib/CodeGen/HardwareLoops.cpp
index 258ad1931b12b..e7b14d700a44a 100644
--- a/llvm/lib/CodeGen/HardwareLoops.cpp
+++ b/llvm/lib/CodeGen/HardwareLoops.cpp
@@ -15,8 +15,10 @@
 ///
 //===----------------------------------------------------------------------===//
 
+#include "llvm/CodeGen/HardwareLoops.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
 #include "llvm/Analysis/ScalarEvolution.h"
@@ -115,12 +117,12 @@ namespace {
 
   using TTI = TargetTransformInfo;
 
-  class HardwareLoops : public FunctionPass {
+  class HardwareLoopsLegacy : public FunctionPass {
   public:
     static char ID;
 
-    HardwareLoops() : FunctionPass(ID) {
-      initializeHardwareLoopsPass(*PassRegistry::getPassRegistry());
+    HardwareLoopsLegacy() : FunctionPass(ID) {
+      initializeHardwareLoopsLegacyPass(*PassRegistry::getPassRegistry());
     }
 
     bool runOnFunction(Function &F) override;
@@ -131,29 +133,44 @@ namespace {
       AU.addRequired<DominatorTreeWrapperPass>();
       AU.addPreserved<DominatorTreeWrapperPass>();
       AU.addRequired<ScalarEvolutionWrapperPass>();
+      AU.addPreserved<ScalarEvolutionWrapperPass>();
       AU.addRequired<AssumptionCacheTracker>();
       AU.addRequired<TargetTransformInfoWrapperPass>();
       AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
+      AU.addPreserved<BranchProbabilityInfoWrapperPass>();
     }
+  };
+
+  class HardwareLoopsImpl {
+  public:
+    HardwareLoopsImpl(ScalarEvolution &SE, LoopInfo &LI, bool PreserveLCSSA,
+                      DominatorTree &DT, const DataLayout &DL,
+                      const TargetTransformInfo &TTI, TargetLibraryInfo *TLI,
+                      AssumptionCache &AC, OptimizationRemarkEmitter *ORE,
+                      HardwareLoopOptions &Opts)
+      : SE(SE), LI(LI), PreserveLCSSA(PreserveLCSSA), DT(DT), DL(DL), TTI(TTI),
+        TLI(TLI), AC(AC), ORE(ORE), Opts(Opts) { }
 
+    bool run(Function &F);
+
+  private:
     // Try to convert the given Loop into a hardware loop.
-    bool TryConvertLoop(Loop *L);
+    bool TryConvertLoop(Loop *L, LLVMContext &Ctx);
 
     // Given that the target believes the loop to be profitable, try to
     // convert it.
     bool TryConvertLoop(HardwareLoopInfo &HWLoopInfo);
 
-  private:
-    ScalarEvolution *SE = nullptr;
-    LoopInfo *LI = nullptr;
-    const DataLayout *DL = nullptr;
-    OptimizationRemarkEmitter *ORE = nullptr;
-    const TargetTransformInfo *TTI = nullptr;
-    DominatorTree *DT = nullptr;
-    bool PreserveLCSSA = false;
-    AssumptionCache *AC = nullptr;
-    TargetLibraryInfo *LibInfo = nullptr;
-    Module *M = nullptr;
+    ScalarEvolution &SE;
+    LoopInfo &LI;
+    bool PreserveLCSSA;
+    DominatorTree &DT;
+    const DataLayout &DL;
+    const TargetTransformInfo &TTI;
+    TargetLibraryInfo *TLI = nullptr;
+    AssumptionCache &AC;
+    OptimizationRemarkEmitter *ORE;
+    HardwareLoopOptions &Opts;
     bool MadeChange = false;
   };
 
@@ -182,8 +199,9 @@ namespace {
   public:
     HardwareLoop(HardwareLoopInfo &Info, ScalarEvolution &SE,
                  const DataLayout &DL,
-                 OptimizationRemarkEmitter *ORE) :
-      SE(SE), DL(DL), ORE(ORE), L(Info.L), M(L->getHeader()->getModule()),
+                 OptimizationRemarkEmitter *ORE,
+                 HardwareLoopOptions &Opts) :
+      SE(SE), DL(DL), ORE(ORE), Opts(Opts), L(Info.L), M(L->getHeader()->getModule()),
       ExitCount(Info.ExitCount),
       CountType(Info.CountType),
       ExitBranch(Info.ExitBranch),
@@ -197,6 +215,7 @@ namespace {
     ScalarEvolution &SE;
     const DataLayout &DL;
     OptimizationRemarkEmitter *ORE = nullptr;
+    HardwareLoopOptions &Opts;
     Loop *L                 = nullptr;
     Module *M               = nullptr;
     const SCEV *ExitCount   = nullptr;
@@ -209,40 +228,83 @@ namespace {
   };
 }
 
-char HardwareLoops::ID = 0;
+char HardwareLoopsLegacy::ID = 0;
 
-bool HardwareLoops::runOnFunction(Function &F) {
+bool HardwareLoopsLegacy::runOnFunction(Function &F) {
   if (skipFunction(F))
     return false;
 
   LLVM_DEBUG(dbgs() << "HWLoops: Running on " << F.getName() << "\n");
 
-  LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
-  SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
-  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
-  TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
-  DL = &F.getParent()->getDataLayout();
-  ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
+  auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+  auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+  auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+  auto &DL = F.getParent()->getDataLayout();
+  auto *ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
   auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
-  LibInfo = TLIP ? &TLIP->getTLI(F) : nullptr;
-  PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
-  AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
-  M = F.getParent();
+  auto *TLI = TLIP ? &TLIP->getTLI(F) : nullptr;
+  auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+  bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
+
+  HardwareLoopOptions Opts;
+  if (ForceHardwareLoops.getNumOccurrences())
+    Opts.setForce(ForceHardwareLoops);
+  if (ForceHardwareLoopPHI.getNumOccurrences())
+    Opts.setForcePhi(ForceHardwareLoopPHI);
+  if (ForceNestedLoop.getNumOccurrences())
+    Opts.setForceNested(ForceNestedLoop);
+  if (ForceGuardLoopEntry.getNumOccurrences())
+    Opts.setForceGuard(ForceGuardLoopEntry);
+  if (LoopDecrement.getNumOccurrences())
+    Opts.setDecrement(LoopDecrement);
+  if (CounterBitWidth.getNumOccurrences())
+    Opts.setCounterBitwidth(CounterBitWidth);
 
-  for (Loop *L : *LI)
-    if (L->isOutermost())
-      TryConvertLoop(L);
+  HardwareLoopsImpl Impl(SE, LI, PreserveLCSSA, DT, DL, TTI, TLI, AC, ORE,
+                         Opts);
+  return Impl.run(F);
+}
+
+PreservedAnalyses HardwareLoopsPass::run(Function &F,
+                                         FunctionAnalysisManager &AM) {
+  auto &LI = AM.getResult<LoopAnalysis>(F);
+  auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
+  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
+  auto &TTI = AM.getResult<TargetIRAnalysis>(F);
+  auto *TLI = &AM.getResult<TargetLibraryAnalysis>(F);
+  auto &AC = AM.getResult<AssumptionAnalysis>(F);
+  auto *ORE = &AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
+  auto &DL = F.getParent()->getDataLayout();
+
+  HardwareLoopsImpl Impl(SE, LI, true, DT, DL, TTI, TLI, AC, ORE, Opts);
+  bool Changed = Impl.run(F);
+  if (!Changed)
+    return PreservedAnalyses::all();
+
+  PreservedAnalyses PA;
+  PA.preserve<LoopAnalysis>();
+  PA.preserve<ScalarEvolutionAnalysis>();
+  PA.preserve<DominatorTreeAnalysis>();
+  PA.preserve<BranchProbabilityAnalysis>();
+  return PA;
+}
 
+bool HardwareLoopsImpl::run(Function &F) {
+  LLVMContext &Ctx = F.getParent()->getContext();
+  for (Loop *L : LI)
+    if (L->isOutermost())
+      TryConvertLoop(L, Ctx);
   return MadeChange;
 }
 
 // Return true if the search should stop, which will be when an inner loop is
 // converted and the parent loop doesn't support containing a hardware loop.
-bool HardwareLoops::TryConvertLoop(Loop *L) {
+bool HardwareLoopsImpl::TryConvertLoop(Loop *L, LLVMContext &Ctx) {
   // Process nested loops first.
   bool AnyChanged = false;
   for (Loop *SL : *L)
-    AnyChanged |= TryConvertLoop(SL);
+    AnyChanged |= TryConvertLoop(SL, Ctx);
   if (AnyChanged) {
     reportHWLoopFailure("nested hardware-loops not supported", "HWLoopNested",
                         ORE, L);
@@ -252,39 +314,39 @@ bool HardwareLoops::TryConvertLoop(Loop *L) {
   LLVM_DEBUG(dbgs() << "HWLoops: Loop " << L->getHeader()->getName() << "\n");
 
   HardwareLoopInfo HWLoopInfo(L);
-  if (!HWLoopInfo.canAnalyze(*LI)) {
+  if (!HWLoopInfo.canAnalyze(LI)) {
     reportHWLoopFailure("cannot analyze loop, irreducible control flow",
                         "HWLoopCannotAnalyze", ORE, L);
     return false;
   }
 
-  if (!ForceHardwareLoops &&
-      !TTI->isHardwareLoopProfitable(L, *SE, *AC, LibInfo, HWLoopInfo)) {
+  if (!Opts.Force &&
+      !TTI.isHardwareLoopProfitable(L, SE, AC, TLI, HWLoopInfo)) {
     reportHWLoopFailure("it's not profitable to create a hardware-loop",
                         "HWLoopNotProfitable", ORE, L);
     return false;
   }
 
   // Allow overriding of the counter width and loop decrement value.
-  if (CounterBitWidth.getNumOccurrences())
-    HWLoopInfo.CountType =
-      IntegerType::get(M->getContext(), CounterBitWidth);
+  if (Opts.Bitwidth.has_value()) {
+    HWLoopInfo.CountType = IntegerType::get(Ctx, Opts.Bitwidth.value());
+  }
 
-  if (LoopDecrement.getNumOccurrences())
+  if (Opts.Decrement.has_value())
     HWLoopInfo.LoopDecrement =
-      ConstantInt::get(HWLoopInfo.CountType, LoopDecrement);
+      ConstantInt::get(HWLoopInfo.CountType, Opts.Decrement.value());
 
   MadeChange |= TryConvertLoop(HWLoopInfo);
-  return MadeChange && (!HWLoopInfo.IsNestingLegal && !ForceNestedLoop);
+  return MadeChange && (!HWLoopInfo.IsNestingLegal && !Opts.ForceNested);
 }
 
-bool HardwareLoops::TryConvertLoop(HardwareLoopInfo &HWLoopInfo) {
+bool HardwareLoopsImpl::TryConvertLoop(HardwareLoopInfo &HWLoopInfo) {
 
   Loop *L = HWLoopInfo.L;
   LLVM_DEBUG(dbgs() << "HWLoops: Try to convert profitable loop: " << *L);
 
-  if (!HWLoopInfo.isHardwareLoopCandidate(*SE, *LI, *DT, ForceNestedLoop,
-                                          ForceHardwareLoopPHI)) {
+  if (!HWLoopInfo.isHardwareLoopCandidate(SE, LI, DT, Opts.getForceNested(),
+                                          Opts.getForcePhi())) {
     // TODO: there can be many reasons a loop is not considered a
     // candidate, so we should let isHardwareLoopCandidate fill in the
     // reason and then report a better message here.
@@ -300,11 +362,11 @@ bool HardwareLoops::TryConvertLoop(HardwareLoopInfo &HWLoopInfo) {
 
   // If we don't have a preheader, then insert one.
   if (!Preheader)
-    Preheader = InsertPreheaderForLoop(L, DT, LI, nullptr, PreserveLCSSA);
+    Preheader = InsertPreheaderForLoop(L, &DT, &LI, nullptr, PreserveLCSSA);
   if (!Preheader)
     return false;
 
-  HardwareLoop HWLoop(HWLoopInfo, *SE, *DL, ORE);
+  HardwareLoop HWLoop(HWLoopInfo, SE, DL, ORE, Opts);
   HWLoop.Create();
   ++NumHWLoops;
   return true;
@@ -322,7 +384,7 @@ void HardwareLoop::Create() {
 
   Value *Setup = InsertIterationSetup(LoopCountInit);
 
-  if (UsePHICounter || ForceHardwareLoopPHI) {
+  if (UsePHICounter || Opts.ForcePhi) {
     Instruction *LoopDec = InsertLoopRegDec(LoopCountInit);
     Value *EltsRem = InsertPHICounter(Setup, LoopDec);
     LoopDec->setOperand(0, EltsRem);
@@ -397,7 +459,8 @@ Value *HardwareLoop::InitLoopCount() {
   if (SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, ExitCount,
                                   SE.getZero(ExitCount->getType()))) {
     LLVM_DEBUG(dbgs() << " - Attempting to use test.set counter.\n");
-    UseLoopGuard |= ForceGuardLoopEntry;
+    if (Opts.ForceGuard)
+      UseLoopGuard = true;
   } else
     UseLoopGuard = false;
 
@@ -441,7 +504,7 @@ Value *HardwareLoop::InitLoopCount() {
 Value* HardwareLoop::InsertIterationSetup(Value *LoopCountInit) {
   IRBuilder<> Builder(BeginBB->getTerminator());
   Type *Ty = LoopCountInit->getType();
-  bool UsePhi = UsePHICounter || ForceHardwareLoopPHI;
+  bool UsePhi = UsePHICounter || Opts.ForcePhi;
   Intrinsic::ID ID = UseLoopGuard
                          ? (UsePhi ? Intrinsic::test_start_loop_iterations
                                    : Intrinsic::test_set_loop_iterations)
@@ -533,11 +596,11 @@ void HardwareLoop::UpdateBranch(Value *EltsRem) {
   RecursivelyDeleteTriviallyDeadInstructions(OldCond);
 }
 
-INITIALIZE_PASS_BEGIN(HardwareLoops, DEBUG_TYPE, HW_LOOPS_NAME, false, false)
+INITIALIZE_PASS_BEGIN(HardwareLoopsLegacy, DEBUG_TYPE, HW_LOOPS_NAME, false, false)
 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
-INITIALIZE_PASS_END(HardwareLoops, DEBUG_TYPE, HW_LOOPS_NAME, false, false)
+INITIALIZE_PASS_END(HardwareLoopsLegacy, DEBUG_TYPE, HW_LOOPS_NAME, false, false)
 
-FunctionPass *llvm::createHardwareLoopsPass() { return new HardwareLoops(); }
+FunctionPass *llvm::createHardwareLoopsLegacyPass() { return new HardwareLoopsLegacy(); }

diff  --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index 4b8754df7fb63..93771ea7064f5 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -73,6 +73,7 @@
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/Analysis/TypeBasedAliasAnalysis.h"
 #include "llvm/Analysis/UniformityAnalysis.h"
+#include "llvm/CodeGen/HardwareLoops.h"
 #include "llvm/CodeGen/TypePromotion.h"
 #include "llvm/IR/DebugInfo.h"
 #include "llvm/IR/Dominators.h"
@@ -540,6 +541,48 @@ auto parsePassParameters(ParametersParseCallableT &&Parser, StringRef Name,
   return Result;
 }
 
+/// Parser of parameters for HardwareLoops  pass.
+Expected<HardwareLoopOptions> parseHardwareLoopOptions(StringRef Params) {
+  HardwareLoopOptions HardwareLoopOpts;
+
+  while (!Params.empty()) {
+    StringRef ParamName;
+    std::tie(ParamName, Params) = Params.split(';');
+    if (ParamName.consume_front("hardware-loop-decrement=")) {
+      int Count;
+      if (ParamName.getAsInteger(0, Count))
+        return make_error<StringError>(
+            formatv("invalid HardwareLoopPass parameter '{0}' ", ParamName).str(),
+            inconvertibleErrorCode());
+      HardwareLoopOpts.setDecrement(Count);
+      continue;
+    }
+    if (ParamName.consume_front("hardware-loop-counter-bitwidth=")) {
+      int Count;
+      if (ParamName.getAsInteger(0, Count))
+        return make_error<StringError>(
+            formatv("invalid HardwareLoopPass parameter '{0}' ", ParamName).str(),
+            inconvertibleErrorCode());
+      HardwareLoopOpts.setCounterBitwidth(Count);
+      continue;
+    }
+    if (ParamName == "force-hardware-loops") {
+      HardwareLoopOpts.setForce(true);
+    } else if (ParamName == "force-hardware-loop-phi") {
+      HardwareLoopOpts.setForcePhi(true);
+    } else if (ParamName == "force-nested-hardware-loop") {
+      HardwareLoopOpts.setForceNested(true);
+    } else if (ParamName == "force-hardware-loop-guard") {
+      HardwareLoopOpts.setForceGuard(true);
+    } else {
+      return make_error<StringError>(
+          formatv("invalid HardwarePass parameter '{0}' ", ParamName).str(),
+          inconvertibleErrorCode());
+    }
+  }
+  return HardwareLoopOpts;
+}
+
 /// Parser of parameters for LoopUnroll pass.
 Expected<LoopUnrollOptions> parseLoopUnrollOptions(StringRef Params) {
   LoopUnrollOptions UnrollOpts;

diff  --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index 73ab87dd88236..e5aeb466b87b9 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -424,6 +424,18 @@ FUNCTION_PASS_WITH_PARAMS("ee-instrument",
                            },
                           parseEntryExitInstrumenterPassOptions,
                           "post-inline")
+FUNCTION_PASS_WITH_PARAMS("hardware-loops",
+                          "HardwareLoopsPass",
+                          [](HardwareLoopOptions Opts) {
+                              return HardwareLoopsPass(Opts);
+                          },
+                          parseHardwareLoopOptions,
+                          "force-hardware-loops;"
+                          "force-hardware-loop-phi;"
+                          "force-nested-hardware-loop;"
+                          "force-hardware-loop-guard;"
+                          "hardware-loop-decrement=N;"
+                          "hardware-loop-counter-bitwidth=N")
 FUNCTION_PASS_WITH_PARAMS("lower-matrix-intrinsics",
                           "LowerMatrixIntrinsicsPass",
                            [](bool Minimal) {

diff  --git a/llvm/lib/Target/ARM/ARMTargetMachine.cpp b/llvm/lib/Target/ARM/ARMTargetMachine.cpp
index 4646ae1711077..c5eeb9820cd0c 100644
--- a/llvm/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetMachine.cpp
@@ -481,7 +481,7 @@ bool ARMPassConfig::addPreISel() {
   }
 
   if (TM->getOptLevel() != CodeGenOpt::None) {
-    addPass(createHardwareLoopsPass());
+    addPass(createHardwareLoopsLegacyPass());
     addPass(createMVETailPredicationPass());
     // FIXME: IR passes can delete address-taken basic blocks, deleting
     // corresponding blockaddresses. ARMConstantPoolConstant holds references to

diff  --git a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
index b1de8d00f409e..2944736937eb8 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -474,7 +474,7 @@ bool PPCPassConfig::addPreISel() {
     addPass(createPPCLoopInstrFormPrepPass(getPPCTargetMachine()));
 
   if (!DisableCTRLoops && getOptLevel() != CodeGenOpt::None)
-    addPass(createHardwareLoopsPass());
+    addPass(createHardwareLoopsLegacyPass());
 
   return false;
 }

diff  --git a/llvm/test/CodeGen/PowerPC/hardware-loops-crash.ll b/llvm/test/CodeGen/PowerPC/hardware-loops-crash.ll
index e83a7ccd825de..afa0f8c4adc0a 100644
--- a/llvm/test/CodeGen/PowerPC/hardware-loops-crash.ll
+++ b/llvm/test/CodeGen/PowerPC/hardware-loops-crash.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -hardware-loops -S -verify-loop-lcssa %s | FileCheck %s
+; RUN: opt < %s -passes=hardware-loops -verify-loop-lcssa -S | FileCheck %s
 
 target datalayout = "E-m:e-i64:64-n32:64"
 target triple = "ppc64-unknown-linux-elf"
@@ -20,11 +20,12 @@ define void @test() {
 ; CHECK-NEXT:    [[C_0:%.*]] = call i1 @cond()
 ; CHECK-NEXT:    br i1 [[C_0]], label [[WHILE_COND25_PREHEADER:%.*]], label [[FOR_BODY]]
 ; CHECK:       while.cond25.preheader:
+; CHECK-NEXT:    [[INDVARS_IV349_PH:%.*]] = phi i64 [ 50, [[FOR_INC]] ]
 ; CHECK-NEXT:    call void @llvm.set.loop.iterations.i64(i64 51)
 ; CHECK-NEXT:    br label [[WHILE_COND25:%.*]]
 ; CHECK:       while.cond25:
 ; CHECK-NEXT:    [[INDVAR:%.*]] = phi i64 [ 0, [[WHILE_COND25_PREHEADER]] ], [ [[INDVAR_NEXT:%.*]], [[LAND_RHS:%.*]] ]
-; CHECK-NEXT:    [[INDVARS_IV349:%.*]] = phi i64 [ [[INDVARS_IV_NEXT350:%.*]], [[LAND_RHS]] ], [ 50, [[WHILE_COND25_PREHEADER]] ]
+; CHECK-NEXT:    [[INDVARS_IV349:%.*]] = phi i64 [ [[INDVARS_IV_NEXT350:%.*]], [[LAND_RHS]] ], [ [[INDVARS_IV349_PH]], [[WHILE_COND25_PREHEADER]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = call i1 @llvm.loop.decrement.i64(i64 1)
 ; CHECK-NEXT:    br i1 [[TMP0]], label [[LAND_RHS]], label [[WHILE_END187:%.*]]
 ; CHECK:       land.rhs:

diff  --git a/llvm/test/Transforms/HardwareLoops/ARM/calls.ll b/llvm/test/Transforms/HardwareLoops/ARM/calls.ll
index 38c14b261a922..3ac96b8f1a61c 100644
--- a/llvm/test/Transforms/HardwareLoops/ARM/calls.ll
+++ b/llvm/test/Transforms/HardwareLoops/ARM/calls.ll
@@ -1,9 +1,9 @@
-; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -hardware-loops %s -S -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-MAIN
-; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+fullfp16 -hardware-loops %s -S -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FP
-; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+fp-armv8,+fullfp16 -hardware-loops %s -S -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FP64
-; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -hardware-loops %s -S -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-MVE
-; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -hardware-loops %s -S -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-MVEFP
-; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -hardware-loops -disable-arm-loloops=true %s -S -o - | FileCheck %s --check-prefix=DISABLED
+; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -passes=hardware-loops %s -S -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-MAIN
+; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+fullfp16 -passes=hardware-loops %s -S -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FP
+; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+fp-armv8,+fullfp16 -passes=hardware-loops %s -S -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FP64
+; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -passes=hardware-loops %s -S -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-MVE
+; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -passes=hardware-loops %s -S -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-MVEFP
+; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -passes=hardware-loops -disable-arm-loloops=true %s -S -o - | FileCheck %s --check-prefix=DISABLED
 
 ; DISABLED-NOT: call i32 @llvm.loop.decrement
 

diff  --git a/llvm/test/Transforms/HardwareLoops/ARM/counter.ll b/llvm/test/Transforms/HardwareLoops/ARM/counter.ll
index 74879320a2dec..35dea0b706048 100644
--- a/llvm/test/Transforms/HardwareLoops/ARM/counter.ll
+++ b/llvm/test/Transforms/HardwareLoops/ARM/counter.ll
@@ -1,4 +1,4 @@
-; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -hardware-loops %s -o - | FileCheck %s
+; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -passes=hardware-loops %s -o - | FileCheck %s
 
 @g = common local_unnamed_addr global ptr null, align 4
 

diff  --git a/llvm/test/Transforms/HardwareLoops/ARM/do-rem.ll b/llvm/test/Transforms/HardwareLoops/ARM/do-rem.ll
index ef366fc835a70..ed1b0e17469ea 100644
--- a/llvm/test/Transforms/HardwareLoops/ARM/do-rem.ll
+++ b/llvm/test/Transforms/HardwareLoops/ARM/do-rem.ll
@@ -1,4 +1,4 @@
-; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -hardware-loops %s -S -o - | FileCheck %s
+; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -passes=hardware-loops %s -S -o - | FileCheck %s
 
 @g = common local_unnamed_addr global ptr null, align 4
 

diff  --git a/llvm/test/Transforms/HardwareLoops/ARM/fp-emulation.ll b/llvm/test/Transforms/HardwareLoops/ARM/fp-emulation.ll
index ce28a0fe9f86e..86f9e1e85b9da 100644
--- a/llvm/test/Transforms/HardwareLoops/ARM/fp-emulation.ll
+++ b/llvm/test/Transforms/HardwareLoops/ARM/fp-emulation.ll
@@ -1,5 +1,5 @@
-; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+fp-armv8 -hardware-loops %s -S -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FP
-; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+soft-float -hardware-loops %s -S -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-SOFT
+; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+fp-armv8 -passes=hardware-loops %s -S -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FP
+; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+soft-float -passes=hardware-loops %s -S -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-SOFT
 
 ; CHECK-LABEL: test_fptosi
 ; CHECK-SOFT-NOT: call i32 @llvm.start.loop.iterations

diff  --git a/llvm/test/Transforms/HardwareLoops/ARM/simple-do.ll b/llvm/test/Transforms/HardwareLoops/ARM/simple-do.ll
index 4a5d979341c8c..9130d65fb3ed0 100644
--- a/llvm/test/Transforms/HardwareLoops/ARM/simple-do.ll
+++ b/llvm/test/Transforms/HardwareLoops/ARM/simple-do.ll
@@ -1,5 +1,5 @@
-; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -hardware-loops %s -S -o - | FileCheck %s
-; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -hardware-loops -disable-arm-loloops=true %s -S -o - | FileCheck %s --check-prefix=DISABLED
+; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -passes=hardware-loops %s -S -o - | FileCheck %s
+; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -passes=hardware-loops -disable-arm-loloops=true %s -S -o - | FileCheck %s --check-prefix=DISABLED
 ; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi %s -o - | FileCheck %s --check-prefix=CHECK-LLC
 
 ; DISABLED-NOT: llvm.{{.*}}.loop.iterations

diff  --git a/llvm/test/Transforms/HardwareLoops/ARM/structure.ll b/llvm/test/Transforms/HardwareLoops/ARM/structure.ll
index 555dbd9ad2838..cb66fefbfcc85 100644
--- a/llvm/test/Transforms/HardwareLoops/ARM/structure.ll
+++ b/llvm/test/Transforms/HardwareLoops/ARM/structure.ll
@@ -1,8 +1,8 @@
-; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -hardware-loops %s -S -o - | \
+; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -passes=hardware-loops %s -S -o - | \
 ; RUN:     FileCheck %s
 ; RUN: opt -mtriple=thumbv8.1m.main -passes=loop-unroll -unroll-remainder=false -S < %s | \
 ; RUN:     llc -mtriple=thumbv8.1m.main | FileCheck %s --check-prefix=CHECK-UNROLL
-; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -hardware-loops \
+; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -passes=hardware-loops \
 ; RUN:     -pass-remarks-analysis=hardware-loops  %s -S -o - 2>&1 | \
 ; RUN:     FileCheck %s --check-prefix=CHECK-REMARKS
 
@@ -14,7 +14,7 @@
 ; CHECK-REMARKS: remark: <unknown>:0:0: hardware-loop not created: it's not profitable to create a hardware-loop
 ; CHECK-REMARKS: remark: <unknown>:0:0: hardware-loop not created: it's not profitable to create a hardware-loop
 ; CHECK-REMARKS: remark: <unknown>:0:0: hardware-loop not created: loop is not a candidate
-; CHECK-REMARKS: remark: <unknown>:0:0: hardware-loop not created: nested hardware-loops not supported
+; CHECK-REMARKS: remark: <unknown>:0:0: hardware-loop not created: loop is not a candidate
 ; CHECK-REMARKS: remark: <unknown>:0:0: hardware-loop not created: it's not profitable to create a hardware-loop
 ; CHECK-REMARKS: remark: <unknown>:0:0: hardware-loop not created: it's not profitable to create a hardware-loop
 

diff  --git a/llvm/test/Transforms/HardwareLoops/loop-guards.ll b/llvm/test/Transforms/HardwareLoops/loop-guards.ll
index 7b001771399ca..43aae5ea35b4d 100644
--- a/llvm/test/Transforms/HardwareLoops/loop-guards.ll
+++ b/llvm/test/Transforms/HardwareLoops/loop-guards.ll
@@ -1,6 +1,6 @@
-; RUN: opt -hardware-loops -force-hardware-loops=true -hardware-loop-decrement=1 -hardware-loop-counter-bitwidth=32 -force-hardware-loop-guard=true -S %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-EXIT
-; RUN: opt -hardware-loops -force-hardware-loops=true -hardware-loop-decrement=1 -hardware-loop-counter-bitwidth=32 -force-hardware-loop-guard=true -force-hardware-loop-phi=true -S %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LATCH
-; RUN: opt -hardware-loops -force-hardware-loops=true -hardware-loop-decrement=1 -hardware-loop-counter-bitwidth=32 -force-hardware-loop-guard=false -S %s -o - | FileCheck %s --check-prefix=NO-GUARD
+; RUN: opt -passes='hardware-loops<force-hardware-loops;force-hardware-loop-guard;hardware-loop-decrement=1;hardware-loop-counter-bitwidth=32>' -S %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-EXIT
+; RUN: opt -passes='hardware-loops<force-hardware-loops;force-hardware-loop-guard;force-hardware-loop-phi;hardware-loop-decrement=1;hardware-loop-counter-bitwidth=32>' -S %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LATCH
+; RUN: opt -passes='hardware-loops<force-hardware-loops;hardware-loop-decrement=1;hardware-loop-counter-bitwidth=32>' -S %s -o - | FileCheck %s --check-prefix=NO-GUARD
 
 ; NO-GUARD-NOT: @llvm.test.set.loop.iterations
 

diff  --git a/llvm/test/Transforms/HardwareLoops/scalar-while.ll b/llvm/test/Transforms/HardwareLoops/scalar-while.ll
index 46e4f7721c1ae..5b09dc8f23fd2 100644
--- a/llvm/test/Transforms/HardwareLoops/scalar-while.ll
+++ b/llvm/test/Transforms/HardwareLoops/scalar-while.ll
@@ -1,9 +1,9 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -hardware-loops -force-hardware-loops=true -hardware-loop-decrement=1 -hardware-loop-counter-bitwidth=32 -S %s -o - | FileCheck %s --check-prefix=CHECK-DEC
-; RUN: opt -hardware-loops -force-hardware-loops=true -hardware-loop-decrement=1 -hardware-loop-counter-bitwidth=32 -force-hardware-loop-phi=true -S %s -o - | FileCheck %s --check-prefix=CHECK-PHI
-; RUN: opt -hardware-loops -force-hardware-loops=true -hardware-loop-decrement=1 -hardware-loop-counter-bitwidth=32 -force-nested-hardware-loop=true -S %s -o - | FileCheck %s --check-prefix=CHECK-NESTED
-; RUN: opt -hardware-loops -force-hardware-loops=true -hardware-loop-decrement=1 -hardware-loop-counter-bitwidth=32 -force-hardware-loop-guard=true -S %s -o - | FileCheck %s --check-prefix=CHECK-GUARD
-; RUN: opt -hardware-loops -force-hardware-loops=true -hardware-loop-decrement=1 -hardware-loop-counter-bitwidth=32 -force-hardware-loop-phi=true -force-hardware-loop-guard=true -S %s -o - | FileCheck %s --check-prefix=CHECK-PHIGUARD
+; RUN: opt -passes='hardware-loops<force-hardware-loops;hardware-loop-decrement=1;hardware-loop-counter-bitwidth=32>' -S %s -o - | FileCheck %s --check-prefix=CHECK-DEC
+; RUN: opt -passes='hardware-loops<force-hardware-loops;hardware-loop-decrement=1;hardware-loop-counter-bitwidth=32;force-hardware-loop-phi>' -S %s -o - | FileCheck %s --check-prefix=CHECK-PHI
+; RUN: opt -passes='hardware-loops<force-hardware-loops;hardware-loop-decrement=1;hardware-loop-counter-bitwidth=32;force-nested-hardware-loop>' -S %s -o - | FileCheck %s --check-prefix=CHECK-NESTED
+; RUN: opt -passes='hardware-loops<force-hardware-loops;hardware-loop-decrement=1;hardware-loop-counter-bitwidth=32;force-hardware-loop-guard>' -S %s -o - | FileCheck %s --check-prefix=CHECK-GUARD
+; RUN: opt -passes='hardware-loops<force-hardware-loops;hardware-loop-decrement=1;hardware-loop-counter-bitwidth=32;force-hardware-loop-phi;force-hardware-loop-guard>' -S %s -o - | FileCheck %s --check-prefix=CHECK-PHIGUARD
 
 define void @while_lt(i32 %i, i32 %N, ptr nocapture %A) {
 ; CHECK-DEC-LABEL: @while_lt(

diff  --git a/llvm/test/Transforms/HardwareLoops/sibling-loops.ll b/llvm/test/Transforms/HardwareLoops/sibling-loops.ll
index 50b89f141cc2f..98714857530a8 100644
--- a/llvm/test/Transforms/HardwareLoops/sibling-loops.ll
+++ b/llvm/test/Transforms/HardwareLoops/sibling-loops.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -hardware-loops -force-hardware-loops=true -hardware-loop-decrement=1 -hardware-loop-counter-bitwidth=32 -S | FileCheck %s
+; RUN: opt < %s -passes='hardware-loops<force-hardware-loops;hardware-loop-decrement=1;hardware-loop-counter-bitwidth=32>' -S | FileCheck %s
 
 define arm_aapcs_vfpcc void @test(ptr noalias nocapture readonly %off, ptr noalias nocapture %data, ptr noalias nocapture %dst, i32 %n) {
 ; CHECK-LABEL: @test(
@@ -25,10 +25,11 @@ define arm_aapcs_vfpcc void @test(ptr noalias nocapture readonly %off, ptr noali
 ; CHECK-NEXT:    [[TMP0:%.*]] = call i1 @llvm.loop.decrement.i32(i32 1)
 ; CHECK-NEXT:    br i1 [[TMP0]], label [[FOR_BODY4_US]], label [[FOR_BODY15_US_PREHEADER:%.*]]
 ; CHECK:       for.body15.us.preheader:
+; CHECK-NEXT:    [[J10_055_US_PH:%.*]] = phi i32 [ 0, [[FOR_BODY4_US]] ]
 ; CHECK-NEXT:    call void @llvm.set.loop.iterations.i32(i32 [[N]])
 ; CHECK-NEXT:    br label [[FOR_BODY15_US:%.*]]
 ; CHECK:       for.body15.us:
-; CHECK-NEXT:    [[J10_055_US:%.*]] = phi i32 [ [[INC26_US:%.*]], [[FOR_BODY15_US]] ], [ 0, [[FOR_BODY15_US_PREHEADER]] ]
+; CHECK-NEXT:    [[J10_055_US:%.*]] = phi i32 [ [[INC26_US:%.*]], [[FOR_BODY15_US]] ], [ [[J10_055_US_PH]], [[FOR_BODY15_US_PREHEADER]] ]
 ; CHECK-NEXT:    [[ARRAYIDX16_US:%.*]] = getelementptr inbounds i16, ptr [[OFF]], i32 [[J10_055_US]]
 ; CHECK-NEXT:    [[L0:%.*]] = load i16, ptr [[ARRAYIDX16_US]], align 2
 ; CHECK-NEXT:    [[ARRAYIDX18_US:%.*]] = getelementptr inbounds i16, ptr [[DATA]], i32 [[J10_055_US]]

diff  --git a/llvm/test/Transforms/HardwareLoops/unconditional-latch.ll b/llvm/test/Transforms/HardwareLoops/unconditional-latch.ll
index 847d92fa0f260..804e2bc24fb4e 100644
--- a/llvm/test/Transforms/HardwareLoops/unconditional-latch.ll
+++ b/llvm/test/Transforms/HardwareLoops/unconditional-latch.ll
@@ -1,6 +1,6 @@
-; RUN: opt -force-hardware-loops=true -hardware-loop-decrement=1 -hardware-loop-counter-bitwidth=32 -hardware-loops -S %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ALLOW
-; RUN: opt -force-hardware-loops=true -hardware-loop-decrement=1 -hardware-loop-counter-bitwidth=32 -hardware-loops -force-hardware-loop-guard=true -S %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ALLOW
-; RUN: opt -force-hardware-loops=true -hardware-loop-decrement=1 -hardware-loop-counter-bitwidth=32 -force-hardware-loop-phi=true -hardware-loops -S %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LATCH
+; RUN: opt -passes='hardware-loops<force-hardware-loops;hardware-loop-decrement=1;hardware-loop-counter-bitwidth=32>' -S %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ALLOW
+; RUN: opt -passes='hardware-loops<force-hardware-loops;hardware-loop-decrement=1;hardware-loop-counter-bitwidth=32>' -force-hardware-loop-guard=true -S %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ALLOW
+; RUN: opt -passes='hardware-loops<force-hardware-loops;hardware-loop-decrement=1;hardware-loop-counter-bitwidth=32;force-hardware-loop-phi>' -S %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LATCH
 
 ; CHECK-LABEL: not_rotated
 ; CHECK-LATCH-NOT: call void @llvm.set.loop.iterations

diff  --git a/llvm/test/Transforms/HardwareLoops/unscevable.ll b/llvm/test/Transforms/HardwareLoops/unscevable.ll
index 5cde63c4ac85b..b55c80fd32d88 100644
--- a/llvm/test/Transforms/HardwareLoops/unscevable.ll
+++ b/llvm/test/Transforms/HardwareLoops/unscevable.ll
@@ -1,6 +1,6 @@
-; RUN: opt -hardware-loops -force-hardware-loops=true -hardware-loop-decrement=1 -hardware-loop-counter-bitwidth=32 -S %s -o - | FileCheck %s
-; RUN: opt -hardware-loops -force-hardware-loops=true -hardware-loop-decrement=1 -hardware-loop-counter-bitwidth=32 -force-hardware-loop-phi=true -S %s -o - | FileCheck %s
-; RUN: opt -hardware-loops -force-hardware-loops=true -hardware-loop-decrement=1 -hardware-loop-counter-bitwidth=32 -force-nested-hardware-loop=true -S %s -o - | FileCheck %s
+; RUN: opt -passes=hardware-loops -force-hardware-loops=true -hardware-loop-decrement=1 -hardware-loop-counter-bitwidth=32 -S %s -o - | FileCheck %s
+; RUN: opt -passes=hardware-loops -force-hardware-loops=true -hardware-loop-decrement=1 -hardware-loop-counter-bitwidth=32 -force-hardware-loop-phi=true -S %s -o - | FileCheck %s
+; RUN: opt -passes=hardware-loops -force-hardware-loops=true -hardware-loop-decrement=1 -hardware-loop-counter-bitwidth=32 -force-nested-hardware-loop=true -S %s -o - | FileCheck %s
 
 ; CHECK-LABEL: float_counter
 ; CHECK-NOT: set.loop.iterations

diff  --git a/llvm/tools/llc/llc.cpp b/llvm/tools/llc/llc.cpp
index 544e7aed913b1..ed65b83487790 100644
--- a/llvm/tools/llc/llc.cpp
+++ b/llvm/tools/llc/llc.cpp
@@ -366,7 +366,7 @@ int main(int argc, char **argv) {
   initializeScalarizeMaskedMemIntrinLegacyPassPass(*Registry);
   initializeExpandReductionsPass(*Registry);
   initializeExpandVectorPredicationPass(*Registry);
-  initializeHardwareLoopsPass(*Registry);
+  initializeHardwareLoopsLegacyPass(*Registry);
   initializeTransformUtils(*Registry);
   initializeReplaceWithVeclibLegacyPass(*Registry);
   initializeTLSVariableHoistLegacyPassPass(*Registry);

diff  --git a/llvm/tools/opt/opt.cpp b/llvm/tools/opt/opt.cpp
index e837a70c7cd14..ec338c8574447 100644
--- a/llvm/tools/opt/opt.cpp
+++ b/llvm/tools/opt/opt.cpp
@@ -368,7 +368,6 @@ static bool shouldPinPassToLegacyPM(StringRef Pass) {
       "verify-safepoint-ir",
       "atomic-expand",
       "expandvp",
-      "hardware-loops",
       "mve-tail-predication",
       "interleaved-access",
       "global-merge",
@@ -462,7 +461,6 @@ int main(int argc, char **argv) {
   initializeExpandVectorPredicationPass(Registry);
   initializeWasmEHPreparePass(Registry);
   initializeWriteBitcodePassPass(Registry);
-  initializeHardwareLoopsPass(Registry);
   initializeReplaceWithVeclibLegacyPass(Registry);
   initializeJMCInstrumenterPass(Registry);
 

diff  --git a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/JIT.cpp b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/JIT.cpp
index d7f42a117e2ba..3983f7044dcf7 100644
--- a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/JIT.cpp
+++ b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/JIT.cpp
@@ -113,7 +113,7 @@ void init(Triple TT) {
   initializeExpandVectorPredicationPass(Registry);
   initializeWasmEHPreparePass(Registry);
   initializeWriteBitcodePassPass(Registry);
-  initializeHardwareLoopsPass(Registry);
+  initializeHardwareLoopsLegacyPass(Registry);
   initializeTypePromotionLegacyPass(Registry);
   initializeReplaceWithVeclibLegacyPass(Registry);
   initializeJMCInstrumenterPass(Registry);


        


More information about the Openmp-commits mailing list