[llvm] [NewPM][CodeGen][WIP] Add callback style CodeGen pass pipeline builder (PR #104725)

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Mon Aug 19 01:17:41 PDT 2024


================
@@ -2221,3 +2338,578 @@ AAManager PassBuilder::buildDefaultAAPipeline() {
 
   return AA;
 }
+
+// Find the Profile remapping file name. The internal option takes the
+// precedence before getting from TargetMachine.
+static std::string getFSRemappingFile(const TargetMachine *TM,
+                                      const CGPassBuilderOption &Options) {
+  if (!Options.FSRemappingFile.empty())
+    return Options.FSRemappingFile;
+  const std::optional<PGOOptions> &PGOOpt = TM->getPGOOption();
+  if (PGOOpt == std::nullopt || PGOOpt->Action != PGOOptions::SampleUse)
+    return std::string();
+  return PGOOpt->ProfileRemappingFile;
+}
+
+// Find the FSProfile file name. The internal option takes the precedence
+// before getting from TargetMachine.
+static std::string getFSProfileFile(const TargetMachine *TM,
+                                    const CGPassBuilderOption &Options) {
+  if (!Options.FSProfileFile.empty())
+    return Options.FSProfileFile;
+  const std::optional<PGOOptions> &PGOOpt = TM->getPGOOption();
+  if (PGOOpt == std::nullopt || PGOOpt->Action != PGOOptions::SampleUse)
+    return std::string();
+  return PGOOpt->ProfileFile;
+}
+
+Error PassBuilder::addExceptionHandlingPasses(FunctionPassManager &FPM) {
+  const MCAsmInfo *MCAI = TM->getMCAsmInfo();
+  if (!MCAI)
+    return make_error<StringError>("No MCAsmInfo!", inconvertibleErrorCode());
+  switch (MCAI->getExceptionHandlingType()) {
+  case ExceptionHandling::SjLj:
+    // SjLj piggy-backs on dwarf for this bit. The cleanups done apply to both
+    // Dwarf EH prepare needs to be run after SjLj prepare. Otherwise,
+    // catch info can get misplaced when a selector ends up more than one block
+    // removed from the parent invoke(s). This could happen when a landing
+    // pad is shared by multiple invokes and is also a target of a normal
+    // edge from elsewhere.
+    FPM.addPass(SjLjEHPreparePass(TM));
+    [[fallthrough]];
+  case ExceptionHandling::DwarfCFI:
+  case ExceptionHandling::ARM:
+  case ExceptionHandling::AIX:
+  case ExceptionHandling::ZOS:
+    FPM.addPass(DwarfEHPreparePass(TM));
+    break;
+  case ExceptionHandling::WinEH:
+    // We support using both GCC-style and MSVC-style exceptions on Windows, so
+    // add both preparation passes. Each pass will only actually run if it
+    // recognizes the personality function.
+    FPM.addPass(WinEHPreparePass());
+    FPM.addPass(DwarfEHPreparePass(TM));
+    break;
+  case ExceptionHandling::Wasm:
+    // Wasm EH uses Windows EH instructions, but it does not need to demote PHIs
+    // on catchpads and cleanuppads because it does not outline them into
+    // funclets. Catchswitch blocks are not lowered in SelectionDAG, so we
+    // should remove PHIs there.
+    FPM.addPass(WinEHPreparePass(/*DemoteCatchSwitchPHIOnly=*/true));
+    FPM.addPass(WasmEHPreparePass());
+    break;
+  case ExceptionHandling::None:
+    FPM.addPass(LowerInvokePass());
+
+    // The lower invoke pass may create unreachable code. Remove it.
+    FPM.addPass(UnreachableBlockElimPass());
+    break;
+  }
+  return Error::success();
+}
+
+Error PassBuilder::addInstructionSelectorPasses(
+    MachineFunctionPassManager &MFPM, const CGPassBuilderOption &Options) {
+  CodeGenOptLevel OptLevel = TM->getOptLevel();
+
+  // Core ISel
+  // Enable FastISel with -fast-isel, but allow that to be overridden.
+  TM->setO0WantsFastISel(Options.EnableFastISelOption.value_or(true));
+  // Determine an instruction selector.
+  enum class SelectorType { SelectionDAG, FastISel, GlobalISel };
+  SelectorType Selector;
+
+  Options.EnableFastISelOption.value_or(false);
+  if (Options.EnableFastISelOption.value_or(false))
+    Selector = SelectorType::FastISel;
+
+  else if (Options.EnableGlobalISelOption.value_or(false) ||
+           (TM->Options.EnableGlobalISel &&
+            !Options.EnableGlobalISelOption.value_or(false)))
+    Selector = SelectorType::GlobalISel;
+  else if (OptLevel == CodeGenOptLevel::None && TM->getO0WantsFastISel())
+    Selector = SelectorType::FastISel;
+  else
+    Selector = SelectorType::SelectionDAG;
+
+  // Set consistently TM.Options.EnableFastISel and EnableGlobalISel.
+  if (Selector == SelectorType::FastISel) {
+    TM->setFastISel(true);
+    TM->setGlobalISel(false);
+  } else if (Selector == SelectorType::GlobalISel) {
+    TM->setFastISel(false);
+    TM->setGlobalISel(true);
+  }
+
+  // Add instruction selector passes.
+  if (Selector == SelectorType::GlobalISel) {
+    MFPM.addPass(IRTranslatorPass());
+    MFPM.addPass(LegalizerPass());
+
+    // Before running the register bank selector, ask the target if it
+    // wants to run some passes.
+    invokePreRegBankSelectEPCallbacks(MFPM, OptLevel);
+    MFPM.addPass(RegBankSelectPass());
+
+    invokePreGlobalInstructionSelectEPCallbacks(MFPM, OptLevel);
+    MFPM.addPass(InstructionSelectPass());
+    invokePostGlobalInstructionSelectEPCallbacks(MFPM, OptLevel);
+
+    // Pass to reset the MachineFunction if the ISel failed.
+    MFPM.addPass(ResetMachineFunctionPass(
+        TM->Options.GlobalISelAbort == GlobalISelAbortMode::DisableWithDiag,
+        TM->Options.GlobalISelAbort == GlobalISelAbortMode::Enable));
+
+    // Provide a fallback path when we do not want to abort on
+    // not-yet-supported input.
+    if (TM->Options.GlobalISelAbort != GlobalISelAbortMode::Enable) {
+      if (!AddInstSelectorCallback)
+        return make_error<StringError>("No InstSelectorCallback!",
+                                       inconvertibleErrorCode());
+      AddInstSelectorCallback(MFPM, OptLevel);
+    }
+  } else {
+    if (!AddInstSelectorCallback)
+      return make_error<StringError>("No InstSelectorCallback!",
+                                     inconvertibleErrorCode());
+    AddInstSelectorCallback(MFPM, OptLevel);
+  }
+  return Error::success();
+}
+
+void PassBuilder::addMachineSSAOptimizationPasses(
+    MachineFunctionPassManager &MFPM, CodeGenOptLevel Level) {
+  // Pre-ra tail duplication.
+  MFPM.addPass(EarlyTailDuplicatePass());
+
+  // Optimize PHIs before DCE: removing dead PHI cycles may make more
+  // instructions dead.
+  MFPM.addPass(OptimizePHIsPass());
+
+  // This pass merges large allocas. StackSlotColoring is a different pass
+  // which merges spill slots.
+  MFPM.addPass(StackColoringPass());
+
+  // If the target requests it, assign local variables to stack slots relative
+  // to one another and simplify frame index references where possible.
+  MFPM.addPass(LocalStackSlotAllocationPass());
+
+  // With optimization, dead code should already be eliminated. However
+  // there is one known exception: lowered code for arguments that are only
+  // used by tail calls, where the tail calls reuse the incoming stack
+  // arguments directly (see t11 in test/CodeGen/X86/sibcall.ll).
+  MFPM.addPass(DeadMachineInstructionElimPass());
+
+  // Allow targets to insert passes that improve instruction level parallelism,
+  // like if-conversion. Such passes will typically need dominator trees and
+  // loop info, just like LICM and CSE below.
+  invokeILPOptsEPCallbacks(MFPM, Level);
+
+  MFPM.addPass(EarlyMachineLICMPass());
+  MFPM.addPass(MachineCSEPass());
+
+  MFPM.addPass(MachineSinkingPass());
+
+  MFPM.addPass(PeepholeOptimizerPass());
+  // Clean-up the dead code that may have been generated by peephole
+  // rewriting.
+  MFPM.addPass(DeadMachineInstructionElimPass());
+}
+
+Error PassBuilder::setStartStop(ModulePassManager &MPM) {
+  auto StartStopInfoOrErr = TargetPassConfig::getStartStopInfo(*PIC);
+  if (!StartStopInfoOrErr)
+    return StartStopInfoOrErr.takeError();
+  auto &SSI = *StartStopInfoOrErr;
+
+  bool Started = SSI.StartPass.empty(), Stopped = false;
+  // Return true if pass is skipped.
+  auto Filter = [&, StartInstanceNum = 0u,
+                 StopInstanceNum = 0u](StringRef Name) mutable {
+    if (!Started) {
+      if (PIC->getPassNameForClassName(Name) == SSI.StartPass)
+        ++StartInstanceNum;
+      if (StartInstanceNum == SSI.StartInstanceNum) {
+        Started = true;
+        return SSI.StartAfter;
+      }
+      return true;
+    }
+
+    if (!Stopped) {
+      if (!SSI.StopPass.empty() &&
+          PIC->getPassNameForClassName(Name) == SSI.StopPass)
+        ++StopInstanceNum;
+      if (StopInstanceNum == SSI.StopInstanceNum) {
+        Stopped = true;
+        return !SSI.StopAfter;
+      }
+      return false;
+    }
+
+    return !Started || Stopped;
+  };
+
+  MPM.eraseIf(Filter);
+  if (!Started) {
+    return make_error<StringError>(
+        "Can't find start pass \"" + SSI.StartPass + "\".",
+        std::make_error_code(std::errc::invalid_argument));
+  }
+  if (!Stopped && !SSI.StopPass.empty()) {
+    return make_error<StringError>(
+        "Can't find stop pass \"" + SSI.StopPass + "\".",
+        std::make_error_code(std::errc::invalid_argument));
+  }
+  return Error::success();
+}
+
+Error PassBuilder::addRegisterAllocatorPasses(
+    MachineFunctionPassManager &MFPM, const CGPassBuilderOption &Options) {
+  CodeGenOptLevel Level = TM->getOptLevel();
+  const bool OptimizeRegAlloc =
+      Options.OptimizeRegAlloc.value_or(Level != CodeGenOptLevel::None);
+
+  if (OptimizeRegAlloc) {
+    AddOptimizedRegAllocCallback(*this, MFPM);
+  } else {
+    AddFastRegAllocCallback(MFPM);
+  }
+  return Error::success();
+}
+
+Error PassBuilder::addRegAllocPass(MachineFunctionPassManager &MFPM,
+                                   StringRef Filter) {
+  return Error::success();
+}
+
+Expected<ModulePassManager> PassBuilder::buildDefaultCodeGenPipeline(
+    raw_pwrite_stream &Out, raw_pwrite_stream *DwoOut, CodeGenFileType FileType,
+    MCContext &Ctx) {
+  if (!TM)
+    return make_error<StringError>("Need a TargetMachine instance!",
+                                   inconvertibleErrorCode());
+
+  CGPassBuilderOption Options = getCGPassBuilderOption();
+  CodeGenPassPipelineTunningCallback(Options);
+  CodeGenOptLevel OptLevel = TM->getOptLevel();
+
+  bool PrintAsm = TargetPassConfig::willCompleteCodeGenPipeline();
+  bool PrintMIR = !PrintAsm && FileType != CodeGenFileType::Null;
+
+  ModulePassManager MPM;
+  FunctionPassManager FPM;
+  MachineFunctionPassManager MFPM;
+
+  // IR part
+  MPM.addPass(RequireAnalysisPass<MachineModuleAnalysis, Module>());
+  MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
+  MPM.addPass(RequireAnalysisPass<CollectorMetadataAnalysis, Module>());
+  if (TM->useEmulatedTLS())
+    MPM.addPass(LowerEmuTLSPass());
+  MPM.addPass(PreISelIntrinsicLoweringPass(TM));
+
+  // For MachO, lower @llvm.global_dtors into @llvm.global_ctors with
+  // __cxa_atexit() calls to avoid emitting the deprecated __mod_term_func.
+  if (TM->getTargetTriple().isOSBinFormatMachO() &&
+      !Options.DisableAtExitBasedGlobalDtorLowering)
+    MPM.addPass(LowerGlobalDtorsPass());
+
+  FPM.addPass(ExpandLargeDivRemPass(TM));
+  FPM.addPass(ExpandLargeFpConvertPass(TM));
+
+  // Extension point?
+
+  // Run loop strength reduction before anything else.
+  if (OptLevel != CodeGenOptLevel::None) {
+    if (!Options.DisableLSR)
+      FPM.addPass(createFunctionToLoopPassAdaptor(LoopStrengthReducePass(),
+                                                  /*UseMemorySSA=*/true));
+    // The MergeICmpsPass tries to create memcmp calls by grouping sequences of
+    // loads and compares. ExpandMemCmpPass then tries to expand those calls
+    // into optimally-sized loads and compares. The transforms are enabled by a
+    // target lowering hook.
+    if (!Options.DisableMergeICmps)
+      FPM.addPass(MergeICmpsPass());
+    FPM.addPass(ExpandMemCmpPass(TM));
+  }
+
+  // Run GC lowering passes for builtin collectors
+  FPM.addPass(GCLoweringPass());
+  MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
+  MPM.addPass(ShadowStackGCLoweringPass());
+  FPM = FunctionPassManager();
+  invokeGCLoweringEPCallbacks(FPM);
+
+  // Make sure that no unreachable blocks are instruction selected.
+  FPM.addPass(UnreachableBlockElimPass());
+
+  if (OptLevel != CodeGenOptLevel::None) {
+    if (!Options.DisableConstantHoisting)
+      FPM.addPass(ConstantHoistingPass());
+    if (!Options.DisableReplaceWithVecLib)
+      FPM.addPass(ReplaceWithVeclib());
+    if (!Options.DisablePartialLibcallInlining)
+      FPM.addPass(PartiallyInlineLibCallsPass());
+  }
+
+  // Instrument function entry after all inlining.
+  FPM.addPass(EntryExitInstrumenterPass(/*PostInlining=*/true));
+
+  // Add scalarization of target's unsupported masked memory intrinsics pass.
+  // the unsupported intrinsic will be replaced with a chain of basic blocks,
+  // that stores/loads element one-by-one if the appropriate mask bit is set.
+  FPM.addPass(ScalarizeMaskedMemIntrinPass());
+
+  // Expand reduction intrinsics into shuffle sequences if the target wants to.
+  // Allow disabling it for testing purposes.
+  if (!Options.DisableExpandReductions)
+    FPM.addPass(ExpandReductionsPass());
+
+  if (OptLevel != CodeGenOptLevel::None) {
+    FPM.addPass(TLSVariableHoistPass());
+
+    // Convert conditional moves to conditional jumps when profitable.
+    if (!Options.DisableSelectOptimize)
+      FPM.addPass(SelectOptimizePass(TM));
+
+    // CodeGen prepare
+    if (!Options.DisableCGP)
+      FPM.addPass(CodeGenPreparePass(TM));
+  }
+
+  // Turn exception handling constructs into something the code generators can
+  // handle.
+  if (auto Err = addExceptionHandlingPasses(FPM))
+    return std::move(Err);
+
+  { // Pre isel extension
+    ModulePassManager ISelPreparePasses;
+    invokeISelPrepareEPCallbacks(ISelPreparePasses);
+    MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
+    MPM.addPass(std::move(ISelPreparePasses));
+  }
+
+  if (OptLevel != CodeGenOptLevel::None)
+    FPM.addPass(ObjCARCContractPass());
+  FPM.addPass(CallBrPreparePass());
+  // Add both the safe stack and the stack protection passes: each of them will
+  // only protect functions that have corresponding attributes.
+  FPM.addPass(SafeStackPass(TM));
+  FPM.addPass(StackProtectorPass(TM));
+
+  if (PrintMIR) {
+    if (Options.RequiresCodeGenSCCOrder)
+      MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(
+          createCGSCCToFunctionPassAdaptor(std::move(FPM))));
+    else
+      MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
+    MPM.addPass(PrintMIRPreparePass(Out));
+    FPM = FunctionPassManager();
+  }
+
+  if (auto Err = addInstructionSelectorPasses(MFPM, Options))
+    return std::move(Err);
+
+  // Expand pseudo-instructions emitted by ISel. Don't run the verifier before
+  // FinalizeISel.
+  MFPM.addPass(FinalizeISelPass());
+
+  // Add passes that optimize machine instructions in SSA form.
+  if (OptLevel != CodeGenOptLevel::None) {
+    invokeMachineSSAOptimizationEarlyEPCallbacks(MFPM, OptLevel);
+    addMachineSSAOptimizationPasses(MFPM, OptLevel);
+    invokeMachineSSAOptimizationLastEPCallbacks(MFPM, OptLevel);
+  } else {
+    MFPM.addPass(LocalStackSlotAllocationPass());
+  }
+
+  if (TM->Options.EnableIPRA)
+    MFPM.addPass(RegUsageInfoPropagationPass());
+
+  // Run pre-ra passes.
+  invokePreRegAllocEPCallbacks(MFPM, OptLevel);
+
+  if (EnableFSDiscriminator) {
+    MFPM.addPass(
+        MIRAddFSDiscriminatorsPass(sampleprof::FSDiscriminatorPass::Pass1));
+    const std::string ProfileFile = getFSProfileFile(TM, Options);
+    if (!ProfileFile.empty() && !Options.DisableRAFSProfileLoader)
+      MFPM.addPass(MIRProfileLoaderNewPass(
+          ProfileFile, getFSRemappingFile(TM, Options),
+          sampleprof::FSDiscriminatorPass::Pass1, nullptr));
+  }
+
+  if (auto Err = addRegisterAllocatorPasses(MFPM, Options))
+    return std::move(Err);
+
+  invokePostRegAllocEPCallbacks(MFPM, OptLevel);
+
+  MFPM.addPass(RemoveRedundantDebugValuesPass());
+  MFPM.addPass(FixupStatepointCallerSavedPass());
+
+  // Insert prolog/epilog code.  Eliminate abstract frame index references...
+  if (OptLevel != CodeGenOptLevel::None) {
+    MFPM.addPass(PostRAMachineSinkingPass());
+    MFPM.addPass(ShrinkWrapPass());
+  }
+
+  if (!Options.DisablePrologEpilogInserterPass)
+    MFPM.addPass(PrologEpilogInserterPass());
+  /// Add passes that optimize machine instructions after register allocation.
+  if (OptLevel != CodeGenOptLevel::None)
+    invokeMachineLateOptimizationEPCallbacks(MFPM, OptLevel);
+
+  // Expand pseudo instructions before second scheduling pass.
+  MFPM.addPass(ExpandPostRAPseudosPass());
+
+  // Run pre-sched2 passes.
+  invokePreSched2EPCallbacks(MFPM, OptLevel);
+
+  if (Options.EnableImplicitNullChecks)
+    MFPM.addPass(ImplicitNullChecksPass());
+
+  // Second pass scheduler.
+  // Let Target optionally insert this pass by itself at some other
+  // point.
+  if (OptLevel != CodeGenOptLevel::None &&
+      !TM->targetSchedulesPostRAScheduling()) {
+    if (Options.MISchedPostRA)
+      MFPM.addPass(PostMachineSchedulerPass());
+    else
+      MFPM.addPass(PostRASchedulerPass());
+  }
+
+  // GC, replacement for GCMachineCodeAnalysis
+  MFPM.addPass(GCMachineCodeInsertionPass());
+
+  // Basic block placement.
+  if (OptLevel != CodeGenOptLevel::None) {
+    if (EnableFSDiscriminator) {
+      MFPM.addPass(
+          MIRAddFSDiscriminatorsPass(sampleprof::FSDiscriminatorPass::Pass2));
+      const std::string ProfileFile = getFSProfileFile(TM, Options);
+      if (!ProfileFile.empty() && !Options.DisableLayoutFSProfileLoader)
+        MFPM.addPass(MIRProfileLoaderNewPass(
+            ProfileFile, getFSRemappingFile(TM, Options),
+            sampleprof::FSDiscriminatorPass::Pass2, nullptr));
+    }
+    MFPM.addPass(MachineBlockPlacementPass());
+  }
+
+  // Insert before XRay Instrumentation.
+  MFPM.addPass(FEntryInserterPass());
+  MFPM.addPass(XRayInstrumentationPass());
+  MFPM.addPass(PatchableFunctionPass());
+
+  invokePreEmitEPCallbacks(MFPM, OptLevel);
+
+  if (TM->Options.EnableIPRA)
+    // Collect register usage information and produce a register mask of
+    // clobbered registers, to be used to optimize call sites.
+    MFPM.addPass(RegUsageInfoCollectorPass());
+
+  // FIXME: Some backends are incompatible with running the verifier after
+  // addPreEmitPass.  Maybe only pass "false" here for those targets?
+  MFPM.addPass(FuncletLayoutPass());
+
+  MFPM.addPass(StackMapLivenessPass());
+  MFPM.addPass(LiveDebugValuesPass());
+  MFPM.addPass(MachineSanitizerBinaryMetadata());
+
+  if (TM->Options.EnableMachineOutliner && OptLevel != CodeGenOptLevel::None &&
+      Options.EnableMachineOutliner != RunOutliner::NeverOutline) {
+    bool RunOnAllFunctions =
+        (Options.EnableMachineOutliner == RunOutliner::AlwaysOutline);
+    bool AddOutliner =
+        RunOnAllFunctions || TM->Options.SupportsDefaultOutlining;
+    if (AddOutliner) {
+      FPM.addPass(createFunctionToMachineFunctionPassAdaptor(std::move(MFPM)));
+      if (Options.RequiresCodeGenSCCOrder)
+        MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(
+            createCGSCCToFunctionPassAdaptor(std::move(FPM))));
+      else
+        MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
+      MPM.addPass(MachineOutlinerPass(RunOnAllFunctions));
+      FPM = FunctionPassManager();
+      MFPM = MachineFunctionPassManager();
+    }
+  }
+
+  if (Options.GCEmptyBlocks)
+    MFPM.addPass(GCEmptyBasicBlocksPass());
+
+  if (EnableFSDiscriminator)
+    MFPM.addPass(
+        MIRAddFSDiscriminatorsPass(sampleprof::FSDiscriminatorPass::PassLast));
+
+  bool NeedsBBSections =
+      TM->getBBSectionsType() != llvm::BasicBlockSection::None;
+  // Machine function splitter uses the basic block sections feature. Both
+  // cannot be enabled at the same time. We do not apply machine function
+  // splitter if -basic-block-sections is requested.
+  if (!NeedsBBSections && (TM->Options.EnableMachineFunctionSplitter ||
+                           Options.EnableMachineFunctionSplitter)) {
+    const std::string ProfileFile = getFSProfileFile(TM, Options);
+    if (!ProfileFile.empty()) {
+      if (EnableFSDiscriminator) {
+        MFPM.addPass(MIRProfileLoaderNewPass(
+            ProfileFile, getFSRemappingFile(TM, Options),
+            sampleprof::FSDiscriminatorPass::PassLast, nullptr));
+      } else {
+        // Sample profile is given, but FSDiscriminator is not
+        // enabled, this may result in performance regression.
+        WithColor::warning()
----------------
arsenm wrote:

Shouldn't be spamming warnings here (but I'm guessing this is already an issue what whatever's in TargetPassConfig) 

https://github.com/llvm/llvm-project/pull/104725


More information about the llvm-commits mailing list