[llvm] [lld] [ThinLTO] Add module names to ThinLTO final objects (PR #74160)

via llvm-commits llvm-commits at lists.llvm.org
Fri Dec 1 15:57:07 PST 2023


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-llvm-transforms

Author: None (xur-llvm)

<details>
<summary>Changes</summary>

Emit the module name for final ThinLTO objects under
--lto-output-module-name. This will add the module name as part of the native objects or asm files for
 -Wl,--lto-emit-asm, -Wl,--lto-obj-path=<path>, and -Wl,--save-temps=prelink
    
Also fix the bug where these options won't work with thinlto-cache is used.

---

Patch is 35.22 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/74160.diff


17 Files Affected:

- (modified) lld/ELF/Config.h (+1) 
- (modified) lld/ELF/Driver.cpp (+2) 
- (modified) lld/ELF/LTO.cpp (+55-13) 
- (modified) lld/ELF/Options.td (+3) 
- (added) lld/test/ELF/lto/thinlto_finallink_output.ll (+59) 
- (modified) llvm/include/llvm/LTO/LTO.h (+12-1) 
- (modified) llvm/include/llvm/ProfileData/InstrProfData.inc (+1) 
- (modified) llvm/include/llvm/Transforms/Instrumentation.h (+6) 
- (modified) llvm/include/llvm/Transforms/Instrumentation/InstrProfiling.h (+6) 
- (modified) llvm/include/llvm/Transforms/Instrumentation/PGOInstrumentation.h (+4-2) 
- (modified) llvm/lib/Passes/PassBuilderPipelines.cpp (+9-1) 
- (modified) llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp (+116-15) 
- (modified) llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp (+2) 
- (added) llvm/test/Transforms/PGOProfile/Inputs/cspgo_bar_sample.ll (+82) 
- (added) llvm/test/Transforms/PGOProfile/counter_promo_sampling.ll (+78) 
- (added) llvm/test/Transforms/PGOProfile/cspgo_sample.ll (+112) 
- (added) llvm/test/Transforms/PGOProfile/instrprof_sample.ll (+47) 


``````````diff
diff --git a/lld/ELF/Config.h b/lld/ELF/Config.h
index 56229334f9a44ae..afb8a692457d783 100644
--- a/lld/ELF/Config.h
+++ b/lld/ELF/Config.h
@@ -323,6 +323,7 @@ struct Config {
   bool zText;
   bool zRetpolineplt;
   bool zWxneeded;
+  bool ltoOutputModuleName;
   DiscardPolicy discard;
   GnuStackKind zGnustack;
   ICFLevel icf;
diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp
index 5f88389a5840824..460e67eae888d9e 100644
--- a/lld/ELF/Driver.cpp
+++ b/lld/ELF/Driver.cpp
@@ -1317,6 +1317,8 @@ static void readConfigs(opt::InputArgList &args) {
   else
     error("invalid codegen optimization level for LTO: " + Twine(ltoCgo));
   config->ltoObjPath = args.getLastArgValue(OPT_lto_obj_path_eq);
+  config->ltoOutputModuleName = args.hasFlag(
+      OPT_lto_output_module_name, OPT_no_lto_output_module_name, false);
   config->ltoPartitions = args::getInteger(args, OPT_lto_partitions, 1);
   config->ltoSampleProfile = args.getLastArgValue(OPT_lto_sample_profile);
   config->ltoBasicBlockSections =
diff --git a/lld/ELF/LTO.cpp b/lld/ELF/LTO.cpp
index 504c12aac6c5696..8dc5d814a04281f 100644
--- a/lld/ELF/LTO.cpp
+++ b/lld/ELF/LTO.cpp
@@ -351,23 +351,65 @@ std::vector<InputFile *> BitcodeCompiler::compile() {
   if (!config->thinLTOCacheDir.empty())
     pruneCache(config->thinLTOCacheDir, config->thinLTOCachePolicy, files);
 
-  if (!config->ltoObjPath.empty()) {
-    saveBuffer(buf[0], config->ltoObjPath);
-    for (unsigned i = 1; i != maxTasks; ++i)
-      saveBuffer(buf[i], config->ltoObjPath + Twine(i));
-  }
+  auto doSaveBuffer = [&](const StringRef Arg, const StringRef Suffix = "") {
+    // There are a few cases:
+    // (1) path/test.o (using current directory)
+    // (2) /tmp/test-a7a1e4.o (using tmp directory)
+    // (3) if the input obj is in a archive. the module name is like
+    // "arch/x86/built-in.a(procfs.o at 11368)"
+    //
+    // This function replaces '/' and '(' with '-' and terminates at the
+    // last '.'.  it returns the following for the above cases, respectively,
+    // (1) path_test
+    // (2) tmp_test-a7a1e4 (remove the first /).
+    // (3) arch_x86_build-in.a_procfs
+    //
+    auto getFileNameString = [](const StringRef Str) {
+      if (Str.empty())
+        return std::string();
+      size_t End = Str.find_last_of(".");
+      size_t Begin = 0;
+      if (Str[0] == '/' || Str[0] == '\\')
+        Begin = 1;
+      std::string Ret = Str.substr(Begin, End - Begin).str();
+      auto position = std::string::npos;
+      while ((position = Ret.find_first_of("/\\(")) != std::string::npos) {
+        Ret.replace(position, 1, 1, '_');
+      }
+      return Ret;
+    };
+
+    auto saveBufferOrFile = [](const StringRef &Buf, const MemoryBuffer *File,
+                               const Twine &Path) {
+      if (Buf.empty() && File)
+        return saveBuffer(File->getBuffer(), Path);
+      saveBuffer(Buf, Path);
+    };
 
-  if (config->saveTempsArgs.contains("prelink")) {
     if (!buf[0].empty())
-      saveBuffer(buf[0], config->outputFile + ".lto.o");
-    for (unsigned i = 1; i != maxTasks; ++i)
-      saveBuffer(buf[i], config->outputFile + Twine(i) + ".lto.o");
-  }
+      saveBufferOrFile(buf[0], files[0].get(), Arg + Suffix);
+    for (unsigned i = 1; i != maxTasks; ++i) {
+      if (!config->ltoOutputModuleName) {
+        saveBufferOrFile(buf[i], files[i].get(), Arg + Twine(i) + Suffix);
+      } else {
+        const std::string Name =
+            getFileNameString(ltoObj->getModuleName(i - 1));
+        saveBufferOrFile(buf[i], files[i].get(),
+                         Arg + "_" + Twine(i) + "_" + Name + Suffix);
+      }
+    }
+  };
+
+  if (!config->ltoObjPath.empty())
+    doSaveBuffer(config->ltoObjPath,
+                 config->ltoOutputModuleName ? ".lto.o" : "");
+
+  if (config->saveTempsArgs.contains("prelink"))
+    doSaveBuffer(config->outputFile, ".lto.o");
 
   if (config->ltoEmitAsm) {
-    saveBuffer(buf[0], config->outputFile);
-    for (unsigned i = 1; i != maxTasks; ++i)
-      saveBuffer(buf[i], config->outputFile + Twine(i));
+    doSaveBuffer(config->outputFile,
+                 config->ltoOutputModuleName ? ".lto.s" : "");
     return {};
   }
 
diff --git a/lld/ELF/Options.td b/lld/ELF/Options.td
index 9a23f48350644a0..c2fa9237f9561f8 100644
--- a/lld/ELF/Options.td
+++ b/lld/ELF/Options.td
@@ -610,6 +610,9 @@ defm lto_pgo_warn_mismatch: BB<"lto-pgo-warn-mismatch",
 defm lto_known_safe_vtables : EEq<"lto-known-safe-vtables",
   "When --lto-validate-all-vtables-have-type-infos is enabled, skip validation on these vtables (_ZTV symbols)">;
 def lto_obj_path_eq: JJ<"lto-obj-path=">;
+defm lto_output_module_name: BB<"lto-output-module-name",
+  "In ThinLTO, using the module name in the final native objects or asm files",
+  "Do not include the module names in final objects and asm files">;
 def lto_sample_profile: JJ<"lto-sample-profile=">,
   HelpText<"Sample profile file path">;
 defm lto_validate_all_vtables_have_type_infos: BB<"lto-validate-all-vtables-have-type-infos",
diff --git a/lld/test/ELF/lto/thinlto_finallink_output.ll b/lld/test/ELF/lto/thinlto_finallink_output.ll
new file mode 100644
index 000000000000000..08c9aea7ae92cd0
--- /dev/null
+++ b/lld/test/ELF/lto/thinlto_finallink_output.ll
@@ -0,0 +1,59 @@
+; REQUIRES: x86
+;
+; RUN: cd %T
+; RUN: opt -module-summary %s -o obj1.o
+; RUN: opt -module-summary %p/Inputs/thinlto.ll -o obj2.o
+; RUN: opt -module-summary %s -o %t_obj3.o
+; RUN: opt -module-summary %p/Inputs/thinlto.ll -o %t_obj4.o
+;
+; Objects with a relative path.
+; RUN: rm -f *.lto.o *.s
+; RUN: ld.lld --lto-output-module-name --save-temps=prelink --lto-obj-path=aaa --lto-emit-asm --thinlto-jobs=1 --entry=f obj1.o obj2.o -o bin1
+; RUN: ls -1 *.lto.o *.s | FileCheck %s --check-prefixes=OBJPATHOUT1,PRELINKOUT1
+; With thinlto-jobs=all.
+; RUN: rm -f *.lto.o *.s
+; RUN: ld.lld --lto-output-module-name --save-temps=prelink --lto-obj-path=aaa --lto-emit-asm --thinlto-jobs=all --entry=f obj1.o obj2.o -o bin1
+; RUN: ls -1 *.lto.o *.s | FileCheck %s --check-prefixes=OBJPATHOUT1,PRELINKOUT1
+; Objects with an absolute path.
+; RUN: rm -f *.lto.o *.s
+; RUN: ld.lld --lto-output-module-name --save-temps=prelink --lto-obj-path=aaa --lto-emit-asm --thinlto-jobs=1 --entry=f %t_obj3.o %t_obj4.o -o bin2
+; RUN: ls -1 *.lto.o *.s | FileCheck %s --check-prefixes=OBJPATHOUT2,PRELINKOUT2
+; Objects in an archive
+; RUN: rm -f *.lto.o *.s
+; RUN: llvm-ar rcS ar.a obj1.o obj2.o
+; RUN: ld.lld --lto-output-module-name --save-temps=prelink --lto-obj-path=aaa --lto-emit-asm --thinlto-jobs=1 --entry=f ar.a -o bin1
+; RUN: ls -1 *.lto.o *.s | FileCheck %s --check-prefixes=OBJPATHOUT3,PRELINKOUT3
+; Use with thinlto-cahce
+; RUN: rm -f *.lto.o *.s
+; RUN: ld.lld --lto-output-module-name --save-temps=prelink --thinlto-cache-dir=thinlto-cache --lto-emit-asm --lto-obj-path=aaa --thinlto-jobs=1 --entry=f obj1.o obj2.o -o bin1
+; RUN: ls -1 *.lto.o *.s | FileCheck %s --check-prefixes=OBJPATHOUT1,PRELINKOUT1
+;
+; OBJPATHOUT1-DAG: aaa_1_obj1.lto.o
+; OBJPATHOUT1-DAG: aaa_2_obj2.lto.o
+; PRELINKOUT1-DAG: bin1_1_obj1.lto.o
+; PRELINKOUT1-DAG: bin1_2_obj2.lto.o
+; PRELINKOUT1-DAG: bin1_1_obj1.lto.s
+; PRELINKOUT1-DAG: bin1_2_obj2.lto.s
+; OBJPATHOUT2-DAG: aaa_1_{{.*}}_obj3.lto.o
+; OBJPATHOUT2-DAG: aaa_2_{{.*}}obj4.lto.o
+; PRELINKOUT2-DAG: bin2_1_{{.*}}obj3.lto.o
+; PRELINKOUT2-DAG: bin2_2_{{.*}}obj4.lto.o
+; PRELINKOUT2-DAG: bin2_1_{{.*}}obj3.lto.s
+; PRELINKOUT2-DAG: bin2_2_{{.*}}obj4.lto.s
+; OBJPATHOUT3-DAG: aaa_1_ar.a_obj1.lto.o
+; OBJPATHOUT3-DAG: aaa_2_ar.a_obj2.lto.o
+; PRELINKOUT3-DAG: bin1_1_ar.a_obj1.lto.o
+; PRELINKOUT3-DAG: bin1_2_ar.a_obj2.lto.o
+; PRELINKOUT3-DAG: bin1_1_ar.a_obj1.lto.s
+; PRELINKOUT3-DAG: bin1_2_ar.a_obj2.lto.s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+declare void @g(...)
+
+define void @f() {
+entry:
+  call void (...) @g()
+  ret void
+}
diff --git a/llvm/include/llvm/LTO/LTO.h b/llvm/include/llvm/LTO/LTO.h
index be85c40983475ff..4cb3a512f88bd23 100644
--- a/llvm/include/llvm/LTO/LTO.h
+++ b/llvm/include/llvm/LTO/LTO.h
@@ -299,7 +299,18 @@ class LTO {
 
   /// Static method that returns a list of libcall symbols that can be generated
   /// by LTO but might not be visible from bitcode symbol table.
-  static ArrayRef<const char*> getRuntimeLibcallSymbols();
+  static ArrayRef<const char *> getRuntimeLibcallSymbols();
+
+  /// Return the name of n-th module. This only applies to ThinLTO.
+  StringRef getModuleName(size_t N) const {
+    size_t I = N;
+    if (I >= ThinLTO.ModuleMap.size())
+      return "";
+    auto it = ThinLTO.ModuleMap.begin();
+    while (I--)
+      it++;
+    return (*it).first;
+  }
 
 private:
   Config Conf;
diff --git a/llvm/include/llvm/ProfileData/InstrProfData.inc b/llvm/include/llvm/ProfileData/InstrProfData.inc
index 13be2753e514efe..6294505ac396856 100644
--- a/llvm/include/llvm/ProfileData/InstrProfData.inc
+++ b/llvm/include/llvm/ProfileData/InstrProfData.inc
@@ -676,6 +676,7 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure,
 #define INSTR_PROF_PROFILE_RUNTIME_VAR __llvm_profile_runtime
 #define INSTR_PROF_PROFILE_COUNTER_BIAS_VAR __llvm_profile_counter_bias
 #define INSTR_PROF_PROFILE_SET_TIMESTAMP __llvm_profile_set_timestamp
+#define INSTR_PROF_PROFILE_SAMPLING_VAR __llvm_profile_sampling
 
 /* The variable that holds the name of the profile data
  * specified via command line. */
diff --git a/llvm/include/llvm/Transforms/Instrumentation.h b/llvm/include/llvm/Transforms/Instrumentation.h
index 392983a19844451..76d4e1de75154ff 100644
--- a/llvm/include/llvm/Transforms/Instrumentation.h
+++ b/llvm/include/llvm/Transforms/Instrumentation.h
@@ -116,12 +116,18 @@ struct InstrProfOptions {
   // Use BFI to guide register promotion
   bool UseBFIInPromotion = false;
 
+  // Use sampling to reduce the profile instrumentation runtime overhead.
+  bool Sampling = false;
+
   // Name of the profile file to use as output
   std::string InstrProfileOutput;
 
   InstrProfOptions() = default;
 };
 
+// Create the variable for profile sampling.
+void createProfileSamplingVar(Module &M);
+
 // Options for sanitizer coverage instrumentation.
 struct SanitizerCoverageOptions {
   enum Type {
diff --git a/llvm/include/llvm/Transforms/Instrumentation/InstrProfiling.h b/llvm/include/llvm/Transforms/Instrumentation/InstrProfiling.h
index cb0c055dcb74ae8..d0581ff72a15864 100644
--- a/llvm/include/llvm/Transforms/Instrumentation/InstrProfiling.h
+++ b/llvm/include/llvm/Transforms/Instrumentation/InstrProfiling.h
@@ -86,6 +86,9 @@ class InstrProfiling : public PassInfoMixin<InstrProfiling> {
   /// Returns true if profile counter update register promotion is enabled.
   bool isCounterPromotionEnabled() const;
 
+  /// Return true if profile sampling is enabled.
+  bool isSamplingEnabled() const;
+
   /// Count the number of instrumented value sites for the function.
   void computeNumValueSiteCounts(InstrProfValueProfileInst *Ins);
 
@@ -109,6 +112,9 @@ class InstrProfiling : public PassInfoMixin<InstrProfiling> {
   /// acts on.
   Value *getCounterAddress(InstrProfInstBase *I);
 
+  /// Lower the incremental instructions under profile sampling predicates.
+  void doSampling(Instruction *I);
+
   /// Get the region counters for an increment, creating them if necessary.
   ///
   /// If the counter array doesn't yet exist, the profile data variables
diff --git a/llvm/include/llvm/Transforms/Instrumentation/PGOInstrumentation.h b/llvm/include/llvm/Transforms/Instrumentation/PGOInstrumentation.h
index 5b1977b7de9a2ae..7199f27dbc991a8 100644
--- a/llvm/include/llvm/Transforms/Instrumentation/PGOInstrumentation.h
+++ b/llvm/include/llvm/Transforms/Instrumentation/PGOInstrumentation.h
@@ -43,12 +43,14 @@ class FileSystem;
 class PGOInstrumentationGenCreateVar
     : public PassInfoMixin<PGOInstrumentationGenCreateVar> {
 public:
-  PGOInstrumentationGenCreateVar(std::string CSInstrName = "")
-      : CSInstrName(CSInstrName) {}
+  PGOInstrumentationGenCreateVar(std::string CSInstrName = "",
+                                 bool Sampling = false)
+      : CSInstrName(CSInstrName), ProfileSampling(Sampling) {}
   PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM);
 
 private:
   std::string CSInstrName;
+  bool ProfileSampling;
 };
 
 /// The instrumentation (profile-instr-gen) pass for IR based PGO.
diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
index 600f8d43caaf216..5595f92e24aa861 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -273,6 +273,9 @@ static cl::opt<AttributorRunOption> AttributorRun(
                clEnumValN(AttributorRunOption::NONE, "none",
                           "disable attributor runs")));
 
+static cl::opt<bool> EnableSampledInstr(
+    "enable-sampled-instr", cl::init(false), cl::Hidden,
+    cl::desc("Enable profile instrumentation sampling (default = off)"));
 static cl::opt<bool> UseLoopVersioningLICM(
     "enable-loop-versioning-licm", cl::init(false), cl::Hidden,
     cl::desc("Enable the experimental Loop Versioning LICM pass"));
@@ -805,6 +808,10 @@ void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM,
   // Do counter promotion at Level greater than O0.
   Options.DoCounterPromotion = true;
   Options.UseBFIInPromotion = IsCS;
+  if (EnableSampledInstr) {
+    Options.Sampling = true;
+    Options.DoCounterPromotion = false;
+  }
   Options.Atomic = AtomicCounterUpdate;
   MPM.addPass(InstrProfiling(Options, IsCS));
 }
@@ -1117,7 +1124,8 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
   }
   if (PGOOpt && Phase != ThinOrFullLTOPhase::ThinLTOPostLink &&
       PGOOpt->CSAction == PGOOptions::CSIRInstr)
-    MPM.addPass(PGOInstrumentationGenCreateVar(PGOOpt->CSProfileGenFile));
+    MPM.addPass(PGOInstrumentationGenCreateVar(PGOOpt->CSProfileGenFile,
+                                               EnableSampledInstr));
 
   if (PGOOpt && Phase != ThinOrFullLTOPhase::ThinLTOPostLink &&
       !PGOOpt->MemoryProfile.empty())
diff --git a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
index 57fcfd53836911b..89e8e152fcee7e4 100644
--- a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
+++ b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
@@ -36,6 +36,7 @@
 #include "llvm/IR/Instruction.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/MDBuilder.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/Type.h"
 #include "llvm/InitializePasses.h"
@@ -48,6 +49,7 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/TargetParser/Triple.h"
 #include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/ModuleUtils.h"
 #include "llvm/Transforms/Utils/SSAUpdater.h"
 #include <algorithm>
@@ -148,6 +150,16 @@ cl::opt<bool> SkipRetExitBlock(
     "skip-ret-exit-block", cl::init(true),
     cl::desc("Suppress counter promotion if exit blocks contain ret."));
 
+static cl::opt<bool>
+    SampledInstrument("sampled-instr", cl::ZeroOrMore, cl::init(false),
+                      cl::desc("Do PGO instrumentation sampling"));
+
+static cl::opt<unsigned> SampledInstrumentDuration(
+    "sampled-instr-duration",
+    cl::desc("Set the sample rate for profile instrumentation, with a value "
+             "range 0 to 65535. We will record this number of samples for "
+             "every 65536 count updates"),
+    cl::init(200));
 ///
 /// A helper class to promote one counter RMW operation in the loop
 /// into register update.
@@ -412,30 +424,91 @@ PreservedAnalyses InstrProfiling::run(Module &M, ModuleAnalysisManager &AM) {
   return PreservedAnalyses::none();
 }
 
+// Perform instrumentation sampling.
+// We transform:
+//   Increment_Instruction;
+// to:
+//   if (__llvm_profile_sampling__ <= SampleDuration) {
+//     Increment_Instruction;
+//   }
+//   __llvm_profile_sampling__ += 1;
+//
+// "__llvm_profile_sampling__" is a thread-local global shared by all PGO
+// instrumentation variables (value-instrumentation and edge instrumentation).
+// It has a unsigned short type and will wrapper around when overflow.
+//
+// Note that, the code snippet after the transformation can still be
+// counter promoted. But I don't see a reason for that because the
+// counter updated should be sparse. That's the reason we disable
+// counter promotion by default when sampling is enabled.
+// This can be overwritten by the internal option.
+//
+void InstrProfiling::doSampling(Instruction *I) {
+  if (!isSamplingEnabled())
+    return;
+  int SampleDuration = SampledInstrumentDuration.getValue();
+  unsigned WrapToZeroValue = USHRT_MAX + 1;
+  assert(SampleDuration < USHRT_MAX);
+  auto *Int16Ty = Type::getInt16Ty(M->getContext());
+  auto *CountVar =
+      M->getGlobalVariable(INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_SAMPLING_VAR));
+  assert(CountVar && "CountVar not set properly");
+  IRBuilder<> CondBuilder(I);
+  auto *LoadCountVar = CondBuilder.CreateLoad(Int16Ty, CountVar);
+  auto *DurationCond = CondBuilder.CreateICmpULE(
+      LoadCountVar, CondBuilder.getInt16(SampleDuration));
+  MDBuilder MDB(I->getContext());
+  MDNode *BranchWeight =
+      MDB.createBranchWeights(SampleDuration, WrapToZeroValue - SampleDuration);
+  Instruction *ThenTerm = SplitBlockAndInsertIfThen(
+      DurationCond, I, /* Unreacheable */ false, BranchWeight);
+  IRBuilder<> IncBuilder(I);
+  auto *NewVal = IncBuilder.CreateAdd(LoadCountVar, IncBuilder.getInt16(1));
+  IncBuilder.CreateStore(NewVal, CountVar);
+  I->moveBefore(ThenTerm);
+}
+
 bool InstrProfiling::lowerIntrinsics(Function *F) {
   bool MadeChange = false;
   PromotionCandidates.clear();
+  SmallVector<InstrProfInstBase *, 8> InstrProfInsts;
+
   for (BasicBlock &BB : *F) {
     for (Instruction &Instr : llvm::make_early_inc_range(BB)) {
-      if (auto *IPIS = dyn_cast<InstrProfIncrementInstStep>(&Instr)) {
-        lowerIncrement(IPIS);
-        MadeChange = true;
-      } else if (auto *IPI = dyn_cast<InstrProfIncrementInst>(&Instr)) {
-        lowerIncrement(IPI);
-        MadeChange = true;
-      } else if (auto *IPC = dyn_cast<InstrProfTimestampInst>(&Instr)) {
-        lowerTimestamp(IPC);
-        MadeChange = true;
-      } else if (auto *IPC = dyn_cast<InstrProfCoverInst>(&Instr)) {
-        lowerCover(IPC);
-        MadeChange = true;
-      } else if (auto *IPVP = dyn_cast<InstrProfValueProfileInst>(&Instr)) {
-        lowerValueProfileInst(IPVP);
-        MadeChange = true;
+      if (auto *IP = dyn_cast<InstrProfInstBase>(&Instr)) {
+        InstrProfInsts.push_back(IP);
       }
     }
   }
 
+  for (auto *IP : InstrProfInsts) {
+    if (auto *IPIS = dyn_cast<InstrProfIncrementInstStep>(IP)) {
+      doSampling(IP);
+      lowerIncrement(IPIS);
+      MadeChange = true;
+    } else if (auto *IPI = dyn_cast<InstrProfIncrementInst>(IP)) {
+      doSampling(IP);
+      lowerIncrement(IPI);
+      MadeChange = true;
+    } else if (auto *IPC = dyn_cast<InstrProfTimestampInst>(IP)) {
+      doSampling(IP);
+      lowerTimestamp(IPC);
+      MadeChange = true;
+    } else if (auto *IPC = dyn_cast<InstrProfCoverInst>(IP)) {
+      doSampling(IP);
+      lowerCover(IPC);
+      MadeChange = true;
+    } else if (auto *IPVP = dyn_cast<InstrProfValueProfileInst>(IP)) {
+      doSampling(IP);
+      lowerValueProfileInst(IPVP);
+      MadeChange = true;
+    } else {
+      LLVM_DEBUG(dbgs() << "Invalid InstroProf intrinsic: " << *IP << "\n");
+      // ?? Seeing "call void @llvm.memcpy.p0.p0.i64..." here ??
+      // llvm_unreachable("Invalid InstroProf intrinsic");
+    }
+  }
+
   if (!MadeChange)
     return false;
 
@@ -455,6 +528,12 @@ bool InstrProfiling::isRuntimeCounterRelocationEnabled() const {
   return TT.isOSFuchsia();
 }
 
+bool InstrProfiling::isSamplingEnabled() const {
+  if (SampledInstrument.getNumOccurrences() > 0)
+    return SampledInstrument;
+  return Options.Sampling;
+}
+
 bool InstrProfiling::isCounterPromotionEnabled() const {
   if (DoCounterPromotion.getNumOccurrences() > 0)
     return DoCounterPromotion;
@@ -535,6 +614,9 @@ bool InstrProfiling::run(
   if (NeedsRuntimeHook)
     MadeChange = emitRuntimeHook();
 
+  if (!IsCS && i...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/74160


More information about the llvm-commits mailing list